From 8931d75855de889874c94c1980cc593937213a40 Mon Sep 17 00:00:00 2001 From: "Diederik van Liere (drdee)" Date: Fri, 5 Jul 2019 13:36:52 -0400 Subject: [PATCH 01/13] Escape string in hstore value --- tap_postgres/sync_strategies/logical_replication.py | 8 ++++++-- tests/test_discovery.py | 10 ++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/tap_postgres/sync_strategies/logical_replication.py b/tap_postgres/sync_strategies/logical_replication.py index 6f930bf..d857f82 100644 --- a/tap_postgres/sync_strategies/logical_replication.py +++ b/tap_postgres/sync_strategies/logical_replication.py @@ -10,6 +10,7 @@ import tap_postgres.sync_strategies.common as sync_common from dateutil.parser import parse import psycopg2 +from psycopg2 import sql import copy from select import select from functools import reduce @@ -68,11 +69,14 @@ def tuples_to_map(accum, t): accum[t[0]] = t[1] return accum +def create_hstore_elem_query(elem): + return sql.SQL("SELECT hstore_to_array({})").format(sql.Literal(elem)) + def create_hstore_elem(conn_info, elem): with post_db.open_connection(conn_info) as conn: with conn.cursor() as cur: - sql = """SELECT hstore_to_array('{}')""".format(elem) - cur.execute(sql) + query = create_hstore_elem_query(elem) + cur.execute(query) res = cur.fetchone()[0] hstore_elem = reduce(tuples_to_map, [res[i:i + 2] for i in range(0, len(res), 2)], {}) return hstore_elem diff --git a/tests/test_discovery.py b/tests/test_discovery.py index 96d2bf8..82152bf 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -365,6 +365,16 @@ def test_catalog(self): 'definitions' : tap_postgres.BASE_RECURSIVE_SCHEMAS}, stream_dict.get('schema')) + def test_escaping_values(self): + key = 'nickname' + value = "Dave's Courtyard" + elem = '"{}"=>"{}"'.format(key, value) + + with get_test_connection() as conn: + with conn.cursor() as cur: + query = tap_postgres.sync_strategies.logical_replication.create_hstore_elem_query(elem) + self.assertEqual(query.as_string(cur), "SELECT hstore_to_array('\"nickname\"=>\"Dave''s Courtyard\"')") + class TestEnumTable(unittest.TestCase): maxDiff = None From 4ad8149e3fe3464c6533295ddca24b3dd803d07e Mon Sep 17 00:00:00 2001 From: Dan Mosora Date: Tue, 23 Jul 2019 15:55:34 +0000 Subject: [PATCH 02/13] Version 0.0.60 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8b3a890..1e0acb3 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup setup(name='tap-postgres', - version='0.0.59', + version='0.0.60', description='Singer.io tap for extracting data from PostgreSQL', author='Stitch', url='https://singer.io', From 430d46b12aaeb6ac3a293e7ff34b7fcb8a674262 Mon Sep 17 00:00:00 2001 From: Christopher Merrick Date: Tue, 30 Jul 2019 17:58:50 +0000 Subject: [PATCH 03/13] make discovery work when user has SELECT to only some columns on a table --- tap_postgres/__init__.py | 2 +- tests/test_discovery.py | 57 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/tap_postgres/__init__.py b/tap_postgres/__init__.py index 9f08834..00afb6c 100644 --- a/tap_postgres/__init__.py +++ b/tap_postgres/__init__.py @@ -273,7 +273,7 @@ def produce_table_info(conn): AND NOT a.attisdropped AND pg_class.relkind IN ('r', 'v', 'm') AND n.nspname NOT in ('pg_toast', 'pg_catalog', 'information_schema') -AND has_table_privilege(pg_class.oid, 'SELECT') = true """) +AND has_column_privilege(pg_class.oid, attname, 'SELECT') = true """) for row in cur.fetchall(): row_count, is_view, schema_name, table_name, *col_info = row diff --git a/tests/test_discovery.py b/tests/test_discovery.py index 82152bf..7984318 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -576,6 +576,63 @@ def test_catalog(self): 'definitions' : tap_postgres.BASE_RECURSIVE_SCHEMAS}, stream_dict.get('schema')) +class TestColumnGrants(unittest.TestCase): + maxDiff = None + table_name = 'CHICKEN TIMES' + user = 'tmp_user_for_grant_tests' + password = 'password' + + def setUp(self): + table_spec = {"columns": [{"name" : "id", "type" : "integer", "serial" : True}, + {"name" : 'size integer', "type" : "integer", "quoted" : True}, + {"name" : 'size smallint', "type" : "smallint", "quoted" : True}, + {"name" : 'size bigint', "type" : "bigint", "quoted" : True}], + "name" : TestColumnGrants.table_name} + ensure_test_table(table_spec) + + with get_test_connection() as conn: + cur = conn.cursor() + + sql = """ DROP USER IF EXISTS {} """.format(self.user, self.password) + LOGGER.info(sql) + cur.execute(sql) + + sql = """ CREATE USER {} WITH PASSWORD '{}' """.format(self.user, self.password) + LOGGER.info(sql) + cur.execute(sql) + + sql = """ GRANT SELECT ("id") ON "{}" TO {}""".format(TestColumnGrants.table_name, self.user) + LOGGER.info("running sql: {}".format(sql)) + cur.execute(sql) + + + + + def test_catalog(self): + conn_config = get_test_connection_config() + conn_config['user'] = self.user + conn_config['password'] = self.password + streams = tap_postgres.do_discovery(conn_config) + chicken_streams = [s for s in streams if s['tap_stream_id'] == 'postgres-public-CHICKEN TIMES'] + + self.assertEqual(len(chicken_streams), 1) + stream_dict = chicken_streams[0] + + self.assertEqual(TestStringTableWithPK.table_name, stream_dict.get('table_name')) + self.assertEqual(TestStringTableWithPK.table_name, stream_dict.get('stream')) + + + stream_dict.get('metadata').sort(key=lambda md: md['breadcrumb']) + + self.assertEqual(metadata.to_map(stream_dict.get('metadata')), + {() : {'table-key-properties': [], 'database-name': 'postgres', 'schema-name': 'public', 'is-view': False, 'row-count': 0}, + ('properties', 'id') : {'inclusion': 'available', 'sql-datatype' : 'integer', 'selected-by-default' : True}}) + + self.assertEqual({'definitions' : tap_postgres.BASE_RECURSIVE_SCHEMAS, + 'type': 'object', + 'properties': {'id': {'type': ['null', 'integer'], 'minimum': -2147483648, 'maximum': 2147483647}}}, + stream_dict.get('schema')) + if __name__== "__main__": test1 = TestArraysTable() From 77edd508a7c6438f2ed55a601b32b7e24669d391 Mon Sep 17 00:00:00 2001 From: Dan Mosora Date: Tue, 30 Jul 2019 19:49:48 +0000 Subject: [PATCH 04/13] Version 0.0.61 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1e0acb3..b15c421 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup setup(name='tap-postgres', - version='0.0.60', + version='0.0.61', description='Singer.io tap for extracting data from PostgreSQL', author='Stitch', url='https://singer.io', From 60e7e1f8265e23de873360e99957677b5a788e6a Mon Sep 17 00:00:00 2001 From: Andy Date: Thu, 1 Aug 2019 16:29:37 -0400 Subject: [PATCH 05/13] Fix circleci (#61) * Fix locale issues * Change variable name to not override a module name to make pylint happy * Revert the addition of the catalog flag --- .circleci/config.yml | 1 + tap_postgres/__init__.py | 4 ++-- tap_postgres/sync_strategies/logical_replication.py | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index a0580f0..26c76f1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -10,6 +10,7 @@ jobs: command: | aws s3 cp s3://com-stitchdata-dev-deployment-assets/environments/tap-tester/sandbox dev_env.sh source dev_env.sh + export LC_ALL=C python3 -m venv /usr/local/share/virtualenvs/tap-postgres source /usr/local/share/virtualenvs/tap-postgres/bin/activate pip install . diff --git a/tap_postgres/__init__.py b/tap_postgres/__init__.py index 00afb6c..38dfe6a 100644 --- a/tap_postgres/__init__.py +++ b/tap_postgres/__init__.py @@ -691,9 +691,9 @@ def main_impl(): if args.discover: do_discovery(conn_config) - elif args.properties or args.catalog: + elif args.properties: state = args.state - do_sync(conn_config, args.catalog.to_dict() if args.catalog else args.properties, args.config.get('default_replication_method'), state) + do_sync(conn_config, args.properties, args.config.get('default_replication_method'), state) else: LOGGER.info("No properties were selected") diff --git a/tap_postgres/sync_strategies/logical_replication.py b/tap_postgres/sync_strategies/logical_replication.py index d857f82..bddf10c 100644 --- a/tap_postgres/sync_strategies/logical_replication.py +++ b/tap_postgres/sync_strategies/logical_replication.py @@ -134,8 +134,8 @@ def create_array_elem(elem, sql_datatype, conn_info): #custom datatypes like enums cast_datatype = 'text[]' - sql = """SELECT $stitch_quote${}$stitch_quote$::{}""".format(elem, cast_datatype) - cur.execute(sql) + sql_stmt = """SELECT $stitch_quote${}$stitch_quote$::{}""".format(elem, cast_datatype) + cur.execute(sql_stmt) res = cur.fetchone()[0] return res From ea5018f40aeb8f1e200def632ffdd7301cc7c5e2 Mon Sep 17 00:00:00 2001 From: Andy Lu Date: Thu, 1 Aug 2019 20:32:00 +0000 Subject: [PATCH 06/13] Bump to v0.0.62 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b15c421..8e3b583 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup setup(name='tap-postgres', - version='0.0.61', + version='0.0.62', description='Singer.io tap for extracting data from PostgreSQL', author='Stitch', url='https://singer.io', From a5c9463933d42c651b38b888e0503716959d28a8 Mon Sep 17 00:00:00 2001 From: Kyle Allan Date: Fri, 2 Aug 2019 17:28:58 +0000 Subject: [PATCH 07/13] add more logging for 'Unrecognized replication_method' --- tap_postgres/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_postgres/__init__.py b/tap_postgres/__init__.py index 38dfe6a..0cc9b28 100644 --- a/tap_postgres/__init__.py +++ b/tap_postgres/__init__.py @@ -493,7 +493,7 @@ def sync_method_for_streams(streams, state, default_replication_method): state = clear_state_on_replication_change(state, stream['tap_stream_id'], replication_key, replication_method) if replication_method not in set(['LOG_BASED', 'FULL_TABLE', 'INCREMENTAL']): - raise Exception("Unrecognized replication_method {}".format(replication_method)) + raise Exception("Unrecognized replication_method {} for stream {}".format(replication_method, stream['tap_stream_id'])) md_map = metadata.to_map(stream['metadata']) desired_columns = [c for c in stream['schema']['properties'].keys() if sync_common.should_sync_column(md_map, c)] From 9f345d6623a65269bcfd86908efcc823137eec65 Mon Sep 17 00:00:00 2001 From: Kyle Allan Date: Fri, 2 Aug 2019 17:29:19 +0000 Subject: [PATCH 08/13] bump to 0.0.63 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8e3b583..bc82f3a 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup setup(name='tap-postgres', - version='0.0.62', + version='0.0.63', description='Singer.io tap for extracting data from PostgreSQL', author='Stitch', url='https://singer.io', From a8c2bfa4a9598821108c765808987a140bb77414 Mon Sep 17 00:00:00 2001 From: Kyle Allan Date: Tue, 8 Oct 2019 19:17:28 +0000 Subject: [PATCH 09/13] encountering a wal2json decimal needs to be string --- tap_postgres/sync_strategies/logical_replication.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_postgres/sync_strategies/logical_replication.py b/tap_postgres/sync_strategies/logical_replication.py index bddf10c..d2f47aa 100644 --- a/tap_postgres/sync_strategies/logical_replication.py +++ b/tap_postgres/sync_strategies/logical_replication.py @@ -168,7 +168,7 @@ def selected_value_to_singer_value_impl(elem, og_sql_datatype, conn_info): if sql_datatype == 'hstore': return create_hstore_elem(conn_info, elem) if 'numeric' in sql_datatype: - return decimal.Decimal(elem) + return decimal.Decimal(str(elem)) if isinstance(elem, int): return elem if isinstance(elem, float): From 3e8431679b2944a1f14a4aa5922f56534f156aca Mon Sep 17 00:00:00 2001 From: nick-mccoy Date: Wed, 9 Oct 2019 14:24:13 +0000 Subject: [PATCH 10/13] bump version to 0.0.64 --- CHANGELOG.md | 4 ++++ setup.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..f12e2de --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,4 @@ +# Changelog + +## 0.0.64 + * Pass string to `decimal.Decimal` when handling numeric data type [#67](https://github.com/singer-io/tap-postgres/pull/67) diff --git a/setup.py b/setup.py index bc82f3a..beb0c58 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup setup(name='tap-postgres', - version='0.0.63', + version='0.0.64', description='Singer.io tap for extracting data from PostgreSQL', author='Stitch', url='https://singer.io', From ece50546bb72ff021cddd1b18eeb28ab3db3024c Mon Sep 17 00:00:00 2001 From: Christopher Merrick Date: Mon, 14 Oct 2019 11:26:33 -0400 Subject: [PATCH 11/13] Create pull_request_template.md --- .github/pull_request_template.md | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/pull_request_template.md diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..c71d3b3 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,11 @@ +# Description of change +(write a short description here or paste a link to JIRA) + +# QA steps + - [ ] automated tests passing + - [ ] manual qa steps passing (list below) + +# Risks + +# Rollback steps + - revert this branch From b551aabbc411f4124b1c48c9fb1c4adcf318a9ea Mon Sep 17 00:00:00 2001 From: Dan Mosora <30501696+dmosorast@users.noreply.github.com> Date: Wed, 23 Oct 2019 13:25:08 -0400 Subject: [PATCH 12/13] Add bigint[] to log-based type lookup (#69) --- tap_postgres/sync_strategies/logical_replication.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tap_postgres/sync_strategies/logical_replication.py b/tap_postgres/sync_strategies/logical_replication.py index d2f47aa..3f2b82c 100644 --- a/tap_postgres/sync_strategies/logical_replication.py +++ b/tap_postgres/sync_strategies/logical_replication.py @@ -105,6 +105,8 @@ def create_array_elem(elem, sql_datatype, conn_info): cast_datatype = 'text[]' elif sql_datatype == 'integer[]': cast_datatype = 'integer[]' + elif sql_datatype == 'bigint[]': + cast_datatype = 'bigint[]' elif sql_datatype == 'inet[]': cast_datatype = 'inet[]' elif sql_datatype == 'json[]': From f1060b108506994e2e406de338c7721fbe528ff7 Mon Sep 17 00:00:00 2001 From: Dan Mosora <30501696+dmosorast@users.noreply.github.com> Date: Wed, 23 Oct 2019 13:29:57 -0400 Subject: [PATCH 13/13] Version 0.0.65 and changelog (#70) --- CHANGELOG.md | 3 +++ setup.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f12e2de..f5ca3f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,7 @@ # Changelog +## 0.0.65 + * Add support for `int8[]` (`bigint[]`) array types to log-based replication [#69](https://github.com/singer-io/tap-postgres/pull/69) + ## 0.0.64 * Pass string to `decimal.Decimal` when handling numeric data type [#67](https://github.com/singer-io/tap-postgres/pull/67) diff --git a/setup.py b/setup.py index beb0c58..8a3e1db 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup setup(name='tap-postgres', - version='0.0.64', + version='0.0.65', description='Singer.io tap for extracting data from PostgreSQL', author='Stitch', url='https://singer.io',