From 70f780f986ba15341d8c9f8db93391514acb56ae Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Sat, 19 Feb 2022 10:14:28 -0700 Subject: [PATCH 01/16] Initial steps to add append support to Docker process. Ran into issue with conninfo string not working in osm2pgsql-replication --- Dockerfile | 2 +- docker/osm2pgsql_recommendation.py | 6 ++--- docker/pgosm_flex.py | 38 ++++++++++++++++++++++++++++-- 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9e838f6..580c289 100644 --- a/Dockerfile +++ b/Dockerfile @@ -45,4 +45,4 @@ COPY ./sqitch.conf /etc/sqitch/sqitch.conf WORKDIR /app COPY . ./ -RUN pip install -r requirements.txt +RUN pip install --upgrade pip && pip install -r requirements.txt diff --git a/docker/osm2pgsql_recommendation.py b/docker/osm2pgsql_recommendation.py index 4eda22e..569c17c 100644 --- a/docker/osm2pgsql_recommendation.py +++ b/docker/osm2pgsql_recommendation.py @@ -10,7 +10,7 @@ LOGGER = logging.getLogger('pgosm-flex') -def osm2pgsql_recommendation(ram, pbf_filename, out_path): +def osm2pgsql_recommendation(ram, pbf_filename, out_path, append): """Returns recommended osm2pgsql command. Recommendation from API at https://osm2pgsql-tuner.com @@ -24,6 +24,8 @@ def osm2pgsql_recommendation(ram, pbf_filename, out_path): out_path : str + append : boolean + Returns ---------------------- osm2pgsql_cmd : str @@ -38,8 +40,6 @@ def osm2pgsql_recommendation(ram, pbf_filename, out_path): osm_pbf_gb = os.path.getsize(pbf_file) / 1024 / 1024 / 1024 LOGGER.info(f'PBF size (GB): {osm_pbf_gb}') - # PgOSM-Flex currently does not support/test append mode. - append = False osm2pgsql_cmd = get_recommended_script(system_ram_gb, osm_pbf_gb, append, diff --git a/docker/pgosm_flex.py b/docker/pgosm_flex.py index 04848f9..b57d81c 100644 --- a/docker/pgosm_flex.py +++ b/docker/pgosm_flex.py @@ -32,6 +32,7 @@ @click.option('--subregion', required=False, help='Sub-region name matching the filename for data sourced from Geofabrik. e.g. district-of-columbia') # Remainder of options in alphabetical order +@click.option('--append', default=False, is_flag=True, help='EXPERIMENTAL - Enable Append mode to enable updates via osm2pgsql-replication.') @click.option('--basepath', required=False, default=BASE_PATH_DEFAULT, @@ -70,7 +71,7 @@ @click.option('--srid', required=False, default=helpers.DEFAULT_SRID, envvar="PGOSM_SRID", help="SRID for data loaded by osm2pgsql to PostGIS. Defaults to 3857") -def run_pgosm_flex(ram, region, subregion, basepath, data_only, debug, +def run_pgosm_flex(ram, region, subregion, append, basepath, data_only, debug, input_file, layerset, layerset_path, language, pgosm_date, schema_name, skip_dump, skip_nested, srid): """Run PgOSM Flex within Docker to automate osm2pgsql flex processing. 
@@ -98,7 +99,8 @@ def run_pgosm_flex(ram, region, subregion, basepath, data_only, debug, pbf_filename = geofabrik.get_region_filename(region, subregion) osm2pgsql_command = rec.osm2pgsql_recommendation(ram=ram, pbf_filename=pbf_filename, - out_path=paths['out_path']) + out_path=paths['out_path'], + append=append) else: osm2pgsql_command = rec.osm2pgsql_recommendation(ram=ram, pbf_filename=input_file, @@ -118,6 +120,12 @@ def run_pgosm_flex(ram, region, subregion, basepath, data_only, debug, run_post_processing(flex_path=flex_path, skip_nested=skip_nested) + if append: + run_osm2pgsql_replication_init(pbf_path=paths['out_path'], + pbf_filename=pbf_filename) + else: + print('DEBUG MESSAGE -- Not using append mode.') + if input_file is None: geofabrik.remove_latest_files(region, subregion, paths) @@ -357,6 +365,32 @@ def run_post_processing(flex_path, skip_nested): db.pgosm_nested_admin_polygons(flex_path) +def run_osm2pgsql_replication_init(pbf_path, pbf_filename): + logger = logging.getLogger('pgosm-flex') + pbf_path = os.path.join(pbf_path, pbf_filename) + init_cmd = 'osm2pgsql-replication init -d $PGOSM_CONN ' + init_cmd += f'--osm-file {pbf_path}' + print(f'RUNNING INIT COMMAND:\n{init_cmd}') + conn_string = db.connection_string() + ## Currently fails - osm2pgsql-replication not working with conninfo string + init_cmd = init_cmd.replace('-d $PGOSM_CONN', f'-d {conn_string}') + output = subprocess.run(init_cmd.split(), + text=True, + check=False, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + + logger.info(f'osm2pgsql-replication output:\n{output.stdout}') + + if output.returncode != 0: + err_msg = f'Failed to run osm2pgsql-replication. Return code: {output.returncode}' + logger.error(err_msg) + sys.exit(f'{err_msg} - Check the log output for details.') + + logger.info('osm2pgsql-replication init completed.') + + + if __name__ == "__main__": logging.getLogger('pgosm-flex').info('Running PgOSM Flex!') run_pgosm_flex() From 26496ba6d4d4aedceadeed7270d935acb676ac5f Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Fri, 25 Feb 2022 16:33:08 -0700 Subject: [PATCH 02/16] Use patched branch of osm2pgsql to allow conninfo string --- Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 580c289..c222ea9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM postgis/postgis:14-3.1 LABEL maintainer="PgOSM-Flex - https://github.com/rustprooflabs/pgosm-flex" -ARG OSM2PGSQL_BRANCH=1.6.0 +ARG OSM2PGSQL_BRANCH=replication-conninfo RUN apt-get update \ && apt-get install -y --no-install-recommends \ @@ -11,7 +11,7 @@ RUN apt-get update \ libboost-dev libboost-system-dev \ libboost-filesystem-dev libexpat1-dev zlib1g-dev \ libbz2-dev libpq-dev libproj-dev lua5.2 liblua5.2-dev \ - python3 python3-distutils python3-psycopg2 \ + python3 python3-distutils \ postgresql-server-dev-14 \ curl luarocks \ && rm -rf /var/lib/apt/lists/* @@ -25,7 +25,7 @@ RUN luarocks install luasql-postgres PGSQL_INCDIR=/usr/include/postgresql/ WORKDIR /tmp -RUN git clone --depth 1 --branch $OSM2PGSQL_BRANCH git://github.com/openstreetmap/osm2pgsql.git \ +RUN git clone --depth 1 --branch $OSM2PGSQL_BRANCH git://github.com/rustprooflabs/osm2pgsql.git \ && mkdir osm2pgsql/build \ && cd osm2pgsql/build \ && cmake .. 
\ From 34d11a3e279273e1000510a8a36d1103850ca94b Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Fri, 25 Feb 2022 17:10:21 -0700 Subject: [PATCH 03/16] Bump Docker base image to Postgres 14 PostGIS 3.2 --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c222ea9..fde1387 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM postgis/postgis:14-3.1 +FROM postgis/postgis:14-3.2 LABEL maintainer="PgOSM-Flex - https://github.com/rustprooflabs/pgosm-flex" From 2d044e051d47460f3d955f04b7344b4519d924d8 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Sun, 27 Feb 2022 10:39:35 -0700 Subject: [PATCH 04/16] Check for replication update status, add logic to switch between base osm2pgsql mode and replication update mode. --- docker/db.py | 26 +++++++--- docker/geofabrik.py | 6 +-- docker/pgosm_flex.py | 121 +++++++++++++++++++++++++++++++------------ 3 files changed, 110 insertions(+), 43 deletions(-) diff --git a/docker/db.py b/docker/db.py index bba3310..7a8ba28 100644 --- a/docker/db.py +++ b/docker/db.py @@ -173,18 +173,24 @@ def pg_isready(): return True -def prepare_pgosm_db(data_only, db_path): +def prepare_pgosm_db(data_only, db_path, append): """Runs through series of steps to prepare database for PgOSM. Parameters -------------------------- data_only : bool db_path : str + append : bool """ if pg_conn_parts()['pg_host'] == 'localhost': LOGGER.debug('Running standard database prep for in-Docker operation. Includes DROP/CREATE DATABASE') - drop_pgosm_db() + if append: + LOGGER.debug('Skipping DB drop b/c of append mode') + else: + LOGGER.debug('Dropping database') + drop_pgosm_db() + create_pgosm_db() else: LOGGER.info('Using external database. Ensure the target database is setup properly for PgOSM Flex with PostGIS, osm schema, and proper permissions.') @@ -260,12 +266,16 @@ def create_pgosm_db(): LOGGER.debug('Setting Pg conn to enable autocommit - required for drop/create DB') conn.autocommit = True - conn.execute(sql_raw) - conn.close() - LOGGER.info('Created pgosm database') - - sql_create_postgis = "CREATE EXTENSION postgis;" - sql_create_schema = "CREATE SCHEMA osm;" + try: + conn.execute(sql_raw) + LOGGER.info('Created pgosm database') + except psycopg.errors.DuplicateDatabase: + LOGGER.info('Database already existed.') + finally: + conn.close() + + sql_create_postgis = "CREATE EXTENSION IF NOT EXISTS postgis;" + sql_create_schema = "CREATE SCHEMA IF NOT EXISTS osm;" with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn: cur = conn.cursor() diff --git a/docker/geofabrik.py b/docker/geofabrik.py index d5dc8d6..70f20c1 100644 --- a/docker/geofabrik.py +++ b/docker/geofabrik.py @@ -216,7 +216,7 @@ def unarchive_data(pbf_file, md5_file, pbf_file_with_date, md5_file_with_date): shutil.copy2(md5_file_with_date, md5_file) -def remove_latest_files(region, subregion, paths): +def remove_latest_files(region, subregion, out_path): """Removes the PBF and MD5 file with -latest in the name. 
Files are archived via prepare_data() before processing starts @@ -225,11 +225,11 @@ def remove_latest_files(region, subregion, paths): ------------------------- region : str subregion : str - paths : dict + out_path : str """ pbf_filename = get_region_filename(region, subregion) - pbf_file = os.path.join(paths['out_path'], pbf_filename) + pbf_file = os.path.join(out_path, pbf_filename) md5_file = f'{pbf_file}.md5' logging.info(f'Done with {pbf_file}, removing.') os.remove(pbf_file) diff --git a/docker/pgosm_flex.py b/docker/pgosm_flex.py index a524f90..b8b0ba0 100644 --- a/docker/pgosm_flex.py +++ b/docker/pgosm_flex.py @@ -93,27 +93,72 @@ def run_pgosm_flex(ram, region, subregion, append, basepath, data_only, debug, helpers.set_env_vars(region, subregion, srid, language, pgosm_date, layerset, layerset_path) + db.wait_for_postgres() + db.prepare_pgosm_db(data_only=data_only, + db_path=paths['db_path'], + append=append) + + if append: + replication_update = check_replication_exists() + else: + replication_update = False + + if replication_update: + logger.error('UPDATE mode coming soon!') + else: + logger.info('Running normal osm2pgsql mode') + run_osm2pgsql_standard(region=region, + subregion=subregion, + input_file=input_file, + pgosm_date=pgosm_date, + out_path=paths['out_path'], + flex_path=paths['flex_path'], + ram=ram, + skip_nested=skip_nested, + layerset_path=layerset_path, + layerset=layerset, + append=append) + + + if schema_name != 'osm': + db.rename_schema(schema_name) + + if skip_dump: + logger.info('Skipping pg_dump') + else: + export_filename = get_export_filename(region, + subregion, + layerset, + pgosm_date, + input_file) + + export_path = get_export_full_path(paths['out_path'], export_filename) + + db.run_pg_dump(export_path=export_path, + data_only=data_only, + schema_name=schema_name) + logger.info('PgOSM Flex complete!') + + +def run_osm2pgsql_standard(region, subregion, input_file, pgosm_date, out_path, + flex_path, ram, skip_nested, layerset_path, + layerset, append): if input_file is None: geofabrik.prepare_data(region=region, subregion=subregion, pgosm_date=pgosm_date, - out_path=paths['out_path']) + out_path=out_path) pbf_filename = geofabrik.get_region_filename(region, subregion) osm2pgsql_command = rec.osm2pgsql_recommendation(ram=ram, pbf_filename=pbf_filename, - out_path=paths['out_path'], + out_path=out_path, append=append) else: osm2pgsql_command = rec.osm2pgsql_recommendation(ram=ram, pbf_filename=input_file, - out_path=paths['out_path']) - - db.wait_for_postgres() - - db.prepare_pgosm_db(data_only=data_only, db_path=paths['db_path']) + out_path=out_path) - flex_path = paths['flex_path'] run_osm2pgsql(osm2pgsql_command=osm2pgsql_command, flex_path=flex_path) @@ -124,32 +169,13 @@ def run_pgosm_flex(ram, region, subregion, append, basepath, data_only, debug, run_post_processing(flex_path=flex_path, skip_nested=skip_nested) if append: - run_osm2pgsql_replication_init(pbf_path=paths['out_path'], + run_osm2pgsql_replication_init(pbf_path=out_path, pbf_filename=pbf_filename) else: print('DEBUG MESSAGE -- Not using append mode.') if input_file is None: - geofabrik.remove_latest_files(region, subregion, paths) - - export_filename = get_export_filename(region, - subregion, - layerset, - pgosm_date, - input_file) - - export_path = get_export_full_path(paths['out_path'], export_filename) - - if schema_name != 'osm': - db.rename_schema(schema_name) - - if skip_dump: - logger.info('Skipping pg_dump') - else: - db.run_pg_dump(export_path=export_path, - 
data_only=data_only, - schema_name=schema_name) - logger.info('PgOSM Flex complete!') + geofabrik.remove_latest_files(region, subregion, out_path) def validate_region_inputs(region, subregion, input_file): @@ -368,14 +394,45 @@ def run_post_processing(flex_path, skip_nested): db.pgosm_nested_admin_polygons(flex_path) +def check_replication_exists(): + """Checks if replication already setup, if so should only run update. + """ + logger = logging.getLogger('pgosm-flex') + check_cmd = "osm2pgsql-replication status -d $PGOSM_CONN " + logger.debug(f'Command to check DB for replication status:\n{check_cmd}') + conn_string = db.connection_string() + check_cmd = check_cmd.replace('-d $PGOSM_CONN', f'-d {conn_string}') + output = subprocess.run(check_cmd.split(), + text=True, + check=False, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + + logger.info(f'osm2pgsql-replication output:\n{output.stdout}') + + if output.returncode != 0: + err_msg = f'Failure. Return code: {output.returncode}' + logger.warning(err_msg) + return False + + logger.debug('osm2pgsql-replication status checked.') + return True + + def run_osm2pgsql_replication_init(pbf_path, pbf_filename): + """Runs osm2pgsql-replication init to support append mode. + + Parameters + --------------------- + pbf_path : str + pbf_filename : str + """ logger = logging.getLogger('pgosm-flex') pbf_path = os.path.join(pbf_path, pbf_filename) init_cmd = 'osm2pgsql-replication init -d $PGOSM_CONN ' init_cmd += f'--osm-file {pbf_path}' - print(f'RUNNING INIT COMMAND:\n{init_cmd}') + logger.debug(f'Initializing DB for replication with command:\n{init_cmd}') conn_string = db.connection_string() - ## Currently fails - osm2pgsql-replication not working with conninfo string init_cmd = init_cmd.replace('-d $PGOSM_CONN', f'-d {conn_string}') output = subprocess.run(init_cmd.split(), text=True, @@ -390,7 +447,7 @@ def run_osm2pgsql_replication_init(pbf_path, pbf_filename): logger.error(err_msg) sys.exit(f'{err_msg} - Check the log output for details.') - logger.info('osm2pgsql-replication init completed.') + logger.debug('osm2pgsql-replication init completed.') From 19c860f86dbfd851d86f2fe70d9507724e70faec Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Sun, 27 Feb 2022 10:43:58 -0700 Subject: [PATCH 05/16] Fix osm2pgsql command with input_file --- docker/pgosm_flex.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/pgosm_flex.py b/docker/pgosm_flex.py index b8b0ba0..30ea53d 100644 --- a/docker/pgosm_flex.py +++ b/docker/pgosm_flex.py @@ -157,7 +157,8 @@ def run_osm2pgsql_standard(region, subregion, input_file, pgosm_date, out_path, else: osm2pgsql_command = rec.osm2pgsql_recommendation(ram=ram, pbf_filename=input_file, - out_path=out_path) + out_path=out_path, + append=append) run_osm2pgsql(osm2pgsql_command=osm2pgsql_command, flex_path=flex_path) From 828791b91b2efaf511cf38e17f385faac9384509 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Thu, 3 Mar 2022 16:17:35 -0700 Subject: [PATCH 06/16] Switch back to main osm2pgsql repo --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index fde1387..7b7fa83 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM postgis/postgis:14-3.2 LABEL maintainer="PgOSM-Flex - https://github.com/rustprooflabs/pgosm-flex" -ARG OSM2PGSQL_BRANCH=replication-conninfo +ARG OSM2PGSQL_BRANCH=master RUN apt-get update \ && apt-get install -y --no-install-recommends \ @@ -25,7 +25,7 @@ RUN luarocks install luasql-postgres 
PGSQL_INCDIR=/usr/include/postgresql/ WORKDIR /tmp -RUN git clone --depth 1 --branch $OSM2PGSQL_BRANCH git://github.com/rustprooflabs/osm2pgsql.git \ +RUN git clone --depth 1 --branch $OSM2PGSQL_BRANCH git://github.com/openstreetmap/osm2pgsql.git \ && mkdir osm2pgsql/build \ && cd osm2pgsql/build \ && cmake .. \ From c0230adde8a38cfa9493fb44ab69b09af7221dc3 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Fri, 4 Mar 2022 16:26:52 -0700 Subject: [PATCH 07/16] Outline logic of final steps, not fully implemented --- docker/pgosm_flex.py | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/docker/pgosm_flex.py b/docker/pgosm_flex.py index 30ea53d..1657552 100644 --- a/docker/pgosm_flex.py +++ b/docker/pgosm_flex.py @@ -86,6 +86,10 @@ def run_pgosm_flex(ram, region, subregion, append, basepath, data_only, debug, validate_region_inputs(region, subregion, input_file) + if schema_name != 'osm' and append: + sys.exit('ERROR: Append mode with custom schema name currently not supported') + + # Ensure always a region name if region is None and input_file: region = input_file @@ -105,6 +109,7 @@ def run_pgosm_flex(ram, region, subregion, append, basepath, data_only, debug, if replication_update: logger.error('UPDATE mode coming soon!') + run_replication_update() else: logger.info('Running normal osm2pgsql mode') run_osm2pgsql_standard(region=region, @@ -179,6 +184,43 @@ def run_osm2pgsql_standard(region, subregion, input_file, pgosm_date, out_path, geofabrik.remove_latest_files(region, subregion, out_path) +def run_replication_update(): + logger = logging.getLogger('pgosm-flex') + conn_string = db.connection_string() + + logger.error('Not running cleanup step in SQL yet!') + sql_prep_replication = 'CALL osm.append_data_start();' + + cmd_osm2pgsql_replication = """ +osm2pgsql-replication update -d $PGOSM_CONN \ + -- \ + --output=flex --style=./run.lua \ + --slim \ + -d $PGOSM_CONN +""" + cmd_osm2pgsql_replication = cmd_osm2pgsql_replication.replace('-d $PGOSM_CONN', f'-d {conn_string}') + + output = subprocess.run(cmd_osm2pgsql_replication.split(), + text=True, + check=False, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + + logger.info(f'osm2pgsql-replication output:\n{output.stdout}') + + if output.returncode != 0: + err_msg = f'Failure. Return code: {output.returncode}' + logger.warning(err_msg) + return False + + logger.info('osm2pgsql-replication update complete.') + return True + + + logger.error('Not running post-import step in SQL yet!') + sql_finish_replication = 'CALL osm.append_data_finish();' + + def validate_region_inputs(region, subregion, input_file): """Ensures the combination of region, subregion and input_file is valid. @@ -409,7 +451,7 @@ def check_replication_exists(): stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - logger.info(f'osm2pgsql-replication output:\n{output.stdout}') + logger.debug(f'osm2pgsql-replication output:\n{output.stdout}') if output.returncode != 0: err_msg = f'Failure. 
Return code: {output.returncode}' From c907087f9b576beadfd757e11aac3fe17f89bbf2 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Sat, 5 Mar 2022 08:15:56 -0700 Subject: [PATCH 08/16] Progressing, encountered error where post-processing hasn't ran --- docker/db.py | 28 ++++++++++++++++++++++++++++ docker/pgosm_flex.py | 21 ++++++++++----------- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/docker/db.py b/docker/db.py index 7a8ba28..95a849a 100644 --- a/docker/db.py +++ b/docker/db.py @@ -466,6 +466,34 @@ def pgosm_nested_admin_polygons(flex_path): sys.exit(f'{err_msg} - Check the log output for details.') + +def osm2pgsql_replication_start(): + LOGGER.error('Not running cleanup step in SQL yet!') + sql_raw = 'CALL osm.append_data_start ();' + + + with get_db_conn(conn_string=connection_string()) as conn: + cur = conn.cursor() + cur.execute(sql_raw) + results = cur.fetchone() + + LOGGER.error(f'TESTING OUTPUT {results}') + + +def osm2pgsql_replication_finish(skip_nested): + logger.error('Not running post-import step in SQL yet!') + sql_raw = 'CALL osm.append_data_finish(skip_nested := %(skip_nested)s );' + params = {'skip_nested': skip_nested} + + with get_db_conn(conn_string=connection_string()) as conn: + cur = conn.cursor() + cur.execute(sql_raw) + results = cur.fetchone() + + LOGGER.error(f'TESTING OUTPUT {results}') + + + def rename_schema(schema_name): """Renames default schema name "osm" to `schema_name` diff --git a/docker/pgosm_flex.py b/docker/pgosm_flex.py index 1657552..f205b58 100644 --- a/docker/pgosm_flex.py +++ b/docker/pgosm_flex.py @@ -109,7 +109,8 @@ def run_pgosm_flex(ram, region, subregion, append, basepath, data_only, debug, if replication_update: logger.error('UPDATE mode coming soon!') - run_replication_update() + run_replication_update(skip_nested=skip_nested, + flex_path=paths['flex_path']) else: logger.info('Running normal osm2pgsql mode') run_osm2pgsql_standard(region=region, @@ -184,25 +185,25 @@ def run_osm2pgsql_standard(region, subregion, input_file, pgosm_date, out_path, geofabrik.remove_latest_files(region, subregion, out_path) -def run_replication_update(): +def run_replication_update(skip_nested, flex_path): logger = logging.getLogger('pgosm-flex') conn_string = db.connection_string() - logger.error('Not running cleanup step in SQL yet!') - sql_prep_replication = 'CALL osm.append_data_start();' + db.osm2pgsql_replication_start() - cmd_osm2pgsql_replication = """ + update_cmd = """ osm2pgsql-replication update -d $PGOSM_CONN \ -- \ --output=flex --style=./run.lua \ --slim \ -d $PGOSM_CONN """ - cmd_osm2pgsql_replication = cmd_osm2pgsql_replication.replace('-d $PGOSM_CONN', f'-d {conn_string}') + update_cmd = update_cmd.replace('-d $PGOSM_CONN', f'-d {conn_string}') - output = subprocess.run(cmd_osm2pgsql_replication.split(), + output = subprocess.run(update_cmd.split(), text=True, check=False, + cwd=flex_path, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) @@ -213,14 +214,12 @@ def run_replication_update(): logger.warning(err_msg) return False + db.osm2pgsql_replication_finish(skip_nested=skip_nested) + logger.info('osm2pgsql-replication update complete.') return True - logger.error('Not running post-import step in SQL yet!') - sql_finish_replication = 'CALL osm.append_data_finish();' - - def validate_region_inputs(region, subregion, input_file): """Ensures the combination of region, subregion and input_file is valid. 
From 6554d0ecb341265425951c58fbabc0f16931f346 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Sat, 5 Mar 2022 08:48:34 -0700 Subject: [PATCH 09/16] Fix post-processing SQL to run again, adding error message when applicable --- docker/db.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/docker/db.py b/docker/db.py index 95a849a..4e2829e 100644 --- a/docker/db.py +++ b/docker/db.py @@ -434,10 +434,15 @@ def pgosm_after_import(flex_path): output = subprocess.run(cmds, text=True, - capture_output=True, cwd=flex_path, - check=True) - LOGGER.info(f'Post-processing output: \n {output.stderr}') + check=False, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + LOGGER.info(f'Post-processing SQL output: \n {output.stdout}') + + if output.returncode != 0: + err_msg = f'Failed to run post-processing SQL. Return code: {output.returncode}' + LOGGER.error(err_msg) def pgosm_nested_admin_polygons(flex_path): From e8f14a99b90ee35f48960cd9612dbc6cf0e0a3d6 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Sat, 5 Mar 2022 10:00:39 -0700 Subject: [PATCH 10/16] Improve error reporting in post-processing SQL --- docker/db.py | 3 +++ docker/pgosm_flex.py | 40 +++++++++++++++++++++++++--------------- flex-config/run-sql.lua | 22 +++++++++++++++++++--- 3 files changed, 47 insertions(+), 18 deletions(-) diff --git a/docker/db.py b/docker/db.py index 4e2829e..8a8bffa 100644 --- a/docker/db.py +++ b/docker/db.py @@ -443,6 +443,9 @@ def pgosm_after_import(flex_path): if output.returncode != 0: err_msg = f'Failed to run post-processing SQL. Return code: {output.returncode}' LOGGER.error(err_msg) + return False + + return True def pgosm_nested_admin_polygons(flex_path): diff --git a/docker/pgosm_flex.py b/docker/pgosm_flex.py index f205b58..e5b5ff1 100644 --- a/docker/pgosm_flex.py +++ b/docker/pgosm_flex.py @@ -113,17 +113,17 @@ def run_pgosm_flex(ram, region, subregion, append, basepath, data_only, debug, flex_path=paths['flex_path']) else: logger.info('Running normal osm2pgsql mode') - run_osm2pgsql_standard(region=region, - subregion=subregion, - input_file=input_file, - pgosm_date=pgosm_date, - out_path=paths['out_path'], - flex_path=paths['flex_path'], - ram=ram, - skip_nested=skip_nested, - layerset_path=layerset_path, - layerset=layerset, - append=append) + success = run_osm2pgsql_standard(region=region, + subregion=subregion, + input_file=input_file, + pgosm_date=pgosm_date, + out_path=paths['out_path'], + flex_path=paths['flex_path'], + ram=ram, + skip_nested=skip_nested, + layerset_path=layerset_path, + layerset=layerset, + append=append) if schema_name != 'osm': @@ -143,8 +143,10 @@ def run_pgosm_flex(ram, region, subregion, append, basepath, data_only, debug, db.run_pg_dump(export_path=export_path, data_only=data_only, schema_name=schema_name) - logger.info('PgOSM Flex complete!') - + if success: + logger.info('PgOSM Flex complete!') + else: + logger.warning('PgOSM Flex completed with errors. 
Details in output') def run_osm2pgsql_standard(region, subregion, input_file, pgosm_date, out_path, flex_path, ram, skip_nested, layerset_path, @@ -173,7 +175,8 @@ def run_osm2pgsql_standard(region, subregion, input_file, pgosm_date, out_path, # Auto-set skip_nested when place layer not imported skip_nested = check_layerset_places(layerset_path, layerset, flex_path) - run_post_processing(flex_path=flex_path, skip_nested=skip_nested) + post_processing = run_post_processing(flex_path=flex_path, + skip_nested=skip_nested) if append: run_osm2pgsql_replication_init(pbf_path=out_path, @@ -184,6 +187,8 @@ def run_osm2pgsql_standard(region, subregion, input_file, pgosm_date, out_path, if input_file is None: geofabrik.remove_latest_files(region, subregion, out_path) + return post_processing + def run_replication_update(skip_nested, flex_path): logger = logging.getLogger('pgosm-flex') @@ -427,7 +432,7 @@ def run_post_processing(flex_path, skip_nested): skip_nested : bool """ - db.pgosm_after_import(flex_path) + post_processing_sql = db.pgosm_after_import(flex_path) logger = logging.getLogger('pgosm-flex') if skip_nested: logger.info('Skipping calculating nested polygons') @@ -435,6 +440,11 @@ def run_post_processing(flex_path, skip_nested): logger.info('Calculating nested polygons') db.pgosm_nested_admin_polygons(flex_path) + if not post_processing_sql: + return False + + return True + def check_replication_exists(): """Checks if replication already setup, if so should only run update. diff --git a/flex-config/run-sql.lua b/flex-config/run-sql.lua index a4929b3..0a03b84 100644 --- a/flex-config/run-sql.lua +++ b/flex-config/run-sql.lua @@ -27,7 +27,14 @@ local function post_processing(layerset) sql_raw = sql_file:read( '*all' ) sql_file:close() local result = con:execute(sql_raw) - --print(result) -- Returns 0.0 on success? nil on error? + + -- Returns 0 on success, nil on error. + if result == nil then + print(string.format("Error in post-processing layerset: %s", layerset)) + return false + end + + return true end @@ -44,12 +51,17 @@ while row do row = cur:fetch (row, "a") end +local errors = 0 -post_processing('pgosm-meta') +if not post_processing('pgosm-meta') then + errors = errors + 1 +end for ix, layer in ipairs(layers) do if conf['layerset'][layer] then - post_processing(layer) + if not post_processing(layer) then + errors = errors + 1 + end end end @@ -58,3 +70,7 @@ end cur:close() con:close() env:close() + +if errors > 0 then + os.exit(1) +end From acac426c6a0f675de3739cde370bddf14164999e Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Sat, 5 Mar 2022 10:05:36 -0700 Subject: [PATCH 11/16] Fix function signature to allow meta script to deploy properly --- flex-config/sql/pgosm-meta.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flex-config/sql/pgosm-meta.sql b/flex-config/sql/pgosm-meta.sql index 4dad979..2ad7ac2 100644 --- a/flex-config/sql/pgosm-meta.sql +++ b/flex-config/sql/pgosm-meta.sql @@ -53,5 +53,5 @@ END $$; COMMENT ON PROCEDURE osm.append_data_start() IS 'Prepares PgOSM Flex database for running osm2pgsql in append mode. Removes records from place_polygon_nested if they existed.'; -COMMENT ON PROCEDURE osm.append_data_finish() IS 'Finalizes PgOSM Flex after osm2pgsql-replication. Refreshes materialized view and (optionally) processes the place_polygon_nested data.'; +COMMENT ON PROCEDURE osm.append_data_finish(BOOLEAN) IS 'Finalizes PgOSM Flex after osm2pgsql-replication. 
Refreshes materialized view and (optionally) processes the place_polygon_nested data.'; From 29f1c0506c654fb837f56139f242c82f55aa45e3 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Sun, 6 Mar 2022 08:15:12 -0700 Subject: [PATCH 12/16] Finishing steps to implement --apend mode. Minor cleanup --- docker/db.py | 33 ++++++++++++++++++++++----------- docker/pgosm_flex.py | 20 ++++++++++++++++---- 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/docker/db.py b/docker/db.py index 8a8bffa..62bb78e 100644 --- a/docker/db.py +++ b/docker/db.py @@ -479,26 +479,37 @@ def osm2pgsql_replication_start(): LOGGER.error('Not running cleanup step in SQL yet!') sql_raw = 'CALL osm.append_data_start ();' - with get_db_conn(conn_string=connection_string()) as conn: cur = conn.cursor() cur.execute(sql_raw) - results = cur.fetchone() - LOGGER.error(f'TESTING OUTPUT {results}') + def osm2pgsql_replication_finish(skip_nested): - logger.error('Not running post-import step in SQL yet!') - sql_raw = 'CALL osm.append_data_finish(skip_nested := %(skip_nested)s );' - params = {'skip_nested': skip_nested} + # Fails via psycopg, using psql + if skip_nested: + LOGGER.info('Finishing Replication, skipping nested polygons') + sql_raw = 'CALL osm.append_data_finish(skip_nested := True );' + else: + LOGGER.info('Finishing Replication, including nested polygons') + sql_raw = 'CALL osm.append_data_finish(skip_nested := False );' - with get_db_conn(conn_string=connection_string()) as conn: - cur = conn.cursor() - cur.execute(sql_raw) - results = cur.fetchone() + conn_string = os.environ['PGOSM_CONN'] + cmds = ['psql', '-d', conn_string, '-c', sql_raw] + LOGGER.info('Finishing Replication') + output = subprocess.run(cmds, + text=True, + check=False, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + LOGGER.info(f'Finishing replication output: \n {output.stdout}') + + if output.returncode != 0: + err_msg = f'Failed to finish replication. 
Return code: {output.returncode}' + LOGGER.error(err_msg) + sys.exit(f'{err_msg} - Check the log output for details.') - LOGGER.error(f'TESTING OUTPUT {results}') diff --git a/docker/pgosm_flex.py b/docker/pgosm_flex.py index e5b5ff1..5f377d7 100644 --- a/docker/pgosm_flex.py +++ b/docker/pgosm_flex.py @@ -109,8 +109,8 @@ def run_pgosm_flex(ram, region, subregion, append, basepath, data_only, debug, if replication_update: logger.error('UPDATE mode coming soon!') - run_replication_update(skip_nested=skip_nested, - flex_path=paths['flex_path']) + success = run_replication_update(skip_nested=skip_nested, + flex_path=paths['flex_path']) else: logger.info('Running normal osm2pgsql mode') success = run_osm2pgsql_standard(region=region, @@ -151,6 +151,7 @@ def run_pgosm_flex(ram, region, subregion, append, basepath, data_only, debug, def run_osm2pgsql_standard(region, subregion, input_file, pgosm_date, out_path, flex_path, ram, skip_nested, layerset_path, layerset, append): + logger = logging.getLogger('pgosm-flex') if input_file is None: geofabrik.prepare_data(region=region, subregion=subregion, @@ -172,7 +173,6 @@ def run_osm2pgsql_standard(region, subregion, input_file, pgosm_date, out_path, flex_path=flex_path) if not skip_nested: - # Auto-set skip_nested when place layer not imported skip_nested = check_layerset_places(layerset_path, layerset, flex_path) post_processing = run_post_processing(flex_path=flex_path, @@ -182,7 +182,7 @@ def run_osm2pgsql_standard(region, subregion, input_file, pgosm_date, out_path, run_osm2pgsql_replication_init(pbf_path=out_path, pbf_filename=pbf_filename) else: - print('DEBUG MESSAGE -- Not using append mode.') + logger.debug('Not using append mode') if input_file is None: geofabrik.remove_latest_files(region, subregion, out_path) @@ -191,6 +191,18 @@ def run_osm2pgsql_standard(region, subregion, input_file, pgosm_date, out_path, def run_replication_update(skip_nested, flex_path): + """Runs osm2pgsql-replication between the DB start/finish steps. + + Parameters + ----------------------- + skip_nested : bool + flex_path : str + + Returns + --------------------- + bool + Indicates success/failure of replication process. + """ logger = logging.getLogger('pgosm-flex') conn_string = db.connection_string() From 7ac44b7d654dd9b1679fd607186f27183f0f9509 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Sun, 6 Mar 2022 17:54:36 -0700 Subject: [PATCH 13/16] Add basic --append mode instructions. 
Start restructuring to pass fewer vars around, relying more on the env vars being set --- docker/helpers.py | 10 +++++-- docker/pgosm_flex.py | 46 +++++++++++++++++----------- docker/tests/test_pgosm_flex.py | 51 +++++++++++++++++++------------- docs/DOCKER-RUN.md | 45 ++++++++++++++++++++++++++++ flex-config/helpers.lua | 8 ++--- flex-config/style/pgosm-meta.lua | 2 +- 6 files changed, 117 insertions(+), 45 deletions(-) diff --git a/docker/helpers.py b/docker/helpers.py index 7ac9cb5..6be88e7 100644 --- a/docker/helpers.py +++ b/docker/helpers.py @@ -72,13 +72,17 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset, unset_env_vars() logger.debug('Setting environment variables') + os.environ['PGOSM_REGION'] = region + if subregion is None: pgosm_region = f'{region}' else: + os.environ['PGOSM_SUBREGION'] = subregion pgosm_region = f'{region}-{subregion}' - logger.debug(f'PGOSM_REGION: {pgosm_region}') - os.environ['PGOSM_REGION'] = pgosm_region + # Used by helpers.lua + logger.debug(f'PGOSM_REGION_COMBINED: {pgosm_region}') + os.environ['PGOSM_REGION_COMBINED'] = pgosm_region if srid != DEFAULT_SRID: logger.info(f'SRID set: {srid}') @@ -106,6 +110,8 @@ def unset_env_vars(): Does not pop POSTGRES_DB on purpose to allow non-Docker operation. """ os.environ.pop('PGOSM_REGION', None) + os.environ.pop('PGOSM_SUBREGION', None) + os.environ.pop('PGOSM_COMBINED', None) os.environ.pop('PGOSM_SRID', None) os.environ.pop('PGOSM_LANGUAGE', None) os.environ.pop('PGOSM_LAYERSET_PATH', None) diff --git a/docker/pgosm_flex.py b/docker/pgosm_flex.py index 5f377d7..def36e3 100644 --- a/docker/pgosm_flex.py +++ b/docker/pgosm_flex.py @@ -129,20 +129,11 @@ def run_pgosm_flex(ram, region, subregion, append, basepath, data_only, debug, if schema_name != 'osm': db.rename_schema(schema_name) - if skip_dump: - logger.info('Skipping pg_dump') - else: - export_filename = get_export_filename(region, - subregion, - layerset, - pgosm_date, - input_file) - - export_path = get_export_full_path(paths['out_path'], export_filename) + dump_database(input_file=input_file, + out_path=paths['out_path'], + skip_dump=skip_dump, data_only=data_only, + schema_name=schema_name) - db.run_pg_dump(export_path=export_path, - data_only=data_only, - schema_name=schema_name) if success: logger.info('PgOSM Flex complete!') else: @@ -316,15 +307,11 @@ def get_paths(base_path): -def get_export_filename(region, subregion, layerset, pgosm_date, input_file): +def get_export_filename(input_file): """Returns the .sql filename to use for pg_dump. 
Parameters ---------------------- - region : str - subregion : str - layerset : str - pgosm_date : str input_file : str Returns @@ -332,6 +319,11 @@ def get_export_filename(region, subregion, layerset, pgosm_date, input_file): filename : str """ # region is always set internally, even with --input-file and no --region + region = os.environ.get('PGOSM_REGION') + subregion = os.environ.get('PGOSM_SUBREGION') + layerset = os.environ.get('PGOSM_LAYERSET') + pgosm_date = os.environ.get('PGOSM_DATE') + region = region.replace('/', '-') if subregion: subregion = subregion.replace('/', '-') @@ -458,6 +450,24 @@ def run_post_processing(flex_path, skip_nested): return True +def dump_database(input_file, out_path, skip_dump, data_only, schema_name): + region = os.environ.get('PGOSM_REGION') + subregion = os.environ.get('PGOSM_SUBREGION') + pgosm_date = os.environ.get('PGOSM_DATE') + layerset = os.environ.get('PGOSM_LAYERSET') + + if skip_dump: + logging.getLogger('pgosm-flex').info('Skipping pg_dump') + else: + export_filename = get_export_filename(input_file) + + export_path = get_export_full_path(out_path, export_filename) + + db.run_pg_dump(export_path=export_path, + data_only=data_only, + schema_name=schema_name) + + def check_replication_exists(): """Checks if replication already setup, if so should only run update. """ diff --git a/docker/tests/test_pgosm_flex.py b/docker/tests/test_pgosm_flex.py index a58fb1d..61092a1 100644 --- a/docker/tests/test_pgosm_flex.py +++ b/docker/tests/test_pgosm_flex.py @@ -1,12 +1,28 @@ """ Unit tests to cover the DB module.""" import unittest -import pgosm_flex +import pgosm_flex, helpers REGION_US = 'north-america/us' SUBREGION_DC = 'district-of-columbia' +LAYERSET = 'default' +PGOSM_DATE = '2021-12-02' + class PgOSMFlexTests(unittest.TestCase): + def setUp(self): + helpers.set_env_vars(region=REGION_US, + subregion=SUBREGION_DC, + srid=3857, + language=None, + pgosm_date=PGOSM_DATE, + layerset=LAYERSET, + layerset_path=None) + + + def tearDown(self): + helpers.unset_env_vars() + def test_get_paths_returns_dict(self): base_path = pgosm_flex.BASE_PATH_DEFAULT expected = dict @@ -52,35 +68,30 @@ def test_get_export_filename_slash_to_dash(self): Also tests the filename w/ region & subregion - no need for an additional test covering that behavior. 
""" - region = 'north-america/us' - subregion = 'not/real' - layerset = 'default' - pgosm_date = '2021-12-02' input_file = None - result = pgosm_flex.get_export_filename(region, subregion, layerset, pgosm_date, input_file) - expected = 'north-america-us-not-real-default-2021-12-02.sql' + result = pgosm_flex.get_export_filename(input_file) + expected = 'north-america-us-district-of-columbia-default-2021-12-02.sql' self.assertEqual(expected, result) def test_get_export_filename_input_file_defined_overrides_region_subregion(self): - region = 'doesnotmatter' # Not setting to None to ensure expected behavior - subregion = 'alsodoesnotmatter' # Not setting to None to ensure expected behavior - layerset = 'default' - pgosm_date = '2021-12-02' input_file = '/my/inputfile.osm.pbf' - result = pgosm_flex.get_export_filename(region, subregion, layerset, pgosm_date, input_file) + result = pgosm_flex.get_export_filename(input_file) expected = '/my/inputfile-default-2021-12-02.sql' self.assertEqual(expected, result) def test_get_export_filename_region_only(self): - # Need 4 tests covering this function - # Check name when region , no subregion - # - region = 'north-america' - subregion = None - layerset = 'default' - pgosm_date = '2021-12-02' + # Override Subregion to None + helpers.unset_env_vars() + helpers.set_env_vars(region='north-america', + subregion=None, + srid=3857, + language=None, + pgosm_date=PGOSM_DATE, + layerset=LAYERSET, + layerset_path=None) + input_file = None - result = pgosm_flex.get_export_filename(region, subregion, layerset, pgosm_date, input_file) + result = pgosm_flex.get_export_filename(input_file) expected = 'north-america-default-2021-12-02.sql' self.assertEqual(expected, result) diff --git a/docs/DOCKER-RUN.md b/docs/DOCKER-RUN.md index 63aff7b..32cfaae 100644 --- a/docs/DOCKER-RUN.md +++ b/docs/DOCKER-RUN.md @@ -314,3 +314,48 @@ docker exec -it \ --skip-dump ``` + +## Use `--append` for updates + +> Added `--append` as **Experimental** feature in 0.4.6. + + +Using `--append` mode wraps around the `osm2pgsql-replication` package +included with `osm2pgsql`. The first time running an import with `--append` +mode runs osm2pgsql normally, with `--slim` mode and without `--drop`. +After osm2pgsql completes, `osm2pgsql-replication init ...` is ran to setup +the DB for updates. + +Need to increase Postgres' `max_connections`, see +[this discussion on osm2pgsql](https://github.com/openstreetmap/osm2pgsql/discussions/1650). + + +```bash +docker run --name pgosm -d --rm \ + -v ~/pgosm-data:/app/output \ + -v /etc/localtime:/etc/localtime:ro \ + -e POSTGRES_PASSWORD=$POSTGRES_PASSWORD \ + -p 5433:5432 -d rustprooflabs/pgosm-flex \ + -c max_connections=300 +``` + +Run the `docker exec` step with `--append` and `--skip-dump`. This results in +a larger database as the intermediate osm2pgsql tables must be left +in the database. + +```bash +docker exec -it \ + pgosm python3 docker/pgosm_flex.py \ + --ram=8 \ + --region=north-america/us \ + --subregion=district-of-columbia \ + --pgosm-date 2022-02-22 \ + --append --skip-dump +``` + +Running the above command a second time will detect that the target database +has osm2pgsql replication setup and load data via the defined replication +service. 
+ + + diff --git a/flex-config/helpers.lua b/flex-config/helpers.lua index 10b6eda..3c2622d 100644 --- a/flex-config/helpers.lua +++ b/flex-config/helpers.lua @@ -23,12 +23,12 @@ else end -local pgosm_region_env = os.getenv("PGOSM_REGION") +local pgosm_region_env = os.getenv("PGOSM_REGION_COMBINED") if pgosm_region_env then - pgosm_region = pgosm_region_env - print('INFO - Region: ' .. pgosm_region) + pgosm_region_combined = pgosm_region_env + print('INFO - Region: ' .. pgosm_region_combined) else - pgosm_region = 'Not Specified' + pgosm_region_combined = 'Not Specified' print('INFO - Set PGOSM_REGION env var to customize region. ') end diff --git a/flex-config/style/pgosm-meta.lua b/flex-config/style/pgosm-meta.lua index e3dd7e7..30710c1 100644 --- a/flex-config/style/pgosm-meta.lua +++ b/flex-config/style/pgosm-meta.lua @@ -90,7 +90,7 @@ local sql_insert = [[ INSERT INTO osm.pgosm_flex (osm_date, default_date, region [[ VALUES (']] .. con:escape(pgosm_date) .. [[', ]] .. default_date_str .. [[ , ']] .. -- special handling for boolean - con:escape(pgosm_region) .. [[', ']] .. + con:escape(pgosm_region_combined) .. [[', ']] .. con:escape(pgosm_flex_version) .. [[', ']] .. con:escape(srid) .. [[', ']] .. con:escape(project_url) .. [[', ']] .. From fd81ba70b8a56e0ae2907d313bd74a272acfb823 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Sun, 6 Mar 2022 18:25:38 -0700 Subject: [PATCH 14/16] Switching more to env vars --- docker/geofabrik.py | 23 +++++++++--------- docker/pgosm_flex.py | 40 +++++++++++++++---------------- docker/tests/test_geofabrik.py | 43 ++++++++++++++++++++++++---------- 3 files changed, 61 insertions(+), 45 deletions(-) diff --git a/docker/geofabrik.py b/docker/geofabrik.py index 70f20c1..d837659 100644 --- a/docker/geofabrik.py +++ b/docker/geofabrik.py @@ -8,18 +8,16 @@ import helpers -def get_region_filename(region, subregion): +def get_region_filename(): """Returns the filename needed to download/manage PBF files. - Parameters - ---------------------- - region : str - subregion : str - Returns ---------------------- filename : str """ + region = os.environ.get('PGOSM_REGION') + subregion = os.environ.get('PGOSM_SUBREGION') + base_name = '{}-latest.osm.pbf' if subregion is None: filename = base_name.format(region) @@ -29,7 +27,7 @@ def get_region_filename(region, subregion): return filename -def prepare_data(region, subregion, pgosm_date, out_path): +def prepare_data(out_path): """Ensures the PBF file is available. 
Checks if it already exists locally, download if needed, @@ -37,9 +35,6 @@ def prepare_data(region, subregion, pgosm_date, out_path): Parameters ---------------------- - region : str - subregion : str - pgosm_date : str out_path : str Returns @@ -47,7 +42,11 @@ def prepare_data(region, subregion, pgosm_date, out_path): pbf_file : str Full path to PBF file """ - pbf_filename = get_region_filename(region, subregion) + region = os.environ.get('PGOSM_REGION') + subregion = os.environ.get('PGOSM_SUBREGION') + pgosm_date = os.environ.get('PGOSM_DATE') + + pbf_filename = get_region_filename() pbf_file = os.path.join(out_path, pbf_filename) pbf_file_with_date = pbf_file.replace('latest', pgosm_date) @@ -227,7 +226,7 @@ def remove_latest_files(region, subregion, out_path): subregion : str out_path : str """ - pbf_filename = get_region_filename(region, subregion) + pbf_filename = get_region_filename() pbf_file = os.path.join(out_path, pbf_filename) md5_file = f'{pbf_file}.md5' diff --git a/docker/pgosm_flex.py b/docker/pgosm_flex.py index def36e3..5c0f0fa 100644 --- a/docker/pgosm_flex.py +++ b/docker/pgosm_flex.py @@ -113,16 +113,11 @@ def run_pgosm_flex(ram, region, subregion, append, basepath, data_only, debug, flex_path=paths['flex_path']) else: logger.info('Running normal osm2pgsql mode') - success = run_osm2pgsql_standard(region=region, - subregion=subregion, - input_file=input_file, - pgosm_date=pgosm_date, + success = run_osm2pgsql_standard(input_file=input_file, out_path=paths['out_path'], flex_path=paths['flex_path'], ram=ram, skip_nested=skip_nested, - layerset_path=layerset_path, - layerset=layerset, append=append) @@ -139,17 +134,22 @@ def run_pgosm_flex(ram, region, subregion, append, basepath, data_only, debug, else: logger.warning('PgOSM Flex completed with errors. Details in output') -def run_osm2pgsql_standard(region, subregion, input_file, pgosm_date, out_path, - flex_path, ram, skip_nested, layerset_path, - layerset, append): +def run_osm2pgsql_standard(input_file, out_path, flex_path, ram, skip_nested, + append): + """Runs standard osm2pgsql command and optionally inits for append mode. 
+ """ logger = logging.getLogger('pgosm-flex') + + region = os.environ.get('PGOSM_REGION') + subregion = os.environ.get('PGOSM_SUBREGION') + layerset = os.environ.get('PGOSM_LAYERSET') + layerset_path = os.environ.get('PGOSM_LAYERSET_PATH') + pgosm_date = os.environ.get('PGOSM_DATE') + if input_file is None: - geofabrik.prepare_data(region=region, - subregion=subregion, - pgosm_date=pgosm_date, - out_path=out_path) + geofabrik.prepare_data(out_path=out_path) - pbf_filename = geofabrik.get_region_filename(region, subregion) + pbf_filename = geofabrik.get_region_filename() osm2pgsql_command = rec.osm2pgsql_recommendation(ram=ram, pbf_filename=pbf_filename, out_path=out_path, @@ -164,7 +164,7 @@ def run_osm2pgsql_standard(region, subregion, input_file, pgosm_date, out_path, flex_path=flex_path) if not skip_nested: - skip_nested = check_layerset_places(layerset_path, layerset, flex_path) + skip_nested = check_layerset_places(flex_path) post_processing = run_post_processing(flex_path=flex_path, skip_nested=skip_nested) @@ -319,12 +319,11 @@ def get_export_filename(input_file): filename : str """ # region is always set internally, even with --input-file and no --region - region = os.environ.get('PGOSM_REGION') + region = os.environ.get('PGOSM_REGION').replace('/', '-') subregion = os.environ.get('PGOSM_SUBREGION') layerset = os.environ.get('PGOSM_LAYERSET') pgosm_date = os.environ.get('PGOSM_DATE') - region = region.replace('/', '-') if subregion: subregion = subregion.replace('/', '-') @@ -389,13 +388,11 @@ def run_osm2pgsql(osm2pgsql_command, flex_path): logger.info('osm2pgsql completed.') -def check_layerset_places(layerset_path, layerset, flex_path): +def check_layerset_places(flex_path): """If `place` layer is not included `skip_nested` should be true. 
Parameters ------------------------ - layerset_path : str - layerset : str flex_path : str Returns @@ -404,6 +401,9 @@ def check_layerset_places(layerset_path, layerset, flex_path): """ logger = logging.getLogger('pgosm-flex') + layerset = os.environ.get('PGOSM_LAYERSET') + layerset_path = os.environ.get('PGOSM_LAYERSET_PATH') + if layerset_path is None: layerset_path = os.path.join(flex_path, 'layerset') logger.info(f'Using default layerset path {layerset_path}') diff --git a/docker/tests/test_geofabrik.py b/docker/tests/test_geofabrik.py index 2f4a9e7..89fc6ef 100644 --- a/docker/tests/test_geofabrik.py +++ b/docker/tests/test_geofabrik.py @@ -1,24 +1,45 @@ """ Unit tests to cover the Geofabrik module.""" import unittest -import geofabrik +import geofabrik, helpers REGION_US = 'north-america/us' SUBREGION_DC = 'district-of-columbia' +LAYERSET = 'default' +PGOSM_DATE = '2021-12-02' class GeofabrikTests(unittest.TestCase): + def setUp(self): + helpers.set_env_vars(region=REGION_US, + subregion=SUBREGION_DC, + srid=3857, + language=None, + pgosm_date=PGOSM_DATE, + layerset=LAYERSET, + layerset_path=None) + + + def tearDown(self): + helpers.unset_env_vars() + def test_get_region_filename_returns_subregion_when_exists(self): - region = REGION_US - subregion = SUBREGION_DC - result = geofabrik.get_region_filename(region, subregion) + result = geofabrik.get_region_filename() expected = f'{SUBREGION_DC}-latest.osm.pbf' self.assertEqual(expected, result) def test_get_region_filename_returns_region_when_subregion_None(self): - region = REGION_US - subregion = None - result = geofabrik.get_region_filename(region, subregion) + # Override Subregion to None + helpers.unset_env_vars() + helpers.set_env_vars(region='north-america/us', + subregion=None, + srid=3857, + language=None, + pgosm_date=PGOSM_DATE, + layerset=LAYERSET, + layerset_path=None) + + result = geofabrik.get_region_filename() expected = f'{REGION_US}-latest.osm.pbf' self.assertEqual(expected, result) @@ -37,10 +58,8 @@ def test_get_pbf_url_returns_proper_with_region_and_subregion(self): self.assertEqual(expected, result) def test_pbf_download_needed_returns_boolean(self): - region = REGION_US - subregion = SUBREGION_DC pgosm_date = geofabrik.helpers.get_today() - region_filename = geofabrik.get_region_filename(region, subregion) + region_filename = geofabrik.get_region_filename() expected = bool result = geofabrik.pbf_download_needed(pbf_file_with_date='does-not-matter', md5_file_with_date='not-a-file', @@ -48,10 +67,8 @@ def test_pbf_download_needed_returns_boolean(self): self.assertEqual(expected, type(result)) def test_pbf_download_needed_returns_true_when_file_not_exists(self): - region = REGION_US - subregion = SUBREGION_DC pgosm_date = geofabrik.helpers.get_today() - region_filename = geofabrik.get_region_filename(region, subregion) + region_filename = geofabrik.get_region_filename() expected = True result = geofabrik.pbf_download_needed(pbf_file_with_date='does-not-matter', md5_file_with_date='not-a-file', From de21ad7954b02612abe8e08682b356dea3ff8fef Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Sun, 6 Mar 2022 18:47:42 -0700 Subject: [PATCH 15/16] Update expected output --- docker/tests/test_osm2pgsql_recommendation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/tests/test_osm2pgsql_recommendation.py b/docker/tests/test_osm2pgsql_recommendation.py index 105475e..c05a4ec 100644 --- a/docker/tests/test_osm2pgsql_recommendation.py +++ b/docker/tests/test_osm2pgsql_recommendation.py @@ -23,7 +23,7 
@@ def test_get_recommended_script_returns_str(self): self.assertEqual(expected, actual) def test_get_recommended_script_returns_expected_str(self): - expected = 'osm2pgsql -d postgresql://postgres:mysecretpassword@localhost/pgosm?application_name=pgosm-flex --cache=0 --slim --drop --flat-nodes=/tmp/nodes --output=flex --style=./run.lua this-is-a-test/This-is-a-test.osm.pbf' + expected = 'osm2pgsql -d postgresql://postgres:mysecretpassword@localhost/pgosm?application_name=pgosm-flex --cache=0 --slim --drop --flat-nodes=/tmp/nodes --output=flex --style=./run.lua This-is-a-test.osm.pbf' system_ram_gb = 2 osm_pbf_gb = 10 append = False From 7271bff463662485c9521e6c43ccff151586f472 Mon Sep 17 00:00:00 2001 From: Ryan Lambert Date: Fri, 11 Mar 2022 06:58:06 -0700 Subject: [PATCH 16/16] Cleanup from dev, doc blocks, pylint improvements --- docker/db.py | 27 +++++++++++++++++--- docker/geofabrik.py | 5 +--- docker/helpers.py | 2 +- docker/pgosm_flex.py | 59 +++++++++++++++++++++++++++++++------------- docs/DOCKER-RUN.md | 2 +- 5 files changed, 68 insertions(+), 27 deletions(-) diff --git a/docker/db.py b/docker/db.py index 62bb78e..1a6f11f 100644 --- a/docker/db.py +++ b/docker/db.py @@ -237,6 +237,10 @@ def drop_pgosm_db(): """Drops the pgosm database if it exists. Intentionally hard coded to `pgosm` database for in-Docker use only. + + Returns + ------------------------ + status : bool """ if not pg_conn_parts()['pg_host'] == 'localhost': LOGGER.error('Attempted to drop database external from Docker. Not doing that') @@ -250,12 +254,17 @@ def drop_pgosm_db(): conn.execute(sql_raw) conn.close() LOGGER.info('Removed pgosm database') + return True def create_pgosm_db(): """Creates the pgosm database and prepares with PostGIS and osm schema Intentionally hard coded to `pgosm` database for in-Docker use only. + + Returns + ----------------------- + status : bool """ if not pg_conn_parts()['pg_host'] == 'localhost': LOGGER.error('Attempted to create database external from Docker. Not doing that') @@ -284,6 +293,8 @@ def create_pgosm_db(): cur.execute(sql_create_schema) LOGGER.debug('Created osm schema') + return True + def run_sqitch_prep(db_path): """Runs Sqitch to create DB structure and populate helper data. @@ -476,6 +487,8 @@ def pgosm_nested_admin_polygons(flex_path): def osm2pgsql_replication_start(): + """Runs pre-replication step to clean out FKs that would prevent updates. + """ LOGGER.error('Not running cleanup step in SQL yet!') sql_raw = 'CALL osm.append_data_start ();' @@ -484,9 +497,13 @@ def osm2pgsql_replication_start(): cur.execute(sql_raw) - - def osm2pgsql_replication_finish(skip_nested): + """Runs post-replication step to put FKs back and refresh materialied views. + + Parameters + --------------------- + skip_nested : bool + """ # Fails via psycopg, using psql if skip_nested: LOGGER.info('Finishing Replication, skipping nested polygons') @@ -511,8 +528,6 @@ def osm2pgsql_replication_finish(skip_nested): sys.exit(f'{err_msg} - Check the log output for details.') - - def rename_schema(schema_name): """Renames default schema name "osm" to `schema_name` @@ -568,6 +583,10 @@ def fix_pg_dump_create_public(export_path): """Using pg_dump with `--schema=public` results in a .sql script containing `CREATE SCHEMA public;`, nearly always breaks in target DB. 
Replaces with `CREATE SCHEMA IF NOT EXISTS public;` + + Parameters + ---------------------- + export_path : str """ result = sh.sed('-i', 's/CREATE SCHEMA public;/CREATE SCHEMA IF NOT EXISTS public;/', diff --git a/docker/geofabrik.py b/docker/geofabrik.py index d837659..57efa81 100644 --- a/docker/geofabrik.py +++ b/docker/geofabrik.py @@ -215,15 +215,13 @@ def unarchive_data(pbf_file, md5_file, pbf_file_with_date, md5_file_with_date): shutil.copy2(md5_file_with_date, md5_file) -def remove_latest_files(region, subregion, out_path): +def remove_latest_files(out_path): """Removes the PBF and MD5 file with -latest in the name. Files are archived via prepare_data() before processing starts Parameters ------------------------- - region : str - subregion : str out_path : str """ pbf_filename = get_region_filename() @@ -234,4 +232,3 @@ def remove_latest_files(region, subregion, out_path): os.remove(pbf_file) logging.info(f'Done with {md5_file}, removing.') os.remove(md5_file) - diff --git a/docker/helpers.py b/docker/helpers.py index 6be88e7..58ebac8 100644 --- a/docker/helpers.py +++ b/docker/helpers.py @@ -47,7 +47,7 @@ def verify_checksum(md5_file, path): logger.error(err_msg) sys.exit(err_msg) - logger.info(f'md5sum validated') + logger.info('md5sum validated') def set_env_vars(region, subregion, srid, language, pgosm_date, layerset, diff --git a/docker/pgosm_flex.py b/docker/pgosm_flex.py index 5c0f0fa..ae13db3 100644 --- a/docker/pgosm_flex.py +++ b/docker/pgosm_flex.py @@ -17,7 +17,9 @@ import click import osm2pgsql_recommendation as rec -import db, geofabrik, helpers +import db +import geofabrik +import helpers BASE_PATH_DEFAULT = '/app' @@ -35,7 +37,10 @@ @click.option('--subregion', required=False, help='Sub-region name matching the filename for data sourced from Geofabrik. e.g. district-of-columbia') # Remainder of options in alphabetical order -@click.option('--append', default=False, is_flag=True, help='EXPERIMENTAL - Enable Append mode to enable updates via osm2pgsql-replication.') +@click.option('--append', + default=False, + is_flag=True, + help='EXPERIMENTAL - Append mode enables updates via osm2pgsql-replication.') @click.option('--basepath', required=False, default=BASE_PATH_DEFAULT, @@ -126,7 +131,8 @@ def run_pgosm_flex(ram, region, subregion, append, basepath, data_only, debug, dump_database(input_file=input_file, out_path=paths['out_path'], - skip_dump=skip_dump, data_only=data_only, + skip_dump=skip_dump, + data_only=data_only, schema_name=schema_name) if success: @@ -134,18 +140,27 @@ def run_pgosm_flex(ram, region, subregion, append, basepath, data_only, debug, else: logger.warning('PgOSM Flex completed with errors. Details in output') + def run_osm2pgsql_standard(input_file, out_path, flex_path, ram, skip_nested, append): """Runs standard osm2pgsql command and optionally inits for append mode. + + Parameters + --------------------------- + input_file : str + out_path : str + flex_path : str + ram : float + skip_nested : boolean + append : boolean + + Returns + --------------------------- + post_processing : boolean + Indicates overall success/failure of the steps within this function. 
""" logger = logging.getLogger('pgosm-flex') - region = os.environ.get('PGOSM_REGION') - subregion = os.environ.get('PGOSM_SUBREGION') - layerset = os.environ.get('PGOSM_LAYERSET') - layerset_path = os.environ.get('PGOSM_LAYERSET_PATH') - pgosm_date = os.environ.get('PGOSM_DATE') - if input_file is None: geofabrik.prepare_data(out_path=out_path) @@ -176,7 +191,7 @@ def run_osm2pgsql_standard(input_file, out_path, flex_path, ram, skip_nested, logger.debug('Not using append mode') if input_file is None: - geofabrik.remove_latest_files(region, subregion, out_path) + geofabrik.remove_latest_files(out_path) return post_processing @@ -253,7 +268,6 @@ def validate_region_inputs(region, subregion, input_file): raise ValueError(err_msg) - def setup_logger(debug): """Prepares logging. @@ -280,7 +294,6 @@ def setup_logger(debug): logger.debug('Logger configured') - def get_paths(base_path): """Returns dictionary of various paths used. @@ -433,8 +446,11 @@ def run_post_processing(flex_path, skip_nested): Parameters ---------------------- flex_path : str - skip_nested : bool + + Returns + ---------------------- + status : bool """ post_processing_sql = db.pgosm_after_import(flex_path) logger = logging.getLogger('pgosm-flex') @@ -451,11 +467,16 @@ def run_post_processing(flex_path, skip_nested): def dump_database(input_file, out_path, skip_dump, data_only, schema_name): - region = os.environ.get('PGOSM_REGION') - subregion = os.environ.get('PGOSM_SUBREGION') - pgosm_date = os.environ.get('PGOSM_DATE') - layerset = os.environ.get('PGOSM_LAYERSET') + """Runs pg_dump when necessary to export the processed OpenStreetMap data. + Parameters + ----------------------- + input_file : str + out_path : str + skip_dump : bool + data_only : bool + schema_name : str + """ if skip_dump: logging.getLogger('pgosm-flex').info('Skipping pg_dump') else: @@ -470,6 +491,10 @@ def dump_database(input_file, out_path, skip_dump, data_only, schema_name): def check_replication_exists(): """Checks if replication already setup, if so should only run update. + + Returns + ------------------- + status : bool """ logger = logging.getLogger('pgosm-flex') check_cmd = "osm2pgsql-replication status -d $PGOSM_CONN " diff --git a/docs/DOCKER-RUN.md b/docs/DOCKER-RUN.md index 32cfaae..83bc427 100644 --- a/docs/DOCKER-RUN.md +++ b/docs/DOCKER-RUN.md @@ -354,7 +354,7 @@ docker exec -it \ ``` Running the above command a second time will detect that the target database -has osm2pgsql replication setup and load data via the defined replication +has `osm2pgsql-replication` setup and load data via the defined replication service.