diff --git a/Dockerfile b/Dockerfile
index 580c289..7b7fa83 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,8 +1,8 @@
-FROM postgis/postgis:14-3.1
+FROM postgis/postgis:14-3.2
 
 LABEL maintainer="PgOSM-Flex - https://github.com/rustprooflabs/pgosm-flex"
 
-ARG OSM2PGSQL_BRANCH=1.6.0
+ARG OSM2PGSQL_BRANCH=master
 
 RUN apt-get update \
     && apt-get install -y --no-install-recommends \
@@ -11,7 +11,7 @@ RUN apt-get update \
         libboost-dev libboost-system-dev \
         libboost-filesystem-dev libexpat1-dev zlib1g-dev \
         libbz2-dev libpq-dev libproj-dev lua5.2 liblua5.2-dev \
-        python3 python3-distutils python3-psycopg2 \
+        python3 python3-distutils \
         postgresql-server-dev-14 \
         curl luarocks \
     && rm -rf /var/lib/apt/lists/*
diff --git a/docker/db.py b/docker/db.py
index bba3310..1a6f11f 100644
--- a/docker/db.py
+++ b/docker/db.py
@@ -173,18 +173,24 @@ def pg_isready():
     return True
 
 
-def prepare_pgosm_db(data_only, db_path):
+def prepare_pgosm_db(data_only, db_path, append):
     """Runs through a series of steps to prepare the database for PgOSM.
 
     Parameters
    --------------------------
     data_only : bool
     db_path : str
+    append : bool
     """
     if pg_conn_parts()['pg_host'] == 'localhost':
         LOGGER.debug('Running standard database prep for in-Docker operation. Includes DROP/CREATE DATABASE')
-        drop_pgosm_db()
+        if append:
+            LOGGER.debug('Skipping DB drop because of append mode')
+        else:
+            LOGGER.debug('Dropping database')
+            drop_pgosm_db()
+
         create_pgosm_db()
     else:
         LOGGER.info('Using external database. Ensure the target database is set up properly for PgOSM Flex with PostGIS, osm schema, and proper permissions.')
@@ -231,6 +237,10 @@ def drop_pgosm_db():
     """Drops the pgosm database if it exists.
 
     Intentionally hard coded to `pgosm` database for in-Docker use only.
+
+    Returns
+    ------------------------
+    status : bool
     """
     if not pg_conn_parts()['pg_host'] == 'localhost':
         LOGGER.error('Attempted to drop database external from Docker. Not doing that')
@@ -244,12 +254,17 @@ def drop_pgosm_db():
     conn.execute(sql_raw)
     conn.close()
     LOGGER.info('Removed pgosm database')
+    return True
 
 
 def create_pgosm_db():
     """Creates the pgosm database and prepares it with PostGIS and the osm schema
 
     Intentionally hard coded to `pgosm` database for in-Docker use only.
+
+    Returns
+    -----------------------
+    status : bool
     """
     if not pg_conn_parts()['pg_host'] == 'localhost':
         LOGGER.error('Attempted to create database external from Docker. Not doing that')
@@ -260,12 +275,16 @@
     LOGGER.debug('Setting Pg conn to enable autocommit - required for drop/create DB')
     conn.autocommit = True
-    conn.execute(sql_raw)
-    conn.close()
-    LOGGER.info('Created pgosm database')
+    try:
+        conn.execute(sql_raw)
+        LOGGER.info('Created pgosm database')
+    except psycopg.errors.DuplicateDatabase:
+        LOGGER.info('Database already existed.')
+    finally:
+        conn.close()
 
-    sql_create_postgis = "CREATE EXTENSION postgis;"
-    sql_create_schema = "CREATE SCHEMA osm;"
+    sql_create_postgis = "CREATE EXTENSION IF NOT EXISTS postgis;"
+    sql_create_schema = "CREATE SCHEMA IF NOT EXISTS osm;"
 
     with get_db_conn(conn_string=os.environ['PGOSM_CONN']) as conn:
         cur = conn.cursor()
@@ -274,6 +293,8 @@
         cur.execute(sql_create_schema)
         LOGGER.debug('Created osm schema')
 
+    return True
+
 
 def run_sqitch_prep(db_path):
     """Runs Sqitch to create DB structure and populate helper data.
@@ -424,10 +445,18 @@ def pgosm_after_import(flex_path):
     output = subprocess.run(cmds,
                             text=True,
-                            capture_output=True,
                             cwd=flex_path,
-                            check=True)
-    LOGGER.info(f'Post-processing output: \n {output.stderr}')
+                            check=False,
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.STDOUT)
+    LOGGER.info(f'Post-processing SQL output: \n {output.stdout}')
+
+    if output.returncode != 0:
+        err_msg = f'Failed to run post-processing SQL. Return code: {output.returncode}'
+        LOGGER.error(err_msg)
+        return False
+
+    return True
 
 
 def pgosm_nested_admin_polygons(flex_path):
@@ -456,6 +485,49 @@
         sys.exit(f'{err_msg} - Check the log output for details.')
 
 
+def osm2pgsql_replication_start():
+    """Runs pre-replication step to clean out FKs that would prevent updates.
+    """
+    LOGGER.error('Not running cleanup step in SQL yet!')
+    sql_raw = 'CALL osm.append_data_start ();'
+
+    with get_db_conn(conn_string=connection_string()) as conn:
+        cur = conn.cursor()
+        cur.execute(sql_raw)
+
+
+def osm2pgsql_replication_finish(skip_nested):
+    """Runs post-replication step to put FKs back and refresh materialized views.
+
+    Parameters
+    ---------------------
+    skip_nested : bool
+    """
+    # Fails via psycopg; using psql instead
+    if skip_nested:
+        LOGGER.info('Finishing replication, skipping nested polygons')
+        sql_raw = 'CALL osm.append_data_finish(skip_nested := True );'
+    else:
+        LOGGER.info('Finishing replication, including nested polygons')
+        sql_raw = 'CALL osm.append_data_finish(skip_nested := False );'
+
+    conn_string = os.environ['PGOSM_CONN']
+    cmds = ['psql', '-d', conn_string, '-c', sql_raw]
+    LOGGER.info('Finishing replication')
+    output = subprocess.run(cmds,
+                            text=True,
+                            check=False,
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.STDOUT)
+    LOGGER.info(f'Finishing replication output: \n {output.stdout}')
+
+    if output.returncode != 0:
+        err_msg = f'Failed to finish replication. Return code: {output.returncode}'
+        LOGGER.error(err_msg)
+        sys.exit(f'{err_msg} - Check the log output for details.')
+
+
 def rename_schema(schema_name):
     """Renames default schema name "osm" to `schema_name`
@@ -511,6 +583,10 @@ def fix_pg_dump_create_public(export_path):
     """Using pg_dump with `--schema=public` results in a .sql script containing
     `CREATE SCHEMA public;`, which nearly always breaks in the target DB.
     Replaces it with `CREATE SCHEMA IF NOT EXISTS public;`
+
+    Parameters
+    ----------------------
+    export_path : str
     """
     result = sh.sed('-i',
                     's/CREATE SCHEMA public;/CREATE SCHEMA IF NOT EXISTS public;/',
diff --git a/docker/geofabrik.py b/docker/geofabrik.py
index d5dc8d6..57efa81 100644
--- a/docker/geofabrik.py
+++ b/docker/geofabrik.py
@@ -8,18 +8,16 @@
 import helpers
 
 
-def get_region_filename(region, subregion):
+def get_region_filename():
     """Returns the filename needed to download/manage PBF files.
 
-    Parameters
-    ----------------------
-    region : str
-    subregion : str
-
     Returns
     ----------------------
     filename : str
     """
+    region = os.environ.get('PGOSM_REGION')
+    subregion = os.environ.get('PGOSM_SUBREGION')
+
     base_name = '{}-latest.osm.pbf'
     if subregion is None:
         filename = base_name.format(region)
@@ -29,7 +27,7 @@ def get_region_filename():
     return filename
 
 
-def prepare_data(region, subregion, pgosm_date, out_path):
+def prepare_data(out_path):
     """Ensures the PBF file is available.
 
     Checks if it already exists locally, downloads if needed,
 
@@ -37,9 +35,6 @@
 
     Parameters
     ----------------------
-    region : str
-    subregion : str
-    pgosm_date : str
     out_path : str
 
     Returns
@@ -47,7 +42,11 @@
     pbf_file : str
         Full path to PBF file
     """
-    pbf_filename = get_region_filename(region, subregion)
+    region = os.environ.get('PGOSM_REGION')
+    subregion = os.environ.get('PGOSM_SUBREGION')
+    pgosm_date = os.environ.get('PGOSM_DATE')
+
+    pbf_filename = get_region_filename()
     pbf_file = os.path.join(out_path, pbf_filename)
     pbf_file_with_date = pbf_file.replace('latest', pgosm_date)
@@ -216,23 +215,20 @@ def unarchive_data(pbf_file, md5_file, pbf_file_with_date, md5_file_with_date):
     shutil.copy2(md5_file_with_date, md5_file)
 
 
-def remove_latest_files(region, subregion, paths):
+def remove_latest_files(out_path):
     """Removes the PBF and MD5 file with -latest in the name.
 
     Files are archived via prepare_data() before processing starts
 
     Parameters
     -------------------------
-    region : str
-    subregion : str
-    paths : dict
+    out_path : str
     """
-    pbf_filename = get_region_filename(region, subregion)
+    pbf_filename = get_region_filename()
 
-    pbf_file = os.path.join(paths['out_path'], pbf_filename)
+    pbf_file = os.path.join(out_path, pbf_filename)
     md5_file = f'{pbf_file}.md5'
     logging.info(f'Done with {pbf_file}, removing.')
     os.remove(pbf_file)
     logging.info(f'Done with {md5_file}, removing.')
     os.remove(md5_file)
-
diff --git a/docker/helpers.py b/docker/helpers.py
index 7ac9cb5..58ebac8 100644
--- a/docker/helpers.py
+++ b/docker/helpers.py
@@ -47,7 +47,7 @@ def verify_checksum(md5_file, path):
         logger.error(err_msg)
         sys.exit(err_msg)
 
-    logger.info(f'md5sum validated')
+    logger.info('md5sum validated')
 
 
 def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
@@ -72,13 +72,17 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
     unset_env_vars()
     logger.debug('Setting environment variables')
 
+    os.environ['PGOSM_REGION'] = region
+
     if subregion is None:
         pgosm_region = f'{region}'
     else:
+        os.environ['PGOSM_SUBREGION'] = subregion
         pgosm_region = f'{region}-{subregion}'
 
-    logger.debug(f'PGOSM_REGION: {pgosm_region}')
-    os.environ['PGOSM_REGION'] = pgosm_region
+    # Used by helpers.lua
+    logger.debug(f'PGOSM_REGION_COMBINED: {pgosm_region}')
+    os.environ['PGOSM_REGION_COMBINED'] = pgosm_region
 
     if srid != DEFAULT_SRID:
         logger.info(f'SRID set: {srid}')
@@ -106,6 +110,8 @@ def unset_env_vars():
     Does not pop POSTGRES_DB on purpose to allow non-Docker operation.
     """
     os.environ.pop('PGOSM_REGION', None)
+    os.environ.pop('PGOSM_SUBREGION', None)
+    os.environ.pop('PGOSM_REGION_COMBINED', None)
     os.environ.pop('PGOSM_SRID', None)
     os.environ.pop('PGOSM_LANGUAGE', None)
     os.environ.pop('PGOSM_LAYERSET_PATH', None)
diff --git a/docker/osm2pgsql_recommendation.py b/docker/osm2pgsql_recommendation.py
index 491401f..c5e6eeb 100644
--- a/docker/osm2pgsql_recommendation.py
+++ b/docker/osm2pgsql_recommendation.py
@@ -10,7 +10,7 @@
 LOGGER = logging.getLogger('pgosm-flex')
 
 
-def osm2pgsql_recommendation(ram, pbf_filename, out_path):
+def osm2pgsql_recommendation(ram, pbf_filename, out_path, append):
     """Returns recommended osm2pgsql command.
 
     Recommendation from API at https://osm2pgsql-tuner.com
 
@@ -24,6 +24,8 @@
 
     out_path : str
 
+    append : bool
+
     Returns
     ----------------------
     osm2pgsql_cmd : str
@@ -38,8 +40,6 @@
     osm_pbf_gb = os.path.getsize(pbf_file) / 1024 / 1024 / 1024
     LOGGER.debug(f'PBF size (GB): {osm_pbf_gb}')
 
-    # PgOSM-Flex currently does not support/test append mode.
-    append = False
     osm2pgsql_cmd = get_recommended_script(system_ram_gb,
                                            osm_pbf_gb,
                                            append,
diff --git a/docker/pgosm_flex.py b/docker/pgosm_flex.py
index 0625f3a..ae13db3 100644
--- a/docker/pgosm_flex.py
+++ b/docker/pgosm_flex.py
@@ -17,7 +17,9 @@
 import click
 
 import osm2pgsql_recommendation as rec
-import db, geofabrik, helpers
+import db
+import geofabrik
+import helpers
 
 BASE_PATH_DEFAULT = '/app'
 
@@ -35,6 +37,10 @@
 @click.option('--subregion', required=False,
               help='Sub-region name matching the filename for data sourced from Geofabrik. e.g. district-of-columbia')
 # Remainder of options in alphabetical order
+@click.option('--append',
+              default=False,
+              is_flag=True,
+              help='EXPERIMENTAL - Append mode enables updates via osm2pgsql-replication.')
 @click.option('--basepath',
               required=False,
               default=BASE_PATH_DEFAULT,
@@ -73,7 +79,7 @@
 @click.option('--srid', required=False, default=helpers.DEFAULT_SRID,
               envvar="PGOSM_SRID",
               help="SRID for data loaded by osm2pgsql to PostGIS. Defaults to 3857")
-def run_pgosm_flex(ram, region, subregion, basepath, data_only, debug,
+def run_pgosm_flex(ram, region, subregion, append, basepath, data_only, debug,
                    input_file, layerset, layerset_path, language, pgosm_date,
                    schema_name, skip_dump, skip_nested, srid):
     """Run PgOSM Flex within Docker to automate osm2pgsql flex processing.
@@ -85,6 +91,10 @@ def run_pgosm_flex(ram, region, subregion, append, basepath, data_only, debug,
 
     validate_region_inputs(region, subregion, input_file)
 
+    if schema_name != 'osm' and append:
+        sys.exit('ERROR: Append mode with custom schema name is not currently supported')
+
+    # Ensure there is always a region name
     if region is None and input_file:
         region = input_file
 
@@ -92,56 +102,145 @@
     helpers.set_env_vars(region, subregion, srid, language, pgosm_date,
                          layerset, layerset_path)
 
+    db.wait_for_postgres()
+    db.prepare_pgosm_db(data_only=data_only,
+                        db_path=paths['db_path'],
+                        append=append)
+
+    if append:
+        replication_update = check_replication_exists()
+    else:
+        replication_update = False
+
+    if replication_update:
+        logger.info('Running osm2pgsql-replication update mode')
+        success = run_replication_update(skip_nested=skip_nested,
+                                         flex_path=paths['flex_path'])
+    else:
+        logger.info('Running normal osm2pgsql mode')
+        success = run_osm2pgsql_standard(input_file=input_file,
+                                         out_path=paths['out_path'],
+                                         flex_path=paths['flex_path'],
+                                         ram=ram,
+                                         skip_nested=skip_nested,
+                                         append=append)
+
+    if schema_name != 'osm':
+        db.rename_schema(schema_name)
+
+    dump_database(input_file=input_file,
+                  out_path=paths['out_path'],
+                  skip_dump=skip_dump,
+                  data_only=data_only,
+                  schema_name=schema_name)
+
+    if success:
+        logger.info('PgOSM Flex complete!')
+    else:
+        logger.warning('PgOSM Flex completed with errors. Details in output')
+
+
+def run_osm2pgsql_standard(input_file, out_path, flex_path, ram, skip_nested,
+                           append):
+    """Runs standard osm2pgsql command and optionally inits for append mode.
+
+    Parameters
+    ---------------------------
+    input_file : str
+    out_path : str
+    flex_path : str
+    ram : float
+    skip_nested : bool
+    append : bool
+
+    Returns
+    ---------------------------
+    post_processing : bool
+        Indicates overall success/failure of the steps within this function.
+    """
+    logger = logging.getLogger('pgosm-flex')
+
     if input_file is None:
-        geofabrik.prepare_data(region=region,
-                               subregion=subregion,
-                               pgosm_date=pgosm_date,
-                               out_path=paths['out_path'])
+        geofabrik.prepare_data(out_path=out_path)
 
-        pbf_filename = geofabrik.get_region_filename(region, subregion)
+        pbf_filename = geofabrik.get_region_filename()
         osm2pgsql_command = rec.osm2pgsql_recommendation(ram=ram,
                                                          pbf_filename=pbf_filename,
-                                                         out_path=paths['out_path'])
+                                                         out_path=out_path,
+                                                         append=append)
     else:
         osm2pgsql_command = rec.osm2pgsql_recommendation(ram=ram,
                                                          pbf_filename=input_file,
-                                                         out_path=paths['out_path'])
-
-    db.wait_for_postgres()
-
-    db.prepare_pgosm_db(data_only=data_only, db_path=paths['db_path'])
+                                                         out_path=out_path,
+                                                         append=append)
 
-    flex_path = paths['flex_path']
     run_osm2pgsql(osm2pgsql_command=osm2pgsql_command, flex_path=flex_path)
 
     if not skip_nested:
-        # Auto-set skip_nested when place layer not imported
-        skip_nested = check_layerset_places(layerset_path, layerset, flex_path)
+        skip_nested = check_layerset_places(flex_path)
 
-    run_post_processing(flex_path=flex_path, skip_nested=skip_nested)
+    post_processing = run_post_processing(flex_path=flex_path,
+                                          skip_nested=skip_nested)
+
+    if append:
+        run_osm2pgsql_replication_init(pbf_path=out_path,
+                                       pbf_filename=pbf_filename)
+    else:
+        logger.debug('Not using append mode')
 
     if input_file is None:
-        geofabrik.remove_latest_files(region, subregion, paths)
+        geofabrik.remove_latest_files(out_path)
 
-    export_filename = get_export_filename(region,
-                                          subregion,
-                                          layerset,
-                                          pgosm_date,
-                                          input_file)
+    return post_processing
 
-    export_path = get_export_full_path(paths['out_path'], export_filename)
 
-    if schema_name != 'osm':
-        db.rename_schema(schema_name)
+def run_replication_update(skip_nested, flex_path):
+    """Runs osm2pgsql-replication between the DB start/finish steps.
 
-    if skip_dump:
-        logger.info('Skipping pg_dump')
-    else:
-        db.run_pg_dump(export_path=export_path,
-                       data_only=data_only,
-                       schema_name=schema_name)
-    logger.info('PgOSM Flex complete!')
+    Parameters
+    -----------------------
+    skip_nested : bool
+    flex_path : str
+
+    Returns
+    ---------------------
+    bool
+        Indicates success/failure of the replication process.
+    """
+    logger = logging.getLogger('pgosm-flex')
+    conn_string = db.connection_string()
+
+    db.osm2pgsql_replication_start()
+
+    update_cmd = """
+osm2pgsql-replication update -d $PGOSM_CONN \
+    -- \
+    --output=flex --style=./run.lua \
+    --slim \
+    -d $PGOSM_CONN
+"""
+    update_cmd = update_cmd.replace('-d $PGOSM_CONN', f'-d {conn_string}')
+
+    output = subprocess.run(update_cmd.split(),
+                            text=True,
+                            check=False,
+                            cwd=flex_path,
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.STDOUT)
+
+    logger.info(f'osm2pgsql-replication output:\n{output.stdout}')
+
+    if output.returncode != 0:
+        err_msg = f'osm2pgsql-replication update failed. Return code: {output.returncode}'
+        logger.warning(err_msg)
+        return False
+
+    db.osm2pgsql_replication_finish(skip_nested=skip_nested)
+
+    logger.info('osm2pgsql-replication update complete.')
+    return True
 
 
 def validate_region_inputs(region, subregion, input_file):
@@ -169,7 +268,6 @@ def validate_region_inputs(region, subregion, input_file):
         raise ValueError(err_msg)
 
 
-
 def setup_logger(debug):
     """Prepares logging.
@@ -196,7 +294,6 @@ def setup_logger(debug):
     logger.debug('Logger configured')
 
 
-
 def get_paths(base_path):
     """Returns dictionary of various paths used.
 
@@ -223,15 +320,11 @@
 
 
-def get_export_filename(region, subregion, layerset, pgosm_date, input_file):
+def get_export_filename(input_file):
     """Returns the .sql filename to use for pg_dump.
 
     Parameters
     ----------------------
-    region : str
-    subregion : str
-    layerset : str
-    pgosm_date : str
     input_file : str
 
     Returns
@@ -239,7 +332,11 @@
     filename : str
     """
     # region is always set internally, even with --input-file and no --region
-    region = region.replace('/', '-')
+    region = os.environ.get('PGOSM_REGION').replace('/', '-')
+    subregion = os.environ.get('PGOSM_SUBREGION')
+    layerset = os.environ.get('PGOSM_LAYERSET')
+    pgosm_date = os.environ.get('PGOSM_DATE')
+
     if subregion:
         subregion = subregion.replace('/', '-')
 
@@ -304,13 +401,11 @@ def run_osm2pgsql(osm2pgsql_command, flex_path):
     logger.info('osm2pgsql completed.')
 
 
-def check_layerset_places(layerset_path, layerset, flex_path):
+def check_layerset_places(flex_path):
     """If `place` layer is not included, `skip_nested` should be True.
 
     Parameters
     ------------------------
-    layerset_path : str
-    layerset : str
     flex_path : str
 
     Returns
@@ -319,6 +414,9 @@
     """
     logger = logging.getLogger('pgosm-flex')
 
+    layerset = os.environ.get('PGOSM_LAYERSET')
+    layerset_path = os.environ.get('PGOSM_LAYERSET_PATH')
+
     if layerset_path is None:
         layerset_path = os.path.join(flex_path, 'layerset')
         logger.info(f'Using default layerset path {layerset_path}')
@@ -348,10 +446,14 @@ def run_post_processing(flex_path, skip_nested):
 
     Parameters
     ----------------------
     flex_path : str
     skip_nested : bool
+
+    Returns
+    ----------------------
+    status : bool
     """
-    db.pgosm_after_import(flex_path)
+    post_processing_sql = db.pgosm_after_import(flex_path)
     logger = logging.getLogger('pgosm-flex')
     if skip_nested:
         logger.info('Skipping calculating nested polygons')
@@ -359,6 +460,95 @@
         logger.info('Calculating nested polygons')
         db.pgosm_nested_admin_polygons(flex_path)
 
+    if not post_processing_sql:
+        return False
+
+    return True
+
+
+def dump_database(input_file, out_path, skip_dump, data_only, schema_name):
+    """Runs pg_dump when necessary to export the processed OpenStreetMap data.
+
+    Parameters
+    -----------------------
+    input_file : str
+    out_path : str
+    skip_dump : bool
+    data_only : bool
+    schema_name : str
+    """
+    if skip_dump:
+        logging.getLogger('pgosm-flex').info('Skipping pg_dump')
+    else:
+        export_filename = get_export_filename(input_file)
+
+        export_path = get_export_full_path(out_path, export_filename)
+
+        db.run_pg_dump(export_path=export_path,
+                       data_only=data_only,
+                       schema_name=schema_name)
+
+
+def check_replication_exists():
+    """Checks if replication is already set up; if so, only an update should run.
+
+    Returns
+    -------------------
+    status : bool
+    """
+    logger = logging.getLogger('pgosm-flex')
+    check_cmd = "osm2pgsql-replication status -d $PGOSM_CONN "
+    logger.debug(f'Command to check DB for replication status:\n{check_cmd}')
+    conn_string = db.connection_string()
+    check_cmd = check_cmd.replace('-d $PGOSM_CONN', f'-d {conn_string}')
+    output = subprocess.run(check_cmd.split(),
+                            text=True,
+                            check=False,
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.STDOUT)
+
+    logger.debug(f'osm2pgsql-replication output:\n{output.stdout}')
+
+    if output.returncode != 0:
+        err_msg = f'Replication status check failed. Return code: {output.returncode}'
+        logger.warning(err_msg)
+        return False
+
+    logger.debug('osm2pgsql-replication status checked.')
+    return True
+
+
+def run_osm2pgsql_replication_init(pbf_path, pbf_filename):
+    """Runs osm2pgsql-replication init to support append mode.
+
+    Parameters
+    ---------------------
+    pbf_path : str
+    pbf_filename : str
+    """
+    logger = logging.getLogger('pgosm-flex')
+    pbf_path = os.path.join(pbf_path, pbf_filename)
+    init_cmd = 'osm2pgsql-replication init -d $PGOSM_CONN '
+    init_cmd += f'--osm-file {pbf_path}'
+    logger.debug(f'Initializing DB for replication with command:\n{init_cmd}')
+    conn_string = db.connection_string()
+    init_cmd = init_cmd.replace('-d $PGOSM_CONN', f'-d {conn_string}')
+    output = subprocess.run(init_cmd.split(),
+                            text=True,
+                            check=False,
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.STDOUT)
+
+    logger.info(f'osm2pgsql-replication output:\n{output.stdout}')
+
+    if output.returncode != 0:
+        err_msg = f'Failed to run osm2pgsql-replication. Return code: {output.returncode}'
+        logger.error(err_msg)
+        sys.exit(f'{err_msg} - Check the log output for details.')
+
+    logger.debug('osm2pgsql-replication init completed.')
+
 
 if __name__ == "__main__":
     logging.getLogger('pgosm-flex').info('Running PgOSM Flex!')
diff --git a/docker/tests/test_geofabrik.py b/docker/tests/test_geofabrik.py
index 2f4a9e7..89fc6ef 100644
--- a/docker/tests/test_geofabrik.py
+++ b/docker/tests/test_geofabrik.py
@@ -1,24 +1,45 @@
 """ Unit tests to cover the Geofabrik module."""
 import unittest
-import geofabrik
+import geofabrik, helpers
 
 REGION_US = 'north-america/us'
 SUBREGION_DC = 'district-of-columbia'
+LAYERSET = 'default'
+PGOSM_DATE = '2021-12-02'
 
 
 class GeofabrikTests(unittest.TestCase):
 
+    def setUp(self):
+        helpers.set_env_vars(region=REGION_US,
+                             subregion=SUBREGION_DC,
+                             srid=3857,
+                             language=None,
+                             pgosm_date=PGOSM_DATE,
+                             layerset=LAYERSET,
+                             layerset_path=None)
+
+    def tearDown(self):
+        helpers.unset_env_vars()
+
     def test_get_region_filename_returns_subregion_when_exists(self):
-        region = REGION_US
-        subregion = SUBREGION_DC
-        result = geofabrik.get_region_filename(region, subregion)
+        result = geofabrik.get_region_filename()
         expected = f'{SUBREGION_DC}-latest.osm.pbf'
         self.assertEqual(expected, result)
 
     def test_get_region_filename_returns_region_when_subregion_None(self):
-        region = REGION_US
-        subregion = None
-        result = geofabrik.get_region_filename(region, subregion)
+        # Override subregion to None
+        helpers.unset_env_vars()
+        helpers.set_env_vars(region='north-america/us',
+                             subregion=None,
+                             srid=3857,
+                             language=None,
+                             pgosm_date=PGOSM_DATE,
+                             layerset=LAYERSET,
+                             layerset_path=None)
+
+        result = geofabrik.get_region_filename()
         expected = f'{REGION_US}-latest.osm.pbf'
         self.assertEqual(expected, result)
 
@@ -37,10 +58,8 @@ def test_get_pbf_url_returns_proper_with_region_and_subregion(self):
         self.assertEqual(expected, result)
 
     def test_pbf_download_needed_returns_boolean(self):
-        region = REGION_US
-        subregion = SUBREGION_DC
         pgosm_date = geofabrik.helpers.get_today()
-        region_filename = geofabrik.get_region_filename(region, subregion)
+        region_filename = geofabrik.get_region_filename()
         expected = bool
         result = geofabrik.pbf_download_needed(pbf_file_with_date='does-not-matter',
                                                md5_file_with_date='not-a-file',
@@ -48,10 +67,8 @@
         self.assertEqual(expected, type(result))
 
     def test_pbf_download_needed_returns_true_when_file_not_exists(self):
-        region = REGION_US
-        subregion = SUBREGION_DC
         pgosm_date = geofabrik.helpers.get_today()
-        region_filename = geofabrik.get_region_filename(region, subregion)
+        region_filename = geofabrik.get_region_filename()
         expected = True
         result = geofabrik.pbf_download_needed(pbf_file_with_date='does-not-matter',
                                                md5_file_with_date='not-a-file',
diff --git a/docker/tests/test_osm2pgsql_recommendation.py b/docker/tests/test_osm2pgsql_recommendation.py
index 105475e..c05a4ec 100644
--- a/docker/tests/test_osm2pgsql_recommendation.py
+++ b/docker/tests/test_osm2pgsql_recommendation.py
@@ -23,7 +23,7 @@ def test_get_recommended_script_returns_str(self):
         self.assertEqual(expected, actual)
 
     def test_get_recommended_script_returns_expected_str(self):
-        expected = 'osm2pgsql -d postgresql://postgres:mysecretpassword@localhost/pgosm?application_name=pgosm-flex --cache=0 --slim --drop --flat-nodes=/tmp/nodes --output=flex --style=./run.lua this-is-a-test/This-is-a-test.osm.pbf'
+        expected = 'osm2pgsql -d postgresql://postgres:mysecretpassword@localhost/pgosm?application_name=pgosm-flex --cache=0 --slim --drop --flat-nodes=/tmp/nodes --output=flex --style=./run.lua This-is-a-test.osm.pbf'
         system_ram_gb = 2
         osm_pbf_gb = 10
         append = False
diff --git a/docker/tests/test_pgosm_flex.py b/docker/tests/test_pgosm_flex.py
index a58fb1d..61092a1 100644
--- a/docker/tests/test_pgosm_flex.py
+++ b/docker/tests/test_pgosm_flex.py
@@ -1,12 +1,28 @@
 """ Unit tests to cover the pgosm_flex module."""
 import unittest
-import pgosm_flex
+import pgosm_flex, helpers
 
 REGION_US = 'north-america/us'
 SUBREGION_DC = 'district-of-columbia'
+LAYERSET = 'default'
+PGOSM_DATE = '2021-12-02'
 
 
 class PgOSMFlexTests(unittest.TestCase):
 
+    def setUp(self):
+        helpers.set_env_vars(region=REGION_US,
+                             subregion=SUBREGION_DC,
+                             srid=3857,
+                             language=None,
+                             pgosm_date=PGOSM_DATE,
+                             layerset=LAYERSET,
+                             layerset_path=None)
+
+    def tearDown(self):
+        helpers.unset_env_vars()
+
     def test_get_paths_returns_dict(self):
         base_path = pgosm_flex.BASE_PATH_DEFAULT
         expected = dict
@@ -52,35 +68,30 @@ def test_get_export_filename_slash_to_dash(self):
         Also tests the filename w/ region & subregion - no need for
         an additional test covering that behavior.
""" - region = 'north-america/us' - subregion = 'not/real' - layerset = 'default' - pgosm_date = '2021-12-02' input_file = None - result = pgosm_flex.get_export_filename(region, subregion, layerset, pgosm_date, input_file) - expected = 'north-america-us-not-real-default-2021-12-02.sql' + result = pgosm_flex.get_export_filename(input_file) + expected = 'north-america-us-district-of-columbia-default-2021-12-02.sql' self.assertEqual(expected, result) def test_get_export_filename_input_file_defined_overrides_region_subregion(self): - region = 'doesnotmatter' # Not setting to None to ensure expected behavior - subregion = 'alsodoesnotmatter' # Not setting to None to ensure expected behavior - layerset = 'default' - pgosm_date = '2021-12-02' input_file = '/my/inputfile.osm.pbf' - result = pgosm_flex.get_export_filename(region, subregion, layerset, pgosm_date, input_file) + result = pgosm_flex.get_export_filename(input_file) expected = '/my/inputfile-default-2021-12-02.sql' self.assertEqual(expected, result) def test_get_export_filename_region_only(self): - # Need 4 tests covering this function - # Check name when region , no subregion - # - region = 'north-america' - subregion = None - layerset = 'default' - pgosm_date = '2021-12-02' + # Override Subregion to None + helpers.unset_env_vars() + helpers.set_env_vars(region='north-america', + subregion=None, + srid=3857, + language=None, + pgosm_date=PGOSM_DATE, + layerset=LAYERSET, + layerset_path=None) + input_file = None - result = pgosm_flex.get_export_filename(region, subregion, layerset, pgosm_date, input_file) + result = pgosm_flex.get_export_filename(input_file) expected = 'north-america-default-2021-12-02.sql' self.assertEqual(expected, result) diff --git a/docs/DOCKER-RUN.md b/docs/DOCKER-RUN.md index 63aff7b..83bc427 100644 --- a/docs/DOCKER-RUN.md +++ b/docs/DOCKER-RUN.md @@ -314,3 +314,48 @@ docker exec -it \ --skip-dump ``` + +## Use `--append` for updates + +> Added `--append` as **Experimental** feature in 0.4.6. + + +Using `--append` mode wraps around the `osm2pgsql-replication` package +included with `osm2pgsql`. The first time running an import with `--append` +mode runs osm2pgsql normally, with `--slim` mode and without `--drop`. +After osm2pgsql completes, `osm2pgsql-replication init ...` is ran to setup +the DB for updates. + +Need to increase Postgres' `max_connections`, see +[this discussion on osm2pgsql](https://github.com/openstreetmap/osm2pgsql/discussions/1650). + + +```bash +docker run --name pgosm -d --rm \ + -v ~/pgosm-data:/app/output \ + -v /etc/localtime:/etc/localtime:ro \ + -e POSTGRES_PASSWORD=$POSTGRES_PASSWORD \ + -p 5433:5432 -d rustprooflabs/pgosm-flex \ + -c max_connections=300 +``` + +Run the `docker exec` step with `--append` and `--skip-dump`. This results in +a larger database as the intermediate osm2pgsql tables must be left +in the database. + +```bash +docker exec -it \ + pgosm python3 docker/pgosm_flex.py \ + --ram=8 \ + --region=north-america/us \ + --subregion=district-of-columbia \ + --pgosm-date 2022-02-22 \ + --append --skip-dump +``` + +Running the above command a second time will detect that the target database +has `osm2pgsql-replication` setup and load data via the defined replication +service. 
+
+
+
diff --git a/flex-config/helpers.lua b/flex-config/helpers.lua
index 10b6eda..3c2622d 100644
--- a/flex-config/helpers.lua
+++ b/flex-config/helpers.lua
@@ -23,12 +23,12 @@ else
 end
 
 
-local pgosm_region_env = os.getenv("PGOSM_REGION")
+local pgosm_region_env = os.getenv("PGOSM_REGION_COMBINED")
 if pgosm_region_env then
-    pgosm_region = pgosm_region_env
-    print('INFO - Region: ' .. pgosm_region)
+    pgosm_region_combined = pgosm_region_env
+    print('INFO - Region: ' .. pgosm_region_combined)
 else
-    pgosm_region = 'Not Specified'
+    pgosm_region_combined = 'Not Specified'
     print('INFO - Set PGOSM_REGION env var to customize region. ')
 end
diff --git a/flex-config/run-sql.lua b/flex-config/run-sql.lua
index a4929b3..0a03b84 100644
--- a/flex-config/run-sql.lua
+++ b/flex-config/run-sql.lua
@@ -27,7 +27,14 @@ local function post_processing(layerset)
     sql_raw = sql_file:read( '*all' )
     sql_file:close()
     local result = con:execute(sql_raw)
-    --print(result) -- Returns 0.0 on success? nil on error?
+
+    -- Returns 0 on success, nil on error.
+    if result == nil then
+        print(string.format("Error in post-processing layerset: %s", layerset))
+        return false
+    end
+
+    return true
 end
 
 
@@ -44,12 +51,17 @@
 while row do
     row = cur:fetch (row, "a")
 end
 
+local errors = 0
 
-post_processing('pgosm-meta')
+if not post_processing('pgosm-meta') then
+    errors = errors + 1
+end
 
 for ix, layer in ipairs(layers) do
     if conf['layerset'][layer] then
-        post_processing(layer)
+        if not post_processing(layer) then
+            errors = errors + 1
+        end
     end
 end
 
@@ -58,3 +70,7 @@
 cur:close()
 con:close()
 env:close()
+
+if errors > 0 then
+    os.exit(1)
+end
diff --git a/flex-config/sql/pgosm-meta.sql b/flex-config/sql/pgosm-meta.sql
index 4dad979..2ad7ac2 100644
--- a/flex-config/sql/pgosm-meta.sql
+++ b/flex-config/sql/pgosm-meta.sql
@@ -53,5 +53,5 @@ END $$;
 
 COMMENT ON PROCEDURE osm.append_data_start() IS 'Prepares PgOSM Flex database for running osm2pgsql in append mode. Removes records from place_polygon_nested if they existed.';
 
-COMMENT ON PROCEDURE osm.append_data_finish() IS 'Finalizes PgOSM Flex after osm2pgsql-replication. Refreshes materialized view and (optionally) processes the place_polygon_nested data.';
+COMMENT ON PROCEDURE osm.append_data_finish(BOOLEAN) IS 'Finalizes PgOSM Flex after osm2pgsql-replication. Refreshes materialized view and (optionally) processes the place_polygon_nested data.';
diff --git a/flex-config/style/pgosm-meta.lua b/flex-config/style/pgosm-meta.lua
index e3dd7e7..30710c1 100644
--- a/flex-config/style/pgosm-meta.lua
+++ b/flex-config/style/pgosm-meta.lua
@@ -90,7 +90,7 @@ local sql_insert = [[ INSERT INTO osm.pgosm_flex (osm_date, default_date, region
 [[ VALUES (']] .. con:escape(pgosm_date) .. [[', ]]
     .. default_date_str .. [[ , ']] .. -- special handling for boolean
-    con:escape(pgosm_region) .. [[', ']] ..
+    con:escape(pgosm_region_combined) .. [[', ']] ..
     con:escape(pgosm_flex_version) .. [[', ']] ..
     con:escape(srid) .. [[', ']] ..
     con:escape(project_url) .. [[', ']] ..