From d77d6c9ca60d5ec55c4af77870555373f4aed828 Mon Sep 17 00:00:00 2001 From: Seb Pretzer <24555985+sebpretzer@users.noreply.github.com> Date: Mon, 18 Mar 2024 12:36:36 -0400 Subject: [PATCH 1/2] adding test for glue endpoint override --- pyiceberg/catalog/glue.py | 6 +++++- tests/catalog/test_glue.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py index adec150d84..6d87a68b62 100644 --- a/pyiceberg/catalog/glue.py +++ b/pyiceberg/catalog/glue.py @@ -105,6 +105,10 @@ GLUE_SKIP_ARCHIVE = "glue.skip-archive" GLUE_SKIP_ARCHIVE_DEFAULT = True +# Configure an alternative endpoint of the Glue service for GlueCatalog to access. +# This allows GlueCatalog to be used with any Glue-compatible metastore service that has a different endpoint. +GLUE_CATALOG_ENDPOINT = "glue.endpoint" + ICEBERG_FIELD_ID = "iceberg.field.id" ICEBERG_FIELD_OPTIONAL = "iceberg.field.optional" ICEBERG_FIELD_CURRENT = "iceberg.field.current" @@ -285,7 +289,7 @@ def __init__(self, name: str, **properties: Any): aws_secret_access_key=properties.get("aws_secret_access_key"), aws_session_token=properties.get("aws_session_token"), ) - self.glue: GlueClient = session.client("glue") + self.glue: GlueClient = session.client("glue", endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT)) if glue_catalog_id := properties.get(GLUE_ID): _register_glue_catalog_id_with_glue_client(self.glue, glue_catalog_id) diff --git a/tests/catalog/test_glue.py b/tests/catalog/test_glue.py index 6d44d92724..10815b63ff 100644 --- a/tests/catalog/test_glue.py +++ b/tests/catalog/test_glue.py @@ -692,3 +692,19 @@ def test_commit_table_properties( updated_table_metadata = table.metadata assert test_catalog._parse_metadata_version(table.metadata_location) == 1 assert updated_table_metadata.properties == {"test_a": "test_aa", "test_c": "test_c"} + + +def test_glue_endpoint_override(moto_endpoint_url: str, database_name: str) -> None: + catalog_name 
= "glue" + test_catalog = GlueCatalog( + catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}", "glue.endpoint": moto_endpoint_url} + ) + assert test_catalog.glue.meta.endpoint_url == moto_endpoint_url + + test_catalog.create_namespace(namespace=database_name) + assert (database_name,) in test_catalog.list_namespaces() + + with mock_aws(): + other_catalog = GlueCatalog(catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}"}) + assert other_catalog.glue.meta.endpoint_url != moto_endpoint_url + assert (database_name,) not in other_catalog.list_namespaces() From 19e970c3a1fe9d8392f8bfd90cdcf39f70711b3f Mon Sep 17 00:00:00 2001 From: Seb Pretzer <24555985+sebpretzer@users.noreply.github.com> Date: Mon, 18 Mar 2024 13:05:32 -0400 Subject: [PATCH 2/2] removing unnecessary code --- tests/catalog/test_glue.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/catalog/test_glue.py b/tests/catalog/test_glue.py index 10815b63ff..1c12066945 100644 --- a/tests/catalog/test_glue.py +++ b/tests/catalog/test_glue.py @@ -700,11 +700,3 @@ def test_glue_endpoint_override(moto_endpoint_url: str, database_name: str) -> N catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}", "glue.endpoint": moto_endpoint_url} ) assert test_catalog.glue.meta.endpoint_url == moto_endpoint_url - - test_catalog.create_namespace(namespace=database_name) - assert (database_name,) in test_catalog.list_namespaces() - - with mock_aws(): - other_catalog = GlueCatalog(catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}"}) - assert other_catalog.glue.meta.endpoint_url != moto_endpoint_url - assert (database_name,) not in other_catalog.list_namespaces()