From 0ffbf9279ec63079f883b7fad15f4e94179044b2 Mon Sep 17 00:00:00 2001 From: Yufei Gu Date: Fri, 7 Feb 2025 17:53:20 -0800 Subject: [PATCH 1/5] Add policies for metadata compaction, orphan file removal and snapshot retention --- .../metadata-compaction/2025-02-03.json | 37 +++++++++++++++ .../orphan-file-removal/2025-02-03.json | 47 +++++++++++++++++++ .../system/snapshot-retention/2025-02-03.json | 38 +++++++++++++++ 3 files changed, 122 insertions(+) create mode 100644 polaris-core/src/main/resources/schemas/policies/system/metadata-compaction/2025-02-03.json create mode 100644 polaris-core/src/main/resources/schemas/policies/system/orphan-file-removal/2025-02-03.json create mode 100644 polaris-core/src/main/resources/schemas/policies/system/snapshot-retention/2025-02-03.json diff --git a/polaris-core/src/main/resources/schemas/policies/system/metadata-compaction/2025-02-03.json b/polaris-core/src/main/resources/schemas/policies/system/metadata-compaction/2025-02-03.json new file mode 100644 index 0000000000..2d910e3fa2 --- /dev/null +++ b/polaris-core/src/main/resources/schemas/policies/system/metadata-compaction/2025-02-03.json @@ -0,0 +1,37 @@ +{ + "license": "Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)", + "$id": "https://polaris.apache.org/schemas/policies/system/metadata-compaction/2025-02-03.json", + "title": "Metadata Compaction Policy", + "description": "Inheritable Polaris policy schema for Iceberg table metadata compaction.", + "type": "object", + "properties": { + "version": { + "type": "string", + "const": "2025-02-03", + "description": "Schema version." + }, + "enable": { + "type": "boolean", + "description": "Enable or disable metadata compaction." + }, + "config": { + "type": "object", + "description": "A map containing custom configuration properties. 
Please note that interoperability is not guaranteed.", + "additionalProperties": { + "type": ["string", "number", "boolean"] + } + } + }, + "required": ["enable"], + "additionalProperties": false, + "examples": [ + { + "version": "2025-02-03", + "enable": true, + "config": { + "spec_id": 1, + "my_key": "my_value" + } + } + ] +} diff --git a/polaris-core/src/main/resources/schemas/policies/system/orphan-file-removal/2025-02-03.json b/polaris-core/src/main/resources/schemas/policies/system/orphan-file-removal/2025-02-03.json new file mode 100644 index 0000000000..3337884a35 --- /dev/null +++ b/polaris-core/src/main/resources/schemas/policies/system/orphan-file-removal/2025-02-03.json @@ -0,0 +1,47 @@ +{ + "license": "Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)", + "$id": "https://polaris.apache.org/schemas/policies/system/orphan-file-removal/2025-02-03.json", + "title": "Orphan File Removal Policy", + "description": "Inheritable Polaris policy schema for Iceberg table orphan file removal.", + "type": "object", + "properties": { + "version": { + "type": "string", + "const": "2025-02-03", + "description": "Schema version." + }, + "enable": { + "type": "boolean", + "description": "Enable or disable orphan file removal." + }, + "older_than": { + "type": "number", + "description": "A Unix timestamp, indicate to remove orphan files created before this timestamp." + }, + "location": { + "type": "string", + "description": "Directory to look for files in (defaults to the table's location)." + }, + "config": { + "type": "object", + "description": "A map containing custom configuration properties. 
Please note that interoperability is not guaranteed.", + "additionalProperties": { + "type": ["string", "number", "boolean"] + } + } + }, + "required": ["enable"], + "additionalProperties": false, + "examples": [ + { + "version": "2025-02-03", + "enable": true, + "older_than": 1707315296, + "location:": "s3://my-bucket/my-table-location", + "config": { + "prefix_mismatch_mode": "ignore", + "my_key": "my_value" + } + } + ] +} diff --git a/polaris-core/src/main/resources/schemas/policies/system/snapshot-retention/2025-02-03.json b/polaris-core/src/main/resources/schemas/policies/system/snapshot-retention/2025-02-03.json new file mode 100644 index 0000000000..3617336166 --- /dev/null +++ b/polaris-core/src/main/resources/schemas/policies/system/snapshot-retention/2025-02-03.json @@ -0,0 +1,38 @@ +{ + "license": "Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)", + "$id": "https://polaris.apache.org/schemas/policies/system/snapshot-retention/2025-02-03.json", + "title": "Snapshot Retention Policy", + "description": "Inheritable Polaris policy schema for Iceberg table snapshot retention.", + "type": "object", + "properties": { + "version": { + "type": "string", + "const": "2025-02-03", + "description": "Schema version." + }, + "enable": { + "type": "boolean", + "description": "Enable or disable snapshot retention." + }, + "config": { + "type": "object", + "description": "A map containing custom configuration properties. 
Please note that interoperability is not guaranteed.", + "additionalProperties": { + "type": ["string", "number", "boolean"] + } + } + }, + "required": ["enable"], + "additionalProperties": false, + "examples": [ + { + "version": "2025-02-03", + "enable": true, + "config": { + "min_snapshot_to_keep": 1, + "max_snapshot_age_days": 2, + "my_key": "my_value" + } + } + ] +} From 561cfb7334f484392a8324d925c96fa734ef75bd Mon Sep 17 00:00:00 2001 From: Yufei Gu Date: Sat, 8 Feb 2025 19:33:25 -0800 Subject: [PATCH 2/5] Fix typo --- .../policies/system/orphan-file-removal/2025-02-03.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/polaris-core/src/main/resources/schemas/policies/system/orphan-file-removal/2025-02-03.json b/polaris-core/src/main/resources/schemas/policies/system/orphan-file-removal/2025-02-03.json index 3337884a35..67484fa141 100644 --- a/polaris-core/src/main/resources/schemas/policies/system/orphan-file-removal/2025-02-03.json +++ b/polaris-core/src/main/resources/schemas/policies/system/orphan-file-removal/2025-02-03.json @@ -16,11 +16,11 @@ }, "older_than": { "type": "number", - "description": "A Unix timestamp, indicate to remove orphan files created before this timestamp." + "description": "A Unix timestamp. Remove orphan files created before this timestamp." }, "location": { "type": "string", - "description": "Directory to look for files in (defaults to the table's location)." + "description": "Customized directory other than table location to look for files in." 
}, "config": { "type": "object", @@ -37,7 +37,7 @@ "version": "2025-02-03", "enable": true, "older_than": 1707315296, - "location:": "s3://my-bucket/my-table-location", + "location": "s3://my-bucket/my-table-location", "config": { "prefix_mismatch_mode": "ignore", "my_key": "my_value" From 931094e780f176a3ea839e2a8ae2df6ad644ab47 Mon Sep 17 00:00:00 2001 From: Yufei Gu Date: Mon, 10 Feb 2025 17:07:59 -0800 Subject: [PATCH 3/5] Resolve comments --- .../policies/system/data-compaction/2025-02-03.json | 2 +- .../system/metadata-compaction/2025-02-03.json | 2 +- .../system/orphan-file-removal/2025-02-03.json | 12 ++++++------ .../system/snapshot-retention/2025-02-03.json | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/polaris-core/src/main/resources/schemas/policies/system/data-compaction/2025-02-03.json b/polaris-core/src/main/resources/schemas/policies/system/data-compaction/2025-02-03.json index b452eb808f..18469cae7b 100644 --- a/polaris-core/src/main/resources/schemas/policies/system/data-compaction/2025-02-03.json +++ b/polaris-core/src/main/resources/schemas/policies/system/data-compaction/2025-02-03.json @@ -32,7 +32,7 @@ "target_file_size_bytes": 134217728, "compaction_strategy": "bin-pack", "max-concurrent-file-group-rewrites": 5, - "my-key": "my-value" + "key1": "value1" } } ] diff --git a/polaris-core/src/main/resources/schemas/policies/system/metadata-compaction/2025-02-03.json b/polaris-core/src/main/resources/schemas/policies/system/metadata-compaction/2025-02-03.json index 2d910e3fa2..03f74fe25a 100644 --- a/polaris-core/src/main/resources/schemas/policies/system/metadata-compaction/2025-02-03.json +++ b/polaris-core/src/main/resources/schemas/policies/system/metadata-compaction/2025-02-03.json @@ -30,7 +30,7 @@ "enable": true, "config": { "spec_id": 1, - "my_key": "my_value" + "key1": "value1" } } ] diff --git a/polaris-core/src/main/resources/schemas/policies/system/orphan-file-removal/2025-02-03.json 
b/polaris-core/src/main/resources/schemas/policies/system/orphan-file-removal/2025-02-03.json index 67484fa141..7951e59627 100644 --- a/polaris-core/src/main/resources/schemas/policies/system/orphan-file-removal/2025-02-03.json +++ b/polaris-core/src/main/resources/schemas/policies/system/orphan-file-removal/2025-02-03.json @@ -14,17 +14,17 @@ "type": "boolean", "description": "Enable or disable orphan file removal." }, - "older_than": { + "max_orphan_file_age_in_days": { "type": "number", - "description": "A Unix timestamp. Remove orphan files created before this timestamp." + "description": "Specifies the maximum age (in days) for orphaned files before they are eligible for removal." }, "location": { "type": "string", - "description": "Customized directory other than table location to look for files in." + "description": "Specifies a custom directory to search for files instead of the default table location. Use with caution—if set to a broad location (e.g., s3://my-bucket instead of s3://my-bucket/my-table-location), all unreferenced files in that path may be permanently deleted, including files from other tables. Following best practices, tables should be stored in separate locations to avoid accidental data loss." }, "config": { "type": "object", - "description": "A map containing custom configuration properties. Please note that interoperability is not guaranteed.", + "description": "A map containing custom configuration properties. 
Note that interoperability is not guaranteed.", "additionalProperties": { "type": ["string", "number", "boolean"] } @@ -36,11 +36,11 @@ { "version": "2025-02-03", "enable": true, - "older_than": 1707315296, + "max_orphan_file_age_in_days": 30, "location": "s3://my-bucket/my-table-location", "config": { "prefix_mismatch_mode": "ignore", - "my_key": "my_value" + "key1": "value1" } } ] diff --git a/polaris-core/src/main/resources/schemas/policies/system/snapshot-retention/2025-02-03.json b/polaris-core/src/main/resources/schemas/policies/system/snapshot-retention/2025-02-03.json index 3617336166..780b00b01e 100644 --- a/polaris-core/src/main/resources/schemas/policies/system/snapshot-retention/2025-02-03.json +++ b/polaris-core/src/main/resources/schemas/policies/system/snapshot-retention/2025-02-03.json @@ -31,7 +31,7 @@ "config": { "min_snapshot_to_keep": 1, "max_snapshot_age_days": 2, - "my_key": "my_value" + "key1": "value1" } } ] From 02f77cb716550a162d3aa00cff3be9cf2b3bb96c Mon Sep 17 00:00:00 2001 From: Yufei Gu Date: Thu, 13 Feb 2025 15:38:59 -0800 Subject: [PATCH 4/5] Resolve comments --- .../schemas/policies/system/snapshot-retention/2025-02-03.json | 1 + 1 file changed, 1 insertion(+) diff --git a/polaris-core/src/main/resources/schemas/policies/system/snapshot-retention/2025-02-03.json b/polaris-core/src/main/resources/schemas/policies/system/snapshot-retention/2025-02-03.json index 780b00b01e..39cd692fd5 100644 --- a/polaris-core/src/main/resources/schemas/policies/system/snapshot-retention/2025-02-03.json +++ b/polaris-core/src/main/resources/schemas/policies/system/snapshot-retention/2025-02-03.json @@ -31,6 +31,7 @@ "config": { "min_snapshot_to_keep": 1, "max_snapshot_age_days": 2, + "max_ref_age_days": 3, "key1": "value1" } } From efc476739ae200ac9e0dd46a1d68e04b449610d2 Mon Sep 17 00:00:00 2001 From: Yufei Gu Date: Fri, 14 Feb 2025 11:45:18 -0800 Subject: [PATCH 5/5] Resolve comments --- .../policies/system/data-compaction/2025-02-03.json | 4 ++-- 
.../system/metadata-compaction/2025-02-03.json | 4 ++-- .../system/orphan-file-removal/2025-02-03.json | 13 ++++++++----- .../system/snapshot-retention/2025-02-03.json | 4 ++-- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/polaris-core/src/main/resources/schemas/policies/system/data-compaction/2025-02-03.json b/polaris-core/src/main/resources/schemas/policies/system/data-compaction/2025-02-03.json index 18469cae7b..f222d7c19e 100644 --- a/polaris-core/src/main/resources/schemas/policies/system/data-compaction/2025-02-03.json +++ b/polaris-core/src/main/resources/schemas/policies/system/data-compaction/2025-02-03.json @@ -2,13 +2,13 @@ "license": "Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)", "$id": "https://polaris.apache.org/schemas/policies/system/data-compaction/2025-02-03.json", "title": "Data Compaction Policy", - "description": "Inheritable Polaris policy schema for Iceberg table data compaction.", + "description": "Inheritable Polaris policy schema for Iceberg table data compaction", "type": "object", "properties": { "version": { "type": "string", "const": "2025-02-03", - "description": "Schema version." 
+ "description": "Schema version" }, "enable": { "type": "boolean", diff --git a/polaris-core/src/main/resources/schemas/policies/system/metadata-compaction/2025-02-03.json b/polaris-core/src/main/resources/schemas/policies/system/metadata-compaction/2025-02-03.json index 03f74fe25a..d99dc396cd 100644 --- a/polaris-core/src/main/resources/schemas/policies/system/metadata-compaction/2025-02-03.json +++ b/polaris-core/src/main/resources/schemas/policies/system/metadata-compaction/2025-02-03.json @@ -2,13 +2,13 @@ "license": "Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)", "$id": "https://polaris.apache.org/schemas/policies/system/metadata-compaction/2025-02-03.json", "title": "Metadata Compaction Policy", - "description": "Inheritable Polaris policy schema for Iceberg table metadata compaction.", + "description": "Inheritable Polaris policy schema for Iceberg table metadata compaction", "type": "object", "properties": { "version": { "type": "string", "const": "2025-02-03", - "description": "Schema version." 
+ "description": "Schema version" }, "enable": { "type": "boolean", diff --git a/polaris-core/src/main/resources/schemas/policies/system/orphan-file-removal/2025-02-03.json b/polaris-core/src/main/resources/schemas/policies/system/orphan-file-removal/2025-02-03.json index 7951e59627..19f35deca3 100644 --- a/polaris-core/src/main/resources/schemas/policies/system/orphan-file-removal/2025-02-03.json +++ b/polaris-core/src/main/resources/schemas/policies/system/orphan-file-removal/2025-02-03.json @@ -2,13 +2,13 @@ "license": "Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)", "$id": "https://polaris.apache.org/schemas/policies/system/orphan-file-removal/2025-02-03.json", "title": "Orphan File Removal Policy", - "description": "Inheritable Polaris policy schema for Iceberg table orphan file removal.", + "description": "Inheritable Polaris policy schema for Iceberg table orphan file removal", "type": "object", "properties": { "version": { "type": "string", "const": "2025-02-03", - "description": "Schema version." + "description": "Schema version" }, "enable": { "type": "boolean", @@ -18,9 +18,12 @@ "type": "number", "description": "Specifies the maximum age (in days) for orphaned files before they are eligible for removal." }, - "location": { - "type": "string", - "description": "Specifies a custom directory to search for files instead of the default table location. Use with caution—if set to a broad location (e.g., s3://my-bucket instead of s3://my-bucket/my-table-location), all unreferenced files in that path may be permanently deleted, including files from other tables. Following best practices, tables should be stored in separate locations to avoid accidental data loss." + "locations": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Specifies a list of custom directories to search for files instead of the default table location. 
Use with caution—if set to a broad location (e.g., s3://my-bucket instead of s3://my-bucket/my-table-location), all unreferenced files in that path may be permanently deleted, including files from other tables. Following best practices, tables should be stored in separate locations to avoid accidental data loss." }, "config": { "type": "object", @@ -37,7 +40,7 @@ "version": "2025-02-03", "enable": true, "max_orphan_file_age_in_days": 30, - "location": "s3://my-bucket/my-table-location", + "locations": ["s3://my-bucket/my-table-location"], "config": { "prefix_mismatch_mode": "ignore", "key1": "value1" diff --git a/polaris-core/src/main/resources/schemas/policies/system/snapshot-retention/2025-02-03.json b/polaris-core/src/main/resources/schemas/policies/system/snapshot-retention/2025-02-03.json index 39cd692fd5..dfd79e1979 100644 --- a/polaris-core/src/main/resources/schemas/policies/system/snapshot-retention/2025-02-03.json +++ b/polaris-core/src/main/resources/schemas/policies/system/snapshot-retention/2025-02-03.json @@ -2,13 +2,13 @@ "license": "Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)", "$id": "https://polaris.apache.org/schemas/policies/system/snapshot-retention/2025-02-03.json", "title": "Snapshot Retention Policy", - "description": "Inheritable Polaris policy schema for Iceberg table snapshot retention.", + "description": "Inheritable Polaris policy schema for Iceberg table snapshot retention", "type": "object", "properties": { "version": { "type": "string", "const": "2025-02-03", - "description": "Schema version." + "description": "Schema version" }, "enable": { "type": "boolean",