From 90f2c14c44f2a4cee562550eaf8a56c107d54767 Mon Sep 17 00:00:00 2001 From: Yufei Gu Date: Mon, 3 Feb 2025 23:12:25 -0800 Subject: [PATCH 1/4] Add data compaction policy schema and its validator --- polaris-core/build.gradle.kts | 2 + .../core/policy/BasePolicyValidator.java | 51 ++++++++++++++++ .../apache/polaris/core/policy/Policy.java | 31 ++++++++++ .../polaris/core/policy/PolicyValidator.java | 23 +++++++ .../core/policy/BasePolicyValidatorTest.java | 61 +++++++++++++++++++ .../resources/data-compaction-policy.json | 32 ++++++++++ 6 files changed, 200 insertions(+) create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/policy/BasePolicyValidator.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/policy/Policy.java create mode 100644 polaris-core/src/main/java/org/apache/polaris/core/policy/PolicyValidator.java create mode 100644 polaris-core/src/test/java/org/apache/polaris/core/policy/BasePolicyValidatorTest.java create mode 100644 polaris-core/src/test/resources/data-compaction-policy.json diff --git a/polaris-core/build.gradle.kts b/polaris-core/build.gradle.kts index b611509f84..4ae7df3e1a 100644 --- a/polaris-core/build.gradle.kts +++ b/polaris-core/build.gradle.kts @@ -78,6 +78,8 @@ dependencies { implementation("software.amazon.awssdk:iam-policy-builder") implementation("software.amazon.awssdk:s3") + implementation("org.everit.json:org.everit.json.schema:1.5.1") + implementation("org.apache.iceberg:iceberg-azure") implementation(platform(libs.azuresdk.bom)) implementation("com.azure:azure-storage-blob") diff --git a/polaris-core/src/main/java/org/apache/polaris/core/policy/BasePolicyValidator.java b/polaris-core/src/main/java/org/apache/polaris/core/policy/BasePolicyValidator.java new file mode 100644 index 0000000000..73acaad973 --- /dev/null +++ b/polaris-core/src/main/java/org/apache/polaris/core/policy/BasePolicyValidator.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one 
+ * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.policy; + +import java.io.InputStream; +import org.everit.json.schema.Schema; +import org.everit.json.schema.ValidationException; +import org.everit.json.schema.loader.SchemaLoader; +import org.json.JSONObject; +import org.json.JSONTokener; + +public class BasePolicyValidator implements PolicyValidator { + @Override + public boolean validate(Policy policy) { + try { + InputStream schemaStream = getResourceAsStream("data-compaction-policy.json"); + JSONObject rawSchema = new JSONObject(new JSONTokener(schemaStream)); + Schema schema = SchemaLoader.load(rawSchema); + + var jsonData = new JSONObject(new JSONTokener(policy.content)); + + schema.validate(jsonData); + return true; + } catch (ValidationException e) { + // The JSON failed validation; handle error information + System.out.println("JSON validation failed: " + e.getMessage()); + // You can also inspect e.getCausingExceptions() for detail on multiple errors + return false; + } + } + + private static InputStream getResourceAsStream(String fileName) { + return Thread.currentThread().getContextClassLoader().getResourceAsStream(fileName); + } +} diff --git a/polaris-core/src/main/java/org/apache/polaris/core/policy/Policy.java 
/**
 * Holds the raw text of a policy document.
 *
 * <p>The content is stored exactly as supplied; no parsing or validation happens here.
 */
public class Policy {
  // Immutable once constructed. Kept package-private (rather than private) so that existing
  // same-package code reading the field directly keeps compiling.
  final String content;

  /**
   * Creates a policy wrapping the given content.
   *
   * @param content the raw policy text; stored as-is, without a null check
   */
  public Policy(String content) {
    this.content = content;
  }

  /**
   * Returns the raw policy text passed to the constructor.
   *
   * @return the content, exactly as supplied at construction time
   */
  public String getContent() {
    return content;
  }
}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.core.policy; + +public interface PolicyValidator { + boolean validate(Policy policy); +} diff --git a/polaris-core/src/test/java/org/apache/polaris/core/policy/BasePolicyValidatorTest.java b/polaris-core/src/test/java/org/apache/polaris/core/policy/BasePolicyValidatorTest.java new file mode 100644 index 0000000000..b1bc624976 --- /dev/null +++ b/polaris-core/src/test/java/org/apache/polaris/core/policy/BasePolicyValidatorTest.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.core.policy; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class BasePolicyValidatorTest { + private BasePolicyValidator validator; + + @BeforeEach + public void setUp() { + validator = new BasePolicyValidator(); + } + + @Test + public void testValidateValidPolicy() { + var validJson = "{\"enable\": False}"; + var result = validator.validate(new Policy(validJson)); + assertThat(result).isTrue(); + + validJson = "{\"enable\": true, \"target_file_size_bytes\": 12342}"; + result = validator.validate(new Policy(validJson)); + assertThat(result).isTrue(); + + validJson = "{\"version\":\"2025-02-03\", \"enable\": true, \"target_file_size_bytes\": 12342}"; + result = validator.validate(new Policy(validJson)); + assertThat(result).isTrue(); + } + + @Test + public void testInValidateValidPolicy() { + // missing required key + var inValidJson = "{}"; + var result = validator.validate(new Policy(inValidJson)); + assertThat(result).isFalse(); + + // invalid keys + inValidJson = "{\"enable\": true, \"invalid_key\": 12342}"; + result = validator.validate(new Policy(inValidJson)); + assertThat(result).isFalse(); + } +} diff --git a/polaris-core/src/test/resources/data-compaction-policy.json b/polaris-core/src/test/resources/data-compaction-policy.json new file mode 100644 index 0000000000..ac5ee08963 --- /dev/null +++ b/polaris-core/src/test/resources/data-compaction-policy.json @@ -0,0 +1,32 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://polaris.apache.org/policy/system.data_compaction/schema/2025-02-03", + "title": "Data Compaction Policy", + "description": "An inheritable Polaris policy schema for Iceberg table data compaction", + "type": "object", + "properties": { + "version": { + "type": "string", + "const": "2025-02-03", + "description": "Schema version, must always be 2025-02-03." 
+ }, + "enable": { + "type": "boolean", + "description": "Mandatory flag to enable or disable the data compaction." + }, + "target_file_size_bytes": { + "type": "number", + "description": "Target file size in bytes." + } + }, + "required": ["enable"], + "additionalProperties": false, + "examples": [ + { + "version": "2025-02-03", + "enable": true, + "target_file_size_bytes": 134217728 + } + ] +} + From a1132fa6be83717ce82fd519ace30d13916131df Mon Sep 17 00:00:00 2001 From: Yufei Gu Date: Tue, 4 Feb 2025 16:42:13 -0800 Subject: [PATCH 2/4] Refactor on policy schema --- .../src/test/resources/data-compaction-policy.json | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/polaris-core/src/test/resources/data-compaction-policy.json b/polaris-core/src/test/resources/data-compaction-policy.json index ac5ee08963..8a7e55fa60 100644 --- a/polaris-core/src/test/resources/data-compaction-policy.json +++ b/polaris-core/src/test/resources/data-compaction-policy.json @@ -1,22 +1,21 @@ { - "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://polaris.apache.org/policy/system.data_compaction/schema/2025-02-03", "title": "Data Compaction Policy", - "description": "An inheritable Polaris policy schema for Iceberg table data compaction", + "description": "Inheritable Polaris policy schema for Iceberg table data compaction.", "type": "object", "properties": { "version": { "type": "string", "const": "2025-02-03", - "description": "Schema version, must always be 2025-02-03." + "description": "Schema version." }, "enable": { "type": "boolean", - "description": "Mandatory flag to enable or disable the data compaction." + "description": "Enable or disable data compaction." }, "target_file_size_bytes": { "type": "number", - "description": "Target file size in bytes." + "description": "Target data file size in bytes." 
} }, "required": ["enable"], @@ -29,4 +28,3 @@ } ] } - From 1b330b736b8e7fe986126d23f2a3c5f44c027719 Mon Sep 17 00:00:00 2001 From: Yufei Gu Date: Tue, 4 Feb 2025 20:26:45 -0800 Subject: [PATCH 3/4] Add the schema in the right place --- .../system/data-compaction/2025-02-03.json | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 spec/policies/system/data-compaction/2025-02-03.json diff --git a/spec/policies/system/data-compaction/2025-02-03.json b/spec/policies/system/data-compaction/2025-02-03.json new file mode 100644 index 0000000000..85b33437f7 --- /dev/null +++ b/spec/policies/system/data-compaction/2025-02-03.json @@ -0,0 +1,30 @@ +{ + "$id": "https://polaris.apache.org/schemas/policies/system/data-compaction/2025-02-03.json", + "title": "Data Compaction Policy", + "description": "Inheritable Polaris policy schema for Iceberg table data compaction.", + "type": "object", + "properties": { + "version": { + "type": "string", + "const": "2025-02-03", + "description": "Schema version." + }, + "enable": { + "type": "boolean", + "description": "Enable or disable data compaction." + }, + "target_file_size_bytes": { + "type": "number", + "description": "Target data file size in bytes." 
+ } + }, + "required": ["enable"], + "additionalProperties": false, + "examples": [ + { + "version": "2025-02-03", + "enable": true, + "target_file_size_bytes": 134217728 + } + ] +} From 2c40fa5ea6c5c2386ad8c9af43a5fb039a9b0c03 Mon Sep 17 00:00:00 2001 From: Yufei Gu Date: Tue, 4 Feb 2025 20:54:49 -0800 Subject: [PATCH 4/4] Add the schema in the right place --- .../core/policy/BasePolicyValidator.java | 3 +- .../system/data-compaction/2025-02-03.json | 12 +++++++- .../core/policy/BasePolicyValidatorTest.java | 4 +++ .../resources/data-compaction-policy.json | 30 ------------------- 4 files changed, 17 insertions(+), 32 deletions(-) rename {spec => polaris-core/src/main/resources/schemas}/policies/system/data-compaction/2025-02-03.json (65%) delete mode 100644 polaris-core/src/test/resources/data-compaction-policy.json diff --git a/polaris-core/src/main/java/org/apache/polaris/core/policy/BasePolicyValidator.java b/polaris-core/src/main/java/org/apache/polaris/core/policy/BasePolicyValidator.java index 73acaad973..be9d276e4f 100644 --- a/polaris-core/src/main/java/org/apache/polaris/core/policy/BasePolicyValidator.java +++ b/polaris-core/src/main/java/org/apache/polaris/core/policy/BasePolicyValidator.java @@ -29,7 +29,8 @@ public class BasePolicyValidator implements PolicyValidator { @Override public boolean validate(Policy policy) { try { - InputStream schemaStream = getResourceAsStream("data-compaction-policy.json"); + InputStream schemaStream = + getResourceAsStream("schemas/policies/system/data-compaction/2025-02-03.json"); JSONObject rawSchema = new JSONObject(new JSONTokener(schemaStream)); Schema schema = SchemaLoader.load(rawSchema); diff --git a/spec/policies/system/data-compaction/2025-02-03.json b/polaris-core/src/main/resources/schemas/policies/system/data-compaction/2025-02-03.json similarity index 65% rename from spec/policies/system/data-compaction/2025-02-03.json rename to 
polaris-core/src/main/resources/schemas/policies/system/data-compaction/2025-02-03.json index 85b33437f7..dbeab9db66 100644 --- a/spec/policies/system/data-compaction/2025-02-03.json +++ b/polaris-core/src/main/resources/schemas/policies/system/data-compaction/2025-02-03.json @@ -16,6 +16,11 @@ "target_file_size_bytes": { "type": "number", "description": "Target data file size in bytes." + }, + "config": { + "type": "object", + "description": "A map containing custom configuration properties. Please note that interoperability is not guaranteed.", + "additionalProperties": {} } }, "required": ["enable"], @@ -24,7 +29,12 @@ { "version": "2025-02-03", "enable": true, - "target_file_size_bytes": 134217728 + "target_file_size_bytes": 134217728, + "config": { + "compaction_strategy": "bin-pack", + "max-concurrent-file-group-rewrites": 5, + "my-key": "my-value" + } } ] } diff --git a/polaris-core/src/test/java/org/apache/polaris/core/policy/BasePolicyValidatorTest.java b/polaris-core/src/test/java/org/apache/polaris/core/policy/BasePolicyValidatorTest.java index b1bc624976..d5cb8b6f2d 100644 --- a/polaris-core/src/test/java/org/apache/polaris/core/policy/BasePolicyValidatorTest.java +++ b/polaris-core/src/test/java/org/apache/polaris/core/policy/BasePolicyValidatorTest.java @@ -44,6 +44,10 @@ public void testValidateValidPolicy() { validJson = "{\"version\":\"2025-02-03\", \"enable\": true, \"target_file_size_bytes\": 12342}"; result = validator.validate(new Policy(validJson)); assertThat(result).isTrue(); + + validJson = "{\"enable\": true, \"config\": {\"key1\": \"value1\", \"key2\": true}}"; + result = validator.validate(new Policy(validJson)); + assertThat(result).isTrue(); } @Test diff --git a/polaris-core/src/test/resources/data-compaction-policy.json b/polaris-core/src/test/resources/data-compaction-policy.json deleted file mode 100644 index 8a7e55fa60..0000000000 --- a/polaris-core/src/test/resources/data-compaction-policy.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - 
"$id": "https://polaris.apache.org/policy/system.data_compaction/schema/2025-02-03", - "title": "Data Compaction Policy", - "description": "Inheritable Polaris policy schema for Iceberg table data compaction.", - "type": "object", - "properties": { - "version": { - "type": "string", - "const": "2025-02-03", - "description": "Schema version." - }, - "enable": { - "type": "boolean", - "description": "Enable or disable data compaction." - }, - "target_file_size_bytes": { - "type": "number", - "description": "Target data file size in bytes." - } - }, - "required": ["enable"], - "additionalProperties": false, - "examples": [ - { - "version": "2025-02-03", - "enable": true, - "target_file_size_bytes": 134217728 - } - ] -}