diff --git a/bom/build.gradle.kts b/bom/build.gradle.kts index 3a0688880c..e0f1838584 100644 --- a/bom/build.gradle.kts +++ b/bom/build.gradle.kts @@ -48,6 +48,8 @@ dependencies { api(project(":polaris-nodes-impl")) api(project(":polaris-nodes-spi")) + api(project(":polaris-persistence-nosql-api")) + api(project(":polaris-config-docs-annotations")) api(project(":polaris-config-docs-generator")) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 1e419a0636..dea126c804 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -76,6 +76,7 @@ jakarta-validation-api = { module = "jakarta.validation:jakarta.validation-api", jakarta-ws-rs-api = { module = "jakarta.ws.rs:jakarta.ws.rs-api", version = "4.0.0" } javax-servlet-api = { module = "javax.servlet:javax.servlet-api", version = "4.0.1" } junit-bom = { module = "org.junit:junit-bom", version = "5.14.1" } +junit-pioneer = { module = "org.junit-pioneer:junit-pioneer", version = "2.3.0" } keycloak-admin-client = { module = "org.keycloak:keycloak-admin-client", version = "26.0.7" } jcstress-core = { module = "org.openjdk.jcstress:jcstress-core", version = "0.16" } jmh-core = { module = "org.openjdk.jmh:jmh-core", version.ref = "jmh" } diff --git a/gradle/projects.main.properties b/gradle/projects.main.properties index 5869bc5bc7..51308902bb 100644 --- a/gradle/projects.main.properties +++ b/gradle/projects.main.properties @@ -64,4 +64,5 @@ polaris-nodes-api=persistence/nosql/nodes/api polaris-nodes-impl=persistence/nosql/nodes/impl polaris-nodes-spi=persistence/nosql/nodes/spi # persistence / database agnostic +polaris-persistence-nosql-api=persistence/nosql/persistence/api polaris-persistence-nosql-varint=persistence/nosql/persistence/varint diff --git a/persistence/nosql/persistence/README.md b/persistence/nosql/persistence/README.md new file mode 100644 index 0000000000..f42a941ca0 --- /dev/null +++ b/persistence/nosql/persistence/README.md @@ -0,0 +1,252 @@ + + +# Database 
agnostic persistence framework + +The NoSQL persistence API and functional implementations are based on the assumption that all databases targeted as +backing stores for Polaris support "compare and swap" operations on a single row. +These CAS operations are the only requirement. + +Some databases enforce hard size limits: for example, DynamoDB has a hard 400kB row size limit. +MariaDB/MySQL has a default 512kB packet size limit. +Other databases have row-size recommendations around similar sizes. +Polaris persistence respects those limits and recommendations using a common hard limit of 350kB. + +Objects exposed via the `Persistence` interface are typed Java objects that must be immutable and serializable using +Jackson. +Each type is described via an implementation of the `ObjType` interface using a name, which must be unique +in Polaris, and a target Java type: the Jackson serializable Java type. +Object types are registered using the Java service API using `ObjType`. +The actual Java target types must extend the `Obj` interface. +The (logical) key for each `Obj` is a composite of the `ObjType.id()` and a `long` ID (64-bit signed int), +combined using the `ObjRef` composite type. + +The "primary key" of each object in a database is always _realmId + object-ID_, where realm-ID is a string and +object-ID is a 64-bit integer. +This allows, but does not enforce, storing multiple realms in one backend database. + +Data in/for/of each Polaris realm (think: _tenant_) is isolated using the realm's ID (string). +The base `Persistence` API interface is always scoped to exactly one realm ID. + +## Supporting more databases + +The code to support a particular database is isolated in a project, for example `polaris-persistence-nosql-inmemory` and +`polaris-persistence-nosql-mongodb`.
+ +When adding another database, it must also be wired up to Quarkus in `polaris-persistence-nosql-cdi-quarkus` preferably +using Quarkus extensions, added to the `polaris-persistence-nosql-correctness` tests and available in +`polaris-persistence-nosql-benchmark` for low-level benchmarks. + +## Named pointers + +Polaris represents a catalog for data lakehouses, which means that the information of and for catalog entities like +Iceberg tables, views, and namespaces must be consistent, even if multiple catalog entities are changed in a single +atomic operation. + +Polaris leverages a concept called "Named pointers." +The state of the whole catalog is referenced via the so-called HEAD (think: Git HEAD), +which _points to_ all catalog entities. +This state is persisted as an `Obj` with an index of the catalog entities; +the ID of that "current catalog state `Obj`" is maintained in one named pointer. + +Named pointers are also used for other purposes than catalog entities, for example, to maintain realms or +configurations. + +## Committing changes + +Changes are persisted using a commit mechanism, providing atomic changes across multiple entities against one named +pointer. +The logic implementation ensures that even high-frequency concurrent changes neither let clients fail +nor cause timeouts. +The behavior and achievable throughput depend on the database being used; some databases perform +_much_ better than others. + +A use-case agnostic "committer" abstraction exists to ease implementing committing operations. +For catalog operations, there is a more specialized abstraction. + +## `long` IDs + +Polaris NoSQL persistence uses so-called Snowflake IDs, which are 64-bit integers that represent a timestamp, a +node-ID, and a sequence number. +The epoch of these timestamps is 2025-03-01-00:00:00.0 GMT. +Timestamps occupy 41 bits at millisecond precision, which lasts for about 69 years. +Node-IDs are 10 bits, which allows 1024 concurrently active "JVMs running Polaris."
+Twelve (12) bits are used by the sequence number, which then allows each node to generate 4096 IDs per +millisecond. +One bit is reserved for future use. + +Node IDs are leased by every "JVM running Polaris" for a period of time. +The ID generator implementation guarantees that no IDs will be generated for a timestamp that exceeds the "lease time." +Leases can be extended. +The implementation leverages atomic database operations (CAS) for the lease implementation. + +ID generators must not use timestamps before or after the lease period, nor must they re-use an older timestamp. +This requirement is satisfied using a monotonic clock implementation. + +## Caching + +Since most `Obj`s are by default assumed to be immutable, caching is very straightforward and does not require any +coordination, which simplifies the design and implementation quite a bit. + +## Strong vs. eventual consistency + +Polaris NoSQL persistence offers two ways to persist `Obj`s: strongly consistent and eventually consistent. +The former is slower than the latter. + +Since Polaris NoSQL persistence respects the hard size limitations mentioned above, it cannot persist the serialized +representation of objects that exceed those limits in a single database row. +However, some objects legitimately exceed those limits. +Polaris NoSQL persistence allows such "big object serializations" and writes those into multiple database rows, +with the restriction that this is only supported for eventually consistent write operations. +The serialized representation for strong consistency writes must always be within the hard limit. + +## Indexes + +The state of a data-lakehouse catalog can contain many thousand, potentially a few 100,000, tables/views/namespaces. +Even space-efficient serialization of an index for that many entries can exceed the "common hard 350kB limit." +New changes end up in the index, which is "embedded" in the "current catalog state `Obj`".
+If the respective index size limit of this "embedded" index is being approached, +the index is spilled out to separate rows in the database. +The implementation is built to split and combine when needed. + +## Change log / events / notifications + +The commit mechanism described above builds a commit log. +All changes can be inspected via that log in exactly the order in which those happened (think: `git log`). +Since the log of changes is already present, it is possible to retrieve the changes from some point in time or +commit log ID. +This allows clients to receive all changes that have happened since the last known commit ID, +offering a mechanism to poll for changes. +Since the necessary `Obj`s are immutable, +such change-log-requests likely hit already cached data rather than the database. + +## Clean up old commits / unused data + +Despite the beauty of having a "commit log" and all metadata representation in the backing database, +the size of that database would always grow. + +Purging unused table/view metadata memoized in the database is one piece. +Purging old commit log entries is the second part. +Purging (then) unreferenced `Obj`s is the third part. + +See [maintenance service](#maintenance-service) below. + +## Realms (aka tenants) + +Bootstrapping and, more importantly, deleting/purging a realm are non-trivial operations, which require their own +lifecycle. +Bootstrapping is a straightforward operation as the necessary information can be validated and enhanced if necessary. + +Both the logical and the physical process of realm deletion are more complex. +From a logical point of view, +users want to disable the realm for a while before they eventually are okay with deleting the information. + +The process to delete a realm's data from the database can be quite time-consuming, and how that happens is +database-specific.
+While some databases can do bulk-deletions, which "just" take some time (RDBMS, BigTable), other databases +require that the process of deleting a realm must happen during a full scan of the database (for example, RocksDB +and Apache Cassandra). +Scanning the whole database itself can take quite long, and no more than one instance should scan the database +at any time. + +The realm has a status to reflect its lifecycle. +The initial status of a realm is `CREATED`, which effectively only means that the realm-ID has been reserved and that +the necessary data needs to be populated (bootstrap). +Once a realm has been fully bootstrapped, its status is changed to `ACTIVE`. +Only `ACTIVE` realms can be used for user requests. + +Between `CREATED` and `ACTIVE`/`INACTIVE` there are two states that are mutually exclusive. +The state `INITIALIZING` means that Polaris will initialize the realm as a fresh, new realm. +The state `LOADING` means that realm data, which has been exported from another Polaris instance, is to be imported. + +Realm deletion is a multistep approach as well: Realms are first put into `INACTIVE` state, which can either be reverted +to `ACTIVE` state or advanced to `PURGING` state. +The state `PURGING` means that the realm's data is being deleted from the database; +once purging has been started, the realm's information in the database is inconsistent and cannot be restored. +Once the realm's data has been purged, the realm is put into `PURGED` state. Only realms that are in state `PURGED` +can be deleted. + +The multi-state approach also ensures that a realm can only be used when the system knows that all necessary +information is present. + +**Note**: the realm state machine is not fully implemented yet. + +## `::system::` realm + +Polaris NoSQL persistence uses a system realm which is used for node ID leases and realm management. +The realm-IDs starting with two colons (`::`) are reserved for system use.
+ +### Named pointers in the `::system::` realm + +| Named pointer | Meaning | +|---------------|-----------------| +| `realms` | Realms, by name | + +## "User" realms + +### Named pointers in the user realms + +| Named pointer | Meaning | +|-------------------|------------------------------| +| `root` | Pointer to the "root" entity | +| `catalogs` | Catalogs | +| `principals` | Principals | +| `principal-roles` | Principal roles | +| `grants` | All grants | +| `immediate-tasks` | Immediately scheduled tasks | +| `policy-mappings` | Policy mappings | + +Per catalog named pointers, where `%d` refers to the catalog's integer ID: + +| Named pointer | Meaning | +|---------------------|--------------------------------------------------| +| `cat/%d/roles` | Catalog roles | +| `cat/%d/heads/main` | Catalog content (namespaces, tables, views, etc) | +| `cat/%d/grants` | Catalog related grants (*) | + +(*) = currently not used, stored in the realm grants. + +## Maintenance Service + +**Note**: maintenance service not yet in the code base. + +The maintenance service is a mechanism to scan the backend database and perform necessary maintenance operations +as a background service. + +The most important maintenance operation is to purge unreferenced objects from the database. +Pluggable "identifiers" are used to "mark" objects to retain. + +The implementation calls all per-realm "identifiers," which then "mark" the named pointers and objects that have to be +retained. +Plugins and/or extensions can provide per-object-type "identifiers," which get called for "identified" objects. +The second phase of the maintenance service scans the whole backend database and purges those objects, +which have not been "marked" to be retained. + +Maintenance service invocations require two sets of realm-ids: the set of realms to retain and the set of realms +to purge. +These sets can be derived using `RealmManagement.list()` and grouping realms by their status. 
+ +### Purging realms + +Purging a realm from the backend database can happen in two different ways, depending on the database. +Some databases support deleting one or more realms using bulk deletions. +Other databases do not support this kind of bulk deletion. +Both ways are supported by the maintenance service. + +Eventually, purging realms is a responsibility of the [maintenance service](#maintenance-service). diff --git a/persistence/nosql/persistence/api/build.gradle.kts b/persistence/nosql/persistence/api/build.gradle.kts new file mode 100644 index 0000000000..ff0d441897 --- /dev/null +++ b/persistence/nosql/persistence/api/build.gradle.kts @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +plugins { + id("org.kordamp.gradle.jandex") + id("polaris-server") +} + +description = "Polaris NoSQL persistence API, no concrete implementations" + +dependencies { + api(project(":polaris-version")) + api(project(":polaris-misc-types")) + + implementation(project(":polaris-idgen-api")) + implementation(project(":polaris-nodes-api")) + implementation(project(":polaris-persistence-nosql-varint")) + + implementation(platform(libs.jackson.bom)) + implementation("com.fasterxml.jackson.core:jackson-annotations") + implementation("com.fasterxml.jackson.core:jackson-core") + implementation("com.fasterxml.jackson.core:jackson-databind") + runtimeOnly("com.fasterxml.jackson.datatype:jackson-datatype-guava") + runtimeOnly("com.fasterxml.jackson.datatype:jackson-datatype-jdk8") + runtimeOnly("com.fasterxml.jackson.datatype:jackson-datatype-jsr310") + + implementation(libs.guava) + implementation(libs.slf4j.api) + + compileOnly(libs.smallrye.config.core) + compileOnly(platform(libs.quarkus.bom)) + compileOnly("io.quarkus:quarkus-core") + + compileOnly(project(":polaris-immutables")) + annotationProcessor(project(":polaris-immutables", configuration = "processor")) + + compileOnly(libs.jakarta.annotation.api) + compileOnly(libs.jakarta.validation.api) + compileOnly(libs.jakarta.inject.api) + compileOnly(libs.jakarta.enterprise.cdi.api) + + testImplementation(platform(libs.jackson.bom)) + testImplementation("com.fasterxml.jackson.dataformat:jackson-dataformat-smile") + + testImplementation(libs.junit.pioneer) + + testFixturesImplementation(platform(libs.jackson.bom)) + testFixturesImplementation("com.fasterxml.jackson.core:jackson-databind") + + testFixturesCompileOnly(project(":polaris-immutables")) + testFixturesAnnotationProcessor(project(":polaris-immutables", configuration = "processor")) + + testFixturesCompileOnly(libs.jakarta.annotation.api) + testFixturesCompileOnly(libs.jakarta.validation.api) +} diff --git 
a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/Persistence.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/Persistence.java new file mode 100644 index 0000000000..6795036235 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/Persistence.java @@ -0,0 +1,437 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api; + +import static com.google.common.base.Preconditions.checkState; + +import jakarta.annotation.Nonnull; +import jakarta.annotation.Nullable; +import java.time.Duration; +import java.time.Instant; +import java.util.Arrays; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Stream; +import org.apache.polaris.ids.api.IdGenerator; +import org.apache.polaris.ids.api.MonotonicClock; +import org.apache.polaris.persistence.nosql.api.backend.Backend; +import org.apache.polaris.persistence.nosql.api.commit.Commits; +import org.apache.polaris.persistence.nosql.api.commit.Committer; +import org.apache.polaris.persistence.nosql.api.exceptions.ReferenceAlreadyExistsException; +import org.apache.polaris.persistence.nosql.api.exceptions.ReferenceNotFoundException; +import org.apache.polaris.persistence.nosql.api.index.Index; +import org.apache.polaris.persistence.nosql.api.index.IndexContainer; +import org.apache.polaris.persistence.nosql.api.index.IndexValueSerializer; +import org.apache.polaris.persistence.nosql.api.index.UpdatableIndex; +import org.apache.polaris.persistence.nosql.api.obj.BaseCommitObj; +import org.apache.polaris.persistence.nosql.api.obj.Obj; +import org.apache.polaris.persistence.nosql.api.obj.ObjRef; +import org.apache.polaris.persistence.nosql.api.obj.ObjType; +import org.apache.polaris.persistence.nosql.api.ref.Reference; + +/** + * Polaris NoSQL persistence interface providing fundamental primitive operations to manage + * named-references including atomic updates and to read and write {@code Obj}s. Batch operations + * are provided where applicable. + * + *

{@code Obj}s are usually only written but never updated. This enables efficient caching of + * persisted data. In certain, exceptional use cases, which should almost always be avoided, CAS + * primitives allow conditional creates/updates/deletes. {@code ObjType} implementations can provide + * custom positive and negative caching rules. + * + *

Databases often have hard limits or at least more-or-less strong recommendations on the size + * of serialized {@link Obj}s. The "main" implementation of this interface in {@code + * :polaris-persistence-nosql-impl} takes care of transparently splitting and re-assembling {@link + * Obj}s across multiple database rows. The latter is not supported for conditionally updated {@link + * Obj}s. + * + *

This interface is a Polaris-internal low-level API interface for NoSQL. Instances of this + * interface are scoped to a specific realm. + * + *

The behavior when fetching a non-existing reference is to throw, which is different from + * fetching non-existing {@link Obj}s, because references are supposed to exist and a non-existence + * is usually a sign of a missing initialization step, whereas a missing {@link Obj} is often + * expected. + * + *

Database-specific implementations do implement the {@link Backend} interface, not this one. + */ +public interface Persistence { + /** + * Creates the reference with the given name and {@linkplain Reference#pointer() pointer} value. + * + *

Reference creation is always a strongly consistent operation. + * + * @throws ReferenceAlreadyExistsException if a reference with the same name already exists + */ + @Nonnull + Reference createReference(@Nonnull String name, @Nonnull Optional pointer) + throws ReferenceAlreadyExistsException; + + /** + * Convenience function to create a reference with an empty {@linkplain Reference#pointer() + * pointer}, if it does not already exist. + * + * @see #createReferencesSilent(Set) + */ + default void createReferenceSilent(@Nonnull String name) { + createReferencesSilent(Set.of(name)); + } + + /** + * Ensures that multiple references exist, leveraging bulk operations, if possible. References are + * created with empty {@linkplain Reference#pointer() pointers}. + * + *

This whole operation is not guaranteed to be atomic, but the creation of each individual + * reference is atomic. + * + * @see #createReferenceSilent(String) + */ + void createReferencesSilent(Set referenceNames); + + /** + * Convenience function to return an existing reference or to create the reference with a supplied + * {@linkplain Reference#pointer() pointer}, if it does not already exist. + * + * @param pointerForCreate supplies the pointer used to create the reference, only invoked if + * fetching the reference reported that it does not exist + */ + @Nonnull + default Reference fetchOrCreateReference( + @Nonnull String name, @Nonnull Supplier> pointerForCreate) { + try { + return fetchReference(name); + } catch (ReferenceNotFoundException e) { + try { + return createReference(name, pointerForCreate.get()); + } catch (ReferenceAlreadyExistsException x) { + // Unlikely that we ever get here: the reference did not exist, but has been concurrently created + return fetchReference(name); + } + } + } + + /** + * Updates the {@linkplain Reference#pointer() pointer} to {@code newPointer}, if the reference + * exists and the current persisted pointer is the same as in {@code reference}. + *

Reference update is always a strongly consistent operation. + * + * @param reference the existing reference including the expected pointer + * @param newPointer the pointer to update the reference to. If the reference has a current + * pointer value, both the current and the new pointer must use the same {@link ObjType + * ObjType}. + * @return If the reference was successfully updated, an updated {@link Reference} instance will + * be returned. + * @throws ReferenceNotFoundException if the reference does not exist + */ + @Nonnull + Optional updateReferencePointer( + @Nonnull Reference reference, @Nonnull ObjRef newPointer) throws ReferenceNotFoundException; + + /** + * Fetch the reference with the given name, leveraging the reference cache. + * + * @throws ReferenceNotFoundException if the reference does not exist + * @see #fetchReferenceForUpdate(String) + * @see #fetchReferenceHead(String, Class) + */ + @Nonnull + Reference fetchReference(@Nonnull String name) throws ReferenceNotFoundException; + + /** + * Fetches the reference with the given name, but will always fetch the most recent state from the + * backend database. + * + * The default implementation just delegates to {@link #fetchReference(String)}. + * + * @throws ReferenceNotFoundException if the reference does not exist + * @see #fetchReference(String) + */ + @Nonnull + default Reference fetchReferenceForUpdate(@Nonnull String name) + throws ReferenceNotFoundException { + return fetchReference(name); + } + + /** + * Convenience function to return the {@link Obj} as pointed to from the reference with the given + * name. + * + * @return the {@link Obj} the reference points to, or an empty {@link Optional} if the reference + * has no pointer + * @throws ReferenceNotFoundException if the reference does not exist + * @throws IllegalStateException if the object that the reference points to could not be fetched + * @see #fetchReference(String) + * @see #fetch(ObjRef, Class) + */ + default Optional fetchReferenceHead( + @Nonnull String name, @Nonnull Class clazz) throws ReferenceNotFoundException { + var ref = fetchReference(name); + return ref.pointer() + .map( + id -> { + var head = fetch(id, clazz); + checkState(head != null, "%s referenced by '%s' does not exist", id, name); + return head; + }); + } + + /** + * Fetch the object for the given object ID. + * + *

Supports assembling object splits across multiple rows by {@link #write(Obj, Class)} or + * {@link #writeMany(Class, Obj[])}. + * + * @param id ID of the object to load + * @param clazz expected {@link Obj} subtype, passing {@code Obj.class} is fine + * @return loaded object or {@code null} if it does not exist + * @param returned type can also be just {@code Obj} + * @see #fetchMany(Class, ObjRef[]) + */ + @Nullable + T fetch(@Nonnull ObjRef id, @Nonnull Class clazz); + + /** + * Fetch multiple objects for the given object Ids. + * + *

Supports assembling object splits across multiple rows by {@link #write(Obj, Class)} or + * {@link #writeMany(Class, Obj[])}. + * + * @param returned type can also be just {@code Obj} + * @param clazz expected {@link Obj} subtype, passing {@code Obj.class} is fine + * @param ids IDs of the objects to load, callers must ensure that the IDs are not duplicated + * within the array + * @return array of the same length as {@code ids} containing the loaded objects, with {@code + * null} elements for objects that do not exist + * @see #fetch(ObjRef, Class) + */ + @Nonnull + T[] fetchMany(@Nonnull Class clazz, @Nonnull ObjRef... ids); + + /** + * Persist {@code obj} with eventually consistent guarantees. + * + *

Supports splitting the serialized representation across multiple rows in the backend + * database, if the serialized representation does not fit entirely in a single row, limited by + * {@link #maxSerializedValueSize()}. + * + *

This function (and {@link #writeMany(Class, Obj[])}) are not meant to actually + * update existing objects with different information, especially not when the size of the + * serialized object changes the number of splits in the backend database. Note that there is + * no protection against this scenario. + * + * @return {@code obj} with the {@link Obj#createdAtMicros()} and {@link Obj#numParts()} fields + * updated + * @see #writeMany(Class, Obj[]) + */ + @Nonnull + T write(@Nonnull T obj, @Nonnull Class clazz); + + /** + * Persist multiple {@code objs} with eventually consistent guarantees. + * + *

See {@link #write(Obj, Class)} for more information. + * + *

Supports splitting the serialized representation across multiple rows in the backend + * database, if the serialized representation does not fit entirely in a single row, limited by + * {@link #maxSerializedValueSize()}. + * + *

This function and {@link #write(Obj, Class)} are not meant to actually update + * existing objects with different information, especially not when the size of the serialized + * object changes the number of splits in the backend database. Note that there is no + * protection against this scenario. + * + * @return {@code objs} with the {@link Obj#createdAtMicros()} and {@link Obj#numParts()} fields + * updated, callers must ensure that the IDs are not duplicated within the array. {@code null} + * elements in the returned array will appear for {@code null} elements in the {@code objs} + * array. + * @see #write(Obj, Class) + */ + @SuppressWarnings("unchecked") + @Nonnull + T[] writeMany(@Nonnull Class clazz, @Nonnull T... objs); + + /** + * Unconditionally delete the object with the given id. + * + *

Note that it is generally not advised to actively (or prematurely) delete objects. In + * general, it is better to just leave the object and let the maintenance service take care of + * purging it. + * + *

If the object has been split across multiple database rows, only the number of parts + * mentioned in {@link ObjRef#numParts()} will be deleted. However, the maintenance service will + * take care of purging possibly left-over parts. + * + * @see #deleteMany(ObjRef[]) + */ + void delete(@Nonnull ObjRef id); + + /** + * Unconditionally delete the objects with the given ids. + * + *

Note that it is generally not advised to actively (or prematurely) delete objects. In + * general, it is better to just leave the object and let the maintenance service take care of + * purging it. + * + *

If the object has been split across multiple database rows, only the number of parts + * mentioned in {@link ObjRef#numParts()} will be deleted. However, the maintenance service will + * take care of purging possibly left-over parts. + * + * @param ids IDs of objects to delete, callers must ensure that the IDs are not duplicated within + * the array + * @see #delete(ObjRef) + */ + void deleteMany(@Nonnull ObjRef... ids); + + /** + * Persist {@code obj} with strong consistent guarantees. + * + *

Unlike {@linkplain #write(Obj, Class) eventually consistent writes}, conditional write + * operations do not support splitting the serialized representation across multiple rows in the + * backend database. + * + *

The serialized representation must fit entirely in a single row, limited by {@link + * #maxSerializedValueSize()}. + * + * @return {@code obj} with the {@link Obj#createdAtMicros()} field updated if and only if no + * other object with the same object id existed before, otherwise {@code null} + */ + @Nullable + T conditionalInsert(@Nonnull T obj, @Nonnull Class clazz); + + /** + * Update an object with strong consistent guarantees. + * + *

Unlike {@linkplain #write(Obj, Class) eventually consistent writes}, conditional write + * operations do not support splitting the serialized representation across multiple rows in the + * backend database. + * + *

The serialized representation must fit entirely in a single row, limited by {@link + * #maxSerializedValueSize()}. + * + * @param expected the object expected to have the same {@link Obj#versionToken()} as this one + * @param update the object to be updated to, must have the same {@linkplain Obj#id() id}, + * {@linkplain Obj#type() type} but a different {@linkplain Obj#versionToken() version token} + * @return updated state in the database, if successful, otherwise {@code null} + */ + @Nullable + T conditionalUpdate( + @Nonnull T expected, @Nonnull T update, @Nonnull Class clazz); + + /** + * Delete an object with strong consistent guarantees. + * + * @param expected the object expected to have the same {@link Obj#versionToken()} as this one + * @return {@code true} if the object existed with the expected version token and was deleted in + * the database, otherwise {@code false} + */ + boolean conditionalDelete(@Nonnull T expected, Class clazz); + /** + * Returns the {@link PersistenceParams} of this instance, for example, the bucket size read by + * {@link #bucketizedBulkFetches(Stream, Class)}. + */ + PersistenceParams params(); + + /** + * Defines the maximum allowed {@linkplain Obj serialized object} size. Serialized representations + * larger than this value will be split into multiple database rows. + */ + int maxSerializedValueSize(); + /** + * Generates a new {@code long} ID. + * + * NOTE(review): presumably backed by {@link #idGenerator()} - confirm against the implementation. + */ + long generateId(); + /** + * Generates a new {@link ObjRef} for an object of the given type. + * + * NOTE(review): presumably combines {@code type} with a {@linkplain #generateId() generated ID} - + * confirm against the implementation. + */ + ObjRef generateObjId(ObjType type); + + /** + * If the {@linkplain Persistence persistence implementation} is caching, this function returns + * the object with the ID from the cache, but does not consult the backend. + * + *

Non-caching implementations default to {@link #fetch(ObjRef, Class)}. + */ + @Nullable + T getImmediate(@Nonnull ObjRef id, @Nonnull Class clazz); + /** + * Returns the {@link Commits} instance of this persistence instance. + * + * NOTE(review): presumably provides read access to the commit log - confirm. + */ + Commits commits(); + /** + * Creates a {@link Committer} for the reference named {@code refName}. + * + * @param refName name of the reference + * @param referencedObjType NOTE(review): presumably the {@link BaseCommitObj} subtype that the + * reference points to - confirm + * @param resultType NOTE(review): presumably the type of the result produced by a commit - + * confirm + */ + Committer createCommitter( + @Nonnull String refName, + @Nonnull Class referencedObjType, + @Nonnull Class resultType); + /** + * Builds an {@link Index} for reading from the given {@link IndexContainer}. + * + * @param indexContainer the container to build the index from, may be {@code null}. + * NOTE(review): presumably a {@code null} container yields an empty index - confirm + * @param indexValueSerializer serializer for the values held in the index + */ + Index buildReadIndex( + @Nullable IndexContainer indexContainer, + @Nonnull IndexValueSerializer indexValueSerializer); + /** + * Builds an {@link UpdatableIndex} from the given {@link IndexContainer}. + * + * @param indexContainer the container to build the index from, may be {@code null}. + * NOTE(review): presumably a {@code null} container yields an empty index - confirm + * @param indexValueSerializer serializer for the values held in the index + */ + UpdatableIndex buildWriteIndex( + @Nullable IndexContainer indexContainer, + @Nonnull IndexValueSerializer indexValueSerializer); + /** + * Computes the age of {@code obj} as the difference between {@link #currentTimeMicros()} and + * {@link Obj#createdAtMicros()}. Negative differences are clamped to zero, so the returned + * duration is never negative. + */ + @Nonnull + default Duration objAge(@Nonnull Obj obj) { + return Duration.ofNanos( + TimeUnit.MICROSECONDS.toNanos(Math.max(currentTimeMicros() - obj.createdAtMicros(), 0L))); + } + /** Returns the ID of the realm that this instance is scoped to. */ + String realmId(); + /** + * Returns the {@link MonotonicClock} that backs {@link #currentTimeMicros()}, {@link + * #currentTimeMillis()} and {@link #currentInstant()}. + */ + MonotonicClock monotonicClock(); + /** + * Returns the {@link IdGenerator} of this instance. + * + * NOTE(review): presumably the source of the IDs returned by {@link #generateId()} - confirm. + */ + IdGenerator idGenerator(); + + /** + * Convenience for {@link #monotonicClock() monotonicClock().}{@link + * MonotonicClock#currentTimeMicros()}. + */ + @SuppressWarnings("resource") + default long currentTimeMicros() { + return monotonicClock().currentTimeMicros(); + } + + /** + * Convenience for {@link #monotonicClock() monotonicClock().}{@link + * MonotonicClock#currentTimeMillis()}. + */ + @SuppressWarnings("resource") + default long currentTimeMillis() { + return monotonicClock().currentTimeMillis(); + } + + /** + * Convenience for {@link #monotonicClock() monotonicClock().}{@link + * MonotonicClock#currentInstant()}. + */ + @SuppressWarnings("resource") + default Instant currentInstant() { + return monotonicClock().currentInstant(); + } + + /** + * Convenience function to perform {@link #fetchMany(Class, ObjRef...)} on an arbitrary number of + * objects to fetch.
+ * + * The bucket size passed to {@code StreamUtil.bucketized} is {@code + * params().bucketizedBulkFetchSize()}. + * + * @param objRefs all {@link ObjRef}s to fetch + * @param clazz type of {@link Obj} to fetch + * @return stream of fetched {@link Obj}s, not found {@link Obj}s are filtered out + * @see StreamUtil#bucketized(Stream, Function, int) for a more generic implementation + */ + default Stream bucketizedBulkFetches(Stream objRefs, Class clazz) { + var fetchSize = params().bucketizedBulkFetchSize(); + + return StreamUtil.bucketized( + objRefs, + refs -> { + var toFetch = refs.toArray(new ObjRef[0]); + var objs = fetchMany(clazz, toFetch); + return Arrays.asList(objs); + }, + fetchSize) + .filter(Objects::nonNull); + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/PersistenceDecorator.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/PersistenceDecorator.java new file mode 100644 index 0000000000..746d0d90ae --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/PersistenceDecorator.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.apache.polaris.persistence.nosql.api; + +public interface PersistenceDecorator { + /** Flag whether the decorator should be considered. */ + boolean active(); + + /** + * Indicates the priority. Decorators with a lower priority are applied before those with a higher + * priority. + */ + int priority(); + + Persistence decorate(Persistence persistence); +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/PersistenceParams.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/PersistenceParams.java new file mode 100644 index 0000000000..0fa9036ae2 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/PersistenceParams.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api; + +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import io.smallrye.config.ConfigMapping; +import io.smallrye.config.WithDefault; +import java.util.stream.Stream; +import org.apache.polaris.immutables.PolarisImmutable; +import org.apache.polaris.misc.types.memorysize.MemorySize; +import org.apache.polaris.persistence.nosql.api.commit.RetryConfig; +import org.immutables.value.Value; + +@ConfigMapping(prefix = "polaris.persistence") +@JsonSerialize(as = ImmutableBuildablePersistenceParams.class) +@JsonDeserialize(as = ImmutableBuildablePersistenceParams.class) +public interface PersistenceParams { + String DEFAULT_REFERENCE_PREVIOUS_HEAD_COUNT_STRING = "20"; + int DEFAULT_REFERENCE_PREVIOUS_HEAD_COUNT = + Integer.parseInt(DEFAULT_REFERENCE_PREVIOUS_HEAD_COUNT_STRING); + + @WithDefault(DEFAULT_REFERENCE_PREVIOUS_HEAD_COUNT_STRING) + int referencePreviousHeadCount(); + + String DEFAULT_MAX_INDEX_STRIPES_STRING = "20"; + int DEFAULT_MAX_INDEX_STRIPES = Integer.parseInt(DEFAULT_MAX_INDEX_STRIPES_STRING); + + @WithDefault(DEFAULT_MAX_INDEX_STRIPES_STRING) + int maxIndexStripes(); + + String DEFAULT_MAX_EMBEDDED_INDEX_SIZE_STRING = "32k"; + MemorySize DEFAULT_MAX_EMBEDDED_INDEX_SIZE = + MemorySize.valueOf(DEFAULT_MAX_EMBEDDED_INDEX_SIZE_STRING); + + @WithDefault(DEFAULT_MAX_EMBEDDED_INDEX_SIZE_STRING) + MemorySize maxEmbeddedIndexSize(); + + String DEFAULT_MAX_INDEX_STRIPE_SIZE_STRING = "128k"; + MemorySize DEFAULT_MAX_INDEX_STRIPE_SIZE = + MemorySize.valueOf(DEFAULT_MAX_INDEX_STRIPE_SIZE_STRING); + + @WithDefault(DEFAULT_MAX_INDEX_STRIPE_SIZE_STRING) + MemorySize maxIndexStripeSize(); + + @Value.Default + default RetryConfig retryConfig() { + return RetryConfig.DEFAULT_RETRY_CONFIG; + } + + String DEFAULT_BUCKETIZED_BULK_FETCH_SIZE_STRING = "16"; + int DEFAULT_BUCKETIZED_BULK_FETCH_SIZE = + 
Integer.parseInt(DEFAULT_BUCKETIZED_BULK_FETCH_SIZE_STRING); + + /** + * The number of objects to fetch at once via {@link Persistence#bucketizedBulkFetches(Stream, + * Class)}. + */ + @WithDefault(DEFAULT_BUCKETIZED_BULK_FETCH_SIZE_STRING) + int bucketizedBulkFetchSize(); + + String DEFAULT_MAX_SERIALIZED_VALUE_SIZE_STRING = "350k"; + MemorySize DEFAULT_MAX_SERIALIZED_VALUE_SIZE = + MemorySize.valueOf(DEFAULT_MAX_SERIALIZED_VALUE_SIZE_STRING); + + /** The maximum size of a serialized value in a persisted database row. */ + @WithDefault(DEFAULT_MAX_SERIALIZED_VALUE_SIZE_STRING) + MemorySize maxSerializedValueSize(); + + @PolarisImmutable + interface BuildablePersistenceParams extends PersistenceParams { + static ImmutableBuildablePersistenceParams.Builder builder() { + return ImmutableBuildablePersistenceParams.builder(); + } + + @Override + @Value.Default + default int referencePreviousHeadCount() { + return DEFAULT_REFERENCE_PREVIOUS_HEAD_COUNT; + } + + @Override + @Value.Default + default int maxIndexStripes() { + return DEFAULT_MAX_INDEX_STRIPES; + } + + @Override + @Value.Default + default MemorySize maxEmbeddedIndexSize() { + return DEFAULT_MAX_EMBEDDED_INDEX_SIZE; + } + + @Override + @Value.Default + default MemorySize maxIndexStripeSize() { + return DEFAULT_MAX_INDEX_STRIPE_SIZE; + } + + @Override + @Value.Default + default RetryConfig retryConfig() { + return RetryConfig.BuildableRetryConfig.builder().build(); + } + + @Override + @Value.Default + default int bucketizedBulkFetchSize() { + return DEFAULT_BUCKETIZED_BULK_FETCH_SIZE; + } + + @Override + @Value.Default + default MemorySize maxSerializedValueSize() { + return DEFAULT_MAX_SERIALIZED_VALUE_SIZE; + } + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/RealmPersistenceFactory.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/RealmPersistenceFactory.java new file mode 100644 index 
0000000000..cd2bdf196b --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/RealmPersistenceFactory.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api; + +import jakarta.annotation.Nonnull; +import jakarta.enterprise.context.ApplicationScoped; + +/** + * Builder factory to generate "realm-scoped" {@link Persistence} instances. + * + *

{@link RealmPersistenceFactory} instance is available as an {@link ApplicationScoped} bean. + */ +public interface RealmPersistenceFactory { + /** + * Return a new builder for per-realm persistence. + * + *

Builders must only be used once. + */ + RealmPersistenceBuilder newBuilder(); + + interface RealmPersistenceBuilder { + RealmPersistenceBuilder realmId(@Nonnull String realmId); + + RealmPersistenceBuilder skipDecorators(); + + Persistence build(); + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/Realms.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/Realms.java new file mode 100644 index 0000000000..6275875871 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/Realms.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api; + +public final class Realms { + private Realms() {} + + /** + * Realms with special meanings and "non-standard behavior" (as per {@code + * org.apache.polaris.realms.api.RealmDefinition.RealmStatus}) have to have an ID that starts with + * this prefix. 
+ */ + public static final String SYSTEM_REALM_PREFIX = "::"; + + public static final String SYSTEM_REALM_ID = SYSTEM_REALM_PREFIX + "system::"; +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/StartupPersistence.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/StartupPersistence.java new file mode 100644 index 0000000000..4530531f71 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/StartupPersistence.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api; + +import static java.lang.annotation.ElementType.FIELD; +import static java.lang.annotation.ElementType.METHOD; +import static java.lang.annotation.ElementType.PARAMETER; +import static java.lang.annotation.ElementType.TYPE; +import static java.lang.annotation.RetentionPolicy.RUNTIME; + +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.enterprise.util.AnnotationLiteral; +import jakarta.inject.Inject; +import jakarta.inject.Qualifier; +import java.lang.annotation.Documented; +import java.lang.annotation.Retention; +import java.lang.annotation.Target; +import org.apache.polaris.ids.api.IdGenerator; +import org.apache.polaris.persistence.nosql.nodeids.api.NodeManagement; + +/** + * Qualifier for system-level {@link Persistence} instance against the {@linkplain + * Realms#SYSTEM_REALM_ID system realm} needed for {@linkplain NodeManagement node management}. + * + *

This qualifier is only needed and should only be used by code used to initialize the + * application. There is really no need to use this qualifier in any application code. + * + *

The qualified {@link Persistence} instance has no functional {@link IdGenerator}. + * + *

A system-realm {@link Persistence} instance can be {@link Inject @Inject}ed as an {@link + * ApplicationScoped @ApplicationScoped} bean using + * + * {@snippet : + * @ApplicationScoped + * class MyBean { + * @Inject @StartupPersistence Persistence startupPersistence; // @highlight + * } + * } + * + * @see SystemPersistence + */ +@Target({TYPE, METHOD, PARAMETER, FIELD}) +@Retention(RUNTIME) +@Documented +@Qualifier +public @interface StartupPersistence { + @SuppressWarnings("ClassExplicitlyAnnotation") + final class Literal extends AnnotationLiteral implements StartupPersistence {} +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/StreamUtil.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/StreamUtil.java new file mode 100644 index 0000000000..f0d7640a03 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/StreamUtil.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.polaris.persistence.nosql.api; + +import java.util.ArrayList; +import java.util.List; +import java.util.Spliterator; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +public final class StreamUtil { + /** + * Bucketizes the elements of the source stream, passes each bucket through the {@code + * bucketFetcher} function, eventually yielding a stream of the elements of all buckets. + * + *

A classic use case for this function is {@link Persistence#bucketizedBulkFetches(Stream, + * Class)}. + */ + public static Stream bucketized( + Stream source, Function, List> bucketFetcher, int bucketSize) { + var sourceIter = source.iterator(); + + var split = + new Spliterator>() { + @Override + public boolean tryAdvance(Consumer> action) { + if (!sourceIter.hasNext()) { + // nothing more to do + return false; + } + + var bucket = new ArrayList(bucketSize); + for (int i = 0; i < bucketSize && sourceIter.hasNext(); i++) { + bucket.add(sourceIter.next()); + } + var fetched = bucketFetcher.apply(bucket); + action.accept(fetched); + + return true; + } + + @Override + public Spliterator> trySplit() { + return null; + } + + @Override + public long estimateSize() { + return Long.MAX_VALUE; + } + + @Override + public int characteristics() { + return 0; + } + }; + return StreamSupport.stream(split, false).flatMap(List::stream); + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/SystemPersistence.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/SystemPersistence.java new file mode 100644 index 0000000000..6520da4d68 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/SystemPersistence.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api; + +import static java.lang.annotation.ElementType.FIELD; +import static java.lang.annotation.ElementType.METHOD; +import static java.lang.annotation.ElementType.PARAMETER; +import static java.lang.annotation.ElementType.TYPE; +import static java.lang.annotation.RetentionPolicy.RUNTIME; + +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.enterprise.util.AnnotationLiteral; +import jakarta.inject.Inject; +import jakarta.inject.Qualifier; +import java.lang.annotation.Documented; +import java.lang.annotation.Retention; +import java.lang.annotation.Target; +import org.apache.polaris.ids.api.IdGenerator; +import org.apache.polaris.persistence.nosql.nodeids.api.NodeManagement; + +/** + * Qualifier for system-level {@link Persistence} instance against the {@linkplain + * Realms#SYSTEM_REALM_ID system realm} needed for realm management. + * + *

The qualified {@link Persistence} instance has a functional {@link IdGenerator}, enabled via a + * valid {@linkplain NodeManagement#lease() node lease}. + * + *

A system-realm {@link Persistence} instance can be {@link Inject @Inject}ed as an {@link + * ApplicationScoped @ApplicationScoped} bean using + * + * {@snippet : + * @ApplicationScoped + * class MyBean { + * @Inject @SystemPersistence Persistence systemPersistence; // @highlight + * } + * } + * + * @see StartupPersistence + */ +@Target({TYPE, METHOD, PARAMETER, FIELD}) +@Retention(RUNTIME) +@Documented +@Qualifier +public @interface SystemPersistence { + @SuppressWarnings("ClassExplicitlyAnnotation") + final class Literal extends AnnotationLiteral implements SystemPersistence {} +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/Backend.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/Backend.java new file mode 100644 index 0000000000..6eea4a5940 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/Backend.java @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.backend; + +import jakarta.annotation.Nonnull; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.function.Function; +import org.apache.polaris.ids.api.IdGenerator; +import org.apache.polaris.ids.api.MonotonicClock; +import org.apache.polaris.persistence.nosql.api.Persistence; +import org.apache.polaris.persistence.nosql.api.PersistenceParams; +import org.apache.polaris.persistence.nosql.api.obj.ObjRef; +import org.apache.polaris.persistence.nosql.api.ref.Reference; + +/** Provides "low-level" access to the database-specific backend. */ +public interface Backend extends AutoCloseable { + /** Name of this backend. This value serves as an identifier to select the correct backend. */ + @Nonnull + String type(); + + /** + * Called to set up the database schema. + * + * @return optional, human-readable information + */ + Optional setupSchema(); + + @Nonnull + Persistence newPersistence( + Function backendWrapper, + @Nonnull PersistenceParams persistenceParams, + String realmId, + MonotonicClock monotonicClock, + IdGenerator idGenerator); + + /** Whether the implementation supports {@link #deleteRealms(Set)}. */ + boolean supportsRealmDeletion(); + + /** + * Delete the given realms. + * + *

This function only works if {@link #supportsRealmDeletion()} yields {@code true}. + * + *

Throws an {@link UnsupportedOperationException} if {@link #supportsRealmDeletion()} yields + * {@code false}. + */ + void deleteRealms(Set realmIds); + + /** + * Bulk reference deletion, grouped by realm. This functionality is primarily needed for the + * maintenance service. + */ + void batchDeleteRefs(Map> realmRefs); + + /** + * Bulk object-part deletion, grouped by realm. This functionality is primarily needed for the + * maintenance service. + */ + void batchDeleteObjs(Map> realmObjs); + + /** Callback interface for {@link #scanBackend(ReferenceScanCallback, ObjScanCallback)}. */ + @FunctionalInterface + interface ReferenceScanCallback { + /** + * Called for each discovered reference ("item"). + * + * @param realmId the realm to which the item belongs + * @param refName the reference name + * @param createdAtMicros the timestamp in microseconds since (Unix) epoch at which the item was + * created in the database + */ + void call(@Nonnull String realmId, @Nonnull String refName, long createdAtMicros); + } + + /** Callback interface for {@link #scanBackend(ReferenceScanCallback, ObjScanCallback)}. */ + @FunctionalInterface + interface ObjScanCallback { + /** + * Called for each discovered object-part ("item"). + * + * @param realmId the realm to which the item belongs + * @param type the object type ID + * @param id object-part ID + * @param createdAtMicros the timestamp in microseconds since (Unix) epoch at which the item was + * created in the database + */ + void call( + @Nonnull String realmId, @Nonnull String type, @Nonnull PersistId id, long createdAtMicros); + } + + /** + * Scan the whole backend database and return each discovered reference and object-part via the + * provided callbacks. This functionality is primarily needed for the maintenance service.
+ */ + void scanBackend( + @Nonnull ReferenceScanCallback referenceConsumer, @Nonnull ObjScanCallback objConsumer); + + boolean createReference(@Nonnull String realmId, @Nonnull Reference newRef); + + void createReferences(@Nonnull String realmId, @Nonnull List newRefs); + + boolean updateReference( + @Nonnull String realmId, + @Nonnull Reference updatedRef, + @Nonnull Optional expectedPointer); + + @Nonnull + Reference fetchReference(@Nonnull String realmId, @Nonnull String name); + + @Nonnull + Map fetch(@Nonnull String realmId, @Nonnull Set ids); + + void write(@Nonnull String realmId, @Nonnull List writes); + + void delete(@Nonnull String realmId, @Nonnull Set ids); + + boolean conditionalInsert( + @Nonnull String realmId, + String objTypeId, + @Nonnull PersistId persistId, + long createdAtMicros, + @Nonnull String versionToken, + @Nonnull byte[] serializedValue); + + boolean conditionalUpdate( + @Nonnull String realmId, + String objTypeId, + @Nonnull PersistId persistId, + long createdAtMicros, + @Nonnull String updateToken, + @Nonnull String expectedToken, + @Nonnull byte[] serializedValue); + + boolean conditionalDelete( + @Nonnull String realmId, @Nonnull PersistId persistId, @Nonnull String expectedToken); +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/BackendConfiguration.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/BackendConfiguration.java new file mode 100644 index 0000000000..5e178d23bf --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/BackendConfiguration.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.backend; + +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import io.smallrye.config.ConfigMapping; +import java.util.Optional; +import org.apache.polaris.immutables.PolarisImmutable; + +/** Polaris persistence backend configuration. */ +@ConfigMapping(prefix = "polaris.persistence.backend") +@JsonSerialize(as = ImmutableBuildableBackendConfiguration.class) +@JsonDeserialize(as = ImmutableBuildableBackendConfiguration.class) +public interface BackendConfiguration { + /** Name of the persistence backend to use. 
*/ + Optional type(); + + @PolarisImmutable + interface BuildableBackendConfiguration extends BackendConfiguration { + static ImmutableBuildableBackendConfiguration.Builder builder() { + return ImmutableBuildableBackendConfiguration.builder(); + } + + @Override + Optional type(); + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/BackendFactory.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/BackendFactory.java new file mode 100644 index 0000000000..b9cc72ce84 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/BackendFactory.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.backend; + +import jakarta.annotation.Nonnull; + +/** + * Factory responsible to produce {@link Backend} instances. Usually only one {@link Backend} + * instance is ever produced and active in a production environment. + */ +public interface BackendFactory { + /** Human-readable name. 
*/ + String name(); + + @Nonnull + Backend buildBackend(@Nonnull RUNTIME_CONFIG backendConfig); + + Class configurationInterface(); + + RUNTIME_CONFIG buildConfiguration(CONFIG_INTERFACE config); +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/BackendLoader.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/BackendLoader.java new file mode 100644 index 0000000000..346e3bf96d --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/BackendLoader.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.backend; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; + +import jakarta.annotation.Nonnull; +import java.util.ArrayList; +import java.util.List; +import java.util.ServiceLoader; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import java.util.stream.Stream; + /** Static helpers that look up {@link BackendFactory} implementations via {@link ServiceLoader}. */ +public final class BackendLoader { + private BackendLoader() {} + /** Returns the factory whose {@code name()} equals the given name; see {@link #findFactory(Predicate)} for the failure cases. */ + @Nonnull + public static BackendFactory findFactoryByName(@Nonnull String name) { + return findFactory(f -> f.name().equals(name)); + } + /** Returns a stream over all {@link BackendFactory} instances provided by the {@link ServiceLoader}. */ + @Nonnull + public static Stream> availableFactories() { + @SuppressWarnings("rawtypes") + var x = (Stream) loader().stream().map(ServiceLoader.Provider::get); + @SuppressWarnings("unchecked") + var r = (Stream>) x; + return r; + } + /** Returns the single factory accepted by {@code filter}. Throws {@link IllegalStateException} if the service loader yields no factory at all or if more than one factory matches, and {@link IllegalArgumentException} if factories exist but none matches. */ + @Nonnull + public static BackendFactory findFactory( + @Nonnull Predicate> filter) { + ServiceLoader> loader = loader(); + List> candidates = new ArrayList<>(); + boolean any = false; + for (BackendFactory backendFactory : loader) { + any = true; + if (filter.test(backendFactory)) { + candidates.add(backendFactory); + } + } + checkState(any, "No BackendFactory on class path"); + checkArgument(!candidates.isEmpty(), "No BackendFactory matched the given filter"); + + if (candidates.size() == 1) { + return cast(candidates.getFirst()); + } + + throw new IllegalStateException( + "More than one BackendFactory matched the given filter: " + + candidates.stream().map(BackendFactory::name).collect(Collectors.joining(", "))); + } + + // Helper for ugly generics casting + private static ServiceLoader> loader() { + @SuppressWarnings("rawtypes") + ServiceLoader f = ServiceLoader.load(BackendFactory.class); + @SuppressWarnings({"unchecked", "rawtypes"}) + ServiceLoader> r = (ServiceLoader) f; + return r; + } + + // Helper for ugly generics casting + private static BackendFactory cast(BackendFactory backendFactory) { + 
@SuppressWarnings("unchecked") + BackendFactory r = (BackendFactory) backendFactory; + return r; + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/FetchedObj.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/FetchedObj.java new file mode 100644 index 0000000000..61f21db1b4 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/FetchedObj.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.backend; + /** Record bundling an object's type name, creation timestamp in microseconds, version token, serialized bytes and number of parts. NOTE(review): presumably what a backend hands back when an object is fetched - confirm against the Backend interface. */ +public record FetchedObj( + String type, long createdAtMicros, String versionToken, byte[] serialized, int realNumParts) {} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/PersistId.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/PersistId.java new file mode 100644 index 0000000000..5a0de664c5 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/PersistId.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.backend; + +import static com.google.common.base.Preconditions.checkState; +import static org.apache.polaris.persistence.varint.VarInt.putVarInt; +import static org.apache.polaris.persistence.varint.VarInt.readVarInt; +import static org.apache.polaris.persistence.varint.VarInt.varIntLen; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import java.io.IOException; +import java.nio.ByteBuffer; +import org.apache.polaris.immutables.PolarisImmutable; +import org.apache.polaris.persistence.nosql.api.obj.Obj; +import org.immutables.value.Value; + +/** + * Represents the key of a serialized part of an {@link Obj}, where {@link #part()} defines + * the {@code 0}-based offset of the serialized part. + * + *
<p>
This type is used internally when dealing with individual database rows/documents and for + * maintenance operations. This type is not part of any application/user facing API. + */ +@JsonSerialize(using = PersistId.PersistIdSerializer.class) +@JsonDeserialize(using = PersistId.PersistIdDeserializer.class) +@PolarisImmutable +public interface PersistId { + @Value.Parameter(order = 1) + long id(); + + @Value.Parameter(order = 2) + int part(); + + @Value.Check + default void check() { + checkState(part() >= 0, "part must not be negative"); + } + + static PersistId persistId(long id, int part) { + return ImmutablePersistId.of(id, part); + } + /** Returns the {@link PersistId} for part {@code 0} of the given object. */ + static PersistId persistIdPart0(Obj obj) { + return persistId(obj.id(), 0); + } + /** Jackson serializer writing the {@link #serializeAsBytes(PersistId)} form as a binary value. */ + class PersistIdSerializer extends JsonSerializer { + @Override + public void serialize(PersistId value, JsonGenerator gen, SerializerProvider serializers) + throws IOException { + gen.writeBinary(serializeAsBytes(value)); + } + } + /** Jackson deserializer reading a binary value via {@link #fromBytes(byte[])}. */ + class PersistIdDeserializer extends JsonDeserializer { + @Override + public PersistId deserialize(JsonParser p, DeserializationContext ctxt) throws IOException { + return fromBytes(p.getBinaryValue()); + } + } + /** Returns the number of bytes {@link #serializeAsBytes(PersistId)} produces: one type byte, {@link Long#BYTES} ID bytes, plus the var-int encoded part only if the part is greater than 0. */ + static int serializedSize(PersistId persistId) { + var part = persistId.part(); + var hasPart = part > 0; + var partLen = hasPart ? varIntLen(part) : 0; + return 1 + Long.BYTES + partLen; + } + + @Value.NonAttribute + @JsonIgnore + default byte[] toBytes() { + return serializeAsBytes(this); + } + /** Serializes the ID as a type byte ({@code 1} = part 0, no part written; {@code 2} = part follows), the {@code long} ID and, only for parts greater than 0, the part as a var-int. */ + static byte[] serializeAsBytes(PersistId persistId) { + var part = persistId.part(); + var hasPart = part > 0; + var partLen = hasPart ? varIntLen(part) : 0; + var type = (byte) (hasPart ? 
2 : 1); + + var bytes = new byte[1 + Long.BYTES + partLen]; + var buf = ByteBuffer.wrap(bytes); + buf.put(type); + buf.putLong(persistId.id()); + if (hasPart) { + putVarInt(buf, part); + } + return bytes; + } + /** Inverse of {@link #serializeAsBytes(PersistId)}. Returns {@code null} for a {@code null} or empty array and throws {@link IllegalArgumentException} for an unknown type byte. */ + static PersistId fromBytes(byte[] bytes) { + if (bytes == null || bytes.length == 0) { + return null; + } + var buf = ByteBuffer.wrap(bytes); + var type = buf.get(); + return switch (type) { + case 1 -> persistId(buf.getLong(), 0); + case 2 -> { + var id = buf.getLong(); + var part = readVarInt(buf); + yield persistId(id, part); + } + default -> throw new IllegalArgumentException("Unsupported PersistId type: " + type); + }; + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/WriteObj.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/WriteObj.java new file mode 100644 index 0000000000..3379dd8fa6 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/backend/WriteObj.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.backend; + /** Record bundling an object's type name, ID, part, creation timestamp in microseconds, serialized bytes and {@code partNum}. NOTE(review): presumably the unit a backend is asked to write; the difference between {@code part} and {@code partNum} is not visible here - confirm against the Backend interface. */ +public record WriteObj( + String type, long id, int part, long createdAtMicros, byte[] serialized, int partNum) {} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/cache/CacheBackend.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/cache/CacheBackend.java new file mode 100644 index 0000000000..38d6653dfd --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/cache/CacheBackend.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.cache; + +import static org.apache.polaris.persistence.nosql.api.obj.ObjRef.objRef; + +import jakarta.annotation.Nonnull; +import org.apache.polaris.persistence.nosql.api.Persistence; +import org.apache.polaris.persistence.nosql.api.backend.Backend; +import org.apache.polaris.persistence.nosql.api.obj.Obj; +import org.apache.polaris.persistence.nosql.api.obj.ObjRef; +import org.apache.polaris.persistence.nosql.api.obj.ObjType; +import org.apache.polaris.persistence.nosql.api.ref.Reference; + +/** + * Provides the cache primitives for a caching {@link Persistence} facade, suitable for multiple + * repositories. It is advisable to have one {@link CacheBackend} per {@link Backend}. + */ +public interface CacheBackend { + /** + * Special sentinel reference instance to indicate that a reference object has been marked as "not + * found". This object is only for cache-internal purposes. + */ + Reference NON_EXISTENT_REFERENCE_SENTINEL = + Reference.builder() + .name("NON_EXISTENT") + .pointer(objRef("CACHE_SENTINEL", 0L)) + .createdAtMicros(0L) + .previousPointers() + .build(); + + /** + * Special sentinel object instance to indicate that an object has been marked as "not found". + * This object is only for cache-internal purposes. Every method of this instance throws {@link UnsupportedOperationException}, so it must only be compared by identity. 
+ */ + Obj NOT_FOUND_OBJ_SENTINEL = + new Obj() { + @Override + public ObjType type() { + throw new UnsupportedOperationException(); + } + + @Override + public long id() { + throw new UnsupportedOperationException(); + } + + @Override + public int numParts() { + throw new UnsupportedOperationException(); + } + + @Override + public String versionToken() { + throw new UnsupportedOperationException(); + } + + @Override + public long createdAtMicros() { + throw new UnsupportedOperationException(); + } + + @Override + @Nonnull + public Obj withCreatedAtMicros(long createdAtMicros) { + throw new UnsupportedOperationException(); + } + + @Override + @Nonnull + public Obj withNumParts(int numParts) { + throw new UnsupportedOperationException(); + } + }; + + /** Returns the {@link Obj} for the given {@link ObjRef id}. */ + Obj get(@Nonnull String realmId, @Nonnull ObjRef id); + + /** + * Adds the given object to the local cache and sends a cache-invalidation message to Polaris + * peers. + */ + void put(@Nonnull String realmId, @Nonnull Obj obj); + + /** Adds the given object only to the local cache, does not send a cache-invalidation message. */ + void putLocal(@Nonnull String realmId, @Nonnull Obj obj); + + /** Records the "not found" sentinel for the given {@link ObjRef id} and {@link ObjType type}. */ + void putNegative(@Nonnull String realmId, @Nonnull ObjRef id); + + void remove(@Nonnull String realmId, @Nonnull ObjRef id); + + void clear(@Nonnull String realmId); + + void purge(); + + long estimatedSize(); + + Persistence wrap(@Nonnull Persistence persist); + + Reference getReference(@Nonnull String realmId, @Nonnull String name); + + void removeReference(@Nonnull String realmId, @Nonnull String name); + + /** + * Adds the given reference to the local cache and sends a cache-invalidation message to Polaris + * peers. 
+ */ + void putReference(@Nonnull String realmId, @Nonnull Reference reference); + + /** + * Adds the given reference only to the local cache, does not send a cache-invalidation message. + */ + void putReferenceLocal(@Nonnull String realmId, @Nonnull Reference reference); + /** NOTE(review): presumably the reference counterpart of {@link #putNegative}, recording {@link #NON_EXISTENT_REFERENCE_SENTINEL} for the given reference name - confirm against implementations. */ + void putReferenceNegative(@Nonnull String realmId, @Nonnull String name); +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/cache/CacheConfig.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/cache/CacheConfig.java new file mode 100644 index 0000000000..8753d4e0ac --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/cache/CacheConfig.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.cache; + +import static com.google.common.base.Preconditions.checkState; + +import com.fasterxml.jackson.annotation.JsonFormat; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.google.errorprone.annotations.CanIgnoreReturnValue; +import io.smallrye.config.ConfigMapping; +import io.smallrye.config.WithDefault; +import java.time.Duration; +import java.util.Optional; +import java.util.function.LongSupplier; +import org.apache.polaris.immutables.PolarisImmutable; +import org.immutables.value.Value; + +/** Persistence cache configuration. */ +@ConfigMapping(prefix = "polaris.persistence.cache") +@JsonSerialize(as = ImmutableBuildableCacheConfig.class) +@JsonDeserialize(as = ImmutableBuildableCacheConfig.class) +public interface CacheConfig { + + String INVALID_REFERENCE_NEGATIVE_TTL = + "Cache reference-negative-TTL, if present, must be positive."; + String INVALID_REFERENCE_TTL = + "Cache reference-TTL must be positive, 0 disables reference caching."; + + String DEFAULT_REFERENCE_TTL_STRING = "PT15M"; + Duration DEFAULT_REFERENCE_TTL = Duration.parse(DEFAULT_REFERENCE_TTL_STRING); + + boolean DEFAULT_ENABLE = true; + + /** + * Optionally disable the cache, the default value is {@code true}, meaning that the cache is + * enabled by default. + */ + @WithDefault("" + DEFAULT_ENABLE) + Optional enable(); + + /** Duration to cache the state of references, defaults to 15 minutes ({@code PT15M}). */ + @WithDefault(DEFAULT_REFERENCE_TTL_STRING) + Optional referenceTtl(); + + /** Duration to cache whether a reference does not exist (negative caching). 
+ */ + @JsonFormat(shape = JsonFormat.Shape.STRING) + Optional referenceNegativeTtl(); + /** Optional cache sizing parameters, see {@link CacheSizing}. */ + Optional sizing(); + /** Nanosecond clock source, defaults to {@link System#nanoTime()}; excluded from JSON. */ + @Value.Default + @JsonIgnore + default LongSupplier clockNanos() { + return System::nanoTime; + } + + @PolarisImmutable + interface BuildableCacheConfig extends CacheConfig { + + static Builder builder() { + return ImmutableBuildableCacheConfig.builder(); + } + /** Validates the TTLs: the reference TTL (or its default) must not be negative, and a negative-TTL, if present, requires both the reference TTL and the negative-TTL to be greater than zero. */ + @Value.Check + default void check() { + var referenceTtl = referenceTtl().orElse(DEFAULT_REFERENCE_TTL); + checkState(referenceTtl.compareTo(Duration.ZERO) >= 0, INVALID_REFERENCE_TTL); + referenceNegativeTtl() + .ifPresent( + ttl -> + checkState( + referenceTtl.compareTo(Duration.ZERO) > 0 && ttl.compareTo(Duration.ZERO) > 0, + INVALID_REFERENCE_NEGATIVE_TTL)); + } + + interface Builder { + @CanIgnoreReturnValue + Builder referenceTtl(Duration referenceTtl); + + @CanIgnoreReturnValue + Builder referenceNegativeTtl(Duration referenceNegativeTtl); + + @CanIgnoreReturnValue + Builder sizing(CacheSizing sizing); + + @CanIgnoreReturnValue + Builder clockNanos(LongSupplier clockNanos); + + CacheConfig build(); + } + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/cache/CacheInvalidations.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/cache/CacheInvalidations.java new file mode 100644 index 0000000000..f98afd07cd --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/cache/CacheInvalidations.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.cache; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.fasterxml.jackson.annotation.JsonTypeName; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import java.util.List; +import org.apache.polaris.persistence.nosql.api.obj.ObjRef; +import org.immutables.value.Value; + /** A JSON-serializable batch of {@link CacheInvalidation} entries. */ +@Value.Immutable +@Value.Style(jdkOnly = true) +@JsonSerialize(as = ImmutableCacheInvalidations.class) +@JsonDeserialize(as = ImmutableCacheInvalidations.class) +public interface CacheInvalidations { + @Value.Parameter(order = 1) + List invalidations(); + + static CacheInvalidations cacheInvalidations(List invalidations) { + return ImmutableCacheInvalidations.of(invalidations); + } + /** A single invalidation entry, serialized polymorphically: the JSON property {@code "t"} carries the type name, {@code "obj"} or {@code "ref"}. */ + @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "t") + @JsonSubTypes({ + @JsonSubTypes.Type( + value = CacheInvalidationEvictObj.class, + name = CacheInvalidationEvictObj.TYPE), + @JsonSubTypes.Type( + value = CacheInvalidationEvictReference.class, + name = CacheInvalidationEvictReference.TYPE), + }) + interface CacheInvalidation { + String type(); + } + /** Entry of type {@code "obj"}, identifying an object by realm ID (JSON property {@code "r"}) and {@link ObjRef}. */ + @Value.Immutable + @JsonSerialize(as = ImmutableCacheInvalidationEvictObj.class) + @JsonDeserialize(as = ImmutableCacheInvalidationEvictObj.class) + @JsonTypeName(value = CacheInvalidationEvictObj.TYPE) + interface CacheInvalidationEvictObj extends CacheInvalidation { + String 
TYPE = "obj"; + + @Override + default String type() { + return TYPE; + } + + @JsonProperty("r") + @Value.Parameter(order = 1) + String realmId(); + + @Value.Parameter(order = 2) + ObjRef id(); + + static CacheInvalidationEvictObj cacheInvalidationEvictObj(String realmId, ObjRef id) { + return ImmutableCacheInvalidationEvictObj.of(realmId, id); + } + } + /** Entry of type {@code "ref"}, identifying a reference by realm ID (JSON property {@code "r"}) and reference name. */ + @Value.Immutable + @JsonSerialize(as = ImmutableCacheInvalidationEvictReference.class) + @JsonDeserialize(as = ImmutableCacheInvalidationEvictReference.class) + @JsonTypeName(value = CacheInvalidationEvictReference.TYPE) + interface CacheInvalidationEvictReference extends CacheInvalidation { + String TYPE = "ref"; + + @Override + default String type() { + return TYPE; + } + + @JsonProperty("r") + @Value.Parameter(order = 1) + String realmId(); + + @Value.Parameter(order = 2) + String ref(); + + static CacheInvalidationEvictReference cacheInvalidationEvictReference( + String realmId, String refName) { + return ImmutableCacheInvalidationEvictReference.of(realmId, refName); + } + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/cache/CacheSizing.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/cache/CacheSizing.java new file mode 100644 index 0000000000..81a7ecf096 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/cache/CacheSizing.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.cache; + +import static com.google.common.base.Preconditions.checkState; + +import com.google.errorprone.annotations.CanIgnoreReturnValue; +import io.smallrye.config.WithDefault; +import java.util.Optional; +import java.util.OptionalDouble; +import org.apache.polaris.immutables.PolarisImmutable; +import org.apache.polaris.misc.types.memorysize.MemorySize; +import org.immutables.value.Value; + +/** + * Parameters to size the persistence cache. It is recommended to leave the defaults. If changes are + * necessary, prefer the heap-size relative options over a fixed cache size, because relative sizing + * is portable across instances with different heap sizes. + */ +@PolarisImmutable +public interface CacheSizing { + + double DEFAULT_HEAP_FRACTION = .6d; + + /** + * Fraction of Java’s max heap size to use for cache objects, set to 0 to disable. Must not be + * used with fixed cache sizing. If neither this value nor a fixed size is configured, a default + * of {@code .4} (40%) is assumed, if {@code enable-soft-references} is enabled, else {@code .6} + * (60%) is assumed. NOTE(review): {@link #check()} only accepts values greater than 0 and less than 1, so a value of 0 is rejected - confirm the "set to 0 to disable" wording. + */ + OptionalDouble fractionOfMaxHeapSize(); + + String DEFAULT_MIN_SIZE_STRING = "64M"; + MemorySize DEFAULT_MIN_SIZE = MemorySize.valueOf(DEFAULT_MIN_SIZE_STRING); + + /** When using fractional cache sizing, this amount in MB is the minimum cache size. 
+ */ + @WithDefault(DEFAULT_MIN_SIZE_STRING) + Optional fractionMinSize(); + + String DEFAULT_HEAP_SIZE_KEEP_FREE_STRING = "256M"; + MemorySize DEFAULT_HEAP_SIZE_KEEP_FREE = MemorySize.valueOf(DEFAULT_HEAP_SIZE_KEEP_FREE_STRING); + + /** + * When using fractional cache sizing, this amount in MB of the heap will always be "kept free" + * when calculating the cache size. + */ + @WithDefault(DEFAULT_HEAP_SIZE_KEEP_FREE_STRING) + Optional fractionAdjustment(); + + /** Capacity of the persistence cache in MiB. */ + Optional fixedSize(); + + double DEFAULT_CACHE_CAPACITY_OVERSHOOT = 0.1d; + String DEFAULT_CACHE_CAPACITY_OVERSHOOT_STRING = "0.1"; + + /** + * Admitted cache-capacity-overshoot fraction, defaults to {@code 0.1} (10 %). + * + *
<p>
New elements are admitted to be added to the cache, if the cache's size is less than {@code + * cache-capacity * (1 + cache-capacity-overshoot)}. + * + *
<p>
Cache eviction happens asynchronously. Situations when eviction cannot keep up with the + * amount of data added could lead to out-of-memory situations. + * + *
<p>
The value, if present, must be greater than 0. + */ + @WithDefault(DEFAULT_CACHE_CAPACITY_OVERSHOOT_STRING) + OptionalDouble cacheCapacityOvershoot(); + /** Computes the cache capacity in bytes. A configured {@link #fixedSize()} is returned as is. Otherwise the capacity is {@code maxHeapInBytes} times {@link #fractionOfMaxHeapSize()} (or {@code defaultHeapFraction} if absent); if that would leave less than {@link #fractionAdjustment()} (default {@code 256M}) of the heap free, the capacity becomes {@code maxHeapInBytes} minus that amount. The result is never smaller than {@link #fractionMinSize()} (default {@code 64M}). */ + default long calculateEffectiveSize(long maxHeapInBytes, double defaultHeapFraction) { + if (fixedSize().isPresent()) { + return fixedSize().get().asLong(); + } + + long fractionAsBytes = + (long) (fractionOfMaxHeapSize().orElse(defaultHeapFraction) * maxHeapInBytes); + + long freeHeap = maxHeapInBytes - fractionAsBytes; + long minFree = fractionAdjustment().orElse(DEFAULT_HEAP_SIZE_KEEP_FREE).asLong(); + + long capacityInBytes = (minFree > freeHeap) ? maxHeapInBytes - minFree : fractionAsBytes; + + long fractionMin = fractionMinSize().orElse(DEFAULT_MIN_SIZE).asLong(); + if (capacityInBytes < fractionMin) { + capacityInBytes = fractionMin; + } + + return capacityInBytes; + } + + static Builder builder() { + return ImmutableCacheSizing.builder(); + } + + @SuppressWarnings("unused") + interface Builder { + @CanIgnoreReturnValue + Builder fixedSize(MemorySize fixedSize); + + @CanIgnoreReturnValue + Builder fixedSize(Optional fixedSize); + + @CanIgnoreReturnValue + Builder fractionOfMaxHeapSize(double fractionOfMaxHeapSize); + + @CanIgnoreReturnValue + Builder fractionOfMaxHeapSize(OptionalDouble fractionOfMaxHeapSize); + + @CanIgnoreReturnValue + Builder fractionMinSize(MemorySize fractionMinSize); + + @CanIgnoreReturnValue + Builder fractionMinSize(Optional fractionMinSize); + + @CanIgnoreReturnValue + Builder fractionAdjustment(MemorySize fractionAdjustment); + + @CanIgnoreReturnValue + Builder fractionAdjustment(Optional fractionAdjustment); + + @CanIgnoreReturnValue + Builder cacheCapacityOvershoot(double cacheCapacityOvershoot); + + @CanIgnoreReturnValue + Builder cacheCapacityOvershoot(OptionalDouble cacheCapacityOvershoot); + + CacheSizing build(); + } + /** Validates the configured values: {@link #fractionOfMaxHeapSize()}, if present, must be greater than 0 and less than 1; {@link #fixedSize()}, if present, must not be negative; {@link #fractionAdjustment()} (or its default) must be greater than {@code 64L * 1024L * 1024L}; {@link #cacheCapacityOvershoot()} (or its default) must be greater than 0. */ + @Value.Check + default void check() { + if (fractionOfMaxHeapSize().isPresent()) { + checkState( + fractionOfMaxHeapSize().getAsDouble() > 0d && 
fractionOfMaxHeapSize().getAsDouble() < 1d, + "Cache sizing: fractionOfMaxHeapSize must be > 0 and < 1, but is %s", + fractionOfMaxHeapSize()); + } + if (fixedSize().isPresent()) { + long fixed = fixedSize().get().asLong(); + checkState( + fixed >= 0, "Cache sizing: sizeInBytes must be greater than 0, but is %s", fixedSize()); + } + checkState( + fractionAdjustment().orElse(DEFAULT_HEAP_SIZE_KEEP_FREE).asLong() > 64L * 1024L * 1024L, + "Cache sizing: heapSizeAdjustment must be greater than 64 MB, but is %s", + fractionAdjustment()); + checkState( + cacheCapacityOvershoot().orElse(DEFAULT_CACHE_CAPACITY_OVERSHOOT) > 0d, + "Cache sizing: cacheCapacityOvershoot must be greater than 0, but is %s", + cacheCapacityOvershoot()); + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/cache/DistributedCacheInvalidation.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/cache/DistributedCacheInvalidation.java new file mode 100644 index 0000000000..7ba5de407d --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/cache/DistributedCacheInvalidation.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.cache; + +import jakarta.annotation.Nonnull; +import org.apache.polaris.persistence.nosql.api.obj.ObjRef; + /** Callbacks to evict a single object or a single reference, each addressed by realm ID. {@link Receiver} and {@link Sender} add no methods. NOTE(review): presumably the sender broadcasts evictions to peers and the receiver applies incoming ones locally - confirm against implementations. */ +public interface DistributedCacheInvalidation { + void evictObj(@Nonnull String realmId, @Nonnull ObjRef objRef); + + void evictReference(@Nonnull String realmId, @Nonnull String refName); + + interface Receiver extends DistributedCacheInvalidation {} + + interface Sender extends DistributedCacheInvalidation {} +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/CommitException.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/CommitException.java new file mode 100644 index 0000000000..b3d7382b78 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/CommitException.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.commit; + /** Abstract, unchecked base class for commit failures; concrete subclasses supply a message and optionally a cause. */ +public abstract class CommitException extends RuntimeException { + + public CommitException(String message) { + super(message); + } + + public CommitException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/CommitRetryable.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/CommitRetryable.java new file mode 100644 index 0000000000..2dffdf44e0 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/CommitRetryable.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.commit; + +import jakarta.annotation.Nonnull; +import java.util.Optional; +import java.util.function.Supplier; +import org.apache.polaris.persistence.nosql.api.obj.BaseCommitObj; +import org.apache.polaris.persistence.nosql.api.obj.Obj; +import org.apache.polaris.persistence.nosql.api.ref.Reference; + +@FunctionalInterface +public interface CommitRetryable { + + /** + * Called from {@linkplain Committer committer} implementations. + * + *

Implementations call the {@code refObjSupplier} to retrieve the current reference object + * using the current state of the reference. Long-running attempt implementations that need to + * have the reference object early should call the supplier again shortly before returning from this + * function and attempt to perform the required checks against the latest state of the reference + * object. This helps in reducing unnecessary retries when the attempt can be safely applied to + * the latest state of the reference object. + * + *

Writes must be triggered via the various {@code write*()} functions on {@link + * CommitterState}, preferably via {@link CommitterState#writeOrReplace(Object, Obj, Class)}. The + * {@link String} keys are used as symbolic identifiers; implementations are responsible for + * providing keys that are unique. + * + *

Reads must happen via the specialized {@link CommitterState#persistence() Persistence} + * provided by the committer implementation. + * + * @param state Communicate {@linkplain Obj objects} to be persisted via {@link CommitterState} + * @param refObjSupplier supplier returning the {@linkplain Reference#pointer() current object}, + * if present. Must be invoked. + * @return Successful attempts return a non-empty {@link Optional} containing the result. An + * {@linkplain Optional#empty() empty optional} indicates that a retry should be attempted. + * @throws CommitException Instances of this class let the whole commit operation abort. + */ + @Nonnull + Optional attempt( + @Nonnull CommitterState state, + @Nonnull Supplier> refObjSupplier) + throws CommitException; +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/Commits.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/Commits.java new file mode 100644 index 0000000000..cf06219f7f --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/Commits.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.commit; + +import java.util.Iterator; +import java.util.OptionalLong; +import org.apache.polaris.persistence.nosql.api.obj.BaseCommitObj; + +/** Provides iterator-based access to the history of a named reference. */ +public interface Commits { + + /** + * Retrieves the commit log in the natural, chronologically reverse order - most recent commit + * first. + */ + Iterator commitLog( + String refName, OptionalLong offset, Class clazz); + + /** + * Retrieves the commit log in chronological order starting at the given offset. + * + *

This function is useful when retrieving commits to serve events/notification use cases. + */ + Iterator commitLogReversed( + String refName, long offset, Class clazz); +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/Committer.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/Committer.java new file mode 100644 index 0000000000..a148b3e9ec --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/Committer.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.commit; + +import java.util.Optional; +import java.util.function.Supplier; +import org.apache.polaris.persistence.nosql.api.Persistence; +import org.apache.polaris.persistence.nosql.api.obj.BaseCommitObj; +import org.apache.polaris.persistence.nosql.api.obj.Obj; +import org.apache.polaris.persistence.nosql.api.ref.Reference; + +/** + * A {@link Committer} performs an atomic change against a named reference. This is a higher-level + * functionality building on top of the low-level {@code RetryLoop}. + * + *

Committing use cases ensure that a {@linkplain Reference#pointer() reference} always points to + * a consistent state, and that the change is atomic. + * + *

Committing use cases usually need to write more {@linkplain Obj objects} than just the + * {@linkplain Reference#pointer() referenced} one. Implementations must use {@link + * CommitterState#writeIntent(Object, Obj)} to get those objects persisted. Retries can + * {@linkplain CommitterState#getWrittenByKey(Object) check} whether an object has already been + * written to prevent unnecessary write operations against the backend database. + * + *

A committing use case {@linkplain Persistence#createCommitter(String, Class, Class) creates} a + * {@link Committer} instance using a {@link CommitRetryable} implementation, which {@linkplain + * CommitRetryable#attempt(CommitterState, Supplier) receives} the {@linkplain Obj object} pointed + * to by the {@linkplain Reference reference} and returns the new object to which the reference shall + * point. + * + * @param type of the {@link Obj} {@linkplain Reference#pointer() referenced} + * @param the commit result type for successful commits including non-changing + */ +public interface Committer { + + /** + * When called, commits to the same reference will be synchronized locally. + * + *

Using local reference-synchronization prevents commit retries. When using this feature, the + * actual {@link CommitRetryable#attempt(CommitterState, Supplier)} implementation must not block + * and must complete quickly. + */ + Committer synchronizingLocally(); + + /** + * Perform an atomic change. + * + *

The given {@link CommitRetryable} is called to perform the actual change. The implementation + * of the {@link CommitRetryable} must be side-effect-free and prepared to be called multiple + * times. + * + * @param commitRetryable performs the state change, must be side-effect-free + * @return the result as returned via {@link CommitterState#commitResult(Object, + * BaseCommitObj.Builder, Optional)} or an empty optional if {@linkplain + * CommitterState#noCommit() no change happened} + */ + Optional commit(CommitRetryable commitRetryable) + throws CommitException, RetryTimeoutException; + + /** + * Same as {@link #commit(CommitRetryable)}, but wraps the checked exceptions in a {@link + * RuntimeException}. + */ + default Optional commitRuntimeException( + CommitRetryable commitRetryable) { + try { + return commit(commitRetryable); + } catch (RetryTimeoutException e) { + throw new RuntimeException(e); + } + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/CommitterState.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/CommitterState.java new file mode 100644 index 0000000000..4142c4b581 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/CommitterState.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.commit; + +import jakarta.annotation.Nonnull; +import java.util.Optional; +import org.apache.polaris.persistence.nosql.api.obj.BaseCommitObj; + +public interface CommitterState extends UpdateState { + > Optional commitResult( + @Nonnull RESULT result, @Nonnull B refObjBuilder, @Nonnull Optional refObj); + + Optional noCommit(); + + Optional noCommit(@Nonnull RESULT result); +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/FairRetriesType.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/FairRetriesType.java new file mode 100644 index 0000000000..f27b0a7ec8 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/FairRetriesType.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.commit; + +public enum FairRetriesType { + UNFAIR, + SLEEPING, +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/RetryConfig.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/RetryConfig.java new file mode 100644 index 0000000000..109a333d84 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/RetryConfig.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.commit; + +import io.smallrye.config.WithDefault; +import java.time.Duration; +import org.apache.polaris.immutables.PolarisImmutable; +import org.immutables.value.Value; + +public interface RetryConfig { + RetryConfig DEFAULT_RETRY_CONFIG = BuildableRetryConfig.builder().build(); + + /** + * Maximum allowed time until a retry-loop and {@linkplain Committer#commit(CommitRetryable) + * commits} fails with a {@link RetryTimeoutException}, defaults to {@value #DEFAULT_TIMEOUT}. + */ + @WithDefault(DEFAULT_TIMEOUT) + Duration timeout(); + + /** Maximum number of allowed retries, defaults to {@value #DEFAULT_RETRIES}. */ + @WithDefault(DEFAULT_RETRIES) + int retries(); + + /** + * Initial lower bound for a retry-sleep duration for the retry-loop, defaults to {@link + * #DEFAULT_RETRY_INITIAL_SLEEP_LOWER}. This value will be doubled after each retry, as long as + * {@link #maxSleep()} is not exceeded. A concrete sleep duration will be randomly chosen between + * the current lower and upper bounds. + */ + @WithDefault(DEFAULT_RETRY_INITIAL_SLEEP_LOWER) + Duration initialSleepLower(); + + /** + * Initial upper bound for a retry-sleep duration for the retry-loop, defaults to {@link + * #DEFAULT_RETRY_INITIAL_SLEEP_UPPER}. This value will be doubled after each retry, as long as + * {@link #maxSleep()} is not exceeded. A concrete sleep duration will be randomly chosen between + * the current lower and upper bounds. + */ + @WithDefault(DEFAULT_RETRY_INITIAL_SLEEP_UPPER) + Duration initialSleepUpper(); + + /** Maximum retry-sleep duration, defaults to {@link #DEFAULT_RETRY_MAX_SLEEP}. */ + @WithDefault(DEFAULT_RETRY_MAX_SLEEP) + Duration maxSleep(); + + /** + * Without mitigation, very frequently started retry-loops running against highly contended + * resources can result in some retry-loops invocations never making any progress and eventually + * time out. + * + *

The default "fair retries type" helps in these scenarios without sacrificing the overall + * throughput too much. + */ + @WithDefault("SLEEPING") + FairRetriesType fairRetries(); + + String DEFAULT_TIMEOUT = "PT15S"; + String DEFAULT_RETRIES = "10000"; + String DEFAULT_RETRY_INITIAL_SLEEP_LOWER = "PT0.010S"; + String DEFAULT_RETRY_INITIAL_SLEEP_UPPER = "PT0.020S"; + String DEFAULT_RETRY_MAX_SLEEP = "PT0.250S"; + + @PolarisImmutable + interface BuildableRetryConfig extends RetryConfig { + + static ImmutableBuildableRetryConfig.Builder builder() { + return ImmutableBuildableRetryConfig.builder(); + } + + @Override + @Value.Default + default Duration timeout() { + return Duration.parse(DEFAULT_TIMEOUT); + } + + @Override + @Value.Default + default int retries() { + return Integer.parseInt(DEFAULT_RETRIES); + } + + @Override + @Value.Default + default Duration initialSleepLower() { + return Duration.parse(DEFAULT_RETRY_INITIAL_SLEEP_LOWER); + } + + @Override + @Value.Default + default Duration initialSleepUpper() { + return Duration.parse(DEFAULT_RETRY_INITIAL_SLEEP_UPPER); + } + + @Override + @Value.Default + default Duration maxSleep() { + return Duration.parse(DEFAULT_RETRY_MAX_SLEEP); + } + + @Override + @Value.Default + default FairRetriesType fairRetries() { + return FairRetriesType.SLEEPING; + } + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/RetryTimeoutException.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/RetryTimeoutException.java new file mode 100644 index 0000000000..cac5b2b3ef --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/RetryTimeoutException.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.commit; + +import java.time.Duration; + +/** + * Thrown to indicate that a retryable ({@linkplain Committer#commit(CommitRetryable) commit}) + * attempt eventually failed due to a timeout. + */ +public final class RetryTimeoutException extends Exception { + + private final int retry; + private final long timeNanos; + + public RetryTimeoutException(int retry, long timeNanos) { + super("Retry timeout after " + Duration.ofNanos(timeNanos) + ", " + retry + " retries"); + this.retry = retry; + this.timeNanos = timeNanos; + } + + public int getRetry() { + return retry; + } + + public long getTimeNanos() { + return timeNanos; + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/UpdateState.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/UpdateState.java new file mode 100644 index 0000000000..ed1611cce4 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/commit/UpdateState.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.commit; + +import jakarta.annotation.Nonnull; +import java.util.function.Supplier; +import org.apache.polaris.persistence.nosql.api.Persistence; +import org.apache.polaris.persistence.nosql.api.obj.Obj; +import org.apache.polaris.persistence.nosql.api.obj.ObjRef; + +public interface UpdateState { + + /** + * Use this instance of {@link Persistence} instead for operations related to this state, + * especially from {@link CommitRetryable#attempt(CommitterState, Supplier)}. + */ + Persistence persistence(); + + /** + * Add {@code obj} to the list of objects to be persisted, using {@code key} to {@linkplain + * #getWrittenByKey(Object) identify/reuse} an already persisted object in a retried attempt. + * + *

Prefer this function over {@link #writeIntent(Object, Obj)} and {@link + * #getWrittenByKey(Object)}. + * + *

Note that objects are not persisted immediately, but only after the {@linkplain + * CommitRetryable#attempt(CommitterState, Supplier) attempt returns} and before the {@linkplain + * Committer#commit(CommitRetryable) commit returns}. + * + *

A {@linkplain Committer#commit(CommitRetryable) failed commit} will delete objects passed to + * this function. + * + * @param key key identifying {@code obj} + * @param obj object to persist + * @return returns the given {@code obj}, if {@code key} is new, or the previous {@linkplain Obj}, + * if {@code key} was already used in a call to this function or {@link #writeIntent(Object, + * Obj)}. + */ + O writeIfNew(@Nonnull Object key, @Nonnull O obj, @Nonnull Class type); + + default Obj writeIfNew(@Nonnull Object key, @Nonnull Obj obj) { + return writeIfNew(key, obj, Obj.class); + } + + /** + * Add {@code obj} to the list of objects to be persisted, using {@code key} to {@linkplain + * #getWrittenByKey(Object) identify/reuse} an already persisted object in a retried attempt. + * + *

If an object was already associated with the same {@code key}, the previous object will be + * eventually deleted. + * + * @param key key identifying {@code obj} + * @param obj object to persist + * @return returns {@code obj} + */ + O writeOrReplace(@Nonnull Object key, @Nonnull O obj, @Nonnull Class type); + + default Obj writeOrReplace(@Nonnull Object key, @Nonnull Obj obj) { + return writeOrReplace(key, obj, Obj.class); + } + + /** + * Get an already present object by a use-case defined key. + * + * @return the already present object or {@code null}, if no object is associated with the {@code + * key} + */ + Obj getWrittenByKey(@Nonnull Object key); + + /** + * Get an already present object by its {@link ObjRef}. + * + * @return the already present object or {@code null}, if no object is associated with the {@code + * id} + */ + C getWrittenById(ObjRef id, Class clazz); + + /** + * Add {@code obj} to the list of objects to be persisted, using {@code key} to {@linkplain + * #getWrittenByKey(Object) identify/reuse} an already persisted object in a retried attempt. + * + *

Note that objects are not persisted immediately, but only after the {@linkplain + * CommitRetryable#attempt(CommitterState, Supplier) attempt returns} and before the {@linkplain + * Committer#commit(CommitRetryable) commit returns}. + * + *

A {@linkplain Committer#commit(CommitRetryable) failed commit} will delete objects passed to + * this function. + * + *

Prefer {@link #writeIfNew(Object, Obj)}, if possible. + * + * @param key key identifying {@code obj}, must be unique across all objects. Throws an {@link + * IllegalStateException}, if the {@code key} has already been used. + * @param obj object to persist + */ + void writeIntent(@Nonnull Object key, @Nonnull Obj obj); +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/exceptions/PersistenceException.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/exceptions/PersistenceException.java new file mode 100644 index 0000000000..65f289bb97 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/exceptions/PersistenceException.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.exceptions; + +public abstract class PersistenceException extends RuntimeException { + public PersistenceException(Throwable cause) { + super(cause); + } + + public PersistenceException(String message, Throwable cause) { + super(message, cause); + } + + public PersistenceException(String message) { + super(message); + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/exceptions/ReferenceAlreadyExistsException.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/exceptions/ReferenceAlreadyExistsException.java new file mode 100644 index 0000000000..59804f0eba --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/exceptions/ReferenceAlreadyExistsException.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.exceptions; + +public class ReferenceAlreadyExistsException extends PersistenceException { + public ReferenceAlreadyExistsException(String message) { + super(message); + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/exceptions/ReferenceNotFoundException.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/exceptions/ReferenceNotFoundException.java new file mode 100644 index 0000000000..85c8afe6bf --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/exceptions/ReferenceNotFoundException.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.exceptions; + +public class ReferenceNotFoundException extends PersistenceException { + public ReferenceNotFoundException(String message) { + super(message); + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/exceptions/UnknownOperationResultException.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/exceptions/UnknownOperationResultException.java new file mode 100644 index 0000000000..22ec764d6b --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/exceptions/UnknownOperationResultException.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.exceptions; + +import org.apache.polaris.persistence.nosql.api.Persistence; + +/** + * Thrown by {@link Persistence} implementations when the result of a database operation is unknown, + * for example, due to a timeout. 
+ */ +public class UnknownOperationResultException extends PersistenceException { + public UnknownOperationResultException(Throwable cause) { + super(cause); + } + + public UnknownOperationResultException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/EmptyIndex.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/EmptyIndex.java new file mode 100644 index 0000000000..cda55ba743 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/EmptyIndex.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.polaris.persistence.nosql.api.index; + +import jakarta.annotation.Nonnull; +import jakarta.annotation.Nullable; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +final class EmptyIndex { + private static final Index EMPTY = + new Index() { + @Override + public void prefetchIfNecessary(Iterable keys) {} + + @Override + public boolean contains(IndexKey key) { + return false; + } + + @Nullable + @Override + public Object get(@Nonnull IndexKey key) { + return null; + } + + @Nonnull + @Override + public Iterator> iterator( + @Nullable IndexKey lower, @Nullable IndexKey higher, boolean prefetch) { + return List.>of().iterator(); + } + + @Nonnull + @Override + public Iterator> reverseIterator( + @Nullable IndexKey lower, @Nullable IndexKey higher, boolean prefetch) { + return List.>of().iterator(); + } + }; + + @SuppressWarnings("unchecked") + static Index instance() { + return (Index) EMPTY; + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/Index.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/Index.java new file mode 100644 index 0000000000..becf6d8252 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/Index.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.index; + +import jakarta.annotation.Nonnull; +import jakarta.annotation.Nullable; +import java.util.Iterator; +import java.util.Map; + +/** + * General interface for all store indexes. + * + * <p>Indexes provide lexicographically ordered access to the index keys/elements via the iterator + * functions. Reverse iterator functions provide reverse lexicographically ordered access. + * + * <p>Instances of this interface are generally not thread-safe when modified; + * read-only accesses are generally thread-safe. + * + * @param <V> value type + * @see ModifiableIndex + * @see UpdatableIndex + * @see IndexContainer + */ +public interface Index extends Iterable> { + + /** Retrieves a read-only, empty index. */ + static Index empty() { + return EmptyIndex.instance(); + } + + /** + * Prefetch this index and/or index splits that are needed to satisfy operations against the given + * keys. + */ + void prefetchIfNecessary(Iterable keys); + + /** Check whether the index contains the given key and whether its value is not {@code null}. */ + boolean contains(IndexKey key); + + /** + * Retrieve the value for a key. + * + * @param key key to retrieve the value for + * @return value or {@code null}, if the key does not exist + */ + @Nullable + V get(@Nonnull IndexKey key); + + /** + * Convenience for {@link #iterator(IndexKey, IndexKey, boolean) iterator(null, null, false)}. + * + * @see #reverseIterator(IndexKey, IndexKey, boolean) + * @see #reverseIterator() + * @see #iterator(IndexKey, IndexKey, boolean) + */ + @Override + @Nonnull + default Iterator> iterator() { + return iterator(null, null, false); + } + + /** + * Iterate over the elements in this index, with optional lower/higher or prefix restrictions. + * + * <p>Prefix queries: {@code lower} and {@code higher} must be equal and not {@code + * null}, only elements that start with the given key value will be returned. + * + * <p>Start at queries: Start at {@code lower} (inclusive) + * + * <p>End at queries: End at {@code higher} (inclusive if exact match) + * + * <p>Range queries: {@code lower} (inclusive) and {@code higher} (inclusive if exact + * match) restrictions + * + * @param lower optional lower bound for the range, see description above. + * @param higher optional higher bound for the range, see description above. + * @param prefetch Enables eager prefetch of all potentially required indexes. Set to {@code + * false}, when using result paging. + * @return iterator over the elements in this index, lexicographically ordered. + * @see #reverseIterator(IndexKey, IndexKey, boolean) + * @see #reverseIterator() + * @see #iterator() + */ + @Nonnull + Iterator> iterator( + @Nullable IndexKey lower, @Nullable IndexKey higher, boolean prefetch); + + /** + * Convenience for {@link #reverseIterator(IndexKey, IndexKey, boolean) reverseIterator(null, + * null, false)}. + * + * @see #reverseIterator(IndexKey, IndexKey, boolean) + * @see #iterator(IndexKey, IndexKey, boolean) + * @see #iterator() + */ + @Nonnull + default Iterator> reverseIterator() { + return reverseIterator(null, null, false); + } + + /** + * Iterate in reverse order over the elements in this index, with optional lower/higher or prefix + * restrictions. + * + * <p>Prefix queries (NOT SUPPORTED, YET?): {@code lower} and {@code higher} must + * be equal and not {@code null}, only elements that start with the given key value will be + * returned. + * + * <p>Start at queries: Start at {@code higher} (inclusive) + * + * <p>End at queries: End at {@code lower} (inclusive if exact match) + * + * <p>Range queries: {@code higher} (inclusive) and {@code lower} (inclusive if exact + * match) restrictions + * + * @param lower optional lower bound for the range, see description above. + * @param higher optional higher bound for the range, see description above. + * @param prefetch Enables eager prefetch of all potentially required indexes. Set to {@code + * false}, when using result paging. + * @return iterator over the elements in this index, reverse-lexicographically ordered. + * @see #reverseIterator() + * @see #iterator(IndexKey, IndexKey, boolean) + * @see #iterator() + */ + @Nonnull + Iterator> reverseIterator( + @Nullable IndexKey lower, @Nullable IndexKey higher, boolean prefetch); +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/IndexContainer.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/IndexContainer.java new file mode 100644 index 0000000000..e2a7a591ff --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/IndexContainer.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.apache.polaris.persistence.nosql.api.index; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import jakarta.annotation.Nonnull; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.function.BiConsumer; +import org.apache.polaris.immutables.PolarisImmutable; +import org.apache.polaris.persistence.nosql.api.Persistence; +import org.apache.polaris.persistence.nosql.api.PersistenceParams; +import org.apache.polaris.persistence.nosql.api.obj.Obj; + +/** + * Container to hold an index, to be used as an attribute in {@link Obj}s, see the rules below for + * which functions to use. + * + * <p>Do not access the {@link #embedded()} and {@link #stripes()} attributes directly, use + * the {@link #indexForRead(Persistence, IndexValueSerializer) indexForRead()}/{@link + * #asUpdatableIndex(Persistence, IndexValueSerializer) asUpdatableIndex()} functions instead. + * + * <p>Do not construct an {@link IndexContainer} directly, use the {@link + * #newUpdatableIndex(Persistence, IndexValueSerializer) newUpdatableIndex()} and {@link + * #asUpdatableIndex(Persistence, IndexValueSerializer) asUpdatableIndex()} functions. + * + * @param <V> value type + */ +@PolarisImmutable +@JsonSerialize(as = ImmutableIndexContainer.class) +@JsonDeserialize(as = ImmutableIndexContainer.class) +public interface IndexContainer { + + /** + * Returns a read-only representation of the whole index from index information in this container. + * The returned index cannot be used for any serialization or any other write-intended operations. + */ + default Index indexForRead( + @Nonnull Persistence persistence, @Nonnull IndexValueSerializer indexValueSerializer) { + return persistence.buildReadIndex(this, indexValueSerializer); + } + + /** + * Builds an {@link UpdatableIndex} from index information in this container, to eventually + * {@linkplain UpdatableIndex#toIndexed(String, BiConsumer) build a new index container}, using + * the given element serializer. + */ + default UpdatableIndex asUpdatableIndex( + @Nonnull Persistence persistence, @Nonnull IndexValueSerializer indexValueSerializer) { + return persistence.buildWriteIndex(this, indexValueSerializer); + } + + /** + * Creates a new {@link UpdatableIndex} to eventually {@linkplain UpdatableIndex#toIndexed(String, + * BiConsumer) build a new index container}, using the given element serializer. + */ + static UpdatableIndex newUpdatableIndex( + @Nonnull Persistence persistence, @Nonnull IndexValueSerializer indexValueSerializer) { + return persistence.buildWriteIndex(null, indexValueSerializer); + } + + /** DO NOT ACCESS DIRECTLY, this is the serialized representation of the "embedded" index. */ + ByteBuffer embedded(); + + /** + * DO NOT ACCESS DIRECTLY, these are pointers to the composite reference index stripes, an + * "embedded" version of {@code IndexSegmentsObj}.
Index container objects that need to + * "externalize" index elements to a reference index, which requires up to {@link + * PersistenceParams#maxIndexStripes()}, will be kept here and not create another indirection via an + * {@code IndexSegmentsObj}. + * + * @see #embedded() + */ + @JsonInclude(JsonInclude.Include.NON_EMPTY) + List stripes(); +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/IndexKey.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/IndexKey.java new file mode 100644 index 0000000000..476960c92e --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/IndexKey.java @@ -0,0 +1,321 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.apache.polaris.persistence.nosql.api.index; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; +import static java.nio.charset.StandardCharsets.UTF_8; +import static java.util.Objects.requireNonNull; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import jakarta.annotation.Nonnull; +import jakarta.annotation.Nullable; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; + +/** + * Represents a key in an {@link Index}. + * + *

Index keys are always represented as a byte array. Convenience functions to use {@link String} + * and {@code long} as keys are provided this type. + * + *

The serialized representation of an {@link IndexKey} is its binary representation terminated + * by a {@code 0x01} byte. {@code 0x01} appearing in the value is escaped as {@code 0x02 0x01}, + * {@code 0x02} appearing in the value is escaped as {@code 0x02 0x02}, + * + *

The implementation assumes that {@code 0x00} byte values appear more often, for example, when + * serializing plain {@code long} keys, hence the choice to use {@code 0x01}/{@code 0x02}. + * + *

The serialized representation of {@link IndexKey}s is safe to be "partially serialized", as + * done by index implementations, which does not serialize the common prefix of a key compared to + * the previously serialized key. + * + *

{@link IndexKey}s are comparable, the results reflect the outcome of the unsigned + * comparison of the respective byte representations. + */ +@JsonSerialize(using = IndexKey.IndexKeySerializer.class) +@JsonDeserialize(using = IndexKey.IndexKeyDeserializer.class) +public final class IndexKey implements Comparable { + + static final byte EOF = 0x01; + static final byte ESC = 0x02; + static final byte EOF_ESCAPED = 0x03; + static final byte ESC_ESCAPED = 0x04; + + /** Maximum number of characters in a key. Note: characters can take up to 3 bytes via UTF-8. */ + public static final int MAX_LENGTH = 500; + + public static final IndexValueSerializer INDEX_KEY_SERIALIZER = + new IndexValueSerializer<>() { + @Override + public int serializedSize(@Nullable IndexKey value) { + return requireNonNull(value).serializedSize(); + } + + @Override + @Nonnull + public ByteBuffer serialize(@Nullable IndexKey value, @Nonnull ByteBuffer target) { + return requireNonNull(value).serialize(target); + } + + @Override + public IndexKey deserialize(@Nonnull ByteBuffer buffer) { + return deserializeKey(buffer); + } + + @Override + public void skip(@Nonnull ByteBuffer buffer) { + IndexKey.skip(buffer); + } + }; + + private final byte[] key; + + private boolean hasHash; + private int hash; + + private IndexKey(byte[] key) { + this.key = key; + } + + public static IndexKey key(ByteBuffer buffer) { + var key = new byte[buffer.remaining()]; + buffer.get(key); + return new IndexKey(key); + } + + public static IndexKey key(String string) { + var key = string.getBytes(UTF_8); + checkArgument(key.length <= MAX_LENGTH, "Key too long, max allowed length: %s", MAX_LENGTH); + return new IndexKey(key); + } + + public static IndexKey key(long value) { + var key = ByteBuffer.allocate(Long.BYTES).putLong(value).array(); + return new IndexKey(key); + } + + public static IndexKey deserializeKey(ByteBuffer buffer) { + var tmp = new byte[MAX_LENGTH]; + var l = 0; + while (true) { + var b = buffer.get(); + if (b 
== EOF) { + return new IndexKey(Arrays.copyOf(tmp, l)); + } + if (b == ESC) { + b = buffer.get(); + switch (b) { + case EOF_ESCAPED -> b = EOF; + case ESC_ESCAPED -> b = ESC; + default -> throw new IllegalArgumentException("Invalid escaped value " + b); + } + } + tmp[l++] = b; + checkArgument(l <= MAX_LENGTH, "Deserialized key too long"); + } + } + + public static void skip(ByteBuffer buffer) { + var l = 0; + while (true) { + var b = buffer.get(); + if (b == EOF) { + return; + } + if (b == ESC) { + b = buffer.get(); + switch (b) { + case EOF_ESCAPED, ESC_ESCAPED -> {} + default -> throw new IllegalArgumentException("Invalid escaped value " + b); + } + l += 2; + } else { + l++; + } + checkArgument(l <= MAX_LENGTH, "Deserialized key too long"); + } + } + + public int serializedSize() { + var l = 1; + for (byte b : key) { + if (b == ESC || b == EOF) { + // ESC and EOF are escaped + l += 2; + } else { + l++; + } + } + return l; + } + + public ByteBuffer serialize(ByteBuffer target) { + for (byte b : key) { + switch (b) { + case ESC: + target.put(ESC); + target.put(ESC_ESCAPED); + break; + case EOF: + target.put(ESC); + target.put(EOF_ESCAPED); + break; + default: + target.put(b); + break; + } + } + target.put(EOF); + return target; + } + + public void serializeNoFail(ByteBuffer target) { + var remain = target.remaining(); + var k = key; + var l = k.length; + for (int i = 0; i < l; i++, remain--) { + if (remain == 0) { + return; + } + var b = k[i]; + switch (b) { + case ESC: + target.put(ESC); + if (--remain == 0) { + return; + } + target.put(ESC_ESCAPED); + break; + case EOF: + target.put(ESC); + if (--remain == 0) { + return; + } + target.put(EOF_ESCAPED); + break; + default: + target.put(b); + break; + } + } + if (remain == 0) { + return; + } + target.put(EOF); + } + + @Override + public int compareTo(IndexKey that) { + return Arrays.compareUnsigned(this.key, that.key); + } + + @Override + public int hashCode() { + if (!hasHash) { + hash = Arrays.hashCode(key); + 
hasHash = true; + } + return hash; + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof IndexKey that)) { + return false; + } + + return Arrays.equals(this.key, that.key); + } + + @Override + public String toString() { + return new String(key, UTF_8); + } + + private static final char[] HEX = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' + }; + + public String toSafeString(String prefix) { + var sb = new StringBuilder(prefix.length() + key.length); + sb.append(prefix); + for (byte b : key) { + if (b > 32 && b < 127) { + sb.append((char) b); + } else { + sb.append('x'); + sb.append(HEX[(b >> 4) & 0x0f]); + sb.append(HEX[b & 0x0f]); + } + } + return sb.toString(); + } + + public boolean startsWith(@Nonnull IndexKey prefix) { + var preKey = prefix.key; + var preLen = preKey.length; + checkArgument(preLen > 0, "prefix must not be empty"); + var key = this.key; + var len = key.length; + for (var i = 0; ; i++) { + if (i == preLen) { + return true; + } + if (i >= len) { + return false; + } + if (key[i] != preKey[i]) { + return false; + } + } + } + + public long asLong() { + checkState(this.key.length == 8, "Invalid key length, must be 8"); + return ByteBuffer.wrap(this.key).getLong(); + } + + public ByteBuffer asByteBuffer() { + return ByteBuffer.wrap(key); + } + + public static class IndexKeySerializer extends JsonSerializer { + @Override + public void serialize(IndexKey value, JsonGenerator gen, SerializerProvider serializers) + throws IOException { + gen.writeBinary(value.key); + } + } + + public static class IndexKeyDeserializer extends JsonDeserializer { + @Override + public IndexKey deserialize(JsonParser p, DeserializationContext ctxt) throws IOException { + return new IndexKey(p.getBinaryValue()); + } + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/IndexStripe.java 
b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/IndexStripe.java new file mode 100644 index 0000000000..21842c66ec --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/IndexStripe.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.index; + +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import org.apache.polaris.immutables.PolarisImmutable; +import org.apache.polaris.persistence.nosql.api.obj.ObjRef; +import org.immutables.value.Value; + +/** + * Describes a spilled-out index stripe, do not use/interpret this type. + * + * <p>This type is only used internally by code that manages index serialization and must never be + * interpreted or even updated by any client code. + * + * <p>First/last key information is included to enable lazy-loading of required index stripes. + */ +@PolarisImmutable +@JsonSerialize(as = ImmutableIndexStripe.class) +@JsonDeserialize(as = ImmutableIndexStripe.class) +public interface IndexStripe { + @Value.Parameter + IndexKey firstKey(); + + @Value.Parameter + IndexKey lastKey(); + + @Value.Parameter + ObjRef segment(); + + /** Creates an {@link IndexStripe} from the given first key, last key and segment reference. */ + static IndexStripe indexStripe(IndexKey firstKey, IndexKey lastKey, ObjRef segment) { + return ImmutableIndexStripe.of(firstKey, lastKey, segment); + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/IndexValueSerializer.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/IndexValueSerializer.java new file mode 100644 index 0000000000..d214b80e80 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/IndexValueSerializer.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.apache.polaris.persistence.nosql.api.index; + +import jakarta.annotation.Nonnull; +import jakarta.annotation.Nullable; +import java.nio.ByteBuffer; + +/** + * Serializes index values of type {@code V} to, and deserializes them from, {@link ByteBuffer}s. + * + * @param <V> value type + */ +public interface IndexValueSerializer { + /** + * Returns the serialized size of {@code value}. NOTE(review): presumably the exact number of + * bytes that {@code serialize} writes for the same value - confirm against implementations. + */ + int serializedSize(@Nullable V value); + + /** Serialize {@code value} into {@code target}, returns {@code target}. */ + @Nonnull + ByteBuffer serialize(@Nullable V value, @Nonnull ByteBuffer target); + + /** + * Deserialize a value from {@code buffer}. Implementations must not assume that the given {@link + * ByteBuffer} only contains data for the value to deserialize, other data likely follows. + */ + @Nullable + V deserialize(@Nonnull ByteBuffer buffer); + + /** Skips an element, only updating the {@code buffer}'s {@link ByteBuffer#position()}. */ + void skip(@Nonnull ByteBuffer buffer); +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/ModifiableIndex.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/ModifiableIndex.java new file mode 100644 index 0000000000..61d35ae703 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/ModifiableIndex.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.index; + +import jakarta.annotation.Nonnull; +import org.apache.polaris.persistence.nosql.api.obj.ObjRef; + +/** Represents an {@link Index} that can be modified using put and remove functions. */ +public interface ModifiableIndex extends Index { + + /** + * Adds the given key element or updates the value (for example, an {@link ObjRef}) if the {@link + * IndexKey} already existed. + * + * @return {@code true} if the {@link IndexKey} didn't exist or {@code false} if the key was + * already present and the operation only updated the value. + */ + boolean put(@Nonnull IndexKey key, @Nonnull V value); + + /** + * Removes the index element for the given key. + * + * @return {@code true} if the {@link IndexKey} did exist and was removed, {@code false} + * otherwise. + */ + boolean remove(@Nonnull IndexKey key); +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/UpdatableIndex.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/UpdatableIndex.java new file mode 100644 index 0000000000..11653e79e8 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/index/UpdatableIndex.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.index; + +import jakarta.annotation.Nonnull; +import java.util.Optional; +import java.util.function.BiConsumer; +import org.apache.polaris.persistence.nosql.api.obj.Obj; + +/** + * Represents an index that can be modified using put and remove functions and that can be + * serialized as an attribute in an {@link Obj} interface via {@link IndexContainer}. + */ +public interface UpdatableIndex extends ModifiableIndex { + /** + * Build a serializable index container from this index object. This updatable index may no longer + * be accessible after this function has been called, a runtime exception may be thrown if the + * index is accessed after calling this function. + * + * @param prefix prefix to pass to the string argument of the {@code persistObj} consumer. + * @param persistObj callback invoked to persist the object, to be delegated to {@code + * CommitterState.writeOrReplace()} + * @return the updated {@link IndexContainer} + */ + IndexContainer toIndexed( + @Nonnull String prefix, @Nonnull BiConsumer persistObj); + + /** + * Similar to {@link #toIndexed(String, BiConsumer)}, but returns an empty {@link Optional} if + * the index container is empty.
+ */ + Optional> toOptionalIndexed( + @Nonnull String prefix, @Nonnull BiConsumer persistObj); +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/AbstractObjType.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/AbstractObjType.java new file mode 100644 index 0000000000..d2f91a8cc5 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/AbstractObjType.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.apache.polaris.persistence.nosql.api.obj; + +import java.util.function.LongSupplier; + +/** + * Base class for {@link ObjType} implementations, holding the type's id, name and target class. + * + * <p>Two instances are equal if they are of the exact same class and have the same id; {@link + * #toString()} returns the id. + */ +public abstract class AbstractObjType implements ObjType { + private final String id; + private final String name; + private final Class targetClass; + + protected AbstractObjType(String id, String name, Class targetClass) { + this.id = id; + this.name = name; + this.targetClass = targetClass; + } + + @Override + public String id() { + return id; + } + + @Override + public String name() { + return name; + } + + @Override + public Class targetClass() { + return targetClass; + } + + /** + * Variant whose {@code cachedObjectExpiresAtMicros} always returns {@code 0L}. NOTE(review): + * presumably this means objects of such types are never cached - confirm against {@link ObjType}. + */ + public abstract static class AbstractUncachedObjType extends AbstractObjType { + protected AbstractUncachedObjType(String id, String name, Class targetClass) { + super(id, name, targetClass); + } + + @Override + public long cachedObjectExpiresAtMicros(Obj obj, LongSupplier clockMicros) { + return 0L; + } + } + + @Override + @SuppressWarnings("EqualsGetClass") // explicit class-instance-equals is intentional + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + AbstractObjType that = (AbstractObjType) o; + + return id.equals(that.id); + } + + @Override + public int hashCode() { + return id.hashCode(); + } + + @Override + public String toString() { + return id; + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/BaseCommitObj.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/BaseCommitObj.java new file mode 100644 index 0000000000..f5502aad00 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/BaseCommitObj.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.obj; + +import static org.apache.polaris.persistence.nosql.api.obj.ObjRef.objRef; + +import com.google.errorprone.annotations.CanIgnoreReturnValue; +import java.util.Optional; + +/** Base interface for {@link Obj}s that represent a commit with a sequence number and parents. */ +public interface BaseCommitObj extends Obj { + /** + * Monotonically increasing counter representing the number of commits since the "beginning of + * time". + */ + long seq(); + + /** + * Zero, one or more parent-entry IDs of this commit, the nearest parent first. + * + * <p>This is an internal attribute used to more efficiently page through the commit log. + * + * <p>Only the first, the nearest parent shall be exposed to clients. + * + * <p>This is a {@code long[]} for more efficient serialization wrt space. + */ + long[] tail(); + + /** + * Returns a reference to the nearest parent, built from this object's {@link #type()} and the + * first element of {@link #tail()}, or an empty {@link Optional} if {@link #tail()} is empty. + */ + default Optional directParent() { + var t = tail(); + return t.length == 0 ? Optional.empty() : Optional.of(objRef(type(), t[0])); + } + + /** Builder contract for commit objects, covering the id, sequence number and tail. */ + interface Builder> { + @CanIgnoreReturnValue + B id(long id); + + @CanIgnoreReturnValue + B seq(long seq); + + @CanIgnoreReturnValue + B tail(long[] tail); + + O build(); + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/GenericObj.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/GenericObj.java new file mode 100644 index 0000000000..8ba4d1903e --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/GenericObj.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.apache.polaris.persistence.nosql.api.obj; + +import static org.apache.polaris.persistence.nosql.api.obj.ObjSerializationHelper.OBJ_CREATED_AT_KEY; +import static org.apache.polaris.persistence.nosql.api.obj.ObjSerializationHelper.OBJ_ID_KEY; +import static org.apache.polaris.persistence.nosql.api.obj.ObjSerializationHelper.OBJ_NUM_PARTS_KEY; +import static org.apache.polaris.persistence.nosql.api.obj.ObjSerializationHelper.OBJ_TYPE_KEY; +import static org.apache.polaris.persistence.nosql.api.obj.ObjSerializationHelper.OBJ_VERSION_TOKEN; + +import com.fasterxml.jackson.annotation.JacksonInject; +import com.fasterxml.jackson.annotation.JsonAnyGetter; +import com.fasterxml.jackson.annotation.JsonAnySetter; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.google.common.annotations.VisibleForTesting; +import jakarta.annotation.Nullable; +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Target; +import java.util.Map; +import org.apache.polaris.immutables.PolarisImmutable; +import org.immutables.value.Value; +/** + * {@link Obj} implementation that keeps every serialized property, except the standard {@link Obj} + * attributes, in the {@link #attributes()} map. The standard attributes are excluded from JSON + * output via {@link JsonIgnore} and are supplied through Jackson injection on deserialization. + */ +@PolarisImmutable +@Value.Style(jdkOnly = true) +@VisibleForTesting +public abstract class GenericObj implements Obj { + + @Override + @JsonIgnore + public abstract ObjType type(); + + @Override + @JsonIgnore + public abstract long id(); + + @Override + @JsonIgnore + public abstract int numParts(); + + @Override + @JsonIgnore + @Value.Auxiliary + public abstract long createdAtMicros(); + + @Override + @JsonIgnore + @Nullable + public abstract String versionToken(); + /** Properties of the object, written to JSON as top-level properties via the any-getter. */ + @JsonAnyGetter + @AllowNulls + public abstract Map attributes(); + /** + * Jackson creator. Type, id, number of parts, version token and created-at timestamp are taken + * from the injected values. All other properties are copied into {@link #attributes()}, skipping + * the keys {@code type}, {@code id}, {@code numParts}, {@code createdAtMicros} and {@code + * versionToken}. A {@code null} version token is left unset on the builder. + */ + @JsonCreator + static GenericObj create( + @JacksonInject(OBJ_TYPE_KEY) ObjType objType, + @JacksonInject(OBJ_ID_KEY) long id, + @JacksonInject(OBJ_NUM_PARTS_KEY) int numParts, + @JacksonInject(OBJ_VERSION_TOKEN) String versionToken, + @JacksonInject(OBJ_CREATED_AT_KEY) long 
createdAtMicros, + @JsonAnySetter Map attributes) { + ImmutableGenericObj.Builder builder = + ImmutableGenericObj.builder() + .type(objType) + .id(id) + .numParts(numParts) + .createdAtMicros(createdAtMicros); + if (versionToken != null) { + builder.versionToken(versionToken); + } + attributes.forEach( + (k, v) -> { + if (!"type".equals(k) + && !"id".equals(k) + && !"numParts".equals(k) + && !"createdAtMicros".equals(k) + && !"versionToken".equals(k)) { + builder.putAttributes(k, v); + } + }); + return builder.build(); + } + /** + * Marker annotation placed on {@link #attributes()}. NOTE(review): presumably picked up by the + * Immutables annotation processor (by simple name) to permit {@code null} map values - confirm. + */ + @Documented + @Target({ElementType.FIELD, ElementType.METHOD}) + @interface AllowNulls {} +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/Obj.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/Obj.java new file mode 100644 index 0000000000..5384ba7004 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/Obj.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.obj; + +import static org.apache.polaris.persistence.nosql.api.obj.ObjSerializationHelper.OBJ_CREATED_AT_KEY; +import static org.apache.polaris.persistence.nosql.api.obj.ObjSerializationHelper.OBJ_ID_KEY; +import static org.apache.polaris.persistence.nosql.api.obj.ObjSerializationHelper.OBJ_NUM_PARTS_KEY; +import static org.apache.polaris.persistence.nosql.api.obj.ObjSerializationHelper.OBJ_TYPE_KEY; +import static org.apache.polaris.persistence.nosql.api.obj.ObjSerializationHelper.OBJ_VERSION_TOKEN; + +import com.fasterxml.jackson.annotation.JacksonInject; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonView; +import com.google.errorprone.annotations.CanIgnoreReturnValue; +import jakarta.annotation.Nonnull; +import jakarta.annotation.Nullable; +import org.apache.polaris.persistence.nosql.api.Persistence; +import org.immutables.value.Value; +/** + * Base interface of the typed objects handled via {@link Persistence}. Declares the attributes + * common to all objects: {@link #type()}, {@link #id()}, {@link #numParts()}, {@link + * #createdAtMicros()} and {@link #versionToken()}. + */ +public interface Obj { + + // Note on the JsonView annotations used here: + // + // BasePersistence.serializeObj() does not serialize the attributes type, id, createdAtMicros, + // versionToken, because those are either part of the key in the database or available via + // distinct database columns/attributes, so it is unnecessary to serialize those again. + /** Type of this object; injected under {@code OBJ_TYPE_KEY} when deserializing. */ + @JsonView(ObjSerializeAll.class) + @JacksonInject(OBJ_TYPE_KEY) + ObjType type(); + /** Numeric ID of this object; injected under {@code OBJ_ID_KEY} when deserializing. */ + @JsonView(ObjSerializeAll.class) + @JacksonInject(OBJ_ID_KEY) + long id(); + + /** + * Indicates the number of parts of which the object is split in the backing database. This value + * is available after the object has been {@linkplain Persistence#write(Obj, Class) written}. + */ + @JsonView(ObjSerializeAll.class) + @JacksonInject(OBJ_NUM_PARTS_KEY) + @Value.Default + default int numParts() { + return 1; + } + + /** + * Contains the timestamp in microseconds since (Unix) epoch when the object was last written, + * only intended for repository cleanup mechanisms. + * + *

The value of this attribute is generated exclusively by the {@link Persistence} + * implementations. + * + *

This attribute is not consistent when using a caching {@link Persistence}. + */ + @JsonView(ObjSerializeAll.class) + @JsonInclude(JsonInclude.Include.NON_DEFAULT) + @JacksonInject(OBJ_CREATED_AT_KEY) + @Value.Default + @Value.Auxiliary + default long createdAtMicros() { + return 0L; + } + + /** + * Opaque token used for objects when persisted using conditional {@linkplain + * Persistence#conditionalInsert(Obj, Class) inserts}, {@linkplain + * Persistence#conditionalUpdate(Obj, Obj, Class) updates} or {@linkplain + * Persistence#conditionalDelete(Obj, Class) deletes}. + * + *

This value must be {@code null} for non-conditional operations and must be non-{@code null} + * when used for conditional operations. + */ + @JsonView(ObjSerializeAll.class) + @JsonInclude(JsonInclude.Include.NON_NULL) + @JacksonInject(OBJ_VERSION_TOKEN) + @Nullable + String versionToken(); + /** + * NOTE(review): presumably returns an object equal to this one except for the given {@link + * #createdAtMicros()} value - confirm against the implementations. + */ + @SuppressWarnings("NullableProblems") + @Nonnull + Obj withCreatedAtMicros(long createdAt); + /** + * NOTE(review): presumably returns an object equal to this one except for the given {@link + * #numParts()} value - confirm against the implementations. + */ + @SuppressWarnings("NullableProblems") + @Nonnull + Obj withNumParts(int numParts); + /** + * Jackson view marker referenced by the {@link JsonView} annotations on {@link #type()}, {@link + * #id()}, {@link #numParts()}, {@link #createdAtMicros()} and {@link #versionToken()}. + */ + class ObjSerializeAll { + private ObjSerializeAll() {} + } + + /** The Jackson view used when {@link Obj}s are serialized to be persisted. */ + class StorageView { + private StorageView() {} + } + /** Builder contract with setters for {@code versionToken}, {@code id} and {@code numParts}. */ + interface Builder> { + @CanIgnoreReturnValue + B versionToken(@Nullable String versionToken); + + @CanIgnoreReturnValue + B id(long id); + + @CanIgnoreReturnValue + B numParts(int numParts); + + O build(); + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/ObjRef.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/ObjRef.java new file mode 100644 index 0000000000..caa45f3a2d --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/ObjRef.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.obj; + +import static com.google.common.base.Preconditions.checkArgument; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import jakarta.annotation.Nonnull; +import jakarta.annotation.Nullable; +import java.io.IOException; +import java.nio.ByteBuffer; +import org.apache.polaris.immutables.PolarisImmutable; +import org.apache.polaris.persistence.nosql.api.Persistence; +import org.apache.polaris.persistence.nosql.api.index.IndexValueSerializer; +import org.immutables.value.Value; + +/** + * Describes a reference to an object. + * + *

Note that the persisted object key is just the {@link #id() integer ID}, the {@link #type() + * object type} and {@link #numParts()} attributes are rather hints. + * + * The Jackson representation is the binary value produced by {@link #toBytes()} and read back via + * {@link #fromBytes(byte[])}. + */ +@JsonSerialize(using = ObjRef.ObjRefSerializer.class) +@JsonDeserialize(using = ObjRef.ObjRefDeserializer.class) +@PolarisImmutable +public interface ObjRef { + /** + * {@link IndexValueSerializer} for nullable {@link ObjRef}s. A {@code null} value is written via + * {@link #serializeNullToByteBuffer(ByteBuffer)} and sized via {@link #serializedNullSize()}. + */ + IndexValueSerializer OBJ_REF_SERIALIZER = + new IndexValueSerializer<>() { + @Override + public int serializedSize(@Nullable ObjRef value) { + return value != null ? value.serializedSize() : serializedNullSize(); + } + + @Override + @Nonnull + public ByteBuffer serialize(@Nullable ObjRef value, @Nonnull ByteBuffer target) { + if (value == null) { + return serializeNullToByteBuffer(target); + } + return value.serializeToByteBuffer(target); + } + + @Override + @Nullable + public ObjRef deserialize(@Nonnull ByteBuffer buffer) { + return fromByteBuffer(buffer); + } + + @Override + public void skip(@Nonnull ByteBuffer buffer) { + skipObjId(buffer); + } + }; + /** + * Creates a reference from a type ID, an object ID and a third value that is stored as {@link + * #numParts()} (the parameter is named {@code partNum}). + */ + static ObjRef objRef(@Nonnull String type, long id, int partNum) { + return ImmutableObjRef.of(type, id, partNum); + } + /** Same as {@link #objRef(String, long, int)}, using {@link ObjType#id()} as the type ID. */ + static ObjRef objRef(@Nonnull ObjType type, long id, int partNum) { + return objRef(type.id(), id, partNum); + } + /** Creates a reference with {@link #numParts()} set to {@code 0}. */ + static ObjRef objRef(@Nonnull String type, long id) { + return objRef(type, id, 0); + } + /** Creates a reference with {@link #numParts()} set to {@code 0}. */ + static ObjRef objRef(@Nonnull ObjType type, long id) { + return objRef(type.id(), id); + } + /** Creates a reference from the type, ID and {@link Obj#numParts()} of the given object. */ + static ObjRef objRef(@Nonnull Obj obj) { + return objRef(obj.type(), obj.id(), obj.numParts()); + } + + /** {@linkplain ObjType#id() Object type ID} this object reference refers to. */ + @Value.Parameter(order = 1) + String type(); + + /** Numeric ID of this object reference. */ + @Value.Parameter(order = 2) + long id(); + + /** + * Indicates the number of parts of which the object is split in the backing database. This value + * is available after the object has been {@linkplain Persistence#write(Obj, Class) written}. + * + *

This value is rather a hint than a strictly correct value. This value should be + * correct, but {@link Persistence} implementations must expect the case that the real number of + * written parts is different. + */ + @Value.Parameter(order = 3) + @Value.Auxiliary + int numParts(); + /** + * Serializes this reference into a newly allocated buffer and returns it flipped. The value + * {@code numParts() - 1} is handed to the serialization as the part value, which is only written + * when it is greater than zero. {@code numParts()} values of 0 and 1 are therefore written + * identically and both read back as 1. The check is on {@link #numParts()} although its message + * says {@code partNum}. + */ + default ByteBuffer toByteBuffer() { + checkArgument(numParts() >= 0, "partNum must not be negative"); + return ObjRefSerialization.serializeToByteBuffer(type(), id(), numParts() - 1).flip(); + } + /** Number of bytes written by {@link #serializeNullToByteBuffer(ByteBuffer)}. */ + static int serializedNullSize() { + return 1; + } + /** Number of bytes that the serialize methods of this reference write. */ + default int serializedSize() { + checkArgument(numParts() >= 0, "partNum must not be negative"); + return ObjRefSerialization.serializedSize(type(), id(), numParts() - 1); + } + /** Writes the representation of a {@code null} reference, a single zero byte. */ + static ByteBuffer serializeNullToByteBuffer(ByteBuffer target) { + return target.put((byte) 0); + } + /** Writes this reference at the current position of {@code bytes} and returns {@code bytes}. */ + default ByteBuffer serializeToByteBuffer(ByteBuffer bytes) { + checkArgument(numParts() >= 0, "partNum must not be negative"); + return ObjRefSerialization.serializeToByteBuffer(bytes, type(), id(), numParts() - 1); + } + /** Serializes this reference into a new byte array. */ + default byte[] toBytes() { + checkArgument(numParts() >= 0, "partNum must not be negative"); + return ObjRefSerialization.serializeAsBytes(type(), id(), numParts() - 1); + } + /** Advances the position of {@code bytes} past one serialized reference. */ + static ByteBuffer skipObjId(@Nonnull ByteBuffer bytes) { + return ObjRefSerialization.skipId(bytes); + } + /** Reads one reference; yields {@code null} for a null/empty buffer or a zero header byte. */ + static ObjRef fromByteBuffer(ByteBuffer bytes) { + return ObjRefSerialization.fromByteBuffer(bytes); + } + /** Reads one reference; yields {@code null} for a null/empty array or a zero header byte. */ + static ObjRef fromBytes(byte[] bytes) { + return ObjRefSerialization.fromBytes(bytes); + } + /** Jackson serializer writing {@link #toBytes()} as a binary value. */ + class ObjRefSerializer extends JsonSerializer { + @Override + public void serialize(ObjRef value, JsonGenerator gen, SerializerProvider serializers) + throws IOException { + gen.writeBinary(value.toBytes()); + } + } + /** Jackson deserializer reading a binary value via {@link #fromBytes(byte[])}. */ + class ObjRefDeserializer extends JsonDeserializer { + @Override + public ObjRef deserialize(JsonParser p, DeserializationContext ctxt) throws IOException { + return ObjRef.fromBytes(p.getBinaryValue()); + } + } +} diff --git 
a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/ObjRefSerialization.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/ObjRefSerialization.java new file mode 100644 index 0000000000..92ca6538e0 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/ObjRefSerialization.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.obj; + +import static com.google.common.base.Preconditions.checkArgument; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.polaris.persistence.nosql.api.obj.ObjRef.objRef; + +import jakarta.annotation.Nonnull; +import java.nio.ByteBuffer; +/** + * Binary (de)serialization of {@link ObjRef} values. + * + * Layout as written by the serialize methods: one header byte, the UTF-8 bytes of the type name + * (1 to 32 bytes), the 8 byte {@code id} and, only if {@code part > 0}, the 4 byte {@code part} + * value. The byte-array variant writes {@code id} and {@code part} most significant byte first. + * + * Header byte: bits 0-4 hold the name length minus one, bits 5-6 are read as the version (only + * version 0 is accepted when reading), bit 7 flags the presence of the {@code part} value. + * + * The read methods return {@code objRef(type, id, part + 1)}, with {@code part} being 0 when the + * flag is not set. + * + * NOTE(review): the read and skip methods treat a header byte of 0 as "null", but a type name of + * exactly one byte without a {@code part} value also yields header byte 0 - confirm that such type + * names are never serialized. + */ +final class ObjRefSerialization { + private ObjRefSerialization() {} + /** Returns the number of bytes the serialize methods write; {@code id} is not evaluated. */ + static int serializedSize(String type, long id, int part) { + var typeNameBytes = type.getBytes(UTF_8); + var nameLen = typeNameBytes.length; + checkArgument(nameLen > 0 && nameLen <= 32, "Name length must be between 1 and 32"); + var hasMultipleParts = part > 0; + return 1 + nameLen + Long.BYTES + (hasMultipleParts ? 
Integer.BYTES : 0); + } + /** Serializes the given values into a new, exactly sized byte array. */ + static byte[] serializeAsBytes(String type, long id, int part) { + var typeNameBytes = type.getBytes(UTF_8); + var nameLen = typeNameBytes.length; + checkArgument(nameLen > 0 && nameLen <= 32, "Name length must be between 1 and 32"); + var hasMultipleParts = part > 0; + var hasPartsAndNameLen = (byte) ((nameLen - 1) | (hasMultipleParts ? 0x80 : 0)); + + var bytes = new byte[1 + nameLen + Long.BYTES + (hasMultipleParts ? Integer.BYTES : 0)]; + + bytes[0] = hasPartsAndNameLen; + System.arraycopy(typeNameBytes, 0, bytes, 1, nameLen); + int64ToBytes(bytes, 1 + nameLen, id); + if (hasMultipleParts) { + int32ToBytes(bytes, 1 + nameLen + Long.BYTES, part); + } + return bytes; + } + /** Serializes into a newly allocated buffer of exactly the serialized size (not flipped). */ + static ByteBuffer serializeToByteBuffer(String type, long id, int part) { + return serializeToByteBuffer( + ByteBuffer.allocate(serializedSize(type, id, part)), type, id, part); + } + /** Writes the given values at the current position of {@code bytes} and returns it. */ + static ByteBuffer serializeToByteBuffer(ByteBuffer bytes, String type, long id, int part) { + var typeNameBytes = type.getBytes(UTF_8); + var nameLen = typeNameBytes.length; + checkArgument(nameLen > 0 && nameLen <= 32, "Name length must be between 1 and 32"); + var hasMultipleParts = part > 0; + var hasPartsAndNameLen = (byte) ((nameLen - 1) | (hasMultipleParts ? 0x80 : 0)); + + bytes.put(hasPartsAndNameLen); + bytes.put(typeNameBytes); + bytes.putLong(id); + if (hasMultipleParts) { + bytes.putInt(part); + } + return bytes; + } + /** + * Reads the header byte and moves the buffer position past the remaining bytes of the value. A + * zero header byte consumes only that byte. + * + * @throws IllegalArgumentException if the version in the header byte is not 0 + */ + static ByteBuffer skipId(@Nonnull ByteBuffer bytes) { + var versionAndNameLength = bytes.get(); + if (versionAndNameLength == 0) { + return bytes; + } + var version = extractVersion(versionAndNameLength); + var nameLen = extractNameLen(versionAndNameLength); + var hasMultipleParts = extractHasMultipleParts(versionAndNameLength); + if (version == 0) { + return bytes.position( + bytes.position() + nameLen + Long.BYTES + (hasMultipleParts ? 
Integer.BYTES : 0)); + } + throw new IllegalArgumentException("Unsupported ObjId version: " + version); + } + /** + * Reads one reference from the current buffer position. Returns {@code null} if the buffer is + * {@code null}, has no remaining bytes or the header byte is zero. + * + * @throws IllegalArgumentException if the version in the header byte is not 0 + */ + static ObjRef fromByteBuffer(ByteBuffer bytes) { + if (bytes == null || bytes.remaining() == 0) { + return null; + } + var versionAndNameLength = bytes.get(); + if (versionAndNameLength == 0) { + return null; + } + var version = extractVersion(versionAndNameLength); + var nameLen = extractNameLen(versionAndNameLength); + var hasMultipleParts = extractHasMultipleParts(versionAndNameLength); + + if (version == 0) { + var nameBuf = new byte[nameLen]; + bytes.get(nameBuf); + var type = new String(nameBuf, 0, nameLen, UTF_8); + var id = bytes.getLong(); + var part = hasMultipleParts ? bytes.getInt() : 0; + return objRef(type, id, part + 1); + } + throw new IllegalArgumentException("Unsupported ObjId version: " + version); + } + /** + * Reads one reference from the start of the array. Returns {@code null} if the array is {@code + * null}, empty or the header byte is zero. + * + * @throws IllegalArgumentException if the version in the header byte is not 0 + */ + static ObjRef fromBytes(byte[] bytes) { + if (bytes == null || bytes.length == 0) { + return null; + } + var versionAndNameLength = bytes[0]; + if (versionAndNameLength == 0) { + return null; + } + var version = extractVersion(versionAndNameLength); + var nameLen = extractNameLen(versionAndNameLength); + var hasPartNum = extractHasMultipleParts(versionAndNameLength); + + if (version == 0) { + var type = new String(bytes, 1, nameLen, UTF_8); + var id = int64FromBytes(bytes, 1 + nameLen); + var part = hasPartNum ? 
int32FromBytes(bytes, 1 + nameLen + Long.BYTES) : 0; + return objRef(type, id, part + 1); + } + throw new IllegalArgumentException("Unsupported ObjId version: " + version); + } + + private static int extractNameLen(byte versionAndNameLength) { + // lower 5 bits (bits 0-4) hold the name length minus one + return (versionAndNameLength & 0x1F) + 1; + } + + private static int extractVersion(byte versionAndNameLength) { + // 2 bits (bits 5 and 6), the mask 0x3 excludes bit 7 + return (versionAndNameLength >>> 5) & 0x3; + } + + private static boolean extractHasMultipleParts(byte versionAndNameLength) { + // 1 bit (bit 7) + return (versionAndNameLength & 0x80) == 0x80; + } + + private static void int64ToBytes(byte[] bytes, int off, long v) { + bytes[off++] = (byte) (v >>> 56); + bytes[off++] = (byte) (v >>> 48); + bytes[off++] = (byte) (v >>> 40); + bytes[off++] = (byte) (v >>> 32); + bytes[off++] = (byte) (v >>> 24); + bytes[off++] = (byte) (v >>> 16); + bytes[off++] = (byte) (v >>> 8); + bytes[off] = (byte) v; + } + + private static long int64FromBytes(byte[] bytes, int off) { + var v = ((long) (bytes[off++] & 0xFF)) << 56; + v |= ((long) (bytes[off++] & 0xFF)) << 48; + v |= ((long) (bytes[off++] & 0xFF)) << 40; + v |= ((long) (bytes[off++] & 0xFF)) << 32; + v |= ((long) (bytes[off++] & 0xFF)) << 24; + v |= ((long) (bytes[off++] & 0xFF)) << 16; + v |= ((long) (bytes[off++] & 0xFF)) << 8; + v |= bytes[off] & 0xFF; + return v; + } + + private static void int32ToBytes(byte[] bytes, int off, int v) { + bytes[off++] = (byte) (v >>> 24); + bytes[off++] = (byte) (v >>> 16); + bytes[off++] = (byte) (v >>> 8); + bytes[off] = (byte) v; + } + + private static int int32FromBytes(byte[] bytes, int off) { + var v = (bytes[off++] & 0xFF) << 24; + v |= (bytes[off++] & 0xFF) << 16; + v |= (bytes[off++] & 0xFF) << 8; + v |= bytes[off] & 0xFF; + return v; + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/ObjSerializationHelper.java 
b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/ObjSerializationHelper.java new file mode 100644 index 0000000000..65ddee494f --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/ObjSerializationHelper.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.obj; + +import com.fasterxml.jackson.annotation.JacksonInject; +import com.fasterxml.jackson.databind.InjectableValues; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; +import jakarta.annotation.Nonnull; +/** + * Defines the Jackson injection keys for the standard object attributes and provides a factory for + * an {@link ObjectReader} that has values for all of those keys registered. + */ +public final class ObjSerializationHelper { + private ObjSerializationHelper() {} + + /** + * The key used to store the injectable {@code long id} instance, representing the id of the + * object being deserialized. Meant to be used in methods and constructor parameters annotated + * with {@link JacksonInject}. 
+ */ + static final String OBJ_ID_KEY = "polaris.persistence.ObjId"; + /** Injection key for the {@code int numParts} value of the object being deserialized. */ + static final String OBJ_NUM_PARTS_KEY = "polaris.persistence.ObjNumParts"; + /** Injection key for the {@link ObjType} of the object being deserialized. */ + static final String OBJ_TYPE_KEY = "polaris.persistence.ObjType"; + /** Injection key for the (nullable) version token of the object being deserialized. */ + static final String OBJ_VERSION_TOKEN = "polaris.persistence.ObjVersion"; + /** Injection key for the {@code long createdAtMicros} value of the object being deserialized. */ + static final String OBJ_CREATED_AT_KEY = "polaris.persistence.ObjCreatedAt"; + + /** + * Returns an {@link ObjectReader} for the given target {@link ObjType} using the key {@value + * #OBJ_TYPE_KEY}, with the given {@code long id} injectable under the key {@value #OBJ_ID_KEY}, + * version token using the key {@value #OBJ_VERSION_TOKEN}, {@code createdAtMicros} timestamp + * using the key {@value #OBJ_CREATED_AT_KEY}, {@code numParts} using the key {@value + * #OBJ_NUM_PARTS_KEY}. + * + * The returned reader is bound to the {@link ObjType#targetClass() target class} of the given + * object type. + */ + public static ObjectReader contextualReader( + @Nonnull ObjectMapper mapper, + @Nonnull ObjType objType, + long id, + int numParts, + String objVersionToken, + long objCreatedAtMicros) { + InjectableValues values = + new InjectableValues.Std() + .addValue(OBJ_TYPE_KEY, objType) + .addValue(OBJ_ID_KEY, id) + .addValue(OBJ_NUM_PARTS_KEY, numParts) + .addValue(OBJ_VERSION_TOKEN, objVersionToken) + .addValue(OBJ_CREATED_AT_KEY, objCreatedAtMicros); + return mapper.reader(values).forType(objType.targetClass()); + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/ObjType.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/ObjType.java new file mode 100644 index 0000000000..4dde0e7342 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/ObjType.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.obj; + +import static org.apache.polaris.persistence.nosql.api.obj.ObjTypes.objTypeById; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import java.io.IOException; +import java.util.function.LongSupplier; +/** + * Describes a type of {@link Obj}: a unique ID, a human-readable name, the Java target class and + * the caching rules for objects of the type. In JSON an object type is represented by its {@link + * #id()} and resolved back via {@link ObjTypes#objTypeById(String)}. + */ +@JsonSerialize(using = ObjType.ObjTypeSerializer.class) +@JsonDeserialize(using = ObjType.ObjTypeDeserializer.class) +public interface ObjType { + /** Human-readable name. */ + String name(); + + /** Must be unique among all registered object types. */ + String id(); + + /** The target class that objects of this type should be serialized from and deserialized to. */ + Class targetClass(); + + /** + * Allows an object type to define how long a particular object instance can be cached. + * + *

{@value #CACHE_UNLIMITED}, which is the default implementation, defines that an object + * instance can be cached forever. + * + *

{@value #NOT_CACHED} defines that an object instance must never be cached. + * + *

A positive value defines the timestamp in "microseconds since epoch" when the cached object + * can be evicted. + */ + default long cachedObjectExpiresAtMicros(Obj obj, LongSupplier clockMicros) { + return CACHE_UNLIMITED; + } + + /** + * Allows an object type to define how long the fact of a non-existing object instance can be + * cached. + * + *

{@value #CACHE_UNLIMITED} defines that the non-existence of an object can be cached forever. + * + *

{@value #NOT_CACHED}, which is the default implementation, defines that the non-existence of + * an object must never be cached. + * + *

A positive value defines the timestamp in "microseconds since epoch" when the negative-cache + * sentinel can be evicted. + */ + default long negativeCacheExpiresAtMicros(LongSupplier clockMicros) { + return NOT_CACHED; + } + /** Return value of the cache-expiry methods meaning "cache without expiration". */ + long CACHE_UNLIMITED = -1L; + long NOT_CACHED = 0L; + /** Jackson serializer writing an object type as its {@link ObjType#id()} string. */ + class ObjTypeSerializer extends JsonSerializer { + @Override + public void serialize(ObjType value, JsonGenerator gen, SerializerProvider provider) + throws IOException { + gen.writeString(value.id()); + } + } + /** Jackson deserializer resolving the ID text via {@link ObjTypes#objTypeById(String)}. */ + class ObjTypeDeserializer extends JsonDeserializer { + @Override + public ObjType deserialize(JsonParser p, DeserializationContext ctxt) throws IOException { + return objTypeById(p.getText()); + } + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/ObjTypes.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/ObjTypes.java new file mode 100644 index 0000000000..6e7978b87f --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/obj/ObjTypes.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.obj; + +import static java.lang.String.format; +import static java.util.Collections.unmodifiableMap; + +import jakarta.annotation.Nonnull; +import java.util.HashMap; +import java.util.Map; +import java.util.ServiceLoader; +import java.util.concurrent.ConcurrentHashMap; +/** + * Lookup of {@link ObjType}s by ID. Registered types are discovered once via {@link + * ServiceLoader}; IDs without a registered type are mapped to dynamically created generic types + * that use {@link GenericObj} as the target class. + */ +public final class ObjTypes { + private ObjTypes() {} + /** + * Returns the registered object type for the given ID or, if none is registered, a generic type + * for that ID. Generic types are created on first use and reused for later lookups of the same + * ID. + */ + @Nonnull + public static ObjType objTypeById(@Nonnull String id) { + var type = Registry.BY_ID.get(id); + if (type == null) { + type = Registry.genericType(id); + } + return type; + } + /** Returns the unmodifiable map of object types discovered via {@link ServiceLoader}, by ID. */ + public static Map nonGenericObjTypes() { + return Registry.BY_ID; + } + /** + * Holder of the registered and the generic object types. The static initializer fails with an + * {@link IllegalStateException} if two discovered types share the same ID. + */ + private static final class Registry { + private static final Map BY_ID; + private static final Map GENERIC_TYPES = new ConcurrentHashMap<>(); + + static ObjType genericType(String name) { + return GENERIC_TYPES.computeIfAbsent(name, GenObjType::new); + } + + static final class GenObjType extends AbstractObjType { + GenObjType(String id) { + super(id, "Generic (" + id + ")", GenericObj.class); + } + + @Override + public String name() { + return "Generic ObjType (dynamically created)"; + } + } + + static { + var byId = new HashMap(); + var loader = ServiceLoader.load(ObjType.class); + loader.stream() + .map(ServiceLoader.Provider::get) + .forEach( + objType -> { + ObjType ex = byId.put(objType.id(), objType); + if (ex != null) { + throw new IllegalStateException( + format("Duplicate object type ID: from %s and %s", objType, ex)); + } + }); + BY_ID = unmodifiableMap(byId); + } + } +} diff --git a/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/ref/Reference.java b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/ref/Reference.java new file mode 100644 index 0000000000..7cba328400 --- /dev/null +++ b/persistence/nosql/persistence/api/src/main/java/org/apache/polaris/persistence/nosql/api/ref/Reference.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.ref; + +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import java.util.Optional; +import org.apache.polaris.immutables.PolarisImmutable; +import org.apache.polaris.persistence.nosql.api.obj.Obj; +import org.apache.polaris.persistence.nosql.api.obj.ObjRef; + +/** A named reference with an optional current pointer, its creation timestamp and the previously assigned pointers (kept as {@code long} values). */ @PolarisImmutable +@JsonSerialize(as = ImmutableReference.class) +@JsonDeserialize(as = ImmutableReference.class) +public interface Reference { + /** Name of this reference. */ String name(); + + /** + * Current pointer of this reference. + * + *

<p>Note that the pointer can only be {@linkplain Optional#empty()} when a reference has been + * created without a current pointer. If a reference ever had a pointer value, it cannot be + * "reset" to become {@linkplain Optional#empty()} again. + * + * @return current pointer, {@linkplain Optional#empty()} for references that have been created + * without a current pointer. + */ + Optional<ObjRef> pointer(); + + /** + * Timestamp in microseconds since (Unix) epoch when the reference was created in the database. + */ + long createdAtMicros(); + + /** + * List of previously assigned {@linkplain #pointer() pointers}. + * + *

<p>This list can be useful in case of disaster recovery scenarios in combination with + * geographically distributed databases / replication, when in the disaster recovery case the + * whole database content is not consistently available. + * + *

<p>The "full" {@link ObjRef}s can be re-constructed by using the {@link #pointer() + * pointer()}{@code .}{@link Optional#get() get()}{@code .}{@link Obj#type()}. + */ + long[] previousPointers(); + + /** Returns a new {@link ImmutableReference} builder. */ static ImmutableReference.Builder builder() { + return ImmutableReference.builder(); + } +} diff --git a/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/TestPersistence.java b/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/TestPersistence.java new file mode 100644 index 0000000000..b04dded6bb --- /dev/null +++ b/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/TestPersistence.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.polaris.persistence.nosql.api; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.util.function.Function; +import java.util.function.IntFunction; +import java.util.stream.IntStream; +import org.apache.polaris.persistence.nosql.api.obj.ObjRef; +import org.apache.polaris.persistence.nosql.api.obj.SimpleTestObj; +import org.assertj.core.api.SoftAssertions; +import org.assertj.core.api.junit.jupiter.InjectSoftAssertions; +import org.assertj.core.api.junit.jupiter.SoftAssertionsExtension; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junitpioneer.jupiter.cartesian.CartesianTest; +import org.junitpioneer.jupiter.cartesian.CartesianTest.Values; + +/** Runs the real {@code bucketizedBulkFetches} of a mocked {@link Persistence} over stubbed {@code fetchMany} chunks and checks that every requested object comes back in request order. */ @ExtendWith(SoftAssertionsExtension.class) +public class TestPersistence { + @InjectSoftAssertions SoftAssertions soft; + + @CartesianTest + public void bucketizedBulkFetches( + @Values(ints = {1, 3, 5, 13}) int fetchSize, + @Values(ints = {1, 3, 5, 15, 7, 13, 30}) int totalSize) { + var params = + PersistenceParams.BuildablePersistenceParams.builder() + .bucketizedBulkFetchSize(fetchSize) + .build(); + + var persistence = mock(Persistence.class); + when(persistence.params()).thenReturn(params); + + when(persistence.bucketizedBulkFetches(any(), any())).thenCallRealMethod(); + + var objRefIntFunction = (IntFunction<ObjRef>) i -> ObjRef.objRef(SimpleTestObj.TYPE, i); + var objRefs = IntStream.range(0, totalSize).mapToObj(objRefIntFunction).toList(); + var toObj = + (Function<ObjRef, SimpleTestObj>) + objRef -> + objRef.id() < totalSize ? 
SimpleTestObj.builder().id(objRef.id()).build() : null; + + var fetchManyInvocations = (totalSize + fetchSize - 1) / fetchSize; + var whenFetchMany = when(persistence.fetchMany(any(), any(ObjRef[].class))); + for (int i = 0; i < fetchManyInvocations; i++) { + // Construct a full chunk of size 'fetchSize'; IDs >= 'totalSize' become null entries via 'toObj' + var answer = + IntStream.range(i * fetchSize, (i + 1) * fetchSize) + .mapToObj(objRefIntFunction) + .map(toObj) + .toArray(SimpleTestObj[]::new); + + whenFetchMany = whenFetchMany.thenReturn(answer); + } + + var result = persistence.bucketizedBulkFetches(objRefs.stream(), SimpleTestObj.class).toList(); + + soft.assertThat(result) + .hasSize(totalSize) + .map(SimpleTestObj::id) + .containsExactlyElementsOf(objRefs.stream().map(ObjRef::id).toList()); + } +} diff --git a/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/backend/TestPersistId.java b/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/backend/TestPersistId.java new file mode 100644 index 0000000000..5f1eb2741a --- /dev/null +++ b/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/backend/TestPersistId.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.backend; + +import static java.lang.String.format; +import static org.junit.jupiter.params.provider.Arguments.arguments; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.smile.databind.SmileMapper; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Base64; +import java.util.stream.Stream; +import org.apache.polaris.persistence.varint.VarInt; +import org.assertj.core.api.SoftAssertions; +import org.assertj.core.api.junit.jupiter.InjectSoftAssertions; +import org.assertj.core.api.junit.jupiter.SoftAssertionsExtension; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +/** Tests {@link PersistId} argument validation and its {@code byte[]}, JSON and Smile serialization round trips. */ @ExtendWith(SoftAssertionsExtension.class) +public class TestPersistId { + @InjectSoftAssertions SoftAssertions soft; + + protected ObjectMapper mapper; + protected ObjectMapper smile; + + @BeforeEach + protected void setUp() { + mapper = new ObjectMapper(); + smile = new SmileMapper(); + } + + @Test + public void invalidRepresentations() { + soft.assertThatIllegalArgumentException() + .isThrownBy(() -> PersistId.fromBytes(new byte[] {(byte) 0x3})) + .withMessage("Unsupported PersistId type: 3"); + soft.assertThatIllegalArgumentException() + .isThrownBy(() -> PersistId.fromBytes(new byte[] {(byte) 0x0})) + .withMessage("Unsupported PersistId type: 0"); + soft.assertThatIllegalStateException() + .isThrownBy(() -> PersistId.persistId(0L, -1)) + .withMessage("part must not be negative"); + } + + @ParameterizedTest + @MethodSource + @SuppressWarnings("ByteBufferBackingArray") + public void serDe(long id, int part, ByteBuffer expected) throws 
Exception { + var persistId = PersistId.persistId(id, part); + + var expectedSerializedSize = 1 + Long.BYTES + (part > 0 ? VarInt.varIntLen(part) : 0); + + // ser/deser using byte[] + + var bytes = persistId.toBytes(); + soft.assertThat(bytes).hasSize(expectedSerializedSize).hasSize(expected.remaining()); + soft.assertThat(bytes).containsExactly(Arrays.copyOf(expected.array(), expected.remaining())); + + var deser = PersistId.fromBytes(bytes); + soft.assertThat(deser).extracting(PersistId::id, PersistId::part).containsExactly(id, part); + + var reser = deser.toBytes(); + soft.assertThat(reser).containsExactly(bytes); + + // JSON serialization + + var serializedJson = mapper.writerFor(PersistId.class).writeValueAsString(persistId); + var base64 = Base64.getEncoder().encodeToString(bytes); + soft.assertThat(serializedJson).isEqualTo(format("\"%s\"", base64)); + soft.assertThat(mapper.readValue(serializedJson, PersistId.class)) + .extracting(PersistId::id, PersistId::part) + .containsExactly(id, part); + + // Smile serialization + + var serializedSmile = smile.writerFor(PersistId.class).writeValueAsBytes(persistId); + soft.assertThat(smile.readValue(serializedSmile, PersistId.class)) + .extracting(PersistId::id, PersistId::part) + .containsExactly(id, part); + } + + static Stream<Arguments> serDe() { + return Stream.of( + arguments(0L, 0, ByteBuffer.allocate(50).put((byte) 1).putLong(0L).flip()), + arguments(42L, 0, ByteBuffer.allocate(50).put((byte) 1).putLong(42L).flip()), + arguments( + Long.MIN_VALUE, + 0, + ByteBuffer.allocate(50).put((byte) 1).putLong(Long.MIN_VALUE).flip()), + arguments( + Long.MAX_VALUE, + 0, + ByteBuffer.allocate(50).put((byte) 1).putLong(Long.MAX_VALUE).flip()), + arguments( + 0L, 1, VarInt.putVarInt(ByteBuffer.allocate(50).put((byte) 2).putLong(0L), 1).flip()), + arguments( + 42L, 1, VarInt.putVarInt(ByteBuffer.allocate(50).put((byte) 2).putLong(42L), 1).flip()), + arguments( + Long.MIN_VALUE, + 666, +
VarInt.putVarInt(ByteBuffer.allocate(50).put((byte) 2).putLong(Long.MIN_VALUE), 666) + .flip()), + arguments( + Long.MAX_VALUE, + 42, + VarInt.putVarInt(ByteBuffer.allocate(50).put((byte) 2).putLong(Long.MAX_VALUE), 42) + .flip())); + } +} diff --git a/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/index/TestIndexKey.java b/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/index/TestIndexKey.java new file mode 100644 index 0000000000..18867fe264 --- /dev/null +++ b/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/index/TestIndexKey.java @@ -0,0 +1,342 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.index; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.polaris.persistence.nosql.api.index.IndexKey.EOF; +import static org.apache.polaris.persistence.nosql.api.index.IndexKey.INDEX_KEY_SERIALIZER; +import static org.apache.polaris.persistence.nosql.api.index.IndexKey.deserializeKey; +import static org.apache.polaris.persistence.nosql.api.index.IndexKey.key; +import static org.apache.polaris.persistence.nosql.api.index.Util.asHex; +import static org.assertj.core.api.Assertions.assertThatIllegalArgumentException; +import static org.assertj.core.api.InstanceOfAssertFactories.INTEGER; +import static org.junit.jupiter.params.provider.Arguments.arguments; + +import java.nio.ByteBuffer; +import java.util.function.IntFunction; +import java.util.stream.Stream; +import org.assertj.core.api.SoftAssertions; +import org.assertj.core.api.junit.jupiter.InjectSoftAssertions; +import org.assertj.core.api.junit.jupiter.SoftAssertionsExtension; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; + +/** Tests {@link IndexKey} length limits, prefix checks, ordering and serialization round trips. */ @ExtendWith(SoftAssertionsExtension.class) +public class TestIndexKey { + static final String STRING_100 = + "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; + + @InjectSoftAssertions SoftAssertions soft; + + @ParameterizedTest + @MethodSource("keyLengthGood") + void keyLengthGood(String value) { + key(value); + } + + static Stream<String> keyLengthGood() { + return Stream.of( + "1", STRING_100, STRING_100 + STRING_100 + STRING_100 + STRING_100 + STRING_100); + } + + @ParameterizedTest + @MethodSource("keyTooLong") + void keyTooLong(String value) { + assertThatIllegalArgumentException() + .isThrownBy(() -> 
key(value)) + .withMessage("Key too long, max allowed length: " + IndexKey.MAX_LENGTH); + } + + static Stream<String> keyTooLong() { + return Stream.of( + STRING_100 + STRING_100 + STRING_100 + STRING_100 + STRING_100 + "x", + STRING_100 + STRING_100 + STRING_100 + STRING_100 + STRING_100 + STRING_100); + } + + @ParameterizedTest + @MethodSource("startsWith") + void startsWith(IndexKey value, IndexKey prefix, boolean expected) { + soft.assertThat(value.startsWith(prefix)).isEqualTo(expected); + } + + static Stream<Arguments> startsWith() { + return Stream.of( + arguments(key("a"), key("a"), true), + arguments(key("ab"), key("a"), true), + arguments(key("a"), key("ab"), false), + arguments(key("b"), key("ab"), false), + arguments(key("b"), key("ab"), false)); + } + + @ParameterizedTest + @MethodSource("compare") + void compare(IndexKey a, IndexKey b, int expectedCompare) { + soft.assertThat(a) + .describedAs("Compare of %s to %s expect %d", a, b, expectedCompare) + .extracting(k -> Integer.signum(k.compareTo(b))) + .asInstanceOf(INTEGER) + .isEqualTo(expectedCompare); + soft.assertThat(a) + .describedAs("Reverse compare of %s to %s expect %d", a, b, expectedCompare) + .extracting(k -> Integer.signum(b.compareTo(k))) + .asInstanceOf(INTEGER) + .isEqualTo(-expectedCompare); + } + + static Stream<Arguments> compare() { + return Stream.of( + arguments(key("k2\u0001k3"), key("k2\u0001πa"), -1), // UNICODE CHAR + arguments(key("a"), key("a"), 0), + arguments(key("a"), key("aa"), -1), + arguments(key("aa"), key("a"), 1), + arguments(key("aa"), key("aaa"), -1), + arguments(key("aa"), key("aaa"), -1), + arguments(key("aaa"), key("aaaa"), -1), + arguments(key("a\u0001a"), key("a\u0001a"), 0), + arguments(key("a\u0001a"), key("aa\u0001a"), -1), + // 10 + arguments(key("a\u0001a"), key("a\u0001a"), 0), + arguments(key("a\u0001a"), key("aa\u0001aa"), -1), + arguments(key("aπ\u0001a"), key("aπ\u0001a"), 0), // UNICODE CHAR + arguments(key("aπ\u0001a"), key("aπa\u0001a"), -1), // UNICODE CHAR + 
arguments(key("aπ\u0001a"), key("aπ\u0001a"), 0), // UNICODE CHAR + arguments(key("aπ\u0001a"), key("aπa\u0001aa"), -1), // UNICODE CHAR + arguments(key("aa\u0001a"), key("aπ\u0001a"), -1), // UNICODE CHAR + arguments(key("aa\u0001a"), key("aπa\u0001a"), -1), // UNICODE CHAR + arguments(key("aa\u0001a"), key("aπ\u0001a"), -1), // UNICODE CHAR + arguments(key("aa\u0001a"), key("aπa\u0001aa"), -1), // UNICODE CHAR + // 20 + arguments(key("aa"), key("a"), 1), + arguments(key("aa"), key("aaa"), -1), + arguments(key("aa"), key("aaa"), -1), + arguments(key("aaa"), key("aaaa"), -1), + arguments(key("a"), key("aa"), -1), + arguments(key("aπa"), key("a"), 1), // UNICODE CHAR + arguments(key("aπa"), key("aπaa"), -1), // UNICODE CHAR + arguments(key("aaπ"), key("aaπa"), -1), // UNICODE CHAR + arguments(key("aaππ"), key("aaππa"), -1), // UNICODE CHAR + arguments(key("aaπ"), key("aaππa"), -1), // UNICODE CHAR + // 30 + arguments(key("aaπa"), key("aaπaa"), -1), // UNICODE CHAR + arguments(key("a"), key("ab"), -1), + arguments(key("a"), key("aa"), -1), + arguments(key("a"), key("aπa"), -1), // UNICODE CHAR + arguments(key("aa"), key("aπ"), -1), // UNICODE CHAR + arguments(key("aa"), key("a"), 1), + arguments(key("a"), key("abcdef"), -1), + arguments(key("abcdef"), key("a"), 1), + arguments(key("abcdef"), key("0123123123"), 1), + arguments(key("abcdefabcabc"), key("0123"), 1), + // 40 + arguments(key("0"), key("0123123123"), -1), + arguments(key("abcdefabcabc"), key("a"), 1), + arguments(key("key.0"), key("key.1"), -1), + arguments(key("key.42"), key("key.42"), 0), + arguments(key("key0"), key("key1"), -1), + arguments(key("key42"), key("key42"), 0)); + } + + @Test + public void utf8surrogates() { + var arr = new char[] {0xd800, 0xdc00, 0xd8ff, 0xdcff}; + + utf8verify(arr); + } + + @ParameterizedTest + @ValueSource(strings = {"süße sahne", "là-bas"}) + public void utf8string(String s) { + var arr = s.toCharArray(); + + utf8verify(arr); + } + + private void utf8verify(char[] arr) { 
+ var serToBufferFromString = ByteBuffer.allocate(arr.length * 3 + 2); + + key(new String(arr)).serialize(serToBufferFromString); + serToBufferFromString.flip(); + + var bufferFromString = ByteBuffer.allocate(arr.length * 3 + 2); + bufferFromString.put(new String(arr).getBytes(UTF_8)); + bufferFromString.put(EOF); + bufferFromString.flip(); + + var mismatch = bufferFromString.mismatch(serToBufferFromString); + if (mismatch != -1) { + soft.assertThat(mismatch).describedAs("Mismatch at %d", mismatch).isEqualTo(-1); + } + + var deser = deserializeKey(serToBufferFromString.duplicate()); + var b2 = ByteBuffer.allocate(serToBufferFromString.capacity()); + deser.serialize(b2); + b2.flip(); + + mismatch = serToBufferFromString.mismatch(b2); + if (mismatch != -1) { + soft.assertThat(mismatch).describedAs("Mismatch at %d", mismatch).isEqualTo(-1); + } + + soft.assertThat(deser.toString()).isEqualTo(new String(arr)); + } + + static Stream<Arguments> keySerializationRoundTrip() { + return Stream.of( + arguments(false, key("A"), 2, "4101"), + arguments(true, key("A"), 2, "4101"), + arguments(false, key("A\u0001B"), 5, "4102034201"), + arguments(true, key("A\u0001B"), 5, "4102034201"), + arguments(false, key("A\u0001B\u0002C"), 8, "4102034202044301"), + arguments(true, key("A\u0001B\u0002C"), 8, "4102034202044301"), + arguments(false, key("abc"), 4, "61626301"), + arguments(true, key("abc"), 4, "61626301"), + arguments(false, key("abcdefghi"), 10, "61626364656667686901"), + arguments(true, key("abcdefghi"), 10, "61626364656667686901"), + arguments(false, key(STRING_100 + STRING_100), 201, null), + arguments(true, key(STRING_100 + STRING_100), 201, null), + arguments( + false, key(STRING_100 + STRING_100 + STRING_100 + STRING_100 + STRING_100), 501, null), + arguments( + true, key(STRING_100 + STRING_100 + STRING_100 + STRING_100 + STRING_100), 501, null)); + } + + @ParameterizedTest + @MethodSource("keySerializationRoundTrip") + public void keySerializationRoundTrip( + boolean 
directBuffer, IndexKey key, int expectedSerializedSize, String checkedHex) { + IntFunction<ByteBuffer> alloc = + len -> directBuffer ? ByteBuffer.allocateDirect(len) : ByteBuffer.allocate(len); + + var serialized = key.serialize(alloc.apply(506)).flip(); + soft.assertThat(serialized.remaining()).isEqualTo(expectedSerializedSize); + soft.assertThat(key.serializedSize()).isEqualTo(expectedSerializedSize); + if (checkedHex != null) { + soft.assertThat(asHex(serialized)).isEqualTo(checkedHex); + } + var deserialized = deserializeKey(serialized.duplicate()); + soft.assertThat(deserialized).isEqualTo(key); + + var big = alloc.apply(8192); + big.position(1234); + big.put(serialized.duplicate()); + big.position(8000); + var ser = big.duplicate().flip(); + ser.position(1234); + deserialized = deserializeKey(ser.duplicate()); + soft.assertThat(deserialized).isEqualTo(key); + } + + @Test + public void longIndexKeyOrder() { + for (int i = -32768; i < 32768; i++) { + var keyLow = key(i); + var keyHigh = key(i + 1); + if (i == -1) { + // unsigned comparison means that negative `long` values are _higher_ than positive ones. 
+ soft.assertThat(keyLow).describedAs(Integer.toHexString(i)).isGreaterThan(keyHigh); + soft.assertThat(keyHigh).describedAs(Integer.toHexString(i)).isLessThan(keyLow); + continue; + } + soft.assertThat(keyLow).describedAs(Integer.toHexString(i)).isLessThan(keyHigh); + soft.assertThat(keyHigh).describedAs(Integer.toHexString(i)).isGreaterThan(keyLow); + } + } + + @ParameterizedTest + @MethodSource + public void indexKeySerializer(IndexKey indexKey) { + var serSize = INDEX_KEY_SERIALIZER.serializedSize(indexKey); + var buffer = ByteBuffer.allocate(serSize); + soft.assertThat(INDEX_KEY_SERIALIZER.serialize(indexKey, buffer)).isSameAs(buffer); + soft.assertThat(buffer.remaining()).isEqualTo(0); + + for (int i = 0; i < serSize - 1; i++) { + soft.assertThat(buffer.get(i)).isNotEqualTo(EOF); + } + soft.assertThat(buffer.get(serSize - 1)).isEqualTo(EOF); + + buffer.flip(); + soft.assertThat(INDEX_KEY_SERIALIZER.deserialize(buffer.duplicate())).isEqualTo(indexKey); + + var skipped = buffer.duplicate(); + INDEX_KEY_SERIALIZER.skip(skipped); + soft.assertThat(skipped.remaining()).isEqualTo(0); + + for (var off = 1; off < serSize - 1; off++) { + var prefix = ByteBuffer.allocate(off); + var suffix = ByteBuffer.allocate(serSize - off); + prefix.put(0, buffer, 0, off); + suffix.put(0, buffer, off, serSize - off); + + /* NOTE(review): 'prefix' is filled but never read and 'prefix1' duplicates 'suffix' - confirm this is intended. */ var prefix1 = suffix.duplicate(); + INDEX_KEY_SERIALIZER.deserialize(prefix1); + soft.assertThat(prefix1.remaining()).isEqualTo(0); + + var suffix1 = suffix.duplicate(); + INDEX_KEY_SERIALIZER.deserialize(suffix1); + soft.assertThat(suffix1.remaining()).isEqualTo(0); + } + } + + static Stream<IndexKey> indexKeySerializer() { + return Stream.of( + key(""), + key("foo"), + key("foo\u0000"), + key("foo\u0002"), + key("foo\u0000\u0000bar"), + key("foo\u0000\u0001\u0000bar"), + key("foo\u0000\u0001\u0002\u0000\u0001\u0002\u0000bar"), + key(0L), + key(1L), + key(2L), + key(3L), + key(4L), + key(5L), + key(6L), + key(7L), + key(8L), + key(9L), + key(10L), + key(0x100L), + 
key(0x200L), + key(0x10000L), + key(0x20000L), + key(0x1000000L), + key(0x2000000L), + key(0x100000000L), + key(0x200000000L), + key(0x10000000000L), + key(0x20000000000L), + key(0x1000000000000L), + key(0x2000000000000L), + key(0x100000000000000L), + key(0x200000000000000L), + key(Long.MIN_VALUE), + key(Long.MAX_VALUE), + key(1L), + key(-1L), + key(Integer.MIN_VALUE), + key(Integer.MAX_VALUE)); + } +} diff --git a/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/index/Util.java b/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/index/Util.java new file mode 100644 index 0000000000..d9da2314f6 --- /dev/null +++ b/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/index/Util.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.index; + +import java.nio.ByteBuffer; + +/** Test helper for rendering {@link ByteBuffer} contents as text. */ public final class Util { + private Util() {} + + private static final char[] HEX = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' + }; + + /** Returns the bytes between {@code b}'s position and limit as lowercase hex, two digits per byte; only absolute reads are used, so the buffer's position is left unchanged. */ public static String asHex(ByteBuffer b) { + StringBuilder sb = new StringBuilder(); + for (int p = b.position(); p < b.limit(); p++) { + int v = b.get(p); + sb.append(HEX[(v >> 4) & 0xf]); + sb.append(HEX[v & 0xf]); + } + return sb.toString(); + } +} diff --git a/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/obj/TestGenericObj.java b/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/obj/TestGenericObj.java new file mode 100644 index 0000000000..c544edf2b9 --- /dev/null +++ b/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/obj/TestGenericObj.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.obj; + +import static org.apache.polaris.persistence.nosql.api.obj.ObjSerializationHelper.contextualReader; +import static org.apache.polaris.persistence.nosql.api.obj.ObjTypes.objTypeById; +import static org.junit.jupiter.params.provider.Arguments.arguments; + +import com.fasterxml.jackson.databind.ObjectMapper; +import java.util.UUID; +import java.util.concurrent.ThreadLocalRandom; +import java.util.stream.Stream; +import org.assertj.core.api.SoftAssertions; +import org.assertj.core.api.junit.jupiter.InjectSoftAssertions; +import org.assertj.core.api.junit.jupiter.SoftAssertionsExtension; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +/** Verifies that JSON written for a concrete object type can be read as a {@link GenericObj} of an unregistered type and written back so that the original object is restored. */ @ExtendWith(SoftAssertionsExtension.class) +public class TestGenericObj { + @InjectSoftAssertions SoftAssertions soft; + + @ParameterizedTest + @MethodSource + public void genericObj(ObjType realType, long id, Obj realObj) throws Exception { + var mapper = new ObjectMapper().findAndRegisterModules(); + // Use some view to exclude the type,id,createdAtMicros,versionToken attributes from being + // serialized by Jackson. 
+ var writerAllAttributes = mapper.writer(); + var writer = mapper.writer().withView(Object.class); + + var genericType = objTypeById("genericType_" + UUID.randomUUID()); + var versionToken = realObj.versionToken(); + + var json = writer.writeValueAsString(realObj); + var jsonAllAttributes = writerAllAttributes.writeValueAsString(realObj); + var genericObj = + contextualReader(mapper, genericType, id, 0, versionToken, realObj.createdAtMicros()) + .readValue(json, genericType.targetClass()); + var genericObjAllAttributes = + contextualReader(mapper, genericType, id, 0, versionToken, realObj.createdAtMicros()) + .readValue(jsonAllAttributes, genericType.targetClass()); + soft.assertThat(genericObj) + .isEqualTo(genericObjAllAttributes) + .isInstanceOf(GenericObj.class) + .extracting(GenericObj.class::cast) + .extracting(GenericObj::id, GenericObj::type) + .containsExactly(realObj.id(), genericType); + + var jsonGeneric = writer.writeValueAsString(genericObj); + var jsonGenericAllAttributes = writerAllAttributes.writeValueAsString(genericObj); + var deserRealObj = + contextualReader(mapper, realType, id, 1, versionToken, realObj.createdAtMicros()) + .readValue(jsonGeneric, realType.targetClass()); + var deserRealObjAllAttributes = + contextualReader(mapper, realType, id, 1, versionToken, realObj.createdAtMicros()) + .readValue(jsonGenericAllAttributes, realType.targetClass()); + soft.assertThat(deserRealObj).isEqualTo(realObj).isEqualTo(deserRealObjAllAttributes); + } + + static Stream<Arguments> genericObj() { + // We don't persist anything, so we can reuse this ID. 
+ var id = ThreadLocalRandom.current().nextLong(); + + return Stream.of( + arguments( + SimpleTestObj.TYPE, + id, + SimpleTestObj.builder() + .id(id) + .createdAtMicros(123L) + .addList("one", "two", "three") + .putMap("a", "A") + .putMap("b", "B") + .text("some text") + .build()), + // + arguments( + VersionedTestObj.TYPE, + id, + VersionedTestObj.builder() + .id(id) + .createdAtMicros(123L) + .someValue("some value") + .versionToken("my version token") + .build())); + } +} diff --git a/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/obj/TestObjRef.java b/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/obj/TestObjRef.java new file mode 100644 index 0000000000..bd8390bfde --- /dev/null +++ b/persistence/nosql/persistence/api/src/test/java/org/apache/polaris/persistence/nosql/api/obj/TestObjRef.java @@ -0,0 +1,233 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.obj; + +import static java.lang.String.format; +import static org.apache.polaris.persistence.nosql.api.obj.ObjRef.OBJ_REF_SERIALIZER; +import static org.apache.polaris.persistence.nosql.api.obj.ObjRef.objRef; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.smile.databind.SmileMapper; +import java.nio.ByteBuffer; +import java.util.Base64; +import java.util.stream.Stream; +import org.assertj.core.api.SoftAssertions; +import org.assertj.core.api.junit.jupiter.InjectSoftAssertions; +import org.assertj.core.api.junit.jupiter.SoftAssertionsExtension; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.MethodSource; + +@ExtendWith(SoftAssertionsExtension.class) +public class TestObjRef { + @InjectSoftAssertions SoftAssertions soft; + + protected ObjectMapper mapper; + protected ObjectMapper smile; + + @BeforeEach + protected void setUp() { + mapper = new ObjectMapper(); + smile = new SmileMapper(); + } + + @Test + public void nullObjRef() throws Exception { + soft.assertThat(mapper.writerFor(ObjRef.class).writeValueAsString(null)).isEqualTo("null"); + } + + @Test + public void invalidRepresentations() { + // unsupported versions (1,2,3) + soft.assertThatIllegalArgumentException() + .isThrownBy(() -> ObjRef.fromBytes(new byte[] {(byte) 0x60})) + .withMessage("Unsupported ObjId version: 3"); + soft.assertThatIllegalArgumentException() + .isThrownBy(() -> ObjRef.fromBytes(new byte[] {(byte) 0x40})) + .withMessage("Unsupported ObjId version: 2"); + soft.assertThatIllegalArgumentException() + .isThrownBy(() -> ObjRef.fromBytes(new byte[] {(byte) 0x20})) + .withMessage("Unsupported ObjId version: 1"); + soft.assertThatIllegalArgumentException() + 
.isThrownBy(() -> ObjRef.fromByteBuffer(ByteBuffer.wrap(new byte[] {(byte) 0x60}))) + .withMessage("Unsupported ObjId version: 3"); + soft.assertThatIllegalArgumentException() + .isThrownBy(() -> ObjRef.fromByteBuffer(ByteBuffer.wrap(new byte[] {(byte) 0x40}))) + .withMessage("Unsupported ObjId version: 2"); + soft.assertThatIllegalArgumentException() + .isThrownBy(() -> ObjRef.fromByteBuffer(ByteBuffer.wrap(new byte[] {(byte) 0x20}))) + .withMessage("Unsupported ObjId version: 1"); + soft.assertThatIllegalArgumentException() + .isThrownBy(() -> ObjRef.skipObjId(ByteBuffer.wrap(new byte[] {(byte) 0x60}))) + .withMessage("Unsupported ObjId version: 3"); + soft.assertThatIllegalArgumentException() + .isThrownBy(() -> ObjRef.skipObjId(ByteBuffer.wrap(new byte[] {(byte) 0x40}))) + .withMessage("Unsupported ObjId version: 2"); + soft.assertThatIllegalArgumentException() + .isThrownBy(() -> ObjRef.skipObjId(ByteBuffer.wrap(new byte[] {(byte) 0x20}))) + .withMessage("Unsupported ObjId version: 1"); + + var dummyObjType = + new AbstractObjType<>("123456789012345678901234567890123", "123456", Obj.class) {}; + + soft.assertThatIllegalArgumentException() + .isThrownBy( + () -> + // negative part argument (-1) + objRef(dummyObjType, 42L, -1).toBytes()) + .withMessage("partNum must not be negative"); + soft.assertThatIllegalArgumentException() + .isThrownBy( + () -> + // negative part argument (-1) + objRef(dummyObjType, 42L, -1).toByteBuffer()) + .withMessage("partNum must not be negative"); + soft.assertThatIllegalArgumentException() + .isThrownBy( + () -> + // type name too long + objRef(dummyObjType, 42L, 1).toBytes()) + .withMessage("Name length must be between 1 and 32"); + soft.assertThatIllegalArgumentException() + .isThrownBy( + () -> + // type name too long + objRef(dummyObjType, 42L, 1).toByteBuffer()) + .withMessage("Name length must be between 1 and 32"); + } + + @ParameterizedTest + @CsvSource( + value = { + "foo,0", + "elani,9223372036854775807", + 
"elaniursus,-9223372036854775808", + "a234567890123456789012345678901,42", + "a234567890123456789012345678901,9223372036854775807", + "a234567890123456789012345678901,-9223372036854775808", + }) + public void serDe(String typeId, long id) throws Exception { + var type = new AbstractObjType<>(typeId, typeId, Obj.class) {}; + var objId = objRef(type, id, 1); + + var expectedSerializedSize = 1 + typeId.length() + Long.BYTES; + + // ser/deser using byte[] + + var bytes = objId.toBytes(); + soft.assertThat(bytes).hasSize(expectedSerializedSize); + + var deser = ObjRef.fromBytes(bytes); + soft.assertThat(deser) + .extracting(ObjRef::type, ObjRef::id, ObjRef::numParts) + .containsExactly(objId.type(), id, 1); + + var reser = deser.toBytes(); + soft.assertThat(reser).containsExactly(bytes); + + // ser/deser using ByteBuffer + + var byteBuffer = objId.toByteBuffer(); + soft.assertThat(byteBuffer.remaining()).isEqualTo(expectedSerializedSize); + + var dup = byteBuffer.duplicate(); + var deserBuffer = ObjRef.fromByteBuffer(dup); + soft.assertThat(dup.remaining()).isEqualTo(0); + soft.assertThat(deserBuffer) + .extracting(ObjRef::type, ObjRef::id) + .containsExactly(objId.type(), id); + + dup = byteBuffer.duplicate(); + soft.assertThat(ObjRef.skipObjId(dup)) + .isSameAs(dup) + .extracting(ByteBuffer::remaining) + .isEqualTo(0); + + var reserBuffer = deser.toByteBuffer(); + soft.assertThat(reserBuffer) + .isEqualTo(byteBuffer) + .extracting(ByteBuffer::array) + .isEqualTo(bytes); + soft.assertThat(reserBuffer.remaining()).isEqualTo(expectedSerializedSize); + + // JSON serialization + + var serializedJson = mapper.writerFor(ObjRef.class).writeValueAsString(objId); + var base64 = Base64.getEncoder().encodeToString(bytes); + soft.assertThat(serializedJson).isEqualTo(format("\"%s\"", base64)); + soft.assertThat(mapper.readValue(serializedJson, ObjRef.class)) + .extracting(ObjRef::type, ObjRef::id) + .containsExactly(objId.type(), id); + + // Smile serialization + + var 
serializedSmile = smile.writerFor(ObjRef.class).writeValueAsBytes(objId); + soft.assertThat(smile.readValue(serializedSmile, ObjRef.class)) + .extracting(ObjRef::type, ObjRef::id) + .containsExactly(objId.type(), id); + } + + @Test + public void nullObjRefSerialization() { + var nullSerialized = ByteBuffer.wrap(new byte[1]); + + var target = ByteBuffer.allocate(1); + soft.assertThat(OBJ_REF_SERIALIZER.serialize(null, target)).isSameAs(target); + soft.assertThat(target.flip()).inHexadecimal().isEqualTo(nullSerialized); + + soft.assertThat(OBJ_REF_SERIALIZER.serializedSize(null)).isEqualTo(1); + + soft.assertThat(OBJ_REF_SERIALIZER.deserialize(nullSerialized.duplicate())).isNull(); + var nullSkip = nullSerialized.duplicate(); + OBJ_REF_SERIALIZER.skip(nullSkip); + soft.assertThat(nullSkip) + .extracting(ByteBuffer::position, ByteBuffer::remaining) + .containsExactly(1, 0); + } + + @ParameterizedTest + @MethodSource + public void objRefSerialization(ObjRef objRef) { + var serSize = OBJ_REF_SERIALIZER.serializedSize(objRef); + var buffer = ByteBuffer.allocate(serSize); + soft.assertThat(OBJ_REF_SERIALIZER.serialize(objRef, buffer)).isSameAs(buffer); + soft.assertThat(buffer.remaining()).isEqualTo(0); + + buffer.flip(); + soft.assertThat(OBJ_REF_SERIALIZER.deserialize(buffer.duplicate())).isEqualTo(objRef); + + var skipped = buffer.duplicate(); + OBJ_REF_SERIALIZER.skip(skipped); + soft.assertThat(skipped.remaining()).isEqualTo(0); + } + + static Stream objRefSerialization() { + return Stream.of( + null, + objRef("foo", 123L, 1), + objRef("max", Long.MAX_VALUE, 5), + objRef("max", 0, 1), + objRef("max", 42, 2), + objRef("max", 0x1234567890abcdefL, 3), + objRef("min", Long.MIN_VALUE, 4)); + } +} diff --git a/persistence/nosql/persistence/api/src/test/resources/logback-test.xml b/persistence/nosql/persistence/api/src/test/resources/logback-test.xml new file mode 100644 index 0000000000..6799188f4e --- /dev/null +++ 
b/persistence/nosql/persistence/api/src/test/resources/logback-test.xml @@ -0,0 +1,32 @@ + + + + + + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + + diff --git a/persistence/nosql/persistence/api/src/testFixtures/java/org/apache/polaris/persistence/nosql/api/obj/AnotherTestObj.java b/persistence/nosql/persistence/api/src/testFixtures/java/org/apache/polaris/persistence/nosql/api/obj/AnotherTestObj.java new file mode 100644 index 0000000000..15d50cbce1 --- /dev/null +++ b/persistence/nosql/persistence/api/src/testFixtures/java/org/apache/polaris/persistence/nosql/api/obj/AnotherTestObj.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.obj; + +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import jakarta.annotation.Nullable; +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import org.apache.polaris.immutables.PolarisImmutable; + +/** + * Test fixture object with nullable attributes of several Java types and one {@link Optional} + * attribute. Jackson serializes and deserializes it as {@code ImmutableAnotherTestObj}. + */ +@PolarisImmutable +@JsonSerialize(as = ImmutableAnotherTestObj.class) +@JsonDeserialize(as = ImmutableAnotherTestObj.class) +public interface AnotherTestObj extends Obj { + + /** Type descriptor shared by all instances; returned by {@link #type()}. */ + ObjType TYPE = new AnotherTestObjType(); + + @Override + default ObjType type() { + return TYPE; + } + + @Nullable + String text(); + + @Nullable + byte[] binary(); + + @Nullable + Number number(); + + @Nullable + Map map(); + + @Nullable + List list(); + + @Nullable + Instant instant(); + + Optional optional(); + + /** Returns a new {@code ImmutableAnotherTestObj} builder. */ + static ImmutableAnotherTestObj.Builder builder() { + return ImmutableAnotherTestObj.builder(); + } + + /** + * {@link ObjType} of {@link AnotherTestObj}, constructed with {@code "test-another"} and {@code + * "another test obj type"}. Listed in the test fixtures' {@code META-INF/services} file for + * {@code ObjType}, hence the public no-argument constructor. + */ + final class AnotherTestObjType extends AbstractObjType { + public AnotherTestObjType() { + super("test-another", "another test obj type", AnotherTestObj.class); + } + } +} diff --git a/persistence/nosql/persistence/api/src/testFixtures/java/org/apache/polaris/persistence/nosql/api/obj/CommitTestObj.java b/persistence/nosql/persistence/api/src/testFixtures/java/org/apache/polaris/persistence/nosql/api/obj/CommitTestObj.java new file mode 100644 index 0000000000..ad30b27481 --- /dev/null +++ b/persistence/nosql/persistence/api/src/testFixtures/java/org/apache/polaris/persistence/nosql/api/obj/CommitTestObj.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.obj; + +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import jakarta.annotation.Nullable; +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import org.apache.polaris.immutables.PolarisImmutable; + +/** + * Test fixture {@link BaseCommitObj} with nullable attributes of several Java types and one + * {@link Optional} attribute. Jackson serializes and deserializes it as {@code + * ImmutableCommitTestObj}. + */ +@PolarisImmutable +@JsonSerialize(as = ImmutableCommitTestObj.class) +@JsonDeserialize(as = ImmutableCommitTestObj.class) +public interface CommitTestObj extends BaseCommitObj { + + /** Type descriptor shared by all instances; returned by {@link #type()}. */ + ObjType TYPE = new CommitTestObjType(); + + @Override + default ObjType type() { + return TYPE; + } + + @Nullable + String text(); + + @Nullable + byte[] binary(); + + @Nullable + Number number(); + + @Nullable + Map map(); + + @Nullable + List list(); + + @Nullable + Instant instant(); + + Optional optional(); + + /** Returns a new {@code ImmutableCommitTestObj} builder. */ + static ImmutableCommitTestObj.Builder builder() { + return ImmutableCommitTestObj.builder(); + } + + /** + * {@link ObjType} of {@link CommitTestObj}, constructed with {@code "test-commit"} and {@code + * "commit"}. Listed in the test fixtures' {@code META-INF/services} file for {@code ObjType}, + * hence the public no-argument constructor. + */ + final class CommitTestObjType extends AbstractObjType { + public CommitTestObjType() { + super("test-commit", "commit", CommitTestObj.class); + } + } + + /** Builder interface of this fixture; declares nothing beyond {@code BaseCommitObj.Builder}. */ + interface Builder extends BaseCommitObj.Builder {} +} diff --git a/persistence/nosql/persistence/api/src/testFixtures/java/org/apache/polaris/persistence/nosql/api/obj/SimpleTestObj.java 
b/persistence/nosql/persistence/api/src/testFixtures/java/org/apache/polaris/persistence/nosql/api/obj/SimpleTestObj.java new file mode 100644 index 0000000000..63111fe85b --- /dev/null +++ b/persistence/nosql/persistence/api/src/testFixtures/java/org/apache/polaris/persistence/nosql/api/obj/SimpleTestObj.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.persistence.nosql.api.obj; + +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import jakarta.annotation.Nullable; +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import org.apache.polaris.immutables.PolarisImmutable; + +/** + * Test fixture object with nullable attributes of several Java types and one {@link Optional} + * attribute. Jackson serializes and deserializes it as {@code ImmutableSimpleTestObj}. + */ +@PolarisImmutable +@JsonSerialize(as = ImmutableSimpleTestObj.class) +@JsonDeserialize(as = ImmutableSimpleTestObj.class) +public interface SimpleTestObj extends Obj { + + /** Type descriptor shared by all instances; returned by {@link #type()}. */ + ObjType TYPE = new SimpleTestObjType(); + + @Override + default ObjType type() { + return TYPE; + } + + @Nullable + String text(); + + @Nullable + byte[] binary(); + + @Nullable + Number number(); + + @Nullable + Map map(); + + @Nullable + List list(); + + @Nullable + Instant instant(); + + Optional optional(); + + /** Returns a new {@code ImmutableSimpleTestObj} builder. */ + static ImmutableSimpleTestObj.Builder builder() { + return ImmutableSimpleTestObj.builder(); + } + + /** + * {@link ObjType} of {@link SimpleTestObj}, constructed with {@code "test-simple"} and {@code + * "simple"}. Listed in the test fixtures' {@code META-INF/services} file for {@code ObjType}, + * hence the public no-argument constructor. + */ + final class SimpleTestObjType extends AbstractObjType { + public SimpleTestObjType() { + super("test-simple", "simple", SimpleTestObj.class); + } + } +} diff --git a/persistence/nosql/persistence/api/src/testFixtures/java/org/apache/polaris/persistence/nosql/api/obj/VersionedTestObj.java b/persistence/nosql/persistence/api/src/testFixtures/java/org/apache/polaris/persistence/nosql/api/obj/VersionedTestObj.java new file mode 100644 index 0000000000..69d65249f6 --- /dev/null +++ b/persistence/nosql/persistence/api/src/testFixtures/java/org/apache/polaris/persistence/nosql/api/obj/VersionedTestObj.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.api.obj; + +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import jakarta.annotation.Nonnull; +import jakarta.annotation.Nullable; +import org.apache.polaris.immutables.PolarisImmutable; + +/** + * Test fixture object that declares the nullable attributes {@code someValue} and {@code binary}. + * Jackson serializes and deserializes it as {@code ImmutableVersionedTestObj}. + */ +@PolarisImmutable +@JsonSerialize(as = ImmutableVersionedTestObj.class) +@JsonDeserialize(as = ImmutableVersionedTestObj.class) +public interface VersionedTestObj extends Obj { + + /** Type descriptor shared by all instances; returned by {@link #type()}. */ + ObjType TYPE = new VersionedTestObjType(); + + @Nonnull + @Override + default ObjType type() { + return TYPE; + } + + /** Returns a new {@code ImmutableVersionedTestObj} builder. */ + static ImmutableVersionedTestObj.Builder builder() { + return ImmutableVersionedTestObj.builder(); + } + + @Nullable + String someValue(); + + @Nullable + byte[] binary(); + + /** + * {@link ObjType} of {@link VersionedTestObj}, constructed with {@code "test-versioned"} and + * {@code "versioned"}. Listed in the test fixtures' {@code META-INF/services} file for {@code + * ObjType}, hence the public no-argument constructor. + */ + final class VersionedTestObjType extends AbstractObjType { + public VersionedTestObjType() { + super("test-versioned", "versioned", VersionedTestObj.class); + } + } +} diff --git a/persistence/nosql/persistence/api/src/testFixtures/resources/META-INF/services/org.apache.polaris.persistence.nosql.api.obj.ObjType b/persistence/nosql/persistence/api/src/testFixtures/resources/META-INF/services/org.apache.polaris.persistence.nosql.api.obj.ObjType new file mode 100644 index 0000000000..3a087a8993 --- /dev/null +++ b/persistence/nosql/persistence/api/src/testFixtures/resources/META-INF/services/org.apache.polaris.persistence.nosql.api.obj.ObjType @@ -0,0 +1,23 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +org.apache.polaris.persistence.nosql.api.obj.AnotherTestObj$AnotherTestObjType +org.apache.polaris.persistence.nosql.api.obj.CommitTestObj$CommitTestObjType +org.apache.polaris.persistence.nosql.api.obj.SimpleTestObj$SimpleTestObjType +org.apache.polaris.persistence.nosql.api.obj.VersionedTestObj$VersionedTestObjType