Skip to content

Commit e294de7

Browse files
c-thiel and Fokko authored
feat: View Metadata Builder (#908)
This PR is not completely ready yet, as I believe the current mechanism of view expiration is flawed. I opened a PR in Java to demonstrate the problem and to serve as a basis for discussion: apache/iceberg#12051. Feedback from anyone is welcome. I am not sure what the best solution looks like. --------- Co-authored-by: Fokko Driesprong <[email protected]>
1 parent 7d71e47 commit e294de7

File tree

6 files changed

+1720
-92
lines changed

6 files changed

+1720
-92
lines changed

crates/iceberg/src/io/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,6 @@ mod storage_gcs;
8989
#[cfg(feature = "storage-gcs")]
9090
pub use storage_gcs::*;
9191

92-
fn is_truthy(value: &str) -> bool {
92+
pub(crate) fn is_truthy(value: &str) -> bool {
9393
["true", "t", "1", "on"].contains(&value.to_lowercase().as_str())
9494
}

crates/iceberg/src/spec/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ mod table_metadata_builder;
3131
mod transform;
3232
mod values;
3333
mod view_metadata;
34+
mod view_metadata_builder;
3435
mod view_version;
3536

3637
pub use datatypes::*;

crates/iceberg/src/spec/table_metadata_builder.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,6 @@ impl TableMetadataBuilder {
138138
}
139139

140140
/// Creates a new table metadata builder from the given metadata to modify it.
141-
142141
/// `current_file_location` is the location where the current version
143142
/// of the metadata file is stored. This is used to update the metadata log.
144143
/// If `current_file_location` is `None`, the metadata log will not be updated.
@@ -312,7 +311,7 @@ impl TableMetadataBuilder {
312311
Ok(self)
313312
}
314313

315-
/// Set the location of the table metadata, stripping any trailing slashes.
314+
/// Set the location of the table, stripping any trailing slashes.
316315
pub fn set_location(mut self, location: String) -> Self {
317316
let location = location.trim_end_matches('/').to_string();
318317
if self.metadata.location != location {

crates/iceberg/src/spec/view_metadata.rs

Lines changed: 58 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,27 @@ use serde::{Deserialize, Serialize};
2929
use serde_repr::{Deserialize_repr, Serialize_repr};
3030
use uuid::Uuid;
3131

32-
use super::view_version::{ViewVersion, ViewVersionId, ViewVersionRef};
32+
pub use super::view_metadata_builder::ViewMetadataBuilder;
33+
use super::view_version::{ViewVersionId, ViewVersionRef};
3334
use super::{SchemaId, SchemaRef};
34-
use crate::catalog::ViewCreation;
3535
use crate::error::{timestamp_ms_to_utc, Result};
36+
use crate::{Error, ErrorKind};
3637

3738
/// Reference to [`ViewMetadata`].
3839
pub type ViewMetadataRef = Arc<ViewMetadata>;
3940

41+
// ID of the initial version of views
4042
pub(crate) static INITIAL_VIEW_VERSION_ID: i32 = 1;
4143

44+
/// Property key for allowing to drop dialects when replacing a view.
45+
pub const VIEW_PROPERTY_REPLACE_DROP_DIALECT_ALLOWED: &str = "replace.drop-dialect.allowed";
46+
/// Default value for the property key for allowing to drop dialects when replacing a view.
47+
pub const VIEW_PROPERTY_REPLACE_DROP_DIALECT_ALLOWED_DEFAULT: bool = false;
48+
/// Property key for the number of history entries to keep.
49+
pub const VIEW_PROPERTY_VERSION_HISTORY_SIZE: &str = "version.history.num-entries";
50+
/// Default value for the property key for the number of history entries to keep.
51+
pub const VIEW_PROPERTY_VERSION_HISTORY_SIZE_DEFAULT: usize = 10;
52+
4253
#[derive(Debug, PartialEq, Deserialize, Eq, Clone)]
4354
#[serde(try_from = "ViewMetadataEnum", into = "ViewMetadataEnum")]
4455
/// Fields for the version 1 of the view metadata.
@@ -60,14 +71,20 @@ pub struct ViewMetadata {
6071
/// change to current-version-id
6172
pub(crate) version_log: Vec<ViewVersionLog>,
6273
/// A list of schemas, stored as objects with schema-id.
63-
pub(crate) schemas: HashMap<i32, SchemaRef>,
74+
pub(crate) schemas: HashMap<SchemaId, SchemaRef>,
6475
/// A string to string map of view properties.
6576
/// Properties are used for metadata such as comment and for settings that
6677
/// affect view maintenance. This is not intended to be used for arbitrary metadata.
6778
pub(crate) properties: HashMap<String, String>,
6879
}
6980

7081
impl ViewMetadata {
82+
/// Convert this View Metadata into a builder for modification.
83+
#[must_use]
84+
pub fn into_builder(self) -> ViewMetadataBuilder {
85+
ViewMetadataBuilder::new_from_metadata(self)
86+
}
87+
7188
/// Returns format version of this metadata.
7289
#[inline]
7390
pub fn format_version(&self) -> ViewFormatVersion {
@@ -143,65 +160,36 @@ impl ViewMetadata {
143160
pub fn history(&self) -> &[ViewVersionLog] {
144161
&self.version_log
145162
}
146-
}
147-
148-
/// Manipulating view metadata.
149-
pub struct ViewMetadataBuilder(ViewMetadata);
150-
151-
impl ViewMetadataBuilder {
152-
/// Creates a new view metadata builder from the given view metadata.
153-
pub fn new(origin: ViewMetadata) -> Self {
154-
Self(origin)
155-
}
156-
157-
/// Creates a new view metadata builder from the given view creation.
158-
pub fn from_view_creation(view_creation: ViewCreation) -> Result<Self> {
159-
let ViewCreation {
160-
location,
161-
schema,
162-
properties,
163-
name: _,
164-
representations,
165-
default_catalog,
166-
default_namespace,
167-
summary,
168-
} = view_creation;
169-
let initial_version_id = super::INITIAL_VIEW_VERSION_ID;
170-
let version = ViewVersion::builder()
171-
.with_default_catalog(default_catalog)
172-
.with_default_namespace(default_namespace)
173-
.with_representations(representations)
174-
.with_schema_id(schema.schema_id())
175-
.with_summary(summary)
176-
.with_timestamp_ms(Utc::now().timestamp_millis())
177-
.with_version_id(initial_version_id)
178-
.build();
179-
180-
let versions = HashMap::from_iter(vec![(initial_version_id, version.into())]);
181-
182-
let view_metadata = ViewMetadata {
183-
format_version: ViewFormatVersion::V1,
184-
view_uuid: Uuid::now_v7(),
185-
location,
186-
current_version_id: initial_version_id,
187-
versions,
188-
version_log: Vec::new(),
189-
schemas: HashMap::from_iter(vec![(schema.schema_id(), Arc::new(schema))]),
190-
properties,
191-
};
192163

193-
Ok(Self(view_metadata))
164+
/// Validate the view metadata.
165+
pub(super) fn validate(&self) -> Result<()> {
166+
self.validate_current_version_id()?;
167+
self.validate_current_schema_id()?;
168+
Ok(())
194169
}
195170

196-
/// Changes uuid of view metadata.
197-
pub fn assign_uuid(mut self, uuid: Uuid) -> Result<Self> {
198-
self.0.view_uuid = uuid;
199-
Ok(self)
171+
fn validate_current_version_id(&self) -> Result<()> {
172+
if !self.versions.contains_key(&self.current_version_id) {
173+
return Err(Error::new(
174+
ErrorKind::DataInvalid,
175+
format!(
176+
"No version exists with the current version id {}.",
177+
self.current_version_id
178+
),
179+
));
180+
}
181+
Ok(())
200182
}
201183

202-
/// Returns the new view metadata after changes.
203-
pub fn build(self) -> Result<ViewMetadata> {
204-
Ok(self.0)
184+
fn validate_current_schema_id(&self) -> Result<()> {
185+
let schema_id = self.current_version().schema_id();
186+
if !self.schemas.contains_key(&schema_id) {
187+
return Err(Error::new(
188+
ErrorKind::DataInvalid,
189+
format!("No schema exists with the schema id {}.", schema_id),
190+
));
191+
}
192+
Ok(())
205193
}
206194
}
207195

@@ -258,7 +246,7 @@ pub(super) mod _serde {
258246
use crate::spec::table_metadata::_serde::VersionNumber;
259247
use crate::spec::view_version::_serde::ViewVersionV1;
260248
use crate::spec::{ViewMetadata, ViewVersion};
261-
use crate::{Error, ErrorKind};
249+
use crate::Error;
262250

263251
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
264252
#[serde(untagged)]
@@ -326,28 +314,8 @@ pub(super) mod _serde {
326314
.map(|x| Ok((x.version_id, Arc::new(ViewVersion::from(x)))))
327315
.collect::<Result<Vec<_>, Error>>()?,
328316
);
329-
// Make sure at least the current schema exists
330-
let current_version =
331-
versions
332-
.get(&value.current_version_id)
333-
.ok_or(self::Error::new(
334-
ErrorKind::DataInvalid,
335-
format!(
336-
"No version exists with the current version id {}.",
337-
value.current_version_id
338-
),
339-
))?;
340-
if !schemas.contains_key(&current_version.schema_id()) {
341-
return Err(self::Error::new(
342-
ErrorKind::DataInvalid,
343-
format!(
344-
"No schema exists with the schema id {}.",
345-
current_version.schema_id()
346-
),
347-
));
348-
}
349317

350-
Ok(ViewMetadata {
318+
let view_metadata = ViewMetadata {
351319
format_version: ViewFormatVersion::V1,
352320
view_uuid: value.view_uuid,
353321
location: value.location,
@@ -356,7 +324,9 @@ pub(super) mod _serde {
356324
current_version_id: value.current_version_id,
357325
versions,
358326
version_log: value.version_log,
359-
})
327+
};
328+
view_metadata.validate()?;
329+
Ok(view_metadata)
360330
}
361331
}
362332

@@ -423,7 +393,7 @@ impl Display for ViewFormatVersion {
423393
}
424394

425395
#[cfg(test)]
426-
mod tests {
396+
pub(crate) mod tests {
427397
use std::collections::HashMap;
428398
use std::fs;
429399
use std::sync::Arc;
@@ -435,7 +405,7 @@ mod tests {
435405
use super::{ViewFormatVersion, ViewMetadataBuilder, ViewVersionLog};
436406
use crate::spec::{
437407
NestedField, PrimitiveType, Schema, SqlViewRepresentation, Type, ViewMetadata,
438-
ViewRepresentations, ViewVersion,
408+
ViewRepresentations, ViewVersion, INITIAL_VIEW_VERSION_ID,
439409
};
440410
use crate::{NamespaceIdent, ViewCreation};
441411

@@ -449,7 +419,7 @@ mod tests {
449419
assert_eq!(parsed_json_value, desered_type);
450420
}
451421

452-
fn get_test_view_metadata(file_name: &str) -> ViewMetadata {
422+
pub(crate) fn get_test_view_metadata(file_name: &str) -> ViewMetadata {
453423
let path = format!("testdata/view_metadata/{}", file_name);
454424
let metadata: String = fs::read_to_string(path).unwrap();
455425

@@ -578,13 +548,14 @@ mod tests {
578548
let metadata = ViewMetadataBuilder::from_view_creation(creation)
579549
.unwrap()
580550
.build()
581-
.unwrap();
551+
.unwrap()
552+
.metadata;
582553

583554
assert_eq!(
584555
metadata.location(),
585556
"s3://bucket/warehouse/default.db/event_agg"
586557
);
587-
assert_eq!(metadata.current_version_id(), 1);
558+
assert_eq!(metadata.current_version_id(), INITIAL_VIEW_VERSION_ID);
588559
assert_eq!(metadata.versions().count(), 1);
589560
assert_eq!(metadata.schemas_iter().count(), 1);
590561
assert_eq!(metadata.properties().len(), 0);
@@ -652,9 +623,9 @@ mod tests {
652623
#[test]
653624
fn test_view_builder_assign_uuid() {
654625
let metadata = get_test_view_metadata("ViewMetadataV1Valid.json");
655-
let metadata_builder = ViewMetadataBuilder::new(metadata);
626+
let metadata_builder = metadata.into_builder();
656627
let uuid = Uuid::new_v4();
657-
let metadata = metadata_builder.assign_uuid(uuid).unwrap().build().unwrap();
628+
let metadata = metadata_builder.assign_uuid(uuid).build().unwrap().metadata;
658629
assert_eq!(metadata.uuid(), uuid);
659630
}
660631

0 commit comments

Comments
 (0)