Skip to content

Commit eb33e90

Browse files
authored
feat: Add hms catalog layout (#112)
* feat: Add hms catalog layout Signed-off-by: Xuanwo <[email protected]> * Fix Signed-off-by: Xuanwo <[email protected]> * Sort deps Signed-off-by: Xuanwo <[email protected]> --------- Signed-off-by: Xuanwo <[email protected]>
1 parent ffa3a5c commit eb33e90

File tree

4 files changed

+262
-0
lines changed

4 files changed

+262
-0
lines changed

crates/catalog/hms/Cargo.toml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "iceberg-catalog-hms"
20+
version = "0.1.0"
21+
edition = "2021"
22+
23+
categories = ["database"]
24+
description = "Apache Iceberg Hive Metastore Catalog Support"
25+
repository = "https://github.com/apache/iceberg-rust"
26+
license = "Apache-2.0"
27+
keywords = ["iceberg", "hive", "catalog"]
28+
29+
[dependencies]
30+
async-trait = { workspace = true }
31+
hive_metastore = "0.0.1"
32+
iceberg = { workspace = true }
33+
# the thrift upstream suffered from no regular rust release.
34+
#
35+
# [test-rs](https://github.com/tent-rs) is an organization that helps resolves this
36+
# issue. And [tent-thrift](https://github.com/tent-rs/thrift) is a fork of the thrift
37+
# crate, built from the thrift upstream with only version bumped.
38+
thrift = { package = "tent-thrift", version = "0.18.1" }
39+
typed-builder = { workspace = true }

crates/catalog/hms/src/catalog.rs

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use super::utils::*;
19+
use async_trait::async_trait;
20+
use hive_metastore::{TThriftHiveMetastoreSyncClient, ThriftHiveMetastoreSyncClient};
21+
use iceberg::table::Table;
22+
use iceberg::{Catalog, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, TableIdent};
23+
use std::collections::HashMap;
24+
use std::fmt::{Debug, Formatter};
25+
use std::sync::{Arc, Mutex};
26+
use thrift::protocol::{TBinaryInputProtocol, TBinaryOutputProtocol};
27+
use thrift::transport::{
28+
ReadHalf, TBufferedReadTransport, TBufferedWriteTransport, TIoChannel, WriteHalf,
29+
};
30+
use typed_builder::TypedBuilder;
31+
32+
/// Hive metastore Catalog configuration.
33+
#[derive(Debug, TypedBuilder)]
34+
pub struct HmsCatalogConfig {
35+
address: String,
36+
}
37+
38+
/// TODO: We only support binary protocol for now.
39+
type HmsClientType = ThriftHiveMetastoreSyncClient<
40+
TBinaryInputProtocol<TBufferedReadTransport<ReadHalf<thrift::transport::TTcpChannel>>>,
41+
TBinaryOutputProtocol<TBufferedWriteTransport<WriteHalf<thrift::transport::TTcpChannel>>>,
42+
>;
43+
44+
/// # TODO
45+
///
46+
/// we are using the same connection everytime, we should support connection
47+
/// pool in the future.
48+
struct HmsClient(Arc<Mutex<HmsClientType>>);
49+
50+
impl HmsClient {
51+
fn call<T>(&self, f: impl FnOnce(&mut HmsClientType) -> thrift::Result<T>) -> Result<T> {
52+
let mut client = self.0.lock().unwrap();
53+
f(&mut client).map_err(from_thrift_error)
54+
}
55+
}
56+
57+
/// Hive metastore Catalog.
58+
pub struct HmsCatalog {
59+
config: HmsCatalogConfig,
60+
client: HmsClient,
61+
}
62+
63+
impl Debug for HmsCatalog {
64+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
65+
f.debug_struct("HmsCatalog")
66+
.field("config", &self.config)
67+
.finish_non_exhaustive()
68+
}
69+
}
70+
71+
impl HmsCatalog {
72+
/// Create a new hms catalog.
73+
pub fn new(config: HmsCatalogConfig) -> Result<Self> {
74+
let mut channel = thrift::transport::TTcpChannel::new();
75+
channel
76+
.open(config.address.as_str())
77+
.map_err(from_thrift_error)?;
78+
let (i_chan, o_chan) = channel.split().map_err(from_thrift_error)?;
79+
let i_chan = TBufferedReadTransport::new(i_chan);
80+
let o_chan = TBufferedWriteTransport::new(o_chan);
81+
let i_proto = TBinaryInputProtocol::new(i_chan, true);
82+
let o_proto = TBinaryOutputProtocol::new(o_chan, true);
83+
let client = ThriftHiveMetastoreSyncClient::new(i_proto, o_proto);
84+
Ok(Self {
85+
config,
86+
client: HmsClient(Arc::new(Mutex::new(client))),
87+
})
88+
}
89+
}
90+
91+
/// Refer to <https://github.com/apache/iceberg/blob/main/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java> for implementation details.
92+
#[async_trait]
93+
impl Catalog for HmsCatalog {
94+
/// HMS doesn't support nested namespaces.
95+
///
96+
/// We will return empty list if parent is some.
97+
///
98+
/// Align with java implementation: <https://github.com/apache/iceberg/blob/9bd62f79f8cd973c39d14e89163cb1c707470ed2/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java#L305C26-L330>
99+
async fn list_namespaces(
100+
&self,
101+
parent: Option<&NamespaceIdent>,
102+
) -> Result<Vec<NamespaceIdent>> {
103+
let dbs = if parent.is_some() {
104+
return Ok(vec![]);
105+
} else {
106+
self.client.call(|client| client.get_all_databases())?
107+
};
108+
109+
Ok(dbs.into_iter().map(NamespaceIdent::new).collect())
110+
}
111+
112+
async fn create_namespace(
113+
&self,
114+
_namespace: &NamespaceIdent,
115+
_properties: HashMap<String, String>,
116+
) -> Result<Namespace> {
117+
todo!()
118+
}
119+
120+
async fn get_namespace(&self, _namespace: &NamespaceIdent) -> Result<Namespace> {
121+
todo!()
122+
}
123+
124+
async fn namespace_exists(&self, _namespace: &NamespaceIdent) -> Result<bool> {
125+
todo!()
126+
}
127+
128+
async fn update_namespace(
129+
&self,
130+
_namespace: &NamespaceIdent,
131+
_properties: HashMap<String, String>,
132+
) -> Result<()> {
133+
todo!()
134+
}
135+
136+
async fn drop_namespace(&self, _namespace: &NamespaceIdent) -> Result<()> {
137+
todo!()
138+
}
139+
140+
async fn list_tables(&self, _namespace: &NamespaceIdent) -> Result<Vec<TableIdent>> {
141+
todo!()
142+
}
143+
144+
async fn create_table(
145+
&self,
146+
_namespace: &NamespaceIdent,
147+
_creation: TableCreation,
148+
) -> Result<Table> {
149+
todo!()
150+
}
151+
152+
async fn load_table(&self, _table: &TableIdent) -> Result<Table> {
153+
todo!()
154+
}
155+
156+
async fn drop_table(&self, _table: &TableIdent) -> Result<()> {
157+
todo!()
158+
}
159+
160+
async fn stat_table(&self, _table: &TableIdent) -> Result<bool> {
161+
todo!()
162+
}
163+
164+
async fn rename_table(&self, _src: &TableIdent, _dest: &TableIdent) -> Result<()> {
165+
todo!()
166+
}
167+
168+
async fn update_table(&self, _commit: TableCommit) -> Result<Table> {
169+
todo!()
170+
}
171+
}

crates/catalog/hms/src/lib.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Iceberg Hive Metastore Catalog implementation.
19+
20+
#![deny(missing_docs)]
21+
22+
mod catalog;
23+
pub use catalog::*;
24+
25+
mod utils;

crates/catalog/hms/src/utils.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use iceberg::{Error, ErrorKind};
19+
20+
/// Format a thrift error into iceberg error.
21+
pub fn from_thrift_error(error: thrift::Error) -> Error {
22+
Error::new(
23+
ErrorKind::Unexpected,
24+
"operation failed for hitting thrift error".to_string(),
25+
)
26+
.with_source(error)
27+
}

0 commit comments

Comments
 (0)