Skip to content

Commit dec7e82

Browse files
authored
Merge branch 'main' into create-tests
2 parents 396a0d2 + c0398f2 commit dec7e82

File tree

20 files changed

+624
-82
lines changed

20 files changed

+624
-82
lines changed

CONTRIBUTING.md

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,13 +114,11 @@ cargo 1.69.0 (6e9a83356 2023-04-12)
114114

115115
#### Install Docker or Podman
116116

117-
Currently, iceberg-rust uses Docker to set up environment for integration tests. Podman is also supported.
117+
Currently, iceberg-rust uses Docker to set up the environment for integration tests. Native Docker has some limitations (see https://github.com/apache/iceberg-rust/pull/748 for details), so please use OrbStack or Podman instead.
118118

119-
You can learn how to install Docker from [here](https://docs.docker.com/get-docker/).
119+
macOS users can install [OrbStack](https://orbstack.dev/) as a Docker alternative.
120120

121-
For macos users, you can install [OrbStack](https://orbstack.dev/) as a docker alternative.
122-
123-
For podman users, refer to [Using Podman instead of Docker](docs/contributing/podman.md)
121+
For Podman, refer to [Using Podman instead of Docker](docs/contributing/podman.md).
124122

125123
## Build
126124

crates/catalog/rest/testdata/rest_catalog/docker-compose.yaml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@ networks:
2020

2121
services:
2222
rest:
23-
image: tabulario/iceberg-rest:0.10.0
23+
image: apache/iceberg-rest-fixture
2424
environment:
2525
- AWS_ACCESS_KEY_ID=admin
2626
- AWS_SECRET_ACCESS_KEY=password
2727
- AWS_REGION=us-east-1
28-
- CATALOG_CATOLOG__IMPL=org.apache.iceberg.jdbc.JdbcCatalog
28+
- CATALOG_CATALOG__IMPL=org.apache.iceberg.jdbc.JdbcCatalog
2929
- CATALOG_URI=jdbc:sqlite:file:/tmp/iceberg_rest_mode=memory
3030
- CATALOG_WAREHOUSE=s3://icebergdata/demo
3131
- CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
@@ -34,8 +34,6 @@ services:
3434
- minio
3535
networks:
3636
rest_bridge:
37-
aliases:
38-
- icebergdata.minio
3937
expose:
4038
- 8181
4139

crates/iceberg/src/arrow/record_batch_projector.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use crate::error::Result;
2424
use crate::{Error, ErrorKind};
2525

2626
/// Helps project specific fields from a `RecordBatch` according to their field IDs.
27-
#[derive(Clone)]
27+
#[derive(Clone, Debug)]
2828
pub(crate) struct RecordBatchProjector {
2929
// A vector of vectors, where each inner vector represents the index path to access a specific field in a nested structure.
3030
// E.g. [[0], [1, 2]] means the first field is accessed directly from the first column,

crates/iceberg/src/spec/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
mod datatypes;
2121
mod manifest;
2222
mod manifest_list;
23+
mod name_mapping;
2324
mod partition;
2425
mod schema;
2526
mod snapshot;
Lines changed: 307 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,307 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Iceberg name mapping.
19+
20+
use serde::{Deserialize, Serialize};
21+
use serde_with::{serde_as, DefaultOnNull};
22+
23+
/// Iceberg fallback field name to ID mapping.
///
/// Because of `#[serde(transparent)]`, this type (de)serializes as the bare
/// list held in `root` rather than as an object with a `root` key.
24+
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
25+
#[serde(transparent)]
26+
pub struct NameMapping {
27+
/// Top-level mapped fields; nested fields hang off each entry's `fields`.
pub root: Vec<MappedField>,
28+
}
29+
30+
/// Maps field names to IDs.
31+
#[serde_as]
32+
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
33+
#[serde(rename_all = "kebab-case")]
34+
pub struct MappedField {
35+
#[serde(skip_serializing_if = "Option::is_none")]
36+
pub field_id: Option<i32>,
37+
pub names: Vec<String>,
38+
#[serde(default)]
39+
#[serde(skip_serializing_if = "Vec::is_empty")]
40+
#[serde_as(deserialize_as = "DefaultOnNull")]
41+
pub fields: Vec<MappedField>,
42+
}
43+
44+
#[cfg(test)]
45+
mod tests {
46+
use super::*;
47+
48+
// Deserializes a single mapped field carrying both `field-id` and `names`,
// once without a `fields` key and once with `fields` set to `null`.
#[test]
49+
fn test_json_mapped_field_deserialization() {
50+
let expected = MappedField {
51+
field_id: Some(1),
52+
names: vec!["id".to_string(), "record_id".to_string()],
53+
fields: vec![],
54+
};
55+
let mapped_field = r#"
56+
{
57+
"field-id": 1,
58+
"names": ["id", "record_id"]
59+
}
60+
"#;
61+
62+
let mapped_field: MappedField = serde_json::from_str(mapped_field).unwrap();
63+
assert_eq!(mapped_field, expected);
64+
65+
// An explicit `null` for `fields` must yield the same value as an absent key.
let mapped_field_with_null_fields = r#"
66+
{
67+
"field-id": 1,
68+
"names": ["id", "record_id"],
69+
"fields": null
70+
}
71+
"#;
72+
73+
let mapped_field_with_null_fields: MappedField =
74+
serde_json::from_str(mapped_field_with_null_fields).unwrap();
75+
assert_eq!(mapped_field_with_null_fields, expected);
76+
}
77+
78+
// Deserializes a mapped field whose `names` array is empty, once without a
// `fields` key and once with `fields` set to `null`.
#[test]
79+
fn test_json_mapped_field_no_names_deserialization() {
80+
let expected = MappedField {
81+
field_id: Some(1),
82+
names: vec![],
83+
fields: vec![],
84+
};
85+
let mapped_field = r#"
86+
{
87+
"field-id": 1,
88+
"names": []
89+
}
90+
"#;
91+
92+
let mapped_field: MappedField = serde_json::from_str(mapped_field).unwrap();
93+
assert_eq!(mapped_field, expected);
94+
95+
// An explicit `null` for `fields` must yield the same value as an absent key.
let mapped_field_with_null_fields = r#"
96+
{
97+
"field-id": 1,
98+
"names": [],
99+
"fields": null
100+
}
101+
"#;
102+
103+
let mapped_field_with_null_fields: MappedField =
104+
serde_json::from_str(mapped_field_with_null_fields).unwrap();
105+
assert_eq!(mapped_field_with_null_fields, expected);
106+
}
107+
108+
// Deserializes a mapped field with no `field-id` key, expecting `field_id`
// to come back as `None`; covers both an absent and a `null` `fields` key.
#[test]
109+
fn test_json_mapped_field_no_field_id_deserialization() {
110+
let expected = MappedField {
111+
field_id: None,
112+
names: vec!["id".to_string(), "record_id".to_string()],
113+
fields: vec![],
114+
};
115+
let mapped_field = r#"
116+
{
117+
"names": ["id", "record_id"]
118+
}
119+
"#;
120+
121+
let mapped_field: MappedField = serde_json::from_str(mapped_field).unwrap();
122+
assert_eq!(mapped_field, expected);
123+
124+
// An explicit `null` for `fields` must yield the same value as an absent key.
let mapped_field_with_null_fields = r#"
125+
{
126+
"names": ["id", "record_id"],
127+
"fields": null
128+
}
129+
"#;
130+
131+
let mapped_field_with_null_fields: MappedField =
132+
serde_json::from_str(mapped_field_with_null_fields).unwrap();
133+
assert_eq!(mapped_field_with_null_fields, expected);
134+
}
135+
136+
// Deserializes a top-level JSON array into a `NameMapping`, including one
// entry (`location`) that carries nested `fields`.
#[test]
137+
fn test_json_name_mapping_deserialization() {
138+
let name_mapping = r#"
139+
[
140+
{
141+
"field-id": 1,
142+
"names": [
143+
"id",
144+
"record_id"
145+
]
146+
},
147+
{
148+
"field-id": 2,
149+
"names": [
150+
"data"
151+
]
152+
},
153+
{
154+
"field-id": 3,
155+
"names": [
156+
"location"
157+
],
158+
"fields": [
159+
{
160+
"field-id": 4,
161+
"names": [
162+
"latitude",
163+
"lat"
164+
]
165+
},
166+
{
167+
"field-id": 5,
168+
"names": [
169+
"longitude",
170+
"long"
171+
]
172+
}
173+
]
174+
}
175+
]
176+
"#;
177+
178+
let name_mapping: NameMapping = serde_json::from_str(name_mapping).unwrap();
179+
// Entries without a `fields` key are expected to come back with an empty vector.
assert_eq!(name_mapping, NameMapping {
180+
root: vec![
181+
MappedField {
182+
field_id: Some(1),
183+
names: vec!["id".to_string(), "record_id".to_string()],
184+
fields: vec![]
185+
},
186+
MappedField {
187+
field_id: Some(2),
188+
names: vec!["data".to_string()],
189+
fields: vec![]
190+
},
191+
MappedField {
192+
field_id: Some(3),
193+
names: vec!["location".to_string()],
194+
fields: vec![
195+
MappedField {
196+
field_id: Some(4),
197+
names: vec!["latitude".to_string(), "lat".to_string()],
198+
fields: vec![]
199+
},
200+
MappedField {
201+
field_id: Some(5),
202+
names: vec!["longitude".to_string(), "long".to_string()],
203+
fields: vec![]
204+
},
205+
]
206+
}
207+
]
208+
});
209+
}
210+
211+
// Serializes a `NameMapping` with several levels of nesting and compares the
// result against an exact compact JSON string.
#[test]
212+
fn test_json_name_mapping_serialization() {
213+
let name_mapping = NameMapping {
214+
root: vec![
215+
MappedField {
216+
field_id: None,
217+
names: vec!["foo".to_string()],
218+
fields: vec![],
219+
},
220+
MappedField {
221+
field_id: Some(2),
222+
names: vec!["bar".to_string()],
223+
fields: vec![],
224+
},
225+
MappedField {
226+
field_id: Some(3),
227+
names: vec!["baz".to_string()],
228+
fields: vec![],
229+
},
230+
MappedField {
231+
field_id: Some(4),
232+
names: vec!["qux".to_string()],
233+
fields: vec![MappedField {
234+
field_id: Some(5),
235+
names: vec!["element".to_string()],
236+
fields: vec![],
237+
}],
238+
},
239+
MappedField {
240+
field_id: Some(6),
241+
names: vec!["quux".to_string()],
242+
fields: vec![
243+
MappedField {
244+
field_id: Some(7),
245+
names: vec!["key".to_string()],
246+
fields: vec![],
247+
},
248+
MappedField {
249+
field_id: Some(8),
250+
names: vec!["value".to_string()],
251+
fields: vec![
252+
MappedField {
253+
field_id: Some(9),
254+
names: vec!["key".to_string()],
255+
fields: vec![],
256+
},
257+
MappedField {
258+
field_id: Some(10),
259+
names: vec!["value".to_string()],
260+
fields: vec![],
261+
},
262+
],
263+
},
264+
],
265+
},
266+
MappedField {
267+
field_id: Some(11),
268+
names: vec!["location".to_string()],
269+
fields: vec![MappedField {
270+
field_id: Some(12),
271+
names: vec!["element".to_string()],
272+
fields: vec![
273+
MappedField {
274+
field_id: Some(13),
275+
names: vec!["latitude".to_string()],
276+
fields: vec![],
277+
},
278+
MappedField {
279+
field_id: Some(14),
280+
names: vec!["longitude".to_string()],
281+
fields: vec![],
282+
},
283+
],
284+
}],
285+
},
286+
MappedField {
287+
field_id: Some(15),
288+
names: vec!["person".to_string()],
289+
fields: vec![
290+
MappedField {
291+
field_id: Some(16),
292+
names: vec!["name".to_string()],
293+
fields: vec![],
294+
},
295+
MappedField {
296+
field_id: Some(17),
297+
names: vec!["age".to_string()],
298+
fields: vec![],
299+
},
300+
],
301+
},
302+
],
303+
};
304+
// In the expected output a `None` field ID and empty `fields` vectors are
// omitted entirely, and the mapping itself is a bare top-level array.
let expected = r#"[{"names":["foo"]},{"field-id":2,"names":["bar"]},{"field-id":3,"names":["baz"]},{"field-id":4,"names":["qux"],"fields":[{"field-id":5,"names":["element"]}]},{"field-id":6,"names":["quux"],"fields":[{"field-id":7,"names":["key"]},{"field-id":8,"names":["value"],"fields":[{"field-id":9,"names":["key"]},{"field-id":10,"names":["value"]}]}]},{"field-id":11,"names":["location"],"fields":[{"field-id":12,"names":["element"],"fields":[{"field-id":13,"names":["latitude"]},{"field-id":14,"names":["longitude"]}]}]},{"field-id":15,"names":["person"],"fields":[{"field-id":16,"names":["name"]},{"field-id":17,"names":["age"]}]}]"#;
305+
assert_eq!(serde_json::to_string(&name_mapping).unwrap(), expected);
306+
}
307+
}

0 commit comments

Comments
 (0)