Skip to content

Commit a807eb8

Browse files
committed
add tests for string_view type
1 parent 5a57951 commit a807eb8

File tree

5 files changed

+175
-1
lines changed

5 files changed

+175
-1
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Copyright 2021 Datafuse Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use crate::arrow::array::BinaryViewArrayGeneric;
16+
use crate::arrow::array::MutableBinaryViewArray;
17+
use crate::arrow::array::ViewType;
18+
19+
impl<T: ViewType + ?Sized, P: AsRef<T>> FromIterator<Option<P>> for BinaryViewArrayGeneric<T> {
20+
#[inline]
21+
fn from_iter<I: IntoIterator<Item = Option<P>>>(iter: I) -> Self {
22+
MutableBinaryViewArray::<T>::from_iter(iter).into()
23+
}
24+
}

src/common/arrow/src/arrow/array/binview/mod.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
mod ffi;
1717
pub(crate) mod fmt;
18+
mod from;
1819
mod iterator;
1920
mod mutable;
2021
mod view;
@@ -23,6 +24,7 @@ mod private {
2324
pub trait Sealed: Send + Sync {}
2425

2526
impl Sealed for str {}
27+
2628
impl Sealed for [u8] {}
2729
}
2830

@@ -157,6 +159,7 @@ impl<T: ViewType + ?Sized> Clone for BinaryViewArrayGeneric<T> {
157159
}
158160

159161
unsafe impl<T: ViewType + ?Sized> Send for BinaryViewArrayGeneric<T> {}
162+
160163
unsafe impl<T: ViewType + ?Sized> Sync for BinaryViewArrayGeneric<T> {}
161164

162165
fn buffers_into_raw<T>(buffers: &[Buffer<T>]) -> Arc<[(*const T, usize)]> {
@@ -254,6 +257,12 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
254257
}
255258
}
256259

260+
/// Returns a new [`BinaryViewArrayGeneric`] from a slice of `&T`.
261+
// Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
262+
pub fn from<V: AsRef<T>, P: AsRef<[Option<V>]>>(slice: P) -> Self {
263+
MutableBinaryViewArray::<T>::from(slice).into()
264+
}
265+
257266
/// Creates an empty [`BinaryViewArrayGeneric`], i.e. whose `.len` is zero.
258267
#[inline]
259268
pub fn new_empty(data_type: DataType) -> Self {

src/common/arrow/src/arrow/array/binview/mutable.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,16 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
175175
payload[0..4].copy_from_slice(&len.to_le_bytes());
176176

177177
if len <= 12 {
178+
// | len | prefix | remaining(zero-padded) |
179+
// ^ ^ ^
180+
// | 4 bytes | 4 bytes | 8 bytes |
178181
payload[4..4 + bytes.len()].copy_from_slice(bytes);
179182
} else {
183+
// | len | prefix | buffer_index | offset |
184+
// ^ ^ ^ ^
185+
// | 4 bytes | 4 bytes | 4 bytes | 4 bytes |
186+
//
187+
// buffer index + offset -> real binary data
180188
self.total_buffer_len += bytes.len();
181189
let required_cap = self.in_progress_buffer.len() + bytes.len();
182190
if self.in_progress_buffer.capacity() < required_cap {
@@ -192,6 +200,7 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
192200
let offset = self.in_progress_buffer.len() as u32;
193201
self.in_progress_buffer.extend_from_slice(bytes);
194202

203+
// set prefix
195204
unsafe { payload[4..8].copy_from_slice(bytes.get_unchecked(0..4)) };
196205
let buffer_idx: u32 = self.completed_buffers.len().try_into().unwrap();
197206
payload[8..12].copy_from_slice(&buffer_idx.to_le_bytes());
@@ -347,12 +356,13 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
347356
let len = v.length;
348357

349358
// view layout:
359+
// for non-inlined layout:
350360
// length: 4 bytes
351361
// prefix: 4 bytes
352362
// buffer_index: 4 bytes
353363
// offset: 4 bytes
354364

355-
// inlined layout:
365+
// for inlined layout:
356366
// length: 4 bytes
357367
// data: 12 bytes
358368
let bytes = if len <= 12 {
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// Copyright 2021 Datafuse Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use databend_common_arrow::arrow::array::Array;
16+
use databend_common_arrow::arrow::array::BinaryViewArray;
17+
use databend_common_arrow::arrow::array::Utf8ViewArray;
18+
use databend_common_arrow::arrow::bitmap::Bitmap;
19+
use databend_common_arrow::arrow::datatypes::DataType;
20+
21+
/// Exercises the basic accessors of a [`Utf8ViewArray`] collected from optional
/// strings: value lookup, validity, rebuilding from raw parts, and slicing.
#[test]
fn basics_string_view() {
    let short = "hello";
    // larger than 12 bytes.
    let long = "Databend Cloud is a Cost-Effective alternative to Snowflake.";

    let array: Utf8ViewArray = vec![Some(short), None, Some(long)]
        .into_iter()
        .collect();

    // Values: the null slot reads back as an empty string.
    assert_eq!(array.value(0), "hello");
    assert_eq!(array.value(1), "");
    assert_eq!(array.value(2), long);
    assert_eq!(unsafe { array.value_unchecked(2) }, long);

    // Validity: slots 0 and 2 are set, slot 1 is null.
    let expected_validity = Bitmap::from_u8_slice([0b00000101], 3);
    assert_eq!(array.validity(), Some(&expected_validity));
    assert!(array.is_valid(0));
    assert!(!array.is_valid(1));
    assert!(array.is_valid(2));

    // Rebuilding the array from its own parts must yield an equal array.
    let views = array.views().clone();
    let buffers = array.data_buffers().clone();
    let validity = array.validity().cloned();
    let array2 = Utf8ViewArray::new_unchecked(
        DataType::Utf8View,
        views,
        buffers,
        validity,
        array.total_bytes_len(),
        array.total_buffer_len(),
    );
    assert_eq!(array, array2);

    // Slicing off the first element keeps the remaining two values intact.
    let tail = array.sliced(1, 2);
    assert_eq!(tail.value(0), "");
    assert_eq!(tail.value(1), long);
}
69+
70+
/// Exercises the basic accessors of a [`BinaryViewArray`] collected from
/// optional byte vectors: value lookup, validity, rebuilding from raw parts,
/// and slicing.
#[test]
fn basics_binary_view() {
    let data = vec![
        Some(b"hello".to_vec()),
        None,
        // larger than 12 bytes.
        Some(b"Databend Cloud is a Cost-Effective alternative to Snowflake.".to_vec()),
    ];

    let array: BinaryViewArray = data.into_iter().collect();

    // The null slot reads back as an empty byte slice.
    assert_eq!(array.value(0), b"hello");
    assert_eq!(array.value(1), b"");
    assert_eq!(
        array.value(2),
        b"Databend Cloud is a Cost-Effective alternative to Snowflake."
    );
    assert_eq!(
        unsafe { array.value_unchecked(2) },
        b"Databend Cloud is a Cost-Effective alternative to Snowflake."
    );
    // Slots 0 and 2 are valid, slot 1 is null.
    assert_eq!(
        array.validity(),
        Some(&Bitmap::from_u8_slice([0b00000101], 3))
    );
    assert!(array.is_valid(0));
    assert!(!array.is_valid(1));
    assert!(array.is_valid(2));

    // Rebuild the array from its own parts. A binary view array must carry the
    // `BinaryView` logical type (not `Utf8View`), so the copy matches the
    // original in data type as well as in content.
    let array2 = BinaryViewArray::new_unchecked(
        DataType::BinaryView,
        array.views().clone(),
        array.data_buffers().clone(),
        array.validity().cloned(),
        array.total_bytes_len(),
        array.total_buffer_len(),
    );

    assert_eq!(array, array2);

    // Slicing off the first element keeps the remaining two values intact.
    let array = array.sliced(1, 2);

    assert_eq!(array.value(0), b"");
    assert_eq!(
        array.value(1),
        b"Databend Cloud is a Cost-Effective alternative to Snowflake."
    );
}
118+
119+
/// Checks that the inherent `from` constructors of both view array types
/// record `None` entries as null in the validity bitmap.
#[test]
fn from() {
    let expected_validity = Bitmap::from([true, true, false]);

    let strings = Utf8ViewArray::from([Some("hello"), Some(" "), None]);
    assert_eq!(strings.validity().unwrap(), &expected_validity);

    let binaries = BinaryViewArray::from([Some(b"hello".to_vec()), Some(b" ".to_vec()), None]);
    assert_eq!(binaries.validity().unwrap(), &expected_validity);
}

src/common/arrow/tests/it/arrow/array/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
// limitations under the License.
1515

1616
mod binary;
17+
mod binview;
1718
mod boolean;
1819
mod dictionary;
1920
mod equal;

0 commit comments

Comments
 (0)