Skip to content

Commit 0aa114a

Browse files
committed
add new string view type
1 parent 2f65a22 commit 0aa114a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+4079
-233
lines changed

Cargo.lock

Lines changed: 67 additions & 45 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/common/arrow/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ arrow-format = { workspace = true }
9090
bitpacking = "0.8.0"
9191
byteorder = { workspace = true }
9292
bytes = "^1"
93+
indexmap = "2.2.3"
9394
log = { workspace = true }
9495
num = { version = "0.4", default-features = false, features = ["std"] }
9596
ordered-float = "3.7.0"

src/common/arrow/src/arrow/array/binary/ffi.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ unsafe impl<O: Offset> ToFfi for BinaryArray<O> {
2626
fn buffers(&self) -> Vec<Option<*const u8>> {
2727
vec![
2828
self.validity.as_ref().map(|x| x.as_ptr()),
29-
Some(self.offsets.buffer().as_ptr().cast::<u8>()),
30-
Some(self.values.as_ptr().cast::<u8>()),
29+
Some(self.offsets.buffer().data_ptr().cast::<u8>()),
30+
Some(self.values.data_ptr().cast::<u8>()),
3131
]
3232
}
3333

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
// Copyright (c) 2020 Ritchie Vink
2+
// Copyright 2021 Datafuse Labs
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
16+
use std::sync::atomic::AtomicU64;
17+
use std::sync::atomic::Ordering;
18+
use std::sync::Arc;
19+
20+
use crate::arrow::array::binview::BinaryViewArrayGeneric;
21+
use crate::arrow::array::binview::View;
22+
use crate::arrow::array::binview::ViewType;
23+
use crate::arrow::array::FromFfi;
24+
use crate::arrow::array::ToFfi;
25+
use crate::arrow::bitmap::align;
26+
use crate::arrow::error::Result;
27+
use crate::arrow::ffi;
28+
29+
unsafe impl<T: ViewType + ?Sized> ToFfi for BinaryViewArrayGeneric<T> {
30+
fn buffers(&self) -> Vec<Option<*const u8>> {
31+
let mut buffers = Vec::with_capacity(self.buffers.len() + 2);
32+
buffers.push(self.validity.as_ref().map(|x| x.as_ptr()));
33+
buffers.push(Some(self.views.data_ptr().cast::<u8>()));
34+
buffers.extend(self.buffers.iter().map(|b| Some(b.data_ptr())));
35+
buffers
36+
}
37+
38+
fn offset(&self) -> Option<usize> {
39+
let offset = self.views.offset();
40+
if let Some(bitmap) = self.validity.as_ref() {
41+
if bitmap.offset() == offset {
42+
Some(offset)
43+
} else {
44+
None
45+
}
46+
} else {
47+
Some(offset)
48+
}
49+
}
50+
51+
fn to_ffi_aligned(&self) -> Self {
52+
let offset = self.views.offset();
53+
54+
let validity = self.validity.as_ref().map(|bitmap| {
55+
if bitmap.offset() == offset {
56+
bitmap.clone()
57+
} else {
58+
align(bitmap, offset)
59+
}
60+
});
61+
62+
Self {
63+
data_type: self.data_type.clone(),
64+
validity,
65+
views: self.views.clone(),
66+
buffers: self.buffers.clone(),
67+
raw_buffers: self.raw_buffers.clone(),
68+
phantom: Default::default(),
69+
total_bytes_len: AtomicU64::new(self.total_bytes_len.load(Ordering::Relaxed)),
70+
total_buffer_len: self.total_buffer_len,
71+
}
72+
}
73+
}
74+
75+
impl<T: ViewType + ?Sized, A: ffi::ArrowArrayRef> FromFfi<A> for BinaryViewArrayGeneric<T> {
76+
unsafe fn try_from_ffi(array: A) -> Result<Self> {
77+
let data_type = array.data_type().clone();
78+
79+
let validity = unsafe { array.validity() }?;
80+
let views = unsafe { array.buffer::<View>(1) }?;
81+
82+
// 2 - validity + views
83+
let n_buffers = array.n_buffers();
84+
let mut remaining_buffers = n_buffers - 2;
85+
if remaining_buffers <= 1 {
86+
return Ok(Self::new_unchecked_unknown_md(
87+
data_type,
88+
views,
89+
Arc::from([]),
90+
validity,
91+
None,
92+
));
93+
}
94+
95+
let n_variadic_buffers = remaining_buffers - 1;
96+
let variadic_buffer_offset = n_buffers - 1;
97+
98+
let variadic_buffer_sizes =
99+
array.buffer_known_len::<i64>(variadic_buffer_offset, n_variadic_buffers)?;
100+
remaining_buffers -= 1;
101+
102+
let mut variadic_buffers = Vec::with_capacity(remaining_buffers);
103+
104+
let offset = 2;
105+
for (i, &size) in (offset..remaining_buffers + offset).zip(variadic_buffer_sizes.iter()) {
106+
let values = unsafe { array.buffer_known_len::<u8>(i, size as usize) }?;
107+
variadic_buffers.push(values);
108+
}
109+
110+
Ok(Self::new_unchecked_unknown_md(
111+
data_type,
112+
views,
113+
Arc::from(variadic_buffers),
114+
validity,
115+
None,
116+
))
117+
}
118+
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright (c) 2020 Ritchie Vink
2+
// Copyright 2021 Datafuse Labs
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
16+
use std::fmt::Debug;
17+
use std::fmt::Formatter;
18+
use std::fmt::Result;
19+
use std::fmt::Write;
20+
21+
use crate::arrow::array::binview::BinaryViewArray;
22+
use crate::arrow::array::binview::BinaryViewArrayGeneric;
23+
use crate::arrow::array::binview::Utf8ViewArray;
24+
use crate::arrow::array::binview::ViewType;
25+
use crate::arrow::array::fmt::write_vec;
26+
use crate::arrow::array::Array;
27+
28+
pub fn write_value<'a, T: ViewType + ?Sized, W: Write>(
29+
array: &'a BinaryViewArrayGeneric<T>,
30+
index: usize,
31+
f: &mut W,
32+
) -> Result
33+
where
34+
&'a T: Debug,
35+
{
36+
let bytes = array.value(index).to_bytes();
37+
let writer = |f: &mut W, index| write!(f, "{}", bytes[index]);
38+
39+
write_vec(f, writer, None, bytes.len(), "None", false)
40+
}
41+
42+
impl Debug for BinaryViewArray {
43+
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
44+
let writer = |f: &mut Formatter, index| write_value(self, index, f);
45+
write!(f, "BinaryViewArray")?;
46+
write_vec(f, writer, self.validity(), self.len(), "None", false)
47+
}
48+
}
49+
50+
impl Debug for Utf8ViewArray {
51+
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
52+
let writer = |f: &mut Formatter, index| write!(f, "{}", self.value(index));
53+
write!(f, "Utf8ViewArray")?;
54+
write_vec(f, writer, self.validity(), self.len(), "None", false)
55+
}
56+
}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Copyright 2021 Datafuse Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use crate::arrow::array::binview::mutable::MutableBinaryViewArray;
16+
use crate::arrow::array::binview::BinaryViewArrayGeneric;
17+
use crate::arrow::array::binview::ViewType;
18+
use crate::arrow::array::ArrayAccessor;
19+
use crate::arrow::array::ArrayValuesIter;
20+
use crate::arrow::bitmap::utils::BitmapIter;
21+
use crate::arrow::bitmap::utils::ZipValidity;
22+
23+
/// Index-based access to the values of a [`BinaryViewArrayGeneric`], yielding `&T`.
unsafe impl<'a, T: ViewType + ?Sized> ArrayAccessor<'a> for BinaryViewArrayGeneric<T> {
    type Item = &'a T;

    /// Returns the value at `index` by delegating to the array's own
    /// `value_unchecked`.
    ///
    /// # Safety
    /// No bounds check is performed here; presumably `index` must be smaller
    /// than [`len`](Self::len) (confirm against the trait contract).
    #[inline]
    unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
        self.value_unchecked(index)
    }

    /// Number of values, taken from the number of views.
    #[inline]
    fn len(&self) -> usize {
        let views = &self.views;
        views.len()
    }
}
36+
37+
/// Iterator of values of a [`BinaryViewArrayGeneric`].
38+
pub type BinaryViewValueIter<'a, T> = ArrayValuesIter<'a, BinaryViewArrayGeneric<T>>;
39+
40+
impl<'a, T: ViewType + ?Sized> IntoIterator for &'a BinaryViewArrayGeneric<T> {
41+
type Item = Option<&'a T>;
42+
type IntoIter = ZipValidity<&'a T, BinaryViewValueIter<'a, T>, BitmapIter<'a>>;
43+
44+
fn into_iter(self) -> Self::IntoIter {
45+
self.iter()
46+
}
47+
}
48+
49+
/// Index-based access to the values of a [`MutableBinaryViewArray`], yielding `&T`.
unsafe impl<'a, T: ViewType + ?Sized> ArrayAccessor<'a> for MutableBinaryViewArray<T> {
    type Item = &'a T;

    /// Returns the value at `index` by delegating to the array's own
    /// `value_unchecked`.
    ///
    /// # Safety
    /// No bounds check is performed here; presumably `index` must be smaller
    /// than [`len`](Self::len) (confirm against the trait contract).
    #[inline]
    unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
        self.value_unchecked(index)
    }

    /// Number of values, taken from the length of `views()`.
    #[inline]
    fn len(&self) -> usize {
        let views = self.views();
        views.len()
    }
}
62+
63+
/// Iterator of values of a [`MutableBinaryViewArray`].
64+
pub type MutableBinaryViewValueIter<'a, T> = ArrayValuesIter<'a, MutableBinaryViewArray<T>>;

0 commit comments

Comments
 (0)