Skip to content

Commit 0dd76d5

Browse files
committed
add tests for string_view type
1 parent 0aa114a commit 0dd76d5

File tree

9 files changed

+455
-3
lines changed

9 files changed

+455
-3
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Copyright 2021 Datafuse Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use crate::arrow::array::BinaryViewArrayGeneric;
16+
use crate::arrow::array::MutableBinaryViewArray;
17+
use crate::arrow::array::ViewType;
18+
19+
// Lets an iterator of optional values (`Option<P>` with `P: AsRef<T>`) be
// collected directly into an immutable `BinaryViewArrayGeneric<T>`.
impl<T: ViewType + ?Sized, P: AsRef<T>> FromIterator<Option<P>> for BinaryViewArrayGeneric<T> {
20+
#[inline]
21+
fn from_iter<I: IntoIterator<Item = Option<P>>>(iter: I) -> Self {
22+
// Build through the mutable array first, then convert it with `.into()`.
// NOTE(review): `None` items presumably become null slots — confirm in
// `MutableBinaryViewArray::from_iter`.
MutableBinaryViewArray::<T>::from_iter(iter).into()
23+
}
24+
}

src/common/arrow/src/arrow/array/binview/mod.rs

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
mod ffi;
1717
pub(crate) mod fmt;
18+
mod from;
1819
mod iterator;
1920
mod mutable;
2021
mod view;
@@ -23,6 +24,7 @@ mod private {
2324
pub trait Sealed: Send + Sync {}
2425

2526
impl Sealed for str {}
27+
2628
impl Sealed for [u8] {}
2729
}
2830

@@ -33,6 +35,7 @@ use std::sync::atomic::AtomicU64;
3335
use std::sync::atomic::Ordering;
3436
use std::sync::Arc;
3537

38+
use either::Either;
3639
pub use iterator::BinaryViewValueIter;
3740
pub use mutable::MutableBinaryViewArray;
3841
use private::Sealed;
@@ -157,6 +160,7 @@ impl<T: ViewType + ?Sized> Clone for BinaryViewArrayGeneric<T> {
157160
}
158161

159162
unsafe impl<T: ViewType + ?Sized> Send for BinaryViewArrayGeneric<T> {}
163+
160164
unsafe impl<T: ViewType + ?Sized> Sync for BinaryViewArrayGeneric<T> {}
161165

162166
fn buffers_into_raw<T>(buffers: &[Buffer<T>]) -> Arc<[(*const T, usize)]> {
@@ -233,6 +237,11 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
233237
buffers: Arc<[Buffer<u8>]>,
234238
validity: Option<Bitmap>,
235239
) -> Result<Self> {
240+
if data_type.to_physical_type() != Self::default_data_type().to_physical_type() {
241+
return Err(Error::oos(
242+
"BinaryViewArray can only be initialized with DataType::BinaryView or DataType::Utf8View",
243+
));
244+
}
236245
if T::IS_UTF8 {
237246
validate_utf8_view(views.as_ref(), buffers.as_ref())?;
238247
} else {
@@ -254,6 +263,12 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
254263
}
255264
}
256265

266+
/// Returns a new [`BinaryViewArrayGeneric`] from a slice of optional values
/// (`Option<V>` where `V: AsRef<T>`).
///
/// The slice is handed to `MutableBinaryViewArray::<T>::from` and the resulting
/// mutable array is converted with `.into()`.
267+
// Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
268+
pub fn from<V: AsRef<T>, P: AsRef<[Option<V>]>>(slice: P) -> Self {
269+
MutableBinaryViewArray::<T>::from(slice).into()
270+
}
271+
257272
/// Creates an empty [`BinaryViewArrayGeneric`], i.e. whose `.len` is zero.
258273
#[inline]
259274
pub fn new_empty(data_type: DataType) -> Self {
@@ -438,6 +453,84 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
438453
total_buffer_len: self.total_buffer_len,
439454
}
440455
}
456+
457+
/// Tries to convert this array into a [`MutableBinaryViewArray`], consuming `self`.
///
/// Returns `Right(mutable)` only when all of the following hold:
/// * the validity bitmap, if present, converts via `into_mut()` to its `Right` variant,
/// * `self.views.into_mut()` yields its `Right` variant, and
/// * the `buffers` `Arc` has exactly one owner (strong + weak count == 1).
///
/// In every other case the array is reassembled with `Self::new_unchecked` and
/// returned as `Left(self)`.
///
/// NOTE(review): `Left` from the inner `into_mut()` calls presumably means the
/// underlying storage is still shared — confirm against `Bitmap::into_mut` and
/// `Buffer::into_mut`.
#[must_use]
458+
pub fn into_mut(self) -> Either<Self, MutableBinaryViewArray<T>> {
459+
use Either::*;
460+
// Captured up front, before `self.buffers` is moved in the branches below.
let is_unique = (Arc::strong_count(&self.buffers) + Arc::weak_count(&self.buffers)) == 1;
461+
462+
if let Some(bitmap) = self.validity {
463+
match bitmap.into_mut() {
464+
// Validity did not convert: return the array unchanged without trying the views.
Left(bitmap) => Left(Self::new_unchecked(
465+
self.data_type,
466+
self.views,
467+
self.buffers,
468+
Some(bitmap),
469+
self.total_bytes_len.load(Ordering::Relaxed) as usize,
470+
self.total_buffer_len,
471+
)),
472+
Right(mutable_bitmap) => match (self.views.into_mut(), is_unique) {
473+
// Validity, views and buffers are all convertible: build the mutable array.
(Right(views), true) => Right(MutableBinaryViewArray {
474+
views,
475+
// Existing data buffers are carried over as completed buffers;
// the in-progress buffer starts empty.
completed_buffers: self.buffers.to_vec(),
476+
in_progress_buffer: vec![],
477+
validity: Some(mutable_bitmap),
478+
phantom: Default::default(),
479+
total_bytes_len: self.total_bytes_len.load(Ordering::Relaxed) as usize,
480+
total_buffer_len: self.total_buffer_len,
481+
}),
482+
// Views converted but `buffers` has other owners: convert views and
// validity back to their immutable forms and return the array.
(Right(views), false) => Left(Self::new_unchecked(
483+
self.data_type,
484+
views.into(),
485+
self.buffers,
486+
Some(mutable_bitmap.into()),
487+
self.total_bytes_len.load(Ordering::Relaxed) as usize,
488+
self.total_buffer_len,
489+
)),
490+
// Views did not convert: only the validity needs converting back.
(Left(views), _) => Left(Self::new_unchecked(
491+
self.data_type,
492+
views,
493+
self.buffers,
494+
Some(mutable_bitmap.into()),
495+
self.total_bytes_len.load(Ordering::Relaxed) as usize,
496+
self.total_buffer_len,
497+
)),
498+
},
499+
}
500+
} else {
501+
// No validity bitmap: same three cases as above, with `validity` left as `None`.
match (self.views.into_mut(), is_unique) {
502+
(Right(views), true) => Right(MutableBinaryViewArray {
503+
views,
504+
completed_buffers: self.buffers.to_vec(),
505+
in_progress_buffer: vec![],
506+
validity: None,
507+
phantom: Default::default(),
508+
total_bytes_len: self.total_bytes_len.load(Ordering::Relaxed) as usize,
509+
total_buffer_len: self.total_buffer_len,
510+
}),
511+
(Right(views), false) => Left(Self::new_unchecked(
512+
self.data_type,
513+
views.into(),
514+
self.buffers,
515+
None,
516+
self.total_bytes_len.load(Ordering::Relaxed) as usize,
517+
self.total_buffer_len,
518+
)),
519+
(Left(views), _) => Left(Self::new_unchecked(
520+
self.data_type,
521+
views,
522+
self.buffers,
523+
None,
524+
self.total_bytes_len.load(Ordering::Relaxed) as usize,
525+
self.total_buffer_len,
526+
)),
527+
}
528+
}
529+
}
530+
531+
/// Returns the data type that `T::dtype()` reports for the element kind `T`.
pub fn default_data_type() -> &'static DataType {
532+
T::dtype()
533+
}
441534
}
442535

443536
pub type BinaryViewArray = BinaryViewArrayGeneric<[u8]>;

src/common/arrow/src/arrow/array/binview/mutable.rs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,11 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
113113
&self.views
114114
}
115115

116-
pub fn validity(&mut self) -> Option<&mut MutableBitmap> {
116+
/// Returns a shared reference to the validity bitmap, or `None` if the array has none.
pub fn validity(&self) -> Option<&MutableBitmap> {
117+
self.validity.as_ref()
118+
}
119+
120+
/// Returns a mutable reference to the validity bitmap, or `None` if the array has none.
pub fn validity_mut(&mut self) -> Option<&mut MutableBitmap> {
117121
self.validity.as_mut()
118122
}
119123

@@ -175,8 +179,16 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
175179
payload[0..4].copy_from_slice(&len.to_le_bytes());
176180

177181
if len <= 12 {
182+
// | len | prefix | remaining(zero-padded) |
183+
// ^ ^ ^
184+
// | 4 bytes | 4 bytes | 8 bytes |
178185
payload[4..4 + bytes.len()].copy_from_slice(bytes);
179186
} else {
187+
// | len | prefix | buffer index | offset |
188+
// ^ ^ ^ ^
189+
// | 4 bytes | 4 bytes | 4 bytes | 4 bytes |
190+
//
191+
// buffer index + offset -> real binary data
180192
self.total_buffer_len += bytes.len();
181193
let required_cap = self.in_progress_buffer.len() + bytes.len();
182194
if self.in_progress_buffer.capacity() < required_cap {
@@ -192,6 +204,7 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
192204
let offset = self.in_progress_buffer.len() as u32;
193205
self.in_progress_buffer.extend_from_slice(bytes);
194206

207+
// set prefix
195208
unsafe { payload[4..8].copy_from_slice(bytes.get_unchecked(0..4)) };
196209
let buffer_idx: u32 = self.completed_buffers.len().try_into().unwrap();
197210
payload[8..12].copy_from_slice(&buffer_idx.to_le_bytes());
@@ -347,12 +360,13 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
347360
let len = v.length;
348361

349362
// view layout:
363+
// for non-inlined layout:
350364
// length: 4 bytes
351365
// prefix: 4 bytes
352366
// buffer_index: 4 bytes
353367
// offset: 4 bytes
354368

355-
// inlined layout:
369+
// for inlined layout:
356370
// length: 4 bytes
357371
// data: 12 bytes
358372
let bytes = if len <= 12 {
@@ -378,6 +392,10 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
378392
/// Returns a [`MutableBinaryViewValueIter`] constructed over this array.
pub fn values_iter(&self) -> MutableBinaryViewValueIter<T> {
379393
MutableBinaryViewValueIter::new(self)
380394
}
395+
396+
/// Collects every item yielded by [`Self::values_iter`] into a newly allocated `Vec<&T>`.
pub fn values(&self) -> Vec<&T> {
397+
self.values_iter().collect()
398+
}
381399
}
382400

383401
impl MutableBinaryViewArray<[u8]> {

src/common/arrow/src/arrow/io/parquet/read/deserialize/utils.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ impl<T: ViewType + ?Sized> Pushable<&T> for MutableBinaryViewArray<T> {
116116
views.push(view);
117117
}
118118

119-
if let Some(bitmap) = self.validity() {
119+
if let Some(bitmap) = self.validity_mut() {
120120
bitmap.extend_constant(remaining, true)
121121
}
122122
}

0 commit comments

Comments
 (0)