@@ -22,7 +22,7 @@ use crate::types::bytes::ByteArrayNativeType;
2222use crate :: types:: { BinaryViewType , ByteViewType , StringViewType } ;
2323use crate :: { Array , ArrayAccessor , ArrayRef , GenericByteArray , OffsetSizeTrait , Scalar } ;
2424use arrow_buffer:: { ArrowNativeType , Buffer , NullBuffer , ScalarBuffer } ;
25- use arrow_data:: { ArrayData , ArrayDataBuilder , ByteView } ;
25+ use arrow_data:: { ArrayData , ArrayDataBuilder , ByteView , MAX_INLINE_VIEW_LEN } ;
2626use arrow_schema:: { ArrowError , DataType } ;
2727use core:: str;
2828use num:: ToPrimitive ;
@@ -78,8 +78,9 @@ use super::ByteArrayType;
7878/// 0 31 63 95 127
7979/// ```
8080///
81- /// * Strings with length <= 12 are stored directly in the view. See
82- /// [`Self::inline_value`] to access the inlined prefix from a short view.
81+ /// * Strings with length <= 12 ([`MAX_INLINE_VIEW_LEN`]) are stored directly in
82+ /// the view. See [`Self::inline_value`] to access the inlined prefix from a
83+ /// short view.
8384///
8485/// * Strings with length > 12: The first four bytes are stored inline in the
8586/// view and the entire string is stored in one of the buffers. See [`ByteView`]
@@ -129,6 +130,7 @@ use super::ByteArrayType;
129130/// assert_eq!(value, "this string is also longer than 12 bytes");
130131/// ```
131132///
133+ /// [`MAX_INLINE_VIEW_LEN`]: arrow_data::MAX_INLINE_VIEW_LEN
132134/// [`arrow_compute`]: https://docs.rs/arrow/latest/arrow/compute/index.html
133135///
134136/// Unlike [`GenericByteArray`], there are no constraints on the offsets other
@@ -317,7 +319,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
317319 pub unsafe fn value_unchecked ( & self , idx : usize ) -> & T :: Native {
318320 let v = self . views . get_unchecked ( idx) ;
319321 let len = * v as u32 ;
320- let b = if len <= 12 {
322+ let b = if len <= MAX_INLINE_VIEW_LEN {
321323 Self :: inline_value ( v, len as usize )
322324 } else {
323325 let view = ByteView :: from ( * v) ;
@@ -332,10 +334,10 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
332334 ///
333335 /// # Safety
334336 /// - The `view` must be a valid element from `Self::views()` that adheres to the view layout.
335- /// - The `len` must be the length of the inlined value. It should never be larger than 12 .
337+ /// - The `len` must be the length of the inlined value. It should never be larger than [`MAX_INLINE_VIEW_LEN`] .
336338 #[ inline( always) ]
337339 pub unsafe fn inline_value ( view : & u128 , len : usize ) -> & [ u8 ] {
338- debug_assert ! ( len <= 12 ) ;
340+ debug_assert ! ( len <= MAX_INLINE_VIEW_LEN as usize ) ;
339341 std:: slice:: from_raw_parts ( ( view as * const u128 as * const u8 ) . wrapping_add ( 4 ) , len)
340342 }
341343
@@ -348,7 +350,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
348350 pub fn bytes_iter ( & self ) -> impl Iterator < Item = & [ u8 ] > {
349351 self . views . iter ( ) . map ( move |v| {
350352 let len = * v as u32 ;
351- if len <= 12 {
353+ if len <= MAX_INLINE_VIEW_LEN {
352354 unsafe { Self :: inline_value ( v, len as usize ) }
353355 } else {
354356 let view = ByteView :: from ( * v) ;
@@ -372,7 +374,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
372374 return & [ ] as & [ u8 ] ;
373375 }
374376
375- if prefix_len <= 4 || len <= 12 {
377+ if prefix_len <= 4 || len as u32 <= MAX_INLINE_VIEW_LEN {
376378 unsafe { StringViewArray :: inline_value ( v, prefix_len) }
377379 } else {
378380 let view = ByteView :: from ( * v) ;
@@ -402,7 +404,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
402404 return & [ ] as & [ u8 ] ;
403405 }
404406
405- if len <= 12 {
407+ if len as u32 <= MAX_INLINE_VIEW_LEN {
406408 unsafe { & StringViewArray :: inline_value ( v, len) [ len - suffix_len..] }
407409 } else {
408410 let view = ByteView :: from ( * v) ;
@@ -496,9 +498,9 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
496498 self . views ( )
497499 . iter ( )
498500 . map ( |v| {
499- let len = ( * v as u32 ) as usize ;
500- if len > 12 {
501- len
501+ let len = * v as u32 ;
502+ if len > MAX_INLINE_VIEW_LEN {
503+ len as usize
502504 } else {
503505 0
504506 }
@@ -512,11 +514,11 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
512514 /// It takes a bit of patience to understand why we don't just compare two &[u8] directly.
513515 ///
514516 /// ByteView types give us the following two advantages, and we need to be careful not to lose them:
515- /// (1) For string/byte smaller than 12 bytes, the entire data is inlined in the view.
517+ /// (1) For strings/bytes of at most [`MAX_INLINE_VIEW_LEN`] bytes, the entire data is inlined in the view.
516518 /// Meaning that reading one array element requires only one memory access
517519 /// (two memory access required for StringArray, one for offset buffer, the other for value buffer).
518520 ///
519- /// (2) For string/byte larger than 12 bytes, we can still be faster than (for certain operations) StringArray/ByteArray,
521+ /// (2) For strings/bytes longer than [`MAX_INLINE_VIEW_LEN`] bytes, we can still be faster than StringArray/ByteArray (for certain operations),
520522 /// thanks to the inlined 4 bytes.
521523 /// Consider equality check:
522524 /// If the first four bytes of the two strings are different, we can return false immediately (with just one memory access).
@@ -526,8 +528,8 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
526528 /// e.g., if the inlined 4 bytes are different, we can directly return unequal without looking at the full string.
527529 ///
528530 /// # Order check flow
529- /// (1) if both string are smaller than 12 bytes, we can directly compare the data inlined to the view.
530- /// (2) if any of the string is larger than 12 bytes, we need to compare the full string.
531+ /// (1) if both strings are at most [`MAX_INLINE_VIEW_LEN`] bytes long, we can directly compare the data inlined to the view.
532+ /// (2) if either string is longer than [`MAX_INLINE_VIEW_LEN`] bytes, we need to compare the full string.
531533 /// (2.1) if the inlined 4 bytes are different, we can return the result immediately.
532534 /// (2.2) o.w., we need to compare the full string.
533535 ///
@@ -555,7 +557,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
555557 // one of the strings is larger than 12 bytes,
556558 // we then try to compare the inlined data first
557559
558- // Note: In theory, ByteView is only used for views larger than 12 bytes,
560+ // Note: In theory, ByteView is only used for strings larger than 12 bytes,
559561 // but we can still use it to get the inlined prefix for shorter strings.
560562 // The prefix is always the first 4 bytes of the view, for both short and long strings.
561563 let l_inlined_be = l_byte_view. prefix . swap_bytes ( ) ;
0 commit comments