@@ -22,7 +22,7 @@ use crate::types::bytes::ByteArrayNativeType;
2222use crate :: types:: { BinaryViewType , ByteViewType , StringViewType } ;
2323use crate :: { Array , ArrayAccessor , ArrayRef , GenericByteArray , OffsetSizeTrait , Scalar } ;
2424use arrow_buffer:: { ArrowNativeType , Buffer , NullBuffer , ScalarBuffer } ;
25- use arrow_data:: { ArrayData , ArrayDataBuilder , ByteView } ;
25+ use arrow_data:: { ArrayData , ArrayDataBuilder , ByteView , MAX_INLINE_VIEW_LEN } ;
2626use arrow_schema:: { ArrowError , DataType } ;
2727use core:: str;
2828use num:: ToPrimitive ;
@@ -77,8 +77,9 @@ use super::ByteArrayType;
7777/// 0 31 63 95 127
7878/// ```
7979///
80- /// * Strings with length <= 12 are stored directly in the view. See
81- /// [`Self::inline_value`] to access the inlined prefix from a short view.
80+ /// * Strings with length <= 12 ([`MAX_INLINE_VIEW_LEN`]) are stored directly in
81+ /// the view. See [`Self::inline_value`] to access the inlined prefix from a
82+ /// short view.
8283///
8384/// * Strings with length > 12: The first four bytes are stored inline in the
8485/// view and the entire string is stored in one of the buffers. See [`ByteView`]
@@ -128,6 +129,7 @@ use super::ByteArrayType;
128129/// assert_eq!(value, "this string is also longer than 12 bytes");
129130/// ```
130131///
132+ /// [`MAX_INLINE_VIEW_LEN`]: arrow_data::MAX_INLINE_VIEW_LEN
131133/// [`arrow_compute`]: https://docs.rs/arrow/latest/arrow/compute/index.html
132134///
133135/// Unlike [`GenericByteArray`], there are no constraints on the offsets other
@@ -316,7 +318,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
316318 pub unsafe fn value_unchecked ( & self , idx : usize ) -> & T :: Native {
317319 let v = self . views . get_unchecked ( idx) ;
318320 let len = * v as u32 ;
319- let b = if len <= 12 {
321+ let b = if len <= MAX_INLINE_VIEW_LEN {
320322 Self :: inline_value ( v, len as usize )
321323 } else {
322324 let view = ByteView :: from ( * v) ;
@@ -331,10 +333,10 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
331333 ///
332334 /// # Safety
333335 /// - The `view` must be a valid element from `Self::views()` that adheres to the view layout.
334- /// - The `len` must be the length of the inlined value. It should never be larger than 12 .
336+ /// - The `len` must be the length of the inlined value. It should never be larger than [`MAX_INLINE_VIEW_LEN`] .
335337 #[ inline( always) ]
336338 pub unsafe fn inline_value ( view : & u128 , len : usize ) -> & [ u8 ] {
337- debug_assert ! ( len <= 12 ) ;
339+ debug_assert ! ( len <= MAX_INLINE_VIEW_LEN as usize ) ;
338340 std:: slice:: from_raw_parts ( ( view as * const u128 as * const u8 ) . wrapping_add ( 4 ) , len)
339341 }
340342
@@ -347,7 +349,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
347349 pub fn bytes_iter ( & self ) -> impl Iterator < Item = & [ u8 ] > {
348350 self . views . iter ( ) . map ( move |v| {
349351 let len = * v as u32 ;
350- if len <= 12 {
352+ if len <= MAX_INLINE_VIEW_LEN {
351353 unsafe { Self :: inline_value ( v, len as usize ) }
352354 } else {
353355 let view = ByteView :: from ( * v) ;
@@ -371,7 +373,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
371373 return & [ ] as & [ u8 ] ;
372374 }
373375
374- if prefix_len <= 4 || len <= 12 {
376+ if prefix_len <= 4 || len as u32 <= MAX_INLINE_VIEW_LEN {
375377 unsafe { StringViewArray :: inline_value ( v, prefix_len) }
376378 } else {
377379 let view = ByteView :: from ( * v) ;
@@ -401,7 +403,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
401403 return & [ ] as & [ u8 ] ;
402404 }
403405
404- if len <= 12 {
406+ if len as u32 <= MAX_INLINE_VIEW_LEN {
405407 unsafe { & StringViewArray :: inline_value ( v, len) [ len - suffix_len..] }
406408 } else {
407409 let view = ByteView :: from ( * v) ;
@@ -495,9 +497,9 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
495497 self . views ( )
496498 . iter ( )
497499 . map ( |v| {
498- let len = ( * v as u32 ) as usize ;
499- if len > 12 {
500- len
500+ let len = * v as u32 ;
501+ if len > MAX_INLINE_VIEW_LEN {
502+ len as usize
501503 } else {
502504 0
503505 }
@@ -511,11 +513,11 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
511513 /// It takes a bit of patience to understand why we don't just compare two &[u8] directly.
512514 ///
513515 /// ByteView types give us the following two advantages, and we need to be careful not to lose them:
514- /// (1) For string/byte smaller than 12 bytes, the entire data is inlined in the view.
516+ /// (1) For strings/bytes of at most [`MAX_INLINE_VIEW_LEN`] bytes, the entire data is inlined in the view.
515517 /// Meaning that reading one array element requires only one memory access
516518 /// (two memory access required for StringArray, one for offset buffer, the other for value buffer).
517519 ///
518- /// (2) For string/byte larger than 12 bytes, we can still be faster than (for certain operations) StringArray/ByteArray,
520+ /// (2) For strings/bytes longer than [`MAX_INLINE_VIEW_LEN`] bytes, we can still be faster than StringArray/ByteArray (for certain operations),
519521 /// thanks to the inlined 4 bytes.
520522 /// Consider equality check:
521523 /// If the first four bytes of the two strings are different, we can return false immediately (with just one memory access).
@@ -525,8 +527,8 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
525527 /// e.g., if the inlined 4 bytes are different, we can directly return unequal without looking at the full string.
526528 ///
527529 /// # Order check flow
528- /// (1) if both string are smaller than 12 bytes, we can directly compare the data inlined to the view.
529- /// (2) if any of the string is larger than 12 bytes, we need to compare the full string.
530+ /// (1) if both strings are at most [`MAX_INLINE_VIEW_LEN`] bytes long, we can directly compare the data inlined in the view.
531+ /// (2) if either string is longer than [`MAX_INLINE_VIEW_LEN`] bytes, we need to compare the full string.
530532 /// (2.1) if the inlined 4 bytes are different, we can return the result immediately.
531533 /// (2.2) o.w., we need to compare the full string.
532534 ///
@@ -544,7 +546,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
544546 let r_view = right. views ( ) . get_unchecked ( right_idx) ;
545547 let r_len = * r_view as u32 ;
546548
547- if l_len <= 12 && r_len <= 12 {
549+ if l_len <= MAX_INLINE_VIEW_LEN && r_len <= MAX_INLINE_VIEW_LEN {
548550 let l_data = unsafe { GenericByteViewArray :: < T > :: inline_value ( l_view, l_len as usize ) } ;
549551 let r_data = unsafe { GenericByteViewArray :: < T > :: inline_value ( r_view, r_len as usize ) } ;
550552 return l_data. cmp ( r_data) ;
0 commit comments