@@ -22,13 +22,12 @@ use std::sync::Arc;
2222
2323use crate :: BufferBuilder ;
2424use crate :: alloc:: { Allocation , Deallocation } ;
25- use crate :: util:: bit_chunk_iterator:: { BitChunks , UnalignedBitChunk } ;
26- use crate :: { bit_util, bytes:: Bytes , native:: ArrowNativeType } ;
27-
25+ use crate :: bit_util:: ceil;
2826#[ cfg( feature = "pool" ) ]
2927use crate :: pool:: MemoryPool ;
28+ use crate :: util:: bit_chunk_iterator:: { BitChunks , UnalignedBitChunk } ;
29+ use crate :: { bit_util, bytes:: Bytes , native:: ArrowNativeType } ;
3030
31- use super :: ops:: bitwise_unary_op_helper;
3231use super :: { MutableBuffer , ScalarBuffer } ;
3332
3433/// A contiguous memory region that can be shared with other buffers and across
@@ -115,6 +114,150 @@ impl Buffer {
115114 Self :: from ( bytes)
116115 }
117116
117+ /// Create a new [`Buffer`] by applying the bitwise operation `op` to two input buffers.
118+ ///
119+ /// This function is highly optimized for bitwise operations on large
120+ /// bitmaps by processing input buffers in chunks of 64 bits (8 bytes) at a
121+ /// time, and thus is much faster than applying the operation bit by bit.
122+ ///
123+ /// # Notes:
124+ /// * `op` takes two `u64` inputs and produces one `u64` output,
125+ /// operating on 64 bits at a time. **It must only apply bitwise operations
126+ /// on the relevant bits, as the input `u64` may contain irrelevant bits
127+ /// and may be processed differently on different endian architectures.**
128+ /// * The inputs are treated as bitmaps, meaning that offsets and length
129+ /// are specified in number of bits.
130+ /// * The output always has zero offset
131+ ///
132+ /// # See Also
133+ /// - [`Buffer::from_bitwise_unary_op`] for unary operations on a single input buffer.
134+ /// - [`apply_bitwise_binary_op`](bit_util::apply_bitwise_binary_op) for in-place binary bitwise operations
135+ ///
136+ /// # Example: Create new [`Buffer`] from bitwise `AND` of two [`Buffer`]s
137+ /// ```
138+ /// # use arrow_buffer::Buffer;
139+ /// let left = Buffer::from(&[0b11001100u8, 0b10111010u8]); // 2 bytes = 16 bits
140+ /// let right = Buffer::from(&[0b10101010u8, 0b11011100u8, 0b11110000u8]); // 3 bytes = 24 bits
141+ /// // AND of the first 12 bits
142+ /// let result = Buffer::from_bitwise_binary_op(
143+ /// &left, 0, &right, 0, 12, |a, b| a & b
144+ /// );
145+ /// assert_eq!(result.as_slice(), &[0b10001000u8, 0b00001000u8]);
146+ /// ```
147+ ///
148+ /// # Example: Create new [`Buffer`] from bitwise `OR` of two byte slices
149+ /// ```
150+ /// # use arrow_buffer::Buffer;
151+ /// let left = [0b11001100u8, 0b10111010u8];
152+ /// let right = [0b10101010u8, 0b11011100u8];
153+ /// // OR of bits 4..16 from left and bits 0..12 from right
154+ /// let result = Buffer::from_bitwise_binary_op(
155+ /// &left, 4, &right, 0, 12, |a, b| a | b
156+ /// );
157+ /// assert_eq!(result.as_slice(), &[0b10101110u8, 0b00001111u8]);
158+ /// ```
159+ pub fn from_bitwise_binary_op < F > (
160+ left : impl AsRef < [ u8 ] > ,
161+ left_offset_in_bits : usize ,
162+ right : impl AsRef < [ u8 ] > ,
163+ right_offset_in_bits : usize ,
164+ len_in_bits : usize ,
165+ mut op : F ,
166+ ) -> Buffer
167+ where
168+ F : FnMut ( u64 , u64 ) -> u64 ,
169+ {
170+ let left_chunks = BitChunks :: new ( left. as_ref ( ) , left_offset_in_bits, len_in_bits) ;
171+ let right_chunks = BitChunks :: new ( right. as_ref ( ) , right_offset_in_bits, len_in_bits) ;
172+
173+ let chunks = left_chunks
174+ . iter ( )
175+ . zip ( right_chunks. iter ( ) )
176+ . map ( |( left, right) | op ( left, right) ) ;
177+ // Soundness: `BitChunks` is a `BitChunks` iterator which
178+ // correctly reports its upper bound
179+ let mut buffer = unsafe { MutableBuffer :: from_trusted_len_iter ( chunks) } ;
180+
181+ let remainder_bytes = ceil ( left_chunks. remainder_len ( ) , 8 ) ;
182+ let rem = op ( left_chunks. remainder_bits ( ) , right_chunks. remainder_bits ( ) ) ;
183+ // we are counting its starting from the least significant bit, to to_le_bytes should be correct
184+ let rem = & rem. to_le_bytes ( ) [ 0 ..remainder_bytes] ;
185+ buffer. extend_from_slice ( rem) ;
186+
187+ buffer. into ( )
188+ }
189+
190+ /// Create a new [`Buffer`] by applying the bitwise operation to `op` to an input buffer.
191+ ///
192+ /// This function is highly optimized for bitwise operations on large
193+ /// bitmaps by processing input buffers in chunks of 64 bits (8 bytes) at a
194+ /// time, and thus is much faster than applying the operation bit by bit.
195+ ///
196+ /// # Notes:
197+ /// * `op` takes two `u64` inputs and produces one `u64` output,
198+ /// operating on 64 bits at a time. **It must only apply bitwise operations
199+ /// on the relevant bits, as the input `u64` may contain irrelevant bits
200+ /// and may be processed differently on different endian architectures.**
201+ /// * The inputs are treated as bitmaps, meaning that offsets and length
202+ /// are specified in number of bits.
203+ /// * The output always has zero offset
204+ ///
205+ /// # See Also
206+ /// - [`Buffer::from_bitwise_binary_op`] for binary operations on a single input buffer.
207+ /// - [`apply_bitwise_unary_op`](bit_util::apply_bitwise_unary_op) for in-place unary bitwise operations
208+ ///
209+ /// # Example: Create new [`Buffer`] from bitwise `NOT` of an input [`Buffer`]
210+ /// ```
211+ /// # use arrow_buffer::Buffer;
212+ /// let input = Buffer::from(&[0b11001100u8, 0b10111010u8]); // 2 bytes = 16 bits
213+ /// // NOT of the first 12 bits
214+ /// let result = Buffer::from_bitwise_unary_op(
215+ /// &input, 0, 12, |a| !a
216+ /// );
217+ /// assert_eq!(result.as_slice(), &[0b00110011u8, 0b11110101u8]);
218+ /// ```
219+ ///
220+ /// # Example: Create a new [`Buffer`] copying a bit slice from in input slice
221+ /// ```
222+ /// # use arrow_buffer::Buffer;
223+ /// let input = [0b11001100u8, 0b10111010u8];
224+ /// // // Copy bits 4..16 from input
225+ /// let result = Buffer::from_bitwise_unary_op(
226+ /// &input, 4, 12, |a| a
227+ /// );
228+ /// assert_eq!(result.as_slice(), &[0b10101100u8, 0b00001011u8], "[{:08b}, {:08b}]", result.as_slice()[0], result.as_slice()[1]);
229+ pub fn from_bitwise_unary_op < F > (
230+ left : impl AsRef < [ u8 ] > ,
231+ offset_in_bits : usize ,
232+ len_in_bits : usize ,
233+ mut op : F ,
234+ ) -> Buffer
235+ where
236+ F : FnMut ( u64 ) -> u64 ,
237+ {
238+ // reserve capacity and set length so we can get a typed view of u64 chunks
239+ let mut result =
240+ MutableBuffer :: new ( ceil ( len_in_bits, 8 ) ) . with_bitset ( len_in_bits / 64 * 8 , false ) ;
241+
242+ let left_chunks = BitChunks :: new ( left. as_ref ( ) , offset_in_bits, len_in_bits) ;
243+
244+ let result_chunks = result. typed_data_mut :: < u64 > ( ) . iter_mut ( ) ;
245+
246+ result_chunks
247+ . zip ( left_chunks. iter ( ) )
248+ . for_each ( |( res, left) | {
249+ * res = op ( left) ;
250+ } ) ;
251+
252+ let remainder_bytes = ceil ( left_chunks. remainder_len ( ) , 8 ) ;
253+ let rem = op ( left_chunks. remainder_bits ( ) ) ;
254+ // we are counting its starting from the least significant bit, to to_le_bytes should be correct
255+ let rem = & rem. to_le_bytes ( ) [ 0 ..remainder_bytes] ;
256+ result. extend_from_slice ( rem) ;
257+
258+ result. into ( )
259+ }
260+
118261 /// Returns the offset, in bytes, of `Self::ptr` to `Self::data`
119262 ///
120263 /// self.ptr and self.data can be different after slicing or advancing the buffer.
@@ -344,10 +487,10 @@ impl Buffer {
344487 return self . slice_with_length ( offset / 8 , bit_util:: ceil ( len, 8 ) ) ;
345488 }
346489
347- bitwise_unary_op_helper ( self , offset, len, |a| a)
490+ Self :: from_bitwise_unary_op ( self , offset, len, |a| a)
348491 }
349492
350- /// Returns a `BitChunks` instance which can be used to iterate over this buffers bits
493+ /// Returns a `BitChunks` instance which can be used to iterate over this buffer's bits
351494 /// in larger chunks and starting at arbitrary bit offsets.
352495 /// Note that both `offset` and `length` are measured in bits.
353496 pub fn bit_chunks ( & self , offset : usize , len : usize ) -> BitChunks < ' _ > {
0 commit comments