Skip to content

Commit d5a3604

Browse files
committed
Add Buffer::from_bitwise_unary and Buffer::from_bitwise_binary methods, deprecate old methods
1 parent ca4a0ae commit d5a3604

File tree

4 files changed

+176
-62
lines changed

4 files changed

+176
-62
lines changed

arrow-arith/src/boolean.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
//! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.
2424
2525
use arrow_array::*;
26-
use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_quaternary_op_helper};
27-
use arrow_buffer::{BooleanBuffer, NullBuffer, buffer_bin_and_not};
26+
use arrow_buffer::buffer::bitwise_quaternary_op_helper;
27+
use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer, buffer_bin_and_not};
2828
use arrow_schema::ArrowError;
2929

3030
/// Logical 'and' boolean values with Kleene logic
@@ -74,7 +74,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
7474
// The final null bit is set only if:
7575
// 1. left null bit is set, or
7676
// 2. right data bit is false (because null AND false = false).
77-
Some(bitwise_bin_op_helper(
77+
Some(Buffer::from_bitwise_binary_op(
7878
left_null_buffer.buffer(),
7979
left_null_buffer.offset(),
8080
right_values.inner(),
@@ -85,7 +85,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
8585
}
8686
(None, Some(right_null_buffer)) => {
8787
// Same as above
88-
Some(bitwise_bin_op_helper(
88+
Some(Buffer::from_bitwise_binary_op(
8989
right_null_buffer.buffer(),
9090
right_null_buffer.offset(),
9191
left_values.inner(),
@@ -169,7 +169,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
169169
// The final null bit is set only if:
170170
// 1. left null bit is set, or
171171
// 2. right data bit is true (because null OR true = true).
172-
Some(bitwise_bin_op_helper(
172+
Some(Buffer::from_bitwise_binary_op(
173173
left_nulls.buffer(),
174174
left_nulls.offset(),
175175
right_values.inner(),
@@ -180,7 +180,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
180180
}
181181
(None, Some(right_nulls)) => {
182182
// Same as above
183-
Some(bitwise_bin_op_helper(
183+
Some(Buffer::from_bitwise_binary_op(
184184
right_nulls.buffer(),
185185
right_nulls.offset(),
186186
left_values.inner(),

arrow-buffer/src/buffer/immutable.rs

Lines changed: 149 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,12 @@ use std::sync::Arc;
2222

2323
use crate::BufferBuilder;
2424
use crate::alloc::{Allocation, Deallocation};
25-
use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
26-
use crate::{bit_util, bytes::Bytes, native::ArrowNativeType};
27-
25+
use crate::bit_util::ceil;
2826
#[cfg(feature = "pool")]
2927
use crate::pool::MemoryPool;
28+
use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
29+
use crate::{bit_util, bytes::Bytes, native::ArrowNativeType};
3030

31-
use super::ops::bitwise_unary_op_helper;
3231
use super::{MutableBuffer, ScalarBuffer};
3332

3433
/// A contiguous memory region that can be shared with other buffers and across
@@ -115,6 +114,150 @@ impl Buffer {
115114
Self::from(bytes)
116115
}
117116

117+
/// Create a new [`Buffer`] by applying the bitwise operation `op` to two input buffers.
118+
///
119+
/// This function is highly optimized for bitwise operations on large
120+
/// bitmaps by processing input buffers in chunks of 64 bits (8 bytes) at a
121+
/// time, and thus is much faster than applying the operation bit by bit.
122+
///
123+
/// # Notes:
124+
/// * `op` takes two `u64` inputs and produces one `u64` output,
125+
/// operating on 64 bits at a time. **It must only apply bitwise operations
126+
/// on the relevant bits, as the input `u64` may contain irrelevant bits
127+
/// and may be processed differently on different endian architectures.**
128+
/// * The inputs are treated as bitmaps, meaning that offsets and length
129+
/// are specified in number of bits.
130+
/// * The output always has zero offset
131+
///
132+
/// # See Also
133+
/// - [`Buffer::from_bitwise_unary_op`] for unary operations on a single input buffer.
134+
/// - [`apply_bitwise_binary_op`](bit_util::apply_bitwise_binary_op) for in-place binary bitwise operations
135+
///
136+
/// # Example: Create new [`Buffer`] from bitwise `AND` of two [`Buffer`]s
137+
/// ```
138+
/// # use arrow_buffer::Buffer;
139+
/// let left = Buffer::from(&[0b11001100u8, 0b10111010u8]); // 2 bytes = 16 bits
140+
/// let right = Buffer::from(&[0b10101010u8, 0b11011100u8, 0b11110000u8]); // 3 bytes = 24 bits
141+
/// // AND of the first 12 bits
142+
/// let result = Buffer::from_bitwise_binary_op(
143+
/// &left, 0, &right, 0, 12, |a, b| a & b
144+
/// );
145+
/// assert_eq!(result.as_slice(), &[0b10001000u8, 0b00001000u8]);
146+
/// ```
147+
///
148+
/// # Example: Create new [`Buffer`] from bitwise `OR` of two byte slices
149+
/// ```
150+
/// # use arrow_buffer::Buffer;
151+
/// let left = [0b11001100u8, 0b10111010u8];
152+
/// let right = [0b10101010u8, 0b11011100u8];
153+
/// // OR of bits 4..16 from left and bits 0..12 from right
154+
/// let result = Buffer::from_bitwise_binary_op(
155+
/// &left, 4, &right, 0, 12, |a, b| a | b
156+
/// );
157+
/// assert_eq!(result.as_slice(), &[0b10101110u8, 0b00001111u8]);
158+
/// ```
159+
pub fn from_bitwise_binary_op<F>(
160+
left: impl AsRef<[u8]>,
161+
left_offset_in_bits: usize,
162+
right: impl AsRef<[u8]>,
163+
right_offset_in_bits: usize,
164+
len_in_bits: usize,
165+
mut op: F,
166+
) -> Buffer
167+
where
168+
F: FnMut(u64, u64) -> u64,
169+
{
170+
let left_chunks = BitChunks::new(left.as_ref(), left_offset_in_bits, len_in_bits);
171+
let right_chunks = BitChunks::new(right.as_ref(), right_offset_in_bits, len_in_bits);
172+
173+
let chunks = left_chunks
174+
.iter()
175+
.zip(right_chunks.iter())
176+
.map(|(left, right)| op(left, right));
177+
// Soundness: `chunks` zips and maps two `BitChunks` iterators, each of which
178+
// correctly reports its upper bound
179+
let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) };
180+
181+
let remainder_bytes = ceil(left_chunks.remainder_len(), 8);
182+
let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits());
183+
// we are counting bits starting from the least significant bit, so to_le_bytes should be correct
184+
let rem = &rem.to_le_bytes()[0..remainder_bytes];
185+
buffer.extend_from_slice(rem);
186+
187+
buffer.into()
188+
}
189+
190+
/// Create a new [`Buffer`] by applying the bitwise operation `op` to an input buffer.
191+
///
192+
/// This function is highly optimized for bitwise operations on large
193+
/// bitmaps by processing input buffers in chunks of 64 bits (8 bytes) at a
194+
/// time, and thus is much faster than applying the operation bit by bit.
195+
///
196+
/// # Notes:
197+
/// * `op` takes a single `u64` input and produces one `u64` output,
198+
/// operating on 64 bits at a time. **It must only apply bitwise operations
199+
/// on the relevant bits, as the input `u64` may contain irrelevant bits
200+
/// and may be processed differently on different endian architectures.**
201+
/// * The inputs are treated as bitmaps, meaning that offsets and length
202+
/// are specified in number of bits.
203+
/// * The output always has zero offset
204+
///
205+
/// # See Also
206+
/// - [`Buffer::from_bitwise_binary_op`] for binary operations on two input buffers.
207+
/// - [`apply_bitwise_unary_op`](bit_util::apply_bitwise_unary_op) for in-place unary bitwise operations
208+
///
209+
/// # Example: Create new [`Buffer`] from bitwise `NOT` of an input [`Buffer`]
210+
/// ```
211+
/// # use arrow_buffer::Buffer;
212+
/// let input = Buffer::from(&[0b11001100u8, 0b10111010u8]); // 2 bytes = 16 bits
213+
/// // NOT of the first 12 bits
214+
/// let result = Buffer::from_bitwise_unary_op(
215+
/// &input, 0, 12, |a| !a
216+
/// );
217+
/// assert_eq!(result.as_slice(), &[0b00110011u8, 0b11110101u8]);
218+
/// ```
219+
///
220+
/// # Example: Create a new [`Buffer`] copying a bit slice from an input slice
221+
/// ```
222+
/// # use arrow_buffer::Buffer;
223+
/// let input = [0b11001100u8, 0b10111010u8];
224+
/// // Copy bits 4..16 from input
225+
/// let result = Buffer::from_bitwise_unary_op(
226+
/// &input, 4, 12, |a| a
227+
/// );
228+
/// assert_eq!(result.as_slice(), &[0b10101100u8, 0b00001011u8], "[{:08b}, {:08b}]", result.as_slice()[0], result.as_slice()[1]);
229+
pub fn from_bitwise_unary_op<F>(
230+
left: impl AsRef<[u8]>,
231+
offset_in_bits: usize,
232+
len_in_bits: usize,
233+
mut op: F,
234+
) -> Buffer
235+
where
236+
F: FnMut(u64) -> u64,
237+
{
238+
// reserve capacity and set length so we can get a typed view of u64 chunks
239+
let mut result =
240+
MutableBuffer::new(ceil(len_in_bits, 8)).with_bitset(len_in_bits / 64 * 8, false);
241+
242+
let left_chunks = BitChunks::new(left.as_ref(), offset_in_bits, len_in_bits);
243+
244+
let result_chunks = result.typed_data_mut::<u64>().iter_mut();
245+
246+
result_chunks
247+
.zip(left_chunks.iter())
248+
.for_each(|(res, left)| {
249+
*res = op(left);
250+
});
251+
252+
let remainder_bytes = ceil(left_chunks.remainder_len(), 8);
253+
let rem = op(left_chunks.remainder_bits());
254+
// we are counting bits starting from the least significant bit, so to_le_bytes should be correct
255+
let rem = &rem.to_le_bytes()[0..remainder_bytes];
256+
result.extend_from_slice(rem);
257+
258+
result.into()
259+
}
260+
118261
/// Returns the offset, in bytes, of `Self::ptr` to `Self::data`
119262
///
120263
/// self.ptr and self.data can be different after slicing or advancing the buffer.
@@ -344,10 +487,10 @@ impl Buffer {
344487
return self.slice_with_length(offset / 8, bit_util::ceil(len, 8));
345488
}
346489

347-
bitwise_unary_op_helper(self, offset, len, |a| a)
490+
Self::from_bitwise_unary_op(self, offset, len, |a| a)
348491
}
349492

350-
/// Returns a `BitChunks` instance which can be used to iterate over this buffers bits
493+
/// Returns a `BitChunks` instance which can be used to iterate over this buffer's bits
351494
/// in larger chunks and starting at arbitrary bit offsets.
352495
/// Note that both `offset` and `length` are measured in bits.
353496
pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks<'_> {

arrow-buffer/src/buffer/ops.rs

Lines changed: 18 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -60,69 +60,41 @@ where
6060

6161
/// Apply a bitwise operation `op` to two inputs and return the result as a Buffer.
6262
/// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
63+
#[deprecated(since = "57.1.0", note = "use Buffer::from_bitwise_binary_op instead")]
6364
pub fn bitwise_bin_op_helper<F>(
6465
left: &Buffer,
6566
left_offset_in_bits: usize,
6667
right: &Buffer,
6768
right_offset_in_bits: usize,
6869
len_in_bits: usize,
69-
mut op: F,
70+
op: F,
7071
) -> Buffer
7172
where
7273
F: FnMut(u64, u64) -> u64,
7374
{
74-
let left_chunks = left.bit_chunks(left_offset_in_bits, len_in_bits);
75-
let right_chunks = right.bit_chunks(right_offset_in_bits, len_in_bits);
76-
77-
let chunks = left_chunks
78-
.iter()
79-
.zip(right_chunks.iter())
80-
.map(|(left, right)| op(left, right));
81-
// Soundness: `BitChunks` is a `BitChunks` iterator which
82-
// correctly reports its upper bound
83-
let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) };
84-
85-
let remainder_bytes = ceil(left_chunks.remainder_len(), 8);
86-
let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits());
87-
// we are counting its starting from the least significant bit, to to_le_bytes should be correct
88-
let rem = &rem.to_le_bytes()[0..remainder_bytes];
89-
buffer.extend_from_slice(rem);
90-
91-
buffer.into()
75+
Buffer::from_bitwise_binary_op(
76+
left,
77+
left_offset_in_bits,
78+
right,
79+
right_offset_in_bits,
80+
len_in_bits,
81+
op,
82+
)
9283
}
9384

9485
/// Apply a bitwise operation `op` to one input and return the result as a Buffer.
9586
/// The input is treated as a bitmap, meaning that offset and length are specified in number of bits.
87+
#[deprecated(since = "57.1.0", note = "use Buffer::from_bitwise_unary_op instead")]
9688
pub fn bitwise_unary_op_helper<F>(
9789
left: &Buffer,
9890
offset_in_bits: usize,
9991
len_in_bits: usize,
100-
mut op: F,
92+
op: F,
10193
) -> Buffer
10294
where
10395
F: FnMut(u64) -> u64,
10496
{
105-
// reserve capacity and set length so we can get a typed view of u64 chunks
106-
let mut result =
107-
MutableBuffer::new(ceil(len_in_bits, 8)).with_bitset(len_in_bits / 64 * 8, false);
108-
109-
let left_chunks = left.bit_chunks(offset_in_bits, len_in_bits);
110-
111-
let result_chunks = result.typed_data_mut::<u64>().iter_mut();
112-
113-
result_chunks
114-
.zip(left_chunks.iter())
115-
.for_each(|(res, left)| {
116-
*res = op(left);
117-
});
118-
119-
let remainder_bytes = ceil(left_chunks.remainder_len(), 8);
120-
let rem = op(left_chunks.remainder_bits());
121-
// we are counting its starting from the least significant bit, to to_le_bytes should be correct
122-
let rem = &rem.to_le_bytes()[0..remainder_bytes];
123-
result.extend_from_slice(rem);
124-
125-
result.into()
97+
Buffer::from_bitwise_unary_op(left, offset_in_bits, len_in_bits, op)
12698
}
12799

128100
/// Apply a bitwise and to two inputs and return the result as a Buffer.
@@ -134,7 +106,7 @@ pub fn buffer_bin_and(
134106
right_offset_in_bits: usize,
135107
len_in_bits: usize,
136108
) -> Buffer {
137-
bitwise_bin_op_helper(
109+
Buffer::from_bitwise_binary_op(
138110
left,
139111
left_offset_in_bits,
140112
right,
@@ -153,7 +125,7 @@ pub fn buffer_bin_or(
153125
right_offset_in_bits: usize,
154126
len_in_bits: usize,
155127
) -> Buffer {
156-
bitwise_bin_op_helper(
128+
Buffer::from_bitwise_binary_op(
157129
left,
158130
left_offset_in_bits,
159131
right,
@@ -172,7 +144,7 @@ pub fn buffer_bin_xor(
172144
right_offset_in_bits: usize,
173145
len_in_bits: usize,
174146
) -> Buffer {
175-
bitwise_bin_op_helper(
147+
Buffer::from_bitwise_binary_op(
176148
left,
177149
left_offset_in_bits,
178150
right,
@@ -191,7 +163,7 @@ pub fn buffer_bin_and_not(
191163
right_offset_in_bits: usize,
192164
len_in_bits: usize,
193165
) -> Buffer {
194-
bitwise_bin_op_helper(
166+
Buffer::from_bitwise_binary_op(
195167
left,
196168
left_offset_in_bits,
197169
right,
@@ -204,5 +176,5 @@ pub fn buffer_bin_and_not(
204176
/// Apply a bitwise not to one input and return the result as a Buffer.
205177
/// The input is treated as a bitmap, meaning that offset and length are specified in number of bits.
206178
pub fn buffer_unary_not(left: &Buffer, offset_in_bits: usize, len_in_bits: usize) -> Buffer {
207-
bitwise_unary_op_helper(left, offset_in_bits, len_in_bits, |a| !a)
179+
Buffer::from_bitwise_unary_op(left, offset_in_bits, len_in_bits, |a| !a)
208180
}

arrow-select/src/nullif.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@
1818
//! Implements the `nullif` function for Arrow arrays.
1919
2020
use arrow_array::{Array, ArrayRef, BooleanArray, make_array};
21-
use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_unary_op_helper};
22-
use arrow_buffer::{BooleanBuffer, NullBuffer};
21+
use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer};
2322
use arrow_schema::{ArrowError, DataType};
2423

2524
/// Returns a new array with the same values and the validity bit to false where
@@ -75,7 +74,7 @@ pub fn nullif(left: &dyn Array, right: &BooleanArray) -> Result<ArrayRef, ArrowE
7574
let (combined, null_count) = match left_data.nulls() {
7675
Some(left) => {
7776
let mut valid_count = 0;
78-
let b = bitwise_bin_op_helper(
77+
let b = Buffer::from_bitwise_binary_op(
7978
left.buffer(),
8079
left.offset(),
8180
right.inner(),
@@ -91,7 +90,7 @@ pub fn nullif(left: &dyn Array, right: &BooleanArray) -> Result<ArrayRef, ArrowE
9190
}
9291
None => {
9392
let mut null_count = 0;
94-
let buffer = bitwise_unary_op_helper(right.inner(), right.offset(), len, |b| {
93+
let buffer = Buffer::from_bitwise_unary_op(right.inner(), right.offset(), len, |b| {
9594
let t = !b;
9695
null_count += t.count_zeros() as usize;
9796
t

0 commit comments

Comments
 (0)