From 5cc751958e8aa0956c683bad293140e001da426a Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Thu, 5 Jun 2025 23:29:03 +0200 Subject: [PATCH 1/3] Implement `int_format_into` feature --- library/core/src/fmt/mod.rs | 4 + library/core/src/fmt/num.rs | 236 +++++++++++++++++++++++++---- library/core/src/fmt/num_buffer.rs | 60 ++++++++ 3 files changed, 271 insertions(+), 29 deletions(-) create mode 100644 library/core/src/fmt/num_buffer.rs diff --git a/library/core/src/fmt/mod.rs b/library/core/src/fmt/mod.rs index 2be8d37bbee67..ef54b0fd68f15 100644 --- a/library/core/src/fmt/mod.rs +++ b/library/core/src/fmt/mod.rs @@ -15,6 +15,7 @@ mod float; #[cfg(no_fp_fmt_parse)] mod nofloat; mod num; +mod num_buffer; mod rt; #[stable(feature = "fmt_flags_align", since = "1.28.0")] @@ -33,6 +34,9 @@ pub enum Alignment { Center, } +#[unstable(feature = "int_format_into", issue = "138215")] +pub use num_buffer::{NumBuffer, NumBufferTrait}; + #[stable(feature = "debug_builders", since = "1.2.0")] pub use self::builders::{DebugList, DebugMap, DebugSet, DebugStruct, DebugTuple}; #[unstable(feature = "debug_closure_helpers", issue = "117729")] diff --git a/library/core/src/fmt/num.rs b/library/core/src/fmt/num.rs index 42af595ae4170..8463bf4dc6806 100644 --- a/library/core/src/fmt/num.rs +++ b/library/core/src/fmt/num.rs @@ -1,5 +1,6 @@ //! Integer and floating-point number formatting +use crate::fmt::NumBuffer; use crate::mem::MaybeUninit; use crate::num::fmt as numfmt; use crate::ops::{Div, Rem, Sub}; @@ -199,6 +200,20 @@ static DEC_DIGITS_LUT: &[u8; 200] = b"\ 6061626364656667686970717273747576777879\ 8081828384858687888990919293949596979899"; +/// This function converts a slice of ascii characters into a `&str` starting from `offset`. +/// +/// # Safety +/// +/// `buf` content starting from `offset` index MUST BE initialized and MUST BE ascii +/// characters. 
+unsafe fn slice_buffer_to_str(buf: &[MaybeUninit], offset: usize) -> &str { + // SAFETY: `offset` is always included between 0 and `buf`'s length. + let written = unsafe { buf.get_unchecked(offset..) }; + // SAFETY: (`assume_init_ref`) All buf content since offset is set. + // SAFETY: (`from_utf8_unchecked`) Writes use ASCII from the lookup table exclusively. + unsafe { str::from_utf8_unchecked(written.assume_init_ref()) } +} + macro_rules! impl_Display { ($($signed:ident, $unsigned:ident,)* ; as $u:ident via $conv_fn:ident named $gen_name:ident) => { @@ -248,6 +263,13 @@ macro_rules! impl_Display { issue = "none" )] pub fn _fmt<'a>(self, buf: &'a mut [MaybeUninit::]) -> &'a str { + // SAFETY: `buf` will always be big enough to contain all digits. + let offset = unsafe { self._fmt_inner(buf) }; + // SAFETY: Starting from `offset`, all elements of the slice have been set. + unsafe { slice_buffer_to_str(buf, offset) } + } + + unsafe fn _fmt_inner(self, buf: &mut [MaybeUninit::]) -> usize { // Count the number of bytes in buf that are not initialized. let mut offset = buf.len(); // Consume the least-significant decimals from a working copy. @@ -309,24 +331,97 @@ macro_rules! impl_Display { // not used: remain = 0; } - // SAFETY: All buf content since offset is set. - let written = unsafe { buf.get_unchecked(offset..) }; - // SAFETY: Writes use ASCII from the lookup table exclusively. + offset + } + } + + impl $signed { + /// Allows users to write an integer (in signed decimal format) into a variable `buf` of + /// type [`NumBuffer`] that is passed by the caller by mutable reference. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(int_format_into)] + /// use core::fmt::NumBuffer; + /// + #[doc = concat!("let n = 0", stringify!($signed), ";")] + /// let mut buf = NumBuffer::new(); + /// assert_eq!(n.format_into(&mut buf), "0"); + /// + #[doc = concat!("let n1 = 32", stringify!($signed), ";")] + /// assert_eq!(n1.format_into(&mut buf), "32"); + /// + #[doc = concat!("let n2 = ", stringify!($signed::MAX), ";")] + #[doc = concat!("assert_eq!(n2.format_into(&mut buf), ", stringify!($signed::MAX), ".to_string());")] + /// ``` + #[unstable(feature = "int_format_into", issue = "138215")] + pub fn format_into(self, buf: &mut NumBuffer) -> &str { + let mut offset; + + #[cfg(not(feature = "optimize_for_size"))] + // SAFETY: `buf` will always be big enough to contain all digits. unsafe { - str::from_utf8_unchecked(slice::from_raw_parts( - MaybeUninit::slice_as_ptr(written), - written.len(), - )) + offset = self.unsigned_abs()._fmt_inner(&mut buf.buf); } + #[cfg(feature = "optimize_for_size")] + { + offset = _inner_slow_integer_to_str(self.unsigned_abs().$conv_fn(), &mut buf.buf); + } + // Only difference between signed and unsigned are these 4 lines. + if self < 0 { + offset -= 1; + buf.buf[offset].write(b'-'); + } + // SAFETY: Starting from `offset`, all elements of the slice have been set. + unsafe { slice_buffer_to_str(&buf.buf, offset) } } - })* + } + + impl $unsigned { + /// Allows users to write an integer (in decimal format) into a variable `buf` of + /// type [`NumBuffer`] that is passed by the caller by mutable reference. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(int_format_into)] + /// use core::fmt::NumBuffer; + /// + #[doc = concat!("let n = 0", stringify!($unsigned), ";")] + /// let mut buf = NumBuffer::new(); + /// assert_eq!(n.format_into(&mut buf), "0"); + /// + #[doc = concat!("let n1 = 32", stringify!($unsigned), ";")] + /// assert_eq!(n1.format_into(&mut buf), "32"); + /// + #[doc = concat!("let n2 = ", stringify!($unsigned::MAX), ";")] + #[doc = concat!("assert_eq!(n2.format_into(&mut buf), ", stringify!($unsigned::MAX), ".to_string());")] + /// ``` + #[unstable(feature = "int_format_into", issue = "138215")] + pub fn format_into(self, buf: &mut NumBuffer) -> &str { + let offset; + + #[cfg(not(feature = "optimize_for_size"))] + // SAFETY: `buf` will always be big enough to contain all digits. + unsafe { + offset = self._fmt_inner(&mut buf.buf); + } + #[cfg(feature = "optimize_for_size")] + { + offset = _inner_slow_integer_to_str(self.$conv_fn(), &mut buf.buf); + } + // SAFETY: Starting from `offset`, all elements of the slice have been set. + unsafe { slice_buffer_to_str(&buf.buf, offset) } + } + } + + + )* #[cfg(feature = "optimize_for_size")] - fn $gen_name(mut n: $u, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result { - const MAX_DEC_N: usize = $u::MAX.ilog10() as usize + 1; - let mut buf = [MaybeUninit::::uninit(); MAX_DEC_N]; - let mut curr = MAX_DEC_N; - let buf_ptr = MaybeUninit::slice_as_mut_ptr(&mut buf); + fn _inner_slow_integer_to_str(mut n: $u, buf: &mut [MaybeUninit::]) -> usize { + let mut curr = buf.len(); // SAFETY: To show that it's OK to copy into `buf_ptr`, notice that at the beginning // `curr == buf.len() == 39 > log(n)` since `n < 2^128 < 10^39`, and at @@ -336,7 +431,7 @@ macro_rules! impl_Display { unsafe { loop { curr -= 1; - buf_ptr.add(curr).write((n % 10) as u8 + b'0'); + buf[curr].write((n % 10) as u8 + b'0'); n /= 10; if n == 0 { @@ -344,12 +439,17 @@ macro_rules! 
impl_Display { } } } + curr + } - - // SAFETY: `curr` > 0 (since we made `buf` large enough), and all the chars are valid UTF-8 - let buf_slice = unsafe { - str::from_utf8_unchecked( - slice::from_raw_parts(buf_ptr.add(curr), buf.len() - curr)) - }; + #[cfg(feature = "optimize_for_size")] + fn $gen_name(n: $u, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result { + const MAX_DEC_N: usize = $u::MAX.ilog(10) as usize + 1; + let mut buf = [MaybeUninit::::uninit(); MAX_DEC_N]; + + let offset = _inner_slow_integer_to_str(n, &mut buf); + // SAFETY: Starting from `offset`, all elements of the slice have been set. + let buf_slice = unsafe { slice_buffer_to_str(&buf, offset) }; f.pad_integral(is_nonnegative, "", buf_slice) } }; @@ -598,12 +698,20 @@ impl u128 { issue = "none" )] pub fn _fmt<'a>(self, buf: &'a mut [MaybeUninit]) -> &'a str { + // SAFETY: `buf` will always be big enough to contain all digits. + let offset = unsafe { self._fmt_inner(buf) }; + // SAFETY: Starting from `offset`, all elements of the slice have been set. + unsafe { slice_buffer_to_str(buf, offset) } + } + + unsafe fn _fmt_inner(self, buf: &mut [MaybeUninit]) -> usize { // Optimize common-case zero, which would also need special treatment due to // its "leading" zero. if self == 0 { - return "0"; + let offset = buf.len() - 1; + buf[offset].write(b'0'); + return offset; } - // Take the 16 least-significant decimals. let (quot_1e16, mod_1e16) = div_rem_1e16(self); let (mut remain, mut offset) = if quot_1e16 == 0 { @@ -677,16 +785,86 @@ impl u128 { buf[offset].write(DEC_DIGITS_LUT[last * 2 + 1]); // not used: remain = 0; } + offset + } - // SAFETY: All buf content since offset is set. - let written = unsafe { buf.get_unchecked(offset..) }; - // SAFETY: Writes use ASCII from the lookup table exclusively. 
- unsafe { - str::from_utf8_unchecked(slice::from_raw_parts( - MaybeUninit::slice_as_ptr(written), - written.len(), - )) + /// Allows users to write an integer (in decimal format) into a variable `buf` of + /// type [`NumBuffer`] that is passed by the caller by mutable reference. + /// + /// # Examples + /// + /// ``` + /// #![feature(int_format_into)] + /// use core::fmt::NumBuffer; + /// + /// let n = 0u128; + /// let mut buf = NumBuffer::new(); + /// assert_eq!(n.format_into(&mut buf), "0"); + /// + /// let n1 = 32u128; + /// let mut buf1 = NumBuffer::new(); + /// assert_eq!(n1.format_into(&mut buf1), "32"); + /// + /// let n2 = u128::MAX; + /// let mut buf2 = NumBuffer::new(); + /// assert_eq!(n2.format_into(&mut buf2), u128::MAX.to_string()); + /// ``` + #[unstable(feature = "int_format_into", issue = "138215")] + pub fn format_into(self, buf: &mut NumBuffer) -> &str { + let diff = buf.capacity() - U128_MAX_DEC_N; + // FIXME: Once const generics are better, use `NumBufferTrait::BUF_SIZE` as generic const + // for `fmt_u128_inner`. + // + // In the meantime, we format into the sub-slice starting at index `diff` (whose length is + // `U128_MAX_DEC_N`) to ensure the number is correctly generated at the end of the buffer. + // SAFETY: `diff` is `buf.capacity() - U128_MAX_DEC_N`, so it never exceeds the buffer length. + unsafe { self._fmt(buf.buf.get_unchecked_mut(diff..)) } + } +} + +impl i128 { + /// Allows users to write an integer (in signed decimal format) into a variable `buf` of + /// type [`NumBuffer`] that is passed by the caller by mutable reference. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(int_format_into)] + /// use core::fmt::NumBuffer; + /// + /// let n = 0i128; + /// let mut buf = NumBuffer::new(); + /// assert_eq!(n.format_into(&mut buf), "0"); + /// + /// let n1 = i128::MIN; + /// assert_eq!(n1.format_into(&mut buf), i128::MIN.to_string()); + /// + /// let n2 = i128::MAX; + /// assert_eq!(n2.format_into(&mut buf), i128::MAX.to_string()); + /// ``` + #[unstable(feature = "int_format_into", issue = "138215")] + pub fn format_into(self, buf: &mut NumBuffer) -> &str { + let diff = buf.capacity() - U128_MAX_DEC_N; + // FIXME: Once const generics are better, use `NumBufferTrait::BUF_SIZE` as generic const + // for `fmt_u128_inner`. + // + // In the meantime, we have to use a slice starting at index `diff` and add `diff` to the returned + // offset to ensure the number is correctly generated at the end of the buffer. + let mut offset = + // SAFETY: `buf` will always be big enough to contain all digits. + unsafe { self.unsigned_abs()._fmt_inner(buf.buf.get_unchecked_mut(diff..)) }; + // We put back the offset at the right position. + offset += diff; + // Only difference between signed and unsigned are these 4 lines. + if self < 0 { + offset -= 1; + // SAFETY: `buf` will always be big enough to contain all digits plus the minus sign. + unsafe { + buf.buf.get_unchecked_mut(offset).write(b'-'); + } } + // SAFETY: Starting from `offset`, all elements of the slice have been set. + unsafe { slice_buffer_to_str(&buf.buf, offset) } } } diff --git a/library/core/src/fmt/num_buffer.rs b/library/core/src/fmt/num_buffer.rs new file mode 100644 index 0000000000000..474a8d20ef6c5 --- /dev/null +++ b/library/core/src/fmt/num_buffer.rs @@ -0,0 +1,60 @@ +use crate::mem::MaybeUninit; + +/// Trait used to describe the maximum number of digits in decimal base of the implemented integer. 
+#[unstable(feature = "int_format_into", issue = "138215")] +pub trait NumBufferTrait { + /// Maximum number of digits in decimal base of the implemented integer. + const BUF_SIZE: usize; +} + +macro_rules! impl_NumBufferTrait { + ($($signed:ident, $unsigned:ident,)*) => { + $( + #[unstable(feature = "int_format_into", issue = "138215")] + impl NumBufferTrait for $signed { + // `+ 2` and not `+ 1` to include the `-` character. + const BUF_SIZE: usize = $signed::MAX.ilog(10) as usize + 2; + } + #[unstable(feature = "int_format_into", issue = "138215")] + impl NumBufferTrait for $unsigned { + const BUF_SIZE: usize = $unsigned::MAX.ilog(10) as usize + 1; + } + )* + } +} + +impl_NumBufferTrait! { + i8, u8, + i16, u16, + i32, u32, + i64, u64, + isize, usize, + i128, u128, +} + +/// A buffer wrapper of which the internal size is based on the maximum +/// number of digits the associated integer can have. +#[unstable(feature = "int_format_into", issue = "138215")] +#[derive(Debug)] +pub struct NumBuffer { + // FIXME: Once const generics feature is working, use `T::BUF_SIZE` instead of 40. + pub(crate) buf: [MaybeUninit; 40], + // FIXME: Remove this field once we can actually use `T`. + phantom: core::marker::PhantomData, +} + +#[unstable(feature = "int_format_into", issue = "138215")] +impl NumBuffer { + /// Initializes internal buffer. + #[unstable(feature = "int_format_into", issue = "138215")] + pub const fn new() -> Self { + // FIXME: Once const generics feature is working, use `T::BUF_SIZE` instead of 40. + NumBuffer { buf: [MaybeUninit::::uninit(); 40], phantom: core::marker::PhantomData } + } + + /// Returns the length of the internal buffer. 
+ #[unstable(feature = "int_format_into", issue = "138215")] + pub const fn capacity(&self) -> usize { + self.buf.len() + } +} From ce4a7091048a91cbbfc41b823a66e913a6b6a83a Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Thu, 5 Jun 2025 23:29:12 +0200 Subject: [PATCH 2/3] Add test for `int_format_into` feature --- library/alloctests/tests/lib.rs | 1 + library/alloctests/tests/num.rs | 46 +++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/library/alloctests/tests/lib.rs b/library/alloctests/tests/lib.rs index a41162ecd51a0..fcfc7f8dd296d 100644 --- a/library/alloctests/tests/lib.rs +++ b/library/alloctests/tests/lib.rs @@ -9,6 +9,7 @@ #![feature(downcast_unchecked)] #![feature(exact_size_is_empty)] #![feature(hashmap_internals)] +#![feature(int_format_into)] #![feature(linked_list_cursors)] #![feature(map_try_insert)] #![feature(pattern)] diff --git a/library/alloctests/tests/num.rs b/library/alloctests/tests/num.rs index a169bbec8e0c9..589b809009636 100644 --- a/library/alloctests/tests/num.rs +++ b/library/alloctests/tests/num.rs @@ -1,15 +1,21 @@ -use std::fmt::{Debug, Display}; +use core::fmt::NumBuffer; use std::str::FromStr; -fn assert_nb(value: Int) { - let s = value.to_string(); - let s2 = format!("s: {}.", value); +macro_rules! assert_nb { + ($int:ident, $value:expr) => { + let value: $int = $value; + let s = value.to_string(); + let s2 = format!("s: {}.", value); - assert_eq!(format!("s: {s}."), s2); - let Ok(ret) = Int::from_str(&s) else { - panic!("failed to convert into to string"); + assert_eq!(format!("s: {s}."), s2); + let Ok(ret) = $int::from_str(&s) else { + panic!("failed to convert into to string"); + }; + assert_eq!(ret, value); + + let mut buffer = NumBuffer::<$int>::new(); + assert_eq!(value.format_into(&mut buffer), s.as_str()); }; - assert_eq!(ret, value); } macro_rules! uint_to_s { @@ -17,11 +23,11 @@ macro_rules! 
uint_to_s { $( #[test] fn $fn_name() { - assert_nb::<$int>($int::MIN); - assert_nb::<$int>($int::MAX); - assert_nb::<$int>(1); - assert_nb::<$int>($int::MIN / 2); - assert_nb::<$int>($int::MAX / 2); + assert_nb!($int, $int::MIN); + assert_nb!($int, $int::MAX); + assert_nb!($int, 1); + assert_nb!($int, $int::MIN / 2); + assert_nb!($int, $int::MAX / 2); } )+ } @@ -31,13 +37,13 @@ macro_rules! int_to_s { $( #[test] fn $fn_name() { - assert_nb::<$int>($int::MIN); - assert_nb::<$int>($int::MAX); - assert_nb::<$int>(1); - assert_nb::<$int>(0); - assert_nb::<$int>(-1); - assert_nb::<$int>($int::MIN / 2); - assert_nb::<$int>($int::MAX / 2); + assert_nb!($int, $int::MIN); + assert_nb!($int, $int::MAX); + assert_nb!($int, 1); + assert_nb!($int, 0); + assert_nb!($int, -1); + assert_nb!($int, $int::MIN / 2); + assert_nb!($int, $int::MAX / 2); } )+ } From fb17077d1e3ff61f192e7d7eec923c2e4629cd66 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Fri, 6 Jun 2025 14:14:49 +0200 Subject: [PATCH 3/3] Use `slice_buffer_to_str` in `GenericRadix::fmt_int` --- library/core/src/fmt/num.rs | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/library/core/src/fmt/num.rs b/library/core/src/fmt/num.rs index 8463bf4dc6806..7969bca2897c4 100644 --- a/library/core/src/fmt/num.rs +++ b/library/core/src/fmt/num.rs @@ -61,7 +61,7 @@ unsafe trait GenericRadix: Sized { let zero = T::zero(); let is_nonnegative = x >= zero; let mut buf = [MaybeUninit::::uninit(); 128]; - let mut curr = buf.len(); + let mut offset = buf.len(); let base = T::from_u8(Self::BASE); if is_nonnegative { // Accumulate each digit of the number from the least significant @@ -69,8 +69,8 @@ unsafe trait GenericRadix: Sized { loop { let n = x % base; // Get the current place value. x = x / base; // Deaccumulate the number. - curr -= 1; - buf[curr].write(Self::digit(n.to_u8())); // Store the digit in the buffer. 
+ offset -= 1; + buf[offset].write(Self::digit(n.to_u8())); // Store the digit in the buffer. if x == zero { // No more digits left to accumulate. break; @@ -81,27 +81,17 @@ unsafe trait GenericRadix: Sized { loop { let n = zero - (x % base); // Get the current place value. x = x / base; // Deaccumulate the number. - curr -= 1; - buf[curr].write(Self::digit(n.to_u8())); // Store the digit in the buffer. + offset -= 1; + buf[offset].write(Self::digit(n.to_u8())); // Store the digit in the buffer. if x == zero { // No more digits left to accumulate. break; }; } } - // SAFETY: `curr` is initialized to `buf.len()` and is only decremented, so it can't overflow. It is - // decremented exactly once for each digit. Since u128 is the widest fixed width integer format supported, - // the maximum number of digits (bits) is 128 for base-2, so `curr` won't underflow as well. - let buf = unsafe { buf.get_unchecked(curr..) }; - // SAFETY: The only chars in `buf` are created by `Self::digit` which are assumed to be - // valid UTF-8 - let buf = unsafe { - str::from_utf8_unchecked(slice::from_raw_parts( - MaybeUninit::slice_as_ptr(buf), - buf.len(), - )) - }; - f.pad_integral(is_nonnegative, Self::PREFIX, buf) + // SAFETY: Starting from `offset`, all elements of the slice have been set. + let buf_slice = unsafe { slice_buffer_to_str(&buf, offset) }; + f.pad_integral(is_nonnegative, Self::PREFIX, buf_slice) } }