Skip to content

Commit 5b58c66

Browse files
committed
Document fmt::Arguments internal representation.
1 parent 7f848fb commit 5b58c66

File tree

6 files changed

+149
-45
lines changed

6 files changed

+149
-45
lines changed

compiler/rustc_ast_lowering/src/format.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,8 @@ fn expand_format_args<'hir>(
318318
&fmt.template[..]
319319
};
320320

321+
// See library/core/src/fmt/mod.rs for the format string encoding.
322+
321323
for (i, piece) in template.iter().enumerate() {
322324
match piece {
323325
&FormatArgsPiece::Literal(sym) => {
@@ -351,6 +353,7 @@ fn expand_format_args<'hir>(
351353
return hir::ExprKind::Call(from_str, args);
352354
}
353355

356+
// Encode the literal in chunks of up to 127 bytes, split at utf-8 boundaries.
354357
while !s.is_empty() {
355358
let len = s.floor_char_boundary(127);
356359
bytecode.push(len as u8);
@@ -361,6 +364,7 @@ fn expand_format_args<'hir>(
361364
incomplete_lit.clear();
362365
}
363366
FormatArgsPiece::Placeholder(p) => {
367+
// Push the start byte and remember its index so we can set the option bits later.
364368
let i = bytecode.len();
365369
bytecode.push(0x80);
366370

@@ -382,6 +386,7 @@ fn expand_format_args<'hir>(
382386
Some(FormatAlignment::Center) => 2,
383387
None => 3,
384388
};
389+
let default_flags = 0x6000_0020;
385390
let flags: u32 = o.fill.unwrap_or(' ') as u32
386391
| ((o.sign == Some(FormatSign::Plus)) as u32) << 21
387392
| ((o.sign == Some(FormatSign::Minus)) as u32) << 22
@@ -392,7 +397,7 @@ fn expand_format_args<'hir>(
392397
| (o.width.is_some() as u32) << 27
393398
| (o.precision.is_some() as u32) << 28
394399
| align << 29;
395-
if flags != 0x6000_0020 {
400+
if flags != default_flags {
396401
bytecode[i] |= 1;
397402
bytecode.extend_from_slice(&flags.to_le_bytes());
398403
if let Some(val) = &o.width {

library/core/src/fmt/mod.rs

Lines changed: 139 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,101 @@ impl<'a> Formatter<'a> {
605605
/// ```
606606
///
607607
/// [`format()`]: ../../std/fmt/fn.format.html
608+
//
609+
// Internal representation:
610+
//
611+
// fmt::Arguments is represented in one of two ways:
612+
//
613+
// 1) String literal representation (e.g. format_args!("hello"))
614+
// ┌────────────────────────────────┐
615+
// template: │ *const u8 │ ─▷ "hello"
616+
// ├──────────────────────────────┬─┤
617+
// args: │ len │1│ (lowest bit is 1; field contains `len << 1 | 1`)
618+
// └──────────────────────────────┴─┘
619+
// In this representation, there are no placeholders and `fmt::Arguments::as_str()` returns Some.
620+
// The pointer points to the start of a static `str`. The length is given by `args as usize >> 1`.
621+
// (The length of a `&str` is isize::MAX at most, so it always fits in a usize minus one bit.)
622+
//
623+
// `fmt::Arguments::from_str()` constructs this representation from a `&'static str`.
624+
//
625+
// 2) Placeholders representation (e.g. format_args!("hello {name}\n"))
626+
// ┌────────────────────────────────┐
627+
// template: │ *const u8 │ ─▷ b"\x06hello \x80\x01\n\x00"
628+
// ├────────────────────────────────┤
629+
// args: │ &'a [Argument<'a>; _] 0│ (lowest bit is 0 due to alignment of Argument type)
630+
// └────────────────────────────────┘
631+
// In this representation, the template is a byte sequence encoding both the literal string pieces
632+
// and the placeholders (including their options/flags).
633+
//
634+
// The `args` pointer points to an array of `fmt::Argument<'a>` values, of sufficient length to
635+
// match the placeholders in the template.
636+
//
637+
// `fmt::Arguments::new()` constructs this representation from a template byte slice and a slice
638+
// of arguments. This function is unsafe, as the template is assumed to be valid and the args
639+
// slice is assumed to have elements matching the template.
640+
//
641+
// The template byte sequence is the concatenation of parts of the following types:
642+
//
643+
// - Literal string piece (1-127 bytes):
644+
// ┌───┬────────────────────────────┐
645+
// │len│ `len` bytes (utf-8) │ (e.g. b"\x06hello ")
646+
// └───┴────────────────────────────┘
647+
// Pieces that must be formatted verbatim (e.g. "hello " and "\n" in "hello {name}\n")
648+
// are represented as a single byte containing their length followed directly by the bytes
649+
// of the string.
650+
//
651+
// Pieces can be 127 bytes at most. Longer pieces are split into multiple pieces (at utf-8
652+
// boundaries).
653+
//
654+
// - Placeholder:
655+
// ┌──────────┬┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┬┄┄┄┄┄┄┄┄┄┄┄┬┄┄┄┄┄┄┄┄┄┄┄┬┄┄┄┄┄┄┄┄┄┄┄┐
656+
// │0b10______│ flags ┊ width ┊ precision ┊ arg_index ┊ (e.g. b"\x82\x05\0")
657+
// └────││││││┴┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┴┄┄┄┄┄┄┄┄┄┄┄┴┄┄┄┄┄┄┄┄┄┄┄┴┄┄┄┄┄┄┄┄┄┄┄┘
658+
// ││││││ 32 bit 16 bit 16 bit 16 bit
659+
// │││││└─ flags present
660+
// ││││└─ width present
661+
// │││└─ precision present
662+
// ││└─ arg_index present
663+
// │└─ width indirect
664+
// └─ precision indirect
665+
//
666+
// Fully default placeholder, without any options:
667+
// ┌──────────┐
668+
// │0b10000000│ (b"\x80")
669+
// └──────────┘
670+
//
671+
// Placeholders (e.g. `{name}` in "hello {name}") are represented as a byte with the highest
672+
// bit set, followed by zero or more fields depending on the flags set in the first byte.
673+
//
674+
// The fields are stored as little endian.
675+
//
676+
// The `flags` field corresponds to the `flags` field of `FormattingOptions`.
677+
// See doc comment of `FormattingOptions::flags` for details.
678+
//
679+
// The `width` and `precision` fields correspond to their respective fields in
680+
// `FormattingOptions`. However, if their "indirect" flag is set, the field contains the
681+
// index in the `args` array where the dynamic width or precision is stored, rather than the
682+
// value directly.
683+
//
684+
// The `arg_index` field is the index into the `args` array for the argument to be
685+
// formatted.
686+
//
687+
// If omitted, the flags, width and precision of the default FormattingOptions::new() are
688+
// used.
689+
//
690+
// If the `arg_index` is omitted, the next argument in the `args` array is used (starting
691+
// at 0).
692+
//
693+
// - End:
694+
// ┌───┐
695+
// │ 0 │ ("\0")
696+
// └───┘
697+
// A single zero byte marks the end of the template.
698+
//
699+
// (Note that the zero byte may also occur naturally as part of the string pieces or flags,
700+
// width, precision and arg_index fields above. That is, the template byte sequence ends
701+
// with a 0 byte, but isn't terminated by the first 0 byte.)
702+
//
608703
#[lang = "format_arguments"]
609704
#[stable(feature = "rust1", since = "1.0.0")]
610705
#[derive(Copy, Clone)]
@@ -613,6 +708,42 @@ pub struct Arguments<'a> {
613708
args: NonNull<rt::Argument<'a>>,
614709
}
615710

711+
/// Used by the format_args!() macro to create a fmt::Arguments object.
712+
#[doc(hidden)]
713+
#[rustc_diagnostic_item = "FmtArgumentsNew"]
714+
#[unstable(feature = "fmt_internals", issue = "none")]
715+
impl<'a> Arguments<'a> {
716+
// SAFETY: The caller must ensure that the provided template and args encode a valid
717+
// fmt::Arguments, as documented above.
718+
#[inline]
719+
pub unsafe fn new<const N: usize, const M: usize>(
720+
template: &'a [u8; N],
721+
args: &'a [rt::Argument<'a>; M],
722+
) -> Arguments<'a> {
723+
// SAFETY: Responsibility of the caller.
724+
unsafe { Arguments { template: mem::transmute(template), args: mem::transmute(args) } }
725+
}
726+
727+
#[inline]
728+
pub const fn from_str(s: &'static str) -> Arguments<'a> {
729+
// SAFETY: This is the "static str" representation of fmt::Arguments; see above.
730+
unsafe {
731+
Arguments {
732+
template: mem::transmute(s.as_ptr()),
733+
args: mem::transmute(s.len() << 1 | 1),
734+
}
735+
}
736+
}
737+
738+
// Same as `from_str`, but not const.
739+
// Used by format_args!() expansion when arguments are inlined,
740+
// e.g. format_args!("{}", 123), which is not allowed in const.
741+
#[inline]
742+
pub fn from_str_nonconst(s: &'static str) -> Arguments<'a> {
743+
Arguments::from_str(s)
744+
}
745+
}
746+
616747
#[doc(hidden)]
617748
#[unstable(feature = "fmt_internals", issue = "none")]
618749
impl<'a> Arguments<'a> {
@@ -646,10 +777,10 @@ impl<'a> Arguments<'a> {
646777
starts_with_placeholder = true;
647778
}
648779
// Skip remainder of placeholder:
649-
let skip = (n & 1 == 1) as usize * 4
650-
+ (n & 2 == 2) as usize * 2
651-
+ (n & 4 == 4) as usize * 2
652-
+ (n & 8 == 8) as usize * 2;
780+
let skip = (n & 1 != 0) as usize * 4 // flags (32 bit)
781+
+ (n & 2 != 0) as usize * 2 // width (16 bit)
782+
+ (n & 4 != 0) as usize * 2 // precision (16 bit)
783+
+ (n & 8 != 0) as usize * 2; // arg_index (16 bit)
653784
template = template.add(1 + skip as usize);
654785
}
655786
}
@@ -718,11 +849,13 @@ impl<'a> Arguments<'a> {
718849
#[inline]
719850
pub const fn as_str(&self) -> Option<&'static str> {
720851
// SAFETY: During const eval, `self.args` must have come from a usize,
721-
// not a pointer, because that's the only way to creat a fmt::Arguments in const.
852+
// not a pointer, because that's the only way to create a fmt::Arguments in const.
853+
// (I.e. only fmt::Arguments::from_str is const, fmt::Arguments::new is not.)
854+
//
722855
// Outside const eval, transmuting a pointer to a usize is fine.
723856
let bits: usize = unsafe { mem::transmute(self.args) };
724857
if bits & 1 == 1 {
725-
// SAFETY: This fmt::Arguments stores a &'static str.
858+
// SAFETY: This fmt::Arguments stores a &'static str. See encoding documentation above.
726859
Some(unsafe {
727860
str::from_utf8_unchecked(crate::slice::from_raw_parts(
728861
self.template.as_ptr(),

library/core/src/fmt/rt.rs

Lines changed: 1 addition & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
99
use super::*;
1010
use crate::hint::unreachable_unchecked;
11-
use crate::mem;
1211
use crate::ptr::NonNull;
1312

1413
#[derive(Copy, Clone)]
@@ -35,7 +34,7 @@ enum ArgumentType<'a> {
3534
/// precision and width.
3635
#[lang = "format_argument"]
3736
#[derive(Copy, Clone)]
38-
#[repr(align(2))]
37+
#[repr(align(2))] // To ensure pointers to this always have their lowest bit cleared.
3938
pub struct Argument<'a> {
4039
ty: ArgumentType<'a>,
4140
}
@@ -164,36 +163,3 @@ impl Argument<'_> {
164163
}
165164
}
166165
}
167-
168-
/// Used by the format_args!() macro to create a fmt::Arguments object.
169-
#[doc(hidden)]
170-
#[rustc_diagnostic_item = "FmtArgumentsNew"]
171-
impl<'a> Arguments<'a> {
172-
#[inline]
173-
pub unsafe fn new<const N: usize, const M: usize>(
174-
template: &'a [u8; N],
175-
args: &'a [rt::Argument<'a>; M],
176-
) -> Arguments<'a> {
177-
// SAFETY: ...
178-
unsafe { Arguments { template: mem::transmute(template), args: mem::transmute(args) } }
179-
}
180-
181-
#[inline]
182-
pub const fn from_str(s: &'static str) -> Arguments<'a> {
183-
// SAFETY: This is the "static str" representation of fmt::Arguments.
184-
unsafe {
185-
Arguments {
186-
template: mem::transmute(s.as_ptr()),
187-
args: mem::transmute(s.len() << 1 | 1),
188-
}
189-
}
190-
}
191-
192-
// Same as `from_str`, but not const.
193-
// Used by format_args!() expansion when arguments are inlined,
194-
// e.g. format_args!("{}", 123), which is not allowed in const.
195-
#[inline]
196-
pub fn from_str_nonconst(s: &'static str) -> Arguments<'a> {
197-
Arguments::from_str(s)
198-
}
199-
}

tests/codegen-units/item-collection/opaque-return-impls.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,4 +86,4 @@ pub fn foo3() -> Box<dyn Iterator<Item = usize>> {
8686
//~ MONO_ITEM fn foo3
8787
//~ MONO_ITEM fn std::boxed::Box::<Counter>::new
8888
//~ MONO_ITEM fn Counter::new
89-
//~ MONO_ITEM fn core::fmt::rt::<impl std::fmt::Arguments<'_>>::from_str
89+
//~ MONO_ITEM fn std::fmt::Arguments::<'_>::from_str

tests/mir-opt/sroa/lifetimes.foo.ScalarReplacementOfAggregates.diff

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@
144144
StorageLive(_23);
145145
_23 = &_15;
146146
_22 = &(*_23);
147-
_11 = core::fmt::rt::<impl Arguments<'_>>::new::<7, 2>(move _20, move _22) -> [return: bb5, unwind unreachable];
147+
_11 = Arguments::<'_>::new::<7, 2>(move _20, move _22) -> [return: bb5, unwind unreachable];
148148
}
149149

150150
bb5: {

tests/pretty/issue-4264.pp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
((::alloc::fmt::format as
3535
for<'a> fn(Arguments<'a>) -> String {format})(((format_arguments::from_str
3636
as
37-
fn(&'static str) -> Arguments<'_> {core::fmt::rt::<impl Arguments<'_>>::from_str})(("test"
37+
fn(&'static str) -> Arguments<'_> {Arguments::<'_>::from_str})(("test"
3838
as &str)) as Arguments<'_>)) as String)
3939
} as String)) as String);
4040
} as ())

0 commit comments

Comments
 (0)