@@ -605,6 +605,101 @@ impl<'a> Formatter<'a> {
605605/// ```
606606///
607607/// [`format()`]: ../../std/fmt/fn.format.html
608+ //
609+ // Internal representation:
610+ //
611+ // fmt::Arguments is represented in one of two ways:
612+ //
613+ // 1) String literal representation (e.g. format_args!("hello"))
614+ // ┌────────────────────────────────┐
615+ // template: │ *const u8 │ ─▷ "hello"
616+ // ├──────────────────────────────┬─┤
617+ // args: │ len │1│ (lowest bit is 1; field contains `len << 1 | 1`)
618+ // └──────────────────────────────┴─┘
619+ // In this representation, there are no placeholders and `fmt::Arguments::as_str()` returns Some.
620+ // The pointer points to the start of a static `str`. The length is given by `args as usize >> 1`.
621+ // (The length of a `&str` is isize::MAX at most, so it always fits in a usize minus one bit.)
622+ //
623+ // `fmt::Arguments::from_str()` constructs this representation from a `&'static str`.
624+ //
625+ // 2) Placeholders representation (e.g. format_args!("hello {name}\n"))
626+ // ┌────────────────────────────────┐
627+ // template: │ *const u8 │ ─▷ b"\x06hello \x80\x01\n\x00"
628+ // ├────────────────────────────────┤
629+ // args: │ &'a [Argument<'a>; _] 0│ (lowest bit is 0 due to the alignment of the `Argument` type)
630+ // └────────────────────────────────┘
631+ // In this representation, the template is a byte sequence encoding both the literal string pieces
632+ // and the placeholders (including their options/flags).
633+ //
634+ // The `args` pointer points to an array of `fmt::Argument<'a>` values, of sufficient length to
635+ // match the placeholders in the template.
636+ //
637+ // `fmt::Arguments::new()` constructs this representation from a template byte slice and a slice
638+ // of arguments. This function is unsafe, as the template is assumed to be valid and the args
639+ // slice is assumed to have elements matching the template.
640+ //
641+ // The template byte sequence is the concatenation of parts of the following types:
642+ //
643+ // - Literal string piece (1-127 bytes):
644+ // ┌───┬────────────────────────────┐
645+ // │len│ `len` bytes (utf-8) │ (e.g. b"\x06hello ")
646+ // └───┴────────────────────────────┘
647+ // Pieces that must be formatted verbatim (e.g. "hello " and "\n" in "hello {name}\n")
648+ // are represented as a single byte containing their length followed directly by the bytes
649+ // of the string.
650+ //
651+ // Pieces can be 127 bytes at most. Longer pieces are split into multiple pieces (at utf-8
652+ // boundaries).
653+ //
654+ // - Placeholder:
655+ // ┌──────────┬┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┬┄┄┄┄┄┄┄┄┄┄┄┬┄┄┄┄┄┄┄┄┄┄┄┬┄┄┄┄┄┄┄┄┄┄┄┐
656+ // │0b10______│ flags ┊ width ┊ precision ┊ arg_index ┊ (e.g. b"\x82\x05\0")
657+ // └────││││││┴┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┴┄┄┄┄┄┄┄┄┄┄┄┴┄┄┄┄┄┄┄┄┄┄┄┴┄┄┄┄┄┄┄┄┄┄┄┘
658+ // ││││││ 32 bit 16 bit 16 bit 16 bit
659+ // │││││└─ flags present
660+ // ││││└─ width present
661+ // │││└─ precision present
662+ // ││└─ arg_index present
663+ // │└─ width indirect
664+ // └─ precision indirect
665+ //
666+ // Fully default placeholder, without any options:
667+ // ┌──────────┐
668+ // │0b10000000│ (b"\x80")
669+ // └──────────┘
670+ //
671+ // Placeholders (e.g. `{name}` in "hello {name}") are represented as a byte with the highest
672+ // bit set, followed by zero or more fields depending on the "present" bits set in that byte.
673+ //
674+ // The fields are stored as little endian.
675+ //
676+ // The `flags` field corresponds to the `flags` field of `FormattingOptions`.
677+ // See doc comment of `FormattingOptions::flags` for details.
678+ //
679+ // The `width` and `precision` fields correspond to their respective fields in
680+ // `FormattingOptions`. However, if their "indirect" flag is set, the field contains the
681+ // index in the `args` array where the dynamic width or precision is stored, rather than the
682+ // value directly.
683+ //
684+ // The `arg_index` field is the index into the `args` array for the argument to be
685+ // formatted.
686+ //
687+ // If the `flags`, `width` or `precision` field is omitted, the corresponding value of the
688+ // default `FormattingOptions::new()` is used.
689+ //
690+ // If the `arg_index` is omitted, the next argument in the `args` array is used (starting
691+ // at 0).
692+ //
693+ // - End:
694+ // ┌───┐
695+ // │ 0 │ ("\0")
696+ // └───┘
697+ // A single zero byte marks the end of the template.
698+ //
699+ // (Note that the zero byte may also occur naturally as part of the string pieces or flags,
700+ // width, precision and arg_index fields above. That is, the template byte sequence ends
701+ // with a 0 byte, but isn't terminated by the first 0 byte.)
702+ //
608703#[ lang = "format_arguments" ]
609704#[ stable( feature = "rust1" , since = "1.0.0" ) ]
610705#[ derive( Copy , Clone ) ]
@@ -613,6 +708,42 @@ pub struct Arguments<'a> {
613708 args : NonNull < rt:: Argument < ' a > > ,
614709}
615710
711+ /// Constructors used by the `format_args!()` macro to create a `fmt::Arguments` object.
712+ #[ doc( hidden) ]
713+ #[ rustc_diagnostic_item = "FmtArgumentsNew" ]
714+ #[ unstable( feature = "fmt_internals" , issue = "none" ) ]
715+ impl < ' a > Arguments < ' a > {
716+ // Safety contract: the caller must ensure that `template` and `args` together encode a valid
717+ // `fmt::Arguments` in the placeholders representation, as documented above the struct definition.
718+ #[ inline]
719+ pub unsafe fn new < const N : usize , const M : usize > (
720+ template : & ' a [ u8 ; N ] ,
721+ args : & ' a [ rt:: Argument < ' a > ; M ] ,
722+ ) -> Arguments < ' a > {
723+ // SAFETY: The caller guarantees a valid encoding; the transmutes only reinterpret the two references as the struct's pointer fields.
724+ unsafe { Arguments { template : mem:: transmute ( template) , args : mem:: transmute ( args) } }
725+ }
726+
727+ #[ inline]
728+ pub const fn from_str ( s : & ' static str ) -> Arguments < ' a > {
729+ // SAFETY: This is the "string literal" representation of fmt::Arguments (data pointer plus `len << 1 | 1`); see above.
730+ unsafe {
731+ Arguments {
732+ template : mem:: transmute ( s. as_ptr ( ) ) ,
733+ args : mem:: transmute ( s. len ( ) << 1 | 1 ) ,
734+ }
735+ }
736+ }
737+
738+ // Same as `from_str`, but not const.
739+ // Used by format_args!() expansion when arguments are inlined,
740+ // e.g. format_args!("{}", 123), which is not allowed in const.
741+ #[ inline]
742+ pub fn from_str_nonconst ( s : & ' static str ) -> Arguments < ' a > {
743+ Arguments :: from_str ( s)
744+ }
745+ }
746+
616747#[ doc( hidden) ]
617748#[ unstable( feature = "fmt_internals" , issue = "none" ) ]
618749impl < ' a > Arguments < ' a > {
@@ -646,10 +777,10 @@ impl<'a> Arguments<'a> {
646777 starts_with_placeholder = true ;
647778 }
648779 // Skip remainder of placeholder:
649- let skip = ( n & 1 == 1 ) as usize * 4
650- + ( n & 2 == 2 ) as usize * 2
651- + ( n & 4 == 4 ) as usize * 2
652- + ( n & 8 == 8 ) as usize * 2 ;
780+ let skip = ( n & 1 != 0 ) as usize * 4 // flags (32 bit)
781+ + ( n & 2 != 0 ) as usize * 2 // width (16 bit)
782+ + ( n & 4 != 0 ) as usize * 2 // precision (16 bit)
783+ + ( n & 8 != 0 ) as usize * 2 ; // arg_index (16 bit)
653784 template = template. add ( 1 + skip as usize ) ;
654785 }
655786 }
@@ -718,11 +849,13 @@ impl<'a> Arguments<'a> {
718849 #[ inline]
719850 pub const fn as_str ( & self ) -> Option < & ' static str > {
720851 // SAFETY: During const eval, `self.args` must have come from a usize,
721- // not a pointer, because that's the only way to creat a fmt::Arguments in const.
852+ // not a pointer, because that's the only way to create a fmt::Arguments in const.
853+ // (I.e. only fmt::Arguments::from_str is const, fmt::Arguments::new is not.)
854+ //
722855 // Outside const eval, transmuting a pointer to a usize is fine.
723856 let bits: usize = unsafe { mem:: transmute ( self . args ) } ;
724857 if bits & 1 == 1 {
725- // SAFETY: This fmt::Arguments stores a &'static str.
858+ // SAFETY: This fmt::Arguments stores a &'static str. See encoding documentation above.
726859 Some ( unsafe {
727860 str:: from_utf8_unchecked ( crate :: slice:: from_raw_parts (
728861 self . template . as_ptr ( ) ,
0 commit comments