From b51deba9ac36ee2808af8a03fe8bc6fc570cc497 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 28 Apr 2023 08:37:11 +1000 Subject: [PATCH 1/6] Remove `MemDecoder::read_raw_bytes_inherent`. It's unnecessary. Note that `MemDecoder::read_raw_bytes` now has a `&'a [u8]` return type, the same as what `read_raw_bytes_inherent` had. --- compiler/rustc_serialize/src/opaque.rs | 30 ++++++++++---------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index b7976ea3b1c63..012a6406de35c 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -573,22 +573,6 @@ impl<'a> MemDecoder<'a> { self.read_raw_bytes(N).try_into().unwrap() } - // The trait method doesn't have a lifetime parameter, and we need a version of this - // that definitely returns a slice based on the underlying storage as opposed to - // the Decoder itself in order to implement read_str efficiently. - #[inline] - fn read_raw_bytes_inherent(&mut self, bytes: usize) -> &'a [u8] { - if bytes > self.remaining() { - Self::decoder_exhausted(); - } - // SAFETY: We just checked if this range is in-bounds above. - unsafe { - let slice = std::slice::from_raw_parts(self.current, bytes); - self.current = self.current.add(bytes); - slice - } - } - /// While we could manually expose manipulation of the decoder position, /// all current users of that method would need to reset the position later, /// incurring the bounds check of set_position twice. 
@@ -706,14 +690,22 @@ impl<'a> Decoder for MemDecoder<'a> { #[inline] fn read_str(&mut self) -> &str { let len = self.read_usize(); - let bytes = self.read_raw_bytes_inherent(len + 1); + let bytes = self.read_raw_bytes(len + 1); assert!(bytes[len] == STR_SENTINEL); unsafe { std::str::from_utf8_unchecked(&bytes[..len]) } } #[inline] - fn read_raw_bytes(&mut self, bytes: usize) -> &[u8] { - self.read_raw_bytes_inherent(bytes) + fn read_raw_bytes(&mut self, bytes: usize) -> &'a [u8] { + if bytes > self.remaining() { + Self::decoder_exhausted(); + } + // SAFETY: We just checked if this range is in-bounds above. + unsafe { + let slice = std::slice::from_raw_parts(self.current, bytes); + self.current = self.current.add(bytes); + slice + } } #[inline] From 37c9e45186e00c197902a7c7349c18383aa0abf7 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 28 Apr 2023 08:53:13 +1000 Subject: [PATCH 2/6] Add a comment explaining the lack of `Decoder::read_enum_variant`. Because I was wondering about it, and this may save a future person from also wondering. --- compiler/rustc_serialize/src/serialize.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compiler/rustc_serialize/src/serialize.rs b/compiler/rustc_serialize/src/serialize.rs index a6d9c7b7d4210..79c2f76c01c62 100644 --- a/compiler/rustc_serialize/src/serialize.rs +++ b/compiler/rustc_serialize/src/serialize.rs @@ -84,6 +84,11 @@ pub trait Decoder { fn read_char(&mut self) -> char; fn read_str(&mut self) -> &str; fn read_raw_bytes(&mut self, len: usize) -> &[u8]; + + // Although there is an `emit_enum_variant` method in `Encoder`, the code + // patterns in decoding are different enough to encoding that there is no + // need for a corresponding `read_enum_variant` method here. 
+ fn peek_byte(&self) -> u8; fn position(&self) -> usize; } From fa133f5354ac29096d1577d5ba9c1400c2ad3b0f Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 28 Apr 2023 09:01:00 +1000 Subject: [PATCH 3/6] Remove a low-value assertion. Checking that `read_raw_bytes(len)` changes the position by `len` is a reasonable thing for a test, but isn't much use in just one of the zillion `Decodable` impls. --- compiler/rustc_serialize/src/opaque.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index 012a6406de35c..6c18a8d074266 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -779,12 +779,7 @@ impl Encodable for IntEncodedWithFixedSize { impl<'a> Decodable<MemDecoder<'a>> for IntEncodedWithFixedSize { #[inline] fn decode(decoder: &mut MemDecoder<'a>) -> IntEncodedWithFixedSize { - let _start_pos = decoder.position(); - let bytes = decoder.read_raw_bytes(IntEncodedWithFixedSize::ENCODED_SIZE); - let value = u64::from_le_bytes(bytes.try_into().unwrap()); - let _end_pos = decoder.position(); - debug_assert_eq!((_end_pos - _start_pos), IntEncodedWithFixedSize::ENCODED_SIZE); - - IntEncodedWithFixedSize(value) + let bytes = decoder.read_array::<{ IntEncodedWithFixedSize::ENCODED_SIZE }>(); + IntEncodedWithFixedSize(u64::from_le_bytes(bytes)) } } From 7a16d25365b5f0aa815948237c46fb1843386d7a Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 28 Apr 2023 09:06:57 +1000 Subject: [PATCH 4/6] Add some provided methods to `Encoder`/`Decoder`. The methods for `i8`, `bool`, `char`, `str` are the same for all impls, because they are layered on top of other methods. 
--- compiler/rustc_serialize/src/opaque.rs | 76 ----------------------- compiler/rustc_serialize/src/serialize.rs | 64 ++++++++++++++++--- 2 files changed, 56 insertions(+), 84 deletions(-) diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index 6c18a8d074266..44d0cc8164da2 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -51,13 +51,6 @@ macro_rules! write_leb128 { }}; } -/// A byte that [cannot occur in UTF8 sequences][utf8]. Used to mark the end of a string. -/// This way we can skip validation and still be relatively sure that deserialization -/// did not desynchronize. -/// -/// [utf8]: https://en.wikipedia.org/w/index.php?title=UTF-8&oldid=1058865525#Codepage_layout -const STR_SENTINEL: u8 = 0xC1; - impl Encoder for MemEncoder { #[inline] fn emit_usize(&mut self, v: usize) { @@ -114,28 +107,6 @@ impl Encoder for MemEncoder { self.data.extend_from_slice(&v.to_le_bytes()); } - #[inline] - fn emit_i8(&mut self, v: i8) { - self.emit_u8(v as u8); - } - - #[inline] - fn emit_bool(&mut self, v: bool) { - self.emit_u8(if v { 1 } else { 0 }); - } - - #[inline] - fn emit_char(&mut self, v: char) { - self.emit_u32(v as u32); - } - - #[inline] - fn emit_str(&mut self, v: &str) { - self.emit_usize(v.len()); - self.emit_raw_bytes(v.as_bytes()); - self.emit_u8(STR_SENTINEL); - } - #[inline] fn emit_raw_bytes(&mut self, s: &[u8]) { self.data.extend_from_slice(s); @@ -480,28 +451,6 @@ impl Encoder for FileEncoder { self.write_all(&v.to_le_bytes()); } - #[inline] - fn emit_i8(&mut self, v: i8) { - self.emit_u8(v as u8); - } - - #[inline] - fn emit_bool(&mut self, v: bool) { - self.emit_u8(if v { 1 } else { 0 }); - } - - #[inline] - fn emit_char(&mut self, v: char) { - self.emit_u32(v as u32); - } - - #[inline] - fn emit_str(&mut self, v: &str) { - self.emit_usize(v.len()); - self.emit_raw_bytes(v.as_bytes()); - self.emit_u8(STR_SENTINEL); - } - #[inline] fn emit_raw_bytes(&mut self, s: 
&[u8]) { self.write_all(s); @@ -665,36 +614,11 @@ impl<'a> Decoder for MemDecoder<'a> { i16::from_le_bytes(self.read_array()) } - #[inline] - fn read_i8(&mut self) -> i8 { - self.read_byte() as i8 - } - #[inline] fn read_isize(&mut self) -> isize { read_leb128!(self, read_isize_leb128) } - #[inline] - fn read_bool(&mut self) -> bool { - let value = self.read_u8(); - value != 0 - } - - #[inline] - fn read_char(&mut self) -> char { - let bits = self.read_u32(); - std::char::from_u32(bits).unwrap() - } - - #[inline] - fn read_str(&mut self) -> &str { - let len = self.read_usize(); - let bytes = self.read_raw_bytes(len + 1); - assert!(bytes[len] == STR_SENTINEL); - unsafe { std::str::from_utf8_unchecked(&bytes[..len]) } - } - #[inline] fn read_raw_bytes(&mut self, bytes: usize) -> &'a [u8] { if bytes > self.remaining() { diff --git a/compiler/rustc_serialize/src/serialize.rs b/compiler/rustc_serialize/src/serialize.rs index 79c2f76c01c62..e1bc598736fee 100644 --- a/compiler/rustc_serialize/src/serialize.rs +++ b/compiler/rustc_serialize/src/serialize.rs @@ -12,6 +12,13 @@ use std::path; use std::rc::Rc; use std::sync::Arc; +/// A byte that [cannot occur in UTF8 sequences][utf8]. Used to mark the end of a string. +/// This way we can skip validation and still be relatively sure that deserialization +/// did not desynchronize. +/// +/// [utf8]: https://en.wikipedia.org/w/index.php?title=UTF-8&oldid=1058865525#Codepage_layout +const STR_SENTINEL: u8 = 0xC1; + /// A note about error handling. 
/// /// Encoders may be fallible, but in practice failure is rare and there are so @@ -40,10 +47,29 @@ pub trait Encoder { fn emit_i64(&mut self, v: i64); fn emit_i32(&mut self, v: i32); fn emit_i16(&mut self, v: i16); - fn emit_i8(&mut self, v: i8); - fn emit_bool(&mut self, v: bool); - fn emit_char(&mut self, v: char); - fn emit_str(&mut self, v: &str); + + #[inline] + fn emit_i8(&mut self, v: i8) { + self.emit_u8(v as u8); + } + + #[inline] + fn emit_bool(&mut self, v: bool) { + self.emit_u8(if v { 1 } else { 0 }); + } + + #[inline] + fn emit_char(&mut self, v: char) { + self.emit_u32(v as u32); + } + + #[inline] + fn emit_str(&mut self, v: &str) { + self.emit_usize(v.len()); + self.emit_raw_bytes(v.as_bytes()); + self.emit_u8(STR_SENTINEL); + } + fn emit_raw_bytes(&mut self, s: &[u8]); fn emit_enum_variant(&mut self, v_id: usize, f: F) @@ -79,10 +105,32 @@ pub trait Decoder { fn read_i64(&mut self) -> i64; fn read_i32(&mut self) -> i32; fn read_i16(&mut self) -> i16; - fn read_i8(&mut self) -> i8; - fn read_bool(&mut self) -> bool; - fn read_char(&mut self) -> char; - fn read_str(&mut self) -> &str; + + #[inline] + fn read_i8(&mut self) -> i8 { + self.read_u8() as i8 + } + + #[inline] + fn read_bool(&mut self) -> bool { + let value = self.read_u8(); + value != 0 + } + + #[inline] + fn read_char(&mut self) -> char { + let bits = self.read_u32(); + std::char::from_u32(bits).unwrap() + } + + #[inline] + fn read_str(&mut self) -> &str { + let len = self.read_usize(); + let bytes = self.read_raw_bytes(len + 1); + assert!(bytes[len] == STR_SENTINEL); + unsafe { std::str::from_utf8_unchecked(&bytes[..len]) } + } + fn read_raw_bytes(&mut self, len: usize) -> &[u8]; // Although there is an `emit_enum_variant` method in `Encoder`, the code From a676dfa888b3d14abfa5d9b9a1045f8c1bde6793 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 28 Apr 2023 09:16:31 +1000 Subject: [PATCH 5/6] Remove `MemDecoder::read_byte`. It's just a synonym for `read_u8`. 
--- compiler/rustc_serialize/src/opaque.rs | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index 44d0cc8164da2..0f6e4b329b87e 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -504,19 +504,6 @@ impl<'a> MemDecoder<'a> { panic!("MemDecoder exhausted") } - #[inline] - fn read_byte(&mut self) -> u8 { - if self.current == self.end { - Self::decoder_exhausted(); - } - // SAFETY: This type guarantees current <= end, and we just checked current == end. - unsafe { - let byte = *self.current; - self.current = self.current.add(1); - byte - } - } - #[inline] fn read_array(&mut self) -> [u8; N] { self.read_raw_bytes(N).try_into().unwrap() @@ -586,7 +573,15 @@ impl<'a> Decoder for MemDecoder<'a> { #[inline] fn read_u8(&mut self) -> u8 { - self.read_byte() + if self.current == self.end { + Self::decoder_exhausted(); + } + // SAFETY: This type guarantees current <= end, and we just checked current == end. + unsafe { + let byte = *self.current; + self.current = self.current.add(1); + byte + } } #[inline] From 23e91d4d73785f6dfac6b6ac198dc45574cabc88 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 28 Apr 2023 11:38:59 +1000 Subject: [PATCH 6/6] Remove some unnecessary derives. I was curious about how many `Encodable`/`Decodable` derives we have. Some grepping revealed that it's over 500 of each, but the number of `Encodable` ones was higher, which was weird. Most of the `Encodable`-only ones were in `hir.rs`. This commit removes them all, plus some other unnecessary derives in that file and others that I found via trial and error. 
--- compiler/rustc_hir/src/hir.rs | 78 ++++++++++------------ compiler/rustc_middle/src/middle/region.rs | 6 +- compiler/rustc_middle/src/traits/mod.rs | 14 ++-- compiler/rustc_middle/src/ty/adt.rs | 2 +- 4 files changed, 48 insertions(+), 52 deletions(-) diff --git a/compiler/rustc_hir/src/hir.rs b/compiler/rustc_hir/src/hir.rs index 21b4a3370d3e3..e220a0293393e 100644 --- a/compiler/rustc_hir/src/hir.rs +++ b/compiler/rustc_hir/src/hir.rs @@ -26,7 +26,7 @@ use rustc_target::spec::abi::Abi; use smallvec::SmallVec; use std::fmt; -#[derive(Debug, Copy, Clone, Encodable, HashStable_Generic)] +#[derive(Debug, Copy, Clone, HashStable_Generic)] pub struct Lifetime { pub hir_id: HirId, @@ -41,8 +41,7 @@ pub struct Lifetime { pub res: LifetimeName, } -#[derive(Debug, Clone, PartialEq, Eq, Encodable, Hash, Copy)] -#[derive(HashStable_Generic)] +#[derive(Debug, Copy, Clone, HashStable_Generic)] pub enum ParamName { /// Some user-given name like `T` or `'x`. Plain(Ident), @@ -85,8 +84,7 @@ impl ParamName { } } -#[derive(Debug, Clone, PartialEq, Eq, Encodable, Hash, Copy)] -#[derive(HashStable_Generic)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, HashStable_Generic)] pub enum LifetimeName { /// User-given names or fresh (synthetic) names. 
Param(LocalDefId), @@ -243,13 +241,13 @@ impl<'hir> PathSegment<'hir> { } } -#[derive(Encodable, Clone, Copy, Debug, HashStable_Generic)] +#[derive(Clone, Copy, Debug, HashStable_Generic)] pub struct ConstArg { pub value: AnonConst, pub span: Span, } -#[derive(Encodable, Clone, Copy, Debug, HashStable_Generic)] +#[derive(Clone, Copy, Debug, HashStable_Generic)] pub struct InferArg { pub hir_id: HirId, pub span: Span, @@ -422,8 +420,7 @@ impl<'hir> GenericArgs<'hir> { } } -#[derive(Copy, Clone, PartialEq, Eq, Encodable, Hash, Debug)] -#[derive(HashStable_Generic)] +#[derive(Copy, Clone, PartialEq, Eq, Debug, HashStable_Generic)] pub enum GenericArgsParentheses { No, /// Bounds for `feature(return_type_notation)`, like `T: Trait`, @@ -435,8 +432,7 @@ pub enum GenericArgsParentheses { /// A modifier on a bound, currently this is only used for `?Sized`, where the /// modifier is `Maybe`. Negative bounds should also be handled here. -#[derive(Copy, Clone, PartialEq, Eq, Encodable, Hash, Debug)] -#[derive(HashStable_Generic)] +#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, HashStable_Generic)] pub enum TraitBoundModifier { None, Maybe, @@ -474,7 +470,7 @@ impl GenericBound<'_> { pub type GenericBounds<'hir> = &'hir [GenericBound<'hir>]; -#[derive(Copy, Clone, PartialEq, Eq, Encodable, Debug, HashStable_Generic)] +#[derive(Copy, Clone, Debug, HashStable_Generic)] pub enum LifetimeParamKind { // Indicates that the lifetime definition was explicitly declared (e.g., in // `fn foo<'a>(x: &'a u8) -> &'a u8 { x }`). @@ -539,7 +535,7 @@ impl<'hir> GenericParam<'hir> { /// early-bound (but can be a late-bound lifetime in functions, for example), /// or from a `for<...>` binder, in which case it's late-bound (and notably, /// does not show up in the parent item's generics). 
-#[derive(Debug, HashStable_Generic, PartialEq, Eq, Copy, Clone)] +#[derive(Debug, Clone, Copy, HashStable_Generic)] pub enum GenericParamSource { // Early or late-bound parameters defined on an item Generics, @@ -1097,7 +1093,7 @@ pub struct PatField<'hir> { pub span: Span, } -#[derive(Copy, Clone, PartialEq, Encodable, Debug, HashStable_Generic)] +#[derive(Copy, Clone, PartialEq, Debug, HashStable_Generic)] pub enum RangeEnd { Included, Excluded, @@ -1197,7 +1193,7 @@ pub enum PatKind<'hir> { Slice(&'hir [Pat<'hir>], Option<&'hir Pat<'hir>>, &'hir [Pat<'hir>]), } -#[derive(Copy, Clone, PartialEq, Encodable, Debug, HashStable_Generic)] +#[derive(Copy, Clone, PartialEq, Debug, HashStable_Generic)] pub enum BinOpKind { /// The `+` operator (addition). Add, @@ -1325,7 +1321,7 @@ impl Into for BinOpKind { pub type BinOp = Spanned; -#[derive(Copy, Clone, PartialEq, Encodable, Debug, HashStable_Generic)] +#[derive(Copy, Clone, PartialEq, Debug, HashStable_Generic)] pub enum UnOp { /// The `*` operator (dereferencing). Deref, @@ -1450,19 +1446,19 @@ pub struct ExprField<'hir> { pub is_shorthand: bool, } -#[derive(Copy, Clone, PartialEq, Encodable, Debug, HashStable_Generic)] +#[derive(Copy, Clone, PartialEq, Debug, HashStable_Generic)] pub enum BlockCheckMode { DefaultBlock, UnsafeBlock(UnsafeSource), } -#[derive(Copy, Clone, PartialEq, Encodable, Debug, HashStable_Generic)] +#[derive(Copy, Clone, PartialEq, Debug, HashStable_Generic)] pub enum UnsafeSource { CompilerGenerated, UserProvided, } -#[derive(Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, Debug)] +#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] pub struct BodyId { pub hir_id: HirId, } @@ -1506,7 +1502,7 @@ impl<'hir> Body<'hir> { } /// The type of source expression that caused this generator to be created. 
-#[derive(Clone, PartialEq, PartialOrd, Eq, Hash, Debug, Copy)] +#[derive(Clone, PartialEq, Eq, Debug, Copy, Hash)] #[derive(HashStable_Generic, Encodable, Decodable)] pub enum GeneratorKind { /// An explicit `async` block or the body of an async function. @@ -1539,7 +1535,7 @@ impl GeneratorKind { /// /// This helps error messages but is also used to drive coercions in /// type-checking (see #60424). -#[derive(Clone, PartialEq, PartialOrd, Eq, Hash, Debug, Copy)] +#[derive(Clone, PartialEq, Eq, Hash, Debug, Copy)] #[derive(HashStable_Generic, Encodable, Decodable)] pub enum AsyncGeneratorKind { /// An explicit `async` block written by the user. @@ -1649,7 +1645,7 @@ impl fmt::Display for ConstContext { /// A literal. pub type Lit = Spanned; -#[derive(Copy, Clone, PartialEq, Eq, Encodable, Debug, HashStable_Generic)] +#[derive(Copy, Clone, Debug, HashStable_Generic)] pub enum ArrayLen { Infer(HirId, Span), Body(AnonConst), @@ -1671,7 +1667,7 @@ impl ArrayLen { /// /// You can check if this anon const is a default in a const param /// `const N: usize = { ... }` with `tcx.hir().opt_const_param_default_param_def_id(..)` -#[derive(Copy, Clone, PartialEq, Eq, Encodable, Debug, HashStable_Generic)] +#[derive(Copy, Clone, Debug, HashStable_Generic)] pub struct AnonConst { pub hir_id: HirId, pub def_id: LocalDefId, @@ -2105,7 +2101,7 @@ impl<'hir> QPath<'hir> { } /// Hints at the original code for a let statement. -#[derive(Copy, Clone, Encodable, Debug, HashStable_Generic)] +#[derive(Copy, Clone, Debug, HashStable_Generic)] pub enum LocalSource { /// A `match _ { .. }`. Normal, @@ -2158,7 +2154,7 @@ impl MatchSource { } /// The loop type that yielded an `ExprKind::Loop`. -#[derive(Copy, Clone, PartialEq, Encodable, Debug, HashStable_Generic)] +#[derive(Copy, Clone, PartialEq, Debug, HashStable_Generic)] pub enum LoopSource { /// A `loop { .. }` loop. 
Loop, @@ -2178,7 +2174,7 @@ impl LoopSource { } } -#[derive(Copy, Clone, Encodable, Debug, HashStable_Generic)] +#[derive(Copy, Clone, Debug, HashStable_Generic)] pub enum LoopIdError { OutsideLoopScope, UnlabeledCfInWhileCondition, @@ -2197,7 +2193,7 @@ impl fmt::Display for LoopIdError { } } -#[derive(Copy, Clone, Encodable, Debug, HashStable_Generic)] +#[derive(Copy, Clone, Debug, HashStable_Generic)] pub struct Destination { /// This is `Some(_)` iff there is an explicit user-specified 'label pub label: Option