From 75b67c2d5e12d98b70323bd7874886dd650c5499 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 5 Aug 2020 17:29:13 +1000 Subject: [PATCH 1/2] Fix symbol ordering for confusable idents detection. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Confusable idents detection uses a type `BTreeMap`. This is highly dubious given that `Symbol` doesn't guarantee a meaningful order. (In practice, it currently gives an order that mostly matches source code order.) As a result, changes in `Symbol` representation make the `lint-confusable-idents.rs` test fail, because this error message: > identifier pair considered confusable between `s` and `s` is changed to this: > identifier pair considered confusable between `s` and `s` and the corresponding span pointers get swapped erroneously, leading to an incorrect "previous identifier" label. This commit sorts the relevant symbols by span before doing the checking, which ensures that the ident that appears first in the code will be mentioned first in the message. The commit also extends the test slightly to be more thorough. --- src/librustc_lint/non_ascii_idents.rs | 6 ++++++ src/librustc_session/parse.rs | 3 +-- .../lint-confusable-idents.rs | 2 ++ .../lint-confusable-idents.stderr | 13 +++++++++++-- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/librustc_lint/non_ascii_idents.rs b/src/librustc_lint/non_ascii_idents.rs index 30dbd069c29bd..7c45d826473b8 100644 --- a/src/librustc_lint/non_ascii_idents.rs +++ b/src/librustc_lint/non_ascii_idents.rs @@ -58,6 +58,12 @@ impl EarlyLintPass for NonAsciiIdents { let mut has_non_ascii_idents = false; let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock(); + + // Sort by `Span` so that error messages make sense with respect to the + // order of identifier locations in the code. + let mut symbols: Vec<_> = symbols.iter().collect(); + symbols.sort_by_key(|k| k.1); + for (symbol, &sp) in symbols.iter() { let symbol_str = symbol.as_str(); if symbol_str.is_ascii() { diff --git a/src/librustc_session/parse.rs b/src/librustc_session/parse.rs index 9cdb7e966fef8..a2bb8c4f91ff4 100644 --- a/src/librustc_session/parse.rs +++ b/src/librustc_session/parse.rs @@ -13,7 +13,6 @@ use rustc_span::hygiene::ExpnId; use rustc_span::source_map::{FilePathMapping, SourceMap}; use rustc_span::{MultiSpan, Span, Symbol}; -use std::collections::BTreeMap; use std::path::PathBuf; use std::str; @@ -64,7 +63,7 @@ impl GatedSpans { #[derive(Default)] pub struct SymbolGallery { /// All symbols occurred and their first occurrence span. - pub symbols: Lock>, + pub symbols: Lock>, } impl SymbolGallery { diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs index e15ed2e70b896..2c711f994043f 100644 --- a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs +++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs @@ -3,9 +3,11 @@ #![allow(uncommon_codepoints, non_upper_case_globals)] const s: usize = 42; +const s_s: usize = 42; fn main() { let s = "rust"; //~ ERROR identifier pair considered confusable + let s_s = "rust2"; //~ ERROR identifier pair considered confusable not_affected(); } diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr index 218f94f7b5829..b9af60963adf6 100644 --- a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr +++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr @@ -1,5 +1,5 @@ error: identifier pair considered confusable between `s` and `s` - --> $DIR/lint-confusable-idents.rs:8:9 + --> $DIR/lint-confusable-idents.rs:9:9 | LL | const s: usize = 42; | -- this is where the previous identifier occurred @@ -13,5 +13,14 @@ note: the lint level is defined here LL | #![deny(confusable_idents)] | ^^^^^^^^^^^^^^^^^ -error: aborting due to previous error +error: identifier pair considered confusable between `s_s` and `s_s` + --> $DIR/lint-confusable-idents.rs:10:9 + | +LL | const s_s: usize = 42; + | --- this is where the previous identifier occurred +... +LL | let s_s = "rust2"; + | ^^^^^ + +error: aborting due to 2 previous errors From 0a597bd98f91b7f0e87985b8dcac0fd9820c6d47 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 6 Aug 2020 12:48:53 +1000 Subject: [PATCH 2/2] Remove `CowBoxSymStr`. `CowBoxSymStr` is a type that either holds a `SymbolStr` (which is much the same as a `Symbol`), or an owned string. When computing skeletons, a `SymbolStr` is stored if the skeleton is the same as the original string, otherwise an owned string is stored. So, basically, `CowBoxSymStr` is a type for string interning. But we already have one of those: `Symbol` itself. This PR removes `CowBoxSymStr`, using `Symbol` instead. A good thing about this is that it avoids storing `SymbolStr` values in `skeleton_map`, something that is discouraged. The PR also inlines and removes the `calc_skeleton()` function because that simplifies the code. --- src/librustc_lint/non_ascii_idents.rs | 84 ++++++++------------------- 1 file changed, 23 insertions(+), 61 deletions(-) diff --git a/src/librustc_lint/non_ascii_idents.rs b/src/librustc_lint/non_ascii_idents.rs index 7c45d826473b8..ab1658b222982 100644 --- a/src/librustc_lint/non_ascii_idents.rs +++ b/src/librustc_lint/non_ascii_idents.rs @@ -1,7 +1,7 @@ use crate::{EarlyContext, EarlyLintPass, LintContext}; use rustc_ast::ast; use rustc_data_structures::fx::FxHashMap; -use rustc_span::symbol::SymbolStr; +use rustc_span::symbol::Symbol; declare_lint! { pub NON_ASCII_IDENTS, @@ -39,7 +39,6 @@ impl EarlyLintPass for NonAsciiIdents { use rustc_span::Span; use std::collections::BTreeMap; use unicode_security::GeneralSecurityProfile; - use utils::CowBoxSymStr; let check_non_ascii_idents = cx.builder.lint_level(NON_ASCII_IDENTS).0 != Level::Allow; let check_uncommon_codepoints = @@ -83,33 +82,34 @@ impl EarlyLintPass for NonAsciiIdents { } if has_non_ascii_idents && check_confusable_idents { - let mut skeleton_map: FxHashMap = + let mut skeleton_map: FxHashMap = FxHashMap::with_capacity_and_hasher(symbols.len(), Default::default()); - let mut str_buf = String::new(); - for (symbol, &sp) in symbols.iter() { - fn calc_skeleton(symbol_str: &SymbolStr, buffer: &mut String) -> CowBoxSymStr { - use std::mem::replace; - use unicode_security::confusable_detection::skeleton; - buffer.clear(); - buffer.extend(skeleton(symbol_str)); - if *symbol_str == *buffer { - CowBoxSymStr::Interned(symbol_str.clone()) - } else { - let owned = replace(buffer, String::new()); - CowBoxSymStr::Owned(owned.into_boxed_str()) - } - } + let mut skeleton_buf = String::new(); + + for (&symbol, &sp) in symbols.iter() { + use unicode_security::confusable_detection::skeleton; + let symbol_str = symbol.as_str(); let is_ascii = symbol_str.is_ascii(); - let skeleton = calc_skeleton(&symbol_str, &mut str_buf); + + // Get the skeleton as a `Symbol`. + skeleton_buf.clear(); + skeleton_buf.extend(skeleton(&symbol_str)); + let skeleton_sym = if *symbol_str == *skeleton_buf { + symbol + } else { + Symbol::intern(&skeleton_buf) + }; + skeleton_map - .entry(skeleton) - .and_modify(|(existing_symbolstr, existing_span, existing_is_ascii)| { + .entry(skeleton_sym) + .and_modify(|(existing_symbol, existing_span, existing_is_ascii)| { if !*existing_is_ascii || !is_ascii { cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| { lint.build(&format!( "identifier pair considered confusable between `{}` and `{}`", - existing_symbolstr, symbol_str + existing_symbol.as_str(), + symbol.as_str() )) .span_label( *existing_span, @@ -119,12 +119,12 @@ impl EarlyLintPass for NonAsciiIdents { }); } if *existing_is_ascii && !is_ascii { - *existing_symbolstr = symbol_str.clone(); + *existing_symbol = symbol; *existing_span = sp; *existing_is_ascii = is_ascii; } }) - .or_insert((symbol_str, sp, is_ascii)); + .or_insert((symbol, sp, is_ascii)); } } @@ -238,41 +238,3 @@ impl EarlyLintPass for NonAsciiIdents { } } } - -mod utils { - use rustc_span::symbol::SymbolStr; - use std::hash::{Hash, Hasher}; - use std::ops::Deref; - - pub(super) enum CowBoxSymStr { - Interned(SymbolStr), - Owned(Box), - } - - impl Deref for CowBoxSymStr { - type Target = str; - - fn deref(&self) -> &str { - match self { - CowBoxSymStr::Interned(interned) => interned, - CowBoxSymStr::Owned(ref owned) => owned, - } - } - } - - impl Hash for CowBoxSymStr { - #[inline] - fn hash(&self, state: &mut H) { - Hash::hash(&**self, state) - } - } - - impl PartialEq for CowBoxSymStr { - #[inline] - fn eq(&self, other: &CowBoxSymStr) -> bool { - PartialEq::eq(&**self, &**other) - } - } - - impl Eq for CowBoxSymStr {} -}