From 4989a56448c7e3047e0538ff4ef54c49db8a5a4f Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Mon, 9 Jun 2014 13:12:30 -0700 Subject: [PATCH 01/15] syntax: doc comments all the things --- src/libsyntax/abi.rs | 21 +- src/libsyntax/ast.rs | 366 +++++++++--------- src/libsyntax/ast_map.rs | 22 +- src/libsyntax/ast_util.rs | 14 +- src/libsyntax/attr.rs | 28 +- src/libsyntax/codemap.rs | 22 +- src/libsyntax/diagnostic.rs | 26 +- src/libsyntax/ext/base.rs | 23 +- src/libsyntax/ext/deriving/encodable.rs | 143 ++++--- src/libsyntax/ext/deriving/generic/mod.rs | 332 ++++++++-------- src/libsyntax/ext/deriving/generic/ty.rs | 14 +- src/libsyntax/ext/deriving/show.rs | 4 +- src/libsyntax/ext/expand.rs | 10 +- src/libsyntax/ext/format.rs | 16 +- src/libsyntax/ext/mtwt.rs | 52 +-- src/libsyntax/ext/source_util.rs | 14 +- src/libsyntax/ext/tt/macro_parser.rs | 175 +++++---- src/libsyntax/ext/tt/macro_rules.rs | 8 +- src/libsyntax/ext/tt/transcribe.rs | 12 +- src/libsyntax/lib.rs | 14 +- src/libsyntax/parse/attr.rs | 42 +- src/libsyntax/parse/classify.rs | 20 +- src/libsyntax/parse/common.rs | 4 +- src/libsyntax/parse/lexer/comments.rs | 18 +- src/libsyntax/parse/lexer/mod.rs | 12 +- src/libsyntax/parse/mod.rs | 13 +- src/libsyntax/parse/obsolete.rs | 8 +- src/libsyntax/parse/parser.rs | 449 +++++++++++----------- src/libsyntax/parse/token.rs | 6 +- src/libsyntax/print/pp.rs | 303 ++++++++------- src/libsyntax/print/pprust.rs | 6 +- src/libsyntax/util/interner.rs | 6 +- src/libsyntax/util/parser_testing.rs | 38 +- src/libsyntax/visit.rs | 32 +- 34 files changed, 1136 insertions(+), 1137 deletions(-) diff --git a/src/libsyntax/abi.rs b/src/libsyntax/abi.rs index 9771bc9386b16..5aaf7ed3dba5d 100644 --- a/src/libsyntax/abi.rs +++ b/src/libsyntax/abi.rs @@ -60,9 +60,12 @@ pub struct AbiData { } pub enum AbiArchitecture { - RustArch, // Not a real ABI (e.g., intrinsic) - AllArch, // An ABI that specifies cross-platform defaults (e.g., "C") - Archs(u32) // Multiple architectures (bitset) + /// Not a real ABI (e.g., intrinsic) + RustArch, + /// An ABI that specifies cross-platform defaults (e.g., "C") + AllArch, + /// Multiple architectures (bitset) + Archs(u32) } static AbiDatas: &'static [AbiData] = &[ @@ -84,21 +87,13 @@ static AbiDatas: &'static [AbiData] = &[ AbiData {abi: RustIntrinsic, name: "rust-intrinsic", abi_arch: RustArch}, ]; +/// Iterates through each of the defined ABIs. fn each_abi(op: |abi: Abi| -> bool) -> bool { - /*! - * - * Iterates through each of the defined ABIs. - */ - AbiDatas.iter().advance(|abi_data| op(abi_data.abi)) } +/// Returns the ABI with the given name (if any). pub fn lookup(name: &str) -> Option { - /*! - * - * Returns the ABI with the given name (if any). - */ - let mut res = None; each_abi(|abi| { diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 5f3adbdb54df4..c5afc5067b6a9 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -24,7 +24,8 @@ use std::rc::Rc; use std::gc::{Gc, GC}; use serialize::{Encodable, Decodable, Encoder, Decoder}; -/// A pointer abstraction. FIXME(eddyb) #10676 use Rc in the future. +/// A pointer abstraction. +// FIXME(eddyb) #10676 use Rc in the future. pub type P = Gc; #[allow(non_snake_case_functions)] @@ -36,10 +37,10 @@ pub fn P(value: T) -> P { // FIXME #6993: in librustc, uses of "ident" should be replaced // by just "Name". 
-// an identifier contains a Name (index into the interner -// table) and a SyntaxContext to track renaming and -// macro expansion per Flatt et al., "Macros -// That Work Together" +/// An identifier contains a Name (index into the interner +/// table) and a SyntaxContext to track renaming and +/// macro expansion per Flatt et al., "Macros +/// That Work Together" #[deriving(Clone, Hash, PartialOrd, Eq, Ord, Show)] pub struct Ident { pub name: Name, @@ -122,10 +123,9 @@ pub struct Lifetime { pub name: Name } -// a "Path" is essentially Rust's notion of a name; -// for instance: std::cmp::PartialEq . It's represented -// as a sequence of identifiers, along with a bunch -// of supporting information. +/// A "Path" is essentially Rust's notion of a name; for instance: +/// std::cmp::PartialEq . It's represented as a sequence of identifiers, +/// along with a bunch of supporting information. #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub struct Path { pub span: Span, @@ -163,15 +163,15 @@ pub struct DefId { pub static LOCAL_CRATE: CrateNum = 0; pub static CRATE_NODE_ID: NodeId = 0; -// When parsing and doing expansions, we initially give all AST nodes this AST -// node value. Then later, in the renumber pass, we renumber them to have -// small, positive ids. +/// When parsing and doing expansions, we initially give all AST nodes this AST +/// node value. Then later, in the renumber pass, we renumber them to have +/// small, positive ids. pub static DUMMY_NODE_ID: NodeId = -1; -// The AST represents all type param bounds as types. -// typeck::collect::compute_bounds matches these against -// the "special" built-in traits (see middle::lang_items) and -// detects Copy, Send and Share. +/// The AST represents all type param bounds as types. +/// typeck::collect::compute_bounds matches these against +/// the "special" built-in traits (see middle::lang_items) and +/// detects Copy, Send and Share. #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum TyParamBound { TraitTyParamBound(TraitRef), @@ -210,9 +210,9 @@ impl Generics { } } -// The set of MetaItems that define the compilation environment of the crate, -// used to drive conditional compilation -pub type CrateConfig = Vec>; +/// The set of MetaItems that define the compilation environment of the crate, +/// used to drive conditional compilation +pub type CrateConfig = Vec> ; #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub struct Crate { @@ -289,13 +289,13 @@ pub enum BindingMode { pub enum Pat_ { PatWild, PatWildMulti, - // A PatIdent may either be a new bound variable, - // or a nullary enum (in which case the third field - // is None). - // In the nullary enum case, the parser can't determine - // which it is. The resolver determines this, and - // records this pattern's NodeId in an auxiliary - // set (of "PatIdents that refer to nullary enums") + /// A PatIdent may either be a new bound variable, + /// or a nullary enum (in which case the third field + /// is None). + /// In the nullary enum case, the parser can't determine + /// which it is. 
The resolver determines this, and
+    /// records this pattern's NodeId in an auxiliary
+    /// set (of "PatIdents that refer to nullary enums")
     PatIdent(BindingMode, SpannedIdent, Option>),
     PatEnum(Path, Option>>), /* "none" means a * pattern where
                                      * we don't bind the fields to names */
@@ -305,8 +305,8 @@ pub enum Pat_ {
     PatRegion(Gc), // reference pattern
     PatLit(Gc),
     PatRange(Gc, Gc),
-    // [a, b, ..i, y, z] is represented as
-    // PatVec(~[a, b], Some(i), ~[y, z])
+    /// [a, b, ..i, y, z] is represented as:
+    /// PatVec(~[a, b], Some(i), ~[y, z])
     PatVec(Vec>, Option>, Vec>),
     PatMac(Mac),
 }
@@ -319,9 +319,12 @@ pub enum Mutability {
 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)]
 pub enum ExprVstore {
-    ExprVstoreUniq,     // ~[1,2,3,4]
-    ExprVstoreSlice,    // &[1,2,3,4]
-    ExprVstoreMutSlice, // &mut [1,2,3,4]
+    /// ~[1, 2, 3, 4]
+    ExprVstoreUniq,
+    /// &[1, 2, 3, 4]
+    ExprVstoreSlice,
+    /// &mut [1, 2, 3, 4]
+    ExprVstoreMutSlice,
 }
 
 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)]
@@ -359,16 +362,16 @@ pub type Stmt = Spanned;
 
 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)]
 pub enum Stmt_ {
-    // could be an item or a local (let) binding:
+    /// Could be an item or a local (let) binding:
     StmtDecl(Gc, NodeId),
 
-    // expr without trailing semi-colon (must have unit type):
+    /// Expr without trailing semi-colon (must have unit type):
     StmtExpr(Gc, NodeId),
 
-    // expr with trailing semi-colon (may have any type):
+    /// Expr with trailing semi-colon (may have any type):
     StmtSemi(Gc, NodeId),
 
-    // bool: is there a trailing sem-colon?
+    /// bool: is there a trailing semi-colon?
     StmtMac(Mac, bool),
 }
 
@@ -397,9 +400,9 @@ pub type Decl = Spanned;
 
 #[deriving(PartialEq, Eq, Encodable, Decodable, Hash)]
 pub enum Decl_ {
-    // a local (let) binding:
+    /// A local (let) binding:
     DeclLocal(Gc),
-    // an item binding:
+    /// An item binding:
     DeclItem(Gc),
 }
 
@@ -443,7 +446,7 @@ pub struct Expr {
 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)]
 pub enum Expr_ {
     ExprVstore(Gc, ExprVstore),
-    // First expr is the place; second expr is the value.
+    /// First expr is the place; second expr is the value.
     ExprBox(Gc, Gc),
     ExprVec(Vec>),
     ExprCall(Gc, Vec>),
@@ -483,124 +486,121 @@ pub enum Expr_ {
 
     ExprMac(Mac),
 
-    // A struct literal expression.
+    /// A struct literal expression.
     ExprStruct(Path, Vec , Option> /* base */),
-    // A vector literal constructed from one repeated element.
+    /// A vector literal constructed from one repeated element.
     ExprRepeat(Gc /* element */, Gc /* count */),
-    // No-op: used solely so we can pretty-print faithfully
+    /// No-op: used solely so we can pretty-print faithfully
     ExprParen(Gc)
 }
 
-// When the main rust parser encounters a syntax-extension invocation, it
-// parses the arguments to the invocation as a token-tree. This is a very
-// loose structure, such that all sorts of different AST-fragments can
-// be passed to syntax extensions using a uniform type.
-//
-// If the syntax extension is an MBE macro, it will attempt to match its
-// LHS "matchers" against the provided token tree, and if it finds a
-// match, will transcribe the RHS token tree, splicing in any captured
-// macro_parser::matched_nonterminals into the TTNonterminals it finds.
-//
-// The RHS of an MBE macro is the only place a TTNonterminal or TTSeq
-// makes any real sense. You could write them elsewhere but nothing
-// else knows what to do with them, so you'll probably get a syntax
-// error.
-// +/// When the main rust parser encounters a syntax-extension invocation, it +/// parses the arguments to the invocation as a token-tree. This is a very +/// loose structure, such that all sorts of different AST-fragments can +/// be passed to syntax extensions using a uniform type. +/// +/// If the syntax extension is an MBE macro, it will attempt to match its +/// LHS "matchers" against the provided token tree, and if it finds a +/// match, will transcribe the RHS token tree, splicing in any captured +/// macro_parser::matched_nonterminals into the TTNonterminals it finds. +/// +/// The RHS of an MBE macro is the only place a TTNonterminal or TTSeq +/// makes any real sense. You could write them elsewhere but nothing +/// else knows what to do with them, so you'll probably get a syntax +/// error. #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] #[doc="For macro invocations; parsing is delegated to the macro"] pub enum TokenTree { - // a single token + /// A single token TTTok(Span, ::parse::token::Token), - // a delimited sequence (the delimiters appear as the first - // and last elements of the vector) + /// A delimited sequence (the delimiters appear as the first + /// and last elements of the vector) // FIXME(eddyb) #6308 Use Rc<[TokenTree]> after DST. TTDelim(Rc>), // These only make sense for right-hand-sides of MBE macros: - // a kleene-style repetition sequence with a span, a TTForest, - // an optional separator, and a boolean where true indicates - // zero or more (..), and false indicates one or more (+). + /// A kleene-style repetition sequence with a span, a TTForest, + /// an optional separator, and a boolean where true indicates + /// zero or more (..), and false indicates one or more (+). // FIXME(eddyb) #6308 Use Rc<[TokenTree]> after DST. TTSeq(Span, Rc>, Option<::parse::token::Token>, bool), - // a syntactic variable that will be filled in by macro expansion. + /// A syntactic variable that will be filled in by macro expansion. TTNonterminal(Span, Ident) } -// -// Matchers are nodes defined-by and recognized-by the main rust parser and -// language, but they're only ever found inside syntax-extension invocations; -// indeed, the only thing that ever _activates_ the rules in the rust parser -// for parsing a matcher is a matcher looking for the 'matchers' nonterminal -// itself. Matchers represent a small sub-language for pattern-matching -// token-trees, and are thus primarily used by the macro-defining extension -// itself. -// -// MatchTok -// -------- -// -// A matcher that matches a single token, denoted by the token itself. So -// long as there's no $ involved. -// -// -// MatchSeq -// -------- -// -// A matcher that matches a sequence of sub-matchers, denoted various -// possible ways: -// -// $(M)* zero or more Ms -// $(M)+ one or more Ms -// $(M),+ one or more comma-separated Ms -// $(A B C);* zero or more semi-separated 'A B C' seqs -// -// -// MatchNonterminal -// ----------------- -// -// A matcher that matches one of a few interesting named rust -// nonterminals, such as types, expressions, items, or raw token-trees. A -// black-box matcher on expr, for example, binds an expr to a given ident, -// and that ident can re-occur as an interpolation in the RHS of a -// macro-by-example rule. For example: -// -// $foo:expr => 1 + $foo // interpolate an expr -// $foo:tt => $foo // interpolate a token-tree -// $foo:tt => bar! 
$foo // only other valid interpolation -// // is in arg position for another -// // macro -// -// As a final, horrifying aside, note that macro-by-example's input is -// also matched by one of these matchers. Holy self-referential! It is matched -// by a MatchSeq, specifically this one: -// -// $( $lhs:matchers => $rhs:tt );+ -// -// If you understand that, you have closed to loop and understand the whole -// macro system. Congratulations. -// +/// Matchers are nodes defined-by and recognized-by the main rust parser and +/// language, but they're only ever found inside syntax-extension invocations; +/// indeed, the only thing that ever _activates_ the rules in the rust parser +/// for parsing a matcher is a matcher looking for the 'matchers' nonterminal +/// itself. Matchers represent a small sub-language for pattern-matching +/// token-trees, and are thus primarily used by the macro-defining extension +/// itself. +/// +/// MatchTok +/// -------- +/// +/// A matcher that matches a single token, denoted by the token itself. So +/// long as there's no $ involved. +/// +/// +/// MatchSeq +/// -------- +/// +/// A matcher that matches a sequence of sub-matchers, denoted various +/// possible ways: +/// +/// $(M)* zero or more Ms +/// $(M)+ one or more Ms +/// $(M),+ one or more comma-separated Ms +/// $(A B C);* zero or more semi-separated 'A B C' seqs +/// +/// +/// MatchNonterminal +/// ----------------- +/// +/// A matcher that matches one of a few interesting named rust +/// nonterminals, such as types, expressions, items, or raw token-trees. A +/// black-box matcher on expr, for example, binds an expr to a given ident, +/// and that ident can re-occur as an interpolation in the RHS of a +/// macro-by-example rule. For example: +/// +/// $foo:expr => 1 + $foo // interpolate an expr +/// $foo:tt => $foo // interpolate a token-tree +/// $foo:tt => bar! $foo // only other valid interpolation +/// // is in arg position for another +/// // macro +/// +/// As a final, horrifying aside, note that macro-by-example's input is +/// also matched by one of these matchers. Holy self-referential! It is matched +/// by a MatchSeq, specifically this one: +/// +/// $( $lhs:matchers => $rhs:tt );+ +/// +/// If you understand that, you have closed the loop and understand the whole +/// macro system. Congratulations. pub type Matcher = Spanned; #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum Matcher_ { - // match one token + /// Match one token MatchTok(::parse::token::Token), - // match repetitions of a sequence: body, separator, zero ok?, - // lo, hi position-in-match-array used: + /// Match repetitions of a sequence: body, separator, zero ok?, + /// lo, hi position-in-match-array used: MatchSeq(Vec , Option<::parse::token::Token>, bool, uint, uint), - // parse a Rust NT: name to bind, name of NT, position in match array: + /// Parse a Rust NT: name to bind, name of NT, position in match array: MatchNonterminal(Ident, Ident, uint) } pub type Mac = Spanned; -// represents a macro invocation. The Path indicates which macro -// is being invoked, and the vector of token-trees contains the source -// of the macro invocation. -// There's only one flavor, now, so this could presumably be simplified. +/// Represents a macro invocation. The Path indicates which macro +/// is being invoked, and the vector of token-trees contains the source +/// of the macro invocation. +/// There's only one flavor, now, so this could presumably be simplified. 
#[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)]
pub enum Mac_ {
    MacInvocTT(Path, Vec , SyntaxContext),   // new macro-invocation
}
@@ -659,11 +659,10 @@ pub struct TypeMethod {
     pub vis: Visibility,
 }
 
-/// Represents a method declaration in a trait declaration, possibly
-/// including a default implementation
-// A trait method is either required (meaning it doesn't have an
-// implementation, just a signature) or provided (meaning it has a default
-// implementation).
+/// Represents a method declaration in a trait declaration, possibly including
+/// a default implementation. A trait method is either required (meaning it
+/// doesn't have an implementation, just a signature) or provided (meaning it
+/// has a default implementation).
 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)]
 pub enum TraitMethod {
     Required(TypeMethod),
@@ -720,7 +719,7 @@ pub struct Ty {
     pub span: Span,
 }
 
-// Not represented directly in the AST, referred to by name through a ty_path.
+/// Not represented directly in the AST, referred to by name through a ty_path.
 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)]
 pub enum PrimTy {
     TyInt(IntTy),
@@ -753,10 +752,10 @@ pub struct ClosureTy {
     pub fn_style: FnStyle,
     pub onceness: Onceness,
     pub decl: P,
-    // Optional optvec distinguishes between "fn()" and "fn:()" so we can
-    // implement issue #7264. None means "fn()", which means infer a default
-    // bound based on pointer sigil during typeck. Some(Empty) means "fn:()",
-    // which means use no bounds (e.g., not even Owned on a ~fn()).
+    /// Optional optvec distinguishes between "fn()" and "fn:()" so we can
+    /// implement issue #7264. None means "fn()", which means infer a default
+    /// bound based on pointer sigil during typeck. Some(Empty) means "fn:()",
+    /// which means use no bounds (e.g., not even Owned on a ~fn()).
     pub bounds: Option>,
 }
 
@@ -789,11 +788,11 @@ pub enum Ty_ {
     TyUnboxedFn(Gc),
     TyTup(Vec> ),
     TyPath(Path, Option>, NodeId), // for #7264; see above
-    // No-op; kept solely so that we can pretty-print faithfully
+    /// No-op; kept solely so that we can pretty-print faithfully
     TyParen(P),
     TyTypeof(Gc),
-    // TyInfer means the type should be inferred instead of it having been
-    // specified. This can appear anywhere in a type.
+    /// TyInfer means the type should be inferred instead of it having been
+    /// specified. This can appear anywhere in a type.
     TyInfer,
 }
 
@@ -854,8 +853,10 @@ pub struct FnDecl {
 
 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)]
 pub enum FnStyle {
-    UnsafeFn, // declared with "unsafe fn"
-    NormalFn, // declared with "fn"
+    /// Declared with "unsafe fn"
+    UnsafeFn,
+    /// Declared with "fn"
+    NormalFn,
 }
 
 impl fmt::Show for FnStyle {
@@ -869,18 +870,24 @@ impl fmt::Show for FnStyle {
 
 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)]
 pub enum RetStyle {
-    NoReturn, // functions with return type _|_ that always
-              // raise an error or exit (i.e. never return to the caller)
-    Return, // everything else
+    /// Functions with return type ! that always
+    /// raise an error or exit (i.e. never return to the caller)
+    NoReturn,
+    /// Everything else
+    Return,
 }
 
 /// Represents the kind of 'self' associated with a method
 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)]
 pub enum ExplicitSelf_ {
-    SelfStatic,                                // no self
-    SelfValue(Ident),                          // `self`
-    SelfRegion(Option, Mutability, Ident),     // `&'lt self`, `&'lt mut self`
-    SelfUniq(Ident),                           // `~self`
+    /// No self
+    SelfStatic,
+    /// `self`
+    SelfValue(Ident),
+    /// `&'lt self`, `&'lt mut self`
+    SelfRegion(Option, Mutability, Ident),
+    /// `~self`
+    SelfUniq(Ident)
 }
 
 pub type ExplicitSelf = Spanned;
@@ -959,17 +966,17 @@ pub type ViewPath = Spanned;
 
 #[deriving(PartialEq, Eq, Encodable, Decodable, Hash)]
 pub enum ViewPath_ {
-    // quux = foo::bar::baz
-    //
-    // or just
-    //
-    // foo::bar::baz (with 'baz =' implicitly on the left)
+    /// `quux = foo::bar::baz`
+    ///
+    /// or just
+    ///
+    /// `foo::bar::baz` (with 'baz =' implicitly on the left)
     ViewPathSimple(Ident, Path, NodeId),
 
-    // foo::bar::*
+    /// `foo::bar::*`
     ViewPathGlob(Path, NodeId),
 
-    // foo::bar::{a,b,c}
+    /// `foo::bar::{a,b,c}`
     ViewPathList(Path, Vec , NodeId)
 }
 
@@ -983,20 +990,20 @@ pub struct ViewItem {
 
 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)]
 pub enum ViewItem_ {
-    // ident: name used to refer to this crate in the code
-    // optional (InternedString,StrStyle): if present, this is a location
-    // (containing arbitrary characters) from which to fetch the crate sources
-    // For example, extern crate whatever = "github.com/rust-lang/rust"
+    /// Ident: name used to refer to this crate in the code
+    /// optional (InternedString,StrStyle): if present, this is a location
+    /// (containing arbitrary characters) from which to fetch the crate sources
+    /// For example, extern crate whatever = "github.com/rust-lang/rust"
     ViewItemExternCrate(Ident, Option<(InternedString,StrStyle)>, NodeId),
     ViewItemUse(Gc),
 }
 
-// Meta-data associated with an item
+/// Meta-data associated with an item
 pub type Attribute = Spanned;
 
-// Distinguishes between Attributes that decorate items and Attributes that
-// are contained as statements within items. These two cases need to be
-// distinguished for pretty-printing.
+/// Distinguishes between Attributes that decorate items and Attributes that
+/// are contained as statements within items. These two cases need to be
+/// distinguished for pretty-printing.
 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)]
 pub enum AttrStyle {
     AttrOuter,
@@ -1006,7 +1013,7 @@ pub enum AttrStyle {
 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)]
 pub struct AttrId(pub uint);
 
-// doc-comments are promoted to attributes that have is_sugared_doc = true
+/// Doc-comments are promoted to attributes that have is_sugared_doc = true
 #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)]
 pub struct Attribute_ {
     pub id: AttrId,
@@ -1015,13 +1022,12 @@ pub struct Attribute_ {
     pub is_sugared_doc: bool,
 }
 
-/*
-  TraitRef's appear in impls.
-  resolve maps each TraitRef's ref_id to its defining trait; that's all
-  that the ref_id is for. The impl_id maps to the "self type" of this impl.
-  If this impl is an ItemImpl, the impl_id is redundant (it could be the
-  same as the impl's node id).
- */
+
+/// TraitRef's appear in impls.
+/// resolve maps each TraitRef's ref_id to its defining trait; that's all
+/// that the ref_id is for. The impl_id maps to the "self type" of this impl.
+/// If this impl is an ItemImpl, the impl_id is redundant (it could be the
+/// same as the impl's node id).
#[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub struct TraitRef { pub path: Path, @@ -1065,7 +1071,8 @@ pub type StructField = Spanned; #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum StructFieldKind { NamedField(Ident, Visibility), - UnnamedField(Visibility), // element of a tuple-like struct + /// Element of a tuple-like struct + UnnamedField(Visibility), } impl StructFieldKind { @@ -1079,12 +1086,15 @@ impl StructFieldKind { #[deriving(PartialEq, Eq, Encodable, Decodable, Hash)] pub struct StructDef { - pub fields: Vec, /* fields, not including ctor */ - /* ID of the constructor. This is only used for tuple- or enum-like - * structs. */ + /// Fields, not including ctor + pub fields: Vec, + /// ID of the constructor. This is only used for tuple- or enum-like + /// structs. pub ctor_id: Option, - pub super_struct: Option>, // Super struct, if specified. - pub is_virtual: bool, // True iff the struct may be inherited from. + /// Super struct, if specified. + pub super_struct: Option>, + /// True iff the struct may be inherited from. + pub is_virtual: bool, } /* @@ -1120,7 +1130,7 @@ pub enum Item_ { Option, // (optional) trait this impl implements P, // self Vec>), - // a macro invocation (which includes macro definition) + /// A macro invocation (which includes macro definition) ItemMac(Mac), } @@ -1140,9 +1150,9 @@ pub enum ForeignItem_ { ForeignItemStatic(P, /* is_mutbl */ bool), } -// The data we save and restore about an inlined item or method. This is not -// part of the AST that we parse from a file, but it becomes part of the tree -// that we trans. +/// The data we save and restore about an inlined item or method. This is not +/// part of the AST that we parse from a file, but it becomes part of the tree +/// that we trans. #[deriving(PartialEq, Eq, Encodable, Decodable, Hash)] pub enum InlinedItem { IIItem(Gc), diff --git a/src/libsyntax/ast_map.rs b/src/libsyntax/ast_map.rs index c95ea4a24aadb..25c8e81bdbc91 100644 --- a/src/libsyntax/ast_map.rs +++ b/src/libsyntax/ast_map.rs @@ -112,13 +112,13 @@ pub enum Node { NodeLifetime(Gc), } -// The odd layout is to bring down the total size. +/// The odd layout is to bring down the total size. #[deriving(Clone)] enum MapEntry { - // Placeholder for holes in the map. + /// Placeholder for holes in the map. NotPresent, - // All the node types, with a parent ID. + /// All the node types, with a parent ID. EntryItem(NodeId, Gc), EntryForeignItem(NodeId, Gc), EntryTraitMethod(NodeId, Gc), @@ -133,14 +133,14 @@ enum MapEntry { EntryStructCtor(NodeId, Gc), EntryLifetime(NodeId, Gc), - // Roots for node trees. + /// Roots for node trees. RootCrate, RootInlinedParent(P) } struct InlinedParent { path: Vec , - // Required by NodeTraitMethod and NodeMethod. + /// Required by NodeTraitMethod and NodeMethod. def_id: DefId } @@ -243,7 +243,7 @@ impl Map { ItemForeignMod(ref nm) => Some(nm.abi), _ => None }, - // Wrong but OK, because the only inlined foreign items are intrinsics. + /// Wrong but OK, because the only inlined foreign items are intrinsics. Some(RootInlinedParent(_)) => Some(abi::RustIntrinsic), _ => None }; @@ -432,8 +432,8 @@ pub trait FoldOps { pub struct Ctx<'a, F> { map: &'a Map, - // The node in which we are currently mapping (an item or a method). - // When equal to DUMMY_NODE_ID, the next mapped node becomes the parent. + /// The node in which we are currently mapping (an item or a method). + /// When equal to DUMMY_NODE_ID, the next mapped node becomes the parent. 
parent: NodeId, fold_ops: F } @@ -618,9 +618,9 @@ pub fn map_crate(krate: Crate, fold_ops: F) -> (Crate, Map) { (krate, map) } -// Used for items loaded from external crate that are being inlined into this -// crate. The `path` should be the path to the item but should not include -// the item itself. +/// Used for items loaded from external crate that are being inlined into this +/// crate. The `path` should be the path to the item but should not include +/// the item itself. pub fn map_decoded_item(map: &Map, path: Vec , fold_ops: F, diff --git a/src/libsyntax/ast_util.rs b/src/libsyntax/ast_util.rs index 57c60b4a94903..004991814fff4 100644 --- a/src/libsyntax/ast_util.rs +++ b/src/libsyntax/ast_util.rs @@ -101,8 +101,8 @@ pub fn is_path(e: Gc) -> bool { return match e.node { ExprPath(_) => true, _ => false }; } -// Get a string representation of a signed int type, with its value. -// We want to avoid "45int" and "-3int" in favor of "45" and "-3" +/// Get a string representation of a signed int type, with its value. +/// We want to avoid "45int" and "-3int" in favor of "45" and "-3" pub fn int_ty_to_string(t: IntTy, val: Option) -> String { let s = match t { TyI if val.is_some() => "i", @@ -131,8 +131,8 @@ pub fn int_ty_max(t: IntTy) -> u64 { } } -// Get a string representation of an unsigned int type, with its value. -// We want to avoid "42uint" in favor of "42u" +/// Get a string representation of an unsigned int type, with its value. +/// We want to avoid "42uint" in favor of "42u" pub fn uint_ty_to_string(t: UintTy, val: Option) -> String { let s = match t { TyU if val.is_some() => "u", @@ -249,8 +249,8 @@ pub fn public_methods(ms: Vec> ) -> Vec> { }).collect() } -// extract a TypeMethod from a TraitMethod. if the TraitMethod is -// a default, pull out the useful fields to make a TypeMethod +/// extract a TypeMethod from a TraitMethod. if the TraitMethod is +/// a default, pull out the useful fields to make a TypeMethod pub fn trait_method_to_ty_method(method: &TraitMethod) -> TypeMethod { match *method { Required(ref m) => (*m).clone(), @@ -705,7 +705,7 @@ pub fn segments_name_eq(a : &[ast::PathSegment], b : &[ast::PathSegment]) -> boo } } -// Returns true if this literal is a string and false otherwise. +/// Returns true if this literal is a string and false otherwise. pub fn lit_is_str(lit: Gc) -> bool { match lit.node { LitStr(..) => true, diff --git a/src/libsyntax/attr.rs b/src/libsyntax/attr.rs index 3b2ee4e2a6134..e8b9ec9628f7d 100644 --- a/src/libsyntax/attr.rs +++ b/src/libsyntax/attr.rs @@ -46,10 +46,8 @@ pub trait AttrMetaMethods { /// #[foo="bar"] and #[foo(bar)] fn name(&self) -> InternedString; - /** - * Gets the string value if self is a MetaNameValue variant - * containing a string, otherwise None. - */ + /// Gets the string value if self is a MetaNameValue variant + /// containing a string, otherwise None. fn value_str(&self) -> Option; /// Gets a list of inner meta items from a list MetaItem type. fn meta_item_list<'a>(&'a self) -> Option<&'a [Gc]>; @@ -420,18 +418,16 @@ pub fn require_unique_names(diagnostic: &SpanHandler, metas: &[Gc]) { } -/** - * Fold this over attributes to parse #[repr(...)] forms. - * - * Valid repr contents: any of the primitive integral type names (see - * `int_type_of_word`, below) to specify the discriminant type; and `C`, to use - * the same discriminant size that the corresponding C enum would. These are - * not allowed on univariant or zero-variant enums, which have no discriminant. 
- *
- * If a discriminant type is so specified, then the discriminant will be
- * present (before fields, if any) with that type; reprensentation
- * optimizations which would remove it will not be done.
- */
+/// Fold this over attributes to parse #[repr(...)] forms.
+///
+/// Valid repr contents: any of the primitive integral type names (see
+/// `int_type_of_word`, below) to specify the discriminant type; and `C`, to use
+/// the same discriminant size that the corresponding C enum would. These are
+/// not allowed on univariant or zero-variant enums, which have no discriminant.
+///
+/// If a discriminant type is so specified, then the discriminant will be
+/// present (before fields, if any) with that type; representation
+/// optimizations which would remove it will not be done.
 pub fn find_repr_attr(diagnostic: &SpanHandler, attr: &Attribute, acc: ReprAttr)
     -> ReprAttr {
     let mut acc = acc;
diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs
index b3adf1daf418c..2f1e01b239d4c 100644
--- a/src/libsyntax/codemap.rs
+++ b/src/libsyntax/codemap.rs
@@ -252,15 +252,15 @@ pub struct FileMap {
 }
 
 impl FileMap {
-    // EFFECT: register a start-of-line offset in the
-    // table of line-beginnings.
-    // UNCHECKED INVARIANT: these offsets must be added in the right
-    // order and must be in the right places; there is shared knowledge
-    // about what ends a line between this file and parse.rs
-    // WARNING: pos param here is the offset relative to start of CodeMap,
-    // and CodeMap will append a newline when adding a filemap without a newline at the end,
-    // so the safe way to call this is with value calculated as
-    // filemap.start_pos + newline_offset_relative_to_the_start_of_filemap.
+    /// EFFECT: register a start-of-line offset in the
+    /// table of line-beginnings.
+    /// UNCHECKED INVARIANT: these offsets must be added in the right
+    /// order and must be in the right places; there is shared knowledge
+    /// about what ends a line between this file and parse.rs
+    /// WARNING: pos param here is the offset relative to start of CodeMap,
+    /// and CodeMap will append a newline when adding a filemap without a newline at the end,
+    /// so the safe way to call this is with value calculated as
+    /// filemap.start_pos + newline_offset_relative_to_the_start_of_filemap.
     pub fn next_line(&self, pos: BytePos) {
         // the new charpos must be > the last one (or it's the first one).
         let mut lines = self.lines.borrow_mut();;
@@ -269,7 +269,7 @@ impl FileMap {
         lines.push(pos);
     }
 
-    // get a line from the list of pre-computed line-beginnings
+    /// Get a line from the list of pre-computed line-beginnings
     pub fn get_line(&self, line: int) -> String {
         let mut lines = self.lines.borrow_mut();
         let begin: BytePos = *lines.get(line as uint) - self.start_pos;
@@ -428,7 +428,7 @@ impl CodeMap {
         FileMapAndBytePos {fm: fm, pos: offset}
     }
 
-    // Converts an absolute BytePos to a CharPos relative to the filemap and above.
+    /// Converts an absolute BytePos to a CharPos relative to the filemap and above.
     pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
         debug!("codemap: converting {:?} to char pos", bpos);
         let idx = self.lookup_filemap_idx(bpos);
diff --git a/src/libsyntax/diagnostic.rs b/src/libsyntax/diagnostic.rs
index 3805390776e8d..e469f327ae8ba 100644
--- a/src/libsyntax/diagnostic.rs
+++ b/src/libsyntax/diagnostic.rs
@@ -21,7 +21,7 @@ use std::string::String;
 use term::WriterWrapper;
 use term;
 
-// maximum number of lines we will print for each error; arbitrary.
+/// maximum number of lines we will print for each error; arbitrary. static MAX_LINES: uint = 6u; #[deriving(Clone)] @@ -73,9 +73,9 @@ pub struct FatalError; /// or `.span_bug` rather than a failed assertion, etc. pub struct ExplicitBug; -// a span-handler is like a handler but also -// accepts span information for source-location -// reporting. +/// A span-handler is like a handler but also +/// accepts span information for source-location +/// reporting. pub struct SpanHandler { pub handler: Handler, pub cm: codemap::CodeMap, @@ -114,9 +114,9 @@ impl SpanHandler { } } -// a handler deals with errors; certain errors -// (fatal, bug, unimpl) may cause immediate exit, -// others log errors for later reporting. +/// A handler deals with errors; certain errors +/// (fatal, bug, unimpl) may cause immediate exit, +/// others log errors for later reporting. pub struct Handler { err_count: Cell, emit: RefCell>, @@ -442,12 +442,12 @@ fn highlight_lines(err: &mut EmitterWriter, Ok(()) } -// Here are the differences between this and the normal `highlight_lines`: -// `custom_highlight_lines` will always put arrow on the last byte of the -// span (instead of the first byte). Also, when the span is too long (more -// than 6 lines), `custom_highlight_lines` will print the first line, then -// dot dot dot, then last line, whereas `highlight_lines` prints the first -// six lines. +/// Here are the differences between this and the normal `highlight_lines`: +/// `custom_highlight_lines` will always put arrow on the last byte of the +/// span (instead of the first byte). Also, when the span is too long (more +/// than 6 lines), `custom_highlight_lines` will print the first line, then +/// dot dot dot, then last line, whereas `highlight_lines` prints the first +/// six lines. fn custom_highlight_lines(w: &mut EmitterWriter, cm: &codemap::CodeMap, sp: Span, diff --git a/src/libsyntax/ext/base.rs b/src/libsyntax/ext/base.rs index a2a442f8b6aa7..bbf38fd7a9d05 100644 --- a/src/libsyntax/ext/base.rs +++ b/src/libsyntax/ext/base.rs @@ -278,9 +278,9 @@ pub enum SyntaxExtension { pub type NamedSyntaxExtension = (Name, SyntaxExtension); pub struct BlockInfo { - // should macros escape from this scope? + /// Should macros escape from this scope? pub macros_escape: bool, - // what are the pending renames? + /// What are the pending renames? pub pending_renames: mtwt::RenameList, } @@ -293,8 +293,8 @@ impl BlockInfo { } } -// The base map of methods for expanding syntax extension -// AST nodes into full ASTs +/// The base map of methods for expanding syntax extension +/// AST nodes into full ASTs pub fn syntax_expander_table() -> SyntaxEnv { // utility function to simplify creating NormalTT syntax extensions fn builtin_normal_expander(f: MacroExpanderFn) -> SyntaxExtension { @@ -398,9 +398,9 @@ pub fn syntax_expander_table() -> SyntaxEnv { syntax_expanders } -// One of these is made during expansion and incrementally updated as we go; -// when a macro expansion occurs, the resulting nodes have the backtrace() -// -> expn_info of their expansion context stored into their span. +/// One of these is made during expansion and incrementally updated as we go; +/// when a macro expansion occurs, the resulting nodes have the backtrace() +/// -> expn_info of their expansion context stored into their span. 
pub struct ExtCtxt<'a> { pub parse_sess: &'a parse::ParseSess, pub cfg: ast::CrateConfig, @@ -612,11 +612,11 @@ pub fn get_exprs_from_tts(cx: &mut ExtCtxt, Some(es) } -// in order to have some notion of scoping for macros, -// we want to implement the notion of a transformation -// environment. +/// In order to have some notion of scoping for macros, +/// we want to implement the notion of a transformation +/// environment. -// This environment maps Names to SyntaxExtensions. +/// This environment maps Names to SyntaxExtensions. //impl question: how to implement it? Initially, the // env will contain only macros, so it might be painful @@ -633,7 +633,6 @@ struct MapChainFrame { map: HashMap, } -// Only generic to make it easy to test pub struct SyntaxEnv { chain: Vec , } diff --git a/src/libsyntax/ext/deriving/encodable.rs b/src/libsyntax/ext/deriving/encodable.rs index 652d593c0042c..3b34407edfeaa 100644 --- a/src/libsyntax/ext/deriving/encodable.rs +++ b/src/libsyntax/ext/deriving/encodable.rs @@ -8,79 +8,76 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -The compiler code necessary to implement the `#[deriving(Encodable)]` -(and `Decodable`, in decodable.rs) extension. The idea here is that -type-defining items may be tagged with `#[deriving(Encodable, Decodable)]`. - -For example, a type like: - -```ignore -#[deriving(Encodable, Decodable)] -struct Node { id: uint } -``` - -would generate two implementations like: - -```ignore -impl Encodable for Node { - fn encode(&self, s: &S) { - s.emit_struct("Node", 1, || { - s.emit_field("id", 0, || s.emit_uint(self.id)) - }) - } -} - -impl Decodable for node_id { - fn decode(d: &D) -> Node { - d.read_struct("Node", 1, || { - Node { - id: d.read_field("x".to_string(), 0, || decode(d)) - } - }) - } -} -``` - -Other interesting scenarios are whe the item has type parameters or -references other non-built-in types. A type definition like: - -```ignore -#[deriving(Encodable, Decodable)] -struct spanned { node: T, span: Span } -``` - -would yield functions like: - -```ignore - impl< - S: Encoder, - T: Encodable - > spanned: Encodable { - fn encode(s: &S) { - s.emit_rec(|| { - s.emit_field("node", 0, || self.node.encode(s)); - s.emit_field("span", 1, || self.span.encode(s)); - }) - } - } - - impl< - D: Decoder, - T: Decodable - > spanned: Decodable { - fn decode(d: &D) -> spanned { - d.read_rec(|| { - { - node: d.read_field("node".to_string(), 0, || decode(d)), - span: d.read_field("span".to_string(), 1, || decode(d)), - } - }) - } - } -``` -*/ +//! The compiler code necessary to implement the `#[deriving(Encodable)]` +//! (and `Decodable`, in decodable.rs) extension. The idea here is that +//! type-defining items may be tagged with `#[deriving(Encodable, Decodable)]`. +//! +//! For example, a type like: +//! +//! ```ignore +//! #[deriving(Encodable, Decodable)] +//! struct Node { id: uint } +//! ``` +//! +//! would generate two implementations like: +//! +//! ```ignore +//! impl Encodable for Node { +//! fn encode(&self, s: &S) { +//! s.emit_struct("Node", 1, || { +//! s.emit_field("id", 0, || s.emit_uint(self.id)) +//! }) +//! } +//! } +//! +//! impl Decodable for node_id { +//! fn decode(d: &D) -> Node { +//! d.read_struct("Node", 1, || { +//! Node { +//! id: d.read_field("x".to_string(), 0, || decode(d)) +//! } +//! }) +//! } +//! } +//! ``` +//! +//! Other interesting scenarios are whe the item has type parameters or +//! references other non-built-in types. 
A type definition like: +//! +//! ```ignore +//! #[deriving(Encodable, Decodable)] +//! struct spanned { node: T, span: Span } +//! ``` +//! +//! would yield functions like: +//! +//! ```ignore +//! impl< +//! S: Encoder, +//! T: Encodable +//! > spanned: Encodable { +//! fn encode(s: &S) { +//! s.emit_rec(|| { +//! s.emit_field("node", 0, || self.node.encode(s)); +//! s.emit_field("span", 1, || self.span.encode(s)); +//! }) +//! } +//! } +//! +//! impl< +//! D: Decoder, +//! T: Decodable +//! > spanned: Decodable { +//! fn decode(d: &D) -> spanned { +//! d.read_rec(|| { +//! { +//! node: d.read_field("node".to_string(), 0, || decode(d)), +//! span: d.read_field("span".to_string(), 1, || decode(d)), +//! } +//! }) +//! } +//! } +//! ``` use ast::{MetaItem, Item, Expr, ExprRet, MutMutable, LitNil}; use codemap::Span; diff --git a/src/libsyntax/ext/deriving/generic/mod.rs b/src/libsyntax/ext/deriving/generic/mod.rs index 764c88cc954ed..c9f5936a9bb05 100644 --- a/src/libsyntax/ext/deriving/generic/mod.rs +++ b/src/libsyntax/ext/deriving/generic/mod.rs @@ -8,174 +8,170 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -Some code that abstracts away much of the boilerplate of writing -`deriving` instances for traits. Among other things it manages getting -access to the fields of the 4 different sorts of structs and enum -variants, as well as creating the method and impl ast instances. - -Supported features (fairly exhaustive): - -- Methods taking any number of parameters of any type, and returning - any type, other than vectors, bottom and closures. -- Generating `impl`s for types with type parameters and lifetimes - (e.g. `Option`), the parameters are automatically given the - current trait as a bound. (This includes separate type parameters - and lifetimes for methods.) -- Additional bounds on the type parameters, e.g. the `Ord` instance - requires an explicit `PartialEq` bound at the - moment. (`TraitDef.additional_bounds`) - -Unsupported: FIXME #6257: calling methods on reference fields, -e.g. deriving Eq/Ord/Clone don't work on `struct A(&int)`, -because of how the auto-dereferencing happens. - -The most important thing for implementers is the `Substructure` and -`SubstructureFields` objects. The latter groups 5 possibilities of the -arguments: - -- `Struct`, when `Self` is a struct (including tuple structs, e.g - `struct T(int, char)`). -- `EnumMatching`, when `Self` is an enum and all the arguments are the - same variant of the enum (e.g. `Some(1)`, `Some(3)` and `Some(4)`) -- `EnumNonMatching` when `Self` is an enum and the arguments are not - the same variant (e.g. `None`, `Some(1)` and `None`). If - `const_nonmatching` is true, this will contain an empty list. -- `StaticEnum` and `StaticStruct` for static methods, where the type - being derived upon is either an enum or struct respectively. (Any - argument with type Self is just grouped among the non-self - arguments.) - -In the first two cases, the values from the corresponding fields in -all the arguments are grouped together. In the `EnumNonMatching` case -this isn't possible (different variants have different fields), so the -fields are grouped by which argument they come from. There are no -fields with values in the static cases, so these are treated entirely -differently. - -The non-static cases have `Option` in several places associated -with field `expr`s. This represents the name of the field it is -associated with. 
It is only not `None` when the associated field has -an identifier in the source code. For example, the `x`s in the -following snippet - -```rust -struct A { x : int } - -struct B(int); - -enum C { - C0(int), - C1 { x: int } -} -``` - -The `int`s in `B` and `C0` don't have an identifier, so the -`Option`s would be `None` for them. - -In the static cases, the structure is summarised, either into the just -spans of the fields or a list of spans and the field idents (for tuple -structs and record structs, respectively), or a list of these, for -enums (one for each variant). For empty struct and empty enum -variants, it is represented as a count of 0. - -# Examples - -The following simplified `PartialEq` is used for in-code examples: - -```rust -trait PartialEq { - fn eq(&self, other: &Self); -} -impl PartialEq for int { - fn eq(&self, other: &int) -> bool { - *self == *other - } -} -``` - -Some examples of the values of `SubstructureFields` follow, using the -above `PartialEq`, `A`, `B` and `C`. - -## Structs - -When generating the `expr` for the `A` impl, the `SubstructureFields` is - -~~~text -Struct(~[FieldInfo { - span: - name: Some(), - self_: , - other: ~[, - name: None, - - ~[] - }]) -~~~ - -## Enums - -When generating the `expr` for a call with `self == C0(a)` and `other -== C0(b)`, the SubstructureFields is - -~~~text -EnumMatching(0, , - ~[FieldInfo { - span: - name: None, - self_: , - other: ~[] - }]) -~~~ - -For `C1 {x}` and `C1 {x}`, - -~~~text -EnumMatching(1, , - ~[FieldInfo { - span: - name: Some(), - self_: , - other: ~[] - }]) -~~~ - -For `C0(a)` and `C1 {x}` , - -~~~text -EnumNonMatching(~[(0, , - ~[(, None, )]), - (1, , - ~[(, Some(), - )])]) -~~~ - -(and vice versa, but with the order of the outermost list flipped.) - -## Static - -A static method on the above would result in, - -~~~text -StaticStruct(, Named(~[(, )])) - -StaticStruct(, Unnamed(~[])) - -StaticEnum(, ~[(, , Unnamed(~[])), - (, , - Named(~[(, )]))]) -~~~ - -*/ +//! Some code that abstracts away much of the boilerplate of writing +//! `deriving` instances for traits. Among other things it manages getting +//! access to the fields of the 4 different sorts of structs and enum +//! variants, as well as creating the method and impl ast instances. +//! +//! Supported features (fairly exhaustive): +//! +//! - Methods taking any number of parameters of any type, and returning +//! any type, other than vectors, bottom and closures. +//! - Generating `impl`s for types with type parameters and lifetimes +//! (e.g. `Option`), the parameters are automatically given the +//! current trait as a bound. (This includes separate type parameters +//! and lifetimes for methods.) +//! - Additional bounds on the type parameters, e.g. the `Ord` instance +//! requires an explicit `PartialEq` bound at the +//! moment. (`TraitDef.additional_bounds`) +//! +//! Unsupported: FIXME #6257: calling methods on reference fields, +//! e.g. deriving Eq/Ord/Clone don't work on `struct A(&int)`, +//! because of how the auto-dereferencing happens. +//! +//! The most important thing for implementers is the `Substructure` and +//! `SubstructureFields` objects. The latter groups 5 possibilities of the +//! arguments: +//! +//! - `Struct`, when `Self` is a struct (including tuple structs, e.g +//! `struct T(int, char)`). +//! - `EnumMatching`, when `Self` is an enum and all the arguments are the +//! same variant of the enum (e.g. `Some(1)`, `Some(3)` and `Some(4)`) +//! - `EnumNonMatching` when `Self` is an enum and the arguments are not +//! 
the same variant (e.g. `None`, `Some(1)` and `None`). If +//! `const_nonmatching` is true, this will contain an empty list. +//! - `StaticEnum` and `StaticStruct` for static methods, where the type +//! being derived upon is either an enum or struct respectively. (Any +//! argument with type Self is just grouped among the non-self +//! arguments.) +//! +//! In the first two cases, the values from the corresponding fields in +//! all the arguments are grouped together. In the `EnumNonMatching` case +//! this isn't possible (different variants have different fields), so the +//! fields are grouped by which argument they come from. There are no +//! fields with values in the static cases, so these are treated entirely +//! differently. +//! +//! The non-static cases have `Option` in several places associated +//! with field `expr`s. This represents the name of the field it is +//! associated with. It is only not `None` when the associated field has +//! an identifier in the source code. For example, the `x`s in the +//! following snippet +//! +//! ```rust +//! struct A { x : int } +//! +//! struct B(int); +//! +//! enum C { +//! C0(int), +//! C1 { x: int } +//! } +//! ``` +//! +//! The `int`s in `B` and `C0` don't have an identifier, so the +//! `Option`s would be `None` for them. +//! +//! In the static cases, the structure is summarised, either into the just +//! spans of the fields or a list of spans and the field idents (for tuple +//! structs and record structs, respectively), or a list of these, for +//! enums (one for each variant). For empty struct and empty enum +//! variants, it is represented as a count of 0. +//! +//! # Examples +//! +//! The following simplified `PartialEq` is used for in-code examples: +//! +//! ```rust +//! trait PartialEq { +//! fn eq(&self, other: &Self); +//! } +//! impl PartialEq for int { +//! fn eq(&self, other: &int) -> bool { +//! *self == *other +//! } +//! } +//! ``` +//! +//! Some examples of the values of `SubstructureFields` follow, using the +//! above `PartialEq`, `A`, `B` and `C`. +//! +//! ## Structs +//! +//! When generating the `expr` for the `A` impl, the `SubstructureFields` is +//! +//! ~~~text +//! Struct(~[FieldInfo { +//! span: +//! name: Some(), +//! self_: , +//! other: ~[, +//! name: None, +//! +//! ~[] +//! }]) +//! ~~~ +//! +//! ## Enums +//! +//! When generating the `expr` for a call with `self == C0(a)` and `other +//! == C0(b)`, the SubstructureFields is +//! +//! ~~~text +//! EnumMatching(0, , +//! ~[FieldInfo { +//! span: +//! name: None, +//! self_: , +//! other: ~[] +//! }]) +//! ~~~ +//! +//! For `C1 {x}` and `C1 {x}`, +//! +//! ~~~text +//! EnumMatching(1, , +//! ~[FieldInfo { +//! span: +//! name: Some(), +//! self_: , +//! other: ~[] +//! }]) +//! ~~~ +//! +//! For `C0(a)` and `C1 {x}` , +//! +//! ~~~text +//! EnumNonMatching(~[(0, , +//! ~[(, None, )]), +//! (1, , +//! ~[(, Some(), +//! )])]) +//! ~~~ +//! +//! (and vice versa, but with the order of the outermost list flipped.) +//! +//! ## Static +//! +//! A static method on the above would result in, +//! +//! ~~~text +//! StaticStruct(, Named(~[(, )])) +//! +//! StaticStruct(, Unnamed(~[])) +//! +//! StaticEnum(, ~[(, , Unnamed(~[])), +//! (, , +//! Named(~[(, )]))]) +//! 
~~~ use std::cell::RefCell; use std::gc::{Gc, GC}; diff --git a/src/libsyntax/ext/deriving/generic/ty.rs b/src/libsyntax/ext/deriving/generic/ty.rs index b53281f99633f..f6a39d7b2e6c1 100644 --- a/src/libsyntax/ext/deriving/generic/ty.rs +++ b/src/libsyntax/ext/deriving/generic/ty.rs @@ -25,8 +25,10 @@ use std::gc::Gc; /// The types of pointers pub enum PtrTy<'a> { - Send, // ~ - Borrowed(Option<&'a str>, ast::Mutability), // &['lifetime] [mut] + /// ~ + Send, + /// &'lifetime mut + Borrowed(Option<&'a str>, ast::Mutability), } /// A path, e.g. `::std::option::Option::` (global). Has support @@ -83,12 +85,12 @@ impl<'a> Path<'a> { /// A type. Supports pointers (except for *), Self, and literals pub enum Ty<'a> { Self, - // &/Box/ Ty + /// &/Box/ Ty Ptr(Box>, PtrTy<'a>), - // mod::mod::Type<[lifetime], [Params...]>, including a plain type - // parameter, and things like `int` + /// mod::mod::Type<[lifetime], [Params...]>, including a plain type + /// parameter, and things like `int` Literal(Path<'a>), - // includes nil + /// includes unit Tuple(Vec> ) } diff --git a/src/libsyntax/ext/deriving/show.rs b/src/libsyntax/ext/deriving/show.rs index 8e673ff246598..05b5131d7e4d3 100644 --- a/src/libsyntax/ext/deriving/show.rs +++ b/src/libsyntax/ext/deriving/show.rs @@ -55,8 +55,8 @@ pub fn expand_deriving_show(cx: &mut ExtCtxt, trait_def.expand(cx, mitem, item, push) } -// we construct a format string and then defer to std::fmt, since that -// knows what's up with formatting at so on. +/// We construct a format string and then defer to std::fmt, since that +/// knows what's up with formatting and so on. fn show_substructure(cx: &mut ExtCtxt, span: Span, substr: &Substructure) -> Gc { // build ``, `({}, {}, ...)` or ` { : {}, diff --git a/src/libsyntax/ext/expand.rs b/src/libsyntax/ext/expand.rs index 9fe431cfb6c75..a095317f663a9 100644 --- a/src/libsyntax/ext/expand.rs +++ b/src/libsyntax/ext/expand.rs @@ -246,11 +246,11 @@ pub fn expand_expr(e: Gc, fld: &mut MacroExpander) -> Gc { } } -// Rename loop label and expand its loop body -// -// The renaming procedure for loop is different in the sense that the loop -// body is in a block enclosed by loop head so the renaming of loop label -// must be propagated to the enclosed context. +/// Rename loop label and expand its loop body +/// +/// The renaming procedure for loop is different in the sense that the loop +/// body is in a block enclosed by loop head so the renaming of loop label +/// must be propagated to the enclosed context. fn expand_loop_block(loop_block: P, opt_ident: Option, fld: &mut MacroExpander) -> (P, Option) { diff --git a/src/libsyntax/ext/format.rs b/src/libsyntax/ext/format.rs index f486d2de3398b..786fd953f8901 100644 --- a/src/libsyntax/ext/format.rs +++ b/src/libsyntax/ext/format.rs @@ -37,24 +37,24 @@ struct Context<'a, 'b> { ecx: &'a mut ExtCtxt<'b>, fmtsp: Span, - // Parsed argument expressions and the types that we've found so far for - // them. + /// Parsed argument expressions and the types that we've found so far for + /// them. args: Vec>, arg_types: Vec>, - // Parsed named expressions and the types that we've found for them so far. - // Note that we keep a side-array of the ordering of the named arguments - // found to be sure that we can translate them in the same order that they - // were declared in. + /// Parsed named expressions and the types that we've found for them so far. 
+ /// Note that we keep a side-array of the ordering of the named arguments + /// found to be sure that we can translate them in the same order that they + /// were declared in. names: HashMap>, name_types: HashMap, name_ordering: Vec, - // Collection of the compiled `rt::Piece` structures + /// Collection of the compiled `rt::Piece` structures pieces: Vec>, name_positions: HashMap, method_statics: Vec>, - // Updated as arguments are consumed or methods are entered + /// Updated as arguments are consumed or methods are entered nest_level: uint, next_arg: uint, } diff --git a/src/libsyntax/ext/mtwt.rs b/src/libsyntax/ext/mtwt.rs index 18466e381a58b..8608f7fb54553 100644 --- a/src/libsyntax/ext/mtwt.rs +++ b/src/libsyntax/ext/mtwt.rs @@ -21,16 +21,16 @@ use std::cell::RefCell; use std::rc::Rc; use std::collections::HashMap; -// the SCTable contains a table of SyntaxContext_'s. It -// represents a flattened tree structure, to avoid having -// managed pointers everywhere (that caused an ICE). -// the mark_memo and rename_memo fields are side-tables -// that ensure that adding the same mark to the same context -// gives you back the same context as before. This shouldn't -// change the semantics--everything here is immutable--but -// it should cut down on memory use *a lot*; applying a mark -// to a tree containing 50 identifiers would otherwise generate -// 50 new contexts +/// The SCTable contains a table of SyntaxContext_'s. It +/// represents a flattened tree structure, to avoid having +/// managed pointers everywhere (that caused an ICE). +/// the mark_memo and rename_memo fields are side-tables +/// that ensure that adding the same mark to the same context +/// gives you back the same context as before. This shouldn't +/// change the semantics--everything here is immutable--but +/// it should cut down on memory use *a lot*; applying a mark +/// to a tree containing 50 identifiers would otherwise generate +/// 50 new contexts pub struct SCTable { table: RefCell>, mark_memo: RefCell>, @@ -41,16 +41,16 @@ pub struct SCTable { pub enum SyntaxContext_ { EmptyCtxt, Mark (Mrk,SyntaxContext), - // flattening the name and syntaxcontext into the rename... - // HIDDEN INVARIANTS: - // 1) the first name in a Rename node - // can only be a programmer-supplied name. - // 2) Every Rename node with a given Name in the - // "to" slot must have the same name and context - // in the "from" slot. In essence, they're all - // pointers to a single "rename" event node. + /// flattening the name and syntaxcontext into the rename... + /// HIDDEN INVARIANTS: + /// 1) the first name in a Rename node + /// can only be a programmer-supplied name. + /// 2) Every Rename node with a given Name in the + /// "to" slot must have the same name and context + /// in the "from" slot. In essence, they're all + /// pointers to a single "rename" event node. Rename (Ident,Name,SyntaxContext), - // actually, IllegalCtxt may not be necessary. + /// actually, IllegalCtxt may not be necessary. 
IllegalCtxt } @@ -62,7 +62,7 @@ pub fn apply_mark(m: Mrk, ctxt: SyntaxContext) -> SyntaxContext { with_sctable(|table| apply_mark_internal(m, ctxt, table)) } -// Extend a syntax context with a given mark and sctable (explicit memoization) +/// Extend a syntax context with a given mark and sctable (explicit memoization) fn apply_mark_internal(m: Mrk, ctxt: SyntaxContext, table: &SCTable) -> SyntaxContext { let key = (ctxt, m); let new_ctxt = |_: &(SyntaxContext, Mrk)| @@ -77,7 +77,7 @@ pub fn apply_rename(id: Ident, to:Name, with_sctable(|table| apply_rename_internal(id, to, ctxt, table)) } -// Extend a syntax context with a given rename and sctable (explicit memoization) +/// Extend a syntax context with a given rename and sctable (explicit memoization) fn apply_rename_internal(id: Ident, to: Name, ctxt: SyntaxContext, @@ -141,7 +141,7 @@ pub fn clear_tables() { with_resolve_table_mut(|table| *table = HashMap::new()); } -// Add a value to the end of a vec, return its index +/// Add a value to the end of a vec, return its index fn idx_push(vec: &mut Vec , val: T) -> u32 { vec.push(val); (vec.len() - 1) as u32 @@ -173,8 +173,8 @@ fn with_resolve_table_mut(op: |&mut ResolveTable| -> T) -> T { } } -// Resolve a syntax object to a name, per MTWT. -// adding memoization to resolve 500+ seconds in resolve for librustc (!) +/// Resolve a syntax object to a name, per MTWT. +/// Memoization cuts 500+ seconds from resolve for librustc (!) fn resolve_internal(id: Ident, table: &SCTable, resolve_table: &mut ResolveTable) -> Name { @@ -264,8 +264,8 @@ pub fn outer_mark(ctxt: SyntaxContext) -> Mrk { }) } -// Push a name... unless it matches the one on top, in which -// case pop and discard (so two of the same marks cancel) +/// Push a name... unless it matches the one on top, in which +/// case pop and discard (so two of the same marks cancel) fn xor_push(marks: &mut Vec, mark: Mrk) { if (marks.len() > 0) && (*marks.last().unwrap() == mark) { marks.pop().unwrap(); diff --git a/src/libsyntax/ext/source_util.rs b/src/libsyntax/ext/source_util.rs index 8922f423aad31..5ac9dc86fcec2 100644 --- a/src/libsyntax/ext/source_util.rs +++ b/src/libsyntax/ext/source_util.rs @@ -28,7 +28,7 @@ use std::str; // the column/row/filename of the expression, or they include // a given file into the current one. -/* line!(): expands to the current line number */ +/// line!(): expands to the current line number pub fn expand_line(cx: &mut ExtCtxt, sp: Span, tts: &[ast::TokenTree]) -> Box { base::check_zero_tts(cx, sp, tts, "line!"); @@ -49,9 +49,9 @@ pub fn expand_col(cx: &mut ExtCtxt, sp: Span, tts: &[ast::TokenTree]) base::MacExpr::new(cx.expr_uint(topmost.call_site, loc.col.to_uint())) } -/* file!(): expands to the current filename */ -/* The filemap (`loc.file`) contains a bunch more information we could spit - * out if we wanted. */ +/// file!(): expands to the current filename +/// The filemap (`loc.file`) contains a bunch more information we could spit +/// out if we wanted. pub fn expand_file(cx: &mut ExtCtxt, sp: Span, tts: &[ast::TokenTree]) -> Box { base::check_zero_tts(cx, sp, tts, "file!"); @@ -82,9 +82,9 @@ pub fn expand_mod(cx: &mut ExtCtxt, sp: Span, tts: &[ast::TokenTree]) token::intern_and_get_ident(string.as_slice()))) } -// include! : parse the given file as an expr -// This is generally a bad idea because it's going to behave -// unhygienically. +/// include!: parse the given file as an expr +/// This is generally a bad idea because it's going to behave +/// unhygienically.
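+///
+/// For example (an illustrative sketch; the file name and its contents are
+/// hypothetical, not part of this codebase):
+///
+///     // suppose `the_answer.rs` contains the text `41 + 1`
+///     let x = include!("the_answer.rs"); // behaves like `let x = 41 + 1;`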
pub fn expand_include(cx: &mut ExtCtxt, sp: Span, tts: &[ast::TokenTree]) -> Box { let file = match get_single_str_from_tts(cx, sp, tts, "include!") { diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs index b30ede70f0e4b..bdf1f6eb6007e 100644 --- a/src/libsyntax/ext/tt/macro_parser.rs +++ b/src/libsyntax/ext/tt/macro_parser.rs @@ -8,7 +8,72 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// Earley-like parser for macros. +//! This is an Earley-like parser without support for in-grammar nonterminals; +//! it handles named nonterminals only by calling out to the main Rust parser +//! (to which it commits fully when it hits one in a grammar). This means that there are no +//! completer or predictor rules, and therefore no need to store one column per +//! token: instead, there's a set of current Earley items and a set of next +//! ones. Instead of NTs, we have a special case for Kleene star. The big-O, in +//! pathological cases, is worse than traditional Earley parsing, but it's an +//! easier fit for Macro-by-Example-style rules, and I think the overhead is +//! lower. (In order to prevent the pathological case, we'd need to lazily +//! construct the resulting `NamedMatch`es at the very end. It'd be a pain, +//! and require more memory to keep around old items, but it would also save +//! overhead.) +//! +//! Quick intro to how the parser works: +//! +//! A 'position' is a spot in the middle of a matcher, represented here by a +//! `·`. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`. +//! +//! The parser walks through the input a token at a time, maintaining a list +//! of items consistent with the current position in the input string: `cur_eis`. +//! +//! As it processes them, it fills up `eof_eis` with items that would be valid if +//! the macro invocation is now over, `bb_eis` with items that are waiting on +//! a Rust nonterminal like `$e:expr`, and `next_eis` with items that are waiting +//! on a particular token. Most of the logic concerns moving the · through the +//! repetitions indicated by Kleene stars. It only advances or calls out to the +//! real Rust parser when no `cur_eis` items remain. +//! +//! Example: Start parsing `a a a a b` against [· a $( a )* a b]. +//! +//! Remaining input: `a a a a b` +//! next_eis: [· a $( a )* a b] +//! +//! - - - Advance over an `a`. - - - +//! +//! Remaining input: `a a a b` +//! cur: [a · $( a )* a b] +//! Descend/Skip (first item). +//! next: [a $( · a )* a b] [a $( a )* · a b]. +//! +//! - - - Advance over an `a`. - - - +//! +//! Remaining input: `a a b` +//! cur: [a $( a · )* a b] next: [a $( a )* a · b] +//! Finish/Repeat (first item) +//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] +//! +//! - - - Advance over an `a`. - - - (this looks exactly like the last step) +//! +//! Remaining input: `a b` +//! cur: [a $( a · )* a b] next: [a $( a )* a · b] +//! Finish/Repeat (first item) +//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] +//! +//! - - - Advance over an `a`. - - - (this looks exactly like the last step) +//! +//! Remaining input: `b` +//! cur: [a $( a · )* a b] next: [a $( a )* a · b] +//! Finish/Repeat (first item) +//! next: [a $( a )* · a b] [a $( · a )* a b] +//! +//! - - - Advance over a `b`. - - - +//! +//! Remaining input: `` +//!
eof: [a $( a )* a b ·] + use ast; use ast::{Matcher, MatchTok, MatchSeq, MatchNonterminal, Ident}; @@ -25,75 +90,6 @@ use std::rc::Rc; use std::gc::GC; use std::collections::HashMap; -/* This is an Earley-like parser, without support for in-grammar nonterminals, -only by calling out to the main rust parser for named nonterminals (which it -commits to fully when it hits one in a grammar). This means that there are no -completer or predictor rules, and therefore no need to store one column per -token: instead, there's a set of current Earley items and a set of next -ones. Instead of NTs, we have a special case for Kleene star. The big-O, in -pathological cases, is worse than traditional Earley parsing, but it's an -easier fit for Macro-by-Example-style rules, and I think the overhead is -lower. (In order to prevent the pathological case, we'd need to lazily -construct the resulting `NamedMatch`es at the very end. It'd be a pain, -and require more memory to keep around old items, but it would also save -overhead)*/ - -/* Quick intro to how the parser works: - -A 'position' is a dot in the middle of a matcher, usually represented as a -dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`. - -The parser walks through the input a character at a time, maintaining a list -of items consistent with the current position in the input string: `cur_eis`. - -As it processes them, it fills up `eof_eis` with items that would be valid if -the macro invocation is now over, `bb_eis` with items that are waiting on -a Rust nonterminal like `$e:expr`, and `next_eis` with items that are waiting -on the a particular token. Most of the logic concerns moving the · through the -repetitions indicated by Kleene stars. It only advances or calls out to the -real Rust parser when no `cur_eis` items remain - -Example: Start parsing `a a a a b` against [· a $( a )* a b]. - -Remaining input: `a a a a b` -next_eis: [· a $( a )* a b] - -- - - Advance over an `a`. - - - - -Remaining input: `a a a b` -cur: [a · $( a )* a b] -Descend/Skip (first item). -next: [a $( · a )* a b] [a $( a )* · a b]. - -- - - Advance over an `a`. - - - - -Remaining input: `a a b` -cur: [a $( a · )* a b] next: [a $( a )* a · b] -Finish/Repeat (first item) -next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] - -- - - Advance over an `a`. - - - (this looks exactly like the last step) - -Remaining input: `a b` -cur: [a $( a · )* a b] next: [a $( a )* a · b] -Finish/Repeat (first item) -next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] - -- - - Advance over an `a`. - - - (this looks exactly like the last step) - -Remaining input: `b` -cur: [a $( a · )* a b] next: [a $( a )* a · b] -Finish/Repeat (first item) -next: [a $( a )* · a b] [a $( · a )* a b] - -- - - Advance over a `b`. - - - - -Remaining input: `` -eof: [a $( a )* a b ·] - - */ - - /* to avoid costly uniqueness checks, we require that `MatchSeq` always has a nonempty body. */ @@ -147,24 +143,24 @@ pub fn initial_matcher_pos(ms: Vec , sep: Option, lo: BytePos) } } -// NamedMatch is a pattern-match result for a single ast::MatchNonterminal: -// so it is associated with a single ident in a parse, and all -// MatchedNonterminal's in the NamedMatch have the same nonterminal type -// (expr, item, etc). All the leaves in a single NamedMatch correspond to a -// single matcher_nonterminal in the ast::Matcher that produced it. 
-// -// It should probably be renamed, it has more or less exact correspondence to -// ast::match nodes, and the in-memory structure of a particular NamedMatch -// represents the match that occurred when a particular subset of an -// ast::match -- those ast::Matcher nodes leading to a single -// MatchNonterminal -- was applied to a particular token tree. -// -// The width of each MatchedSeq in the NamedMatch, and the identity of the -// MatchedNonterminal's, will depend on the token tree it was applied to: each -// MatchedSeq corresponds to a single MatchSeq in the originating -// ast::Matcher. The depth of the NamedMatch structure will therefore depend -// only on the nesting depth of ast::MatchSeq's in the originating -// ast::Matcher it was derived from. +/// NamedMatch is a pattern-match result for a single ast::MatchNonterminal: +/// so it is associated with a single ident in a parse, and all +/// MatchedNonterminal's in the NamedMatch have the same nonterminal type +/// (expr, item, etc). All the leaves in a single NamedMatch correspond to a +/// single matcher_nonterminal in the ast::Matcher that produced it. +/// +/// It should probably be renamed, it has more or less exact correspondence to +/// ast::match nodes, and the in-memory structure of a particular NamedMatch +/// represents the match that occurred when a particular subset of an +/// ast::match -- those ast::Matcher nodes leading to a single +/// MatchNonterminal -- was applied to a particular token tree. +/// +/// The width of each MatchedSeq in the NamedMatch, and the identity of the +/// MatchedNonterminal's, will depend on the token tree it was applied to: each +/// MatchedSeq corresponds to a single MatchSeq in the originating +/// ast::Matcher. The depth of the NamedMatch structure will therefore depend +/// only on the nesting depth of ast::MatchSeq's in the originating +/// ast::Matcher it was derived from. pub enum NamedMatch { MatchedSeq(Vec>, codemap::Span), @@ -224,7 +220,8 @@ pub fn parse_or_else(sess: &ParseSess, } } -// perform a token equality check, ignoring syntax context (that is, an unhygienic comparison) +/// Perform a token equality check, ignoring syntax context (that is, an +/// unhygienic comparison) pub fn token_name_eq(t1 : &Token, t2 : &Token) -> bool { match (t1,t2) { (&token::IDENT(id1,_),&token::IDENT(id2,_)) diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs index 2b481cb0596e7..249e9305150d6 100644 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ b/src/libsyntax/ext/tt/macro_rules.rs @@ -119,7 +119,7 @@ impl MacResult for MacroRulesDefiner { } } -// Given `lhses` and `rhses`, this is the new macro we create +/// Given `lhses` and `rhses`, this is the new macro we create fn generic_extension(cx: &ExtCtxt, sp: Span, name: Ident, @@ -193,9 +193,9 @@ fn generic_extension(cx: &ExtCtxt, cx.span_fatal(best_fail_spot, best_fail_msg.as_slice()); } -// this procedure performs the expansion of the -// macro_rules! macro. It parses the RHS and adds -// an extension to the current context. +/// This procedure performs the expansion of the +/// macro_rules! macro. It parses the RHS and adds +/// an extension to the current context. 
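+///
+/// For instance, a definition like the following (hypothetical, purely
+/// illustrative) is parsed into one (matcher, transcriber) pair and
+/// registered as the extension `twice`:
+///
+///     macro_rules! twice ( ($e:expr) => ({ $e; $e }) )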
pub fn add_new_extension(cx: &mut ExtCtxt, sp: Span, name: Ident, diff --git a/src/libsyntax/ext/tt/transcribe.rs b/src/libsyntax/ext/tt/transcribe.rs index c0c066fe4668b..726a7315f6991 100644 --- a/src/libsyntax/ext/tt/transcribe.rs +++ b/src/libsyntax/ext/tt/transcribe.rs @@ -32,7 +32,7 @@ struct TtFrame { #[deriving(Clone)] pub struct TtReader<'a> { pub sp_diag: &'a SpanHandler, - // the unzipped tree: + /// the unzipped tree: stack: Vec, /* for MBE-style macro transcription */ interpolations: HashMap>, @@ -43,9 +43,9 @@ pub struct TtReader<'a> { pub cur_span: Span, } -/** This can do Macro-By-Example transcription. On the other hand, if - * `src` contains no `TTSeq`s and `TTNonterminal`s, `interp` can (and - * should) be none. */ +/// This can do Macro-By-Example transcription. On the other hand, if +/// `src` contains no `TTSeq`s and `TTNonterminal`s, `interp` can (and +/// should) be none. pub fn new_tt_reader<'a>(sp_diag: &'a SpanHandler, interp: Option>>, src: Vec ) @@ -138,8 +138,8 @@ fn lockstep_iter_size(t: &TokenTree, r: &TtReader) -> LockstepIterSize { } } -// return the next token from the TtReader. -// EFFECT: advances the reader's token field +/// Return the next token from the TtReader. +/// EFFECT: advances the reader's token field pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan { // FIXME(pcwalton): Bad copy? let ret_val = TokenAndSpan { diff --git a/src/libsyntax/lib.rs b/src/libsyntax/lib.rs index 6df91c66a25e8..53ee991385ae3 100644 --- a/src/libsyntax/lib.rs +++ b/src/libsyntax/lib.rs @@ -8,15 +8,11 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/*! - -The Rust parser and macro expander. - -# Note - -This API is completely unstable and subject to change. - -*/ +//! The Rust parser and macro expander. +//! +//! # Note +//! +//! This API is completely unstable and subject to change. #![crate_id = "syntax#0.11.0"] // NOTE: remove after stage0 #![crate_name = "syntax"] diff --git a/src/libsyntax/parse/attr.rs b/src/libsyntax/parse/attr.rs index 53489e3283766..b2297ec770cc3 100644 --- a/src/libsyntax/parse/attr.rs +++ b/src/libsyntax/parse/attr.rs @@ -18,7 +18,7 @@ use parse::token::INTERPOLATED; use std::gc::{Gc, GC}; -// a parser that can parse attributes. +/// A parser that can parse attributes. pub trait ParserAttr { fn parse_outer_attributes(&mut self) -> Vec; fn parse_attribute(&mut self, permit_inner: bool) -> ast::Attribute; @@ -30,7 +30,7 @@ pub trait ParserAttr { } impl<'a> ParserAttr for Parser<'a> { - // Parse attributes that appear before an item + /// Parse attributes that appear before an item fn parse_outer_attributes(&mut self) -> Vec { let mut attrs: Vec = Vec::new(); loop { @@ -59,10 +59,10 @@ impl<'a> ParserAttr for Parser<'a> { return attrs; } - // matches attribute = # ! [ meta_item ] - // - // if permit_inner is true, then a leading `!` indicates an inner - // attribute + /// Matches `attribute = # ! [ meta_item ]` + /// + /// If permit_inner is true, then a leading `!` indicates an inner + /// attribute fn parse_attribute(&mut self, permit_inner: bool) -> ast::Attribute { debug!("parse_attributes: permit_inner={:?} self.token={:?}", permit_inner, self.token); @@ -114,17 +114,17 @@ impl<'a> ParserAttr for Parser<'a> { }; } - // Parse attributes that appear after the opening of an item. These should - // be preceded by an exclamation mark, but we accept and warn about one - // terminated by a semicolon. 
In addition to a vector of inner attributes, - // this function also returns a vector that may contain the first outer - // attribute of the next item (since we can't know whether the attribute - // is an inner attribute of the containing item or an outer attribute of - // the first contained item until we see the semi). - - // matches inner_attrs* outer_attr? - // you can make the 'next' field an Option, but the result is going to be - // more useful as a vector. + /// Parse attributes that appear after the opening of an item. These should + /// be preceded by an exclamation mark, but we accept and warn about one + /// terminated by a semicolon. In addition to a vector of inner attributes, + /// this function also returns a vector that may contain the first outer + /// attribute of the next item (since we can't know whether the attribute + /// is an inner attribute of the containing item or an outer attribute of + /// the first contained item until we see the semi). + /// + /// matches inner_attrs* outer_attr? + /// You can make the 'next' field an Option, but the result is going to be + /// more useful as a vector. fn parse_inner_attrs_and_next(&mut self) -> (Vec , Vec ) { let mut inner_attrs: Vec = Vec::new(); @@ -157,9 +157,9 @@ impl<'a> ParserAttr for Parser<'a> { (inner_attrs, next_outer_attrs) } - // matches meta_item = IDENT - // | IDENT = lit - // | IDENT meta_seq + /// matches meta_item = IDENT + /// | IDENT = lit + /// | IDENT meta_seq fn parse_meta_item(&mut self) -> Gc { match self.token { token::INTERPOLATED(token::NtMeta(e)) => { @@ -201,7 +201,7 @@ impl<'a> ParserAttr for Parser<'a> { } } - // matches meta_seq = ( COMMASEP(meta_item) ) + /// matches meta_seq = ( COMMASEP(meta_item) ) fn parse_meta_seq(&mut self) -> Vec> { self.parse_seq(&token::LPAREN, &token::RPAREN, diff --git a/src/libsyntax/parse/classify.rs b/src/libsyntax/parse/classify.rs index 8d9cc305c26e8..516f22cdf4d60 100644 --- a/src/libsyntax/parse/classify.rs +++ b/src/libsyntax/parse/classify.rs @@ -15,13 +15,13 @@ use ast; use std::gc::Gc; -// does this expression require a semicolon to be treated -// as a statement? The negation of this: 'can this expression -// be used as a statement without a semicolon' -- is used -// as an early-bail-out in the parser so that, for instance, -// 'if true {...} else {...} -// |x| 5 ' -// isn't parsed as (if true {...} else {...} | x) | 5 +/// Does this expression require a semicolon to be treated +/// as a statement? The negation of this: 'can this expression +/// be used as a statement without a semicolon' -- is used +/// as an early-bail-out in the parser so that, for instance, +/// if true {...} else {...} +/// |x| 5 +/// isn't parsed as (if true {...} else {...} | x) | 5 pub fn expr_requires_semi_to_be_stmt(e: Gc) -> bool { match e.node { ast::ExprIf(..) @@ -41,9 +41,9 @@ pub fn expr_is_simple_block(e: Gc) -> bool { } } -// this statement requires a semicolon after it. -// note that in one case (stmt_semi), we've already -// seen the semicolon, and thus don't need another. +/// This statement requires a semicolon after it. +/// Note that in one case (stmt_semi), we've already +/// seen the semicolon, and thus don't need another.
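+///
+/// A quick illustration (hypothetical statements, not from the source):
+///
+///     let x = 1;       // a declaration: requires its semicolon
+///     if x == 1 { }    // an expression statement: no semicolon needed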
pub fn stmt_ends_with_semi(stmt: &ast::Stmt) -> bool { return match stmt.node { ast::StmtDecl(d, _) => { diff --git a/src/libsyntax/parse/common.rs b/src/libsyntax/parse/common.rs index 3c3f0c7a82044..3842170d67777 100644 --- a/src/libsyntax/parse/common.rs +++ b/src/libsyntax/parse/common.rs @@ -12,8 +12,8 @@ use parse::token; -// SeqSep : a sequence separator (token) -// and whether a trailing separator is allowed. +/// SeqSep : a sequence separator (token) +/// and whether a trailing separator is allowed. pub struct SeqSep { pub sep: Option, pub trailing_sep_allowed: bool diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs index 73e5bb97f51d0..c5dd10382a959 100644 --- a/src/libsyntax/parse/lexer/comments.rs +++ b/src/libsyntax/parse/lexer/comments.rs @@ -24,10 +24,14 @@ use std::uint; #[deriving(Clone, PartialEq)] pub enum CommentStyle { - Isolated, // No code on either side of each line of the comment - Trailing, // Code exists to the left of the comment - Mixed, // Code before /* foo */ and after the comment - BlankLine, // Just a manual blank line "\n\n", for layout + /// No code on either side of each line of the comment + Isolated, + /// Code exists to the left of the comment + Trailing, + /// Code before /* foo */ and after the comment + Mixed, + /// Just a manual blank line "\n\n", for layout + BlankLine, } #[deriving(Clone)] @@ -198,9 +202,9 @@ fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool, } } -// Returns None if the first col chars of s contain a non-whitespace char. -// Otherwise returns Some(k) where k is first char offset after that leading -// whitespace. Note k may be outside bounds of s. +/// Returns None if the first col chars of s contain a non-whitespace char. +/// Otherwise returns Some(k) where k is first char offset after that leading +/// whitespace. Note k may be outside bounds of s. fn all_whitespace(s: &str, col: CharPos) -> Option { let len = s.len(); let mut col = col.to_uint(); diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 1e72b2de20f73..43bbba8527199 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -44,13 +44,13 @@ pub struct TokenAndSpan { pub struct StringReader<'a> { pub span_diagnostic: &'a SpanHandler, - // The absolute offset within the codemap of the next character to read + /// The absolute offset within the codemap of the next character to read pub pos: BytePos, - // The absolute offset within the codemap of the last character read(curr) + /// The absolute offset within the codemap of the last character read(curr) pub last_pos: BytePos, - // The column of the next character to read + /// The column of the next character to read pub col: CharPos, - // The last character to be read + /// The last character to be read pub curr: Option, pub filemap: Rc, /* cached: */ @@ -60,7 +60,7 @@ pub struct StringReader<'a> { impl<'a> Reader for StringReader<'a> { fn is_eof(&self) -> bool { self.curr.is_none() } - // return the next token. EFFECT: advances the string_reader. + /// Return the next token. EFFECT: advances the string_reader. fn next_token(&mut self) -> TokenAndSpan { let ret_val = TokenAndSpan { tok: replace(&mut self.peek_tok, token::UNDERSCORE), @@ -417,7 +417,7 @@ impl<'a> StringReader<'a> { return self.consume_any_line_comment(); } - // might return a sugared-doc-attr + /// Might return a sugared-doc-attr fn consume_block_comment(&mut self) -> Option { // block comments starting with "/**" or "/*!" 
are doc-comments let is_doc_comment = self.curr_is('*') || self.curr_is('!'); diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index fb4a23cf326ec..bea8b6a94d43d 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -10,7 +10,6 @@ //! The main parser interface - use ast; use codemap::{Span, CodeMap, FileMap}; use diagnostic::{SpanHandler, mk_span_handler, default_handler, Auto}; @@ -32,7 +31,7 @@ pub mod common; pub mod classify; pub mod obsolete; -// info about a parsing session. +/// Info about a parsing session. pub struct ParseSess { pub span_diagnostic: SpanHandler, // better be the same as the one in the reader! /// Used to determine and report recursive mod inclusions @@ -241,14 +240,14 @@ pub fn file_to_filemap(sess: &ParseSess, path: &Path, spanopt: Option) unreachable!() } -// given a session and a string, add the string to -// the session's codemap and return the new filemap +/// Given a session and a string, add the string to +/// the session's codemap and return the new filemap pub fn string_to_filemap(sess: &ParseSess, source: String, path: String) -> Rc { sess.span_diagnostic.cm.new_filemap(path, source) } -// given a filemap, produce a sequence of token-trees +/// Given a filemap, produce a sequence of token-trees pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc) -> Vec { // it appears to me that the cfg doesn't matter here... indeed, @@ -259,7 +258,7 @@ pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc) p1.parse_all_token_trees() } -// given tts and cfg, produce a parser +/// Given tts and cfg, produce a parser pub fn tts_to_parser<'a>(sess: &'a ParseSess, tts: Vec, cfg: ast::CrateConfig) -> Parser<'a> { @@ -267,7 +266,7 @@ pub fn tts_to_parser<'a>(sess: &'a ParseSess, Parser::new(sess, cfg, box trdr) } -// abort if necessary +/// Abort if necessary pub fn maybe_aborted(result: T, mut p: Parser) -> T { p.abort_if_errors(); result diff --git a/src/libsyntax/parse/obsolete.rs b/src/libsyntax/parse/obsolete.rs index 025684ae71e8c..cadae7ef12f80 100644 --- a/src/libsyntax/parse/obsolete.rs +++ b/src/libsyntax/parse/obsolete.rs @@ -38,8 +38,8 @@ pub enum ObsoleteSyntax { pub trait ParserObsoleteMethods { /// Reports an obsolete syntax non-fatal error. fn obsolete(&mut self, sp: Span, kind: ObsoleteSyntax); - // Reports an obsolete syntax non-fatal error, and returns - // a placeholder expression + /// Reports an obsolete syntax non-fatal error, and returns + /// a placeholder expression fn obsolete_expr(&mut self, sp: Span, kind: ObsoleteSyntax) -> Gc; fn report(&mut self, sp: Span, @@ -83,8 +83,8 @@ impl<'a> ParserObsoleteMethods for parser::Parser<'a> { self.report(sp, kind, kind_str, desc); } - // Reports an obsolete syntax non-fatal error, and returns - // a placeholder expression + /// Reports an obsolete syntax non-fatal error, and returns + /// a placeholder expression fn obsolete_expr(&mut self, sp: Span, kind: ObsoleteSyntax) -> Gc { self.obsolete(sp, kind); self.mk_expr(sp.lo, sp.hi, ExprLit(box(GC) respan(sp, LitNil))) diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index ac4cbf3aa8e55..3bf88424891bb 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -117,8 +117,8 @@ pub struct PathAndBounds { } enum ItemOrViewItem { - // Indicates a failure to parse any kind of item. The attributes are - // returned. + /// Indicates a failure to parse any kind of item. The attributes are + /// returned. 
IoviNone(Vec), IoviItem(Gc), IoviForeignItem(Gc), @@ -126,12 +126,12 @@ enum ItemOrViewItem { } -// Possibly accept an `INTERPOLATED` expression (a pre-parsed expression -// dropped into the token stream, which happens while parsing the -// result of macro expansion) -/* Placement of these is not as complex as I feared it would be. -The important thing is to make sure that lookahead doesn't balk -at INTERPOLATED tokens */ +/// Possibly accept an `INTERPOLATED` expression (a pre-parsed expression +/// dropped into the token stream, which happens while parsing the +/// result of macro expansion) +/// Placement of these is not as complex as I feared it would be. +/// The important thing is to make sure that lookahead doesn't balk +/// at INTERPOLATED tokens macro_rules! maybe_whole_expr ( ($p:expr) => ( { @@ -166,7 +166,7 @@ macro_rules! maybe_whole_expr ( ) ) -// As above, but for things other than expressions +/// As maybe_whole_expr, but for things other than expressions macro_rules! maybe_whole ( ($p:expr, $constructor:ident) => ( { @@ -287,14 +287,14 @@ struct ParsedItemsAndViewItems { pub struct Parser<'a> { pub sess: &'a ParseSess, - // the current token: + /// the current token: pub token: token::Token, - // the span of the current token: + /// the span of the current token: pub span: Span, - // the span of the prior token: + /// the span of the prior token: pub last_span: Span, pub cfg: CrateConfig, - // the previous token or None (only stashed sometimes). + /// the previous token or None (only stashed sometimes). pub last_token: Option>, pub buffer: [TokenAndSpan, ..4], pub buffer_start: int, @@ -361,12 +361,13 @@ impl<'a> Parser<'a> { root_module_name: None, } } - // convert a token to a string using self's reader + + /// Convert a token to a string using self's reader pub fn token_to_string(token: &token::Token) -> String { token::to_string(token) } - // convert the current token to a string using self's reader + /// Convert the current token to a string using self's reader pub fn this_token_to_string(&mut self) -> String { Parser::token_to_string(&self.token) } @@ -383,8 +384,8 @@ impl<'a> Parser<'a> { self.fatal(format!("unexpected token: `{}`", this_token).as_slice()); } - // expect and consume the token t. Signal an error if - // the next token is not t. + /// Expect and consume the token t. Signal an error if + /// the next token is not t. pub fn expect(&mut self, t: &token::Token) { if self.token == *t { self.bump(); @@ -397,9 +398,9 @@ impl<'a> Parser<'a> { } } - // Expect next token to be edible or inedible token. If edible, - // then consume it; if inedible, then return without consuming - // anything. Signal a fatal error if next token is unexpected. + /// Expect next token to be edible or inedible token. If edible, + /// then consume it; if inedible, then return without consuming + /// anything. Signal a fatal error if next token is unexpected. pub fn expect_one_of(&mut self, edible: &[token::Token], inedible: &[token::Token]) { @@ -437,9 +438,9 @@ impl<'a> Parser<'a> { } } - // Check for erroneous `ident { }`; if matches, signal error and - // recover (without consuming any expected input token). Returns - // true if and only if input was consumed for recovery. + /// Check for erroneous `ident { }`; if matches, signal error and + /// recover (without consuming any expected input token). Returns + /// true if and only if input was consumed for recovery. 
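+ ///
+ /// For example (an illustrative error case, assuming a unit-like
+ /// `struct Foo;` in scope), the erroneous literal below triggers the
+ /// recovery path:
+ ///
+ ///     let f = Foo { };    // should be written just `Foo`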
pub fn check_for_erroneous_unit_struct_expecting(&mut self, expected: &[token::Token]) -> bool { if self.token == token::LBRACE && expected.iter().all(|t| *t != token::LBRACE) @@ -456,9 +457,9 @@ impl<'a> Parser<'a> { } } - // Commit to parsing a complete expression `e` expected to be - // followed by some token from the set edible + inedible. Recover - // from anticipated input errors, discarding erroneous characters. + /// Commit to parsing a complete expression `e` expected to be + /// followed by some token from the set edible + inedible. Recover + /// from anticipated input errors, discarding erroneous characters. pub fn commit_expr(&mut self, e: Gc, edible: &[token::Token], inedible: &[token::Token]) { debug!("commit_expr {:?}", e); @@ -479,9 +480,9 @@ impl<'a> Parser<'a> { self.commit_expr(e, &[edible], &[]) } - // Commit to parsing a complete statement `s`, which expects to be - // followed by some token from the set edible + inedible. Check - // for recoverable input errors, discarding erroneous characters. + /// Commit to parsing a complete statement `s`, which expects to be + /// followed by some token from the set edible + inedible. Check + /// for recoverable input errors, discarding erroneous characters. pub fn commit_stmt(&mut self, s: Gc, edible: &[token::Token], inedible: &[token::Token]) { debug!("commit_stmt {:?}", s); @@ -526,8 +527,8 @@ impl<'a> Parser<'a> { id: ast::DUMMY_NODE_ID }) } - // consume token 'tok' if it exists. Returns true if the given - // token was present, false otherwise. + /// Consume token 'tok' if it exists. Returns true if the given + /// token was present, false otherwise. pub fn eat(&mut self, tok: &token::Token) -> bool { let is_present = self.token == *tok; if is_present { self.bump() } @@ -538,8 +539,8 @@ impl<'a> Parser<'a> { token::is_keyword(kw, &self.token) } - // if the next token is the given keyword, eat it and return - // true. Otherwise, return false. + /// If the next token is the given keyword, eat it and return + /// true. Otherwise, return false. pub fn eat_keyword(&mut self, kw: keywords::Keyword) -> bool { match self.token { token::IDENT(sid, false) if kw.to_name() == sid.name => { @@ -550,9 +551,9 @@ impl<'a> Parser<'a> { } } - // if the given word is not a keyword, signal an error. - // if the next token is not the given word, signal an error. - // otherwise, eat it. + /// If the given word is not a keyword, signal an error. + /// If the next token is not the given word, signal an error. + /// Otherwise, eat it. pub fn expect_keyword(&mut self, kw: keywords::Keyword) { if !self.eat_keyword(kw) { let id_interned_str = token::get_name(kw.to_name()); @@ -562,7 +563,7 @@ impl<'a> Parser<'a> { } } - // signal an error if the given string is a strict keyword + /// Signal an error if the given string is a strict keyword pub fn check_strict_keywords(&mut self) { if token::is_strict_keyword(&self.token) { let token_str = self.this_token_to_string(); @@ -573,7 +574,7 @@ impl<'a> Parser<'a> { } } - // signal an error if the current token is a reserved keyword + /// Signal an error if the current token is a reserved keyword pub fn check_reserved_keywords(&mut self) { if token::is_reserved_keyword(&self.token) { let token_str = self.this_token_to_string(); @@ -582,8 +583,8 @@ impl<'a> Parser<'a> { } } - // Expect and consume an `&`. If `&&` is seen, replace it with a single - // `&` and continue. If an `&` is not seen, signal an error. + /// Expect and consume an `&`. If `&&` is seen, replace it with a single + /// `&` and continue. 
If an `&` is not seen, signal an error. fn expect_and(&mut self) { match self.token { token::BINOP(token::AND) => self.bump(), @@ -603,8 +604,8 @@ impl<'a> Parser<'a> { } } - // Expect and consume a `|`. If `||` is seen, replace it with a single - // `|` and continue. If a `|` is not seen, signal an error. + /// Expect and consume a `|`. If `||` is seen, replace it with a single + /// `|` and continue. If a `|` is not seen, signal an error. fn expect_or(&mut self) { match self.token { token::BINOP(token::OR) => self.bump(), @@ -624,26 +625,26 @@ impl<'a> Parser<'a> { } } - // Attempt to consume a `<`. If `<<` is seen, replace it with a single - // `<` and continue. If a `<` is not seen, return false. - // - // This is meant to be used when parsing generics on a path to get the - // starting token. The `force` parameter is used to forcefully break up a - // `<<` token. If `force` is false, then `<<` is only broken when a lifetime - // shows up next. For example, consider the expression: - // - // foo as bar << test - // - // The parser needs to know if `bar <<` is the start of a generic path or if - // it's a left-shift token. If `test` were a lifetime, then it's impossible - // for the token to be a left-shift, but if it's not a lifetime, then it's - // considered a left-shift. - // - // The reason for this is that the only current ambiguity with `<<` is when - // parsing closure types: - // - // foo::<<'a> ||>(); - // impl Foo<<'a> ||>() { ... } + /// Attempt to consume a `<`. If `<<` is seen, replace it with a single + /// `<` and continue. If a `<` is not seen, return false. + /// + /// This is meant to be used when parsing generics on a path to get the + /// starting token. The `force` parameter is used to forcefully break up a + /// `<<` token. If `force` is false, then `<<` is only broken when a lifetime + /// shows up next. For example, consider the expression: + /// + /// foo as bar << test + /// + /// The parser needs to know if `bar <<` is the start of a generic path or if + /// it's a left-shift token. If `test` were a lifetime, then it's impossible + /// for the token to be a left-shift, but if it's not a lifetime, then it's + /// considered a left-shift. + /// + /// The reason for this is that the only current ambiguity with `<<` is when + /// parsing closure types: + /// + /// foo::<<'a> ||>(); + /// impl Foo<<'a> ||>() { ... } fn eat_lt(&mut self, force: bool) -> bool { match self.token { token::LT => { self.bump(); true } @@ -675,7 +676,7 @@ impl<'a> Parser<'a> { } } - // Parse a sequence bracketed by `|` and `|`, stopping before the `|`. + /// Parse a sequence bracketed by `|` and `|`, stopping before the `|`. fn parse_seq_to_before_or( &mut self, sep: &token::Token, @@ -696,9 +697,9 @@ impl<'a> Parser<'a> { vector } - // expect and consume a GT. if a >> is seen, replace it - // with a single > and continue. If a GT is not seen, - // signal an error. + /// Expect and consume a GT. if a >> is seen, replace it + /// with a single > and continue. If a GT is not seen, + /// signal an error. pub fn expect_gt(&mut self) { match self.token { token::GT => self.bump(), @@ -727,8 +728,8 @@ impl<'a> Parser<'a> { } } - // parse a sequence bracketed by '<' and '>', stopping - // before the '>'. + /// Parse a sequence bracketed by '<' and '>', stopping + /// before the '>'. pub fn parse_seq_to_before_gt( &mut self, sep: Option, @@ -762,9 +763,9 @@ impl<'a> Parser<'a> { return v; } - // parse a sequence, including the closing delimiter. 
The function - // f must consume tokens until reaching the next separator or - // closing bracket. + /// Parse a sequence, including the closing delimiter. The function + /// f must consume tokens until reaching the next separator or + /// closing bracket. pub fn parse_seq_to_end( &mut self, ket: &token::Token, @@ -776,9 +777,9 @@ impl<'a> Parser<'a> { val } - // parse a sequence, not including the closing delimiter. The function - // f must consume tokens until reaching the next separator or - // closing bracket. + /// Parse a sequence, not including the closing delimiter. The function + /// f must consume tokens until reaching the next separator or + /// closing bracket. pub fn parse_seq_to_before_end( &mut self, ket: &token::Token, @@ -801,9 +802,9 @@ impl<'a> Parser<'a> { return v; } - // parse a sequence, including the closing delimiter. The function - // f must consume tokens until reaching the next separator or - // closing bracket. + /// Parse a sequence, including the closing delimiter. The function + /// f must consume tokens until reaching the next separator or + /// closing bracket. pub fn parse_unspanned_seq( &mut self, bra: &token::Token, @@ -817,8 +818,8 @@ impl<'a> Parser<'a> { result } - // parse a sequence parameter of enum variant. For consistency purposes, - // these should not be empty. + /// Parse a sequence parameter of enum variant. For consistency purposes, + /// these should not be empty. pub fn parse_enum_variant_seq( &mut self, bra: &token::Token, @@ -852,7 +853,7 @@ impl<'a> Parser<'a> { spanned(lo, hi, result) } - // advance the parser by one token + /// Advance the parser by one token pub fn bump(&mut self) { self.last_span = self.span; // Stash token for error recovery (sometimes; clone is not necessarily cheap). @@ -880,14 +881,14 @@ impl<'a> Parser<'a> { self.tokens_consumed += 1u; } - // Advance the parser by one token and return the bumped token. + /// Advance the parser by one token and return the bumped token. pub fn bump_and_get(&mut self) -> token::Token { let old_token = replace(&mut self.token, token::UNDERSCORE); self.bump(); old_token } - // EFFECT: replace the current token and span with the given one + /// EFFECT: replace the current token and span with the given one pub fn replace_token(&mut self, next: token::Token, lo: BytePos, @@ -940,8 +941,8 @@ impl<'a> Parser<'a> { token::get_ident(id) } - // Is the current token one of the keywords that signals a bare function - // type? + /// Is the current token one of the keywords that signals a bare function + /// type? pub fn token_is_bare_fn_keyword(&mut self) -> bool { if token::is_keyword(keywords::Fn, &self.token) { return true @@ -955,14 +956,14 @@ impl<'a> Parser<'a> { false } - // Is the current token one of the keywords that signals a closure type? + /// Is the current token one of the keywords that signals a closure type? pub fn token_is_closure_keyword(&mut self) -> bool { token::is_keyword(keywords::Unsafe, &self.token) || token::is_keyword(keywords::Once, &self.token) } - // Is the current token one of the keywords that signals an old-style - // closure type (with explicit sigil)? + /// Is the current token one of the keywords that signals an old-style + /// closure type (with explicit sigil)? 
pub fn token_is_old_style_closure_keyword(&mut self) -> bool { token::is_keyword(keywords::Unsafe, &self.token) || token::is_keyword(keywords::Once, &self.token) || @@ -983,7 +984,7 @@ impl<'a> Parser<'a> { } } - // parse a TyBareFn type: + /// parse a TyBareFn type: pub fn parse_ty_bare_fn(&mut self) -> Ty_ { /* @@ -1014,8 +1015,8 @@ impl<'a> Parser<'a> { }); } - // Parses a procedure type (`proc`). The initial `proc` keyword must - // already have been parsed. + /// Parses a procedure type (`proc`). The initial `proc` keyword must + /// already have been parsed. pub fn parse_proc_type(&mut self) -> Ty_ { /* @@ -1063,7 +1064,7 @@ impl<'a> Parser<'a> { }) } - // parse a TyClosure type + /// Parse a TyClosure type pub fn parse_ty_closure(&mut self) -> Ty_ { /* @@ -1154,7 +1155,7 @@ impl<'a> Parser<'a> { } } - // parse a function type (following the 'fn') + /// Parse a function type (following the 'fn') pub fn parse_ty_fn_decl(&mut self, allow_variadic: bool) -> (P, Vec) { /* @@ -1186,7 +1187,7 @@ impl<'a> Parser<'a> { (decl, lifetimes) } - // parse the methods in a trait declaration + /// Parse the methods in a trait declaration pub fn parse_trait_methods(&mut self) -> Vec { self.parse_unspanned_seq( &token::LBRACE, @@ -1255,15 +1256,15 @@ impl<'a> Parser<'a> { }) } - // parse a possibly mutable type + /// Parse a possibly mutable type pub fn parse_mt(&mut self) -> MutTy { let mutbl = self.parse_mutability(); let t = self.parse_ty(true); MutTy { ty: t, mutbl: mutbl } } - // parse [mut/const/imm] ID : TY - // now used only by obsolete record syntax parser... + /// Parse [mut/const/imm] ID : TY + /// now used only by obsolete record syntax parser... pub fn parse_ty_field(&mut self) -> TypeField { let lo = self.span.lo; let mutbl = self.parse_mutability(); @@ -1278,7 +1279,7 @@ impl<'a> Parser<'a> { } } - // parse optional return type [ -> TY ] in function decl + /// Parse optional return type [ -> TY ] in function decl pub fn parse_ret_ty(&mut self) -> (RetStyle, P) { return if self.eat(&token::RARROW) { let lo = self.span.lo; @@ -1478,8 +1479,8 @@ impl<'a> Parser<'a> { } } - // This version of parse arg doesn't necessarily require - // identifier names. + /// This version of parse arg doesn't necessarily require + /// identifier names. pub fn parse_arg_general(&mut self, require_name: bool) -> Arg { let pat = if require_name || self.is_named_argument() { debug!("parse_arg_general parse_pat (require_name:{:?})", @@ -1504,12 +1505,12 @@ impl<'a> Parser<'a> { } } - // parse a single function argument + /// Parse a single function argument pub fn parse_arg(&mut self) -> Arg { self.parse_arg_general(true) } - // parse an argument in a lambda header e.g. |arg, arg| + /// Parse an argument in a lambda header e.g. |arg, arg| pub fn parse_fn_block_arg(&mut self) -> Arg { let pat = self.parse_pat(); let t = if self.eat(&token::COLON) { @@ -1539,7 +1540,7 @@ impl<'a> Parser<'a> { } } - // matches token_lit = LIT_INT | ... + /// Matches token_lit = LIT_INT | ... 
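+ ///
+ /// For example (illustrative literal tokens only), each of these arrives
+ /// as a single token and maps to one Lit_ variant:
+ ///
+ ///     b'a'  'z'  42i  0xffu8  3.14f64  "hello"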
pub fn lit_from_token(&mut self, tok: &token::Token) -> Lit_ { match *tok { token::LIT_BYTE(i) => LitByte(i), @@ -1566,7 +1567,7 @@ impl<'a> Parser<'a> { } } - // matches lit = true | false | token_lit + /// Matches lit = true | false | token_lit pub fn parse_lit(&mut self) -> Lit { let lo = self.span.lo; let lit = if self.eat_keyword(keywords::True) { @@ -1581,7 +1582,7 @@ impl<'a> Parser<'a> { codemap::Spanned { node: lit, span: mk_sp(lo, self.last_span.hi) } } - // matches '-' lit | lit + /// matches '-' lit | lit pub fn parse_literal_maybe_minus(&mut self) -> Gc { let minus_lo = self.span.lo; let minus_present = self.eat(&token::BINOP(token::MINUS)); @@ -1719,7 +1720,7 @@ impl<'a> Parser<'a> { } /// Parses a single lifetime - // matches lifetime = LIFETIME + /// Matches lifetime = LIFETIME pub fn parse_lifetime(&mut self) -> ast::Lifetime { match self.token { token::LIFETIME(i) => { @@ -1779,7 +1780,7 @@ impl<'a> Parser<'a> { token::is_keyword(keywords::Const, tok) } - // parse mutability declaration (mut/const/imm) + /// Parse mutability declaration (mut/const/imm) pub fn parse_mutability(&mut self) -> Mutability { if self.eat_keyword(keywords::Mut) { MutMutable @@ -1788,7 +1789,7 @@ impl<'a> Parser<'a> { } } - // parse ident COLON expr + /// Parse ident COLON expr pub fn parse_field(&mut self) -> Field { let lo = self.span.lo; let i = self.parse_ident(); @@ -1867,9 +1868,9 @@ impl<'a> Parser<'a> { } } - // at the bottom (top?) of the precedence hierarchy, - // parse things like parenthesized exprs, - // macros, return, etc. + /// At the bottom (top?) of the precedence hierarchy, + /// parse things like parenthesized exprs, + /// macros, return, etc. pub fn parse_bottom_expr(&mut self) -> Gc { maybe_whole_expr!(self); @@ -2107,7 +2108,7 @@ impl<'a> Parser<'a> { return self.mk_expr(lo, hi, ex); } - // parse a block or unsafe block + /// Parse a block or unsafe block pub fn parse_block_expr(&mut self, lo: BytePos, blk_mode: BlockCheckMode) -> Gc { self.expect(&token::LBRACE); @@ -2115,7 +2116,7 @@ impl<'a> Parser<'a> { return self.mk_expr(blk.span.lo, blk.span.hi, ExprBlock(blk)); } - // parse a.b or a(13) or a[4] or just a + /// parse a.b or a(13) or a[4] or just a pub fn parse_dot_or_call_expr(&mut self) -> Gc { let b = self.parse_bottom_expr(); self.parse_dot_or_call_expr_with(b) @@ -2199,8 +2200,8 @@ impl<'a> Parser<'a> { return e; } - // parse an optional separator followed by a kleene-style - // repetition token (+ or *). + /// Parse an optional separator followed by a kleene-style + /// repetition token (+ or *). pub fn parse_sep_and_zerok(&mut self) -> (Option, bool) { fn parse_zerok(parser: &mut Parser) -> Option { match parser.token { @@ -2225,7 +2226,7 @@ impl<'a> Parser<'a> { } } - // parse a single token tree from the input. + /// parse a single token tree from the input. pub fn parse_token_tree(&mut self) -> TokenTree { // FIXME #6994: currently, this is too eager. It // parses token trees but also identifies TTSeq's @@ -2341,9 +2342,9 @@ impl<'a> Parser<'a> { } } - // This goofy function is necessary to correctly match parens in Matcher's. - // Otherwise, `$( ( )` would be a valid Matcher, and `$( () )` would be - // invalid. It's similar to common::parse_seq. + /// This goofy function is necessary to correctly match parens in Matcher's. + /// Otherwise, `$( ( )` would be a valid Matcher, and `$( () )` would be + /// invalid. It's similar to common::parse_seq. 
pub fn parse_matcher_subseq_upto(&mut self, name_idx: &mut uint, ket: &token::Token) @@ -2392,7 +2393,7 @@ impl<'a> Parser<'a> { return spanned(lo, self.span.hi, m); } - // parse a prefix-operator expr + /// Parse a prefix-operator expr pub fn parse_prefix_expr(&mut self) -> Gc { let lo = self.span.lo; let hi; @@ -2500,13 +2501,13 @@ impl<'a> Parser<'a> { return self.mk_expr(lo, hi, ex); } - // parse an expression of binops + /// Parse an expression of binops pub fn parse_binops(&mut self) -> Gc { let prefix_expr = self.parse_prefix_expr(); self.parse_more_binops(prefix_expr, 0) } - // parse an expression of binops of at least min_prec precedence + /// Parse an expression of binops of at least min_prec precedence pub fn parse_more_binops(&mut self, lhs: Gc, min_prec: uint) -> Gc { if self.expr_is_complete(lhs) { return lhs; } @@ -2554,9 +2555,9 @@ impl<'a> Parser<'a> { } } - // parse an assignment expression.... - // actually, this seems to be the main entry point for - // parsing an arbitrary expression. + /// Parse an assignment expression.... + /// actually, this seems to be the main entry point for + /// parsing an arbitrary expression. pub fn parse_assign_expr(&mut self) -> Gc { let lo = self.span.lo; let lhs = self.parse_binops(); @@ -2590,7 +2591,7 @@ impl<'a> Parser<'a> { } } - // parse an 'if' expression ('if' token already eaten) + /// Parse an 'if' expression ('if' token already eaten) pub fn parse_if_expr(&mut self) -> Gc { let lo = self.last_span.lo; let cond = self.parse_expr_res(RESTRICT_NO_STRUCT_LITERAL); @@ -2605,7 +2606,7 @@ impl<'a> Parser<'a> { self.mk_expr(lo, hi, ExprIf(cond, thn, els)) } - // `|args| { ... }` or `{ ...}` like in `do` expressions + /// `|args| { ... }` or `{ ...}` like in `do` expressions pub fn parse_lambda_block_expr(&mut self) -> Gc { self.parse_lambda_expr_( |p| { @@ -2634,15 +2635,15 @@ impl<'a> Parser<'a> { }) } - // `|args| expr` + /// `|args| expr` pub fn parse_lambda_expr(&mut self) -> Gc { self.parse_lambda_expr_(|p| p.parse_fn_block_decl(), |p| p.parse_expr()) } - // parse something of the form |args| expr - // this is used both in parsing a lambda expr - // and in parsing a block expr as e.g. in for... + /// parse something of the form |args| expr + /// this is used both in parsing a lambda expr + /// and in parsing a block expr as e.g. in for... pub fn parse_lambda_expr_(&mut self, parse_decl: |&mut Parser| -> P, parse_body: |&mut Parser| -> Gc) @@ -2671,7 +2672,7 @@ impl<'a> Parser<'a> { } } - // parse a 'for' .. 'in' expression ('for' token already eaten) + /// Parse a 'for' .. 'in' expression ('for' token already eaten) pub fn parse_for_expr(&mut self, opt_ident: Option) -> Gc { // Parse: `for in ` @@ -2737,12 +2738,12 @@ impl<'a> Parser<'a> { return self.mk_expr(lo, hi, ExprMatch(discriminant, arms)); } - // parse an expression + /// Parse an expression pub fn parse_expr(&mut self) -> Gc { return self.parse_expr_res(UNRESTRICTED); } - // parse an expression, subject to the given restriction + /// Parse an expression, subject to the given restriction pub fn parse_expr_res(&mut self, r: restriction) -> Gc { let old = self.restriction; self.restriction = r; @@ -2751,7 +2752,7 @@ impl<'a> Parser<'a> { return e; } - // parse the RHS of a local variable declaration (e.g. '= 14;') + /// Parse the RHS of a local variable declaration (e.g. 
'= 14;') fn parse_initializer(&mut self) -> Option> { if self.token == token::EQ { self.bump(); @@ -2761,7 +2762,7 @@ impl<'a> Parser<'a> { } } - // parse patterns, separated by '|' s + /// Parse patterns, separated by '|' s fn parse_pats(&mut self) -> Vec> { let mut pats = Vec::new(); loop { @@ -2824,7 +2825,7 @@ impl<'a> Parser<'a> { (before, slice, after) } - // parse the fields of a struct-like pattern + /// Parse the fields of a struct-like pattern fn parse_pat_fields(&mut self) -> (Vec , bool) { let mut fields = Vec::new(); let mut etc = false; @@ -2884,7 +2885,7 @@ impl<'a> Parser<'a> { return (fields, etc); } - // parse a pattern. + /// Parse a pattern. pub fn parse_pat(&mut self) -> Gc { maybe_whole!(self, NtPat); @@ -3126,9 +3127,9 @@ impl<'a> Parser<'a> { } } - // parse ident or ident @ pat - // used by the copy foo and ref foo patterns to give a good - // error message when parsing mistakes like ref foo(a,b) + /// Parse ident or ident @ pat + /// used by the copy foo and ref foo patterns to give a good + /// error message when parsing mistakes like ref foo(a,b) fn parse_pat_ident(&mut self, binding_mode: ast::BindingMode) -> ast::Pat_ { @@ -3162,7 +3163,7 @@ impl<'a> Parser<'a> { PatIdent(binding_mode, name, sub) } - // parse a local variable declaration + /// Parse a local variable declaration fn parse_local(&mut self) -> Gc { let lo = self.span.lo; let pat = self.parse_pat(); @@ -3186,14 +3187,14 @@ impl<'a> Parser<'a> { } } - // parse a "let" stmt + /// Parse a "let" stmt fn parse_let(&mut self) -> Gc { let lo = self.span.lo; let local = self.parse_local(); box(GC) spanned(lo, self.last_span.hi, DeclLocal(local)) } - // parse a structure field + /// Parse a structure field fn parse_name_and_ty(&mut self, pr: Visibility, attrs: Vec ) -> StructField { let lo = self.span.lo; @@ -3211,8 +3212,8 @@ impl<'a> Parser<'a> { }) } - // parse a statement. may include decl. - // precondition: any attributes are parsed already + /// Parse a statement. may include decl. + /// Precondition: any attributes are parsed already pub fn parse_stmt(&mut self, item_attrs: Vec) -> Gc { maybe_whole!(self, NtStmt); @@ -3315,13 +3316,13 @@ impl<'a> Parser<'a> { } } - // is this expression a successfully-parsed statement? + /// Is this expression a successfully-parsed statement? fn expr_is_complete(&mut self, e: Gc) -> bool { return self.restriction == RESTRICT_STMT_EXPR && !classify::expr_requires_semi_to_be_stmt(e); } - // parse a block. No inner attrs are allowed. + /// Parse a block. No inner attrs are allowed. pub fn parse_block(&mut self) -> P { maybe_whole!(no_clone self, NtBlock); @@ -3331,7 +3332,7 @@ impl<'a> Parser<'a> { return self.parse_block_tail_(lo, DefaultBlock, Vec::new()); } - // parse a block. Inner attrs are allowed. + /// Parse a block. Inner attrs are allowed. fn parse_inner_attrs_and_block(&mut self) -> (Vec , P) { @@ -3344,15 +3345,15 @@ impl<'a> Parser<'a> { (inner, self.parse_block_tail_(lo, DefaultBlock, next)) } - // Precondition: already parsed the '{' or '#{' - // I guess that also means "already parsed the 'impure'" if - // necessary, and this should take a qualifier. - // some blocks start with "#{"... + /// Precondition: already parsed the '{' or '#{' + /// I guess that also means "already parsed the 'impure'" if + /// necessary, and this should take a qualifier. + /// Some blocks start with "#{"... 
fn parse_block_tail(&mut self, lo: BytePos, s: BlockCheckMode) -> P { self.parse_block_tail_(lo, s, Vec::new()) } - // parse the rest of a block expression or function body + /// Parse the rest of a block expression or function body fn parse_block_tail_(&mut self, lo: BytePos, s: BlockCheckMode, first_item_attrs: Vec ) -> P { let mut stmts = Vec::new(); @@ -3510,18 +3511,18 @@ impl<'a> Parser<'a> { } } - // matches bounds = ( boundseq )? - // where boundseq = ( bound + boundseq ) | bound - // and bound = 'static | ty - // Returns "None" if there's no colon (e.g. "T"); - // Returns "Some(Empty)" if there's a colon but nothing after (e.g. "T:") - // Returns "Some(stuff)" otherwise (e.g. "T:stuff"). - // NB: The None/Some distinction is important for issue #7264. - // - // Note that the `allow_any_lifetime` argument is a hack for now while the - // AST doesn't support arbitrary lifetimes in bounds on type parameters. In - // the future, this flag should be removed, and the return value of this - // function should be Option<~[TyParamBound]> + /// matches optbounds = ( ( : ( boundseq )? )? ) + /// where boundseq = ( bound + boundseq ) | bound + /// and bound = 'static | ty + /// Returns "None" if there's no colon (e.g. "T"); + /// Returns "Some(Empty)" if there's a colon but nothing after (e.g. "T:") + /// Returns "Some(stuff)" otherwise (e.g. "T:stuff"). + /// NB: The None/Some distinction is important for issue #7264. + /// + /// Note that the `allow_any_lifetime` argument is a hack for now while the + /// AST doesn't support arbitrary lifetimes in bounds on type parameters. In + /// the future, this flag should be removed, and the return value of this + /// function should be Option<~[TyParamBound]> fn parse_ty_param_bounds(&mut self, allow_any_lifetime: bool) -> (Option, OwnedSlice) { @@ -3588,7 +3589,7 @@ impl<'a> Parser<'a> { } } - // matches typaram = (unbound`?`)? IDENT optbounds ( EQ ty )? + /// Matches typaram = (unbound`?`)? IDENT optbounds ( EQ ty )? fn parse_ty_param(&mut self) -> TyParam { // This is a bit hacky. Currently we are only interested in a single // unbound, and it may only be `Sized`. To avoid backtracking and other @@ -3632,10 +3633,10 @@ impl<'a> Parser<'a> { } } - // parse a set of optional generic type parameter declarations - // matches generics = ( ) | ( < > ) | ( < typaramseq ( , )? > ) | ( < lifetimes ( , )? > ) - // | ( < lifetimes , typaramseq ( , )? > ) - // where typaramseq = ( typaram ) | ( typaram , typaramseq ) + /// Parse a set of optional generic type parameter declarations + /// matches generics = ( ) | ( < > ) | ( < typaramseq ( , )? > ) | ( < lifetimes ( , )? > ) + /// | ( < lifetimes , typaramseq ( , )? > ) + /// where typaramseq = ( typaram ) | ( typaram , typaramseq ) pub fn parse_generics(&mut self) -> ast::Generics { if self.eat(&token::LT) { let lifetimes = self.parse_lifetimes(); @@ -3727,7 +3728,7 @@ impl<'a> Parser<'a> { (args, variadic) } - // parse the argument list and result type of a function declaration + /// Parse the argument list and result type of a function declaration pub fn parse_fn_decl(&mut self, allow_variadic: bool) -> P { let (args, variadic) = self.parse_fn_args(true, allow_variadic); @@ -3762,8 +3763,8 @@ impl<'a> Parser<'a> { } } - // parse the argument list and result type of a function - // that may have a self type. + /// Parse the argument list and result type of a function + /// that may have a self type. 
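+ ///
+ /// For example (illustrative method headers, not from the source), all of
+ /// the following start with a self type:
+ ///
+ ///     fn by_ref(&self);
+ ///     fn by_mut_ref(&mut self);
+ ///     fn by_value(self);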
fn parse_fn_decl_with_self(&mut self, parse_arg_fn: |&mut Parser| -> Arg) -> (ExplicitSelf, P) { fn maybe_parse_borrowed_explicit_self(this: &mut Parser) @@ -3921,7 +3922,7 @@ impl<'a> Parser<'a> { (spanned(lo, hi, explicit_self), fn_decl) } - // parse the |arg, arg| header on a lambda + /// Parse the |arg, arg| header on a lambda fn parse_fn_block_decl(&mut self) -> P { let inputs_captures = { if self.eat(&token::OROR) { @@ -3953,7 +3954,7 @@ impl<'a> Parser<'a> { }) } - // Parses the `(arg, arg) -> return_type` header on a procedure. + /// Parses the `(arg, arg) -> return_type` header on a procedure. fn parse_proc_decl(&mut self) -> P { let inputs = self.parse_unspanned_seq(&token::LPAREN, @@ -3979,7 +3980,7 @@ impl<'a> Parser<'a> { }) } - // parse the name and optional generic types of a function header. + /// Parse the name and optional generic types of a function header. fn parse_fn_header(&mut self) -> (Ident, ast::Generics) { let id = self.parse_ident(); let generics = self.parse_generics(); @@ -3999,7 +4000,7 @@ impl<'a> Parser<'a> { } } - // parse an item-position function declaration. + /// Parse an item-position function declaration. fn parse_item_fn(&mut self, fn_style: FnStyle, abi: abi::Abi) -> ItemInfo { let (ident, generics) = self.parse_fn_header(); let decl = self.parse_fn_decl(false); @@ -4007,7 +4008,7 @@ impl<'a> Parser<'a> { (ident, ItemFn(decl, fn_style, abi, generics, body), Some(inner_attrs)) } - // parse a method in a trait impl, starting with `attrs` attributes. + /// Parse a method in a trait impl, starting with `attrs` attributes. fn parse_method(&mut self, already_parsed_attrs: Option>) -> Gc { let next_attrs = self.parse_outer_attributes(); @@ -4043,7 +4044,7 @@ impl<'a> Parser<'a> { } } - // parse trait Foo { ... } + /// Parse trait Foo { ... } fn parse_item_trait(&mut self) -> ItemInfo { let ident = self.parse_ident(); let tps = self.parse_generics(); @@ -4062,9 +4063,9 @@ impl<'a> Parser<'a> { (ident, ItemTrait(tps, sized, traits, meths), None) } - // Parses two variants (with the region/type params always optional): - // impl Foo { ... } - // impl ToString for ~[T] { ... } + /// Parses two variants (with the region/type params always optional): + /// impl Foo { ... } + /// impl ToString for ~[T] { ... } fn parse_item_impl(&mut self) -> ItemInfo { // First, parse type parameters if necessary. let generics = self.parse_generics(); @@ -4117,7 +4118,7 @@ impl<'a> Parser<'a> { (ident, ItemImpl(generics, opt_trait, ty, meths), Some(inner_attrs)) } - // parse a::B + /// Parse a::B fn parse_trait_ref(&mut self) -> TraitRef { ast::TraitRef { path: self.parse_path(LifetimeAndTypesWithoutColons).path, @@ -4125,7 +4126,7 @@ impl<'a> Parser<'a> { } } - // parse B + C + D + /// Parse B + C + D fn parse_trait_ref_list(&mut self, ket: &token::Token) -> Vec { self.parse_seq_to_before_end( ket, @@ -4134,7 +4135,7 @@ impl<'a> Parser<'a> { ) } - // parse struct Foo { ... } + /// Parse struct Foo { ... 
} fn parse_item_struct(&mut self, is_virtual: bool) -> ItemInfo { let class_name = self.parse_ident(); let generics = self.parse_generics(); @@ -4217,7 +4218,7 @@ impl<'a> Parser<'a> { None) } - // parse a structure field declaration + /// Parse a structure field declaration pub fn parse_single_struct_field(&mut self, vis: Visibility, attrs: Vec ) @@ -4239,7 +4240,7 @@ impl<'a> Parser<'a> { a_var } - // parse an element of a struct definition + /// Parse an element of a struct definition fn parse_struct_decl_field(&mut self) -> StructField { let attrs = self.parse_outer_attributes(); @@ -4251,7 +4252,7 @@ impl<'a> Parser<'a> { return self.parse_single_struct_field(Inherited, attrs); } - // parse visiility: PUB, PRIV, or nothing + /// Parse visibility: PUB, PRIV, or nothing fn parse_visibility(&mut self) -> Visibility { if self.eat_keyword(keywords::Pub) { Public } else { Inherited } @@ -4273,8 +4274,8 @@ impl<'a> Parser<'a> { } } - // given a termination token and a vector of already-parsed - // attributes (of length 0 or 1), parse all of the items in a module + /// Given a termination token and a vector of already-parsed + /// attributes (of length 0 or 1), parse all of the items in a module fn parse_mod_items(&mut self, term: token::Token, first_item_attrs: Vec, @@ -4342,7 +4343,7 @@ impl<'a> Parser<'a> { (id, ItemStatic(ty, m, e), None) } - // parse a `mod { ... }` or `mod ;` item + /// Parse a `mod { ... }` or `mod ;` item fn parse_item_mod(&mut self, outer_attrs: &[Attribute]) -> ItemInfo { let id_span = self.span; let id = self.parse_ident(); @@ -4380,7 +4381,7 @@ impl<'a> Parser<'a> { self.mod_path_stack.pop().unwrap(); } - // read a module from a source file. + /// Read a module from a source file. fn eval_src_mod(&mut self, id: ast::Ident, outer_attrs: &[ast::Attribute], @@ -4488,7 +4489,7 @@ impl<'a> Parser<'a> { return (ast::ItemMod(m0), mod_attrs); } - // parse a function declaration from a foreign module + /// Parse a function declaration from a foreign module fn parse_item_foreign_fn(&mut self, vis: ast::Visibility, attrs: Vec) -> Gc { let lo = self.span.lo; @@ -4506,7 +4507,7 @@ impl<'a> Parser<'a> { vis: vis } } - // parse a static item from a foreign module + /// Parse a static item from a foreign module fn parse_item_foreign_static(&mut self, vis: ast::Visibility, attrs: Vec ) -> Gc { let lo = self.span.lo; @@ -4529,7 +4530,7 @@ impl<'a> Parser<'a> { } } - // parse safe/unsafe and fn + /// Parse safe/unsafe and fn fn parse_fn_style(&mut self) -> FnStyle { if self.eat_keyword(keywords::Fn) { NormalFn } else if self.eat_keyword(keywords::Unsafe) { @@ -4540,8 +4541,8 @@ impl<'a> Parser<'a> { } - // at this point, this is essentially a wrapper for - // parse_foreign_items. + /// At this point, this is essentially a wrapper for + /// parse_foreign_items. fn parse_foreign_mod_items(&mut self, abi: abi::Abi, first_item_attrs: Vec ) @@ -4642,7 +4643,7 @@ impl<'a> Parser<'a> { return IoviItem(item); } - // parse type Foo = Bar; + /// Parse type Foo = Bar; fn parse_item_type(&mut self) -> ItemInfo { let ident = self.parse_ident(); let tps = self.parse_generics(); @@ -4652,8 +4653,8 @@ impl<'a> Parser<'a> { (ident, ItemTy(ty, tps), None) } - // parse a structure-like enum variant definition - // this should probably be renamed or refactored... + /// Parse a structure-like enum variant definition + /// this should probably be renamed or refactored... 
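parse_mod_items above is an instance of a recurring shape in this file: loop until a terminator token, parsing one item per iteration, with any attributes the caller already consumed handed to the first item. Reduced to a standalone sketch (toy types, modern Rust, not the real API):

    #[derive(Clone, PartialEq, Debug)]
    enum Tok { Item(&'static str), RBrace }

    /// Parse items until `term`; `first_item_attrs` stands in for attributes
    /// consumed before the caller knew a module body follows. They attach to
    /// the first item parsed here.
    fn parse_mod_items(mut toks: &[Tok], term: Tok, first_item_attrs: &[&str]) -> Vec<String> {
        let mut items = Vec::new();
        while toks.first() != Some(&term) {
            match toks.first() {
                Some(Tok::Item(name)) => {
                    let attrs: &[&str] =
                        if items.is_empty() { first_item_attrs } else { &[] };
                    items.push(format!("{:?} {}", attrs, name));
                    toks = &toks[1..];
                }
                other => panic!("expected an item or {:?}, found {:?}", term, other),
            }
        }
        items
    }

    fn main() {
        let toks = [Tok::Item("fn a"), Tok::Item("fn b"), Tok::RBrace];
        // prints: ["[\"inline\"] fn a", "[] fn b"]
        println!("{:?}", parse_mod_items(&toks, Tok::RBrace, &["inline"]));
    }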
fn parse_struct_def(&mut self) -> Gc { let mut fields: Vec = Vec::new(); while self.token != token::RBRACE { @@ -4669,7 +4670,7 @@ impl<'a> Parser<'a> { }; } - // parse the part of an "enum" decl following the '{' + /// Parse the part of an "enum" decl following the '{' fn parse_enum_def(&mut self, _generics: &ast::Generics) -> EnumDef { let mut variants = Vec::new(); let mut all_nullary = true; @@ -4733,7 +4734,7 @@ impl<'a> Parser<'a> { ast::EnumDef { variants: variants } } - // parse an "enum" declaration + /// Parse an "enum" declaration fn parse_item_enum(&mut self) -> ItemInfo { let id = self.parse_ident(); let generics = self.parse_generics(); @@ -4750,8 +4751,8 @@ impl<'a> Parser<'a> { } } - // Parses a string as an ABI spec on an extern type or module. Consumes - // the `extern` keyword, if one is found. + /// Parses a string as an ABI spec on an extern type or module. Consumes + /// the `extern` keyword, if one is found. fn parse_opt_abi(&mut self) -> Option { match self.token { token::LIT_STR(s) | token::LIT_STR_RAW(s, _) => { @@ -4777,10 +4778,10 @@ impl<'a> Parser<'a> { } } - // parse one of the items or view items allowed by the - // flags; on failure, return IoviNone. - // NB: this function no longer parses the items inside an - // extern crate. + /// Parse one of the items or view items allowed by the + /// flags; on failure, return IoviNone. + /// NB: this function no longer parses the items inside an + /// extern crate. fn parse_item_or_view_item(&mut self, attrs: Vec , macros_allowed: bool) @@ -4988,7 +4989,7 @@ impl<'a> Parser<'a> { self.parse_macro_use_or_failure(attrs,macros_allowed,lo,visibility) } - // parse a foreign item; on failure, return IoviNone. + /// Parse a foreign item; on failure, return IoviNone. fn parse_foreign_item(&mut self, attrs: Vec , macros_allowed: bool) @@ -5011,7 +5012,7 @@ impl<'a> Parser<'a> { self.parse_macro_use_or_failure(attrs,macros_allowed,lo,visibility) } - // this is the fall-through for parsing items. + /// This is the fall-through for parsing items. fn parse_macro_use_or_failure( &mut self, attrs: Vec , @@ -5095,17 +5096,17 @@ impl<'a> Parser<'a> { } } - // parse, e.g., "use a::b::{z,y}" + /// Parse, e.g., "use a::b::{z,y}" fn parse_use(&mut self) -> ViewItem_ { return ViewItemUse(self.parse_view_path()); } - // matches view_path : MOD? IDENT EQ non_global_path - // | MOD? non_global_path MOD_SEP LBRACE RBRACE - // | MOD? non_global_path MOD_SEP LBRACE ident_seq RBRACE - // | MOD? non_global_path MOD_SEP STAR - // | MOD? non_global_path + /// Matches view_path : MOD? IDENT EQ non_global_path + /// | MOD? non_global_path MOD_SEP LBRACE RBRACE + /// | MOD? non_global_path MOD_SEP LBRACE ident_seq RBRACE + /// | MOD? non_global_path MOD_SEP STAR + /// | MOD? non_global_path fn parse_view_path(&mut self) -> Gc { let lo = self.span.lo; @@ -5228,10 +5229,10 @@ impl<'a> Parser<'a> { ViewPathSimple(last, path, ast::DUMMY_NODE_ID)); } - // Parses a sequence of items. Stops when it finds program - // text that can't be parsed as an item - // - mod_items uses extern_mod_allowed = true - // - block_tail_ uses extern_mod_allowed = false + /// Parses a sequence of items. Stops when it finds program + /// text that can't be parsed as an item + /// - mod_items uses extern_mod_allowed = true + /// - block_tail_ uses extern_mod_allowed = false fn parse_items_and_view_items(&mut self, first_item_attrs: Vec , mut extern_mod_allowed: bool, @@ -5313,8 +5314,8 @@ impl<'a> Parser<'a> { } } - // Parses a sequence of foreign items. 
Stops when it finds program - // text that can't be parsed as an item + /// Parses a sequence of foreign items. Stops when it finds program + /// text that can't be parsed as an item fn parse_foreign_items(&mut self, first_item_attrs: Vec , macros_allowed: bool) -> ParsedItemsAndViewItems { @@ -5353,8 +5354,8 @@ impl<'a> Parser<'a> { } } - // Parses a source module as a crate. This is the main - // entry point for the parser. + /// Parses a source module as a crate. This is the main + /// entry point for the parser. pub fn parse_crate_mod(&mut self) -> Crate { let lo = self.span.lo; // parse the crate's inner attrs, maybe (oops) one diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 55db3482a61a7..8bd74b9ca770d 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -92,9 +92,9 @@ pub enum Token { LIT_BINARY_RAW(Rc>, uint), /* raw binary str delimited by n hash symbols */ /* Name components */ - // an identifier contains an "is_mod_name" boolean, - // indicating whether :: follows this token with no - // whitespace in between. + /// An identifier contains an "is_mod_name" boolean, + /// indicating whether :: follows this token with no + /// whitespace in between. IDENT(ast::Ident, bool), UNDERSCORE, LIFETIME(ast::Ident), diff --git a/src/libsyntax/print/pp.rs b/src/libsyntax/print/pp.rs index 24ab4b38e54b8..fe84eeff4f87f 100644 --- a/src/libsyntax/print/pp.rs +++ b/src/libsyntax/print/pp.rs @@ -8,58 +8,56 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -/* - * This pretty-printer is a direct reimplementation of Philip Karlton's - * Mesa pretty-printer, as described in appendix A of - * - * STAN-CS-79-770: "Pretty Printing", by Derek C. Oppen. - * Stanford Department of Computer Science, 1979. - * - * The algorithm's aim is to break a stream into as few lines as possible - * while respecting the indentation-consistency requirements of the enclosing - * block, and avoiding breaking at silly places on block boundaries, for - * example, between "x" and ")" in "x)". - * - * I am implementing this algorithm because it comes with 20 pages of - * documentation explaining its theory, and because it addresses the set of - * concerns I've seen other pretty-printers fall down on. Weirdly. Even though - * it's 32 years old. What can I say? - * - * Despite some redundancies and quirks in the way it's implemented in that - * paper, I've opted to keep the implementation here as similar as I can, - * changing only what was blatantly wrong, a typo, or sufficiently - * non-idiomatic rust that it really stuck out. - * - * In particular you'll see a certain amount of churn related to INTEGER vs. - * CARDINAL in the Mesa implementation. Mesa apparently interconverts the two - * somewhat readily? In any case, I've used uint for indices-in-buffers and - * ints for character-sizes-and-indentation-offsets. This respects the need - * for ints to "go negative" while carrying a pending-calculation balance, and - * helps differentiate all the numbers flying around internally (slightly). - * - * I also inverted the indentation arithmetic used in the print stack, since - * the Mesa implementation (somewhat randomly) stores the offset on the print - * stack in terms of margin-col rather than col itself. I store col. - * - * I also implemented a small change in the String token, in that I store an - * explicit length for the string. For most tokens this is just the length of - * the accompanying string. 
But it's necessary to permit it to differ, for - * encoding things that are supposed to "go on their own line" -- certain - * classes of comment and blank-line -- where relying on adjacent - * hardbreak-like Break tokens with long blankness indication doesn't actually - * work. To see why, consider when there is a "thing that should be on its own - * line" between two long blocks, say functions. If you put a hardbreak after - * each function (or before each) and the breaking algorithm decides to break - * there anyways (because the functions themselves are long) you wind up with - * extra blank lines. If you don't put hardbreaks you can wind up with the - * "thing which should be on its own line" not getting its own line in the - * rare case of "really small functions" or such. This re-occurs with comments - * and explicit blank lines. So in those cases we use a string with a payload - * we want isolated to a line and an explicit length that's huge, surrounded - * by two zero-length breaks. The algorithm will try its best to fit it on a - * line (which it can't) and so naturally place the content on its own line to - * avoid combining it with other lines and making matters even worse. - */ +//! This pretty-printer is a direct reimplementation of Philip Karlton's +//! Mesa pretty-printer, as described in appendix A of +//! +//! STAN-CS-79-770: "Pretty Printing", by Derek C. Oppen. +//! Stanford Department of Computer Science, 1979. +//! +//! The algorithm's aim is to break a stream into as few lines as possible +//! while respecting the indentation-consistency requirements of the enclosing +//! block, and avoiding breaking at silly places on block boundaries, for +//! example, between "x" and ")" in "x)". +//! +//! I am implementing this algorithm because it comes with 20 pages of +//! documentation explaining its theory, and because it addresses the set of +//! concerns I've seen other pretty-printers fall down on. Weirdly. Even though +//! it's 32 years old. What can I say? +//! +//! Despite some redundancies and quirks in the way it's implemented in that +//! paper, I've opted to keep the implementation here as similar as I can, +//! changing only what was blatantly wrong, a typo, or sufficiently +//! non-idiomatic rust that it really stuck out. +//! +//! In particular you'll see a certain amount of churn related to INTEGER vs. +//! CARDINAL in the Mesa implementation. Mesa apparently interconverts the two +//! somewhat readily? In any case, I've used uint for indices-in-buffers and +//! ints for character-sizes-and-indentation-offsets. This respects the need +//! for ints to "go negative" while carrying a pending-calculation balance, and +//! helps differentiate all the numbers flying around internally (slightly). +//! +//! I also inverted the indentation arithmetic used in the print stack, since +//! the Mesa implementation (somewhat randomly) stores the offset on the print +//! stack in terms of margin-col rather than col itself. I store col. +//! +//! I also implemented a small change in the String token, in that I store an +//! explicit length for the string. For most tokens this is just the length of +//! the accompanying string. But it's necessary to permit it to differ, for +//! encoding things that are supposed to "go on their own line" -- certain +//! classes of comment and blank-line -- where relying on adjacent +//! hardbreak-like Break tokens with long blankness indication doesn't actually +//! work. To see why, consider when there is a "thing that should be on its own +//! 
line" between two long blocks, say functions. If you put a hardbreak after +//! each function (or before each) and the breaking algorithm decides to break +//! there anyways (because the functions themselves are long) you wind up with +//! extra blank lines. If you don't put hardbreaks you can wind up with the +//! "thing which should be on its own line" not getting its own line in the +//! rare case of "really small functions" or such. This re-occurs with comments +//! and explicit blank lines. So in those cases we use a string with a payload +//! we want isolated to a line and an explicit length that's huge, surrounded +//! by two zero-length breaks. The algorithm will try its best to fit it on a +//! line (which it can't) and so naturally place the content on its own line to +//! avoid combining it with other lines and making matters even worse. use std::io; use std::string::String; @@ -186,107 +184,116 @@ pub fn mk_printer(out: Box, linewidth: uint) -> Printer { } -/* - * In case you do not have the paper, here is an explanation of what's going - * on. - * - * There is a stream of input tokens flowing through this printer. - * - * The printer buffers up to 3N tokens inside itself, where N is linewidth. - * Yes, linewidth is chars and tokens are multi-char, but in the worst - * case every token worth buffering is 1 char long, so it's ok. - * - * Tokens are String, Break, and Begin/End to delimit blocks. - * - * Begin tokens can carry an offset, saying "how far to indent when you break - * inside here", as well as a flag indicating "consistent" or "inconsistent" - * breaking. Consistent breaking means that after the first break, no attempt - * will be made to flow subsequent breaks together onto lines. Inconsistent - * is the opposite. Inconsistent breaking example would be, say: - * - * foo(hello, there, good, friends) - * - * breaking inconsistently to become - * - * foo(hello, there - * good, friends); - * - * whereas a consistent breaking would yield: - * - * foo(hello, - * there - * good, - * friends); - * - * That is, in the consistent-break blocks we value vertical alignment - * more than the ability to cram stuff onto a line. But in all cases if it - * can make a block a one-liner, it'll do so. - * - * Carrying on with high-level logic: - * - * The buffered tokens go through a ring-buffer, 'tokens'. The 'left' and - * 'right' indices denote the active portion of the ring buffer as well as - * describing hypothetical points-in-the-infinite-stream at most 3N tokens - * apart (i.e. "not wrapped to ring-buffer boundaries"). The paper will switch - * between using 'left' and 'right' terms to denote the wrapepd-to-ring-buffer - * and point-in-infinite-stream senses freely. - * - * There is a parallel ring buffer, 'size', that holds the calculated size of - * each token. Why calculated? Because for Begin/End pairs, the "size" - * includes everything between the pair. That is, the "size" of Begin is - * actually the sum of the sizes of everything between Begin and the paired - * End that follows. Since that is arbitrarily far in the future, 'size' is - * being rewritten regularly while the printer runs; in fact most of the - * machinery is here to work out 'size' entries on the fly (and give up when - * they're so obviously over-long that "infinity" is a good enough - * approximation for purposes of line breaking). 
- * - * The "input side" of the printer is managed as an abstract process called - * SCAN, which uses 'scan_stack', 'scan_stack_empty', 'top' and 'bottom', to - * manage calculating 'size'. SCAN is, in other words, the process of - * calculating 'size' entries. - * - * The "output side" of the printer is managed by an abstract process called - * PRINT, which uses 'print_stack', 'margin' and 'space' to figure out what to - * do with each token/size pair it consumes as it goes. It's trying to consume - * the entire buffered window, but can't output anything until the size is >= - * 0 (sizes are set to negative while they're pending calculation). - * - * So SCAN takes input and buffers tokens and pending calculations, while - * PRINT gobbles up completed calculations and tokens from the buffer. The - * theory is that the two can never get more than 3N tokens apart, because - * once there's "obviously" too much data to fit on a line, in a size - * calculation, SCAN will write "infinity" to the size and let PRINT consume - * it. - * - * In this implementation (following the paper, again) the SCAN process is - * the method called 'pretty_print', and the 'PRINT' process is the method - * called 'print'. - */ +/// In case you do not have the paper, here is an explanation of what's going +/// on. +/// +/// There is a stream of input tokens flowing through this printer. +/// +/// The printer buffers up to 3N tokens inside itself, where N is linewidth. +/// Yes, linewidth is chars and tokens are multi-char, but in the worst +/// case every token worth buffering is 1 char long, so it's ok. +/// +/// Tokens are String, Break, and Begin/End to delimit blocks. +/// +/// Begin tokens can carry an offset, saying "how far to indent when you break +/// inside here", as well as a flag indicating "consistent" or "inconsistent" +/// breaking. Consistent breaking means that after the first break, no attempt +/// will be made to flow subsequent breaks together onto lines. Inconsistent +/// is the opposite. Inconsistent breaking example would be, say: +/// +/// foo(hello, there, good, friends) +/// +/// breaking inconsistently to become +/// +/// foo(hello, there +/// good, friends); +/// +/// whereas a consistent breaking would yield: +/// +/// foo(hello, +/// there +/// good, +/// friends); +/// +/// That is, in the consistent-break blocks we value vertical alignment +/// more than the ability to cram stuff onto a line. But in all cases if it +/// can make a block a one-liner, it'll do so. +/// +/// Carrying on with high-level logic: +/// +/// The buffered tokens go through a ring-buffer, 'tokens'. The 'left' and +/// 'right' indices denote the active portion of the ring buffer as well as +/// describing hypothetical points-in-the-infinite-stream at most 3N tokens +/// apart (i.e. "not wrapped to ring-buffer boundaries"). The paper will switch +/// between using 'left' and 'right' terms to denote the wrapepd-to-ring-buffer +/// and point-in-infinite-stream senses freely. +/// +/// There is a parallel ring buffer, 'size', that holds the calculated size of +/// each token. Why calculated? Because for Begin/End pairs, the "size" +/// includes everything betwen the pair. That is, the "size" of Begin is +/// actually the sum of the sizes of everything between Begin and the paired +/// End that follows. 
Since that is arbitrarily far in the future, 'size' is +/// being rewritten regularly while the printer runs; in fact most of the +/// machinery is here to work out 'size' entries on the fly (and give up when +/// they're so obviously over-long that "infinity" is a good enough +/// approximation for purposes of line breaking). +/// +/// The "input side" of the printer is managed as an abstract process called +/// SCAN, which uses 'scan_stack', 'scan_stack_empty', 'top' and 'bottom', to +/// manage calculating 'size'. SCAN is, in other words, the process of +/// calculating 'size' entries. +/// +/// The "output side" of the printer is managed by an abstract process called +/// PRINT, which uses 'print_stack', 'margin' and 'space' to figure out what to +/// do with each token/size pair it consumes as it goes. It's trying to consume +/// the entire buffered window, but can't output anything until the size is >= +/// 0 (sizes are set to negative while they're pending calculation). +/// +/// So SCAN takes input and buffers tokens and pending calculations, while +/// PRINT gobbles up completed calculations and tokens from the buffer. The +/// theory is that the two can never get more than 3N tokens apart, because +/// once there's "obviously" too much data to fit on a line, in a size +/// calculation, SCAN will write "infinity" to the size and let PRINT consume +/// it. +/// +/// In this implementation (following the paper, again) the SCAN process is +/// the method called 'pretty_print', and the 'PRINT' process is the method +/// called 'print'. pub struct Printer { pub out: Box, buf_len: uint, - margin: int, // width of lines we're constrained to - space: int, // number of spaces left on line - left: uint, // index of left side of input stream - right: uint, // index of right side of input stream - token: Vec , // ring-buffr stream goes through - size: Vec , // ring-buffer of calculated sizes - left_total: int, // running size of stream "...left" - right_total: int, // running size of stream "...right" - // pseudo-stack, really a ring too. Holds the - // primary-ring-buffers index of the Begin that started the - // current block, possibly with the most recent Break after that - // Begin (if there is any) on top of it. Stuff is flushed off the - // bottom as it becomes irrelevant due to the primary ring-buffer - // advancing. + /// Width of lines we're constrained to + margin: int, + /// Number of spaces left on line + space: int, + /// Index of left side of input stream + left: uint, + /// Index of right side of input stream + right: uint, + /// Ring-buffer stream goes through + token: Vec , + /// Ring-buffer of calculated sizes + size: Vec , + /// Running size of stream "...left" + left_total: int, + /// Running size of stream "...right" + right_total: int, + /// Pseudo-stack, really a ring too. Holds the + /// primary-ring-buffers index of the Begin that started the + /// current block, possibly with the most recent Break after that + /// Begin (if there is any) on top of it. Stuff is flushed off the + /// bottom as it becomes irrelevant due to the primary ring-buffer + /// advancing. 
scan_stack: Vec , - scan_stack_empty: bool, // top==bottom disambiguator - top: uint, // index of top of scan_stack - bottom: uint, // index of bottom of scan_stack - // stack of blocks-in-progress being flushed by print + /// Top==bottom disambiguator + scan_stack_empty: bool, + /// Index of top of scan_stack + top: uint, + /// Index of bottom of scan_stack + bottom: uint, + /// Stack of blocks-in-progress being flushed by print print_stack: Vec , - // buffered indentation to avoid writing trailing whitespace + /// Buffered indentation to avoid writing trailing whitespace pending_indentation: int, } diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index a5d70a9333dde..170cb7a249c4b 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -88,9 +88,9 @@ pub static indent_unit: uint = 4u; pub static default_columns: uint = 78u; -// Requires you to pass an input filename and reader so that -// it can scan the input text for comments and literals to -// copy forward. +/// Requires you to pass an input filename and reader so that +/// it can scan the input text for comments and literals to +/// copy forward. pub fn print_crate<'a>(cm: &'a CodeMap, span_diagnostic: &diagnostic::SpanHandler, krate: &ast::Crate, diff --git a/src/libsyntax/util/interner.rs b/src/libsyntax/util/interner.rs index 4d88aaca7486b..55fff38f99131 100644 --- a/src/libsyntax/util/interner.rs +++ b/src/libsyntax/util/interner.rs @@ -8,9 +8,9 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -// An "interner" is a data structure that associates values with uint tags and -// allows bidirectional lookup; i.e. given a value, one can easily find the -// type, and vice versa. +//! An "interner" is a data structure that associates values with uint tags and +//! allows bidirectional lookup; i.e. given a value, one can easily find the +//! type, and vice versa. use ast::Name; diff --git a/src/libsyntax/util/parser_testing.rs b/src/libsyntax/util/parser_testing.rs index 04116dec60e31..f50739a7069e0 100644 --- a/src/libsyntax/util/parser_testing.rs +++ b/src/libsyntax/util/parser_testing.rs @@ -17,14 +17,14 @@ use parse::token; use std::gc::Gc; -// map a string to tts, using a made-up filename: +/// Map a string to tts, using a made-up filename: pub fn string_to_tts(source_str: String) -> Vec { let ps = new_parse_sess(); filemap_to_tts(&ps, string_to_filemap(&ps, source_str, "bogofile".to_string())) } -// map string to parser (via tts) +/// Map string to parser (via tts) pub fn string_to_parser<'a>(ps: &'a ParseSess, source_str: String) -> Parser<'a> { new_parser_from_source_str(ps, Vec::new(), @@ -40,51 +40,51 @@ fn with_error_checking_parse(s: String, f: |&mut Parser| -> T) -> T { x } -// parse a string, return a crate. +/// Parse a string, return a crate. 
pub fn string_to_crate (source_str : String) -> ast::Crate { with_error_checking_parse(source_str, |p| { p.parse_crate_mod() }) } -// parse a string, return an expr +/// Parse a string, return an expr pub fn string_to_expr (source_str : String) -> Gc { with_error_checking_parse(source_str, |p| { p.parse_expr() }) } -// parse a string, return an item +/// Parse a string, return an item pub fn string_to_item (source_str : String) -> Option> { with_error_checking_parse(source_str, |p| { p.parse_item(Vec::new()) }) } -// parse a string, return a stmt +/// Parse a string, return a stmt pub fn string_to_stmt(source_str : String) -> Gc { with_error_checking_parse(source_str, |p| { p.parse_stmt(Vec::new()) }) } -// parse a string, return a pat. Uses "irrefutable"... which doesn't -// (currently) affect parsing. +/// Parse a string, return a pat. Uses "irrefutable"... which doesn't +/// (currently) affect parsing. pub fn string_to_pat(source_str: String) -> Gc { string_to_parser(&new_parse_sess(), source_str).parse_pat() } -// convert a vector of strings to a vector of ast::Ident's +/// Convert a vector of strings to a vector of ast::Ident's pub fn strs_to_idents(ids: Vec<&str> ) -> Vec { ids.iter().map(|u| token::str_to_ident(*u)).collect() } -// does the given string match the pattern? whitespace in the first string -// may be deleted or replaced with other whitespace to match the pattern. -// this function is unicode-ignorant; fortunately, the careful design of -// UTF-8 mitigates this ignorance. In particular, this function only collapses -// sequences of \n, \r, ' ', and \t, but it should otherwise tolerate unicode -// chars. Unsurprisingly, it doesn't do NKF-normalization(?). +/// Does the given string match the pattern? whitespace in the first string +/// may be deleted or replaced with other whitespace to match the pattern. +/// this function is unicode-ignorant; fortunately, the careful design of +/// UTF-8 mitigates this ignorance. In particular, this function only collapses +/// sequences of \n, \r, ' ', and \t, but it should otherwise tolerate unicode +/// chars. Unsurprisingly, it doesn't do NKF-normalization(?). pub fn matches_codepattern(a : &str, b : &str) -> bool { let mut idx_a = 0; let mut idx_b = 0; @@ -122,9 +122,9 @@ pub fn matches_codepattern(a : &str, b : &str) -> bool { } } -// given a string and an index, return the first uint >= idx -// that is a non-ws-char or is outside of the legal range of -// the string. +/// Given a string and an index, return the first uint >= idx +/// that is a non-ws-char or is outside of the legal range of +/// the string. fn scan_for_non_ws_or_end(a : &str, idx: uint) -> uint { let mut i = idx; let len = a.len(); @@ -134,7 +134,7 @@ fn scan_for_non_ws_or_end(a : &str, idx: uint) -> uint { i } -// copied from lexer. +/// Copied from lexer. pub fn is_whitespace(c: char) -> bool { return c == ' ' || c == '\t' || c == '\r' || c == '\n'; } diff --git a/src/libsyntax/visit.rs b/src/libsyntax/visit.rs index df34ff30db67f..9298b58c4267d 100644 --- a/src/libsyntax/visit.rs +++ b/src/libsyntax/visit.rs @@ -8,6 +8,18 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +//! Context-passing AST walker. Each overridden visit method has full control +//! over what happens with its node, it can do its own traversal of the node's +//! children (potentially passing in different contexts to each), call +//! `visit::visit_*` to apply the default traversal algorithm (again, it can +//! 
override the context), or prevent deeper traversal by doing nothing. +//! +//! Note: it is an important invariant that the default visitor walks the body +//! of a function in "execution order" (more concretely, reverse post-order +//! with respect to the CFG implied by the AST), meaning that if AST node A may +//! execute before AST node B, then A is visited first. The borrow checker in +//! particular relies on this property. +//! use abi::Abi; use ast::*; use ast; @@ -17,27 +29,15 @@ use owned_slice::OwnedSlice; use std::gc::Gc; -// Context-passing AST walker. Each overridden visit method has full control -// over what happens with its node, it can do its own traversal of the node's -// children (potentially passing in different contexts to each), call -// visit::visit_* to apply the default traversal algorithm (again, it can -// override the context), or prevent deeper traversal by doing nothing. -// -// Note: it is an important invariant that the default visitor walks the body -// of a function in "execution order" (more concretely, reverse post-order -// with respect to the CFG implied by the AST), meaning that if AST node A may -// execute before AST node B, then A is visited first. The borrow checker in -// particular relies on this property. - pub enum FnKind<'a> { - // fn foo() or extern "Abi" fn foo() + /// fn foo() or extern "Abi" fn foo() FkItemFn(Ident, &'a Generics, FnStyle, Abi), - // fn foo(&self) + /// fn foo(&self) FkMethod(Ident, &'a Generics, &'a Method), - // |x, y| ... - // proc(x, y) ... + /// |x, y| ... + /// proc(x, y) ... FkFnBlock, } From 91357a9c4eebf9ac9ff2b5c4ab5b27496cf818cd Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Mon, 9 Jun 2014 13:19:38 -0700 Subject: [PATCH 02/15] token: replace ast::Ident with just Ident --- src/libsyntax/parse/token.rs | 37 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 8bd74b9ca770d..c7aeae04ba2fa 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -84,10 +84,10 @@ pub enum Token { LIT_INT(i64, ast::IntTy), LIT_UINT(u64, ast::UintTy), LIT_INT_UNSUFFIXED(i64), - LIT_FLOAT(ast::Ident, ast::FloatTy), - LIT_FLOAT_UNSUFFIXED(ast::Ident), - LIT_STR(ast::Ident), - LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */ + LIT_FLOAT(Ident, ast::FloatTy), + LIT_FLOAT_UNSUFFIXED(Ident), + LIT_STR(Ident), + LIT_STR_RAW(Ident, uint), /* raw str delimited by n hash symbols */ LIT_BINARY(Rc>), LIT_BINARY_RAW(Rc>, uint), /* raw binary str delimited by n hash symbols */ @@ -95,14 +95,14 @@ pub enum Token { /// An identifier contains an "is_mod_name" boolean, /// indicating whether :: follows this token with no /// whitespace in between. 
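With is_mod_name the parser can tell `a::b` from `a :: b` straight from the token, without re-inspecting source text. A toy illustration of the flag (hypothetical token type, modern Rust; the path-detection use is illustrative, not a claim about every consumer of the flag):

    #[derive(Debug)]
    enum Token {
        /// Identifier plus "does `::` follow immediately?" (is_mod_name)
        Ident(String, bool),
        ModSep, // `::`
    }

    /// An identifier can only begin a tightly-written path like `a::b`
    /// when the lexer saw `::` right behind it.
    fn starts_tight_path(tok: &Token) -> bool {
        matches!(tok, Token::Ident(_, true))
    }

    fn main() {
        // `a::b`: the lexer sets is_mod_name on `a`; with whitespace,
        // as in `a :: b`, it would stay false.
        let toks = [
            Token::Ident("a".to_string(), true),
            Token::ModSep,
            Token::Ident("b".to_string(), false),
        ];
        assert!(starts_tight_path(&toks[0]));
        assert!(!starts_tight_path(&toks[2]));
        println!("{:?}", toks);
    }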
- IDENT(ast::Ident, bool), + IDENT(Ident, bool), UNDERSCORE, - LIFETIME(ast::Ident), + LIFETIME(Ident), /* For interpolation */ INTERPOLATED(Nonterminal), - DOC_COMMENT(ast::Ident), + DOC_COMMENT(Ident), EOF, } @@ -115,11 +115,12 @@ pub enum Nonterminal { NtPat( Gc), NtExpr(Gc), NtTy( P), - // see IDENT, above, for meaning of bool in NtIdent: - NtIdent(Box, bool), - NtMeta(Gc), // stuff inside brackets for attributes + /// See IDENT, above, for meaning of bool in NtIdent: + NtIdent(Box, bool), + /// Stuff inside brackets for attributes + NtMeta(Gc), NtPath(Box), - NtTT( Gc), // needs @ed to break a circularity + NtTT( Gc), // needs Gc'd to break a circularity NtMatchers(Vec ) } @@ -683,20 +684,20 @@ pub fn gensym(s: &str) -> Name { /// Maps a string to an identifier with an empty syntax context. #[inline] -pub fn str_to_ident(s: &str) -> ast::Ident { - ast::Ident::new(intern(s)) +pub fn str_to_ident(s: &str) -> Ident { + Ident::new(intern(s)) } /// Maps a string to a gensym'ed identifier. #[inline] -pub fn gensym_ident(s: &str) -> ast::Ident { - ast::Ident::new(gensym(s)) +pub fn gensym_ident(s: &str) -> Ident { + Ident::new(gensym(s)) } // create a fresh name that maps to the same string as the old one. // note that this guarantees that str_ptr_eq(ident_to_string(src),interner_get(fresh_name(src))); // that is, that the new name and the old one are connected to ptr_eq strings. -pub fn fresh_name(src: &ast::Ident) -> Name { +pub fn fresh_name(src: &Ident) -> Name { let interner = get_ident_interner(); interner.gensym_copy(src.name) // following: debug version. Could work in final except that it's incompatible with @@ -767,8 +768,8 @@ mod test { use ast; use ext::mtwt; - fn mark_ident(id : ast::Ident, m : ast::Mrk) -> ast::Ident { - ast::Ident{name:id.name,ctxt:mtwt::apply_mark(m,id.ctxt)} + fn mark_ident(id : Ident, m : ast::Mrk) -> Ident { + Ident{name:id.name,ctxt:mtwt::apply_mark(m,id.ctxt)} } #[test] fn mtwt_token_eq_test() { From c2dd553bed572368953801dd1d339013f58b53d6 Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Mon, 16 Jun 2014 14:12:44 -0700 Subject: [PATCH 03/15] testsuite: merge some lexer testcases Now that the lexer is more robust, these tests don't need to be in separate files. Yay! 
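The merge relies on compiletest's error annotations: `//~ ERROR` expects an error on its own line, and each caret in `//~^` moves the expectation up one line, so several failures can share one file. An illustrative, intentionally non-compiling example of the convention (not a file from this patch; the error strings are approximated):

    // compile-fail sketch: each annotation names the error expected on a line.
    fn main() {
        let x: u32 = "hello"; //~ ERROR mismatched types
        let y = undefined_name;
        //~^ ERROR unresolved
    }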
--- ...har-escape.rs => lex-bad-char-literals.rs} | 16 +++++- src/test/compile-fail/lex-bad-fp-base-2.rs | 13 ----- src/test/compile-fail/lex-bad-fp-base-3.rs | 13 ----- src/test/compile-fail/lex-bad-fp-base-4.rs | 13 ----- src/test/compile-fail/lex-bad-fp-base-5.rs | 13 ----- src/test/compile-fail/lex-bad-fp-base-6.rs | 13 ----- src/test/compile-fail/lex-bad-fp-base-7.rs | 13 ----- src/test/compile-fail/lex-bad-fp-base-8.rs | 13 ----- src/test/compile-fail/lex-bad-fp-base-9.rs | 13 ----- src/test/compile-fail/lex-bad-fp-lit.rs | 13 ----- .../compile-fail/lex-bad-numeric-literals.rs | 57 +++++++++++++++++++ ...{lex-bad-fp-base-1.rs => lex-bad-token.rs} | 4 +- src/test/compile-fail/lex-hex-float-lit.rs | 13 ----- .../compile-fail/lex-int-lit-too-large-2.rs | 13 ----- .../compile-fail/lex-int-lit-too-large.rs | 13 ----- .../compile-fail/lex-no-valid-digits-2.rs | 13 ----- src/test/compile-fail/lex-no-valid-digits.rs | 13 ----- .../compile-fail/lex-unknown-char-escape.rs | 13 ----- .../compile-fail/lex-unknown-start-tok.rs | 13 ----- .../compile-fail/lex-unknown-str-escape.rs | 13 ----- .../lex-unterminated-char-const.rs | 13 ----- 21 files changed, 73 insertions(+), 238 deletions(-) rename src/test/compile-fail/{lex-illegal-num-char-escape.rs => lex-bad-char-literals.rs} (75%) delete mode 100644 src/test/compile-fail/lex-bad-fp-base-2.rs delete mode 100644 src/test/compile-fail/lex-bad-fp-base-3.rs delete mode 100644 src/test/compile-fail/lex-bad-fp-base-4.rs delete mode 100644 src/test/compile-fail/lex-bad-fp-base-5.rs delete mode 100644 src/test/compile-fail/lex-bad-fp-base-6.rs delete mode 100644 src/test/compile-fail/lex-bad-fp-base-7.rs delete mode 100644 src/test/compile-fail/lex-bad-fp-base-8.rs delete mode 100644 src/test/compile-fail/lex-bad-fp-base-9.rs delete mode 100644 src/test/compile-fail/lex-bad-fp-lit.rs create mode 100644 src/test/compile-fail/lex-bad-numeric-literals.rs rename src/test/compile-fail/{lex-bad-fp-base-1.rs => lex-bad-token.rs} (85%) delete mode 100644 src/test/compile-fail/lex-hex-float-lit.rs delete mode 100644 src/test/compile-fail/lex-int-lit-too-large-2.rs delete mode 100644 src/test/compile-fail/lex-int-lit-too-large.rs delete mode 100644 src/test/compile-fail/lex-no-valid-digits-2.rs delete mode 100644 src/test/compile-fail/lex-no-valid-digits.rs delete mode 100644 src/test/compile-fail/lex-unknown-char-escape.rs delete mode 100644 src/test/compile-fail/lex-unknown-start-tok.rs delete mode 100644 src/test/compile-fail/lex-unknown-str-escape.rs delete mode 100644 src/test/compile-fail/lex-unterminated-char-const.rs diff --git a/src/test/compile-fail/lex-illegal-num-char-escape.rs b/src/test/compile-fail/lex-bad-char-literals.rs similarity index 75% rename from src/test/compile-fail/lex-illegal-num-char-escape.rs rename to src/test/compile-fail/lex-bad-char-literals.rs index 8f4c756c891d5..0eaa81bd6ab95 100644 --- a/src/test/compile-fail/lex-illegal-num-char-escape.rs +++ b/src/test/compile-fail/lex-bad-char-literals.rs @@ -31,5 +31,19 @@ static s: &'static str = static s2: &'static str = "\u23q" //~ ERROR: illegal character in numeric character escape + //~^ ERROR: numeric character escape is too short +; + +static c: char = + '\●' //~ ERROR: unknown character escape +; + +static s: &'static str = + "\●" //~ ERROR: unknown character escape +; + +// THIS MUST BE LAST, since unterminated character constants kill the lexer + +static c: char = + '● //~ ERROR: unterminated character constant ; -//~^^ ERROR: numeric character escape is too short diff --git 
a/src/test/compile-fail/lex-bad-fp-base-2.rs b/src/test/compile-fail/lex-bad-fp-base-2.rs deleted file mode 100644 index b1d45f78e4a5b..0000000000000 --- a/src/test/compile-fail/lex-bad-fp-base-2.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -fn main() { - let b = 0o2f32; //~ ERROR: octal float literal is not supported -} diff --git a/src/test/compile-fail/lex-bad-fp-base-3.rs b/src/test/compile-fail/lex-bad-fp-base-3.rs deleted file mode 100644 index 79c42360adb2f..0000000000000 --- a/src/test/compile-fail/lex-bad-fp-base-3.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -fn main() { - let c = 0o3.0f32; //~ ERROR: octal float literal is not supported -} diff --git a/src/test/compile-fail/lex-bad-fp-base-4.rs b/src/test/compile-fail/lex-bad-fp-base-4.rs deleted file mode 100644 index eaea61b0089af..0000000000000 --- a/src/test/compile-fail/lex-bad-fp-base-4.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -fn main() { - let d = 0o4e4; //~ ERROR: octal float literal is not supported -} diff --git a/src/test/compile-fail/lex-bad-fp-base-5.rs b/src/test/compile-fail/lex-bad-fp-base-5.rs deleted file mode 100644 index ee25ed95639e2..0000000000000 --- a/src/test/compile-fail/lex-bad-fp-base-5.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -fn main() { - let e = 0o5.0e5; //~ ERROR: octal float literal is not supported -} diff --git a/src/test/compile-fail/lex-bad-fp-base-6.rs b/src/test/compile-fail/lex-bad-fp-base-6.rs deleted file mode 100644 index bf08ec1eae5fe..0000000000000 --- a/src/test/compile-fail/lex-bad-fp-base-6.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. 
- -fn main() { - let f = 0o6e6f32; //~ ERROR: octal float literal is not supported -} diff --git a/src/test/compile-fail/lex-bad-fp-base-7.rs b/src/test/compile-fail/lex-bad-fp-base-7.rs deleted file mode 100644 index 921ed8f1b69e8..0000000000000 --- a/src/test/compile-fail/lex-bad-fp-base-7.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -fn main() { - let g = 0o7.0e7f64; //~ ERROR: octal float literal is not supported -} diff --git a/src/test/compile-fail/lex-bad-fp-base-8.rs b/src/test/compile-fail/lex-bad-fp-base-8.rs deleted file mode 100644 index 10e334ede01c2..0000000000000 --- a/src/test/compile-fail/lex-bad-fp-base-8.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -fn main() { - let h = 0x8.0e+9; //~ ERROR: hexadecimal float literal is not supported -} diff --git a/src/test/compile-fail/lex-bad-fp-base-9.rs b/src/test/compile-fail/lex-bad-fp-base-9.rs deleted file mode 100644 index 3ea151cb9826a..0000000000000 --- a/src/test/compile-fail/lex-bad-fp-base-9.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -fn main() { - let i = 0x9.0e-9; //~ ERROR: hexadecimal float literal is not supported -} diff --git a/src/test/compile-fail/lex-bad-fp-lit.rs b/src/test/compile-fail/lex-bad-fp-lit.rs deleted file mode 100644 index 5a5e9d7d8f238..0000000000000 --- a/src/test/compile-fail/lex-bad-fp-lit.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -static f: float = - 1e+ //~ ERROR: scan_exponent: bad fp literal -; diff --git a/src/test/compile-fail/lex-bad-numeric-literals.rs b/src/test/compile-fail/lex-bad-numeric-literals.rs new file mode 100644 index 0000000000000..23a526a1eccc4 --- /dev/null +++ b/src/test/compile-fail/lex-bad-numeric-literals.rs @@ -0,0 +1,57 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +fn main() { + 0o1.0; //~ ERROR: octal float literal is not supported + 0o2f32; //~ ERROR: octal float literal is not supported + 0o3.0f32; //~ ERROR: octal float literal is not supported + 0o4e4; //~ ERROR: octal float literal is not supported + 0o5.0e5; //~ ERROR: octal float literal is not supported + 0o6e6f32; //~ ERROR: octal float literal is not supported + 0o7.0e7f64; //~ ERROR: octal float literal is not supported + 0x8.0e+9; //~ ERROR: hexadecimal float literal is not supported + 0x9.0e-9; //~ ERROR: hexadecimal float literal is not supported +} + +static F: f32 = + 1e+ //~ ERROR: scan_exponent: bad fp literal +; + + +static F: f32 = + 0x539.0 //~ ERROR: hexadecimal float literal is not supported +; + +static I: int = + 99999999999999999999999999999999 //~ ERROR: int literal is too large +; + +static J: int = + 99999999999999999999999999999999u32 //~ ERROR: int literal is too large +; + +static A: int = + 0x //~ ERROR: no valid digits +; +static B: int = + 0xu32 //~ ERROR: no valid digits +; +static C: int = + 0ou32 //~ ERROR: no valid digits +; +static D: int = + 0bu32 //~ ERROR: no valid digits +; +static E: int = + 0b //~ ERROR: no valid digits +; +static F: int = + 0o //~ ERROR: no valid digits +; diff --git a/src/test/compile-fail/lex-bad-fp-base-1.rs b/src/test/compile-fail/lex-bad-token.rs similarity index 85% rename from src/test/compile-fail/lex-bad-fp-base-1.rs rename to src/test/compile-fail/lex-bad-token.rs index 659cb5c837955..d28d9a20c6eed 100644 --- a/src/test/compile-fail/lex-bad-fp-base-1.rs +++ b/src/test/compile-fail/lex-bad-token.rs @@ -8,6 +8,4 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -fn main() { - let a = 0o1.0; //~ ERROR: octal float literal is not supported -} +● //~ ERROR: unknown start of token diff --git a/src/test/compile-fail/lex-hex-float-lit.rs b/src/test/compile-fail/lex-hex-float-lit.rs deleted file mode 100644 index 457c6126c44a5..0000000000000 --- a/src/test/compile-fail/lex-hex-float-lit.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -static f: float = - 0x539.0 //~ ERROR: hexadecimal float literal is not supported -; diff --git a/src/test/compile-fail/lex-int-lit-too-large-2.rs b/src/test/compile-fail/lex-int-lit-too-large-2.rs deleted file mode 100644 index 39d1cba64b08b..0000000000000 --- a/src/test/compile-fail/lex-int-lit-too-large-2.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -static i: int = - 99999999999999999999999999999999u32 //~ ERROR: int literal is too large -; diff --git a/src/test/compile-fail/lex-int-lit-too-large.rs b/src/test/compile-fail/lex-int-lit-too-large.rs deleted file mode 100644 index 6343be651fa59..0000000000000 --- a/src/test/compile-fail/lex-int-lit-too-large.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. 
See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -static i: int = - 99999999999999999999999999999999 //~ ERROR: int literal is too large -; diff --git a/src/test/compile-fail/lex-no-valid-digits-2.rs b/src/test/compile-fail/lex-no-valid-digits-2.rs deleted file mode 100644 index 549dbf5bc8c6c..0000000000000 --- a/src/test/compile-fail/lex-no-valid-digits-2.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -static i: int = - 0xu32 //~ ERROR: no valid digits -; diff --git a/src/test/compile-fail/lex-no-valid-digits.rs b/src/test/compile-fail/lex-no-valid-digits.rs deleted file mode 100644 index 6a5b8e93f010a..0000000000000 --- a/src/test/compile-fail/lex-no-valid-digits.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -static i: int = - 0x //~ ERROR: no valid digits -; diff --git a/src/test/compile-fail/lex-unknown-char-escape.rs b/src/test/compile-fail/lex-unknown-char-escape.rs deleted file mode 100644 index f2445c2b60eba..0000000000000 --- a/src/test/compile-fail/lex-unknown-char-escape.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -static c: char = - '\●' //~ ERROR: unknown character escape -; diff --git a/src/test/compile-fail/lex-unknown-start-tok.rs b/src/test/compile-fail/lex-unknown-start-tok.rs deleted file mode 100644 index 1bb682303451b..0000000000000 --- a/src/test/compile-fail/lex-unknown-start-tok.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -fn main() { - ● //~ ERROR: unknown start of token -} diff --git a/src/test/compile-fail/lex-unknown-str-escape.rs b/src/test/compile-fail/lex-unknown-str-escape.rs deleted file mode 100644 index 9a59c4227114b..0000000000000 --- a/src/test/compile-fail/lex-unknown-str-escape.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. 
-// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -static s: &'static str = - "\●" //~ ERROR: unknown character escape -; diff --git a/src/test/compile-fail/lex-unterminated-char-const.rs b/src/test/compile-fail/lex-unterminated-char-const.rs deleted file mode 100644 index 551360ff9e095..0000000000000 --- a/src/test/compile-fail/lex-unterminated-char-const.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -static c: char = - '● //~ ERROR: unterminated character constant -; From 3c7f619b76b87c400e270eebfa7625844afcd673 Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Wed, 18 Jun 2014 10:40:38 -0700 Subject: [PATCH 04/15] str: use more helpful assertion failure messages --- src/libcore/str.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 94df7a5a6c2d9..f94d5a5e4b5e3 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -1764,7 +1764,9 @@ impl<'a> StrSlice<'a> for &'a str { #[inline] fn slice(&self, begin: uint, end: uint) -> &'a str { - assert!(self.is_char_boundary(begin) && self.is_char_boundary(end)); + assert!(self.is_char_boundary(begin) && self.is_char_boundary(end), + "index {} and/or {} in `{}` do not lie on character boundary", begin, + end, *self); unsafe { raw::slice_bytes(*self, begin, end) } } @@ -1775,7 +1777,8 @@ impl<'a> StrSlice<'a> for &'a str { #[inline] fn slice_to(&self, end: uint) -> &'a str { - assert!(self.is_char_boundary(end)); + assert!(self.is_char_boundary(end), "index {} in `{}` does not lie on \ + a character boundary", end, *self); unsafe { raw::slice_bytes(*self, 0, end) } } From b0303b3c22eeb602ed8d51a267f52306aadc9322 Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Wed, 18 Jun 2014 10:44:09 -0700 Subject: [PATCH 05/15] test: simplify numeric literal cfail tests --- .../compile-fail/lex-bad-numeric-literals.rs | 50 ++++++------------- src/test/compile-fail/no-oct-float-literal.rs | 17 ------- 2 files changed, 14 insertions(+), 53 deletions(-) delete mode 100644 src/test/compile-fail/no-oct-float-literal.rs diff --git a/src/test/compile-fail/lex-bad-numeric-literals.rs b/src/test/compile-fail/lex-bad-numeric-literals.rs index 23a526a1eccc4..9a490be6a0169 100644 --- a/src/test/compile-fail/lex-bad-numeric-literals.rs +++ b/src/test/compile-fail/lex-bad-numeric-literals.rs @@ -18,40 +18,18 @@ fn main() { 0o7.0e7f64; //~ ERROR: octal float literal is not supported 0x8.0e+9; //~ ERROR: hexadecimal float literal is not supported 0x9.0e-9; //~ ERROR: hexadecimal float literal is not supported + 0o; //~ ERROR: no valid digits + 1e+; //~ ERROR: expected at least one digit in exponent + 0x539.0; //~ ERROR: hexadecimal float literal is not supported + 99999999999999999999999999999999; //~ ERROR: int literal is too large + 99999999999999999999999999999999u32; //~ ERROR: int literal is too large + 0x; //~ ERROR: no valid digits + 0xu32; //~ ERROR: no valid digits + 0ou32; //~ ERROR: no valid digits + 0bu32; //~ ERROR: no valid digits + 0b; //~ ERROR: no valid digits + 0o123f64; //~ ERROR: octal float 
literal is not supported + 0o123.456; //~ ERROR: octal float literal is not supported + 0b101f64; //~ ERROR: binary float literal is not supported + 0b111.101; //~ ERROR: binary float literal is not supported } - -static F: f32 = - 1e+ //~ ERROR: scan_exponent: bad fp literal -; - - -static F: f32 = - 0x539.0 //~ ERROR: hexadecimal float literal is not supported -; - -static I: int = - 99999999999999999999999999999999 //~ ERROR: int literal is too large -; - -static J: int = - 99999999999999999999999999999999u32 //~ ERROR: int literal is too large -; - -static A: int = - 0x //~ ERROR: no valid digits -; -static B: int = - 0xu32 //~ ERROR: no valid digits -; -static C: int = - 0ou32 //~ ERROR: no valid digits -; -static D: int = - 0bu32 //~ ERROR: no valid digits -; -static E: int = - 0b //~ ERROR: no valid digits -; -static F: int = - 0o //~ ERROR: no valid digits -; diff --git a/src/test/compile-fail/no-oct-float-literal.rs b/src/test/compile-fail/no-oct-float-literal.rs deleted file mode 100644 index 511116b1c559c..0000000000000 --- a/src/test/compile-fail/no-oct-float-literal.rs +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -// error-pattern:octal float literal is not supported - -fn main() { - 0o123f64; - 0o123.456; - 0o123p4f; -} From ae9a92bd4ef25b8aba0ef06e7ad26319691e9e5a Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Mon, 16 Jun 2014 23:00:49 -0700 Subject: [PATCH 06/15] syntax: use a better Show impl for Ident Rather than just dumping the id in the interner, which is useless, actually print the interned string. Adjust the lexer logging to use Show instead of Poly. --- src/libsyntax/ast.rs | 8 +++++++- src/libsyntax/parse/lexer/mod.rs | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index c5afc5067b6a9..d96f1393bc917 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -41,7 +41,7 @@ pub fn P(value: T) -> P { /// table) and a SyntaxContext to track renaming and /// macro expansion per Flatt et al., "Macros /// That Work Together" -#[deriving(Clone, Hash, PartialOrd, Eq, Ord, Show)] +#[deriving(Clone, Hash, PartialOrd, Eq, Ord)] pub struct Ident { pub name: Name, pub ctxt: SyntaxContext @@ -52,6 +52,12 @@ impl Ident { pub fn new(name: Name) -> Ident { Ident {name: name, ctxt: EMPTY_CTXT}} } +impl Show for Ident { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "\"{}\"#{}", token::get_ident(*self).get(), self.ctxt) + } +} + impl PartialEq for Ident { fn eq(&self, other: &Ident) -> bool { if self.ctxt == other.ctxt { diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 43bbba8527199..41035ffe89edc 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -90,7 +90,7 @@ impl<'a> Reader for TtReader<'a> { } fn next_token(&mut self) -> TokenAndSpan { let r = tt_next_token(self); - debug!("TtReader: r={:?}", r); + debug!("TtReader: r={}", r); r } fn fatal(&self, m: &str) -> ! 
{ From 5f970e690f2ca7ddb7884b6b8752baf3771ad459 Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Mon, 23 Jun 2014 13:37:30 -0700 Subject: [PATCH 07/15] codemap: be less annoying in debug logging --- src/libsyntax/codemap.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs index 2f1e01b239d4c..5345776029471 100644 --- a/src/libsyntax/codemap.rs +++ b/src/libsyntax/codemap.rs @@ -430,7 +430,6 @@ impl CodeMap { /// Converts an absolute BytePos to a CharPos relative to the filemap and above. pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos { - debug!("codemap: converting {:?} to char pos", bpos); let idx = self.lookup_filemap_idx(bpos); let files = self.files.borrow(); let map = files.get(idx); @@ -439,7 +438,7 @@ impl CodeMap { let mut total_extra_bytes = 0; for mbc in map.multibyte_chars.borrow().iter() { - debug!("codemap: {:?}-byte char at {:?}", mbc.bytes, mbc.pos); + debug!("{}-byte char at {}", mbc.bytes, mbc.pos); if mbc.pos < bpos { // every character is at least one byte, so we only // count the actual extra bytes. @@ -514,11 +513,11 @@ impl CodeMap { let chpos = self.bytepos_to_file_charpos(pos); let linebpos = *f.lines.borrow().get(a); let linechpos = self.bytepos_to_file_charpos(linebpos); - debug!("codemap: byte pos {:?} is on the line at byte pos {:?}", + debug!("byte pos {} is on the line at byte pos {}", pos, linebpos); - debug!("codemap: char pos {:?} is on the line at char pos {:?}", + debug!("char pos {} is on the line at char pos {}", chpos, linechpos); - debug!("codemap: byte is on line: {:?}", line); + debug!("byte is on line: {}", line); assert!(chpos >= linechpos); Loc { file: f, From 47fe8aa6bfaa18d42561801dc230d2043b984b76 Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Wed, 2 Jul 2014 09:39:48 -0700 Subject: [PATCH 08/15] lexer: shuffle around some functions --- src/libsyntax/parse/lexer/mod.rs | 199 ++++++++++++++++--------------- 1 file changed, 100 insertions(+), 99 deletions(-) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 41035ffe89edc..f22e7af08564f 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -969,108 +969,12 @@ impl<'a> StringReader<'a> { 'b' => { self.bump(); return match self.curr { - Some('\'') => parse_byte(self), - Some('"') => parse_byte_string(self), - Some('r') => parse_raw_byte_string(self), + Some('\'') => self.scan_byte(), + Some('"') => self.scan_byte_string(), + Some('r') => self.scan_raw_byte_string(), _ => unreachable!() // Should have been a token::IDENT above. }; - fn parse_byte(self_: &mut StringReader) -> token::Token { - self_.bump(); - let start = self_.last_pos; - - // the eof will be picked up by the final `'` check below - let mut c2 = self_.curr.unwrap_or('\x00'); - self_.bump(); - - c2 = self_.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'').unwrap(); - if !self_.curr_is('\'') { - // Byte offsetting here is okay because the - // character before position `start` are an - // ascii single quote and ascii 'b'. 
- let last_pos = self_.last_pos; - self_.fatal_span_verbose( - start - BytePos(2), last_pos, - "unterminated byte constant".to_string()); - } - self_.bump(); // advance curr past token - return token::LIT_BYTE(c2 as u8); - } - - fn parse_byte_string(self_: &mut StringReader) -> token::Token { - self_.bump(); - let start = self_.last_pos; - let mut value = Vec::new(); - while !self_.curr_is('"') { - if self_.is_eof() { - let last_pos = self_.last_pos; - self_.fatal_span_(start, last_pos, - "unterminated double quote byte string"); - } - - let ch_start = self_.last_pos; - let ch = self_.curr.unwrap(); - self_.bump(); - self_.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"') - .map(|ch| value.push(ch as u8)); - } - self_.bump(); - return token::LIT_BINARY(Rc::new(value)); - } - - fn parse_raw_byte_string(self_: &mut StringReader) -> token::Token { - let start_bpos = self_.last_pos; - self_.bump(); - let mut hash_count = 0u; - while self_.curr_is('#') { - self_.bump(); - hash_count += 1; - } - - if self_.is_eof() { - let last_pos = self_.last_pos; - self_.fatal_span_(start_bpos, last_pos, "unterminated raw string"); - } else if !self_.curr_is('"') { - let last_pos = self_.last_pos; - let ch = self_.curr.unwrap(); - self_.fatal_span_char(start_bpos, last_pos, - "only `#` is allowed in raw string delimitation; \ - found illegal character", - ch); - } - self_.bump(); - let content_start_bpos = self_.last_pos; - let mut content_end_bpos; - 'outer: loop { - match self_.curr { - None => { - let last_pos = self_.last_pos; - self_.fatal_span_(start_bpos, last_pos, "unterminated raw string") - }, - Some('"') => { - content_end_bpos = self_.last_pos; - for _ in range(0, hash_count) { - self_.bump(); - if !self_.curr_is('#') { - continue 'outer; - } - } - break; - }, - Some(c) => if c > '\x7F' { - let last_pos = self_.last_pos; - self_.err_span_char( - last_pos, last_pos, "raw byte string must be ASCII", c); - } - } - self_.bump(); - } - self_.bump(); - let bytes = self_.with_str_from_to(content_start_bpos, - content_end_bpos, - |s| s.as_bytes().to_owned()); - return token::LIT_BINARY_RAW(Rc::new(bytes), hash_count); - } } '"' => { let mut accum_str = String::new(); @@ -1221,6 +1125,103 @@ impl<'a> StringReader<'a> { // consider shebangs comments, but not inner attributes || (self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('[')) } + + fn scan_byte(&mut self) -> token::Token { + self.bump(); + let start = self.last_pos; + + // the eof will be picked up by the final `'` check below + let mut c2 = self.curr.unwrap_or('\x00'); + self.bump(); + + c2 = self.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'').unwrap(); + if !self.curr_is('\'') { + // Byte offsetting here is okay because the + // character before position `start` are an + // ascii single quote and ascii 'b'. 
+ let last_pos = self.last_pos; + self.fatal_span_verbose( + start - BytePos(2), last_pos, + "unterminated byte constant".to_string()); + } + self.bump(); // advance curr past token + return token::LIT_BYTE(c2 as u8); + } + + fn scan_byte_string(&mut self) -> token::Token { + self.bump(); + let start = self.last_pos; + let mut value = Vec::new(); + while !self.curr_is('"') { + if self.is_eof() { + let last_pos = self.last_pos; + self.fatal_span_(start, last_pos, + "unterminated double quote byte string"); + } + + let ch_start = self.last_pos; + let ch = self.curr.unwrap(); + self.bump(); + self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"') + .map(|ch| value.push(ch as u8)); + } + self.bump(); + return token::LIT_BINARY(Rc::new(value)); + } + + fn scan_raw_byte_string(&mut self) -> token::Token { + let start_bpos = self.last_pos; + self.bump(); + let mut hash_count = 0u; + while self.curr_is('#') { + self.bump(); + hash_count += 1; + } + + if self.is_eof() { + let last_pos = self.last_pos; + self.fatal_span_(start_bpos, last_pos, "unterminated raw string"); + } else if !self.curr_is('"') { + let last_pos = self.last_pos; + let ch = self.curr.unwrap(); + self.fatal_span_char(start_bpos, last_pos, + "only `#` is allowed in raw string delimitation; \ + found illegal character", + ch); + } + self.bump(); + let content_start_bpos = self.last_pos; + let mut content_end_bpos; + 'outer: loop { + match self.curr { + None => { + let last_pos = self.last_pos; + self.fatal_span_(start_bpos, last_pos, "unterminated raw string") + }, + Some('"') => { + content_end_bpos = self.last_pos; + for _ in range(0, hash_count) { + self.bump(); + if !self.curr_is('#') { + continue 'outer; + } + } + break; + }, + Some(c) => if c > '\x7F' { + let last_pos = self.last_pos; + self.err_span_char( + last_pos, last_pos, "raw byte string must be ASCII", c); + } + } + self.bump(); + } + self.bump(); + let bytes = self.with_str_from_to(content_start_bpos, + content_end_bpos, + |s| s.as_bytes().to_owned()); + return token::LIT_BINARY_RAW(Rc::new(bytes), hash_count); + } } pub fn is_whitespace(c: Option) -> bool { From c8a02527ae6364175c31a5447dd4e21bb6ac3488 Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Tue, 24 Jun 2014 17:44:50 -0700 Subject: [PATCH 09/15] lexer: add ident_from and ident_from_to methods --- src/libsyntax/parse/lexer/mod.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index f22e7af08564f..7a9051c16aed6 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -217,6 +217,20 @@ impl<'a> StringReader<'a> { self.with_str_from_to(start, self.last_pos, f) } + /// Create an Ident from a given offset to the current offset, each + /// adjusted 1 towards each other (assumes that on either side there is a + /// single-byte delimiter). + pub fn ident_from(&self, start: BytePos) -> ast::Ident { + debug!("taking an ident from {} to {}", start, self.last_pos); + self.with_str_from(start, str_to_ident) + } + + /// As ident_from, with an explicit endpoint. + pub fn ident_from_to(&self, start: BytePos, end: BytePos) -> ast::Ident { + debug!("taking an ident from {} to {}", start, end); + self.with_str_from_to(start, end, str_to_ident) + } + /// Calls `f` with a string slice of the source text spanning from `start` /// up to but excluding `end`. 
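
As a rough illustration of the pattern `ident_from` captures, interning the
slice of source text between two byte offsets, here is a minimal standalone
sketch. It is not part of the patch, and `slice_text` is a hypothetical
stand-in for the interning step:

// minimal sketch, assuming nothing beyond the standard library;
// the real lexer hands the slice to str_to_ident instead of returning it
fn slice_text<'a>(src: &'a str, start: uint, end: uint) -> &'a str {
    src.slice(start, end)
}

fn main() {
    let src = "let foo = 1;";
    assert_eq!(slice_text(src, 4u, 7u), "foo");
}
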
    fn with_str_from_to<T>(&self, start: BytePos, end: BytePos, f: |s: &str| -> T) -> T {

From bf04a7ccb1c1d23478885ea4f67fad374ffe0a72 Mon Sep 17 00:00:00 2001
From: Corey Richardson
Date: Thu, 3 Jul 2014 00:45:59 -0700
Subject: [PATCH 10/15] ast: add an `as_str` method to Ident

This is technically unsafe but interned strings are considered immortal.
---
 src/libsyntax/ast.rs | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs
index d96f1393bc917..b9bb05d1950cc 100644
--- a/src/libsyntax/ast.rs
+++ b/src/libsyntax/ast.rs
@@ -50,6 +50,13 @@ pub struct Ident {
 impl Ident {
     /// Construct an identifier with the given name and an empty context:
     pub fn new(name: Name) -> Ident { Ident {name: name, ctxt: EMPTY_CTXT}}
+
+    pub fn as_str<'a>(&'a self) -> &'a str {
+        unsafe {
+            // FIXME #12938: can't use copy_lifetime since &str isn't a &T
+            ::std::mem::transmute(token::get_ident(*self).get())
+        }
+    }
 }
 
 impl Show for Ident {

From 9f5e21da4ef95e5d2914a76b09848ebc2504c53d Mon Sep 17 00:00:00 2001
From: Corey Richardson
Date: Thu, 3 Jul 2014 00:47:30 -0700
Subject: [PATCH 11/15] syntax: don't process string/char/byte/binary lits

This shuffles things around a bit so that LIT_CHAR and co store an Ident
which is the original, unaltered literal in the source. When creating the
AST, unescape and postprocess them. This changes how syntax extensions can
work slightly, but otherwise makes no visible difference. To get a useful
value out of one of these tokens, call
`parse::{char_lit, byte_lit, binary_lit, str_lit}`.

[breaking-change]
---
 src/libsyntax/ext/base.rs           |   6 +-
 src/libsyntax/ext/quote.rs          |   4 +-
 src/libsyntax/parse/lexer/mod.rs    | 100 ++++++------
 src/libsyntax/parse/mod.rs          | 232 ++++++++++++++++++++++++++++
 src/libsyntax/parse/parser.rs       |  17 +-
 src/libsyntax/parse/token.rs        |  34 ++--
 src/test/run-pass/string-escapes.rs |  15 ++
 7 files changed, 327 insertions(+), 81 deletions(-)
 create mode 100644 src/test/run-pass/string-escapes.rs

diff --git a/src/libsyntax/ext/base.rs b/src/libsyntax/ext/base.rs
index bbf38fd7a9d05..1dbbe3b973c83 100644
--- a/src/libsyntax/ext/base.rs
+++ b/src/libsyntax/ext/base.rs
@@ -579,9 +579,9 @@ pub fn get_single_str_from_tts(cx: &ExtCtxt,
         cx.span_err(sp, format!("{} takes 1 argument.", name).as_slice());
     } else {
         match tts[0] {
-            ast::TTTok(_, token::LIT_STR(ident))
-            | ast::TTTok(_, token::LIT_STR_RAW(ident, _)) => {
-                return Some(token::get_ident(ident).get().to_string())
+            ast::TTTok(_, token::LIT_STR(ident)) => return Some(parse::str_lit(ident.as_str())),
+            ast::TTTok(_, token::LIT_STR_RAW(ident, _)) => {
+                return Some(parse::raw_str_lit(ident.as_str()))
             }
             _ => {
                 cx.span_err(sp,
diff --git a/src/libsyntax/ext/quote.rs b/src/libsyntax/ext/quote.rs
index a3c901904a948..f950a0d3340ff 100644
--- a/src/libsyntax/ext/quote.rs
+++ b/src/libsyntax/ext/quote.rs
@@ -401,13 +401,13 @@ fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> Gc<ast::Expr> {
         }
 
         LIT_BYTE(i) => {
-            let e_byte = cx.expr_lit(sp, ast::LitByte(i));
+            let e_byte = mk_ident(cx, sp, i);
 
             return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_BYTE"), vec!(e_byte));
         }
 
         LIT_CHAR(i) => {
-            let e_char = cx.expr_lit(sp, ast::LitChar(i));
+            let e_char = mk_ident(cx, sp, i);
 
             return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_CHAR"), vec!(e_char));
         }
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 7a9051c16aed6..e1317e0ed35ca 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -685,7 +685,7 @@ impl<'a> StringReader<'a> {
     }
 
 
-    fn
scan_numeric_escape(&mut self, n_hex_digits: uint, delim: char) -> char { + fn scan_numeric_escape(&mut self, n_hex_digits: uint, delim: char) -> bool { let mut accum_int = 0u32; let start_bpos = self.last_pos; for _ in range(0, n_hex_digits) { @@ -709,11 +709,11 @@ impl<'a> StringReader<'a> { } match char::from_u32(accum_int) { - Some(x) => x, + Some(_) => true, None => { let last_bpos = self.last_pos; self.err_span_(start_bpos, last_bpos, "illegal numeric character escape"); - '?' + false } } } @@ -721,8 +721,10 @@ impl<'a> StringReader<'a> { /// Scan for a single (possibly escaped) byte or char /// in a byte, (non-raw) byte string, char, or (non-raw) string literal. /// `start` is the position of `first_source_char`, which is already consumed. + /// + /// Returns true if there was a valid char/byte, false otherwise. fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char, - ascii_only: bool, delim: char) -> Option { + ascii_only: bool, delim: char) -> bool { match first_source_char { '\\' => { // '\X' for some X must be a character constant: @@ -732,24 +734,18 @@ impl<'a> StringReader<'a> { match escaped { None => {}, // EOF here is an error that will be checked later. Some(e) => { - return Some(match e { - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - '\\' => '\\', - '\'' => '\'', - '"' => '"', - '0' => '\x00', + return match e { + 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true, 'x' => self.scan_numeric_escape(2u, delim), 'u' if !ascii_only => self.scan_numeric_escape(4u, delim), 'U' if !ascii_only => self.scan_numeric_escape(8u, delim), '\n' if delim == '"' => { self.consume_whitespace(); - return None + true }, '\r' if delim == '"' && self.curr_is('\n') => { self.consume_whitespace(); - return None + true } c => { let last_pos = self.last_pos; @@ -758,9 +754,9 @@ impl<'a> StringReader<'a> { if ascii_only { "unknown byte escape" } else { "unknown character escape" }, c); - c + false } - }) + } } } } @@ -771,14 +767,16 @@ impl<'a> StringReader<'a> { if ascii_only { "byte constant must be escaped" } else { "character constant must be escaped" }, first_source_char); + return false; } '\r' => { if self.curr_is('\n') { self.bump(); - return Some('\n'); + return true; } else { self.err_span_(start, self.last_pos, "bare CR not allowed in string, use \\r instead"); + return false; } } _ => if ascii_only && first_source_char > '\x7F' { @@ -787,9 +785,10 @@ impl<'a> StringReader<'a> { start, last_pos, "byte constant must be ASCII. 
\ Use a \\xHH escape for a non-ASCII byte", first_source_char);
+                return false;
             }
         }
-        Some(first_source_char)
+        true
     }
 
     fn binop(&mut self, op: token::BinOp) -> token::Token {
@@ -924,7 +923,7 @@ impl<'a> StringReader<'a> {
         let start = self.last_pos;
 
         // the eof will be picked up by the final `'` check below
-        let mut c2 = self.curr.unwrap_or('\x00');
+        let c2 = self.curr.unwrap_or('\x00');
         self.bump();
 
         // If the character is an ident start not followed by another single
@@ -967,7 +966,7 @@ impl<'a> StringReader<'a> {
         }
 
         // Otherwise it is a character constant:
-        c2 = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\'').unwrap();
+        let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\'');
         if !self.curr_is('\'') {
             let last_bpos = self.last_pos;
             self.fatal_span_verbose(
@@ -977,8 +976,9 @@ impl<'a> StringReader<'a> {
                 start - BytePos(1), last_bpos,
                 "unterminated character constant".to_string());
         }
+        let id = if valid { self.ident_from(start) } else { str_to_ident("0") };
         self.bump(); // advance curr past token
-        return token::LIT_CHAR(c2);
+        return token::LIT_CHAR(id);
     }
     'b' => {
         self.bump();
@@ -991,8 +991,8 @@ impl<'a> StringReader<'a> {
     }
     '"' => {
-        let mut accum_str = String::new();
         let start_bpos = self.last_pos;
+        let mut valid = true;
         self.bump();
         while !self.curr_is('"') {
             if self.is_eof() {
@@ -1003,11 +1003,13 @@ impl<'a> StringReader<'a> {
             let ch_start = self.last_pos;
             let ch = self.curr.unwrap();
             self.bump();
-            self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"')
-                .map(|ch| accum_str.push_char(ch));
+            valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"');
         }
+        // adjust for the ASCII " at the start of the literal
+        let id = if valid { self.ident_from(start_bpos + BytePos(1)) }
+                 else { str_to_ident("??") };
         self.bump();
-        return token::LIT_STR(str_to_ident(accum_str.as_slice()));
+        return token::LIT_STR(id);
     }
     'r' => {
         let start_bpos = self.last_pos;
@@ -1032,7 +1034,7 @@ impl<'a> StringReader<'a> {
         self.bump();
         let content_start_bpos = self.last_pos;
         let mut content_end_bpos;
-        let mut has_cr = false;
+        let mut valid = true;
         'outer: loop {
             if self.is_eof() {
                 let last_bpos = self.last_pos;
@@ -1055,23 +1057,26 @@ impl<'a> StringReader<'a> {
                     }
                 }
                 break;
-            }
+            },
             '\r' => {
-                has_cr = true;
+                if !self.nextch_is('\n') {
+                    let last_bpos = self.last_pos;
+                    self.err_span_(start_bpos, last_bpos, "bare CR not allowed in raw \
+                                   string, use \\r instead");
+                    valid = false;
+                }
             }
             _ => ()
         }
         self.bump();
     }
     self.bump();
-    let str_content = self.with_str_from_to(content_start_bpos, content_end_bpos, |string| {
-        let string = if has_cr {
-            self.translate_crlf(content_start_bpos, string,
-                                "bare CR not allowed in raw string")
-        } else { string.into_maybe_owned() };
-        str_to_ident(string.as_slice())
-    });
-    return token::LIT_STR_RAW(str_content, hash_count);
+    let id = if valid {
+        self.ident_from_to(content_start_bpos, content_end_bpos)
+    } else {
+        str_to_ident("??")
+    };
+    return token::LIT_STR_RAW(id, hash_count);
     }
     '-' => {
         if self.nextch_is('>') {
@@ -1145,10 +1150,10 @@ impl<'a> StringReader<'a> {
         let start = self.last_pos;
 
         // the eof will be picked up by the final `'` check below
-        let mut c2 = self.curr.unwrap_or('\x00');
+        let c2 = self.curr.unwrap_or('\x00');
         self.bump();
 
-        c2 = self.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'').unwrap();
+        let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'');
         if !self.curr_is('\'') {
             // Byte offsetting here is okay because the
             // character before
position `start` are an @@ -1158,14 +1163,17 @@ impl<'a> StringReader<'a> { start - BytePos(2), last_pos, "unterminated byte constant".to_string()); } + + let id = if valid { self.ident_from(start) } else { str_to_ident("??") }; self.bump(); // advance curr past token - return token::LIT_BYTE(c2 as u8); + return token::LIT_BYTE(id); } fn scan_byte_string(&mut self) -> token::Token { self.bump(); let start = self.last_pos; - let mut value = Vec::new(); + let mut valid = true; + while !self.curr_is('"') { if self.is_eof() { let last_pos = self.last_pos; @@ -1176,11 +1184,11 @@ impl<'a> StringReader<'a> { let ch_start = self.last_pos; let ch = self.curr.unwrap(); self.bump(); - self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"') - .map(|ch| value.push(ch as u8)); + valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"'); } + let id = if valid { self.ident_from(start) } else { str_to_ident("??") }; self.bump(); - return token::LIT_BINARY(Rc::new(value)); + return token::LIT_BINARY(id); } fn scan_raw_byte_string(&mut self) -> token::Token { @@ -1231,10 +1239,8 @@ impl<'a> StringReader<'a> { self.bump(); } self.bump(); - let bytes = self.with_str_from_to(content_start_bpos, - content_end_bpos, - |s| s.as_bytes().to_owned()); - return token::LIT_BINARY_RAW(Rc::new(bytes), hash_count); + return token::LIT_BINARY_RAW(self.ident_from_to(content_start_bpos, content_end_bpos), + hash_count); } } diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index bea8b6a94d43d..62750e60bf8ef 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -272,7 +272,239 @@ pub fn maybe_aborted(result: T, mut p: Parser) -> T { result } +/// Parse a string representing a character literal into its final form. +/// Rather than just accepting/rejecting a given literal, unescapes it as +/// well. Can take any slice prefixed by a character escape. Returns the +/// character and the number of characters consumed. +pub fn char_lit(lit: &str) -> (char, int) { + use std::{num, char}; + + let mut chars = lit.chars(); + let c = match (chars.next(), chars.next()) { + (Some(c), None) if c != '\\' => return (c, 1), + (Some('\\'), Some(c)) => match c { + '"' => Some('"'), + 'n' => Some('\n'), + 'r' => Some('\r'), + 't' => Some('\t'), + '\\' => Some('\\'), + '\'' => Some('\''), + '0' => Some('\0'), + _ => { None } + }, + _ => fail!("lexer accepted invalid char escape `{}`", lit) + }; + + match c { + Some(x) => return (x, 2), + None => { } + } + + let msg = format!("lexer should have rejected a bad character escape {}", lit); + let msg2 = msg.as_slice(); + + let esc: |uint| -> Option<(char, int)> = |len| + num::from_str_radix(lit.slice(2, len), 16) + .and_then(char::from_u32) + .map(|x| (x, len as int)); + + // Unicode escapes + return match lit.as_bytes()[1] as char { + 'x' | 'X' => esc(4), + 'u' => esc(6), + 'U' => esc(10), + _ => None, + }.expect(msg2); +} + +/// Parse a string representing a string literal into its final form. Does +/// unescaping. 
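
Before `str_lit` just below, a quick sketch of how these unescaping helpers
behave. This is illustrative only and assumes a crate linked against the
libsyntax built from this series:

extern crate syntax;

use syntax::parse;

fn main() {
    // char_lit returns the unescaped char plus how many chars were consumed
    assert_eq!(parse::char_lit("a"), ('a', 1));
    assert_eq!(parse::char_lit("\\n"), ('\n', 2));
    assert_eq!(parse::char_lit("\\x41"), ('A', 4));
    // str_lit unescapes, and an escaped newline eats the following whitespace
    assert_eq!(parse::str_lit("a\\\n    b").as_slice(), "ab");
}
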
+pub fn str_lit(lit: &str) -> String {
+    debug!("parse_str_lit: given {}", lit.escape_default());
+    let mut res = String::with_capacity(lit.len());
+
+    // FIXME #8372: This could be a for-loop if it didn't borrow the iterator
+    let error = |i| format!("lexer should have rejected {} at {}", lit, i);
+
+    /// Eat everything up to a non-whitespace
+    fn eat<'a>(it: &mut ::std::iter::Peekable<(uint, char), ::std::str::CharOffsets<'a>>) {
+        loop {
+            match it.peek().map(|x| x.val1()) {
+                Some(' ') | Some('\n') | Some('\r') | Some('\t') => {
+                    it.next();
+                },
+                _ => { break; }
+            }
+        }
+    }
+
+    let mut chars = lit.char_indices().peekable();
+    loop {
+        match chars.next() {
+            Some((i, c)) => {
+                let em = error(i);
+                match c {
+                    '\\' => {
+                        if chars.peek().expect(em.as_slice()).val1() == '\n' {
+                            eat(&mut chars);
+                        } else if chars.peek().expect(em.as_slice()).val1() == '\r' {
+                            chars.next();
+                            if chars.peek().expect(em.as_slice()).val1() != '\n' {
+                                fail!("lexer accepted bare CR");
+                            }
+                            eat(&mut chars);
+                        } else {
+                            // otherwise, a normal escape
+                            let (c, n) = char_lit(lit.slice_from(i));
+                            for _ in range(0, n - 1) { // we don't need to move past the first \
+                                chars.next();
+                            }
+                            res.push_char(c);
+                        }
+                    },
+                    '\r' => {
+                        if chars.peek().expect(em.as_slice()).val1() != '\n' {
+                            fail!("lexer accepted bare CR");
+                        }
+                        chars.next();
+                        res.push_char('\n');
+                    }
+                    c => res.push_char(c),
+                }
+            },
+            None => break
+        }
+    }
+
+    res.shrink_to_fit(); // probably not going to do anything, unless there was an escape.
+    debug!("parse_str_lit: returning {}", res);
+    res
+}
+
+/// Parse a string representing a raw string literal into its final form. The
+/// only operation this does is convert embedded CRLF into a single LF.
+pub fn raw_str_lit(lit: &str) -> String {
+    debug!("raw_str_lit: given {}", lit.escape_default());
+    let mut res = String::with_capacity(lit.len());
+
+    // FIXME #8372: This could be a for-loop if it didn't borrow the iterator
+    let mut chars = lit.chars().peekable();
+    loop {
+        match chars.next() {
+            Some(c) => {
+                if c == '\r' {
+                    if *chars.peek().unwrap() != '\n' {
+                        fail!("lexer accepted bare CR");
+                    }
+                    chars.next();
+                    res.push_char('\n');
+                } else {
+                    res.push_char(c);
+                }
+            },
+            None => break
+        }
+    }
+    res.shrink_to_fit();
+    res
+}
+
+pub fn float_lit(s: &str) -> ast::Lit_ {
+    debug!("float_lit: {}", s);
+    // FIXME #2252: bounds checking float literals is deferred until trans
+    let s2 = s.chars().filter(|&c| c != '_').collect::<String>();
+    let s = s2.as_slice();
+
+    let mut ty = None;
+
+    if s.ends_with("f32") {
+        ty = Some(ast::TyF32);
+    } else if s.ends_with("f64") {
+        ty = Some(ast::TyF64);
+    }
+
+
+    match ty {
+        Some(t) => {
+            ast::LitFloat(token::intern_and_get_ident(s.slice_to(s.len() - t.suffix_len())), t)
+        },
+        None => ast::LitFloatUnsuffixed(token::intern_and_get_ident(s))
+    }
+}
+
+/// Parse a string representing a byte literal into its final form. Similar to `char_lit`.
+pub fn byte_lit(lit: &str) -> (u8, uint) {
+    let err = |i| format!("lexer accepted invalid byte literal {} step {}", lit, i);
+
+    if lit.len() == 1 {
+        (lit.as_bytes()[0], 1)
+    } else {
+        assert!(lit.as_bytes()[0] == b'\\', err(0i));
+        let b = match lit.as_bytes()[1] {
+            b'"' => b'"',
+            b'n' => b'\n',
+            b'r' => b'\r',
+            b't' => b'\t',
+            b'\\' => b'\\',
+            b'\'' => b'\'',
+            b'0' => b'\0',
+            _ => {
+                match ::std::num::from_str_radix::<u64>(lit.slice(2, 4), 16) {
+                    Some(c) =>
+                        if c > 0xFF {
+                            fail!(err(2))
+                        } else {
+                            return (c as u8, 4)
+                        },
+                    None => fail!(err(3))
+                }
+            }
+        };
+        return (b, 2);
+    }
+}
+
+pub fn binary_lit(lit: &str) -> Rc<Vec<u8>> {
+    let mut res = Vec::with_capacity(lit.len());
+
+    // FIXME #8372: This could be a for-loop if it didn't borrow the iterator
+    let error = |i| format!("lexer should have rejected {} at {}", lit, i);
+
+    // binary literals *must* be ASCII, but the escapes don't have to be
+    let mut chars = lit.as_bytes().iter().enumerate().peekable();
+    loop {
+        match chars.next() {
+            Some((i, &c)) => {
+                if c == b'\\' {
+                    if *chars.peek().expect(error(i).as_slice()).val1() == b'\n' {
+                        loop {
+                            // eat everything up to a non-whitespace
+                            match chars.peek().map(|x| *x.val1()) {
+                                Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => {
+                                    chars.next();
+                                },
+                                _ => { break; }
+                            }
+                        }
+                    } else {
+                        // otherwise, a normal escape
+                        let (c, n) = byte_lit(lit.slice_from(i));
+                        for _ in range(0, n - 1) { // we don't need to move past the first \
+                            chars.next();
+                        }
+                        res.push(c);
+                    }
+                } else {
+                    res.push(c);
+                }
+            },
+            None => { break; }
+        }
+    }
+
+    Rc::new(res)
+}
 
 #[cfg(test)]
 mod test {
     use super::*;
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index 3bf88424891bb..553e685bddece 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -61,6 +61,7 @@ use ast_util::{as_prec, ident_to_path, lit_is_str, operator_prec};
 use ast_util;
 use codemap::{Span, BytePos, Spanned, spanned, mk_sp};
 use codemap;
+use parse;
 use parse::attr::ParserAttr;
 use parse::classify;
 use parse::common::{SeqSep, seq_sep_none};
@@ -1543,8 +1544,8 @@ impl<'a> Parser<'a> {
 
     /// Matches token_lit = LIT_INT | ...
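
A matching sketch for the byte-oriented helpers defined above, under the same
assumption that the example links against this series' libsyntax:

extern crate syntax;

use syntax::parse;

fn main() {
    // byte_lit returns the unescaped byte plus how many chars were consumed
    assert_eq!(parse::byte_lit("a"), (b'a', 1));
    assert_eq!(parse::byte_lit("\\x41"), (0x41u8, 4));
    // binary_lit unescapes the entire body of a b"..." literal
    assert_eq!(*parse::binary_lit("hi\\n"), vec!(b'h', b'i', b'\n'));
}
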
pub fn lit_from_token(&mut self, tok: &token::Token) -> Lit_ { match *tok { - token::LIT_BYTE(i) => LitByte(i), - token::LIT_CHAR(i) => LitChar(i), + token::LIT_BYTE(i) => LitByte(parse::byte_lit(i.as_str()).val0()), + token::LIT_CHAR(i) => LitChar(parse::char_lit(i.as_str()).val0()), token::LIT_INT(i, it) => LitInt(i, it), token::LIT_UINT(u, ut) => LitUint(u, ut), token::LIT_INT_UNSUFFIXED(i) => LitIntUnsuffixed(i), @@ -1555,13 +1556,17 @@ impl<'a> Parser<'a> { LitFloatUnsuffixed(self.id_to_interned_str(s)) } token::LIT_STR(s) => { - LitStr(self.id_to_interned_str(s), ast::CookedStr) + LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()), + ast::CookedStr) } token::LIT_STR_RAW(s, n) => { - LitStr(self.id_to_interned_str(s), ast::RawStr(n)) + LitStr(token::intern_and_get_ident(parse::raw_str_lit(s.as_str()).as_slice()), + ast::RawStr(n)) } - token::LIT_BINARY_RAW(ref v, _) | - token::LIT_BINARY(ref v) => LitBinary(v.clone()), + token::LIT_BINARY(i) => + LitBinary(parse::binary_lit(self.id_to_interned_str(i).get())), + token::LIT_BINARY_RAW(i, _) => + LitBinary(Rc::new(i.as_str().as_bytes().iter().map(|&x| x).collect())), token::LPAREN => { self.expect(&token::RPAREN); LitNil }, _ => { self.unexpected_last(tok); } } diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index c7aeae04ba2fa..bb6183b7e9ea3 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -79,8 +79,8 @@ pub enum Token { QUESTION, /* Literals */ - LIT_BYTE(u8), - LIT_CHAR(char), + LIT_BYTE(Ident), + LIT_CHAR(Ident), LIT_INT(i64, ast::IntTy), LIT_UINT(u64, ast::UintTy), LIT_INT_UNSUFFIXED(i64), @@ -88,8 +88,8 @@ pub enum Token { LIT_FLOAT_UNSUFFIXED(Ident), LIT_STR(Ident), LIT_STR_RAW(Ident, uint), /* raw str delimited by n hash symbols */ - LIT_BINARY(Rc>), - LIT_BINARY_RAW(Rc>, uint), /* raw binary str delimited by n hash symbols */ + LIT_BINARY(Ident), + LIT_BINARY_RAW(Ident, uint), /* raw binary str delimited by n hash symbols */ /* Name components */ /// An identifier contains an "is_mod_name" boolean, @@ -201,20 +201,10 @@ pub fn to_string(t: &Token) -> String { /* Literals */ LIT_BYTE(b) => { - let mut res = String::from_str("b'"); - (b as char).escape_default(|c| { - res.push_char(c); - }); - res.push_char('\''); - res + format!("b'{}'", get_ident(b).get()) } LIT_CHAR(c) => { - let mut res = String::from_str("'"); - c.escape_default(|c| { - res.push_char(c); - }); - res.push_char('\''); - res + format!("'{}'", get_ident(c).get()) } LIT_INT(i, t) => ast_util::int_ty_to_string(t, Some(i)), LIT_UINT(u, t) => ast_util::uint_ty_to_string(t, Some(u)), @@ -235,20 +225,18 @@ pub fn to_string(t: &Token) -> String { body } LIT_STR(s) => { - format!("\"{}\"", get_ident(s).get().escape_default()) + format!("\"{}\"", get_ident(s).get()) } LIT_STR_RAW(s, n) => { format!("r{delim}\"{string}\"{delim}", delim="#".repeat(n), string=get_ident(s)) } - LIT_BINARY(ref v) => { - format!( - "b\"{}\"", - v.iter().map(|&b| b as char).collect::().escape_default()) + LIT_BINARY(v) => { + format!("b\"{}\"", get_ident(v).get()) } - LIT_BINARY_RAW(ref s, n) => { + LIT_BINARY_RAW(s, n) => { format!("br{delim}\"{string}\"{delim}", - delim="#".repeat(n), string=s.as_slice().to_ascii().as_str_ascii()) + delim="#".repeat(n), string=get_ident(s).get()) } /* Name components */ diff --git a/src/test/run-pass/string-escapes.rs b/src/test/run-pass/string-escapes.rs new file mode 100644 index 0000000000000..7abe8276a9782 --- /dev/null +++ b/src/test/run-pass/string-escapes.rs @@ -0,0 +1,15 @@ 
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +fn main() { + let x = "\\\\\ + "; + assert!(x == r"\\"); // extraneous whitespace stripped +} From cc4213418e3ab225867d8e3911f592481b1bbffc Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Wed, 18 Jun 2014 10:44:20 -0700 Subject: [PATCH 12/15] syntax: don't parse numeric literals in the lexer This removes a bunch of token types. Tokens now store the original, unaltered numeric literal (that is still checked for correctness), which is parsed into an actual number later, as needed, when creating the AST. This can change how syntax extensions work, but otherwise poses no visible changes. [breaking-change] --- src/librustc/middle/trans/consts.rs | 1 + src/librustdoc/html/highlight.rs | 3 +- src/libsyntax/ast.rs | 32 ++- src/libsyntax/codemap.rs | 2 +- src/libsyntax/ext/quote.rs | 41 +--- src/libsyntax/parse/lexer/mod.rs | 342 +++++++++++++--------------- src/libsyntax/parse/mod.rs | 109 +++++++++ src/libsyntax/parse/parser.rs | 19 +- src/libsyntax/parse/token.rs | 42 +--- 9 files changed, 325 insertions(+), 266 deletions(-) diff --git a/src/librustc/middle/trans/consts.rs b/src/librustc/middle/trans/consts.rs index c35767f99a835..11a8207f8c43e 100644 --- a/src/librustc/middle/trans/consts.rs +++ b/src/librustc/middle/trans/consts.rs @@ -42,6 +42,7 @@ use syntax::{ast, ast_util}; pub fn const_lit(cx: &CrateContext, e: &ast::Expr, lit: ast::Lit) -> ValueRef { let _icx = push_ctxt("trans_lit"); + debug!("const_lit: {}", lit); match lit.node { ast::LitByte(b) => C_integral(Type::uint_from_ty(cx, ast::TyU8), b as u64, false), ast::LitChar(i) => C_integral(Type::char(cx), i as u64, false), diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index 82bb1bd58a6d3..af7a822bc29e9 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -144,8 +144,7 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader, t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string", // number literals - t::LIT_INT(..) | t::LIT_UINT(..) | t::LIT_INT_UNSUFFIXED(..) | - t::LIT_FLOAT(..) | t::LIT_FLOAT_UNSUFFIXED(..) => "number", + t::LIT_INTEGER(..) | t::LIT_FLOAT(..) 
=> "number", // keywords are also included in the identifier set t::IDENT(ident, _is_mod_sep) => { diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index b9bb05d1950cc..2a49d0e0f5bf8 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -619,7 +619,7 @@ pub enum Mac_ { MacInvocTT(Path, Vec , SyntaxContext), // new macro-invocation } -#[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] +#[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash, Show)] pub enum StrStyle { CookedStr, RawStr(uint) @@ -627,7 +627,7 @@ pub enum StrStyle { pub type Lit = Spanned; -#[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] +#[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash, Show)] pub enum Lit_ { LitStr(InternedString, StrStyle), LitBinary(Rc >), @@ -697,6 +697,16 @@ impl fmt::Show for IntTy { } } +impl IntTy { + pub fn suffix_len(&self) -> uint { + match *self { + TyI => 1, + TyI8 => 2, + TyI16 | TyI32 | TyI64 => 3, + } + } +} + #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub enum UintTy { TyU, @@ -706,6 +716,16 @@ pub enum UintTy { TyU64, } +impl UintTy { + pub fn suffix_len(&self) -> uint { + match *self { + TyU => 1, + TyU8 => 2, + TyU16 | TyU32 | TyU64 => 3, + } + } +} + impl fmt::Show for UintTy { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", ast_util::uint_ty_to_string(*self, None)) @@ -724,6 +744,14 @@ impl fmt::Show for FloatTy { } } +impl FloatTy { + pub fn suffix_len(&self) -> uint { + match *self { + TyF32 | TyF64 => 3, // add F128 handling here + } + } +} + // NB PartialEq method appears below. #[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] pub struct Ty { diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs index 5345776029471..ef4024a8f83fe 100644 --- a/src/libsyntax/codemap.rs +++ b/src/libsyntax/codemap.rs @@ -96,7 +96,7 @@ pub struct Span { pub static DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0), expn_info: None }; -#[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash)] +#[deriving(Clone, PartialEq, Eq, Encodable, Decodable, Hash, Show)] pub struct Spanned { pub node: T, pub span: Span, diff --git a/src/libsyntax/ext/quote.rs b/src/libsyntax/ext/quote.rs index f950a0d3340ff..24630dd09a20e 100644 --- a/src/libsyntax/ext/quote.rs +++ b/src/libsyntax/ext/quote.rs @@ -412,45 +412,14 @@ fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> Gc { return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_CHAR"), vec!(e_char)); } - LIT_INT(i, ity) => { - let s_ity = match ity { - ast::TyI => "TyI", - ast::TyI8 => "TyI8", - ast::TyI16 => "TyI16", - ast::TyI32 => "TyI32", - ast::TyI64 => "TyI64" - }; - let e_ity = mk_ast_path(cx, sp, s_ity); - let e_i64 = cx.expr_lit(sp, ast::LitInt(i, ast::TyI64)); - return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_INT"), vec!(e_i64, e_ity)); + LIT_INTEGER(i) => { + let e_int = mk_ident(cx, sp, i); + return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_INTEGER"), vec!(e_int)); } - LIT_UINT(u, uty) => { - let s_uty = match uty { - ast::TyU => "TyU", - ast::TyU8 => "TyU8", - ast::TyU16 => "TyU16", - ast::TyU32 => "TyU32", - ast::TyU64 => "TyU64" - }; - let e_uty = mk_ast_path(cx, sp, s_uty); - let e_u64 = cx.expr_lit(sp, ast::LitUint(u, ast::TyU64)); - return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_UINT"), vec!(e_u64, e_uty)); - } - - LIT_INT_UNSUFFIXED(i) => { - let e_i64 = cx.expr_lit(sp, ast::LitInt(i, ast::TyI64)); - return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_INT_UNSUFFIXED"), vec!(e_i64)); - } - - LIT_FLOAT(fident, 
fty) => { - let s_fty = match fty { - ast::TyF32 => "TyF32", - ast::TyF64 => "TyF64", - }; - let e_fty = mk_ast_path(cx, sp, s_fty); + LIT_FLOAT(fident) => { let e_fident = mk_ident(cx, sp, fident); - return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_FLOAT"), vec!(e_fident, e_fty)); + return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_FLOAT"), vec!(e_fident)); } LIT_STR(ident) => { diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index e1317e0ed35ca..61a37f77d348b 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -18,7 +18,6 @@ use parse::token::{str_to_ident}; use std::char; use std::mem::replace; -use std::num::from_str_radix; use std::rc::Rc; use std::str; @@ -491,204 +490,113 @@ impl<'a> StringReader<'a> { if res.is_some() { res } else { self.consume_whitespace_and_comments() } } - fn scan_exponent(&mut self, start_bpos: BytePos) -> Option { - // \x00 hits the `return None` case immediately, so this is fine. - let mut c = self.curr.unwrap_or('\x00'); - let mut rslt = String::new(); - if c == 'e' || c == 'E' { - rslt.push_char(c); - self.bump(); - c = self.curr.unwrap_or('\x00'); - if c == '-' || c == '+' { - rslt.push_char(c); - self.bump(); - } - let exponent = self.scan_digits(10u); - if exponent.len() > 0u { - rslt.push_str(exponent.as_slice()); - return Some(rslt); - } else { - let last_bpos = self.last_pos; - self.err_span_(start_bpos, last_bpos, "scan_exponent: bad fp literal"); - rslt.push_str("1"); // arbitrary placeholder exponent - return Some(rslt); - } - } else { - return None::; - } - } - - fn scan_digits(&mut self, radix: uint) -> String { - let mut rslt = String::new(); + /// Scan through any digits (base `radix`) or underscores, and return how + /// many digits there were. 
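
The counting contract matters because the new `scan_number` below leans on
it; here is a standalone sketch of the same rule (ours, not from the patch):

// count like scan_digits: skip underscores, stop at the first character
// that is neither an underscore nor a digit in the given radix
fn count_digits(s: &str, radix: uint) -> uint {
    s.chars()
     .take_while(|&c| c == '_' || c.is_digit_radix(radix))
     .filter(|&c| c != '_')
     .count()
}

fn main() {
    assert_eq!(count_digits("1_000", 10u), 4u);
    assert_eq!(count_digits("ffu8", 16u), 2u);
}
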
+ fn scan_digits(&mut self, radix: uint) -> uint { + let mut len = 0u; loop { let c = self.curr; - if c == Some('_') { self.bump(); continue; } + if c == Some('_') { debug!("skipping a _"); self.bump(); continue; } match c.and_then(|cc| char::to_digit(cc, radix)) { - Some(_) => { - rslt.push_char(c.unwrap()); - self.bump(); - } - _ => return rslt + Some(_) => { + debug!("{} in scan_digits", c); + len += 1; + self.bump(); + } + _ => return len } }; } - fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: uint) { - match base { - 16u => self.err_span_(start_bpos, last_bpos, - "hexadecimal float literal is not supported"), - 8u => self.err_span_(start_bpos, last_bpos, "octal float literal is not supported"), - 2u => self.err_span_(start_bpos, last_bpos, "binary float literal is not supported"), - _ => () - } - } - + /// Lex a LIT_INTEGER or a LIT_FLOAT fn scan_number(&mut self, c: char) -> token::Token { - let mut num_str; - let mut base = 10u; - let mut c = c; - let mut n = self.nextch().unwrap_or('\x00'); + let mut num_digits; + let mut base = 10; let start_bpos = self.last_pos; - if c == '0' && n == 'x' { - self.bump(); - self.bump(); - base = 16u; - } else if c == '0' && n == 'o' { - self.bump(); - self.bump(); - base = 8u; - } else if c == '0' && n == 'b' { - self.bump(); - self.bump(); - base = 2u; - } - num_str = self.scan_digits(base); - c = self.curr.unwrap_or('\x00'); - self.nextch(); - if c == 'u' || c == 'i' { - enum Result { Signed(ast::IntTy), Unsigned(ast::UintTy) } - let signed = c == 'i'; - let mut tp = { - if signed { Signed(ast::TyI) } - else { Unsigned(ast::TyU) } - }; - self.bump(); - c = self.curr.unwrap_or('\x00'); - if c == '8' { - self.bump(); - tp = if signed { Signed(ast::TyI8) } - else { Unsigned(ast::TyU8) }; - } - n = self.nextch().unwrap_or('\x00'); - if c == '1' && n == '6' { - self.bump(); - self.bump(); - tp = if signed { Signed(ast::TyI16) } - else { Unsigned(ast::TyU16) }; - } else if c == '3' && n == '2' { - self.bump(); - self.bump(); - tp = if signed { Signed(ast::TyI32) } - else { Unsigned(ast::TyU32) }; - } else if c == '6' && n == '4' { - self.bump(); - self.bump(); - tp = if signed { Signed(ast::TyI64) } - else { Unsigned(ast::TyU64) }; - } - if num_str.len() == 0u { - let last_bpos = self.last_pos; - self.err_span_(start_bpos, last_bpos, "no valid digits found for number"); - num_str = "1".to_string(); - } - let parsed = match from_str_radix::(num_str.as_slice(), - base as uint) { - Some(p) => p, - None => { - let last_bpos = self.last_pos; - self.err_span_(start_bpos, last_bpos, "int literal is too large"); - 1 - } - }; - match tp { - Signed(t) => return token::LIT_INT(parsed as i64, t), - Unsigned(t) => return token::LIT_UINT(parsed, t) + self.bump(); + + if c == '0' { + match self.curr.unwrap_or('\0') { + 'b' => { self.bump(); base = 2; num_digits = self.scan_digits(2); } + 'o' => { self.bump(); base = 8; num_digits = self.scan_digits(8); } + 'x' => { self.bump(); base = 16; num_digits = self.scan_digits(16); } + '0'..'9' | '_' | '.' 
=> { + num_digits = self.scan_digits(10) + 1; + } + 'u' | 'i' => { + self.scan_int_suffix(); + return token::LIT_INTEGER(self.ident_from(start_bpos)); + }, + 'f' => { + let last_pos = self.last_pos; + self.scan_float_suffix(); + self.check_float_base(start_bpos, last_pos, base); + return token::LIT_FLOAT(self.ident_from(start_bpos)); + } + _ => { + // just a 0 + return token::LIT_INTEGER(self.ident_from(start_bpos)); + } } + } else if c.is_digit_radix(10) { + num_digits = self.scan_digits(10) + 1; + } else { + num_digits = 0; } - let mut is_float = false; - if self.curr_is('.') && !(ident_start(self.nextch()) || self.nextch_is('.')) { - is_float = true; - self.bump(); - let dec_part = self.scan_digits(10u); - num_str.push_char('.'); - num_str.push_str(dec_part.as_slice()); - } - match self.scan_exponent(start_bpos) { - Some(ref s) => { - is_float = true; - num_str.push_str(s.as_slice()); - } - None => () + + if num_digits == 0 { + self.err_span_(start_bpos, self.last_pos, "no valid digits found for number"); + // eat any suffix + self.scan_int_suffix(); + return token::LIT_INTEGER(str_to_ident("0")); } - if self.curr_is('f') { + // might be a float, but don't be greedy if this is actually an + // integer literal followed by field/method access or a range pattern + // (`0..2` and `12.foo()`) + if self.curr_is('.') && !self.nextch_is('.') && !self.nextch().unwrap_or('\0') + .is_XID_start() { + // might have stuff after the ., and if it does, it needs to start + // with a number self.bump(); - c = self.curr.unwrap_or('\x00'); - n = self.nextch().unwrap_or('\x00'); - if c == '3' && n == '2' { - self.bump(); - self.bump(); - let last_bpos = self.last_pos; - self.check_float_base(start_bpos, last_bpos, base); - return token::LIT_FLOAT(str_to_ident(num_str.as_slice()), - ast::TyF32); - } else if c == '6' && n == '4' { - self.bump(); - self.bump(); - let last_bpos = self.last_pos; - self.check_float_base(start_bpos, last_bpos, base); - return token::LIT_FLOAT(str_to_ident(num_str.as_slice()), - ast::TyF64); - /* FIXME (#2252): if this is out of range for either a - 32-bit or 64-bit float, it won't be noticed till the - back-end. 
*/ + if self.curr.unwrap_or('\0').is_digit_radix(10) { + self.scan_digits(10); + self.scan_float_exponent(); + self.scan_float_suffix(); } - let last_bpos = self.last_pos; - self.err_span_(start_bpos, last_bpos, "expected `f32` or `f64` suffix"); - } - if is_float { - let last_bpos = self.last_pos; - self.check_float_base(start_bpos, last_bpos, base); - return token::LIT_FLOAT_UNSUFFIXED(str_to_ident( - num_str.as_slice())); + let last_pos = self.last_pos; + self.check_float_base(start_bpos, last_pos, base); + return token::LIT_FLOAT(self.ident_from(start_bpos)); + } else if self.curr_is('f') { + // or it might be an integer literal suffixed as a float + self.scan_float_suffix(); + let last_pos = self.last_pos; + self.check_float_base(start_bpos, last_pos, base); + return token::LIT_FLOAT(self.ident_from(start_bpos)); } else { - if num_str.len() == 0u { - let last_bpos = self.last_pos; - self.err_span_(start_bpos, last_bpos, "no valid digits found for number"); - num_str = "1".to_string(); + // it might be a float if it has an exponent + if self.curr_is('e') || self.curr_is('E') { + self.scan_float_exponent(); + self.scan_float_suffix(); + let last_pos = self.last_pos; + self.check_float_base(start_bpos, last_pos, base); + return token::LIT_FLOAT(self.ident_from(start_bpos)); } - let parsed = match from_str_radix::(num_str.as_slice(), - base as uint) { - Some(p) => p, - None => { - let last_bpos = self.last_pos; - self.err_span_(start_bpos, last_bpos, "int literal is too large"); - 1 - } - }; - - debug!("lexing {} as an unsuffixed integer literal", - num_str.as_slice()); - return token::LIT_INT_UNSUFFIXED(parsed as i64); + // but we certainly have an integer! + self.scan_int_suffix(); + return token::LIT_INTEGER(self.ident_from(start_bpos)); } } - - fn scan_numeric_escape(&mut self, n_hex_digits: uint, delim: char) -> bool { - let mut accum_int = 0u32; + /// Scan over `n_digits` hex digits, stopping at `delim`, reporting an + /// error if too many or too few digits are encountered. + fn scan_hex_digits(&mut self, n_digits: uint, delim: char) -> bool { + debug!("scanning {} digits until {}", n_digits, delim); let start_bpos = self.last_pos; - for _ in range(0, n_hex_digits) { + let mut accum_int = 0; + + for _ in range(0, n_digits) { if self.is_eof() { let last_bpos = self.last_pos; self.fatal_span_(start_bpos, last_bpos, "unterminated numeric character escape"); @@ -736,9 +644,9 @@ impl<'a> StringReader<'a> { Some(e) => { return match e { 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true, - 'x' => self.scan_numeric_escape(2u, delim), - 'u' if !ascii_only => self.scan_numeric_escape(4u, delim), - 'U' if !ascii_only => self.scan_numeric_escape(8u, delim), + 'x' => self.scan_hex_digits(2u, delim), + 'u' if !ascii_only => self.scan_hex_digits(4u, delim), + 'U' if !ascii_only => self.scan_hex_digits(8u, delim), '\n' if delim == '"' => { self.consume_whitespace(); true @@ -791,6 +699,80 @@ impl<'a> StringReader<'a> { true } + /// Scan over an int literal suffix. 
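
For reference, the suffix grammar the two scanners below accept, as a sketch
(ours, not from the patch):

// the int suffixes scan_int_suffix admits; float suffixes are handled
// separately by scan_float_suffix
fn is_int_suffix(s: &str) -> bool {
    match s {
        "i" | "u" | "i8" | "u8" | "i16" | "u16"
        | "i32" | "u32" | "i64" | "u64" => true,
        _ => false
    }
}

fn main() {
    assert!(is_int_suffix("u8"));
    assert!(!is_int_suffix("f32")); // goes through scan_float_suffix instead
}
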
+    fn scan_int_suffix(&mut self) {
+        match self.curr {
+            Some('i') | Some('u') => {
+                self.bump();
+
+                if self.curr_is('8') {
+                    self.bump();
+                } else if self.curr_is('1') {
+                    if !self.nextch_is('6') {
+                        self.err_span_(self.last_pos, self.pos,
+                                       "illegal int suffix");
+                    } else {
+                        self.bump(); self.bump();
+                    }
+                } else if self.curr_is('3') {
+                    if !self.nextch_is('2') {
+                        self.err_span_(self.last_pos, self.pos,
+                                       "illegal int suffix");
+                    } else {
+                        self.bump(); self.bump();
+                    }
+                } else if self.curr_is('6') {
+                    if !self.nextch_is('4') {
+                        self.err_span_(self.last_pos, self.pos,
+                                       "illegal int suffix");
+                    } else {
+                        self.bump(); self.bump();
+                    }
+                }
+            },
+            _ => { }
+        }
+    }
+
+    /// Scan over a float literal suffix
+    fn scan_float_suffix(&mut self) {
+        if self.curr_is('f') {
+            if (self.nextch_is('3') && self.nextnextch_is('2'))
+            || (self.nextch_is('6') && self.nextnextch_is('4')) {
+                self.bump();
+                self.bump();
+                self.bump();
+            } else {
+                self.err_span_(self.last_pos, self.pos, "illegal float suffix");
+            }
+        }
+    }
+
+    /// Scan over a float exponent.
+    fn scan_float_exponent(&mut self) {
+        if self.curr_is('e') || self.curr_is('E') {
+            self.bump();
+            if self.curr_is('-') || self.curr_is('+') {
+                self.bump();
+            }
+            if self.scan_digits(10) == 0 {
+                self.err_span_(self.last_pos, self.pos, "expected at least one digit in exponent")
+            }
+        }
+    }
+
+    /// Check that a base is valid for a floating literal, emitting a nice
+    /// error if it isn't.
+    fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: uint) {
+        match base {
+            16u => self.err_span_(start_bpos, last_bpos, "hexadecimal float literal is not \
+                                                          supported"),
+            8u => self.err_span_(start_bpos, last_bpos, "octal float literal is not supported"),
+            2u => self.err_span_(start_bpos, last_bpos, "binary float literal is not supported"),
+            _ => ()
+        }
+    }
+
     fn binop(&mut self, op: token::BinOp) -> token::Token {
         self.bump();
         if self.curr_is('=') {
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index 62750e60bf8ef..37c84c95af654 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -506,6 +506,115 @@ pub fn binary_lit(lit: &str) -> Rc<Vec<u8>> {
 
     Rc::new(res)
 }
 
+pub fn integer_lit(s: &str, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
+    // s can only be ascii, byte indexing is fine
+
+    let s2 = s.chars().filter(|&c| c != '_').collect::<String>();
+    let mut s = s2.as_slice();
+
+    debug!("parse_integer_lit: {}", s);
+
+    if s.len() == 1 {
+        return ast::LitIntUnsuffixed((s.char_at(0)).to_digit(10).unwrap() as i64);
+    }
+
+    let mut base = 10;
+    let orig = s;
+
+    #[deriving(Show)]
+    enum Result {
+        Nothing,
+        Signed(ast::IntTy),
+        Unsigned(ast::UintTy)
+    }
+
+    impl Result {
+        fn suffix_len(&self) -> uint {
+            match *self {
+                Nothing => 0,
+                Signed(s) => s.suffix_len(),
+                Unsigned(u) => u.suffix_len()
+            }
+        }
+    }
+
+    let mut ty = Nothing;
+
+
+    if s.char_at(0) == '0' {
+        match s.char_at(1) {
+            'x' => base = 16,
+            'o' => base = 8,
+            'b' => base = 2,
+            _ => { }
+        }
+    }
+
+    if base != 10 {
+        s = s.slice_from(2);
+    }
+
+    let last = s.len() - 1;
+    match s.char_at(last) {
+        'i' => ty = Signed(ast::TyI),
+        'u' => ty = Unsigned(ast::TyU),
+        '8' => {
+            if s.len() > 2 {
+                match s.char_at(last - 1) {
+                    'i' => ty = Signed(ast::TyI8),
+                    'u' => ty = Unsigned(ast::TyU8),
+                    _ => { }
+                }
+            }
+        },
+        '6' => {
+            if s.len() > 3 && s.char_at(last - 1) == '1' {
+                match s.char_at(last - 2) {
+                    'i' => ty = Signed(ast::TyI16),
+                    'u' => ty = Unsigned(ast::TyU16),
+                    _ => { }
+                }
+            }
+        },
+        '2' => {
+            if s.len() > 3 && s.char_at(last - 1) ==
'3' { + match s.char_at(last - 2) { + 'i' => ty = Signed(ast::TyI32), + 'u' => ty = Unsigned(ast::TyU32), + _ => { } + } + } + }, + '4' => { + if s.len() > 3 && s.char_at(last - 1) == '6' { + match s.char_at(last - 2) { + 'i' => ty = Signed(ast::TyI64), + 'u' => ty = Unsigned(ast::TyU64), + _ => { } + } + } + }, + _ => { } + } + + + s = s.slice_to(s.len() - ty.suffix_len()); + + debug!("The suffix is {}, base {}, the new string is {}, the original \ + string was {}", ty, base, s, orig); + + let res: u64 = match ::std::num::from_str_radix(s, base) { + Some(r) => r, + None => { sd.span_err(sp, "int literal is too large"); 0 } + }; + + match ty { + Nothing => ast::LitIntUnsuffixed(res as i64), + Signed(t) => ast::LitInt(res as i64, t), + Unsigned(t) => ast::LitUint(res, t) + } +} + #[cfg(test)] mod test { use super::*; diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 553e685bddece..e0bcb41a75369 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -33,8 +33,8 @@ use ast::{ForeignItem, ForeignItemStatic, ForeignItemFn, ForeignMod}; use ast::{Ident, NormalFn, Inherited, Item, Item_, ItemStatic}; use ast::{ItemEnum, ItemFn, ItemForeignMod, ItemImpl}; use ast::{ItemMac, ItemMod, ItemStruct, ItemTrait, ItemTy, Lit, Lit_}; -use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar, LitByte, LitBinary}; -use ast::{LitIntUnsuffixed, LitNil, LitStr, LitUint, Local, LocalLet}; +use ast::{LitBool, LitChar, LitByte, LitBinary}; +use ast::{LitNil, LitStr, LitUint, Local, LocalLet}; use ast::{MutImmutable, MutMutable, Mac_, MacInvocTT, Matcher, MatchNonterminal}; use ast::{MatchSeq, MatchTok, Method, MutTy, BiMul, Mutability}; use ast::{NamedField, UnNeg, NoReturn, UnNot, P, Pat, PatEnum}; @@ -1541,20 +1541,14 @@ impl<'a> Parser<'a> { } } - /// Matches token_lit = LIT_INT | ... + /// Matches token_lit = LIT_INTEGER | ... 
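
To summarize what `integer_lit` above does with the raw token text, here is a
sketch of its base detection (ours, not from the patch; the real function
also filters out underscores and strips the type suffix first):

fn base_of<'a>(s: &'a str) -> (uint, &'a str) {
    if s.starts_with("0x") { (16u, s.slice_from(2)) }
    else if s.starts_with("0o") { (8u, s.slice_from(2)) }
    else if s.starts_with("0b") { (2u, s.slice_from(2)) }
    else { (10u, s) }
}

fn main() {
    assert_eq!(base_of("0xff"), (16u, "ff"));
    assert_eq!(base_of("42"), (10u, "42"));
}
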
pub fn lit_from_token(&mut self, tok: &token::Token) -> Lit_ { match *tok { token::LIT_BYTE(i) => LitByte(parse::byte_lit(i.as_str()).val0()), token::LIT_CHAR(i) => LitChar(parse::char_lit(i.as_str()).val0()), - token::LIT_INT(i, it) => LitInt(i, it), - token::LIT_UINT(u, ut) => LitUint(u, ut), - token::LIT_INT_UNSUFFIXED(i) => LitIntUnsuffixed(i), - token::LIT_FLOAT(s, ft) => { - LitFloat(self.id_to_interned_str(s), ft) - } - token::LIT_FLOAT_UNSUFFIXED(s) => { - LitFloatUnsuffixed(self.id_to_interned_str(s)) - } + token::LIT_INTEGER(s) => parse::integer_lit(self.id_to_interned_str(s).get(), + &self.sess.span_diagnostic, self.span), + token::LIT_FLOAT(s) => parse::float_lit(s.as_str()), token::LIT_STR(s) => { LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()), ast::CookedStr) @@ -5398,3 +5392,4 @@ impl<'a> Parser<'a> { } } } + diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index bb6183b7e9ea3..83d373d033b3d 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -10,7 +10,6 @@ use ast; use ast::{P, Ident, Name, Mrk}; -use ast_util; use ext::mtwt; use parse::token; use util::interner::{RcStr, StrInterner}; @@ -81,11 +80,8 @@ pub enum Token { /* Literals */ LIT_BYTE(Ident), LIT_CHAR(Ident), - LIT_INT(i64, ast::IntTy), - LIT_UINT(u64, ast::UintTy), - LIT_INT_UNSUFFIXED(i64), - LIT_FLOAT(Ident, ast::FloatTy), - LIT_FLOAT_UNSUFFIXED(Ident), + LIT_INTEGER(Ident), + LIT_FLOAT(Ident), LIT_STR(Ident), LIT_STR_RAW(Ident, uint), /* raw str delimited by n hash symbols */ LIT_BINARY(Ident), @@ -206,24 +202,10 @@ pub fn to_string(t: &Token) -> String { LIT_CHAR(c) => { format!("'{}'", get_ident(c).get()) } - LIT_INT(i, t) => ast_util::int_ty_to_string(t, Some(i)), - LIT_UINT(u, t) => ast_util::uint_ty_to_string(t, Some(u)), - LIT_INT_UNSUFFIXED(i) => { (i as u64).to_string() } - LIT_FLOAT(s, t) => { - let mut body = String::from_str(get_ident(s).get()); - if body.as_slice().ends_with(".") { - body.push_char('0'); // `10.f` is not a float literal - } - body.push_str(ast_util::float_ty_to_string(t).as_slice()); - body - } - LIT_FLOAT_UNSUFFIXED(s) => { - let mut body = String::from_str(get_ident(s).get()); - if body.as_slice().ends_with(".") { - body.push_char('0'); // `10.f` is not a float literal - } - body + LIT_INTEGER(c) | LIT_FLOAT(c) => { + get_ident(c).get().to_string() } + LIT_STR(s) => { format!("\"{}\"", get_ident(s).get()) } @@ -285,11 +267,8 @@ pub fn can_begin_expr(t: &Token) -> bool { TILDE => true, LIT_BYTE(_) => true, LIT_CHAR(_) => true, - LIT_INT(_, _) => true, - LIT_UINT(_, _) => true, - LIT_INT_UNSUFFIXED(_) => true, - LIT_FLOAT(_, _) => true, - LIT_FLOAT_UNSUFFIXED(_) => true, + LIT_INTEGER(_) => true, + LIT_FLOAT(_) => true, LIT_STR(_) => true, LIT_STR_RAW(_, _) => true, LIT_BINARY(_) => true, @@ -326,11 +305,8 @@ pub fn is_lit(t: &Token) -> bool { match *t { LIT_BYTE(_) => true, LIT_CHAR(_) => true, - LIT_INT(_, _) => true, - LIT_UINT(_, _) => true, - LIT_INT_UNSUFFIXED(_) => true, - LIT_FLOAT(_, _) => true, - LIT_FLOAT_UNSUFFIXED(_) => true, + LIT_INTEGER(_) => true, + LIT_FLOAT(_) => true, LIT_STR(_) => true, LIT_STR_RAW(_, _) => true, LIT_BINARY(_) => true, From f512779554a436d11dd9ffde4c198da6241dfd58 Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Fri, 4 Jul 2014 22:30:39 -0700 Subject: [PATCH 13/15] lexer: lex WS/COMMENT/SHEBANG rather than skipping Now, the lexer will categorize every byte in its input according to the grammar. 
The parser skips over these while parsing, thus avoiding their presence in the input to syntax extensions.
---
 src/librustdoc/html/highlight.rs      |  41 +++-----
 src/libsyntax/parse/attr.rs           |   2 +-
 src/libsyntax/parse/lexer/comments.rs |   8 +-
 src/libsyntax/parse/lexer/mod.rs      | 134 ++++++++++++++++----------
 src/libsyntax/parse/parser.rs         |  20 +++-
 src/libsyntax/parse/token.rs          |  16 ++-
 6 files changed, 134 insertions(+), 87 deletions(-)

diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
index af7a822bc29e9..3cb5cdc043962 100644
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@@ -18,7 +18,6 @@ use std::io;
 
 use syntax::parse;
 use syntax::parse::lexer;
-use syntax::codemap::{BytePos, Span};
 
 use html::escape::Escape;
 
@@ -59,38 +58,30 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader,
         None => {}
     }
     try!(write!(out, "class='rust {}'>\n", class.unwrap_or("")));
-    let mut last = BytePos(0);
     let mut is_attribute = false;
     let mut is_macro = false;
     let mut is_macro_nonterminal = false;
     loop {
         let next = lexer.next_token();
-        let test = if next.tok == t::EOF {lexer.pos} else {next.sp.lo};
-
-        // The lexer consumes all whitespace and non-doc-comments when iterating
-        // between tokens. If this token isn't directly adjacent to our last
-        // token, then we need to emit the whitespace/comment.
-        //
-        // If the gap has any '/' characters then we consider the whole thing a
-        // comment. This will classify some whitespace as a comment, but that
-        // doesn't matter too much for syntax highlighting purposes.
-        if test > last {
-            let snip = sess.span_diagnostic.cm.span_to_snippet(Span {
-                lo: last,
-                hi: test,
-                expn_info: None,
-            }).unwrap();
-            if snip.as_slice().contains("/") {
-                try!(write!(out, "<span class='comment'>{}</span>",
-                            Escape(snip.as_slice())));
-            } else {
-                try!(write!(out, "{}", Escape(snip.as_slice())));
-            }
-        }
-        last = next.sp.hi;
+
+        let snip = |sp| sess.span_diagnostic.cm.span_to_snippet(sp).unwrap();
+
         if next.tok == t::EOF { break }
 
         let klass = match next.tok {
+            t::WS => {
+                try!(write!(out, "{}", Escape(snip(next.sp).as_slice())));
+                continue
+            },
+            t::COMMENT => {
+                try!(write!(out, "<span class='comment'>{}</span>",
+                            Escape(snip(next.sp).as_slice())));
+                continue
+            },
+            t::SHEBANG(s) => {
+                try!(write!(out, "{}", Escape(s.as_str())));
+                continue
+            },
             // If this '&' token is directly adjacent to another token, assume
             // that it's the address-of operator instead of the and-operator.
            // This allows us to give all pointers their own class (`Box` and

diff --git a/src/libsyntax/parse/attr.rs b/src/libsyntax/parse/attr.rs
index b2297ec770cc3..c227d8a0fedc8 100644
--- a/src/libsyntax/parse/attr.rs
+++ b/src/libsyntax/parse/attr.rs
@@ -34,7 +34,7 @@ impl<'a> ParserAttr for Parser<'a> {
     fn parse_outer_attributes(&mut self) -> Vec<ast::Attribute> {
         let mut attrs: Vec<ast::Attribute> = Vec::new();
         loop {
-            debug!("parse_outer_attributes: self.token={:?}",
+            debug!("parse_outer_attributes: self.token={}",
                    self.token);
             match self.token {
                 token::POUND => {
diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs
index c5dd10382a959..3f3a8a723f10c 100644
--- a/src/libsyntax/parse/lexer/comments.rs
+++ b/src/libsyntax/parse/lexer/comments.rs
@@ -13,7 +13,7 @@ use codemap::{BytePos, CharPos, CodeMap, Pos};
 use diagnostic;
 use parse::lexer::{is_whitespace, Reader};
 use parse::lexer::{StringReader, TokenAndSpan};
-use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
+use parse::lexer::is_block_doc_comment;
 use parse::lexer;
 use parse::token;
 
@@ -42,9 +42,9 @@ pub struct Comment {
 }
 
 pub fn is_doc_comment(s: &str) -> bool {
-    (s.starts_with("///") && !is_line_non_doc_comment(s)) ||
+    (s.starts_with("///") && super::is_doc_comment(s)) ||
     s.starts_with("//!") ||
-    (s.starts_with("/**") && !is_block_non_doc_comment(s)) ||
+    (s.starts_with("/**") && is_block_doc_comment(s)) ||
     s.starts_with("/*!")
 }
 
@@ -260,7 +260,7 @@ fn read_block_comment(rdr: &mut StringReader,
             rdr.bump();
             rdr.bump();
         }
-        if !is_block_non_doc_comment(curr_line.as_slice()) {
+        if is_block_doc_comment(curr_line.as_slice()) {
            return
        }
        assert!(!curr_line.as_slice().contains_char('\n'));
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 61a37f77d348b..947f3d59b86fa 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -187,7 +187,7 @@ impl<'a> StringReader<'a> {
     /// Advance peek_tok and peek_span to refer to the next token, and
     /// possibly update the interner.
     fn advance_token(&mut self) {
-        match self.consume_whitespace_and_comments() {
+        match self.scan_whitespace_or_comment() {
             Some(comment) => {
                 self.peek_span = comment.sp;
                 self.peek_tok = comment.tok;
@@ -339,8 +339,7 @@ impl<'a> StringReader<'a> {
 
     /// PRECONDITION: self.curr is not whitespace
     /// Eats any kind of comment.
-    /// Returns a Some(sugared-doc-attr) if one exists, None otherwise
-    fn consume_any_line_comment(&mut self) -> Option<TokenAndSpan> {
+    fn scan_comment(&mut self) -> Option<TokenAndSpan> {
         match self.curr {
             Some(c) => {
                 if c.is_whitespace() {
@@ -375,28 +374,32 @@ impl<'a> StringReader<'a> {
                         }
                         self.bump();
                     }
-                    let ret = self.with_str_from(start_bpos, |string| {
+                    return self.with_str_from(start_bpos, |string| {
                         // but comments with only more "/"s are not
-                        if !is_line_non_doc_comment(string) {
-                            Some(TokenAndSpan{
-                                tok: token::DOC_COMMENT(str_to_ident(string)),
-                                sp: codemap::mk_sp(start_bpos, self.last_pos)
-                            })
+                        let tok = if is_doc_comment(string) {
+                            token::DOC_COMMENT(str_to_ident(string))
                         } else {
-                            None
-                        }
-                    });
+                            token::COMMENT
+                        };
 
-                    if ret.is_some() {
-                        return ret;
-                    }
+                        return Some(TokenAndSpan{
+                            tok: tok,
+                            sp: codemap::mk_sp(start_bpos, self.last_pos)
+                        });
+                    });
                 } else {
+                    let start_bpos = self.last_pos - BytePos(2);
                     while !self.curr_is('\n') && !self.is_eof() { self.bump(); }
+                    return Some(TokenAndSpan {
+                        tok: token::COMMENT,
+                        sp: codemap::mk_sp(start_bpos, self.last_pos)
+                    });
                 }
-                // Restart whitespace munch.
-                self.consume_whitespace_and_comments()
             }
-            Some('*') => { self.bump(); self.bump(); self.consume_block_comment() }
+            Some('*') => {
+                self.bump(); self.bump();
+                self.scan_block_comment()
+            }
             _ => None
         }
     } else if self.curr_is('#') {
@@ -412,9 +415,15 @@ impl<'a> StringReader<'a> {
             let cmap = CodeMap::new();
             cmap.files.borrow_mut().push(self.filemap.clone());
             let loc = cmap.lookup_char_pos_adj(self.last_pos);
+            debug!("Skipping a shebang");
             if loc.line == 1u && loc.col == CharPos(0u) {
+                // FIXME: Add shebang "token", return it
+                let start = self.last_pos;
                 while !self.curr_is('\n') && !self.is_eof() { self.bump(); }
-                return self.consume_whitespace_and_comments();
+                return Some(TokenAndSpan {
+                    tok: token::SHEBANG(self.ident_from(start)),
+                    sp: codemap::mk_sp(start, self.last_pos)
+                });
             }
         }
         None
@@ -423,15 +432,33 @@ impl<'a> StringReader<'a> {
         }
     }
 
-    /// EFFECT: eats whitespace and comments.
-    /// Returns a Some(sugared-doc-attr) if one exists, None otherwise.
-    fn consume_whitespace_and_comments(&mut self) -> Option<TokenAndSpan> {
-        while is_whitespace(self.curr) { self.bump(); }
-        return self.consume_any_line_comment();
+    /// If there is whitespace, shebang, or a comment, scan it. Otherwise,
+    /// return None.
+    fn scan_whitespace_or_comment(&mut self) -> Option<TokenAndSpan> {
+        match self.curr.unwrap_or('\0') {
+            // # to handle shebang at start of file -- this is the entry point
+            // for skipping over all "junk"
+            '/' | '#' => {
+                let c = self.scan_comment();
+                debug!("scanning a comment {}", c);
+                c
+            },
+            c if is_whitespace(Some(c)) => {
+                let start_bpos = self.last_pos;
+                while is_whitespace(self.curr) { self.bump(); }
+                let c = Some(TokenAndSpan {
+                    tok: token::WS,
+                    sp: codemap::mk_sp(start_bpos, self.last_pos)
+                });
+                debug!("scanning whitespace: {}", c);
+                c
+            },
+            _ => None
+        }
     }
 
     /// Might return a sugared-doc-attr
-    fn consume_block_comment(&mut self) -> Option<TokenAndSpan> {
+    fn scan_block_comment(&mut self) -> Option<TokenAndSpan> {
         // block comments starting with "/**" or "/*!" are doc-comments
         let is_doc_comment = self.curr_is('*') || self.curr_is('!');
         let start_bpos = self.last_pos - BytePos(2);
@@ -466,28 +493,23 @@ impl<'a> StringReader<'a> {
             self.bump();
         }
 
-        let res = if is_doc_comment {
-            self.with_str_from(start_bpos, |string| {
-                // but comments with only "*"s between two "/"s are not
-                if !is_block_non_doc_comment(string) {
-                    let string = if has_cr {
-                        self.translate_crlf(start_bpos, string,
-                                            "bare CR not allowed in block doc-comment")
-                    } else { string.into_maybe_owned() };
-                    Some(TokenAndSpan{
-                        tok: token::DOC_COMMENT(str_to_ident(string.as_slice())),
-                        sp: codemap::mk_sp(start_bpos, self.last_pos)
-                    })
-                } else {
-                    None
-                }
-            })
-        } else {
-            None
-        };
+        self.with_str_from(start_bpos, |string| {
+            // but comments with only "*"s between two "/"s are not
+            let tok = if is_block_doc_comment(string) {
+                let string = if has_cr {
+                    self.translate_crlf(start_bpos, string,
+                                        "bare CR not allowed in block doc-comment")
+                } else { string.into_maybe_owned() };
+                token::DOC_COMMENT(str_to_ident(string.as_slice()))
+            } else {
+                token::COMMENT
+            };
-        // restart whitespace munch.
-        if res.is_some() { res } else { self.consume_whitespace_and_comments() }
+            Some(TokenAndSpan{
+                tok: tok,
+                sp: codemap::mk_sp(start_bpos, self.last_pos)
+            })
+        })
     }
 
     /// Scan through any digits (base `radix`) or underscores, and return how
@@ -1242,12 +1264,18 @@ fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
 fn is_dec_digit(c: Option<char>) -> bool { return in_range(c, '0', '9'); }
 
-pub fn is_line_non_doc_comment(s: &str) -> bool {
-    s.starts_with("////")
+pub fn is_doc_comment(s: &str) -> bool {
+    let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/')
+              || s.starts_with("//!");
+    debug!("is `{}` a doc comment? {}", s, res);
+    res
 }
 
-pub fn is_block_non_doc_comment(s: &str) -> bool {
-    s.starts_with("/***")
+pub fn is_block_doc_comment(s: &str) -> bool {
+    let res = (s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*')
+              || s.starts_with("/*!");
+    debug!("is `{}` a doc comment? {}", s, res);
+    res
 }
 
 fn ident_start(c: Option<char>) -> bool {
@@ -1383,9 +1411,9 @@ mod test {
     }
 
     #[test] fn line_doc_comments() {
-        assert!(!is_line_non_doc_comment("///"));
-        assert!(!is_line_non_doc_comment("/// blah"));
-        assert!(is_line_non_doc_comment("////"));
+        assert!(is_doc_comment("///"));
+        assert!(is_doc_comment("/// blah"));
+        assert!(!is_doc_comment("////"));
     }
 
     #[test] fn nested_block_comments() {
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index e0bcb41a75369..51f2c74d3aeff 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -325,10 +325,24 @@ fn is_plain_ident_or_underscore(t: &token::Token) -> bool {
     is_plain_ident(t) || *t == token::UNDERSCORE
 }
 
+/// Get a token the parser cares about
+fn real_token(rdr: &mut Reader) -> TokenAndSpan {
+    let mut t = rdr.next_token();
+    loop {
+        match t.tok {
+            token::WS | token::COMMENT | token::SHEBANG(_) => {
+                t = rdr.next_token();
+            },
+            _ => break
+        }
+    }
+    t
+}
+
 impl<'a> Parser<'a> {
     pub fn new(sess: &'a ParseSess, cfg: ast::CrateConfig,
                mut rdr: Box<Reader>) -> Parser<'a> {
-        let tok0 = rdr.next_token();
+        let tok0 = real_token(rdr);
         let span = tok0.sp;
         let placeholder = TokenAndSpan {
             tok: token::UNDERSCORE,
@@ -864,7 +878,7 @@ impl<'a> Parser<'a> {
             None
         };
         let next = if self.buffer_start == self.buffer_end {
-            self.reader.next_token()
+            real_token(self.reader)
         } else {
             // Avoid token copies with `replace`.
             let buffer_start = self.buffer_start as uint;
@@ -908,7 +922,7 @@ impl<'a> Parser<'a> {
                             -> R {
         let dist = distance as int;
         while self.buffer_length() < dist {
-            self.buffer[self.buffer_end as uint] = self.reader.next_token();
+            self.buffer[self.buffer_end as uint] = real_token(self.reader);
             self.buffer_end = (self.buffer_end + 1) & 3;
         }
         f(&self.buffer[((self.buffer_start + dist - 1) & 3) as uint].tok)
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index 83d373d033b3d..e65f9f208a317 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -97,8 +97,18 @@ pub enum Token {
 
     /* For interpolation */
     INTERPOLATED(Nonterminal),
-    DOC_COMMENT(Ident),
+
+    DOC_COMMENT(Ident),
+
+    // Junk. These carry no data because we don't really care about the data
+    // they *would* carry, and don't really want to allocate a new ident for
+    // them. Instead, users could extract that from the associated span.
+ + /// Whitespace + WS, + /// Comment + COMMENT, + SHEBANG(Ident), + EOF, } @@ -231,6 +241,10 @@ pub fn to_string(t: &Token) -> String { /* Other */ DOC_COMMENT(s) => get_ident(s).get().to_string(), EOF => "".to_string(), + WS => " ".to_string(), + COMMENT => "/* */".to_string(), + SHEBANG(s) => format!("/* shebang: {}*/", s.as_str()), + INTERPOLATED(ref nt) => { match nt { &NtExpr(ref e) => ::print::pprust::expr_to_string(&**e), From 092c5078be5b9abfc4e1a80e3ef9d015d321479c Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Sun, 6 Jul 2014 01:17:59 -0700 Subject: [PATCH 14/15] ast: make Name its own type --- src/librustc/lint/builtin.rs | 2 +- src/librustc/metadata/decoder.rs | 2 +- src/libsyntax/ast.rs | 26 ++++++-- src/libsyntax/ext/base.rs | 3 + src/libsyntax/ext/mtwt.rs | 6 +- src/libsyntax/ext/quote.rs | 23 ++++--- src/libsyntax/parse/attr.rs | 4 +- src/libsyntax/parse/lexer/mod.rs | 50 +++++++-------- src/libsyntax/parse/parser.rs | 20 +++--- src/libsyntax/parse/token.rs | 101 +++++++++++++++++-------------- src/libsyntax/util/interner.rs | 18 +++--- 11 files changed, 149 insertions(+), 106 deletions(-) diff --git a/src/librustc/lint/builtin.rs b/src/librustc/lint/builtin.rs index 98a6f7d5ed38d..ae401b9d6f15c 100644 --- a/src/librustc/lint/builtin.rs +++ b/src/librustc/lint/builtin.rs @@ -1114,7 +1114,7 @@ impl UnusedMut { match mode { ast::BindByValue(ast::MutMutable) => { if !token::get_ident(ident).get().starts_with("_") { - mutables.insert_or_update_with(ident.name as uint, + mutables.insert_or_update_with(ident.name.uint(), vec!(id), |_, old| { old.push(id); }); } } diff --git a/src/librustc/metadata/decoder.rs b/src/librustc/metadata/decoder.rs index 8a2b95ae463b4..cc41223688ee0 100644 --- a/src/librustc/metadata/decoder.rs +++ b/src/librustc/metadata/decoder.rs @@ -323,7 +323,7 @@ fn item_name(intr: &IdentInterner, item: ebml::Doc) -> ast::Ident { let string = name.as_str_slice(); match intr.find_equiv(&string) { None => token::str_to_ident(string), - Some(val) => ast::Ident::new(val as ast::Name), + Some(val) => ast::Ident::new(val), } } diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 2a49d0e0f5bf8..ebfc45d22cee9 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -52,10 +52,7 @@ impl Ident { pub fn new(name: Name) -> Ident { Ident {name: name, ctxt: EMPTY_CTXT}} pub fn as_str<'a>(&'a self) -> &'a str { - unsafe { - // FIXME #12938: can't use copy_lifetime since &str isn't a &T - ::std::mem::transmute(token::get_ident(*self).get()) - } + self.name.as_str() } } @@ -109,7 +106,26 @@ pub static ILLEGAL_CTXT : SyntaxContext = 1; /// A name is a part of an identifier, representing a string or gensym. It's /// the result of interning. 
-pub type Name = u32;
+#[deriving(Eq, Ord, PartialEq, PartialOrd, Hash, Encodable, Decodable, Clone, Show)]
+pub struct Name(pub u32);
+
+impl Name {
+    pub fn as_str<'a>(&'a self) -> &'a str {
+        unsafe {
+            // FIXME #12938: can't use copy_lifetime since &str isn't a &T
+            ::std::mem::transmute(token::get_name(*self).get())
+        }
+    }
+
+    pub fn uint(&self) -> uint {
+        let Name(nm) = *self;
+        nm as uint
+    }
+
+    pub fn ident(&self) -> Ident {
+        Ident { name: *self, ctxt: 0 }
+    }
+}
 
 /// A mark represents a unique id associated with a macro expansion
 pub type Mrk = u32;
diff --git a/src/libsyntax/ext/base.rs b/src/libsyntax/ext/base.rs
index 1dbbe3b973c83..9a5c7e86d21c6 100644
--- a/src/libsyntax/ext/base.rs
+++ b/src/libsyntax/ext/base.rs
@@ -535,6 +535,9 @@ impl<'a> ExtCtxt<'a> {
     pub fn ident_of(&self, st: &str) -> ast::Ident {
         str_to_ident(st)
     }
+    pub fn name_of(&self, st: &str) -> ast::Name {
+        token::intern(st)
+    }
 }
 
 /// Extract a string literal from the macro expanded version of `expr`,
diff --git a/src/libsyntax/ext/mtwt.rs b/src/libsyntax/ext/mtwt.rs
index 8608f7fb54553..7b29bbaef77eb 100644
--- a/src/libsyntax/ext/mtwt.rs
+++ b/src/libsyntax/ext/mtwt.rs
@@ -82,8 +82,8 @@ fn apply_rename_internal(id: Ident,
                        to: Name,
                        ctxt: SyntaxContext,
                        table: &SCTable) -> SyntaxContext {
-    let key = (ctxt,id,to);
-    let new_ctxt = |_: &(SyntaxContext, Ident, Mrk)|
+    let key = (ctxt, id, to);
+    let new_ctxt = |_: &(SyntaxContext, Ident, Name)|
         idx_push(&mut *table.table.borrow_mut(), Rename(id, to, ctxt));
 
     *table.rename_memo.borrow_mut().find_or_insert_with(key, new_ctxt)
@@ -142,7 +142,7 @@ pub fn clear_tables() {
 }
 
 /// Add a value to the end of a vec, return its index
-fn idx_push<T>(vec: &mut Vec<T> , val: T) -> u32 {
+fn idx_push<T>(vec: &mut Vec<T>, val: T) -> u32 {
     vec.push(val);
     (vec.len() - 1) as u32
 }
diff --git a/src/libsyntax/ext/quote.rs b/src/libsyntax/ext/quote.rs
index 24630dd09a20e..696d62838ba79 100644
--- a/src/libsyntax/ext/quote.rs
+++ b/src/libsyntax/ext/quote.rs
@@ -363,6 +363,15 @@ fn mk_ident(cx: &ExtCtxt, sp: Span, ident: ast::Ident) -> Gc<ast::Expr> {
                         vec!(e_str))
 }
 
+// Lift a name to the expr that evaluates to that name
+fn mk_name(cx: &ExtCtxt, sp: Span, ident: ast::Ident) -> Gc<ast::Expr> {
+    let e_str = cx.expr_str(sp, token::get_ident(ident));
+    cx.expr_method_call(sp,
+                        cx.expr_ident(sp, id_ext("ext_cx")),
+                        id_ext("name_of"),
+                        vec!(e_str))
+}
+
 fn mk_ast_path(cx: &ExtCtxt, sp: Span, name: &str) -> Gc<ast::Expr> {
     let idents = vec!(id_ext("syntax"), id_ext("ast"), id_ext(name));
     cx.expr_path(cx.path_global(sp, idents))
@@ -401,37 +410,37 @@ fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> Gc<ast::Expr> {
     }
 
     LIT_BYTE(i) => {
-        let e_byte = mk_ident(cx, sp, i);
+        let e_byte = mk_name(cx, sp, i.ident());
 
         return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_BYTE"), vec!(e_byte));
     }
 
     LIT_CHAR(i) => {
-        let e_char = mk_ident(cx, sp, i);
+        let e_char = mk_name(cx, sp, i.ident());
 
         return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_CHAR"), vec!(e_char));
     }
 
     LIT_INTEGER(i) => {
-        let e_int = mk_ident(cx, sp, i);
+        let e_int = mk_name(cx, sp, i.ident());
         return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_INTEGER"), vec!(e_int));
     }
 
     LIT_FLOAT(fident) => {
-        let e_fident = mk_ident(cx, sp, fident);
+        let e_fident = mk_name(cx, sp, fident.ident());
         return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_FLOAT"), vec!(e_fident));
     }
 
     LIT_STR(ident) => {
         return cx.expr_call(sp,
                             mk_token_path(cx, sp, "LIT_STR"),
-                            vec!(mk_ident(cx, sp, ident)));
+                            vec!(mk_name(cx, sp, ident.ident())));
     }
 
     LIT_STR_RAW(ident, n) => {
         return cx.expr_call(sp,
mk_token_path(cx, sp, "LIT_STR_RAW"), - vec!(mk_ident(cx, sp, ident), cx.expr_uint(sp, n))); + vec!(mk_name(cx, sp, ident.ident()), cx.expr_uint(sp, n))); } IDENT(ident, b) => { @@ -449,7 +458,7 @@ fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> Gc { DOC_COMMENT(ident) => { return cx.expr_call(sp, mk_token_path(cx, sp, "DOC_COMMENT"), - vec!(mk_ident(cx, sp, ident))); + vec!(mk_name(cx, sp, ident.ident()))); } INTERPOLATED(_) => fail!("quote! with interpolated token"), diff --git a/src/libsyntax/parse/attr.rs b/src/libsyntax/parse/attr.rs index c227d8a0fedc8..55ad1b7712310 100644 --- a/src/libsyntax/parse/attr.rs +++ b/src/libsyntax/parse/attr.rs @@ -43,7 +43,7 @@ impl<'a> ParserAttr for Parser<'a> { token::DOC_COMMENT(s) => { let attr = ::attr::mk_sugared_doc_attr( attr::mk_attr_id(), - self.id_to_interned_str(s), + self.id_to_interned_str(s.ident()), self.span.lo, self.span.hi ); @@ -139,7 +139,7 @@ impl<'a> ParserAttr for Parser<'a> { let Span { lo, hi, .. } = self.span; self.bump(); attr::mk_sugared_doc_attr(attr::mk_attr_id(), - self.id_to_interned_str(s), + self.id_to_interned_str(s.ident()), lo, hi) } diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 947f3d59b86fa..f462ab92383ac 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -216,18 +216,18 @@ impl<'a> StringReader<'a> { self.with_str_from_to(start, self.last_pos, f) } - /// Create an Ident from a given offset to the current offset, each + /// Create a Name from a given offset to the current offset, each /// adjusted 1 towards each other (assumes that on either side there is a /// single-byte delimiter). - pub fn ident_from(&self, start: BytePos) -> ast::Ident { + pub fn name_from(&self, start: BytePos) -> ast::Name { debug!("taking an ident from {} to {}", start, self.last_pos); - self.with_str_from(start, str_to_ident) + self.with_str_from(start, token::intern) } - /// As ident_from, with an explicit endpoint. - pub fn ident_from_to(&self, start: BytePos, end: BytePos) -> ast::Ident { + /// As name_from, with an explicit endpoint. 
+ pub fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name { debug!("taking an ident from {} to {}", start, end); - self.with_str_from_to(start, end, str_to_ident) + self.with_str_from_to(start, end, token::intern) } /// Calls `f` with a string slice of the source text spanning from `start` @@ -377,7 +377,7 @@ impl<'a> StringReader<'a> { return self.with_str_from(start_bpos, |string| { // but comments with only more "/"s are not let tok = if is_doc_comment(string) { - token::DOC_COMMENT(str_to_ident(string)) + token::DOC_COMMENT(token::intern(string)) } else { token::COMMENT }; @@ -421,7 +421,7 @@ impl<'a> StringReader<'a> { let start = self.last_pos; while !self.curr_is('\n') && !self.is_eof() { self.bump(); } return Some(TokenAndSpan { - tok: token::SHEBANG(self.ident_from(start)), + tok: token::SHEBANG(self.name_from(start)), sp: codemap::mk_sp(start, self.last_pos) }); } @@ -500,7 +500,7 @@ impl<'a> StringReader<'a> { self.translate_crlf(start_bpos, string, "bare CR not allowed in block doc-comment") } else { string.into_maybe_owned() }; - token::DOC_COMMENT(str_to_ident(string.as_slice())) + token::DOC_COMMENT(token::intern(string.as_slice())) } else { token::COMMENT }; @@ -548,17 +548,17 @@ impl<'a> StringReader<'a> { } 'u' | 'i' => { self.scan_int_suffix(); - return token::LIT_INTEGER(self.ident_from(start_bpos)); + return token::LIT_INTEGER(self.name_from(start_bpos)); }, 'f' => { let last_pos = self.last_pos; self.scan_float_suffix(); self.check_float_base(start_bpos, last_pos, base); - return token::LIT_FLOAT(self.ident_from(start_bpos)); + return token::LIT_FLOAT(self.name_from(start_bpos)); } _ => { // just a 0 - return token::LIT_INTEGER(self.ident_from(start_bpos)); + return token::LIT_INTEGER(self.name_from(start_bpos)); } } } else if c.is_digit_radix(10) { @@ -571,7 +571,7 @@ impl<'a> StringReader<'a> { self.err_span_(start_bpos, self.last_pos, "no valid digits found for number"); // eat any suffix self.scan_int_suffix(); - return token::LIT_INTEGER(str_to_ident("0")); + return token::LIT_INTEGER(token::intern("0")); } // might be a float, but don't be greedy if this is actually an @@ -589,13 +589,13 @@ impl<'a> StringReader<'a> { } let last_pos = self.last_pos; self.check_float_base(start_bpos, last_pos, base); - return token::LIT_FLOAT(self.ident_from(start_bpos)); + return token::LIT_FLOAT(self.name_from(start_bpos)); } else if self.curr_is('f') { // or it might be an integer literal suffixed as a float self.scan_float_suffix(); let last_pos = self.last_pos; self.check_float_base(start_bpos, last_pos, base); - return token::LIT_FLOAT(self.ident_from(start_bpos)); + return token::LIT_FLOAT(self.name_from(start_bpos)); } else { // it might be a float if it has an exponent if self.curr_is('e') || self.curr_is('E') { @@ -603,11 +603,11 @@ impl<'a> StringReader<'a> { self.scan_float_suffix(); let last_pos = self.last_pos; self.check_float_base(start_bpos, last_pos, base); - return token::LIT_FLOAT(self.ident_from(start_bpos)); + return token::LIT_FLOAT(self.name_from(start_bpos)); } // but we certainly have an integer! 
self.scan_int_suffix(); - return token::LIT_INTEGER(self.ident_from(start_bpos)); + return token::LIT_INTEGER(self.name_from(start_bpos)); } } @@ -980,7 +980,7 @@ impl<'a> StringReader<'a> { start - BytePos(1), last_bpos, "unterminated character constant".to_string()); } - let id = if valid { self.ident_from(start) } else { str_to_ident("0") }; + let id = if valid { self.name_from(start) } else { token::intern("0") }; self.bump(); // advance curr past token return token::LIT_CHAR(id); } @@ -1010,8 +1010,8 @@ impl<'a> StringReader<'a> { valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"'); } // adjust for the ACSII " at the start of the literal - let id = if valid { self.ident_from(start_bpos + BytePos(1)) } - else { str_to_ident("??") }; + let id = if valid { self.name_from(start_bpos + BytePos(1)) } + else { token::intern("??") }; self.bump(); return token::LIT_STR(id); } @@ -1076,9 +1076,9 @@ impl<'a> StringReader<'a> { } self.bump(); let id = if valid { - self.ident_from_to(content_start_bpos, content_end_bpos) + self.name_from_to(content_start_bpos, content_end_bpos) } else { - str_to_ident("??") + token::intern("??") }; return token::LIT_STR_RAW(id, hash_count); } @@ -1168,7 +1168,7 @@ impl<'a> StringReader<'a> { "unterminated byte constant".to_string()); } - let id = if valid { self.ident_from(start) } else { str_to_ident("??") }; + let id = if valid { self.name_from(start) } else { token::intern("??") }; self.bump(); // advance curr past token return token::LIT_BYTE(id); } @@ -1190,7 +1190,7 @@ impl<'a> StringReader<'a> { self.bump(); valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"'); } - let id = if valid { self.ident_from(start) } else { str_to_ident("??") }; + let id = if valid { self.name_from(start) } else { token::intern("??") }; self.bump(); return token::LIT_BINARY(id); } @@ -1243,7 +1243,7 @@ impl<'a> StringReader<'a> { self.bump(); } self.bump(); - return token::LIT_BINARY_RAW(self.ident_from_to(content_start_bpos, content_end_bpos), + return token::LIT_BINARY_RAW(self.name_from_to(content_start_bpos, content_end_bpos), hash_count); } } diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 51f2c74d3aeff..743eeed9da5e2 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -1560,8 +1560,8 @@ impl<'a> Parser<'a> { match *tok { token::LIT_BYTE(i) => LitByte(parse::byte_lit(i.as_str()).val0()), token::LIT_CHAR(i) => LitChar(parse::char_lit(i.as_str()).val0()), - token::LIT_INTEGER(s) => parse::integer_lit(self.id_to_interned_str(s).get(), - &self.sess.span_diagnostic, self.span), + token::LIT_INTEGER(s) => parse::integer_lit(s.as_str(), + &self.sess.span_diagnostic, self.span), token::LIT_FLOAT(s) => parse::float_lit(s.as_str()), token::LIT_STR(s) => { LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()), @@ -1572,7 +1572,7 @@ impl<'a> Parser<'a> { ast::RawStr(n)) } token::LIT_BINARY(i) => - LitBinary(parse::binary_lit(self.id_to_interned_str(i).get())), + LitBinary(parse::binary_lit(i.as_str())), token::LIT_BINARY_RAW(i, _) => LitBinary(Rc::new(i.as_str().as_bytes().iter().map(|&x| x).collect())), token::LPAREN => { self.expect(&token::RPAREN); LitNil }, @@ -1948,7 +1948,12 @@ impl<'a> Parser<'a> { }); return self.mk_expr(lo, body.span.hi, ExprProc(decl, fakeblock)); }, - token::IDENT(id @ ast::Ident{name:token::SELF_KEYWORD_NAME,ctxt:_},false) => { + // FIXME #13626: Should be able to stick in + // token::SELF_KEYWORD_NAME + token::IDENT(id @ 
ast::Ident{ + name: ast::Name(token::SELF_KEYWORD_NAME_NUM), + ctxt: _ + } ,false) => { self.bump(); let path = ast_util::ident_to_path(mk_sp(lo, hi), id); ex = ExprPath(path); @@ -4770,8 +4775,7 @@ impl<'a> Parser<'a> { match self.token { token::LIT_STR(s) | token::LIT_STR_RAW(s, _) => { self.bump(); - let identifier_string = token::get_ident(s); - let the_string = identifier_string.get(); + let the_string = s.as_str(); match abi::lookup(the_string) { Some(abi) => Some(abi), None => { @@ -5389,9 +5393,9 @@ impl<'a> Parser<'a> { pub fn parse_optional_str(&mut self) -> Option<(InternedString, ast::StrStyle)> { let (s, style) = match self.token { - token::LIT_STR(s) => (self.id_to_interned_str(s), ast::CookedStr), + token::LIT_STR(s) => (self.id_to_interned_str(s.ident()), ast::CookedStr), token::LIT_STR_RAW(s, n) => { - (self.id_to_interned_str(s), ast::RawStr(n)) + (self.id_to_interned_str(s.ident()), ast::RawStr(n)) } _ => return None }; diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index e65f9f208a317..076db789f063a 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -78,14 +78,14 @@ pub enum Token { QUESTION, /* Literals */ - LIT_BYTE(Ident), - LIT_CHAR(Ident), - LIT_INTEGER(Ident), - LIT_FLOAT(Ident), - LIT_STR(Ident), - LIT_STR_RAW(Ident, uint), /* raw str delimited by n hash symbols */ - LIT_BINARY(Ident), - LIT_BINARY_RAW(Ident, uint), /* raw binary str delimited by n hash symbols */ + LIT_BYTE(Name), + LIT_CHAR(Name), + LIT_INTEGER(Name), + LIT_FLOAT(Name), + LIT_STR(Name), + LIT_STR_RAW(Name, uint), /* raw str delimited by n hash symbols */ + LIT_BINARY(Name), + LIT_BINARY_RAW(Name, uint), /* raw binary str delimited by n hash symbols */ /* Name components */ /// An identifier contains an "is_mod_name" boolean, @@ -97,7 +97,7 @@ pub enum Token { /* For interpolation */ INTERPOLATED(Nonterminal), - DOC_COMMENT(Ident), + DOC_COMMENT(Name), // Junk. These carry no data because we don't really care about the data // they *would* carry, and don't really want to allocate a new ident for @@ -107,7 +107,7 @@ pub enum Token { WS, /// Comment COMMENT, - SHEBANG(Ident), + SHEBANG(Name), EOF, } @@ -207,28 +207,28 @@ pub fn to_string(t: &Token) -> String { /* Literals */ LIT_BYTE(b) => { - format!("b'{}'", get_ident(b).get()) + format!("b'{}'", b.as_str()) } LIT_CHAR(c) => { - format!("'{}'", get_ident(c).get()) + format!("'{}'", c.as_str()) } LIT_INTEGER(c) | LIT_FLOAT(c) => { - get_ident(c).get().to_string() + c.as_str().to_string() } LIT_STR(s) => { - format!("\"{}\"", get_ident(s).get()) + format!("\"{}\"", s.as_str()) } LIT_STR_RAW(s, n) => { format!("r{delim}\"{string}\"{delim}", - delim="#".repeat(n), string=get_ident(s)) + delim="#".repeat(n), string=s.as_str()) } LIT_BINARY(v) => { - format!("b\"{}\"", get_ident(v).get()) + format!("b\"{}\"", v.as_str()) } LIT_BINARY_RAW(s, n) => { format!("br{delim}\"{string}\"{delim}", - delim="#".repeat(n), string=get_ident(s).get()) + delim="#".repeat(n), string=s.as_str()) } /* Name components */ @@ -239,7 +239,7 @@ pub fn to_string(t: &Token) -> String { UNDERSCORE => "_".to_string(), /* Other */ - DOC_COMMENT(s) => get_ident(s).get().to_string(), + DOC_COMMENT(s) => s.as_str().to_string(), EOF => "".to_string(), WS => " ".to_string(), COMMENT => "/* */".to_string(), @@ -374,19 +374,19 @@ macro_rules! 
declare_special_idents_and_keywords {( $( ($rk_name:expr, $rk_variant:ident, $rk_str:expr); )* } ) => { - static STRICT_KEYWORD_START: Name = first!($( $sk_name, )*); - static STRICT_KEYWORD_FINAL: Name = last!($( $sk_name, )*); - static RESERVED_KEYWORD_START: Name = first!($( $rk_name, )*); - static RESERVED_KEYWORD_FINAL: Name = last!($( $rk_name, )*); + static STRICT_KEYWORD_START: Name = first!($( Name($sk_name), )*); + static STRICT_KEYWORD_FINAL: Name = last!($( Name($sk_name), )*); + static RESERVED_KEYWORD_START: Name = first!($( Name($rk_name), )*); + static RESERVED_KEYWORD_FINAL: Name = last!($( Name($rk_name), )*); pub mod special_idents { - use ast::Ident; - $( pub static $si_static: Ident = Ident { name: $si_name, ctxt: 0 }; )* + use ast::{Ident, Name}; + $( pub static $si_static: Ident = Ident { name: Name($si_name), ctxt: 0 }; )* } pub mod special_names { use ast::Name; - $( pub static $si_static: Name = $si_name; )* + $( pub static $si_static: Name = Name($si_name); )* } /** @@ -407,8 +407,8 @@ macro_rules! declare_special_idents_and_keywords {( impl Keyword { pub fn to_name(&self) -> Name { match *self { - $( $sk_variant => $sk_name, )* - $( $rk_variant => $rk_name, )* + $( $sk_variant => Name($sk_name), )* + $( $rk_variant => Name($rk_name), )* } } } @@ -427,8 +427,11 @@ macro_rules! declare_special_idents_and_keywords {( }} // If the special idents get renumbered, remember to modify these two as appropriate -pub static SELF_KEYWORD_NAME: Name = 1; -static STATIC_KEYWORD_NAME: Name = 2; +pub static SELF_KEYWORD_NAME: Name = Name(SELF_KEYWORD_NAME_NUM); +static STATIC_KEYWORD_NAME: Name = Name(STATIC_KEYWORD_NAME_NUM); + +pub static SELF_KEYWORD_NAME_NUM: u32 = 1; +static STATIC_KEYWORD_NAME_NUM: u32 = 2; // NB: leaving holes in the ident table is bad! a different ident will get // interned with the id from the hole, but it will be between the min and max @@ -438,8 +441,8 @@ declare_special_idents_and_keywords! { pub mod special_idents { // These ones are statics (0, invalid, ""); - (super::SELF_KEYWORD_NAME, self_, "self"); - (super::STATIC_KEYWORD_NAME, statik, "static"); + (super::SELF_KEYWORD_NAME_NUM, self_, "self"); + (super::STATIC_KEYWORD_NAME_NUM, statik, "static"); (3, static_lifetime, "'static"); // for matcher NTs @@ -479,8 +482,8 @@ declare_special_idents_and_keywords! { (29, Ref, "ref"); (30, Return, "return"); // Static and Self are also special idents (prefill de-dupes) - (super::STATIC_KEYWORD_NAME, Static, "static"); - (super::SELF_KEYWORD_NAME, Self, "self"); + (super::STATIC_KEYWORD_NAME_NUM, Static, "static"); + (super::SELF_KEYWORD_NAME_NUM, Self, "self"); (31, Struct, "struct"); (32, Super, "super"); (33, True, "true"); @@ -687,7 +690,7 @@ pub fn fresh_name(src: &Ident) -> Name { // create a fresh mark. pub fn fresh_mark() -> Mrk { - gensym("mark") + gensym("mark").uint() as u32 } // See the macro above about the types of keywords @@ -701,10 +704,13 @@ pub fn is_keyword(kw: keywords::Keyword, tok: &Token) -> bool { pub fn is_any_keyword(tok: &Token) -> bool { match *tok { - token::IDENT(sid, false) => match sid.name { - SELF_KEYWORD_NAME | STATIC_KEYWORD_NAME | - STRICT_KEYWORD_START .. 
RESERVED_KEYWORD_FINAL => true, - _ => false, + token::IDENT(sid, false) => { + let n = sid.name; + + n == SELF_KEYWORD_NAME + || n == STATIC_KEYWORD_NAME + || STRICT_KEYWORD_START <= n + && n <= RESERVED_KEYWORD_FINAL }, _ => false } @@ -712,10 +718,13 @@ pub fn is_any_keyword(tok: &Token) -> bool { pub fn is_strict_keyword(tok: &Token) -> bool { match *tok { - token::IDENT(sid, false) => match sid.name { - SELF_KEYWORD_NAME | STATIC_KEYWORD_NAME | - STRICT_KEYWORD_START .. STRICT_KEYWORD_FINAL => true, - _ => false, + token::IDENT(sid, false) => { + let n = sid.name; + + n == SELF_KEYWORD_NAME + || n == STATIC_KEYWORD_NAME + || STRICT_KEYWORD_START <= n + && n <= STRICT_KEYWORD_FINAL }, _ => false, } @@ -723,9 +732,11 @@ pub fn is_strict_keyword(tok: &Token) -> bool { pub fn is_reserved_keyword(tok: &Token) -> bool { match *tok { - token::IDENT(sid, false) => match sid.name { - RESERVED_KEYWORD_START .. RESERVED_KEYWORD_FINAL => true, - _ => false, + token::IDENT(sid, false) => { + let n = sid.name; + + RESERVED_KEYWORD_START <= n + && n <= RESERVED_KEYWORD_FINAL }, _ => false, } diff --git a/src/libsyntax/util/interner.rs b/src/libsyntax/util/interner.rs index 55fff38f99131..b370678c6df21 100644 --- a/src/libsyntax/util/interner.rs +++ b/src/libsyntax/util/interner.rs @@ -52,7 +52,7 @@ impl Interner { } let mut vect = self.vect.borrow_mut(); - let new_idx = (*vect).len() as Name; + let new_idx = Name((*vect).len() as u32); (*map).insert(val.clone(), new_idx); (*vect).push(val); new_idx @@ -60,7 +60,7 @@ impl Interner { pub fn gensym(&self, val: T) -> Name { let mut vect = self.vect.borrow_mut(); - let new_idx = (*vect).len() as Name; + let new_idx = Name((*vect).len() as u32); // leave out of .map to avoid colliding (*vect).push(val); new_idx @@ -68,7 +68,7 @@ impl Interner { pub fn get(&self, idx: Name) -> T { let vect = self.vect.borrow(); - (*(*vect).get(idx as uint)).clone() + (*(*vect).get(idx.uint())).clone() } pub fn len(&self) -> uint { @@ -155,7 +155,7 @@ impl StrInterner { None => (), } - let new_idx = self.len() as Name; + let new_idx = Name(self.len() as u32); let val = RcStr::new(val); map.insert(val.clone(), new_idx); self.vect.borrow_mut().push(val); @@ -163,7 +163,7 @@ impl StrInterner { } pub fn gensym(&self, val: &str) -> Name { - let new_idx = self.len() as Name; + let new_idx = Name(self.len() as u32); // leave out of .map to avoid colliding self.vect.borrow_mut().push(RcStr::new(val)); new_idx @@ -180,23 +180,23 @@ impl StrInterner { /// Create a gensym with the same name as an existing /// entry. pub fn gensym_copy(&self, idx : Name) -> Name { - let new_idx = self.len() as Name; + let new_idx = Name(self.len() as u32); // leave out of map to avoid colliding let mut vect = self.vect.borrow_mut(); - let existing = (*vect.get(idx as uint)).clone(); + let existing = (*vect.get(idx.uint())).clone(); vect.push(existing); new_idx } pub fn get(&self, idx: Name) -> RcStr { - (*self.vect.borrow().get(idx as uint)).clone() + (*self.vect.borrow().get(idx.uint())).clone() } /// Returns this string with lifetime tied to the interner. Since /// strings may never be removed from the interner, this is safe. 
    pub fn get_ref<'a>(&'a self, idx: Name) -> &'a str {
        let vect = self.vect.borrow();
-        let s: &str = vect.get(idx as uint).as_slice();
+        let s: &str = vect.get(idx.uint()).as_slice();
        unsafe {
            mem::transmute(s)
        }

From 69a0cdf49195d2bc042b44f75e309eb280bcc475 Mon Sep 17 00:00:00 2001
From: Corey Richardson
Date: Tue, 8 Jul 2014 22:28:52 -0700
Subject: [PATCH 15/15] Fix all the test fallout

---
 src/librustc/middle/astencode.rs |  3 +-
 src/libsyntax/ast_util.rs        |  8 ++--
 src/libsyntax/ext/expand.rs      | 10 ++---
 src/libsyntax/ext/mtwt.rs        | 66 ++++++++++++++---------------
 src/libsyntax/parse/lexer/mod.rs | 24 +++++++----
 src/libsyntax/parse/token.rs     |  4 +-
 src/libsyntax/util/interner.rs   | 72 ++++++++++++++++----------------
 7 files changed, 100 insertions(+), 87 deletions(-)

diff --git a/src/librustc/middle/astencode.rs b/src/librustc/middle/astencode.rs
index 11b1687dc5599..fb2b4951ea3d6 100644
--- a/src/librustc/middle/astencode.rs
+++ b/src/librustc/middle/astencode.rs
@@ -1523,7 +1523,7 @@ fn test_basic() {
         fn foo() {}
     ));
 }
-
+/* NOTE: When there's a snapshot, update this (yay quasiquoter!)
 #[test]
 fn test_smalltalk() {
     let cx = mk_ctxt();
@@ -1531,6 +1531,7 @@ fn test_smalltalk() {
         fn foo() -> int { 3 + 4 } // first smalltalk program ever executed.
     ));
 }
+*/
 
 #[test]
 fn test_more() {
diff --git a/src/libsyntax/ast_util.rs b/src/libsyntax/ast_util.rs
index 004991814fff4..13fe8a1506459 100644
--- a/src/libsyntax/ast_util.rs
+++ b/src/libsyntax/ast_util.rs
@@ -754,14 +754,14 @@ mod test {
     #[test] fn idents_name_eq_test() {
         assert!(segments_name_eq(
-            [Ident{name:3,ctxt:4}, Ident{name:78,ctxt:82}]
+            [Ident{name:Name(3),ctxt:4}, Ident{name:Name(78),ctxt:82}]
                 .iter().map(ident_to_segment).collect::<Vec<PathSegment>>().as_slice(),
-            [Ident{name:3,ctxt:104}, Ident{name:78,ctxt:182}]
+            [Ident{name:Name(3),ctxt:104}, Ident{name:Name(78),ctxt:182}]
                 .iter().map(ident_to_segment).collect::<Vec<PathSegment>>().as_slice()));
         assert!(!segments_name_eq(
-            [Ident{name:3,ctxt:4}, Ident{name:78,ctxt:82}]
+            [Ident{name:Name(3),ctxt:4}, Ident{name:Name(78),ctxt:82}]
                 .iter().map(ident_to_segment).collect::<Vec<PathSegment>>().as_slice(),
-            [Ident{name:3,ctxt:104}, Ident{name:77,ctxt:182}]
+            [Ident{name:Name(3),ctxt:104}, Ident{name:Name(77),ctxt:182}]
                 .iter().map(ident_to_segment).collect::<Vec<PathSegment>>().as_slice()));
     }
 }
diff --git a/src/libsyntax/ext/expand.rs b/src/libsyntax/ext/expand.rs
index a095317f663a9..b7d72ae4debc1 100644
--- a/src/libsyntax/ext/expand.rs
+++ b/src/libsyntax/ext/expand.rs
@@ -1150,7 +1150,7 @@ mod test {
     use super::{pattern_bindings, expand_crate, contains_macro_escape};
     use super::{PatIdentFinder, IdentRenamer, PatIdentRenamer};
     use ast;
-    use ast::{Attribute_, AttrOuter, MetaWord};
+    use ast::{Attribute_, AttrOuter, MetaWord, Name};
     use attr;
     use codemap;
     use codemap::Spanned;
@@ -1665,12 +1665,12 @@ foo_module!()
         let f_ident = token::str_to_ident("f");
         let x_ident = token::str_to_ident("x");
         let int_ident = token::str_to_ident("int");
-        let renames = vec!((x_ident,16));
+        let renames = vec!((x_ident,Name(16)));
         let mut renamer = IdentRenamer{renames: &renames};
         let renamed_crate = renamer.fold_crate(the_crate);
         let idents = crate_idents(&renamed_crate);
         let resolved : Vec<ast::Name> = idents.iter().map(|id| mtwt::resolve(*id)).collect();
-        assert_eq!(resolved,vec!(f_ident.name,16,int_ident.name,16,16,16));
+        assert_eq!(resolved,vec!(f_ident.name,Name(16),int_ident.name,Name(16),Name(16),Name(16)));
     }
 
     // test the PatIdentRenamer; only PatIdents get renamed
@@ -1680,13 +1680,13 @@ foo_module!()
         let f_ident = token::str_to_ident("f");
         let x_ident =
token::str_to_ident("x");
         let int_ident = token::str_to_ident("int");
-        let renames = vec!((x_ident,16));
+        let renames = vec!((x_ident,Name(16)));
         let mut renamer = PatIdentRenamer{renames: &renames};
         let renamed_crate = renamer.fold_crate(the_crate);
         let idents = crate_idents(&renamed_crate);
         let resolved : Vec<ast::Name> = idents.iter().map(|id| mtwt::resolve(*id)).collect();
         let x_name = x_ident.name;
-        assert_eq!(resolved,vec!(f_ident.name,16,int_ident.name,16,x_name,x_name));
+        assert_eq!(resolved,vec!(f_ident.name,Name(16),int_ident.name,Name(16),x_name,x_name));
     }
 

diff --git a/src/libsyntax/ext/mtwt.rs b/src/libsyntax/ext/mtwt.rs
index 7b29bbaef77eb..2c94db5296750 100644
--- a/src/libsyntax/ext/mtwt.rs
+++ b/src/libsyntax/ext/mtwt.rs
@@ -301,8 +301,8 @@ mod tests {
         assert_eq!(s.clone(), vec!(14));
     }
 
-    fn id(n: Name, s: SyntaxContext) -> Ident {
-        Ident {name: n, ctxt: s}
+    fn id(n: u32, s: SyntaxContext) -> Ident {
+        Ident {name: Name(n), ctxt: s}
     }
 
     // because of the SCTable, I now need a tidy way of
@@ -349,12 +349,12 @@ mod tests {
     fn test_unfold_refold(){
         let mut t = new_sctable_internal();
 
-        let test_sc = vec!(M(3),R(id(101,0),14),M(9));
+        let test_sc = vec!(M(3),R(id(101,0),Name(14)),M(9));
         assert_eq!(unfold_test_sc(test_sc.clone(),EMPTY_CTXT,&mut t),4);
         {
             let table = t.table.borrow();
             assert!(*table.get(2) == Mark(9,0));
-            assert!(*table.get(3) == Rename(id(101,0),14,2));
+            assert!(*table.get(3) == Rename(id(101,0),Name(14),2));
             assert!(*table.get(4) == Mark(3,3));
         }
         assert_eq!(refold_test_sc(4,&t),test_sc);
@@ -381,8 +381,8 @@
     #[test]
     fn test_marksof () {
-        let stopname = 242;
-        let name1 = 243;
+        let stopname = Name(242);
+        let name1 = Name(243);
         let mut t = new_sctable_internal();
         assert_eq!(marksof_internal (EMPTY_CTXT,stopname,&t),Vec::new());
         // FIXME #5074: ANF'd to dodge nested calls
@@ -396,16 +396,16 @@
          assert_eq! (marksof_internal (ans, stopname,&t), vec!(16));}
         // rename where stop doesn't match:
         { let chain = vec!(M(9),
-                           R(id(name1,
+                           R(id(name1.uint() as u32,
                                 apply_mark_internal (4, EMPTY_CTXT,&mut t)),
-                             100101102),
+                             Name(100101102)),
                            M(14));
          let ans = unfold_test_sc(chain,EMPTY_CTXT,&mut t);
          assert_eq!
(marksof_internal (ans, stopname, &t), vec!(9,14));} // rename where stop does match { let name1sc = apply_mark_internal(4, EMPTY_CTXT, &mut t); let chain = vec!(M(9), - R(id(name1, name1sc), + R(id(name1.uint() as u32, name1sc), stopname), M(14)); let ans = unfold_test_sc(chain,EMPTY_CTXT,&mut t); @@ -419,55 +419,55 @@ mod tests { let mut t = new_sctable_internal(); let mut rt = HashMap::new(); // - ctxt is MT - assert_eq!(resolve_internal(id(a,EMPTY_CTXT),&mut t, &mut rt),a); + assert_eq!(resolve_internal(id(a,EMPTY_CTXT),&mut t, &mut rt),Name(a)); // - simple ignored marks { let sc = unfold_marks(vec!(1,2,3),EMPTY_CTXT,&mut t); - assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt),a);} + assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt),Name(a));} // - orthogonal rename where names don't match - { let sc = unfold_test_sc(vec!(R(id(50,EMPTY_CTXT),51),M(12)),EMPTY_CTXT,&mut t); - assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt),a);} + { let sc = unfold_test_sc(vec!(R(id(50,EMPTY_CTXT),Name(51)),M(12)),EMPTY_CTXT,&mut t); + assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt),Name(a));} // - rename where names do match, but marks don't { let sc1 = apply_mark_internal(1,EMPTY_CTXT,&mut t); - let sc = unfold_test_sc(vec!(R(id(a,sc1),50), + let sc = unfold_test_sc(vec!(R(id(a,sc1),Name(50)), M(1), M(2)), EMPTY_CTXT,&mut t); - assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt), a);} + assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt), Name(a));} // - rename where names and marks match { let sc1 = unfold_test_sc(vec!(M(1),M(2)),EMPTY_CTXT,&mut t); - let sc = unfold_test_sc(vec!(R(id(a,sc1),50),M(1),M(2)),EMPTY_CTXT,&mut t); - assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt), 50); } + let sc = unfold_test_sc(vec!(R(id(a,sc1),Name(50)),M(1),M(2)),EMPTY_CTXT,&mut t); + assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt), Name(50)); } // - rename where names and marks match by literal sharing { let sc1 = unfold_test_sc(vec!(M(1),M(2)),EMPTY_CTXT,&mut t); - let sc = unfold_test_sc(vec!(R(id(a,sc1),50)),sc1,&mut t); - assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt), 50); } + let sc = unfold_test_sc(vec!(R(id(a,sc1),Name(50))),sc1,&mut t); + assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt), Name(50)); } // - two renames of the same var.. 
can only happen if you use // local-expand to prevent the inner binding from being renamed // during the rename-pass caused by the first: println!("about to run bad test"); - { let sc = unfold_test_sc(vec!(R(id(a,EMPTY_CTXT),50), - R(id(a,EMPTY_CTXT),51)), + { let sc = unfold_test_sc(vec!(R(id(a,EMPTY_CTXT),Name(50)), + R(id(a,EMPTY_CTXT),Name(51))), EMPTY_CTXT,&mut t); - assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt), 51); } + assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt), Name(51)); } // the simplest double-rename: - { let a_to_a50 = apply_rename_internal(id(a,EMPTY_CTXT),50,EMPTY_CTXT,&mut t); - let a50_to_a51 = apply_rename_internal(id(a,a_to_a50),51,a_to_a50,&mut t); - assert_eq!(resolve_internal(id(a,a50_to_a51),&mut t, &mut rt),51); + { let a_to_a50 = apply_rename_internal(id(a,EMPTY_CTXT),Name(50),EMPTY_CTXT,&mut t); + let a50_to_a51 = apply_rename_internal(id(a,a_to_a50),Name(51),a_to_a50,&mut t); + assert_eq!(resolve_internal(id(a,a50_to_a51),&mut t, &mut rt),Name(51)); // mark on the outside doesn't stop rename: let sc = apply_mark_internal(9,a50_to_a51,&mut t); - assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt),51); + assert_eq!(resolve_internal(id(a,sc),&mut t, &mut rt),Name(51)); // but mark on the inside does: - let a50_to_a51_b = unfold_test_sc(vec!(R(id(a,a_to_a50),51), + let a50_to_a51_b = unfold_test_sc(vec!(R(id(a,a_to_a50),Name(51)), M(9)), a_to_a50, &mut t); - assert_eq!(resolve_internal(id(a,a50_to_a51_b),&mut t, &mut rt),50);} + assert_eq!(resolve_internal(id(a,a50_to_a51_b),&mut t, &mut rt),Name(50));} } #[test] fn mtwt_resolve_test(){ let a = 40; - assert_eq!(resolve(id(a,EMPTY_CTXT)),a); + assert_eq!(resolve(id(a,EMPTY_CTXT)),Name(a)); } @@ -496,10 +496,10 @@ mod tests { #[test] fn new_resolves_test() { - let renames = vec!((Ident{name:23,ctxt:EMPTY_CTXT},24), - (Ident{name:29,ctxt:EMPTY_CTXT},29)); + let renames = vec!((Ident{name:Name(23),ctxt:EMPTY_CTXT},Name(24)), + (Ident{name:Name(29),ctxt:EMPTY_CTXT},Name(29))); let new_ctxt1 = apply_renames(&renames,EMPTY_CTXT); - assert_eq!(resolve(Ident{name:23,ctxt:new_ctxt1}),24); - assert_eq!(resolve(Ident{name:29,ctxt:new_ctxt1}),29); + assert_eq!(resolve(Ident{name:Name(23),ctxt:new_ctxt1}),Name(24)); + assert_eq!(resolve(Ident{name:Name(29),ctxt:new_ctxt1}),Name(29)); } } diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index f462ab92383ac..0aaddacfab624 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1326,11 +1326,14 @@ mod test { "/* my source file */ \ fn main() { println!(\"zebra\"); }\n".to_string()); let id = str_to_ident("fn"); + assert_eq!(string_reader.next_token().tok, token::COMMENT); + assert_eq!(string_reader.next_token().tok, token::WS); let tok1 = string_reader.next_token(); let tok2 = TokenAndSpan{ tok:token::IDENT(id, false), sp:Span {lo:BytePos(21),hi:BytePos(23),expn_info: None}}; assert_eq!(tok1,tok2); + assert_eq!(string_reader.next_token().tok, token::WS); // the 'main' id is already read: assert_eq!(string_reader.last_pos.clone(), BytePos(28)); // read another token: @@ -1359,6 +1362,7 @@ mod test { #[test] fn doublecolonparsing () { check_tokenization(setup(&mk_sh(), "a b".to_string()), vec!(mk_ident("a",false), + token::WS, mk_ident("b",false))); } @@ -1372,6 +1376,7 @@ mod test { #[test] fn dcparsing_3 () { check_tokenization(setup(&mk_sh(), "a ::b".to_string()), vec!(mk_ident("a",false), + token::WS, token::MOD_SEP, mk_ident("b",false))); } @@ -1380,22 +1385,23 @@ mod test { 
check_tokenization(setup(&mk_sh(), "a:: b".to_string()),
                           vec!(mk_ident("a",true),
                                token::MOD_SEP,
+                               token::WS,
                                mk_ident("b",false)));
    }

    #[test] fn character_a() {
        assert_eq!(setup(&mk_sh(), "'a'".to_string()).next_token().tok,
-                   token::LIT_CHAR('a'));
+                   token::LIT_CHAR(token::intern("a")));
    }

    #[test] fn character_space() {
        assert_eq!(setup(&mk_sh(), "' '".to_string()).next_token().tok,
-                   token::LIT_CHAR(' '));
+                   token::LIT_CHAR(token::intern(" ")));
    }

    #[test] fn character_escaped() {
        assert_eq!(setup(&mk_sh(), "'\\n'".to_string()).next_token().tok,
-                   token::LIT_CHAR('\n'));
+                   token::LIT_CHAR(token::intern("\\n")));
    }

    #[test] fn lifetime_name() {
@@ -1407,7 +1413,7 @@
        assert_eq!(setup(&mk_sh(),
                         "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token()
                                                                 .tok,
-                   token::LIT_STR_RAW(token::str_to_ident("\"#a\\b\x00c\""), 3));
+                   token::LIT_STR_RAW(token::intern("\"#a\\b\x00c\""), 3));
    }

    #[test] fn line_doc_comments() {
@@ -1417,9 +1423,13 @@
    }

    #[test] fn nested_block_comments() {
-        assert_eq!(setup(&mk_sh(),
-                         "/* /* */ */'a'".to_string()).next_token().tok,
-                   token::LIT_CHAR('a'));
+        let sh = mk_sh();
+        let mut lexer = setup(&sh, "/* /* */ */'a'".to_string());
+        match lexer.next_token().tok {
+            token::COMMENT => { },
+            _ => fail!("expected a comment!")
+        }
+        assert_eq!(lexer.next_token().tok, token::LIT_CHAR(token::intern("a")));
    }

}
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index 076db789f063a..5839df6702245 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -757,8 +757,8 @@ mod test {
     use ast;
     use ext::mtwt;
 
-    fn mark_ident(id : Ident, m : ast::Mrk) -> Ident {
-        Ident{name:id.name,ctxt:mtwt::apply_mark(m,id.ctxt)}
+    fn mark_ident(id : ast::Ident, m : ast::Mrk) -> ast::Ident {
+        ast::Ident { name: id.name, ctxt:mtwt::apply_mark(m, id.ctxt) }
     }
 
     #[test] fn mtwt_token_eq_test() {
diff --git a/src/libsyntax/util/interner.rs b/src/libsyntax/util/interner.rs
index b370678c6df21..452b5a5251222 100644
--- a/src/libsyntax/util/interner.rs
+++ b/src/libsyntax/util/interner.rs
@@ -222,36 +222,38 @@ impl StrInterner {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use ast::Name;
+
     #[test]
     #[should_fail]
     fn i1 () {
         let i : Interner<RcStr> = Interner::new();
-        i.get(13);
+        i.get(Name(13));
     }
 
     #[test]
     fn interner_tests () {
         let i : Interner<RcStr> = Interner::new();
         // first one is zero:
-        assert_eq!(i.intern(RcStr::new("dog")), 0);
+        assert_eq!(i.intern(RcStr::new("dog")), Name(0));
         // re-use gets the same entry:
-        assert_eq!(i.intern(RcStr::new("dog")), 0);
+        assert_eq!(i.intern(RcStr::new("dog")), Name(0));
         // different string gets a different #:
-        assert_eq!(i.intern(RcStr::new("cat")), 1);
-        assert_eq!(i.intern(RcStr::new("cat")), 1);
+        assert_eq!(i.intern(RcStr::new("cat")), Name(1));
+        assert_eq!(i.intern(RcStr::new("cat")), Name(1));
         // dog is still at zero
-        assert_eq!(i.intern(RcStr::new("dog")), 0);
+        assert_eq!(i.intern(RcStr::new("dog")), Name(0));
         // gensym gets 3
-        assert_eq!(i.gensym(RcStr::new("zebra") ), 2);
+        assert_eq!(i.gensym(RcStr::new("zebra") ), Name(2));
         // gensym of same string gets new number :
-        assert_eq!(i.gensym (RcStr::new("zebra") ), 3);
+        assert_eq!(i.gensym (RcStr::new("zebra") ), Name(3));
         // gensym of *existing* string gets new number:
-        assert_eq!(i.gensym(RcStr::new("dog")), 4);
-        assert_eq!(i.get(0), RcStr::new("dog"));
-        assert_eq!(i.get(1), RcStr::new("cat"));
-        assert_eq!(i.get(2), RcStr::new("zebra"));
-        assert_eq!(i.get(3), RcStr::new("zebra"));
-        assert_eq!(i.get(4), RcStr::new("dog"));
+        assert_eq!(i.gensym(RcStr::new("dog")), Name(4));
+        assert_eq!(i.get(Name(0)), RcStr::new("dog"));
+        assert_eq!(i.get(Name(1)), RcStr::new("cat"));
+        assert_eq!(i.get(Name(2)), RcStr::new("zebra"));
+        assert_eq!(i.get(Name(3)), RcStr::new("zebra"));
+        assert_eq!(i.get(Name(4)), RcStr::new("dog"));
     }
 
     #[test]
@@ -261,39 +263,39 @@
     fn i3 () {
         let i : Interner<RcStr> = Interner::prefill([
             RcStr::new("Alan"),
             RcStr::new("Bob"),
             RcStr::new("Carol")
         ]);
-        assert_eq!(i.get(0), RcStr::new("Alan"));
-        assert_eq!(i.get(1), RcStr::new("Bob"));
-        assert_eq!(i.get(2), RcStr::new("Carol"));
-        assert_eq!(i.intern(RcStr::new("Bob")), 1);
+        assert_eq!(i.get(Name(0)), RcStr::new("Alan"));
+        assert_eq!(i.get(Name(1)), RcStr::new("Bob"));
+        assert_eq!(i.get(Name(2)), RcStr::new("Carol"));
+        assert_eq!(i.intern(RcStr::new("Bob")), Name(1));
     }
 
     #[test]
     fn string_interner_tests() {
         let i : StrInterner = StrInterner::new();
         // first one is zero:
-        assert_eq!(i.intern("dog"), 0);
+        assert_eq!(i.intern("dog"), Name(0));
         // re-use gets the same entry:
-        assert_eq!(i.intern ("dog"), 0);
+        assert_eq!(i.intern ("dog"), Name(0));
         // different string gets a different #:
-        assert_eq!(i.intern("cat"), 1);
-        assert_eq!(i.intern("cat"), 1);
+        assert_eq!(i.intern("cat"), Name(1));
+        assert_eq!(i.intern("cat"), Name(1));
         // dog is still at zero
-        assert_eq!(i.intern("dog"), 0);
+        assert_eq!(i.intern("dog"), Name(0));
         // gensym gets 3
-        assert_eq!(i.gensym("zebra"), 2);
+        assert_eq!(i.gensym("zebra"), Name(2));
         // gensym of same string gets new number :
-        assert_eq!(i.gensym("zebra"), 3);
+        assert_eq!(i.gensym("zebra"), Name(3));
         // gensym of *existing* string gets new number:
-        assert_eq!(i.gensym("dog"), 4);
+        assert_eq!(i.gensym("dog"), Name(4));
         // gensym tests again with gensym_copy:
-        assert_eq!(i.gensym_copy(2), 5);
-        assert_eq!(i.get(5), RcStr::new("zebra"));
-        assert_eq!(i.gensym_copy(2), 6);
-        assert_eq!(i.get(6), RcStr::new("zebra"));
-        assert_eq!(i.get(0), RcStr::new("dog"));
-        assert_eq!(i.get(1), RcStr::new("cat"));
-        assert_eq!(i.get(2), RcStr::new("zebra"));
-        assert_eq!(i.get(3), RcStr::new("zebra"));
-        assert_eq!(i.get(4), RcStr::new("dog"));
+        assert_eq!(i.gensym_copy(Name(2)), Name(5));
+        assert_eq!(i.get(Name(5)), RcStr::new("zebra"));
+        assert_eq!(i.gensym_copy(Name(2)), Name(6));
+        assert_eq!(i.get(Name(6)), RcStr::new("zebra"));
+        assert_eq!(i.get(Name(0)), RcStr::new("dog"));
+        assert_eq!(i.get(Name(1)), RcStr::new("cat"));
+        assert_eq!(i.get(Name(2)), RcStr::new("zebra"));
+        assert_eq!(i.get(Name(3)), RcStr::new("zebra"));
+        assert_eq!(i.get(Name(4)), RcStr::new("dog"));
     }
 }
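
A minimal, self-contained sketch of the intern/gensym pattern that the `Name` newtype in the last two patches is protecting. It is written in present-day Rust for brevity, and this `Interner` is a toy stand-in (it uses `String` and `&mut self` where libsyntax uses `RcStr` and `RefCell`-based interior mutability), so the bodies below are illustrative assumptions rather than the patched code itself:

use std::collections::HashMap;

// Newtype over the interner index, so a raw u32 can no longer be
// confused with an interned name (toy version of ast::Name).
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
struct Name(u32);

#[derive(Default)]
struct Interner {
    map: HashMap<String, Name>, // spelling -> first interned Name
    vect: Vec<String>,          // Name(i) -> spelling stored at index i
}

impl Interner {
    // Intern a string: equal spellings share a single Name.
    fn intern(&mut self, s: &str) -> Name {
        if let Some(&name) = self.map.get(s) {
            return name;
        }
        let name = Name(self.vect.len() as u32);
        self.map.insert(s.to_string(), name);
        self.vect.push(s.to_string());
        name
    }

    // Gensym a string: always allocate a fresh Name, and never record it
    // in the map, so the same spelling can denote distinct symbols.
    fn gensym(&mut self, s: &str) -> Name {
        let name = Name(self.vect.len() as u32);
        self.vect.push(s.to_string());
        name
    }

    // Recover the spelling behind a Name.
    fn get(&self, name: Name) -> &str {
        &self.vect[name.0 as usize]
    }
}

fn main() {
    let mut i = Interner::default();
    assert_eq!(i.intern("dog"), Name(0)); // first intern takes index 0
    assert_eq!(i.intern("dog"), Name(0)); // re-interning reuses it
    assert_eq!(i.intern("cat"), Name(1)); // new spelling, new index
    assert_eq!(i.gensym("dog"), Name(2)); // gensym: same spelling, fresh Name
    assert_eq!(i.get(Name(2)), "dog");    // the spelling is still recoverable
}

Because `Name` is a distinct tuple struct rather than a bare `u32`, handing one of these assertions an un-interned integer no longer type-checks, which is exactly why the final patch has to wrap every literal index in the test suites in `Name(...)`.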