diff --git a/src/ast.ts b/src/ast.ts index 3f725bd24c..d587e7ad84 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -22,8 +22,11 @@ import { } from "./common"; import { - Token, Range +} from "./diagnostics"; + +import { + Token } from "./tokenizer"; import { @@ -42,7 +45,7 @@ import { } from "./types"; /** Indicates the kind of a node. */ -export enum NodeKind { +export const enum NodeKind { SOURCE, @@ -923,7 +926,7 @@ export class TypeParameterNode extends Node { } /** Represents the kind of a parameter. */ -export enum ParameterKind { +export const enum ParameterKind { /** No specific flags. */ DEFAULT, /** Is an optional parameter. */ @@ -1067,7 +1070,7 @@ export class DecoratorNode extends Node { } /** Comment kinds. */ -export enum CommentKind { +export const enum CommentKind { /** Line comment. */ LINE, /** Triple-slash line comment. */ @@ -1110,7 +1113,7 @@ export class IdentifierExpression extends Expression { } /** Indicates the kind of a literal. */ -export enum LiteralKind { +export const enum LiteralKind { FLOAT, INTEGER, STRING, @@ -1145,7 +1148,7 @@ export class ArrayLiteralExpression extends LiteralExpression { } /** Indicates the kind of an assertion. */ -export enum AssertionKind { +export const enum AssertionKind { /** A prefix assertion, i.e. `expr`. */ PREFIX, /** An as assertion, i.e. `expr as T`. */ @@ -1586,7 +1589,7 @@ export class CompiledExpression extends Expression { export abstract class Statement extends Node { } /** Indicates the specific kind of a source. */ -export enum SourceKind { +export const enum SourceKind { /** User-provided file. */ USER = 0, /** User-provided entry file. */ diff --git a/src/common.ts b/src/common.ts index 10ef6265de..ac5dff0c48 100644 --- a/src/common.ts +++ b/src/common.ts @@ -4,7 +4,7 @@ */ /** Indicates traits of a {@link Node} or {@link Element}. */ -export enum CommonFlags { +export const enum CommonFlags { /** No flags set. */ NONE = 0, diff --git a/src/compiler.ts b/src/compiler.ts index 033372fcb6..64283121a4 100644 --- a/src/compiler.ts +++ b/src/compiler.ts @@ -15,6 +15,7 @@ import { } from "./builtins"; import { + Range, DiagnosticCode, DiagnosticEmitter } from "./diagnostics"; @@ -108,7 +109,6 @@ import { import { Token, - Range, operatorTokenToString } from "./tokenizer"; diff --git a/src/diagnostics.ts b/src/diagnostics.ts index 3fe2ecb821..9c6fe53a2e 100644 --- a/src/diagnostics.ts +++ b/src/diagnostics.ts @@ -3,10 +3,6 @@ * @license Apache-2.0 */ -import { - Range -} from "./tokenizer"; - import { Source } from "./ast"; @@ -33,7 +29,7 @@ export { } from "./diagnosticMessages.generated"; /** Indicates the category of a {@link DiagnosticMessage}. */ -export enum DiagnosticCategory { +export const enum DiagnosticCategory { /** Overly pedantic message. */ PEDANTIC, /** Informatory message. */ @@ -44,6 +40,48 @@ export enum DiagnosticCategory { ERROR } +export class Range { + + source!: Source; + debugInfoRef: usize = 0; + + constructor(public start: i32, public end: i32) {} + + static join(a: Range, b: Range): Range { + if (a.source != b.source) throw new Error("source mismatch"); + let range = new Range( + a.start < b.start ? a.start : b.start, + a.end > b.end ? a.end : b.end + ); + range.source = a.source; + return range; + } + + equals(other: Range): bool { + return ( + this.source == other.source && + this.start == other.start && + this.end == other.end + ); + } + + get atStart(): Range { + let range = new Range(this.start, this.start); + range.source = this.source; + return range; + } + + get atEnd(): Range { + let range = new Range(this.end, this.end); + range.source = this.source; + return range; + } + + toString(): string { + return this.source.text.substring(this.start, this.end); + } +} + /** Returns the string representation of the specified diagnostic category. */ export function diagnosticCategoryToString(category: DiagnosticCategory): string { switch (category) { diff --git a/src/flow.ts b/src/flow.ts index c40c23686c..405661a225 100644 --- a/src/flow.ts +++ b/src/flow.ts @@ -154,7 +154,7 @@ export const enum FlowFlags { } /** Flags indicating the current state of a local. */ -export enum LocalFlags { +export const enum LocalFlags { /** No specific conditions. */ NONE = 0, @@ -169,7 +169,7 @@ export enum LocalFlags { } /** Flags indicating the current state of a field. */ -export enum FieldFlags { +export const enum FieldFlags { NONE = 0, INITIALIZED = 1 << 0 } diff --git a/src/module.ts b/src/module.ts index bd979cd974..c2f4d788e0 100644 --- a/src/module.ts +++ b/src/module.ts @@ -68,7 +68,7 @@ export namespace TypeRef { } /** Binaryen feature constants. */ -export enum FeatureFlags { +export const enum FeatureFlags { MVP = 0 /* _BinaryenFeatureMVP */, Atomics = 1 /* _BinaryenFeatureAtomics */, MutableGloabls = 2 /* _BinaryenFeatureMutableGlobals */, @@ -88,7 +88,7 @@ export enum FeatureFlags { } /** Binaryen expression id constants. */ -export enum ExpressionId { +export const enum ExpressionId { Invalid = 0 /* _BinaryenInvalidId */, Block = 1 /* _BinaryenBlockId */, If = 2 /* _BinaryenIfId */, @@ -164,7 +164,7 @@ export enum ExpressionId { } /** Binaryen external kind constants. */ -export enum ExternalKind { +export const enum ExternalKind { Function = 0 /* _BinaryenExternalFunction */, Table = 1 /* _BinaryenExternalTable */, Memory = 2 /* _BinaryenExternalMemory */, @@ -173,7 +173,7 @@ export enum ExternalKind { } /** Binaryen unary operation constants. */ -export enum UnaryOp { +export const enum UnaryOp { /** i32.clz */ ClzI32 = 0 /* _BinaryenClzInt32 */, /** i64.clz */ @@ -449,7 +449,7 @@ export enum UnaryOp { } /** Binaryen binary operation constants. */ -export enum BinaryOp { +export const enum BinaryOp { /** i32.add */ AddI32 = 0 /* _BinaryenAddInt32 */, /** i32.sub */ @@ -903,7 +903,7 @@ export enum BinaryOp { } /** Binaryen atomic read-modify-write operation constants. */ -export enum AtomicRMWOp { +export const enum AtomicRMWOp { /** i32.atomic.rmw.add, i32.atomic.rmw8.add_u, i32.atomic.rmw16.add_u, i64.atomic.rmw.add, i64.atomic.rmw8.add_u, i64.atomic.rmw16.add_u, i64.atomic.rmw32.add_u */ Add = 0 /* _BinaryenAtomicRMWAdd */, /** i32.atomic.rmw.sub, i32.atomic.rmw8.sub_u, i32.atomic.rmw16.sub_u, i64.atomic.rmw.sub, i64.atomic.rmw8.sub_u, i64.atomic.rmw16.sub_u, i64.atomic.rmw32.sub_u */ @@ -919,7 +919,7 @@ export enum AtomicRMWOp { } /** Binaryen SIMD extract operation constants. */ -export enum SIMDExtractOp { +export const enum SIMDExtractOp { /** i8x16.extract_lane_s */ ExtractLaneI8x16 = 0 /* _BinaryenExtractLaneSVecI8x16 */, /** i8x16.extract_lane_u */ @@ -939,7 +939,7 @@ export enum SIMDExtractOp { } /** Binaryen SIMD replace operation constants. */ -export enum SIMDReplaceOp { +export const enum SIMDReplaceOp { /** i8x16.replace_lane */ ReplaceLaneI8x16 = 0 /* _BinaryenReplaceLaneVecI8x16 */, /** i16x8.replace_lane */ @@ -955,7 +955,7 @@ export enum SIMDReplaceOp { } /** Binaryen SIMD shift operation constants. */ -export enum SIMDShiftOp { +export const enum SIMDShiftOp { /** i8x16.shl */ ShlI8x16 = 0 /* _BinaryenShlVecI8x16 */, /** i8x16.shr_s */ @@ -983,7 +983,7 @@ export enum SIMDShiftOp { } /** Binaryen SIMD load operation constants. */ -export enum SIMDLoadOp { +export const enum SIMDLoadOp { /** v128.load8_splat */ Load8Splat = 0 /* _BinaryenLoad8SplatVec128 */, /** v128.load16_splat */ @@ -1011,7 +1011,7 @@ export enum SIMDLoadOp { } /** Binaryen SIMD load/store lane operation constants. */ -export enum SIMDLoadStoreLaneOp { +export const enum SIMDLoadStoreLaneOp { /** v128.load8_lane */ Load8Lane = 0 /* _BinaryenLoad8LaneVec128 */, /** v128.load16_lane */ @@ -1031,13 +1031,13 @@ export enum SIMDLoadStoreLaneOp { } /** Binaryen SIMD ternary operation constants. */ -export enum SIMDTernaryOp { +export const enum SIMDTernaryOp { /** v128.bitselect */ Bitselect = 0 /* _BinaryenBitselectVec128 */ } /** Binaryen RefIs operation constants. */ -export enum RefIsOp { +export const enum RefIsOp { /** ref.is_null */ RefIsNull = 0 /* _BinaryenRefIsNull */, /** ref.is_func */ @@ -1049,7 +1049,7 @@ export enum RefIsOp { } /** Binaryen RefAs operation constants. */ -export enum RefAsOp { +export const enum RefAsOp { /** ref.as_non_null */ RefAsNonNull = 0 /* _BinaryenRefAsNonNull */, /** ref.as_func */ @@ -1061,7 +1061,7 @@ export enum RefAsOp { } /** Binaryen BrOn operation constants. */ -export enum BrOnOp { +export const enum BrOnOp { /** br_on_null */ BrOnNull = 0 /* TODO_BinaryenBrOnNull */, /** br_on_cast */ @@ -1075,7 +1075,7 @@ export enum BrOnOp { } /** Binaryen expression runner flags. */ -export enum ExpressionRunnerFlags { +export const enum ExpressionRunnerFlags { Default = 0 /* _ExpressionRunnerFlagsDefault */, PreserveSideeffects = 1 /* _ExpressionRunnerFlagsPreserveSideeffects */, TraverseCalls = 2 /* _ExpressionRunnerFlagsTraverseCalls */ @@ -3016,7 +3016,7 @@ export class SwitchBuilder { } } -export enum SideEffects { +export const enum SideEffects { None = 0 /* _BinaryenSideEffectNone */, Branches = 1 /* _BinaryenSideEffectBranches */, Calls = 2 /* _BinaryenSideEffectCalls */, diff --git a/src/parser.ts b/src/parser.ts index 60510cdcd0..6e1962b635 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -16,13 +16,13 @@ import { import { Tokenizer, Token, - Range, CommentHandler, IdentifierHandling, isIllegalVariableIdentifier } from "./tokenizer"; import { + Range, DiagnosticCode, DiagnosticEmitter, DiagnosticMessage @@ -119,10 +119,10 @@ export class Parser extends DiagnosticEmitter { /** Constructs a new parser. */ constructor( diagnostics: DiagnosticMessage[] | null = null, - sources: Source[] | null = null + sources: Source[] = [] ) { super(diagnostics); - this.sources = sources ? sources : new Array(); + this.sources = sources; } /** Parses a file and adds its definitions to the program. */ @@ -355,7 +355,7 @@ export class Parser extends DiagnosticEmitter { // handle plain exports if (flags & CommonFlags.EXPORT) { - if (defaultEnd && tn.skipIdentifier(IdentifierHandling.PREFER)) { + if (defaultEnd && tn.skipIdentifier()) { if (declareEnd) { this.error( DiagnosticCode.An_export_assignment_cannot_have_modifiers, @@ -408,7 +408,10 @@ export class Parser extends DiagnosticEmitter { case NodeKind.CLASSDECLARATION: case NodeKind.INTERFACEDECLARATION: case NodeKind.NAMESPACEDECLARATION: { - return Node.createExportDefaultStatement(statement, tn.range(startPos, tn.pos)); + return Node.createExportDefaultStatement( + statement, + tn.range(startPos, tn.pos) + ); } default: { this.error( @@ -564,7 +567,7 @@ export class Parser extends DiagnosticEmitter { Node.createSimpleTypeName("this", tn.range()), [], false, tn.range(startPos, tn.pos) ); - // 'true' + // 'true' | `false` } else if (token == Token.TRUE || token == Token.FALSE) { type = Node.createNamedType( Node.createSimpleTypeName("bool", tn.range()), [], false, tn.range(startPos, tn.pos) @@ -619,19 +622,21 @@ export class Parser extends DiagnosticEmitter { return null; } // ... | null - while (tn.skip(Token.BAR)) { - if (tn.skip(Token.NULL)) { - type.isNullable = true; - } else { - let notNullStart = tn.pos; - let notNull = this.parseType(tn, false, true); - if (!suppressErrors) { - this.error( - DiagnosticCode._0_expected, - notNull ? notNull.range : tn.range(notNullStart), "null" - ); + if (tn.peek() != Token.BAR_BAR) { + while (tn.skip(Token.BAR)) { + if (tn.skip(Token.NULL)) { + type.isNullable = true; + } else { + let notNullStart = tn.pos; + let notNull = this.parseType(tn, false, true); + if (!suppressErrors) { + this.error( + DiagnosticCode._0_expected, + notNull ? notNull.range : tn.range(notNullStart), "null" + ); + } + return null; } - return null; } } // ... [][] @@ -697,7 +702,6 @@ export class Parser extends DiagnosticEmitter { isSignature = true; tn.discard(state); parameters = []; - } else { isSignature = false; // not yet known do { @@ -764,7 +768,9 @@ export class Parser extends DiagnosticEmitter { } } if (isSignature) { - let param = Node.createParameter(kind, name, Node.createOmittedType(tn.range(tn.pos)), null, tn.range(paramStart, tn.pos)); + let param = Node.createParameter( + kind, name, Node.createOmittedType(tn.range(tn.pos)), null, tn.range(paramStart, tn.pos) + ); if (!parameters) parameters = [ param ]; else parameters.push(param); this.error( @@ -870,7 +876,7 @@ export class Parser extends DiagnosticEmitter { let name = tn.readIdentifier(); let expression: Expression = Node.createIdentifierExpression(name, tn.range(startPos, tn.pos)); while (tn.skip(Token.DOT)) { - if (tn.skipIdentifier(IdentifierHandling.PREFER)) { + if (tn.skipIdentifier()) { name = tn.readIdentifier(); expression = Node.createPropertyAccessExpression( expression, @@ -1566,7 +1572,9 @@ export class Parser extends DiagnosticEmitter { var parameters = this.parseParameters(tn); if (!parameters) return null; - return this.parseFunctionExpressionCommon(tn, name, parameters, this.parseParametersThis, arrowKind, startPos, signatureStart); + return this.parseFunctionExpressionCommon( + tn, name, parameters, this.parseParametersThis, arrowKind, startPos, signatureStart + ); } private parseFunctionExpressionCommon( @@ -1709,8 +1717,8 @@ export class Parser extends DiagnosticEmitter { return null; } if (!isInterface) { - if (!implementsTypes) implementsTypes = []; - implementsTypes.push(type); + if (!implementsTypes) implementsTypes = [ type ]; + else implementsTypes.push(type); } } while (tn.skip(Token.COMMA)); } @@ -1854,8 +1862,8 @@ export class Parser extends DiagnosticEmitter { do { let decorator = this.parseDecorator(tn); if (!decorator) break; - if (!decorators) decorators = new Array(); - decorators.push(decorator); + if (!decorators) decorators = [ decorator ]; + else decorators.push(decorator); } while (tn.skip(Token.AT)); if (isInterface && decorators !== null) { this.error( @@ -3593,6 +3601,9 @@ export class Parser extends DiagnosticEmitter { // NewExpression case Token.NEW: { + + // at 'new': Identifier ('<' TypeArguments '>')? ('(' Arguments ')')? ';'? + if (!tn.skipIdentifier()) { this.error( DiagnosticCode.Identifier_expected, @@ -4137,26 +4148,6 @@ export class Parser extends DiagnosticEmitter { } break; } - // BinaryExpression (right associative) - case Token.EQUALS: - case Token.PLUS_EQUALS: - case Token.MINUS_EQUALS: - case Token.ASTERISK_ASTERISK_EQUALS: - case Token.ASTERISK_EQUALS: - case Token.SLASH_EQUALS: - case Token.PERCENT_EQUALS: - case Token.LESSTHAN_LESSTHAN_EQUALS: - case Token.GREATERTHAN_GREATERTHAN_EQUALS: - case Token.GREATERTHAN_GREATERTHAN_GREATERTHAN_EQUALS: - case Token.AMPERSAND_EQUALS: - case Token.CARET_EQUALS: - case Token.BAR_EQUALS: - case Token.ASTERISK_ASTERISK: { - let next = this.parseExpression(tn, nextPrecedence); - if (!next) return null; - expr = Node.createBinaryExpression(token, expr, next, tn.range(startPos, tn.pos)); - break; - } // BinaryExpression case Token.LESSTHAN: case Token.GREATERTHAN: @@ -4179,7 +4170,24 @@ export class Parser extends DiagnosticEmitter { case Token.CARET: case Token.AMPERSAND_AMPERSAND: case Token.BAR_BAR: { - let next = this.parseExpression(tn, nextPrecedence + 1); + ++nextPrecedence; + } + // BinaryExpression (right associative) + case Token.EQUALS: + case Token.PLUS_EQUALS: + case Token.MINUS_EQUALS: + case Token.ASTERISK_ASTERISK_EQUALS: + case Token.ASTERISK_EQUALS: + case Token.SLASH_EQUALS: + case Token.PERCENT_EQUALS: + case Token.LESSTHAN_LESSTHAN_EQUALS: + case Token.GREATERTHAN_GREATERTHAN_EQUALS: + case Token.GREATERTHAN_GREATERTHAN_GREATERTHAN_EQUALS: + case Token.AMPERSAND_EQUALS: + case Token.CARET_EQUALS: + case Token.BAR_EQUALS: + case Token.ASTERISK_ASTERISK: { + let next = this.parseExpression(tn, nextPrecedence); if (!next) return null; expr = Node.createBinaryExpression(token, expr, next, tn.range(startPos, tn.pos)); break; @@ -4307,6 +4315,11 @@ export class Parser extends DiagnosticEmitter { tn.checkForIdentifierStartAfterNumericLiteral(); break; } + case Token.SLASH: { + tn.readRegexpPattern(); + tn.readRegexpFlags(); + break; + } case Token.OPENBRACE: { this.skipBlock(tn); break; @@ -4350,7 +4363,7 @@ export class Parser extends DiagnosticEmitter { } case Token.TEMPLATELITERAL: { tn.readString(); - while(tn.readingTemplateString){ + while (tn.readingTemplateString) { this.skipBlock(tn); tn.readString(CharCode.BACKTICK); } @@ -4366,6 +4379,11 @@ export class Parser extends DiagnosticEmitter { tn.checkForIdentifierStartAfterNumericLiteral(); break; } + case Token.SLASH: { + tn.readRegexpPattern(); + tn.readRegexpFlags(); + break; + } } } while (again); } @@ -4401,11 +4419,15 @@ export const enum Precedence { function determinePrecedence(kind: Token): Precedence { switch (kind) { case Token.COMMA: return Precedence.COMMA; + case Token.YIELD: return Precedence.YIELD; case Token.EQUALS: case Token.PLUS_EQUALS: case Token.MINUS_EQUALS: case Token.ASTERISK_ASTERISK_EQUALS: + case Token.BAR_BAR_EQUALS: + case Token.AMPERSAND_AMPERSAND_EQUALS: case Token.ASTERISK_EQUALS: + case Token.QUESTION_QUESTION_EQUALS: case Token.SLASH_EQUALS: case Token.PERCENT_EQUALS: case Token.LESSTHAN_LESSTHAN_EQUALS: @@ -4415,7 +4437,8 @@ function determinePrecedence(kind: Token): Precedence { case Token.CARET_EQUALS: case Token.BAR_EQUALS: return Precedence.ASSIGNMENT; case Token.QUESTION: return Precedence.CONDITIONAL; - case Token.BAR_BAR: return Precedence.LOGICAL_OR; + case Token.BAR_BAR: + case Token.QUESTION_QUESTION: return Precedence.LOGICAL_OR; case Token.AMPERSAND_AMPERSAND: return Precedence.LOGICAL_AND; case Token.BAR: return Precedence.BITWISE_OR; case Token.CARET: return Precedence.BITWISE_XOR; diff --git a/src/passes/pass.ts b/src/passes/pass.ts index 5106c307df..32262c3453 100644 --- a/src/passes/pass.ts +++ b/src/passes/pass.ts @@ -197,7 +197,7 @@ export abstract class Visitor { get parentExpressionOrNull(): ExpressionRef { var stack = this.stack; var length = stack.length; - return length ? stack[length - 1] : 0; + return length ? unchecked(stack[length - 1]) : 0; } // Expressions diff --git a/src/program.ts b/src/program.ts index e1faf7596f..0f912cd8de 100644 --- a/src/program.ts +++ b/src/program.ts @@ -57,6 +57,7 @@ import { } from "./compiler"; import { + Range, DiagnosticCode, DiagnosticMessage, DiagnosticEmitter @@ -70,8 +71,7 @@ import { } from "./types"; import { - Token, - Range + Token } from "./tokenizer"; import { @@ -2623,7 +2623,7 @@ export class Program extends DiagnosticEmitter { } /** Indicates the specific kind of an {@link Element}. */ -export enum ElementKind { +export const enum ElementKind { /** A {@link Global}. */ GLOBAL, /** A {@link Local}. */ diff --git a/src/resolver.ts b/src/resolver.ts index 5903551901..57f72dfd50 100644 --- a/src/resolver.ts +++ b/src/resolver.ts @@ -12,6 +12,7 @@ */ import { + Range, DiagnosticEmitter, DiagnosticCode } from "./diagnostics"; @@ -44,10 +45,6 @@ import { Flow } from "./flow"; -import { - Range -} from "./tokenizer"; - import { FunctionTypeNode, ParameterKind, @@ -109,7 +106,7 @@ import { } from "./builtins"; /** Indicates whether errors are reported or not. */ -export enum ReportMode { +export const enum ReportMode { /** Report errors. */ REPORT, /** Swallow errors. */ diff --git a/src/tokenizer.ts b/src/tokenizer.ts index fab060fd5c..3ef1f9ca46 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -12,6 +12,7 @@ */ import { + Range, DiagnosticCode, DiagnosticMessage, DiagnosticEmitter @@ -30,12 +31,16 @@ import { isIdentifierPart, isDecimal, isOctal, + isHexPart, isHighSurrogate, isLowSurrogate } from "./util"; +const MIN_KEYWORD_LENGTH = 2; // 'as', 'if' and etc +const MAX_KEYWORD_LENGTH = 11; // 'constructor' + /** Named token types. */ -export enum Token { +export const enum Token { // keywords // discarded: ANY, BOOLEAN, NEVER, NUMBER, STRING, SYMBOL, UNDEFINED, LESSTHAN_SLASH @@ -138,16 +143,20 @@ export enum Token { BAR, CARET, EXCLAMATION, + QUESTION, TILDE, AMPERSAND_AMPERSAND, BAR_BAR, - QUESTION, + QUESTION_QUESTION, COLON, EQUALS, PLUS_EQUALS, MINUS_EQUALS, ASTERISK_EQUALS, ASTERISK_ASTERISK_EQUALS, + BAR_BAR_EQUALS, + AMPERSAND_AMPERSAND_EQUALS, + QUESTION_QUESTION_EQUALS, SLASH_EQUALS, PERCENT_EQUALS, LESSTHAN_LESSTHAN_EQUALS, @@ -168,182 +177,352 @@ export enum Token { // meta + DIGIT, + IDENTIFIER_OR_KEYWORD, + WHITESPACE, + COMMENT_OR_OPERATOR, + OPERATOR, INVALID, ENDOFFILE } -export enum IdentifierHandling { +export const enum IdentifierHandling { DEFAULT, PREFER, ALWAYS } -export function tokenFromKeyword(text: string): Token { +// Classify single character tokens (0..127) +const BASIC_TOKENS: Token[] = [ + /* 0x00 */ Token.INVALID, + /* 0x01 */ Token.INVALID, + /* 0x02 */ Token.INVALID, + /* 0x03 */ Token.INVALID, + /* 0x04 */ Token.INVALID, + /* 0x05 */ Token.INVALID, + /* 0x06 */ Token.INVALID, + /* 0x07 */ Token.INVALID, + /* 0x08 */ Token.INVALID, + /* \t */ Token.WHITESPACE, + /* \n */ Token.WHITESPACE, + /* \v */ Token.WHITESPACE, + /* \f */ Token.WHITESPACE, + /* \r */ Token.WHITESPACE, + /* 0x0E */ Token.INVALID, + /* 0x0F */ Token.INVALID, + /* 0x10 */ Token.INVALID, + /* 0x11 */ Token.INVALID, + /* 0x12 */ Token.INVALID, + /* 0x13 */ Token.INVALID, + /* 0x14 */ Token.INVALID, + /* 0x15 */ Token.INVALID, + /* 0x16 */ Token.INVALID, + /* 0x17 */ Token.INVALID, + /* 0x18 */ Token.INVALID, + /* 0x19 */ Token.INVALID, + /* 0x1A */ Token.INVALID, + /* 0x1B */ Token.INVALID, + /* 0x1C */ Token.INVALID, + /* 0x1D */ Token.INVALID, + /* 0x1E */ Token.INVALID, + /* 0x1F */ Token.INVALID, + /* ' ' */ Token.WHITESPACE, + /* ! */ Token.OPERATOR, + /* " */ Token.STRINGLITERAL, + /* # */ Token.INVALID, + /* $ */ Token.IDENTIFIER, + /* % */ Token.OPERATOR, + /* & */ Token.OPERATOR, + /* ' */ Token.STRINGLITERAL, + /* ( */ Token.OPENPAREN, + /* ) */ Token.CLOSEPAREN, + /* * */ Token.OPERATOR, + /* + */ Token.OPERATOR, + /* , */ Token.COMMA, + /* - */ Token.OPERATOR, + /* . */ Token.OPERATOR, + /* / */ Token.COMMENT_OR_OPERATOR, + /* 0 */ Token.DIGIT, + /* 1 */ Token.DIGIT, + /* 2 */ Token.DIGIT, + /* 3 */ Token.DIGIT, + /* 4 */ Token.DIGIT, + /* 5 */ Token.DIGIT, + /* 6 */ Token.DIGIT, + /* 7 */ Token.DIGIT, + /* 8 */ Token.DIGIT, + /* 9 */ Token.DIGIT, + /* : */ Token.COLON, + /* ; */ Token.SEMICOLON, + /* < */ Token.OPERATOR, + /* = */ Token.OPERATOR, + /* > */ Token.OPERATOR, + /* ? */ Token.OPERATOR, + /* @ */ Token.AT, + /* A */ Token.IDENTIFIER, + /* B */ Token.IDENTIFIER, + /* C */ Token.IDENTIFIER, + /* D */ Token.IDENTIFIER, + /* E */ Token.IDENTIFIER, + /* F */ Token.IDENTIFIER, + /* G */ Token.IDENTIFIER, + /* H */ Token.IDENTIFIER, + /* I */ Token.IDENTIFIER, + /* J */ Token.IDENTIFIER, + /* K */ Token.IDENTIFIER, + /* L */ Token.IDENTIFIER, + /* M */ Token.IDENTIFIER, + /* N */ Token.IDENTIFIER, + /* O */ Token.IDENTIFIER, + /* P */ Token.IDENTIFIER, + /* Q */ Token.IDENTIFIER, + /* R */ Token.IDENTIFIER, + /* S */ Token.IDENTIFIER, + /* T */ Token.IDENTIFIER, + /* U */ Token.IDENTIFIER, + /* V */ Token.IDENTIFIER, + /* W */ Token.IDENTIFIER, + /* X */ Token.IDENTIFIER, + /* Y */ Token.IDENTIFIER, + /* Z */ Token.IDENTIFIER, + /* [ */ Token.OPENBRACKET, + /* \ */ Token.INVALID, + /* ] */ Token.CLOSEBRACKET, + /* ^ */ Token.OPERATOR, + /* _ */ Token.IDENTIFIER, + /* ` */ Token.TEMPLATELITERAL, + /* a */ Token.IDENTIFIER_OR_KEYWORD, + /* b */ Token.IDENTIFIER_OR_KEYWORD, + /* c */ Token.IDENTIFIER_OR_KEYWORD, + /* d */ Token.IDENTIFIER_OR_KEYWORD, + /* e */ Token.IDENTIFIER_OR_KEYWORD, + /* f */ Token.IDENTIFIER_OR_KEYWORD, + /* g */ Token.IDENTIFIER_OR_KEYWORD, + /* h */ Token.IDENTIFIER, + /* i */ Token.IDENTIFIER_OR_KEYWORD, + /* j */ Token.IDENTIFIER, + /* k */ Token.IDENTIFIER_OR_KEYWORD, + /* l */ Token.IDENTIFIER_OR_KEYWORD, + /* m */ Token.IDENTIFIER_OR_KEYWORD, + /* n */ Token.IDENTIFIER_OR_KEYWORD, + /* o */ Token.IDENTIFIER_OR_KEYWORD, + /* p */ Token.IDENTIFIER_OR_KEYWORD, + /* q */ Token.IDENTIFIER, + /* r */ Token.IDENTIFIER_OR_KEYWORD, + /* s */ Token.IDENTIFIER_OR_KEYWORD, + /* t */ Token.IDENTIFIER_OR_KEYWORD, + /* u */ Token.IDENTIFIER, + /* v */ Token.IDENTIFIER_OR_KEYWORD, + /* w */ Token.IDENTIFIER_OR_KEYWORD, + /* x */ Token.IDENTIFIER, + /* y */ Token.IDENTIFIER_OR_KEYWORD, + /* z */ Token.IDENTIFIER, + /* { */ Token.OPENBRACE, + /* | */ Token.OPERATOR, + /* } */ Token.CLOSEBRACE, + /* ~ */ Token.TILDE, + /* 0x7F */ Token.INVALID, +]; + +export function scanKeyword(text: string): Token { let len = text.length; - assert(len); switch (text.charCodeAt(0)) { - case CharCode.a: { - if (len == 5) { - if (text == "async") return Token.ASYNC; - if (text == "await") return Token.AWAIT; + case CharCode.a: + if (len == 2) { + if (text.charCodeAt(1) == CharCode.s) return Token.AS; break; } - if (text == "as") return Token.AS; if (text == "abstract") return Token.ABSTRACT; + if (text == "async") return Token.ASYNC; + if (text == "await") return Token.AWAIT; break; - } - case CharCode.b: { + + case CharCode.b: if (text == "break") return Token.BREAK; break; - } - case CharCode.c: { + + case CharCode.c: if (len == 5) { - if (text == "const") return Token.CONST; - if (text == "class") return Token.CLASS; - if (text == "catch") return Token.CATCH; + switch (text.charCodeAt(1)) { + case CharCode.o: + if (text == "const") return Token.CONST; + break; + + case CharCode.l: + if (text == "class") return Token.CLASS; + break; + + case CharCode.a: + if (text == "catch") return Token.CATCH; + break; + } break; } - if (text == "case") return Token.CASE; - if (text == "continue") return Token.CONTINUE; - if (text == "constructor") return Token.CONSTRUCTOR; + switch (text.charCodeAt(3)) { + case CharCode.e: + if (text == "case") return Token.CASE; + break; + + case CharCode.s: + if (text == "constructor") return Token.CONSTRUCTOR; + break; + + case CharCode.t: + if (text == "continue") return Token.CONTINUE; + break; + } break; - } - case CharCode.d: { - if (len == 7) { - if (text == "default") return Token.DEFAULT; - if (text == "declare") return Token.DECLARE; + + case CharCode.d: + if (len == 2) { + if (text.charCodeAt(1) == CharCode.o) return Token.DO; break; } - if (text == "do") return Token.DO; + if (text == "default") return Token.DEFAULT; + if (text == "declare") return Token.DECLARE; if (text == "delete") return Token.DELETE; if (text == "debugger") return Token.DEBUGGER; break; - } - case CharCode.e: { - if (len == 4) { - if (text == "else") return Token.ELSE; - if (text == "enum") return Token.ENUM; - break; - } + + case CharCode.e: + if (text == "else") return Token.ELSE; if (text == "export") return Token.EXPORT; + if (text == "enum") return Token.ENUM; if (text == "extends") return Token.EXTENDS; break; - } - case CharCode.f: { - if (len <= 5) { - if (text == "false") return Token.FALSE; - if (text == "for") return Token.FOR; - if (text == "from") return Token.FROM; - break; + + case CharCode.f: + switch (text.charCodeAt(1)) { + case CharCode.a: + if (text == "false") return Token.FALSE; + break; + + case CharCode.u: + if (text == "function") return Token.FUNCTION; + break; + + case CharCode.o: + if (text == "for") return Token.FOR; + break; + + case CharCode.r: + if (text == "from") return Token.FROM; + break; + + case CharCode.i: + if (text == "finally") return Token.FINALLY; + break; } - if (text == "function") return Token.FUNCTION; - if (text == "finally") return Token.FINALLY; break; - } - case CharCode.g: { + + case CharCode.g: if (text == "get") return Token.GET; break; - } - case CharCode.i: { + + case CharCode.i: if (len == 2) { - if (text == "if") return Token.IF; - if (text == "in") return Token.IN; - if (text == "is") return Token.IS; + switch (text.charCodeAt(1)) { + case CharCode.f: return Token.IF; + case CharCode.n: return Token.IN; + case CharCode.s: return Token.IS; + } break; } switch (text.charCodeAt(3)) { - case CharCode.l: { + case CharCode.e: + if (text == "interface") return Token.INTERFACE; + break; + + case CharCode.l: if (text == "implements") return Token.IMPLEMENTS; break; - } - case CharCode.o: { + + case CharCode.o: if (text == "import") return Token.IMPORT; break; - } - case CharCode.t: { + + case CharCode.t: if (text == "instanceof") return Token.INSTANCEOF; break; - } - case CharCode.e: { - if (text == "interface") return Token.INTERFACE; - break; - } } break; - } - case CharCode.k: { + + case CharCode.k: if (text == "keyof") return Token.KEYOF; break; - } - case CharCode.l: { + + case CharCode.l: if (text == "let") return Token.LET; break; - } - case CharCode.m: { + + case CharCode.m: if (text == "module") return Token.MODULE; break; - } - case CharCode.n: { - if (text == "new") return Token.NEW; + + case CharCode.n: if (text == "null") return Token.NULL; + if (text == "new") return Token.NEW; if (text == "namespace") return Token.NAMESPACE; break; - } - case CharCode.o: { - if (text == "of") return Token.OF; + + case CharCode.o: + if (len == 2 && text.charCodeAt(1) == CharCode.f) return Token.OF; break; - } - case CharCode.p: { - if (len == 7) { - if (text == "private") return Token.PRIVATE; - if (text == "package") return Token.PACKAGE; - break; + + case CharCode.p: + switch (text.charCodeAt(2)) { + case CharCode.b: + if (text == "public") return Token.PUBLIC; + break; + + case CharCode.i: + if (text == "private") return Token.PRIVATE; + break; + + case CharCode.o: + if (text == "protected") return Token.PROTECTED; + break; + + case CharCode.c: + if (text == "package") return Token.PACKAGE; + break; } - if (text == "public") return Token.PUBLIC; - if (text == "protected") return Token.PROTECTED; break; - } - case CharCode.r: { + + case CharCode.r: if (text == "return") return Token.RETURN; if (text == "readonly") return Token.READONLY; break; - } - case CharCode.s: { - if (len == 6) { - if (text == "switch") return Token.SWITCH; - if (text == "static") return Token.STATIC; - break; - } + + case CharCode.s: + if (text == "switch") return Token.SWITCH; + if (text == "static") return Token.STATIC; if (text == "set") return Token.SET; if (text == "super") return Token.SUPER; break; - } - case CharCode.t: { - if (len == 4) { - if (text == "true") return Token.TRUE; - if (text == "this") return Token.THIS; - if (text == "type") return Token.TYPE; - break; - } - if (text == "try") return Token.TRY; - if (text == "throw") return Token.THROW; + + case CharCode.t: + if (text == "true") return Token.TRUE; + if (text == "this") return Token.THIS; + if (text == "type") return Token.TYPE; if (text == "typeof") return Token.TYPEOF; + if (text == "throw") return Token.THROW; + if (text == "try") return Token.TRY; break; - } - case CharCode.v: { + + case CharCode.v: if (text == "var") return Token.VAR; if (text == "void") return Token.VOID; break; - } - case CharCode.w: { + + case CharCode.w: if (text == "while") return Token.WHILE; if (text == "with") return Token.WITH; break; - } - case CharCode.y: { + + case CharCode.y: if (text == "yield") return Token.YIELD; break; - } } return Token.INVALID; } @@ -355,8 +534,8 @@ export function tokenIsAlsoIdentifier(token: Token): bool { case Token.CONSTRUCTOR: case Token.DECLARE: case Token.DELETE: - case Token.FROM: case Token.FOR: + case Token.FROM: case Token.GET: case Token.INSTANCEOF: case Token.IS: @@ -364,6 +543,7 @@ export function tokenIsAlsoIdentifier(token: Token): bool { case Token.MODULE: case Token.NAMESPACE: case Token.NULL: + case Token.OF: case Token.READONLY: case Token.SET: case Token.TYPE: @@ -373,7 +553,6 @@ export function tokenIsAlsoIdentifier(token: Token): bool { } export function isIllegalVariableIdentifier(name: string): bool { - assert(name.length); switch (name.charCodeAt(0)) { case CharCode.d: return name == "delete"; case CharCode.f: return name == "for"; @@ -386,10 +565,12 @@ export function isIllegalVariableIdentifier(name: string): bool { export function operatorTokenToString(token: Token): string { switch (token) { + case Token.AWAIT: return "await"; case Token.DELETE: return "delete"; case Token.IN: return "in"; case Token.INSTANCEOF: return "instanceof"; case Token.NEW: return "new"; + case Token.OF: return "of"; case Token.TYPEOF: return "typeof"; case Token.VOID: return "void"; case Token.YIELD: return "yield"; @@ -418,14 +599,19 @@ export function operatorTokenToString(token: Token): string { case Token.BAR: return "|"; case Token.CARET: return "^"; case Token.EXCLAMATION: return "!"; + case Token.QUESTION: return "?"; case Token.TILDE: return "~"; case Token.AMPERSAND_AMPERSAND: return "&&"; case Token.BAR_BAR: return "||"; + case Token.QUESTION_QUESTION: return "??"; case Token.EQUALS: return "="; case Token.PLUS_EQUALS: return "+="; case Token.MINUS_EQUALS: return "-="; case Token.ASTERISK_EQUALS: return "*="; case Token.ASTERISK_ASTERISK_EQUALS: return "**="; + case Token.AMPERSAND_AMPERSAND_EQUALS: return "&&="; + case Token.BAR_BAR_EQUALS: return "||="; + case Token.QUESTION_QUESTION_EQUALS: return "??="; case Token.SLASH_EQUALS: return "/="; case Token.PERCENT_EQUALS: return "%="; case Token.LESSTHAN_LESSTHAN_EQUALS: return "<<="; @@ -441,49 +627,6 @@ export function operatorTokenToString(token: Token): string { } } -export class Range { - - start: i32; - end: i32; - source!: Source; - debugInfoRef: usize = 0; - - constructor(start: i32, end: i32) { - this.start = start; - this.end = end; - } - - static join(a: Range, b: Range): Range { - if (a.source != b.source) throw new Error("source mismatch"); - let range = new Range( - a.start < b.start ? a.start : b.start, - a.end > b.end ? a.end : b.end - ); - range.source = a.source; - return range; - } - - equals(other: Range): bool { - return this.source == other.source && this.start == other.start && this.end == other.end; - } - - get atStart(): Range { - let range = new Range(this.start, this.start); - range.source = this.source; - return range; - } - - get atEnd(): Range { - let range = new Range(this.end, this.end); - range.source = this.source; - return range; - } - - toString(): string { - return this.source.text.substring(this.start, this.end); - } -} - /** Handler for intercepting comments while tokenizing. */ export type CommentHandler = (kind: CommentKind, text: string, range: Range) => void; @@ -560,470 +703,523 @@ export class Tokenizer extends DiagnosticEmitter { while (pos < end) { this.tokenPos = pos; let c = text.charCodeAt(pos); - switch (c) { - case CharCode.CARRIAGERETURN: { - if (!( - ++pos < end && - text.charCodeAt(pos) == CharCode.LINEFEED - )) break; - // otherwise fall-through + if (c <= 0x7F) { + let token = unchecked(BASIC_TOKENS[c]); + switch (token) { + // `$`, `_`, `h`, `j`, `q`, `u`, `x`, `z`, `A`..`Z` + case Token.IDENTIFIER: + // `"`, `'`, ``` + case Token.STRINGLITERAL: + case Token.TEMPLATELITERAL: + case Token.INVALID: + this.pos = pos; + return token; + // `0`..`9`, `0.`, `0x`, `0b`, `0o` + case Token.DIGIT: + return this.scanNumber(text, c, pos, end); + // `a`..`z` + case Token.IDENTIFIER_OR_KEYWORD: + return this.scanKeyword(text, pos, end, identifierHandling); + // `\n`, `\t`, `\v`, `\f`, ` `, `\r`, `\r\n` + case Token.WHITESPACE: { + // `\r`, `\r\n` + if (c == CharCode.CARRIAGERETURN) { + if (!( + ++pos < end && + text.charCodeAt(pos) == CharCode.LINEFEED + )) continue; + } + ++pos; + continue; + } + // `/`, `/=`, `/*`, `//`, `///` + case Token.COMMENT_OR_OPERATOR: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.SLASH) { // single-line + pos = this.skipLineComment(text, pos, end); + continue; + } + if (c == CharCode.ASTERISK) { // multi-line + pos = this.skipBlockComment(text, pos, end); + continue; + } + if (c == CharCode.EQUALS) { + this.pos = pos + 1; + return Token.SLASH_EQUALS; + } + } + this.pos = pos; + return Token.SLASH; + } + // `+`, `-`, `*`, `=`, `>`, .. + case Token.OPERATOR: + return this.scanOperator(text, c, pos, end, maxTokenLength); + // `[`, `{`, `(`, `:`, `;`, `@`, .. + default: + this.pos = pos + 1; + return token; } - case CharCode.LINEFEED: - case CharCode.TAB: - case CharCode.VERTICALTAB: - case CharCode.FORMFEED: - case CharCode.SPACE: { + } else { // c > 0x7F + // TODO: \uXXXX also support for identifiers + if (isIdentifierStart(c)) { + this.pos = pos; + return Token.IDENTIFIER; + } else if (isWhiteSpace(c)) { ++pos; break; } - case CharCode.EXCLAMATION: { + let start = pos++; + if ( + pos < end && isHighSurrogate(c) && + isLowSurrogate(text.charCodeAt(pos)) + ) ++pos; + this.error( + DiagnosticCode.Invalid_character, + this.range(start, pos) + ); + this.pos = pos; + return Token.INVALID; + } + } + this.pos = pos; + return Token.ENDOFFILE; + } + + peek( + checkOnNewLine: bool = false, + identifierHandling: IdentifierHandling = IdentifierHandling.DEFAULT, + maxCompoundLength: i32 = i32.MAX_VALUE + ): Token { + var text = this.source.text; + var nextToken = this.nextToken; + if (nextToken < 0) { + let posBefore = this.pos; + let tokenBefore = this.token; + let tokenPosBefore = this.tokenPos; + do nextToken = this.unsafeNext(identifierHandling, maxCompoundLength); + while (nextToken == Token.INVALID); + this.nextToken = nextToken; + this.nextTokenPos = this.tokenPos; + if (checkOnNewLine) { + this.nextTokenOnNewLine = false; + for (let pos = posBefore, end = this.nextTokenPos; pos < end; ++pos) { + if (isLineBreak(text.charCodeAt(pos))) { + this.nextTokenOnNewLine = true; + break; + } + } + } + this.pos = posBefore; + this.token = tokenBefore; + this.tokenPos = tokenPosBefore; + } + return nextToken; + } + + // Scan and determine is it integer or float without update of position. + private scanNumber(text: string, c: i32, pos: i32, end: i32): Token { + this.pos = pos++; + // `0.`, `0x`, `0b`, `0o` + if (c == CharCode._0) { + if (pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.DOT) return Token.FLOATLITERAL; + switch (c | 32) { + case CharCode.x: + case CharCode.b: + case CharCode.o: + return Token.INTEGERLITERAL; + } + } + } + while (pos < end) { + let c = text.charCodeAt(pos); + if (c == CharCode.DOT || (c | 32) == CharCode.e) { + return Token.FLOATLITERAL; + } + if (c != CharCode._ && !isDecimal(c)) break; + // does not validate separator placement (this is done in readXYInteger) + ++pos; + } + return Token.INTEGERLITERAL; + } + + private scanKeyword(text: string, pos: i32, end: i32, identifierHandling: IdentifierHandling): Token { + let startPos = pos; + if (identifierHandling != IdentifierHandling.ALWAYS) { + while ( + ++pos < end && + isIdentifierPart(text.charCodeAt(pos)) + ) { /* nop */ } + if ( + pos - startPos >= MIN_KEYWORD_LENGTH && + pos - startPos <= MAX_KEYWORD_LENGTH + ) { + let keyword = scanKeyword(text.substring(startPos, pos)); + if (keyword != Token.INVALID && !( + identifierHandling == IdentifierHandling.PREFER && + tokenIsAlsoIdentifier(keyword) + )) { + this.pos = pos; + return keyword; + } + } + } + this.pos = startPos; + return Token.IDENTIFIER; + } + + private scanOperator(text: string, c: i32, pos: i32, end: i32, maxTokenLength: i32): Token { + // Operator tokens + switch (c) { + // `!`, `!=`, `!==` + case CharCode.EXCLAMATION: { + ++pos; + if ( + maxTokenLength > 1 && pos < end && + text.charCodeAt(pos) == CharCode.EQUALS + ) { ++pos; if ( - maxTokenLength > 1 && pos < end && + maxTokenLength > 2 && pos < end && text.charCodeAt(pos) == CharCode.EQUALS ) { + this.pos = pos + 1; + return Token.EXCLAMATION_EQUALS_EQUALS; + } + this.pos = pos; + return Token.EXCLAMATION_EQUALS; + } + this.pos = pos; + return Token.EXCLAMATION; + } + // `%`, `%=` + case CharCode.PERCENT: { + ++pos; + if ( + maxTokenLength > 1 && pos < end && + text.charCodeAt(pos) == CharCode.EQUALS + ) { + this.pos = pos + 1; + return Token.PERCENT_EQUALS; + } + this.pos = pos; + return Token.PERCENT; + } + // `&`, `&&`, `&=`, `&&=` + case CharCode.AMPERSAND: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.EQUALS) { + this.pos = pos + 1; + return Token.AMPERSAND_EQUALS; + } + if (c == CharCode.AMPERSAND) { ++pos; if ( maxTokenLength > 2 && pos < end && text.charCodeAt(pos) == CharCode.EQUALS ) { this.pos = pos + 1; - return Token.EXCLAMATION_EQUALS_EQUALS; + return Token.AMPERSAND_AMPERSAND_EQUALS; } this.pos = pos; - return Token.EXCLAMATION_EQUALS; + return Token.AMPERSAND_AMPERSAND; } - this.pos = pos; - return Token.EXCLAMATION; } - case CharCode.DOUBLEQUOTE: - case CharCode.SINGLEQUOTE: { - this.pos = pos; - return Token.STRINGLITERAL; - } - case CharCode.BACKTICK: { - this.pos = pos; - return Token.TEMPLATELITERAL; - } - case CharCode.PERCENT: { - ++pos; - if ( - maxTokenLength > 1 && pos < end && - text.charCodeAt(pos) == CharCode.EQUALS - ) { + this.pos = pos; + return Token.AMPERSAND; + } + // `*`, `**`, `*=`, `**=` + case CharCode.ASTERISK: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.EQUALS) { this.pos = pos + 1; - return Token.PERCENT_EQUALS; + return Token.ASTERISK_EQUALS; } - this.pos = pos; - return Token.PERCENT; - } - case CharCode.AMPERSAND: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.AMPERSAND) { - this.pos = pos + 1; - return Token.AMPERSAND_AMPERSAND; - } - if (chr == CharCode.EQUALS) { + if (c == CharCode.ASTERISK) { + ++pos; + if ( + maxTokenLength > 2 && pos < end && + text.charCodeAt(pos) == CharCode.EQUALS + ) { this.pos = pos + 1; - return Token.AMPERSAND_EQUALS; + return Token.ASTERISK_ASTERISK_EQUALS; } + this.pos = pos; + return Token.ASTERISK_ASTERISK; } - this.pos = pos; - return Token.AMPERSAND; } - case CharCode.OPENPAREN: { - this.pos = pos + 1; - return Token.OPENPAREN; - } - case CharCode.CLOSEPAREN: { - this.pos = pos + 1; - return Token.CLOSEPAREN; - } - case CharCode.ASTERISK: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.EQUALS) { - this.pos = pos + 1; - return Token.ASTERISK_EQUALS; - } - if (chr == CharCode.ASTERISK) { - ++pos; - if ( - maxTokenLength > 2 && pos < end && - text.charCodeAt(pos) == CharCode.EQUALS - ) { - this.pos = pos + 1; - return Token.ASTERISK_ASTERISK_EQUALS; - } - this.pos = pos; - return Token.ASTERISK_ASTERISK; - } + this.pos = pos; + return Token.ASTERISK; + } + // `+`, `++`, `+=` + case CharCode.PLUS: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.PLUS) { + this.pos = pos + 1; + return Token.PLUS_PLUS; + } + if (c == CharCode.EQUALS) { + this.pos = pos + 1; + return Token.PLUS_EQUALS; } - this.pos = pos; - return Token.ASTERISK; } - case CharCode.PLUS: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.PLUS) { - this.pos = pos + 1; - return Token.PLUS_PLUS; - } - if (chr == CharCode.EQUALS) { - this.pos = pos + 1; - return Token.PLUS_EQUALS; - } + this.pos = pos; + return Token.PLUS; + } + // `-`, `-=`, `--` + case CharCode.MINUS: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.MINUS) { + this.pos = pos + 1; + return Token.MINUS_MINUS; + } + if (c == CharCode.EQUALS) { + this.pos = pos + 1; + return Token.MINUS_EQUALS; } - this.pos = pos; - return Token.PLUS; } - case CharCode.COMMA: { - this.pos = pos + 1; - return Token.COMMA; + this.pos = pos; + return Token.MINUS; + } + // `.`, `.{d}`, `...` + case CharCode.DOT: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (isDecimal(c)) { + this.pos = pos - 1; + return Token.FLOATLITERAL; // expects a call to readFloat + } + if ( + maxTokenLength > 2 && + pos + 1 < end && c == CharCode.DOT && + text.charCodeAt(pos + 1) == CharCode.DOT + ) { + this.pos = pos + 2; + return Token.DOT_DOT_DOT; + } } - case CharCode.MINUS: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.MINUS) { - this.pos = pos + 1; - return Token.MINUS_MINUS; - } - if (chr == CharCode.EQUALS) { + this.pos = pos; + return Token.DOT; + } + // `<`, `<<`, `<=` `<<=` + case CharCode.LESSTHAN: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.LESSTHAN) { + ++pos; + if ( + maxTokenLength > 2 && + pos < end && + text.charCodeAt(pos) == CharCode.EQUALS + ) { this.pos = pos + 1; - return Token.MINUS_EQUALS; + return Token.LESSTHAN_LESSTHAN_EQUALS; } + this.pos = pos; + return Token.LESSTHAN_LESSTHAN; + } + if (c == CharCode.EQUALS) { + this.pos = pos + 1; + return Token.LESSTHAN_EQUALS; } - this.pos = pos; - return Token.MINUS; } - case CharCode.DOT: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (isDecimal(chr)) { - this.pos = pos - 1; - return Token.FLOATLITERAL; // expects a call to readFloat - } + this.pos = pos; + return Token.LESSTHAN; + } + // `=`, `==`, `===`, `=>` + case CharCode.EQUALS: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.EQUALS) { + ++pos; if ( - maxTokenLength > 2 && pos + 1 < end && - chr == CharCode.DOT && - text.charCodeAt(pos + 1) == CharCode.DOT + maxTokenLength > 2 && + pos < end && + text.charCodeAt(pos) == CharCode.EQUALS ) { - this.pos = pos + 2; - return Token.DOT_DOT_DOT; + this.pos = pos + 1; + return Token.EQUALS_EQUALS_EQUALS; } + this.pos = pos; + return Token.EQUALS_EQUALS; + } + if (c == CharCode.GREATERTHAN) { + this.pos = pos + 1; + return Token.EQUALS_GREATERTHAN; } - this.pos = pos; - return Token.DOT; } - case CharCode.SLASH: { - let commentStartPos = pos; - ++pos; - if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.SLASH) { // single-line - let commentKind = CommentKind.LINE; - if ( - pos + 1 < end && - text.charCodeAt(pos + 1) == CharCode.SLASH - ) { + this.pos = pos; + return Token.EQUALS; + } + // `>`, `>>`, `>>>`, `>=` `>>=`, `>>>=` + case CharCode.GREATERTHAN: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.GREATERTHAN) { + ++pos; + if (maxTokenLength > 2 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.GREATERTHAN) { ++pos; - commentKind = CommentKind.TRIPLE; - } - while (++pos < end) { - if (text.charCodeAt(pos) == CharCode.LINEFEED) { - ++pos; - break; - } - } - if (this.onComment) { - this.onComment( - commentKind, - text.substring(commentStartPos, pos), - this.range(commentStartPos, pos) - ); - } - break; - } - if (chr == CharCode.ASTERISK) { // multi-line - let closed = false; - while (++pos < end) { - c = text.charCodeAt(pos); if ( - c == CharCode.ASTERISK && - pos + 1 < end && - text.charCodeAt(pos + 1) == CharCode.SLASH + maxTokenLength > 3 && pos < end && + text.charCodeAt(pos) == CharCode.EQUALS ) { - pos += 2; - closed = true; - break; + this.pos = pos + 1; + return Token.GREATERTHAN_GREATERTHAN_GREATERTHAN_EQUALS; } + this.pos = pos; + return Token.GREATERTHAN_GREATERTHAN_GREATERTHAN; } - if (!closed) { - this.error( - DiagnosticCode._0_expected, - this.range(pos), "*/" - ); - } else if (this.onComment) { - this.onComment( - CommentKind.BLOCK, - text.substring(commentStartPos, pos), - this.range(commentStartPos, pos) - ); - } - break; - } - if (chr == CharCode.EQUALS) { - this.pos = pos + 1; - return Token.SLASH_EQUALS; - } - } - this.pos = pos; - return Token.SLASH; - } - case CharCode._0: - case CharCode._1: - case CharCode._2: - case CharCode._3: - case CharCode._4: - case CharCode._5: - case CharCode._6: - case CharCode._7: - case CharCode._8: - case CharCode._9: { - this.pos = pos; - return this.testInteger() - ? Token.INTEGERLITERAL // expects a call to readInteger - : Token.FLOATLITERAL; // expects a call to readFloat - } - case CharCode.COLON: { - this.pos = pos + 1; - return Token.COLON; - } - case CharCode.SEMICOLON: { - this.pos = pos + 1; - return Token.SEMICOLON; - } - case CharCode.LESSTHAN: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.LESSTHAN) { - ++pos; - if ( - maxTokenLength > 2 && - pos < end && - text.charCodeAt(pos) == CharCode.EQUALS - ) { + if (c == CharCode.EQUALS) { this.pos = pos + 1; - return Token.LESSTHAN_LESSTHAN_EQUALS; + return Token.GREATERTHAN_GREATERTHAN_EQUALS; } - this.pos = pos; - return Token.LESSTHAN_LESSTHAN; - } - if (chr == CharCode.EQUALS) { - this.pos = pos + 1; - return Token.LESSTHAN_EQUALS; } + this.pos = pos; + return Token.GREATERTHAN_GREATERTHAN; } - this.pos = pos; - return Token.LESSTHAN; - } - case CharCode.EQUALS: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.EQUALS) { - ++pos; - if ( - maxTokenLength > 2 && - pos < end && - text.charCodeAt(pos) == CharCode.EQUALS - ) { - this.pos = pos + 1; - return Token.EQUALS_EQUALS_EQUALS; - } - this.pos = pos; - return Token.EQUALS_EQUALS; - } - if (chr == CharCode.GREATERTHAN) { - this.pos = pos + 1; - return Token.EQUALS_GREATERTHAN; - } + if (c == CharCode.EQUALS) { + this.pos = pos + 1; + return Token.GREATERTHAN_EQUALS; } - this.pos = pos; - return Token.EQUALS; } - case CharCode.GREATERTHAN: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.GREATERTHAN) { - ++pos; - if (maxTokenLength > 2 && pos < end) { - chr = text.charCodeAt(pos); - if (chr == CharCode.GREATERTHAN) { - ++pos; - if ( - maxTokenLength > 3 && pos < end && - text.charCodeAt(pos) == CharCode.EQUALS - ) { - this.pos = pos + 1; - return Token.GREATERTHAN_GREATERTHAN_GREATERTHAN_EQUALS; - } - this.pos = pos; - return Token.GREATERTHAN_GREATERTHAN_GREATERTHAN; - } - if (chr == CharCode.EQUALS) { - this.pos = pos + 1; - return Token.GREATERTHAN_GREATERTHAN_EQUALS; - } + this.pos = pos; + return Token.GREATERTHAN; + } + // `?`, `??`, `??=` + case CharCode.QUESTION: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.QUESTION) { + ++pos; + if (maxTokenLength > 2 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.EQUALS) { + this.pos = pos + 1; + return Token.QUESTION_QUESTION_EQUALS; } - this.pos = pos; - return Token.GREATERTHAN_GREATERTHAN; - } - if (chr == CharCode.EQUALS) { - this.pos = pos + 1; - return Token.GREATERTHAN_EQUALS; } + this.pos = pos; + return Token.QUESTION_QUESTION; } - this.pos = pos; - return Token.GREATERTHAN; - } - case CharCode.QUESTION: { - this.pos = pos + 1; - return Token.QUESTION; - } - case CharCode.OPENBRACKET: { - this.pos = pos + 1; - return Token.OPENBRACKET; } - case CharCode.CLOSEBRACKET: { + this.pos = pos; + return Token.QUESTION; + } + // `^`, `^=` + case CharCode.CARET: { + ++pos; + if ( + maxTokenLength > 1 && pos < end && + text.charCodeAt(pos) == CharCode.EQUALS + ) { this.pos = pos + 1; - return Token.CLOSEBRACKET; + return Token.CARET_EQUALS; } - case CharCode.CARET: { - ++pos; - if ( - maxTokenLength > 1 && pos < end && - text.charCodeAt(pos) == CharCode.EQUALS - ) { + this.pos = pos; + return Token.CARET; + } + // `|`, `||`, `|=`, `||=` + case CharCode.BAR: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.EQUALS) { this.pos = pos + 1; - return Token.CARET_EQUALS; + return Token.BAR_EQUALS; } - this.pos = pos; - return Token.CARET; - } - case CharCode.OPENBRACE: { - this.pos = pos + 1; - return Token.OPENBRACE; - } - case CharCode.BAR: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.BAR) { - this.pos = pos + 1; - return Token.BAR_BAR; - } - if (chr == CharCode.EQUALS) { + if (c == CharCode.BAR) { + ++pos; + if ( + maxTokenLength > 2 && pos < end && + text.charCodeAt(pos) == CharCode.EQUALS + ) { this.pos = pos + 1; - return Token.BAR_EQUALS; - } - } - this.pos = pos; - return Token.BAR; - } - case CharCode.CLOSEBRACE: { - this.pos = pos + 1; - return Token.CLOSEBRACE; - } - case CharCode.TILDE: { - this.pos = pos + 1; - return Token.TILDE; - } - case CharCode.AT: { - this.pos = pos + 1; - return Token.AT; - } - default: { - if (isIdentifierStart(c)) { - let posBefore = pos; - while ( - ++pos < end && - isIdentifierPart(c = text.charCodeAt(pos)) - ) { /* nop */ } - if (identifierHandling != IdentifierHandling.ALWAYS) { - let maybeKeywordToken = tokenFromKeyword(text.substring(posBefore, pos)); - if ( - maybeKeywordToken != Token.INVALID && - !( - identifierHandling == IdentifierHandling.PREFER && - tokenIsAlsoIdentifier(maybeKeywordToken) - ) - ) { - this.pos = pos; - return maybeKeywordToken; - } + return Token.BAR_BAR_EQUALS; } - this.pos = posBefore; - return Token.IDENTIFIER; - } else if (isWhiteSpace(c)) { - ++pos; - break; + this.pos = pos; + return Token.BAR_BAR; } - let start = pos++; - if ( - isHighSurrogate(c) && pos < end && - isLowSurrogate(text.charCodeAt(pos)) - ) ++pos; - this.error( - DiagnosticCode.Invalid_character, - this.range(start, pos) - ); - this.pos = pos; - return Token.INVALID; } + this.pos = pos; + return Token.BAR; } } this.pos = pos; - return Token.ENDOFFILE; + return Token.INVALID; } - peek( - checkOnNewLine: bool = false, - identifierHandling: IdentifierHandling = IdentifierHandling.DEFAULT, - maxCompoundLength: i32 = i32.MAX_VALUE - ): Token { - var text = this.source.text; - if (this.nextToken < 0) { - let posBefore = this.pos; - let tokenBefore = this.token; - let tokenPosBefore = this.tokenPos; - let nextToken: Token; - do nextToken = this.unsafeNext(identifierHandling, maxCompoundLength); - while (nextToken == Token.INVALID); - this.nextToken = nextToken; - this.nextTokenPos = this.tokenPos; - if (checkOnNewLine) { - this.nextTokenOnNewLine = false; - for (let pos = posBefore, end = this.nextTokenPos; pos < end; ++pos) { - if (isLineBreak(text.charCodeAt(pos))) { - this.nextTokenOnNewLine = true; - break; - } - } + skipLineComment(text: string, pos: i32, end: i32): i32 { + var startPos = pos - 1; + var kind = CommentKind.LINE; + if ( + pos + 1 < end && + text.charCodeAt(pos + 1) == CharCode.SLASH + ) { + ++pos; + kind = CommentKind.TRIPLE; + } + while (++pos < end) { + if (isLineBreak(text.charCodeAt(pos))) { + ++pos; + break; } - this.pos = posBefore; - this.token = tokenBefore; - this.tokenPos = tokenPosBefore; } - return this.nextToken; + if (this.onComment) { + this.onComment( + kind, + text.substring(startPos, pos), + this.range(startPos, pos) + ); + } + return pos; + } + + skipBlockComment(text: string, pos: i32, end: i32): i32 { + var startPos = pos - 1; + var closed = false; + while (++pos < end) { + let c = text.charCodeAt(pos); + if ( + c == CharCode.ASTERISK && + pos + 1 < end && + text.charCodeAt(pos + 1) == CharCode.SLASH + ) { + pos += 2; + closed = true; + break; + } + } + if (!closed) { + this.error( + DiagnosticCode._0_expected, + this.range(pos), "*/" + ); + } else if (this.onComment) { + this.onComment( + CommentKind.BLOCK, + text.substring(startPos, pos), + this.range(startPos, pos) + ); + } + return pos; } skipIdentifier(identifierHandling: IdentifierHandling = IdentifierHandling.PREFER): bool { @@ -1035,8 +1231,19 @@ export class Tokenizer extends DiagnosticEmitter { var tokenBefore = this.token; var tokenPosBefore = this.tokenPos; var maxCompoundLength = i32.MAX_VALUE; - if (token == Token.GREATERTHAN) { // where parsing type arguments - maxCompoundLength = 1; + switch (token) { + case Token.EQUALS: + case Token.GREATERTHAN: + case Token.LESSTHAN: + case Token.SLASH: + case Token.BAR: + case Token.EXCLAMATION: + case Token.DOT: + case Token.ASTERISK: + case Token.AMPERSAND: + case Token.QUESTION: + maxCompoundLength = 1; + break; } var nextToken: Token; do nextToken = this.unsafeNext(identifierHandling, maxCompoundLength); @@ -1099,7 +1306,11 @@ export class Tokenizer extends DiagnosticEmitter { isIdentifierPart(text.charCodeAt(pos)) ); this.pos = pos; - return text.substring(start, pos); + if (pos - start == 1) { + return text[start]; + } else { + return text.substring(start, pos); + } } readingTemplateString: bool = false; @@ -1140,7 +1351,10 @@ export class Tokenizer extends DiagnosticEmitter { continue; } if (quote == CharCode.BACKTICK) { - if (c == CharCode.DOLLAR && pos + 1 < end && text.charCodeAt(pos + 1) == CharCode.OPENBRACE) { + if ( + c == CharCode.DOLLAR && pos + 1 < end && + text.charCodeAt(pos + 1) == CharCode.OPENBRACE + ) { result += text.substring(start, pos); this.readStringEnd = pos; this.pos = pos + 2; @@ -1165,8 +1379,8 @@ export class Tokenizer extends DiagnosticEmitter { readEscapeSequence(isTaggedTemplate: bool = false): string { // for context on isTaggedTemplate, see: https://tc39.es/proposal-template-literal-revision/ - var start = this.pos; var end = this.end; + var start = this.pos; if (++this.pos >= end) { this.error( DiagnosticCode.Unexpected_end_of_text, @@ -1179,10 +1393,10 @@ export class Tokenizer extends DiagnosticEmitter { var c = text.charCodeAt(this.pos++); switch (c) { case CharCode._0: { - if (isTaggedTemplate && this.pos < end && isDecimal(text.charCodeAt(this.pos))) { - ++this.pos; - return text.substring(start, this.pos); - } + if ( + isTaggedTemplate && this.pos < end && + isDecimal(text.charCodeAt(this.pos)) + ) return text.substring(start, ++this.pos); return "\0"; } case CharCode.b: return "\b"; @@ -1210,9 +1424,7 @@ export class Tokenizer extends DiagnosticEmitter { if ( this.pos < end && text.charCodeAt(this.pos) == CharCode.LINEFEED - ) { - ++this.pos; - } + ) ++this.pos; // fall through } case CharCode.LINEFEED: @@ -1257,14 +1469,14 @@ export class Tokenizer extends DiagnosticEmitter { readRegexpFlags(): string { var text = this.source.text; - var start = this.pos; var end = this.end; + var pos = this.pos; + var start = pos; var flags = 0; - while (this.pos < end) { - let c: i32 = text.charCodeAt(this.pos); + while (pos < end) { + let c = text.charCodeAt(pos); if (!isIdentifierPart(c)) break; - ++this.pos; - + ++pos; // make sure each supported flag is unique switch (c) { case CharCode.g: { @@ -1288,38 +1500,19 @@ export class Tokenizer extends DiagnosticEmitter { if (flags == -1) { this.error( DiagnosticCode.Invalid_regular_expression_flags, - this.range(start, this.pos) + this.range(start, pos) ); } - return text.substring(start, this.pos); - } - - testInteger(): bool { - var text = this.source.text; - var pos = this.pos; - var end = this.end; - if (pos + 1 < end && text.charCodeAt(pos) == CharCode._0) { - switch (text.charCodeAt(pos + 2) | 32) { - case CharCode.x: - case CharCode.b: - case CharCode.o: return true; - } - } - while (pos < end) { - let c = text.charCodeAt(pos); - if (c == CharCode.DOT || (c | 32) == CharCode.e) return false; - if (c != CharCode._ && (c < CharCode._0 || c > CharCode._9)) break; - // does not validate separator placement (this is done in readXYInteger) - pos++; - } - return true; + this.pos = pos; + return text.substring(start, pos); } readInteger(): i64 { var text = this.source.text; var pos = this.pos; if (pos + 2 < this.end && text.charCodeAt(pos) == CharCode._0) { - switch (text.charCodeAt(pos + 1) | 32) { + let c1 = text.charCodeAt(pos + 1); + switch (c1 | 32) { case CharCode.x: { this.pos = pos + 2; return this.readHexInteger(); @@ -1332,49 +1525,39 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos + 2; return this.readOctalInteger(); } - } - if (isOctal(text.charCodeAt(pos + 1))) { - let start = pos; - this.pos = pos + 1; - let value = this.readOctalInteger(); - this.error( - DiagnosticCode.Octal_literals_are_not_allowed_in_strict_mode, - this.range(start, this.pos) - ); - return value; + default: { + if (isOctal(c1)) { + let start = pos; + this.pos = pos + 1; + let value = this.readOctalInteger(); + this.error( + DiagnosticCode.Octal_literals_are_not_allowed_in_strict_mode, + this.range(start, this.pos) + ); + return value; + } + } } } return this.readDecimalInteger(); } - readHexInteger(): i64 { + readDecimalInteger(): i64 { var text = this.source.text; - let pos = this.pos; + var pos = this.pos; var end = this.end; var start = pos; var sepEnd = start; - var value = i64_new(0); - var i64_4 = i64_new(4); + var value = i64_zero; + var i64_10 = i64_new(10); while (pos < end) { let c = text.charCodeAt(pos); - if (c >= CharCode._0 && c <= CharCode._9) { - // value = (value << 4) + c - CharCode._0; + if (isDecimal(c)) { + // value = value * 10 + c - CharCode._0; value = i64_add( - i64_shl(value, i64_4), + i64_mul(value, i64_10), i64_new(c - CharCode._0) ); - } else if (c >= CharCode.A && c <= CharCode.F) { - // value = (value << 4) + 10 + c - CharCode.A; - value = i64_add( - i64_shl(value, i64_4), - i64_new(10 + c - CharCode.A) - ); - } else if (c >= CharCode.a && c <= CharCode.f) { - // value = (value << 4) + 10 + c - CharCode.a; - value = i64_add( - i64_shl(value, i64_4), - i64_new(10 + c - CharCode.a) - ); } else if (c == CharCode._) { if (sepEnd == pos) { this.error( @@ -1383,6 +1566,11 @@ export class Tokenizer extends DiagnosticEmitter { : DiagnosticCode.Multiple_consecutive_numeric_separators_are_not_permitted, this.range(pos) ); + } else if (pos - 1 == start && text.charCodeAt(pos - 1) == CharCode._0) { + this.error( + DiagnosticCode.Numeric_separators_are_not_allowed_here, + this.range(pos) + ); } sepEnd = pos + 1; } else { @@ -1392,7 +1580,7 @@ export class Tokenizer extends DiagnosticEmitter { } if (pos == start) { this.error( - DiagnosticCode.Hexadecimal_digit_expected, + DiagnosticCode.Digit_expected, this.range(start) ); } else if (sepEnd == pos) { @@ -1405,22 +1593,32 @@ export class Tokenizer extends DiagnosticEmitter { return value; } - readDecimalInteger(): i64 { + readHexInteger(): i64 { var text = this.source.text; - var pos = this.pos; + let pos = this.pos; var end = this.end; var start = pos; var sepEnd = start; - var value = i64_new(0); - var i64_10 = i64_new(10); + var value = i64_zero; + var zeros = 0; while (pos < end) { let c = text.charCodeAt(pos); - if (c >= CharCode._0 && c <= CharCode._9) { - // value = value * 10 + c - CharCode._0; + if (c == CharCode._0) { + ++zeros; + } else if (isDecimal(c)) { + // value = (value << (zeros + 1) * 4) + c - CharCode._0; value = i64_add( - i64_mul(value, i64_10), + i64_shl(value, i64_new((zeros + 1) << 2)), i64_new(c - CharCode._0) ); + zeros = 0; + } else if (isHexPart(c)) { + // value = (value << (zeros + 1) * 4) + 10 + (c | 32) - CharCode.a; + value = i64_add( + i64_shl(value, i64_new((zeros + 1) << 2)), + i64_new(10 + (c | 32) - CharCode.a) + ); + zeros = 0; } else if (c == CharCode._) { if (sepEnd == pos) { this.error( @@ -1429,11 +1627,6 @@ export class Tokenizer extends DiagnosticEmitter { : DiagnosticCode.Multiple_consecutive_numeric_separators_are_not_permitted, this.range(pos) ); - } else if (pos - 1 == start && text.charCodeAt(pos - 1) == CharCode._0) { - this.error( - DiagnosticCode.Numeric_separators_are_not_allowed_here, - this.range(pos) - ); } sepEnd = pos + 1; } else { @@ -1441,9 +1634,12 @@ export class Tokenizer extends DiagnosticEmitter { } ++pos; } + if (zeros != 0) { + value = i64_shl(value, i64_new(zeros << 2)); + } if (pos == start) { this.error( - DiagnosticCode.Digit_expected, + DiagnosticCode.Hexadecimal_digit_expected, this.range(start) ); } else if (sepEnd == pos) { @@ -1462,16 +1658,19 @@ export class Tokenizer extends DiagnosticEmitter { var end = this.end; var start = pos; var sepEnd = start; - var value = i64_new(0); - var i64_3 = i64_new(3); + var value = i64_zero; + var zeros = 0; while (pos < end) { let c = text.charCodeAt(pos); - if (c >= CharCode._0 && c <= CharCode._7) { - // value = (value << 3) + c - CharCode._0; + if (c == CharCode._0) { + ++zeros; + } else if (isOctal(c)) { + // value = (value << (zeros + 1) * 3) + c - CharCode._0; value = i64_add( - i64_shl(value, i64_3), + i64_shl(value, i64_new((zeros + 1) * 3)), i64_new(c - CharCode._0) ); + zeros = 0; } else if (c == CharCode._) { if (sepEnd == pos) { this.error( @@ -1487,6 +1686,9 @@ export class Tokenizer extends DiagnosticEmitter { } ++pos; } + if (zeros != 0) { + value = i64_shl(value, i64_new(zeros * 3)); + } if (pos == start) { this.error( DiagnosticCode.Octal_digit_expected, @@ -1508,19 +1710,19 @@ export class Tokenizer extends DiagnosticEmitter { var end = this.end; var start = pos; var sepEnd = start; - var value = i64_new(0); - var i64_1 = i64_new(1); + var value = i64_zero; + var zeros = 0; while (pos < end) { let c = text.charCodeAt(pos); if (c == CharCode._0) { - // value = (value << 1); - value = i64_shl(value, i64_1); + ++zeros; } else if (c == CharCode._1) { - // value = (value << 1) + 1; - value = i64_add( - i64_shl(value, i64_1), - i64_1 + // (value << zeros + 1) | 1 + value = i64_or( + i64_shl(value, i64_new(zeros + 1)), + i64_one ); + zeros = 0; } else if (c == CharCode._) { if (sepEnd == pos) { this.error( @@ -1536,6 +1738,9 @@ export class Tokenizer extends DiagnosticEmitter { } ++pos; } + if (zeros != 0) { + value = i64_shl(value, i64_new(zeros)); + } if (pos == start) { this.error( DiagnosticCode.Binary_digit_expected, @@ -1569,10 +1774,10 @@ export class Tokenizer extends DiagnosticEmitter { var text = this.source.text; var end = this.end; var start = this.pos; - var sepCount = this.readDecimalFloatPartial(false); + var hasSep = this.scanFloatPart(false); if (this.pos < end && text.charCodeAt(this.pos) == CharCode.DOT) { ++this.pos; - sepCount += this.readDecimalFloatPartial(); + hasSep |= this.scanFloatPart(); } if (this.pos < end) { let c = text.charCodeAt(this.pos); @@ -1584,26 +1789,33 @@ export class Tokenizer extends DiagnosticEmitter { ) { ++this.pos; } - sepCount += this.readDecimalFloatPartial(); + hasSep |= this.scanFloatPart(); } } - let result = text.substring(start, this.pos); - if (sepCount) result = result.replaceAll("_", ""); + let pos = this.pos; + if (pos - start == 3 && text.charCodeAt(start + 1) == CharCode.DOT) { + // fast path for the most common literals: + // 0.0 ... 9.9 with step = 0.1 + let d1 = (text.charCodeAt(start + 0) - CharCode._0); + let d2 = (text.charCodeAt(start + 2) - CharCode._0); + return d1 + d2 / 10.0; + } + let result = text.substring(start, pos); + if (hasSep) result = result.replaceAll("_", ""); return parseFloat(result); } - /** Reads past one section of a decimal float literal. Returns the number of separators encountered. */ - private readDecimalFloatPartial(allowLeadingZeroSep: bool = true): u32 { + /** Scan past one section of a decimal float literal. Returns `1` if separators encountered. */ + private scanFloatPart(allowLeadingZeroSep: bool = true): i32 { var text = this.source.text; + var end = this.end; var pos = this.pos; var start = pos; - var end = this.end; - var sepEnd = start; - var sepCount = 0; + var sepEnd = pos; + var hasSep = 0; while (pos < end) { let c = text.charCodeAt(pos); - if (c == CharCode._) { if (sepEnd == pos) { this.error( @@ -1619,7 +1831,7 @@ export class Tokenizer extends DiagnosticEmitter { ); } sepEnd = pos + 1; - ++sepCount; + hasSep = 1; } else if (!isDecimal(c)) { break; } @@ -1634,7 +1846,7 @@ export class Tokenizer extends DiagnosticEmitter { } this.pos = pos; - return sepCount; + return hasSep; } readHexFloat(): f64 { @@ -1648,12 +1860,10 @@ export class Tokenizer extends DiagnosticEmitter { var end = this.end; while (pos < end) { let c = text.charCodeAt(pos++); - if (c >= CharCode._0 && c <= CharCode._9) { + if (isDecimal(c)) { value = (value << 4) + c - CharCode._0; - } else if (c >= CharCode.A && c <= CharCode.F) { - value = (value << 4) + c + (10 - CharCode.A); - } else if (c >= CharCode.a && c <= CharCode.f) { - value = (value << 4) + c + (10 - CharCode.a); + } else if (isHexPart(c)) { + value = (value << 4) + (c | 32) + (10 - CharCode.a); } else if (~startIfTaggedTemplate) { this.pos = --pos; return text.substring(startIfTaggedTemplate, pos); diff --git a/src/tsconfig.json b/src/tsconfig.json index 0acc81470a..596d9b9697 100644 --- a/src/tsconfig.json +++ b/src/tsconfig.json @@ -1,16 +1,14 @@ { "extends": "../std/portable.json", "compilerOptions": { + "target": "esnext", "outDir": "../out", "types" : ["node"], - "allowJs": false, "sourceMap": true, - "skipLibCheck": true, - "target": "esnext", - "useDefineForClassFields": false, + "allowJs": false, "strict": true, - "noImplicitReturns": true, - "noPropertyAccessFromIndexSignature": true + "skipLibCheck": true, + "noImplicitReturns": true }, "include": [ "./**/*.ts" diff --git a/src/util/text.ts b/src/util/text.ts index 6b3598e061..3267182151 100644 --- a/src/util/text.ts +++ b/src/util/text.ts @@ -144,13 +144,9 @@ export function isLineBreak(c: i32): bool { case CharCode.LINEFEED: case CharCode.CARRIAGERETURN: case CharCode.LINESEPARATOR: - case CharCode.PARAGRAPHSEPARATOR: { - return true; - } - default: { - return false; - } + case CharCode.PARAGRAPHSEPARATOR: return true; } + return false; } /** Tests if the specified character code is some sort of white space. */ @@ -166,13 +162,9 @@ export function isWhiteSpace(c: i32): bool { case CharCode.NARROWNOBREAKSPACE: case CharCode.MATHEMATICALSPACE: case CharCode.IDEOGRAPHICSPACE: - case CharCode.BYTEORDERMARK: { - return true; - } - default: { - return c >= CharCode.ENQUAD && c <= CharCode.ZEROWIDTHSPACE; - } + case CharCode.BYTEORDERMARK: return true; } + return c >= CharCode.ENQUAD && c <= CharCode.ZEROWIDTHSPACE; } /** First high (lead) surrogate. */ @@ -234,10 +226,15 @@ export function isOctal(c: i32): bool { return c >= CharCode._0 && c <= CharCode._7; } +/** Tests if the specified character code is a valid hexadecimal symbol [a-f]. */ +export function isHexPart(c: i32): bool { + let c0 = c | 32; // unify uppercases and lowercases a|A - f|F + return c0 >= CharCode.a && c0 <= CharCode.f; +} + /** Tests if the specified character code is a valid hexadecimal digit. */ export function isHex(c: i32): bool { - let c0 = c | 32; // unify uppercases and lowercases a|A - f|F - return isDecimal(c) || (c0 >= CharCode.a && c0 <= CharCode.f); + return isDecimal(c) || isHexPart(c); } /** Tests if the specified character code is trivially alphanumeric. */ diff --git a/std/portable.json b/std/portable.json index 1a1e427d21..a9bb503c7f 100644 --- a/std/portable.json +++ b/std/portable.json @@ -5,7 +5,8 @@ "module": "commonjs", "allowJs": true, "downlevelIteration": true, - "preserveConstEnums": true, + "useDefineForClassFields": false, + "noPropertyAccessFromIndexSignature": true, "typeRoots": [ "types" ], "types": [ "portable" ], "lib": ["esnext", "esnext.string"] diff --git a/tests/parser/also-identifier.ts b/tests/parser/also-identifier.ts index 702cead65b..53c9bc66d9 100644 --- a/tests/parser/also-identifier.ts +++ b/tests/parser/also-identifier.ts @@ -1,38 +1,44 @@ class Foo { as: i32; - //declare: i32; - delete: i32; - from: i32; + is: i32; + in: i32; + of: i32; for: i32; get: i32; - instanceof: i32; - is: i32; - keyof: i32; - module: i32; - namespace: i32; - null: i32; - readonly: i32; set: i32; type: i32; void: i32; + null: i32; + from: i32; + true: i32; + false: i32; + keyof: i32; + module: i32; + delete: i32; + readonly: i32; + namespace: i32; + instanceof: i32; } var as: i32; -var constructor: i32; -//var declare: i32; -var from: i32; -var get: i32; var is: i32; +var of: i32; +var get: i32; +var set: i32; +var from: i32; var keyof: i32; var module: i32; -var namespace: i32; +var declare: i32; var readonly: i32; -var set: i32; +var namespace: i32; +var constructor: i32; // -- illegal -- -// var delete: i32; // var for: i32; -// var instanceof: i32; // var null: i32; // var type: i32; // var void: i32; +// var true: i32; +// var false: i32; +// var delete: i32; +// var instanceof: i32; diff --git a/tests/parser/also-identifier.ts.fixture.ts b/tests/parser/also-identifier.ts.fixture.ts index 8d3188b81e..8e0e4f6af0 100644 --- a/tests/parser/also-identifier.ts.fixture.ts +++ b/tests/parser/also-identifier.ts.fixture.ts @@ -1,27 +1,33 @@ class Foo { as: i32; - delete: i32; - from: i32; + is: i32; + in: i32; + of: i32; for: i32; get: i32; - instanceof: i32; - is: i32; - keyof: i32; - module: i32; - namespace: i32; - null: i32; - readonly: i32; set: i32; type: i32; void: i32; + null: i32; + from: i32; + true: i32; + false: i32; + keyof: i32; + module: i32; + delete: i32; + readonly: i32; + namespace: i32; + instanceof: i32; } var as: i32; -var constructor: i32; -var from: i32; -var get: i32; var is: i32; +var of: i32; +var get: i32; +var set: i32; +var from: i32; var keyof: i32; var module: i32; -var namespace: i32; +var declare: i32; var readonly: i32; -var set: i32; +var namespace: i32; +var constructor: i32; diff --git a/tests/parser/literals.ts b/tests/parser/literals.ts index 1f445aeb77..02976cfe99 100644 --- a/tests/parser/literals.ts +++ b/tests/parser/literals.ts @@ -56,6 +56,9 @@ 1.0e+1; 1e-1; 1.0e-1; ++.1; +-.2; +-1.; ""; "\""; "123"; diff --git a/tests/parser/literals.ts.fixture.ts b/tests/parser/literals.ts.fixture.ts index 8f7d004851..c5ca601974 100644 --- a/tests/parser/literals.ts.fixture.ts +++ b/tests/parser/literals.ts.fixture.ts @@ -56,6 +56,9 @@ 10; 0.1; 0.1; ++0.1; +-0.2; +-1; ""; "\""; "123"; @@ -97,13 +100,13 @@ c; d; a; b; -// ERROR 1109: "Expression expected." in literals.ts(86,4+1) -// ERROR 1351: "An identifier or keyword cannot immediately follow a numeric literal." in literals.ts(87,2+0) -// ERROR 1351: "An identifier or keyword cannot immediately follow a numeric literal." in literals.ts(88,2+0) -// ERROR 1109: "Expression expected." in literals.ts(89,3+1) -// ERROR 6188: "Numeric separators are not allowed here." in literals.ts(91,2+0) -// ERROR 1351: "An identifier or keyword cannot immediately follow a numeric literal." in literals.ts(92,3+0) -// ERROR 1351: "An identifier or keyword cannot immediately follow a numeric literal." in literals.ts(93,4+0) -// ERROR 1125: "Hexadecimal digit expected." in literals.ts(94,4+1) -// ERROR 1125: "Hexadecimal digit expected." in literals.ts(94,12+1) -// ERROR 1125: "Hexadecimal digit expected." in literals.ts(94,16+1) +// ERROR 1109: "Expression expected." in literals.ts(89,4+1) +// ERROR 1351: "An identifier or keyword cannot immediately follow a numeric literal." in literals.ts(90,2+0) +// ERROR 1351: "An identifier or keyword cannot immediately follow a numeric literal." in literals.ts(91,2+0) +// ERROR 1109: "Expression expected." in literals.ts(92,3+1) +// ERROR 6188: "Numeric separators are not allowed here." in literals.ts(94,2+0) +// ERROR 1351: "An identifier or keyword cannot immediately follow a numeric literal." in literals.ts(95,3+0) +// ERROR 1351: "An identifier or keyword cannot immediately follow a numeric literal." in literals.ts(96,4+0) +// ERROR 1125: "Hexadecimal digit expected." in literals.ts(97,4+1) +// ERROR 1125: "Hexadecimal digit expected." in literals.ts(97,12+1) +// ERROR 1125: "Hexadecimal digit expected." in literals.ts(97,16+1)