From 60f14a87572065451329221ca917a222602b746c Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 14 Nov 2021 19:59:06 +0200 Subject: [PATCH 001/124] init --- src/tokenizer.ts | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 5aec3bd243..4005583cf6 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -33,7 +33,7 @@ import { } from "./util"; /** Named token types. */ -export enum Token { +export const enum Token { // keywords // discarded: ANY, BOOLEAN, NEVER, NUMBER, STRING, SYMBOL, UNDEFINED, LESSTHAN_SLASH @@ -170,7 +170,7 @@ export enum Token { ENDOFFILE } -export enum IdentifierHandling { +export const enum IdentifierHandling { DEFAULT, PREFER, ALWAYS @@ -784,7 +784,25 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.SLASH; } - case CharCode._0: + case CharCode._0: { + if (pos + 1 < end) { + switch (text.charCodeAt(pos + 1) | 32) { + case CharCode.x: + case CharCode.b: + case CharCode.o: { + // 0x | 0b | 0o + this.pos = pos; + return Token.INTEGERLITERAL; + } + case CharCode.DOT: { + // 0. + this.pos = pos; + return Token.FLOATLITERAL; + } + } + } + // fall-through + } case CharCode._1: case CharCode._2: case CharCode._3: @@ -1296,13 +1314,6 @@ export class Tokenizer extends DiagnosticEmitter { var text = this.source.text; var pos = this.pos; var end = this.end; - if (pos + 1 < end && text.charCodeAt(pos) == CharCode._0) { - switch (text.charCodeAt(pos + 2) | 32) { - case CharCode.x: - case CharCode.b: - case CharCode.o: return true; - } - } while (pos < end) { let c = text.charCodeAt(pos); if (c == CharCode.DOT || (c | 32) == CharCode.e) return false; From bd5ef0363f7cb49f5444b5257c281be57ef711d0 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 14 Nov 2021 20:10:40 +0200 Subject: [PATCH 002/124] advance pos for readInteger --- src/tokenizer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 4005583cf6..af4cd59fda 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1312,7 +1312,7 @@ export class Tokenizer extends DiagnosticEmitter { testInteger(): bool { var text = this.source.text; - var pos = this.pos; + var pos = this.pos + 1; var end = this.end; while (pos < end) { let c = text.charCodeAt(pos); From 307266439945605058f86d70172b08c7c273c352 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 14 Nov 2021 20:14:32 +0200 Subject: [PATCH 003/124] fix --- src/tokenizer.ts | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index af4cd59fda..56f71a06f8 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -786,7 +786,13 @@ export class Tokenizer extends DiagnosticEmitter { } case CharCode._0: { if (pos + 1 < end) { - switch (text.charCodeAt(pos + 1) | 32) { + let ch = text.charCodeAt(pos + 1); + if (ch == CharCode.DOT) { + // 0. + this.pos = pos; + return Token.FLOATLITERAL; + } + switch (ch | 32) { case CharCode.x: case CharCode.b: case CharCode.o: { @@ -794,11 +800,6 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.INTEGERLITERAL; } - case CharCode.DOT: { - // 0. - this.pos = pos; - return Token.FLOATLITERAL; - } } } // fall-through From fcb42004b77741317c442ada35ed629979dde2ca Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 14 Nov 2021 23:00:49 +0200 Subject: [PATCH 004/124] comments --- src/tokenizer.ts | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 56f71a06f8..69620ff29b 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -559,6 +559,7 @@ export class Tokenizer extends DiagnosticEmitter { this.tokenPos = pos; let c = text.charCodeAt(pos); switch (c) { + // `\r`, `\r\n` case CharCode.CARRIAGERETURN: { if (!( ++pos < end && @@ -574,6 +575,7 @@ export class Tokenizer extends DiagnosticEmitter { ++pos; break; } + // `!`, `!=`, `!==` case CharCode.EXCLAMATION: { ++pos; if ( @@ -603,6 +605,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.TEMPLATELITERAL; } + // `%`, `%=` case CharCode.PERCENT: { ++pos; if ( @@ -615,6 +618,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.PERCENT; } + // `&`, `&&`, `&=` case CharCode.AMPERSAND: { ++pos; if (maxTokenLength > 1 && pos < end) { @@ -639,6 +643,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos + 1; return Token.CLOSEPAREN; } + // `*`, `*=`, `**`, `**=` case CharCode.ASTERISK: { ++pos; if (maxTokenLength > 1 && pos < end) { @@ -663,6 +668,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.ASTERISK; } + // `+`, `+=`, `++` case CharCode.PLUS: { ++pos; if (maxTokenLength > 1 && pos < end) { @@ -683,6 +689,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos + 1; return Token.COMMA; } + // `-`, `-=`, `--` case CharCode.MINUS: { ++pos; if (maxTokenLength > 1 && pos < end) { @@ -699,6 +706,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.MINUS; } + // `.`, `.{d}`, `...` case CharCode.DOT: { ++pos; if (maxTokenLength > 1 && pos < end) { @@ -719,6 +727,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.DOT; } + // `/`, `//`, `/*`, `/=`, `///` case CharCode.SLASH: { let commentStartPos = pos; ++pos; @@ -784,11 +793,11 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.SLASH; } + // `0.`, `0x`, `0b`, `0o` case CharCode._0: { if (pos + 1 < end) { let ch = text.charCodeAt(pos + 1); if (ch == CharCode.DOT) { - // 0. this.pos = pos; return Token.FLOATLITERAL; } @@ -796,7 +805,6 @@ export class Tokenizer extends DiagnosticEmitter { case CharCode.x: case CharCode.b: case CharCode.o: { - // 0x | 0b | 0o this.pos = pos; return Token.INTEGERLITERAL; } @@ -826,6 +834,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos + 1; return Token.SEMICOLON; } + // `<`, `<=`, `<<`, `<<=` case CharCode.LESSTHAN: { ++pos; if (maxTokenLength > 1 && pos < end) { @@ -851,6 +860,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.LESSTHAN; } + // `=`, `==`, `===`, `=>` case CharCode.EQUALS: { ++pos; if (maxTokenLength > 1 && pos < end) { @@ -876,6 +886,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.EQUALS; } + // `>`, `>=`, `>>`, `>>>`, `>>=`, `>>>=` case CharCode.GREATERTHAN: { ++pos; if (maxTokenLength > 1 && pos < end) { @@ -924,6 +935,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos + 1; return Token.CLOSEBRACKET; } + // `^`, `^=` case CharCode.CARET: { ++pos; if ( @@ -940,6 +952,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos + 1; return Token.OPENBRACE; } + // `|`, `||`, `|=` case CharCode.BAR: { ++pos; if (maxTokenLength > 1 && pos < end) { From 26ab53555b567c09110e7e5809337da4bf2e3ab6 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 14 Nov 2021 23:07:33 +0200 Subject: [PATCH 005/124] better --- src/tokenizer.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 69620ff29b..2f88e1a8a2 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -643,7 +643,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos + 1; return Token.CLOSEPAREN; } - // `*`, `*=`, `**`, `**=` + // `*`, `**`, `*=`, `**=` case CharCode.ASTERISK: { ++pos; if (maxTokenLength > 1 && pos < end) { @@ -668,7 +668,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.ASTERISK; } - // `+`, `+=`, `++` + // `+`, `++`, `+=` case CharCode.PLUS: { ++pos; if (maxTokenLength > 1 && pos < end) { @@ -834,7 +834,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos + 1; return Token.SEMICOLON; } - // `<`, `<=`, `<<`, `<<=` + // `<`, `<<`, `<=` `<<=` case CharCode.LESSTHAN: { ++pos; if (maxTokenLength > 1 && pos < end) { @@ -886,7 +886,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.EQUALS; } - // `>`, `>=`, `>>`, `>>>`, `>>=`, `>>>=` + // `>`, `>>`, `>>>`, `>=` `>>=`, `>>>=` case CharCode.GREATERTHAN: { ++pos; if (maxTokenLength > 1 && pos < end) { From 72a62032e5fa0be4de86d9b8ea74fb165a2c5ffa Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 15 Nov 2021 00:20:27 +0200 Subject: [PATCH 006/124] Guarantee skip upper cased tokens as keywords --- src/tokenizer.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 2f88e1a8a2..760900dea3 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -988,7 +988,11 @@ export class Tokenizer extends DiagnosticEmitter { ++pos < end && isIdentifierPart(c = text.charCodeAt(pos)) ) { /* nop */ } - if (identifierHandling != IdentifierHandling.ALWAYS) { + if ( + identifierHandling != IdentifierHandling.ALWAYS && + // Only a non-capitalised token can be a keyword + (c = text.charCodeAt(posBefore)) >= CharCode.a && c <= CharCode.z + ) { let maybeKeywordToken = tokenFromKeyword(text.substring(posBefore, pos)); if ( maybeKeywordToken != Token.INVALID && From 4c4c4c9f9b15e4d7f6d57e4a2a733ed71b90ca76 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 15 Nov 2021 00:50:35 +0200 Subject: [PATCH 007/124] add keyword length limit --- src/tokenizer.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 760900dea3..6373eb728b 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -32,6 +32,8 @@ import { isOctalDigit } from "./util"; +const maxKeywordLength = 11; // 'constructor' + /** Named token types. */ export const enum Token { @@ -990,6 +992,7 @@ export class Tokenizer extends DiagnosticEmitter { ) { /* nop */ } if ( identifierHandling != IdentifierHandling.ALWAYS && + pos - posBefore <= maxKeywordLength && // Only a non-capitalised token can be a keyword (c = text.charCodeAt(posBefore)) >= CharCode.a && c <= CharCode.z ) { From 6159cd8750b8f36fef5880d4378f6abfc5c6c9d0 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 15 Nov 2021 00:55:04 +0200 Subject: [PATCH 008/124] better --- src/tokenizer.ts | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 6373eb728b..4c5a9ced2e 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -986,26 +986,27 @@ export class Tokenizer extends DiagnosticEmitter { default: { if (isIdentifierStart(c)) { let posBefore = pos; - while ( - ++pos < end && - isIdentifierPart(c = text.charCodeAt(pos)) - ) { /* nop */ } - if ( - identifierHandling != IdentifierHandling.ALWAYS && - pos - posBefore <= maxKeywordLength && - // Only a non-capitalised token can be a keyword - (c = text.charCodeAt(posBefore)) >= CharCode.a && c <= CharCode.z - ) { - let maybeKeywordToken = tokenFromKeyword(text.substring(posBefore, pos)); + // Only a non-capitalised token can be a keyword + if (c >= CharCode.a && c <= CharCode.z) { + while ( + ++pos < end && + isIdentifierPart(c = text.charCodeAt(pos)) + ) { /* nop */ } if ( - maybeKeywordToken != Token.INVALID && - !( - identifierHandling == IdentifierHandling.PREFER && - tokenIsAlsoIdentifier(maybeKeywordToken) - ) + identifierHandling != IdentifierHandling.ALWAYS && + pos - posBefore <= maxKeywordLength ) { - this.pos = pos; - return maybeKeywordToken; + let maybeKeywordToken = tokenFromKeyword(text.substring(posBefore, pos)); + if ( + maybeKeywordToken != Token.INVALID && + !( + identifierHandling == IdentifierHandling.PREFER && + tokenIsAlsoIdentifier(maybeKeywordToken) + ) + ) { + this.pos = pos; + return maybeKeywordToken; + } } } this.pos = posBefore; From 35fb736c51aba989feea69ec426f779fab0145c1 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 15 Nov 2021 01:09:53 +0200 Subject: [PATCH 009/124] revert --- src/tokenizer.ts | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 4c5a9ced2e..8e5a3647fe 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -986,27 +986,28 @@ export class Tokenizer extends DiagnosticEmitter { default: { if (isIdentifierStart(c)) { let posBefore = pos; - // Only a non-capitalised token can be a keyword - if (c >= CharCode.a && c <= CharCode.z) { - while ( - ++pos < end && - isIdentifierPart(c = text.charCodeAt(pos)) - ) { /* nop */ } + while ( + ++pos < end && + isIdentifierPart(c = text.charCodeAt(pos)) + ) { /* nop */ } + // TODO: check vlid termination of identifier? + + if ( + identifierHandling != IdentifierHandling.ALWAYS && + pos - posBefore <= maxKeywordLength && + // Only a non-capitalised token can be a keyword + (c = text.charCodeAt(posBefore)) >= CharCode.a && c <= CharCode.z + ) { + let maybeKeywordToken = tokenFromKeyword(text.substring(posBefore, pos)); if ( - identifierHandling != IdentifierHandling.ALWAYS && - pos - posBefore <= maxKeywordLength + maybeKeywordToken != Token.INVALID && + !( + identifierHandling == IdentifierHandling.PREFER && + tokenIsAlsoIdentifier(maybeKeywordToken) + ) ) { - let maybeKeywordToken = tokenFromKeyword(text.substring(posBefore, pos)); - if ( - maybeKeywordToken != Token.INVALID && - !( - identifierHandling == IdentifierHandling.PREFER && - tokenIsAlsoIdentifier(maybeKeywordToken) - ) - ) { - this.pos = pos; - return maybeKeywordToken; - } + this.pos = pos; + return maybeKeywordToken; } } this.pos = posBefore; From 182eb41ebe07f30a0956df64e7ffcfcd9834b4f0 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 15 Nov 2021 15:24:27 +0200 Subject: [PATCH 010/124] improve token skip --- src/tokenizer.ts | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 8e5a3647fe..8566a0c8f6 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1074,8 +1074,18 @@ export class Tokenizer extends DiagnosticEmitter { var tokenBefore = this.token; var tokenPosBefore = this.tokenPos; var maxCompoundLength = i32.MAX_VALUE; - if (token == Token.GREATERTHAN) { // where parsing type arguments - maxCompoundLength = 1; + switch (token) { + case Token.ASTERISK: + case Token.GREATERTHAN: + case Token.LESSTHAN: + case Token.QUESTION: + case Token.EXCLAMATION: + case Token.EQUALS: + case Token.SLASH: + case Token.BAR: + case Token.DOT: + maxCompoundLength = 1; + break; } var nextToken: Token; do nextToken = this.unsafeNext(identifierHandling, maxCompoundLength); From 1b91de35ef5dc8187c6ebdd501851878adf69b80 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 15 Nov 2021 16:04:56 +0200 Subject: [PATCH 011/124] add isKeyword helper --- src/tokenizer.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 8566a0c8f6..a7399f66d3 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -102,6 +102,8 @@ export const enum Token { WITH, // ES2017 YIELD, // ES2017 + LAST_KEYWORD = YIELD, + // punctuation OPENBRACE, @@ -178,6 +180,10 @@ export const enum IdentifierHandling { ALWAYS } +export function isKeyword(token: Token): bool { + return token >= Token.ABSTRACT && token <= Token.LAST_KEYWORD; +} + export function tokenFromKeyword(text: string): Token { let len = text.length; assert(len); From 1c22bc5848cbc2e6475107b1b8dad04741b785d4 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 15 Nov 2021 16:09:49 +0200 Subject: [PATCH 012/124] refactor --- src/tokenizer.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index a7399f66d3..d9b09bb12e 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1081,15 +1081,16 @@ export class Tokenizer extends DiagnosticEmitter { var tokenPosBefore = this.tokenPos; var maxCompoundLength = i32.MAX_VALUE; switch (token) { - case Token.ASTERISK: + case Token.EQUALS: case Token.GREATERTHAN: case Token.LESSTHAN: - case Token.QUESTION: - case Token.EXCLAMATION: - case Token.EQUALS: case Token.SLASH: case Token.BAR: + case Token.EXCLAMATION: case Token.DOT: + case Token.ASTERISK: + case Token.AMPERSAND: + case Token.QUESTION: maxCompoundLength = 1; break; } From 207b7d9f4a783e0a3b2b7be999d9240db868a351 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 15 Nov 2021 16:49:25 +0200 Subject: [PATCH 013/124] add skipKeyword --- src/tokenizer.ts | 88 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 3 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index d9b09bb12e..72b8512b6d 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -32,8 +32,6 @@ import { isOctalDigit } from "./util"; -const maxKeywordLength = 11; // 'constructor' - /** Named token types. */ export const enum Token { @@ -180,6 +178,75 @@ export const enum IdentifierHandling { ALWAYS } +const MAX_KEYWORD_LENGTH = 11; // 'constructor' +function getKeywordLength(token: Token): i32 { + switch (token) { + case Token.ABSTRACT: return 8; + case Token.AS: return 2; + case Token.ASYNC: return 5; + case Token.AWAIT: return 5; + case Token.BREAK: return 5; + case Token.CASE: return 4; + case Token.CATCH: return 5; + case Token.CLASS: return 5; + case Token.CONST: return 5; + case Token.CONTINUE: return 8; + case Token.CONSTRUCTOR: return 11; + case Token.DEBUGGER: return 8; + case Token.DECLARE: return 7; + case Token.DEFAULT: return 7; + case Token.DELETE: return 6; + case Token.DO: return 2; + case Token.ELSE: return 4; + case Token.ENUM: return 4; + case Token.EXPORT: return 6; + case Token.EXTENDS: return 7; + case Token.FALSE: return 5; + case Token.FINALLY: return 7; + case Token.FOR: return 3; + case Token.FROM: return 4; + case Token.FUNCTION: return 8; + case Token.GET: return 3; + case Token.IF: return 2; + case Token.IMPLEMENTS: return 10; + case Token.IMPORT: return 6; + case Token.IN: return 2; + case Token.INSTANCEOF: return 10; + case Token.INTERFACE: return 9; + case Token.IS: return 2; + case Token.KEYOF: return 5; + case Token.LET: return 3; + case Token.MODULE: return 6; + case Token.NAMESPACE: return 9; + case Token.NEW: return 3; + case Token.NULL: return 4; + case Token.OF: return 2; + case Token.PACKAGE: return 7; + case Token.PRIVATE: return 7; + case Token.PROTECTED: return 9; + case Token.PUBLIC: return 6; + case Token.READONLY: return 8; + case Token.RETURN: return 6; + case Token.SET: return 3; + case Token.STATIC: return 6; + case Token.SUPER: return 5; + case Token.SWITCH: return 6; + case Token.THIS: return 4; + case Token.THROW: return 5; + case Token.TRUE: return 4; + case Token.TRY: return 3; + case Token.TYPE: return 4; + case Token.TYPEOF: return 6; + case Token.VAR: return 3; + case Token.VOID: return 4; + case Token.WHILE: return 5; + case Token.WITH: return 4; + case Token.YIELD: return 5; + } + assert(false); + return 0; +} + export function isKeyword(token: Token): bool { return token >= Token.ABSTRACT && token <= Token.LAST_KEYWORD; } @@ -1000,7 +1067,7 @@ export class Tokenizer extends DiagnosticEmitter { if ( identifierHandling != IdentifierHandling.ALWAYS && - pos - posBefore <= maxKeywordLength && + pos - posBefore <= MAX_KEYWORD_LENGTH && // Only a non-capitalised token can be a keyword (c = text.charCodeAt(posBefore)) >= CharCode.a && c <= CharCode.z ) { @@ -1075,6 +1142,21 @@ export class Tokenizer extends DiagnosticEmitter { return this.skip(Token.IDENTIFIER, identifierHandling); } + skipKeyword(token: Token): bool { + var end = this.end; + var pos = this.pos; + var text = this.source.text; + var keywordLen = getKeywordLength(token); + if (pos + keywordLen < end) { + let maybeToken = text.substring(pos, pos + keywordLen); + if (tokenFromKeyword(maybeToken) == token) { + return true; + } + } + // fallback to skip + return this.skip(token); + } + skip(token: Token, identifierHandling: IdentifierHandling = IdentifierHandling.DEFAULT): bool { var posBefore = this.pos; var tokenBefore = this.token; From 7c1fa02bc37406c4707884143da1fdbbd61d56f6 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 15 Nov 2021 18:44:10 +0200 Subject: [PATCH 014/124] clean --- src/tokenizer.ts | 90 ++---------------------------------------------- 1 file changed, 2 insertions(+), 88 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 72b8512b6d..2a09ebfbaa 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -32,6 +32,8 @@ import { isOctalDigit } from "./util"; +const MAX_KEYWORD_LENGTH = 11; // 'constructor' + /** Named token types. */ export const enum Token { @@ -178,79 +180,6 @@ export const enum IdentifierHandling { ALWAYS } -const MAX_KEYWORD_LENGTH = 11; // 'constructor' -function getKeywordLength(token: Token): i32 { - switch (token) { - case Token.ABSTRACT: return 8; - case Token.AS: return 2; - case Token.ASYNC: return 5; - case Token.AWAIT: return 5; - case Token.BREAK: return 5; - case Token.CASE: return 4; - case Token.CATCH: return 5; - case Token.CLASS: return 5; - case Token.CONST: return 5; - case Token.CONTINUE: return 8; - case Token.CONSTRUCTOR: return 11; - case Token.DEBUGGER: return 8; - case Token.DECLARE: return 7; - case Token.DEFAULT: return 7; - case Token.DELETE: return 6; - case Token.DO: return 2; - case Token.ELSE: return 4; - case Token.ENUM: return 4; - case Token.EXPORT: return 6; - case Token.EXTENDS: return 7; - case Token.FALSE: return 5; - case Token.FINALLY: return 7; - case Token.FOR: return 3; - case Token.FROM: return 4; - case Token.FUNCTION: return 8; - case Token.GET: return 3; - case Token.IF: return 2; - case Token.IMPLEMENTS: return 10; - case Token.IMPORT: return 6; - case Token.IN: return 2; - case Token.INSTANCEOF: return 10; - case Token.INTERFACE: return 9; - case Token.IS: return 2; - case Token.KEYOF: return 5; - case Token.LET: return 3; - case Token.MODULE: return 6; - case Token.NAMESPACE: return 9; - case Token.NEW: return 3; - case Token.NULL: return 4; - case Token.OF: return 2; - case Token.PACKAGE: return 7; - case Token.PRIVATE: return 7; - case Token.PROTECTED: return 9; - case Token.PUBLIC: return 6; - case Token.READONLY: return 8; - case Token.RETURN: return 6; - case Token.SET: return 3; - case Token.STATIC: return 6; - case Token.SUPER: return 5; - case Token.SWITCH: return 6; - case Token.THIS: return 4; - case Token.THROW: return 5; - case Token.TRUE: return 4; - case Token.TRY: return 3; - case Token.TYPE: return 4; - case Token.TYPEOF: return 6; - case Token.VAR: return 3; - case Token.VOID: return 4; - case Token.WHILE: return 5; - case Token.WITH: return 4; - case Token.YIELD: return 5; - } - assert(false); - return 0; -} - -export function isKeyword(token: Token): bool { - return token >= Token.ABSTRACT && token <= Token.LAST_KEYWORD; -} - export function tokenFromKeyword(text: string): Token { let len = text.length; assert(len); @@ -1142,21 +1071,6 @@ export class Tokenizer extends DiagnosticEmitter { return this.skip(Token.IDENTIFIER, identifierHandling); } - skipKeyword(token: Token): bool { - var end = this.end; - var pos = this.pos; - var text = this.source.text; - var keywordLen = getKeywordLength(token); - if (pos + keywordLen < end) { - let maybeToken = text.substring(pos, pos + keywordLen); - if (tokenFromKeyword(maybeToken) == token) { - return true; - } - } - // fallback to skip - return this.skip(token); - } - skip(token: Token, identifierHandling: IdentifierHandling = IdentifierHandling.DEFAULT): bool { var posBefore = this.pos; var tokenBefore = this.token; From 133892b63824ea25eaac0f34bdd40f2017b1b457 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 15 Nov 2021 19:45:37 +0200 Subject: [PATCH 015/124] allow "of" as identifier --- src/tokenizer.ts | 7 ++-- tests/parser/also-identifier.ts | 44 +++++++++++++--------- tests/parser/also-identifier.ts.fixture.ts | 37 ++++++++++-------- 3 files changed, 51 insertions(+), 37 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 2a09ebfbaa..ef249976ff 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -357,8 +357,8 @@ export function tokenIsAlsoIdentifier(token: Token): bool { case Token.CONSTRUCTOR: case Token.DECLARE: case Token.DELETE: - case Token.FROM: case Token.FOR: + case Token.FROM: case Token.GET: case Token.INSTANCEOF: case Token.IS: @@ -366,6 +366,7 @@ export function tokenIsAlsoIdentifier(token: Token): bool { case Token.MODULE: case Token.NAMESPACE: case Token.NULL: + case Token.OF: case Token.READONLY: case Token.SET: case Token.TYPE: @@ -379,7 +380,7 @@ export function isIllegalVariableIdentifier(name: string): bool { switch (name.charCodeAt(0)) { case CharCode.d: return name == "delete"; case CharCode.f: return name == "for"; - case CharCode.i: return name == "instanceof"; + case CharCode.i: return name == "in" || name == "instanceof"; case CharCode.n: return name == "null"; case CharCode.v: return name == "void"; } @@ -992,7 +993,7 @@ export class Tokenizer extends DiagnosticEmitter { ++pos < end && isIdentifierPart(c = text.charCodeAt(pos)) ) { /* nop */ } - // TODO: check vlid termination of identifier? + // TODO: check valid termination of identifier? if ( identifierHandling != IdentifierHandling.ALWAYS && diff --git a/tests/parser/also-identifier.ts b/tests/parser/also-identifier.ts index 39b5477bba..ce8b874fd6 100644 --- a/tests/parser/also-identifier.ts +++ b/tests/parser/also-identifier.ts @@ -1,38 +1,46 @@ class Foo { as: i32; - declare: i32; - delete: i32; - from: i32; + is: i32; + in: i32; + of: i32; for: i32; get: i32; - instanceof: i32; - is: i32; - keyof: i32; - module: i32; - namespace: i32; - null: i32; - readonly: i32; set: i32; type: i32; void: i32; + null: i32; + from: i32; + true: i32; + false: i32; + keyof: i32; + module: i32; + delete: i32; + declare: i32; + readonly: i32; + namespace: i32; + instanceof: i32; } var as: i32; -var constructor: i32; -var declare: i32; -var from: i32; -var get: i32; var is: i32; +var of: i32; +var get: i32; +var set: i32; +var from: i32; var keyof: i32; var module: i32; -var namespace: i32; +var declare: i32; var readonly: i32; -var set: i32; +var namespace: i32; +var constructor: i32; // -- illegal -- -// var delete: i32; +// var in: i32; // var for: i32; -// var instanceof: i32; // var null: i32; // var type: i32; // var void: i32; +// var true: i32; +// var false: i32; +// var delete: i32; +// var instanceof: i32; diff --git a/tests/parser/also-identifier.ts.fixture.ts b/tests/parser/also-identifier.ts.fixture.ts index 8de8b51707..19870c5c66 100644 --- a/tests/parser/also-identifier.ts.fixture.ts +++ b/tests/parser/also-identifier.ts.fixture.ts @@ -1,29 +1,34 @@ class Foo { as: i32; - declare: i32; - delete: i32; - from: i32; + is: i32; + in: i32; + of: i32; for: i32; get: i32; - instanceof: i32; - is: i32; - keyof: i32; - module: i32; - namespace: i32; - null: i32; - readonly: i32; set: i32; type: i32; void: i32; + null: i32; + from: i32; + true: i32; + false: i32; + keyof: i32; + module: i32; + delete: i32; + declare: i32; + readonly: i32; + namespace: i32; + instanceof: i32; } var as: i32; -var constructor: i32; -var declare: i32; -var from: i32; -var get: i32; var is: i32; +var get: i32; +var set: i32; +var from: i32; var keyof: i32; var module: i32; -var namespace: i32; +var declare: i32; var readonly: i32; -var set: i32; +var namespace: i32; +var constructor: i32; +// ERROR 1003: "Identifier expected." in also-identifier.ts(26,1+3) From 7cfb3a3b4ab31951037476633bfa8a39a427b5dc Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 15 Nov 2021 19:58:54 +0200 Subject: [PATCH 016/124] fix --- src/tokenizer.ts | 2 +- tests/parser/also-identifier.ts.fixture.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index ef249976ff..3d14cc0c3b 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -380,7 +380,7 @@ export function isIllegalVariableIdentifier(name: string): bool { switch (name.charCodeAt(0)) { case CharCode.d: return name == "delete"; case CharCode.f: return name == "for"; - case CharCode.i: return name == "in" || name == "instanceof"; + case CharCode.i: return name == "instanceof"; case CharCode.n: return name == "null"; case CharCode.v: return name == "void"; } diff --git a/tests/parser/also-identifier.ts.fixture.ts b/tests/parser/also-identifier.ts.fixture.ts index 19870c5c66..89f51b6108 100644 --- a/tests/parser/also-identifier.ts.fixture.ts +++ b/tests/parser/also-identifier.ts.fixture.ts @@ -22,6 +22,7 @@ class Foo { } var as: i32; var is: i32; +var of: i32; var get: i32; var set: i32; var from: i32; @@ -31,4 +32,3 @@ var declare: i32; var readonly: i32; var namespace: i32; var constructor: i32; -// ERROR 1003: "Identifier expected." in also-identifier.ts(26,1+3) From b12bb19a61d49c56d4e3dba04707fd2e39a02573 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 15 Nov 2021 20:16:37 +0200 Subject: [PATCH 017/124] better --- src/tokenizer.ts | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 3d14cc0c3b..980c4cf5c2 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -185,13 +185,13 @@ export function tokenFromKeyword(text: string): Token { assert(len); switch (text.charCodeAt(0)) { case CharCode.a: { + if (text == "as") return Token.AS; + if (text == "abstract") return Token.ABSTRACT; if (len == 5) { if (text == "async") return Token.ASYNC; if (text == "await") return Token.AWAIT; break; } - if (text == "as") return Token.AS; - if (text == "abstract") return Token.ABSTRACT; break; } case CharCode.b: { @@ -222,13 +222,13 @@ export function tokenFromKeyword(text: string): Token { break; } case CharCode.e: { + if (text == "export") return Token.EXPORT; + if (text == "extends") return Token.EXTENDS; if (len == 4) { if (text == "else") return Token.ELSE; if (text == "enum") return Token.ENUM; break; } - if (text == "export") return Token.EXPORT; - if (text == "extends") return Token.EXTENDS; break; } case CharCode.f: { @@ -254,22 +254,22 @@ export function tokenFromKeyword(text: string): Token { break; } switch (text.charCodeAt(3)) { - case CharCode.l: { - if (text == "implements") return Token.IMPLEMENTS; - break; - } - case CharCode.o: { - if (text == "import") return Token.IMPORT; - break; - } case CharCode.t: { if (text == "instanceof") return Token.INSTANCEOF; break; } + case CharCode.l: { + if (text == "implements") return Token.IMPLEMENTS; + break; + } case CharCode.e: { if (text == "interface") return Token.INTERFACE; break; } + case CharCode.o: { + if (text == "import") return Token.IMPORT; + break; + } } break; } @@ -286,8 +286,8 @@ export function tokenFromKeyword(text: string): Token { break; } case CharCode.n: { - if (text == "new") return Token.NEW; if (text == "null") return Token.NULL; + if (text == "new") return Token.NEW; if (text == "namespace") return Token.NAMESPACE; break; } From 6721afc0d0c284eceef7ad6bc4371a9eaccf724a Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 15 Nov 2021 20:39:44 +0200 Subject: [PATCH 018/124] use const enums --- src/ast.ts | 12 ++++++------ src/common.ts | 2 +- src/diagnostics.ts | 2 +- src/flow.ts | 4 ++-- src/module.ts | 34 +++++++++++++++++----------------- src/program.ts | 2 +- src/resolver.ts | 2 +- src/tokenizer.ts | 1 + 8 files changed, 30 insertions(+), 29 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 38114d8fae..849d6a5b40 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -42,7 +42,7 @@ import { } from "./types"; /** Indicates the kind of a node. */ -export enum NodeKind { +export const enum NodeKind { SOURCE, @@ -923,7 +923,7 @@ export class TypeParameterNode extends Node { } /** Represents the kind of a parameter. */ -export enum ParameterKind { +export const enum ParameterKind { /** No specific flags. */ DEFAULT, /** Is an optional parameter. */ @@ -1067,7 +1067,7 @@ export class DecoratorNode extends Node { } /** Comment kinds. */ -export enum CommentKind { +export const enum CommentKind { /** Line comment. */ LINE, /** Triple-slash line comment. */ @@ -1110,7 +1110,7 @@ export class IdentifierExpression extends Expression { } /** Indicates the kind of a literal. */ -export enum LiteralKind { +export const enum LiteralKind { FLOAT, INTEGER, STRING, @@ -1145,7 +1145,7 @@ export class ArrayLiteralExpression extends LiteralExpression { } /** Indicates the kind of an assertion. */ -export enum AssertionKind { +export const enum AssertionKind { /** A prefix assertion, i.e. `expr`. */ PREFIX, /** An as assertion, i.e. `expr as T`. */ @@ -1586,7 +1586,7 @@ export class CompiledExpression extends Expression { export abstract class Statement extends Node { } /** Indicates the specific kind of a source. */ -export enum SourceKind { +export const enum SourceKind { /** User-provided file. */ USER = 0, /** User-provided entry file. */ diff --git a/src/common.ts b/src/common.ts index 10ef6265de..ac5dff0c48 100644 --- a/src/common.ts +++ b/src/common.ts @@ -4,7 +4,7 @@ */ /** Indicates traits of a {@link Node} or {@link Element}. */ -export enum CommonFlags { +export const enum CommonFlags { /** No flags set. */ NONE = 0, diff --git a/src/diagnostics.ts b/src/diagnostics.ts index 3fe2ecb821..43606fe95f 100644 --- a/src/diagnostics.ts +++ b/src/diagnostics.ts @@ -33,7 +33,7 @@ export { } from "./diagnosticMessages.generated"; /** Indicates the category of a {@link DiagnosticMessage}. */ -export enum DiagnosticCategory { +export const enum DiagnosticCategory { /** Overly pedantic message. */ PEDANTIC, /** Informatory message. */ diff --git a/src/flow.ts b/src/flow.ts index c40c23686c..405661a225 100644 --- a/src/flow.ts +++ b/src/flow.ts @@ -154,7 +154,7 @@ export const enum FlowFlags { } /** Flags indicating the current state of a local. */ -export enum LocalFlags { +export const enum LocalFlags { /** No specific conditions. */ NONE = 0, @@ -169,7 +169,7 @@ export enum LocalFlags { } /** Flags indicating the current state of a field. */ -export enum FieldFlags { +export const enum FieldFlags { NONE = 0, INITIALIZED = 1 << 0 } diff --git a/src/module.ts b/src/module.ts index e4f871de7d..ffe8b91ceb 100644 --- a/src/module.ts +++ b/src/module.ts @@ -61,7 +61,7 @@ export namespace TypeRef { } /** Binaryen feature constants. */ -export enum FeatureFlags { +export const enum FeatureFlags { MVP = 0 /* _BinaryenFeatureMVP */, Atomics = 1 /* _BinaryenFeatureAtomics */, MutableGloabls = 2 /* _BinaryenFeatureMutableGlobals */, @@ -81,7 +81,7 @@ export enum FeatureFlags { } /** Binaryen expression id constants. */ -export enum ExpressionId { +export const enum ExpressionId { Invalid = 0 /* _BinaryenInvalidId */, Block = 1 /* _BinaryenBlockId */, If = 2 /* _BinaryenIfId */, @@ -157,7 +157,7 @@ export enum ExpressionId { } /** Binaryen external kind constants. */ -export enum ExternalKind { +export const enum ExternalKind { Function = 0 /* _BinaryenExternalFunction */, Table = 1 /* _BinaryenExternalTable */, Memory = 2 /* _BinaryenExternalMemory */, @@ -166,7 +166,7 @@ export enum ExternalKind { } /** Binaryen unary operation constants. */ -export enum UnaryOp { +export const enum UnaryOp { /** i32.clz */ ClzI32 = 0 /* _BinaryenClzInt32 */, /** i64.clz */ @@ -442,7 +442,7 @@ export enum UnaryOp { } /** Binaryen binary operation constants. */ -export enum BinaryOp { +export const enum BinaryOp { /** i32.add */ AddI32 = 0 /* _BinaryenAddInt32 */, /** i32.sub */ @@ -896,7 +896,7 @@ export enum BinaryOp { } /** Binaryen atomic read-modify-write operation constants. */ -export enum AtomicRMWOp { +export const enum AtomicRMWOp { /** i32.atomic.rmw.add, i32.atomic.rmw8.add_u, i32.atomic.rmw16.add_u, i64.atomic.rmw.add, i64.atomic.rmw8.add_u, i64.atomic.rmw16.add_u, i64.atomic.rmw32.add_u */ Add = 0 /* _BinaryenAtomicRMWAdd */, /** i32.atomic.rmw.sub, i32.atomic.rmw8.sub_u, i32.atomic.rmw16.sub_u, i64.atomic.rmw.sub, i64.atomic.rmw8.sub_u, i64.atomic.rmw16.sub_u, i64.atomic.rmw32.sub_u */ @@ -912,7 +912,7 @@ export enum AtomicRMWOp { } /** Binaryen SIMD extract operation constants. */ -export enum SIMDExtractOp { +export const enum SIMDExtractOp { /** i8x16.extract_lane_s */ ExtractLaneI8x16 = 0 /* _BinaryenExtractLaneSVecI8x16 */, /** i8x16.extract_lane_u */ @@ -932,7 +932,7 @@ export enum SIMDExtractOp { } /** Binaryen SIMD replace operation constants. */ -export enum SIMDReplaceOp { +export const enum SIMDReplaceOp { /** i8x16.replace_lane */ ReplaceLaneI8x16 = 0 /* _BinaryenReplaceLaneVecI8x16 */, /** i16x8.replace_lane */ @@ -948,7 +948,7 @@ export enum SIMDReplaceOp { } /** Binaryen SIMD shift operation constants. */ -export enum SIMDShiftOp { +export const enum SIMDShiftOp { /** i8x16.shl */ ShlI8x16 = 0 /* _BinaryenShlVecI8x16 */, /** i8x16.shr_s */ @@ -976,7 +976,7 @@ export enum SIMDShiftOp { } /** Binaryen SIMD load operation constants. */ -export enum SIMDLoadOp { +export const enum SIMDLoadOp { /** v128.load8_splat */ Load8Splat = 0 /* _BinaryenLoad8SplatVec128 */, /** v128.load16_splat */ @@ -1004,7 +1004,7 @@ export enum SIMDLoadOp { } /** Binaryen SIMD load/store lane operation constants. */ -export enum SIMDLoadStoreLaneOp { +export const enum SIMDLoadStoreLaneOp { /** v128.load8_lane */ Load8Lane = 0 /* _BinaryenLoad8LaneVec128 */, /** v128.load16_lane */ @@ -1024,13 +1024,13 @@ export enum SIMDLoadStoreLaneOp { } /** Binaryen SIMD ternary operation constants. */ -export enum SIMDTernaryOp { +export const enum SIMDTernaryOp { /** v128.bitselect */ Bitselect = 0 /* _BinaryenBitselectVec128 */ } /** Binaryen RefIs operation constants. */ -export enum RefIsOp { +export const enum RefIsOp { /** ref.is_null */ RefIsNull = 0 /* _BinaryenRefIsNull */, /** ref.is_func */ @@ -1042,7 +1042,7 @@ export enum RefIsOp { } /** Binaryen RefAs operation constants. */ -export enum RefAsOp { +export const enum RefAsOp { /** ref.as_non_null */ RefAsNonNull = 0 /* _BinaryenRefAsNonNull */, /** ref.as_func */ @@ -1054,7 +1054,7 @@ export enum RefAsOp { } /** Binaryen BrOn operation constants. */ -export enum BrOnOp { +export const enum BrOnOp { /** br_on_null */ BrOnNull = 0 /* TODO_BinaryenBrOnNull */, /** br_on_cast */ @@ -1068,7 +1068,7 @@ export enum BrOnOp { } /** Binaryen expression runner flags. */ -export enum ExpressionRunnerFlags { +export const enum ExpressionRunnerFlags { Default = 0 /* _ExpressionRunnerFlagsDefault */, PreserveSideeffects = 1 /* _ExpressionRunnerFlagsPreserveSideeffects */, TraverseCalls = 2 /* _ExpressionRunnerFlagsTraverseCalls */ @@ -3001,7 +3001,7 @@ export class SwitchBuilder { } } -export enum SideEffects { +export const enum SideEffects { None = 0 /* _BinaryenSideEffectNone */, Branches = 1 /* _BinaryenSideEffectBranches */, Calls = 2 /* _BinaryenSideEffectCalls */, diff --git a/src/program.ts b/src/program.ts index 8df389b4b4..c463dcf26a 100644 --- a/src/program.ts +++ b/src/program.ts @@ -2630,7 +2630,7 @@ export class Program extends DiagnosticEmitter { } /** Indicates the specific kind of an {@link Element}. */ -export enum ElementKind { +export const enum ElementKind { /** A {@link Global}. */ GLOBAL, /** A {@link Local}. */ diff --git a/src/resolver.ts b/src/resolver.ts index dd79ab78f3..5452e54479 100644 --- a/src/resolver.ts +++ b/src/resolver.ts @@ -109,7 +109,7 @@ import { } from "./builtins"; /** Indicates whether errors are reported or not. */ -export enum ReportMode { +export const enum ReportMode { /** Report errors. */ REPORT, /** Swallow errors. */ diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 980c4cf5c2..6ada62c5d9 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -393,6 +393,7 @@ export function operatorTokenToString(token: Token): string { case Token.IN: return "in"; case Token.INSTANCEOF: return "instanceof"; case Token.NEW: return "new"; + case Token.OF: return "of"; case Token.TYPEOF: return "typeof"; case Token.VOID: return "void"; case Token.YIELD: return "yield"; From 43be89569e81ca6713e820bde56264cf963d582e Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 15 Nov 2021 20:59:30 +0200 Subject: [PATCH 019/124] improve ts emit --- src/tsconfig.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/tsconfig.json b/src/tsconfig.json index 033f601ce4..0acc81470a 100644 --- a/src/tsconfig.json +++ b/src/tsconfig.json @@ -7,7 +7,10 @@ "sourceMap": true, "skipLibCheck": true, "target": "esnext", - "strict": true + "useDefineForClassFields": false, + "strict": true, + "noImplicitReturns": true, + "noPropertyAccessFromIndexSignature": true }, "include": [ "./**/*.ts" From c8ed972f1b73921d436ba303ff201f660a2ca1e8 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 15 Nov 2021 21:51:28 +0200 Subject: [PATCH 020/124] fix --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 1ffd242c8b..fedce0a5ac 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -619,7 +619,7 @@ export class Parser extends DiagnosticEmitter { return null; } // ... | null - while (tn.skip(Token.BAR)) { + while (tn.peek() != Token.BAR_BAR && tn.skip(Token.BAR)) { if (tn.skip(Token.NULL)) { type.isNullable = true; } else { From 0729ee3524fcdeaf32b3aac63fc46f42953302c5 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Tue, 16 Nov 2021 00:16:37 +0200 Subject: [PATCH 021/124] refactor --- src/parser.ts | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index fedce0a5ac..5633aa3c69 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -619,19 +619,21 @@ export class Parser extends DiagnosticEmitter { return null; } // ... | null - while (tn.peek() != Token.BAR_BAR && tn.skip(Token.BAR)) { - if (tn.skip(Token.NULL)) { - type.isNullable = true; - } else { - let notNullStart = tn.pos; - let notNull = this.parseType(tn, false, true); - if (!suppressErrors) { - this.error( - DiagnosticCode._0_expected, - notNull ? notNull.range : tn.range(notNullStart), "null" - ); + if (tn.peek() != Token.BAR_BAR) { + while (tn.skip(Token.BAR)) { + if (tn.skip(Token.NULL)) { + type.isNullable = true; + } else { + let notNullStart = tn.pos; + let notNull = this.parseType(tn, false, true); + if (!suppressErrors) { + this.error( + DiagnosticCode._0_expected, + notNull ? notNull.range : tn.range(notNullStart), "null" + ); + } + return null; } - return null; } } // ... [][] From ad65d6a591c7fdcb357be61f20f36c871464647b Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Tue, 16 Nov 2021 19:05:20 +0200 Subject: [PATCH 022/124] add MIN_KEYWORD_LENGTH --- src/tokenizer.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index f45172ea93..a31f3fba08 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -34,6 +34,7 @@ import { isLowSurrogate } from "./util"; +const MIN_KEYWORD_LENGTH = 2; // 'as', 'if' and etc const MAX_KEYWORD_LENGTH = 11; // 'constructor' /** Named token types. */ @@ -1000,6 +1001,7 @@ export class Tokenizer extends DiagnosticEmitter { if ( identifierHandling != IdentifierHandling.ALWAYS && + pos - posBefore >= MIN_KEYWORD_LENGTH && pos - posBefore <= MAX_KEYWORD_LENGTH && // Only a non-capitalised token can be a keyword (c = text.charCodeAt(posBefore)) >= CharCode.a && c <= CharCode.z From 4dbebdbf4f1d7d96b4984f267f63e9c90e7e70b1 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Tue, 16 Nov 2021 19:15:40 +0200 Subject: [PATCH 023/124] more --- src/tokenizer.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index a31f3fba08..d9a638f52d 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -225,13 +225,13 @@ export function tokenFromKeyword(text: string): Token { break; } case CharCode.e: { - if (text == "export") return Token.EXPORT; - if (text == "extends") return Token.EXTENDS; if (len == 4) { if (text == "else") return Token.ELSE; if (text == "enum") return Token.ENUM; break; } + if (text == "export") return Token.EXPORT; + if (text == "extends") return Token.EXTENDS; break; } case CharCode.f: { @@ -299,13 +299,13 @@ export function tokenFromKeyword(text: string): Token { break; } case CharCode.p: { + if (text == "public") return Token.PUBLIC; + if (text == "protected") return Token.PROTECTED; if (len == 7) { if (text == "private") return Token.PRIVATE; if (text == "package") return Token.PACKAGE; break; } - if (text == "public") return Token.PUBLIC; - if (text == "protected") return Token.PROTECTED; break; } case CharCode.r: { From dc19d6eb870d7b397356eeb3aa16d87091f8ec24 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Tue, 16 Nov 2021 19:19:10 +0200 Subject: [PATCH 024/124] simplify tokenFromKeyword --- src/tokenizer.ts | 72 ++++++++++++++++-------------------------------- 1 file changed, 23 insertions(+), 49 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index d9a638f52d..38422e76d4 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -190,11 +190,8 @@ export function tokenFromKeyword(text: string): Token { case CharCode.a: { if (text == "as") return Token.AS; if (text == "abstract") return Token.ABSTRACT; - if (len == 5) { - if (text == "async") return Token.ASYNC; - if (text == "await") return Token.AWAIT; - break; - } + if (text == "async") return Token.ASYNC; + if (text == "await") return Token.AWAIT; break; } case CharCode.b: { @@ -202,46 +199,34 @@ export function tokenFromKeyword(text: string): Token { break; } case CharCode.c: { - if (len == 5) { - if (text == "const") return Token.CONST; - if (text == "class") return Token.CLASS; - if (text == "catch") return Token.CATCH; - break; - } + if (text == "const") return Token.CONST; + if (text == "class") return Token.CLASS; if (text == "case") return Token.CASE; if (text == "continue") return Token.CONTINUE; if (text == "constructor") return Token.CONSTRUCTOR; + if (text == "catch") return Token.CATCH; break; } case CharCode.d: { - if (len == 7) { - if (text == "default") return Token.DEFAULT; - if (text == "declare") return Token.DECLARE; - break; - } if (text == "do") return Token.DO; + if (text == "default") return Token.DEFAULT; + if (text == "declare") return Token.DECLARE; if (text == "delete") return Token.DELETE; if (text == "debugger") return Token.DEBUGGER; break; } case CharCode.e: { - if (len == 4) { - if (text == "else") return Token.ELSE; - if (text == "enum") return Token.ENUM; - break; - } + if (text == "else") return Token.ELSE; if (text == "export") return Token.EXPORT; + if (text == "enum") return Token.ENUM; if (text == "extends") return Token.EXTENDS; break; } case CharCode.f: { - if (len <= 5) { - if (text == "false") return Token.FALSE; - if (text == "for") return Token.FOR; - if (text == "from") return Token.FROM; - break; - } + if (text == "false") return Token.FALSE; if (text == "function") return Token.FUNCTION; + if (text == "for") return Token.FOR; + if (text == "from") return Token.FROM; if (text == "finally") return Token.FINALLY; break; } @@ -250,12 +235,10 @@ export function tokenFromKeyword(text: string): Token { break; } case CharCode.i: { - if (len == 2) { - if (text == "if") return Token.IF; - if (text == "in") return Token.IN; - if (text == "is") return Token.IS; - break; - } + if (text == "if") return Token.IF; + if (text == "in") return Token.IN; + if (text == "is") return Token.IS; + switch (text.charCodeAt(3)) { case CharCode.t: { if (text == "instanceof") return Token.INSTANCEOF; @@ -301,11 +284,8 @@ export function tokenFromKeyword(text: string): Token { case CharCode.p: { if (text == "public") return Token.PUBLIC; if (text == "protected") return Token.PROTECTED; - if (len == 7) { - if (text == "private") return Token.PRIVATE; - if (text == "package") return Token.PACKAGE; - break; - } + if (text == "private") return Token.PRIVATE; + if (text == "package") return Token.PACKAGE; break; } case CharCode.r: { @@ -314,22 +294,16 @@ export function tokenFromKeyword(text: string): Token { break; } case CharCode.s: { - if (len == 6) { - if (text == "switch") return Token.SWITCH; - if (text == "static") return Token.STATIC; - break; - } + if (text == "switch") return Token.SWITCH; + if (text == "static") return Token.STATIC; if (text == "set") return Token.SET; if (text == "super") return Token.SUPER; break; } case CharCode.t: { - if (len == 4) { - if (text == "true") return Token.TRUE; - if (text == "this") return Token.THIS; - if (text == "type") return Token.TYPE; - break; - } + if (text == "true") return Token.TRUE; + if (text == "this") return Token.THIS; + if (text == "type") return Token.TYPE; if (text == "try") return Token.TRY; if (text == "throw") return Token.THROW; if (text == "typeof") return Token.TYPEOF; From 2e1d68af5ca404e37d5a67f7d059a255a45fb3e8 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Tue, 16 Nov 2021 19:59:32 +0200 Subject: [PATCH 025/124] disable preserveConstEnums --- src/tsconfig.json | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tsconfig.json b/src/tsconfig.json index 0acc81470a..372e5177ac 100644 --- a/src/tsconfig.json +++ b/src/tsconfig.json @@ -6,6 +6,7 @@ "allowJs": false, "sourceMap": true, "skipLibCheck": true, + "preserveConstEnums": false, "target": "esnext", "useDefineForClassFields": false, "strict": true, From 37610b21605360d8b4101aa808529aef35a600e7 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Tue, 16 Nov 2021 20:00:38 +0200 Subject: [PATCH 026/124] refactor compilerOptions --- src/tsconfig.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tsconfig.json b/src/tsconfig.json index 372e5177ac..412d41edc4 100644 --- a/src/tsconfig.json +++ b/src/tsconfig.json @@ -1,17 +1,17 @@ { "extends": "../std/portable.json", "compilerOptions": { + "target": "esnext", "outDir": "../out", "types" : ["node"], - "allowJs": false, "sourceMap": true, + "allowJs": false, + "strict": true, "skipLibCheck": true, "preserveConstEnums": false, - "target": "esnext", "useDefineForClassFields": false, - "strict": true, - "noImplicitReturns": true, - "noPropertyAccessFromIndexSignature": true + "noPropertyAccessFromIndexSignature": true, + "noImplicitReturns": true }, "include": [ "./**/*.ts" From 42e674d0ec96ceff4632363685a7abdbd25b8381 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Tue, 16 Nov 2021 20:45:33 +0200 Subject: [PATCH 027/124] improve line break for single comments --- src/tokenizer.ts | 2 +- src/util/text.ts | 16 ++++------------ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 38422e76d4..cb007edbde 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -726,7 +726,7 @@ export class Tokenizer extends DiagnosticEmitter { commentKind = CommentKind.TRIPLE; } while (++pos < end) { - if (text.charCodeAt(pos) == CharCode.LINEFEED) { + if (isLineBreak(text.charCodeAt(pos))) { ++pos; break; } diff --git a/src/util/text.ts b/src/util/text.ts index 6b3598e061..b2a8a10a9d 100644 --- a/src/util/text.ts +++ b/src/util/text.ts @@ -144,13 +144,9 @@ export function isLineBreak(c: i32): bool { case CharCode.LINEFEED: case CharCode.CARRIAGERETURN: case CharCode.LINESEPARATOR: - case CharCode.PARAGRAPHSEPARATOR: { - return true; - } - default: { - return false; - } + case CharCode.PARAGRAPHSEPARATOR: return true; } + return false; } /** Tests if the specified character code is some sort of white space. */ @@ -166,13 +162,9 @@ export function isWhiteSpace(c: i32): bool { case CharCode.NARROWNOBREAKSPACE: case CharCode.MATHEMATICALSPACE: case CharCode.IDEOGRAPHICSPACE: - case CharCode.BYTEORDERMARK: { - return true; - } - default: { - return c >= CharCode.ENQUAD && c <= CharCode.ZEROWIDTHSPACE; - } + case CharCode.BYTEORDERMARK: return true; } + return c >= CharCode.ENQUAD && c <= CharCode.ZEROWIDTHSPACE; } /** First high (lead) surrogate. */ From 2637ffc8de07dfee8d599e2eaab0dde0b382a23e Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Tue, 16 Nov 2021 23:12:13 +0200 Subject: [PATCH 028/124] add TODO --- src/tokenizer.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index cb007edbde..df8c9ea7fb 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -965,6 +965,7 @@ export class Tokenizer extends DiagnosticEmitter { return Token.AT; } default: { + // TODO: \uXXXX also support for identifiers if (isIdentifierStart(c)) { let posBefore = pos; while ( From 79480c468dd9b34c29573885a44496b360d7c9cc Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Tue, 16 Nov 2021 23:22:17 +0200 Subject: [PATCH 029/124] refactor --- src/tokenizer.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index df8c9ea7fb..8054250993 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -183,7 +183,7 @@ export const enum IdentifierHandling { ALWAYS } -export function tokenFromKeyword(text: string): Token { +export function probeKeywordToken(text: string): Token { let len = text.length; assert(len); switch (text.charCodeAt(0)) { @@ -981,16 +981,16 @@ export class Tokenizer extends DiagnosticEmitter { // Only a non-capitalised token can be a keyword (c = text.charCodeAt(posBefore)) >= CharCode.a && c <= CharCode.z ) { - let maybeKeywordToken = tokenFromKeyword(text.substring(posBefore, pos)); + let keywordToken = probeKeywordToken(text.substring(posBefore, pos)); if ( - maybeKeywordToken != Token.INVALID && + keywordToken != Token.INVALID && !( identifierHandling == IdentifierHandling.PREFER && - tokenIsAlsoIdentifier(maybeKeywordToken) + tokenIsAlsoIdentifier(keywordToken) ) ) { this.pos = pos; - return maybeKeywordToken; + return keywordToken; } } this.pos = posBefore; From 1d2f631db4769db91a709d3a6d0179ad0995f081 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Tue, 16 Nov 2021 23:38:22 +0200 Subject: [PATCH 030/124] add comment --- src/tokenizer.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 8054250993..f6181000e0 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -550,6 +550,7 @@ export class Tokenizer extends DiagnosticEmitter { )) break; // otherwise fall-through } + // `\n`, `\t`, `\v`, `\f`, ` ` case CharCode.LINEFEED: case CharCode.TAB: case CharCode.VERTICALTAB: From aef8da85074eeee7b41bc8d8244f3c1d163742fd Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Tue, 16 Nov 2021 23:47:45 +0200 Subject: [PATCH 031/124] add TODO --- src/tokenizer.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index f6181000e0..72a2ff4d3e 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -907,6 +907,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.GREATERTHAN; } + // `?`, TODO: `??`, `??=` case CharCode.QUESTION: { this.pos = pos + 1; return Token.QUESTION; From 5d861b61310860420748c714f2eb9500acbc20f9 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Wed, 17 Nov 2021 00:00:11 +0200 Subject: [PATCH 032/124] handle ||= and &&= --- src/tokenizer.ts | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 72a2ff4d3e..b7cd7f7837 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -153,6 +153,8 @@ export const enum Token { MINUS_EQUALS, ASTERISK_EQUALS, ASTERISK_ASTERISK_EQUALS, + BAR_BAR_EQUALS, + AMPERSAND_AMPERSAND_EQUALS, SLASH_EQUALS, PERCENT_EQUALS, LESSTHAN_LESSTHAN_EQUALS, @@ -407,6 +409,8 @@ export function operatorTokenToString(token: Token): string { case Token.MINUS_EQUALS: return "-="; case Token.ASTERISK_EQUALS: return "*="; case Token.ASTERISK_ASTERISK_EQUALS: return "**="; + case Token.BAR_BAR_EQUALS: return "||="; + case Token.AMPERSAND_AMPERSAND_EQUALS: return "&&="; case Token.SLASH_EQUALS: return "/="; case Token.PERCENT_EQUALS: return "%="; case Token.LESSTHAN_LESSTHAN_EQUALS: return "<<="; @@ -602,19 +606,27 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.PERCENT; } - // `&`, `&&`, `&=` + // `&`, `&&`, `&=`, `&&=` case CharCode.AMPERSAND: { ++pos; if (maxTokenLength > 1 && pos < end) { let chr = text.charCodeAt(pos); - if (chr == CharCode.AMPERSAND) { - this.pos = pos + 1; - return Token.AMPERSAND_AMPERSAND; - } if (chr == CharCode.EQUALS) { this.pos = pos + 1; return Token.AMPERSAND_EQUALS; } + if (chr == CharCode.AMPERSAND) { + ++pos; + if ( + maxTokenLength > 2 && pos < end && + text.charCodeAt(pos) == CharCode.EQUALS + ) { + this.pos = pos + 1; + return Token.AMPERSAND_AMPERSAND_EQUALS; + } + this.pos = pos; + return Token.AMPERSAND_AMPERSAND; + } } this.pos = pos; return Token.AMPERSAND; @@ -937,19 +949,27 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos + 1; return Token.OPENBRACE; } - // `|`, `||`, `|=` + // `|`, `||`, `|=`, `||=` case CharCode.BAR: { ++pos; if (maxTokenLength > 1 && pos < end) { let chr = text.charCodeAt(pos); - if (chr == CharCode.BAR) { - this.pos = pos + 1; - return Token.BAR_BAR; - } if (chr == CharCode.EQUALS) { this.pos = pos + 1; return Token.BAR_EQUALS; } + if (chr == CharCode.BAR) { + ++pos; + if ( + maxTokenLength > 2 && pos < end && + text.charCodeAt(pos) == CharCode.EQUALS + ) { + this.pos = pos + 1; + return Token.BAR_BAR_EQUALS; + } + this.pos = pos; + return Token.BAR_BAR; + } } this.pos = pos; return Token.BAR; From 27b225931117ff092fc3d8be55aa895a7c87e05e Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Wed, 17 Nov 2021 01:08:30 +0200 Subject: [PATCH 033/124] refactor --- src/tokenizer.ts | 179 +++++++++++++++++++++++++---------------------- 1 file changed, 95 insertions(+), 84 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index b7cd7f7837..5b2fa96866 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -610,12 +610,12 @@ export class Tokenizer extends DiagnosticEmitter { case CharCode.AMPERSAND: { ++pos; if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.EQUALS) { + c = text.charCodeAt(pos); + if (c == CharCode.EQUALS) { this.pos = pos + 1; return Token.AMPERSAND_EQUALS; } - if (chr == CharCode.AMPERSAND) { + if (c == CharCode.AMPERSAND) { ++pos; if ( maxTokenLength > 2 && pos < end && @@ -643,12 +643,12 @@ export class Tokenizer extends DiagnosticEmitter { case CharCode.ASTERISK: { ++pos; if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.EQUALS) { + c = text.charCodeAt(pos); + if (c == CharCode.EQUALS) { this.pos = pos + 1; return Token.ASTERISK_EQUALS; } - if (chr == CharCode.ASTERISK) { + if (c == CharCode.ASTERISK) { ++pos; if ( maxTokenLength > 2 && pos < end && @@ -668,12 +668,12 @@ export class Tokenizer extends DiagnosticEmitter { case CharCode.PLUS: { ++pos; if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.PLUS) { + c = text.charCodeAt(pos); + if (c == CharCode.PLUS) { this.pos = pos + 1; return Token.PLUS_PLUS; } - if (chr == CharCode.EQUALS) { + if (c == CharCode.EQUALS) { this.pos = pos + 1; return Token.PLUS_EQUALS; } @@ -689,12 +689,12 @@ export class Tokenizer extends DiagnosticEmitter { case CharCode.MINUS: { ++pos; if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.MINUS) { + c = text.charCodeAt(pos); + if (c == CharCode.MINUS) { this.pos = pos + 1; return Token.MINUS_MINUS; } - if (chr == CharCode.EQUALS) { + if (c == CharCode.EQUALS) { this.pos = pos + 1; return Token.MINUS_EQUALS; } @@ -706,14 +706,14 @@ export class Tokenizer extends DiagnosticEmitter { case CharCode.DOT: { ++pos; if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (isDecimal(chr)) { + c = text.charCodeAt(pos); + if (isDecimal(c)) { this.pos = pos - 1; return Token.FLOATLITERAL; // expects a call to readFloat } if ( maxTokenLength > 2 && pos + 1 < end && - chr == CharCode.DOT && + c == CharCode.DOT && text.charCodeAt(pos + 1) == CharCode.DOT ) { this.pos = pos + 2; @@ -725,63 +725,18 @@ export class Tokenizer extends DiagnosticEmitter { } // `/`, `//`, `/*`, `/=`, `///` case CharCode.SLASH: { - let commentStartPos = pos; ++pos; if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.SLASH) { // single-line - let commentKind = CommentKind.LINE; - if ( - pos + 1 < end && - text.charCodeAt(pos + 1) == CharCode.SLASH - ) { - ++pos; - commentKind = CommentKind.TRIPLE; - } - while (++pos < end) { - if (isLineBreak(text.charCodeAt(pos))) { - ++pos; - break; - } - } - if (this.onComment) { - this.onComment( - commentKind, - text.substring(commentStartPos, pos), - this.range(commentStartPos, pos) - ); - } + c = text.charCodeAt(pos); + if (c == CharCode.SLASH) { // single-line + pos = this.skipSingleLine(text, pos, end); break; } - if (chr == CharCode.ASTERISK) { // multi-line - let closed = false; - while (++pos < end) { - c = text.charCodeAt(pos); - if ( - c == CharCode.ASTERISK && - pos + 1 < end && - text.charCodeAt(pos + 1) == CharCode.SLASH - ) { - pos += 2; - closed = true; - break; - } - } - if (!closed) { - this.error( - DiagnosticCode._0_expected, - this.range(pos), "*/" - ); - } else if (this.onComment) { - this.onComment( - CommentKind.BLOCK, - text.substring(commentStartPos, pos), - this.range(commentStartPos, pos) - ); - } + if (c == CharCode.ASTERISK) { // multi-line + pos = this.skipMultileLine(text, pos, end); break; } - if (chr == CharCode.EQUALS) { + if (c == CharCode.EQUALS) { this.pos = pos + 1; return Token.SLASH_EQUALS; } @@ -792,12 +747,12 @@ export class Tokenizer extends DiagnosticEmitter { // `0.`, `0x`, `0b`, `0o` case CharCode._0: { if (pos + 1 < end) { - let ch = text.charCodeAt(pos + 1); - if (ch == CharCode.DOT) { + c = text.charCodeAt(pos + 1); + if (c == CharCode.DOT) { this.pos = pos; return Token.FLOATLITERAL; } - switch (ch | 32) { + switch (c | 32) { case CharCode.x: case CharCode.b: case CharCode.o: { @@ -834,8 +789,8 @@ export class Tokenizer extends DiagnosticEmitter { case CharCode.LESSTHAN: { ++pos; if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.LESSTHAN) { + c = text.charCodeAt(pos); + if (c == CharCode.LESSTHAN) { ++pos; if ( maxTokenLength > 2 && @@ -848,7 +803,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.LESSTHAN_LESSTHAN; } - if (chr == CharCode.EQUALS) { + if (c == CharCode.EQUALS) { this.pos = pos + 1; return Token.LESSTHAN_EQUALS; } @@ -860,8 +815,8 @@ export class Tokenizer extends DiagnosticEmitter { case CharCode.EQUALS: { ++pos; if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.EQUALS) { + c = text.charCodeAt(pos); + if (c == CharCode.EQUALS) { ++pos; if ( maxTokenLength > 2 && @@ -874,7 +829,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.EQUALS_EQUALS; } - if (chr == CharCode.GREATERTHAN) { + if (c == CharCode.GREATERTHAN) { this.pos = pos + 1; return Token.EQUALS_GREATERTHAN; } @@ -886,12 +841,12 @@ export class Tokenizer extends DiagnosticEmitter { case CharCode.GREATERTHAN: { ++pos; if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.GREATERTHAN) { + c = text.charCodeAt(pos); + if (c == CharCode.GREATERTHAN) { ++pos; if (maxTokenLength > 2 && pos < end) { - chr = text.charCodeAt(pos); - if (chr == CharCode.GREATERTHAN) { + c = text.charCodeAt(pos); + if (c == CharCode.GREATERTHAN) { ++pos; if ( maxTokenLength > 3 && pos < end && @@ -903,7 +858,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.GREATERTHAN_GREATERTHAN_GREATERTHAN; } - if (chr == CharCode.EQUALS) { + if (c == CharCode.EQUALS) { this.pos = pos + 1; return Token.GREATERTHAN_GREATERTHAN_EQUALS; } @@ -911,7 +866,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.GREATERTHAN_GREATERTHAN; } - if (chr == CharCode.EQUALS) { + if (c == CharCode.EQUALS) { this.pos = pos + 1; return Token.GREATERTHAN_EQUALS; } @@ -953,12 +908,12 @@ export class Tokenizer extends DiagnosticEmitter { case CharCode.BAR: { ++pos; if (maxTokenLength > 1 && pos < end) { - let chr = text.charCodeAt(pos); - if (chr == CharCode.EQUALS) { + c = text.charCodeAt(pos); + if (c == CharCode.EQUALS) { this.pos = pos + 1; return Token.BAR_EQUALS; } - if (chr == CharCode.BAR) { + if (c == CharCode.BAR) { ++pos; if ( maxTokenLength > 2 && pos < end && @@ -1070,6 +1025,62 @@ export class Tokenizer extends DiagnosticEmitter { return this.nextToken; } + skipSingleLine(text: string, pos: i32, end: i32): i32 { + let commentStartPos = pos - 1; + let commentKind = CommentKind.LINE; + if ( + pos + 1 < end && + text.charCodeAt(pos + 1) == CharCode.SLASH + ) { + ++pos; + commentKind = CommentKind.TRIPLE; + } + while (++pos < end) { + if (isLineBreak(text.charCodeAt(pos))) { + ++pos; + break; + } + } + if (this.onComment) { + this.onComment( + commentKind, + text.substring(commentStartPos, pos), + this.range(commentStartPos, pos) + ); + } + return pos; + } + + skipMultileLine(text: string, pos: i32, end: i32): i32 { + let commentStartPos = pos - 1; + let closed = false; + while (++pos < end) { + let c = text.charCodeAt(pos); + if ( + c == CharCode.ASTERISK && + pos + 1 < end && + text.charCodeAt(pos + 1) == CharCode.SLASH + ) { + pos += 2; + closed = true; + break; + } + } + if (!closed) { + this.error( + DiagnosticCode._0_expected, + this.range(pos), "*/" + ); + } else if (this.onComment) { + this.onComment( + CommentKind.BLOCK, + text.substring(commentStartPos, pos), + this.range(commentStartPos, pos) + ); + } + return pos; + } + skipIdentifier(identifierHandling: IdentifierHandling = IdentifierHandling.PREFER): bool { return this.skip(Token.IDENTIFIER, identifierHandling); } From 39937b2a883490c84a7a07519c3cece68a75259a Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Wed, 17 Nov 2021 01:09:36 +0200 Subject: [PATCH 034/124] typo --- src/tokenizer.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 5b2fa96866..eb2c919f2e 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -733,7 +733,7 @@ export class Tokenizer extends DiagnosticEmitter { break; } if (c == CharCode.ASTERISK) { // multi-line - pos = this.skipMultileLine(text, pos, end); + pos = this.skipMultiLine(text, pos, end); break; } if (c == CharCode.EQUALS) { @@ -1051,7 +1051,7 @@ export class Tokenizer extends DiagnosticEmitter { return pos; } - skipMultileLine(text: string, pos: i32, end: i32): i32 { + skipMultiLine(text: string, pos: i32, end: i32): i32 { let commentStartPos = pos - 1; let closed = false; while (++pos < end) { From 9d77b365cfbb71c2cf59cd15e8ecdd0ebc8138eb Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Wed, 17 Nov 2021 01:10:19 +0200 Subject: [PATCH 035/124] better --- src/tokenizer.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index eb2c919f2e..3f0f7f93c6 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -729,11 +729,11 @@ export class Tokenizer extends DiagnosticEmitter { if (maxTokenLength > 1 && pos < end) { c = text.charCodeAt(pos); if (c == CharCode.SLASH) { // single-line - pos = this.skipSingleLine(text, pos, end); + pos = this.skipSingleLineComment(text, pos, end); break; } if (c == CharCode.ASTERISK) { // multi-line - pos = this.skipMultiLine(text, pos, end); + pos = this.skipMultiLineComment(text, pos, end); break; } if (c == CharCode.EQUALS) { @@ -1025,7 +1025,7 @@ export class Tokenizer extends DiagnosticEmitter { return this.nextToken; } - skipSingleLine(text: string, pos: i32, end: i32): i32 { + skipSingleLineComment(text: string, pos: i32, end: i32): i32 { let commentStartPos = pos - 1; let commentKind = CommentKind.LINE; if ( @@ -1051,7 +1051,7 @@ export class Tokenizer extends DiagnosticEmitter { return pos; } - skipMultiLine(text: string, pos: i32, end: i32): i32 { + skipMultiLineComment(text: string, pos: i32, end: i32): i32 { let commentStartPos = pos - 1; let closed = false; while (++pos < end) { From dae346264a2621e3eeaa22dc28a5d43cc567d784 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Wed, 17 Nov 2021 01:34:10 +0200 Subject: [PATCH 036/124] refactor --- src/tokenizer.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 3f0f7f93c6..aa0c44d9c3 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -773,9 +773,7 @@ export class Tokenizer extends DiagnosticEmitter { case CharCode._8: case CharCode._9: { this.pos = pos; - return this.testInteger() - ? Token.INTEGERLITERAL // expects a call to readInteger - : Token.FLOATLITERAL; // expects a call to readFloat + return this.integerOrFloatToken(); } case CharCode.COLON: { this.pos = pos + 1; @@ -1360,18 +1358,20 @@ export class Tokenizer extends DiagnosticEmitter { return text.substring(start, this.pos); } - testInteger(): bool { + integerOrFloatToken(): Token { var text = this.source.text; var pos = this.pos + 1; var end = this.end; while (pos < end) { let c = text.charCodeAt(pos); - if (c == CharCode.DOT || (c | 32) == CharCode.e) return false; + if (c == CharCode.DOT || (c | 32) == CharCode.e) { + return Token.FLOATLITERAL; + } if (c != CharCode._ && (c < CharCode._0 || c > CharCode._9)) break; // does not validate separator placement (this is done in readXYInteger) pos++; } - return true; + return Token.INTEGERLITERAL; } readInteger(): i64 { From 4a4b8f4c3b7d5b737ed65c9eb37f4aacde107389 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Wed, 17 Nov 2021 11:51:13 +0200 Subject: [PATCH 037/124] add ?? and ??= for tokenizer --- src/tokenizer.ts | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index aa0c44d9c3..cafb8ac349 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -143,10 +143,11 @@ export const enum Token { BAR, CARET, EXCLAMATION, + QUESTION, TILDE, AMPERSAND_AMPERSAND, BAR_BAR, - QUESTION, + QUESTION_QUESTION, COLON, EQUALS, PLUS_EQUALS, @@ -155,6 +156,7 @@ export const enum Token { ASTERISK_ASTERISK_EQUALS, BAR_BAR_EQUALS, AMPERSAND_AMPERSAND_EQUALS, + QUESTION_QUESTION_EQUALS, SLASH_EQUALS, PERCENT_EQUALS, LESSTHAN_LESSTHAN_EQUALS, @@ -401,16 +403,19 @@ export function operatorTokenToString(token: Token): string { case Token.BAR: return "|"; case Token.CARET: return "^"; case Token.EXCLAMATION: return "!"; + case Token.QUESTION: return "?"; case Token.TILDE: return "~"; case Token.AMPERSAND_AMPERSAND: return "&&"; case Token.BAR_BAR: return "||"; + case Token.QUESTION_QUESTION: return "??"; case Token.EQUALS: return "="; case Token.PLUS_EQUALS: return "+="; case Token.MINUS_EQUALS: return "-="; case Token.ASTERISK_EQUALS: return "*="; case Token.ASTERISK_ASTERISK_EQUALS: return "**="; - case Token.BAR_BAR_EQUALS: return "||="; case Token.AMPERSAND_AMPERSAND_EQUALS: return "&&="; + case Token.BAR_BAR_EQUALS: return "||="; + case Token.QUESTION_QUESTION_EQUALS: return "??="; case Token.SLASH_EQUALS: return "/="; case Token.PERCENT_EQUALS: return "%="; case Token.LESSTHAN_LESSTHAN_EQUALS: return "<<="; @@ -872,9 +877,25 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.GREATERTHAN; } - // `?`, TODO: `??`, `??=` + // `?`, `??`, `??=` case CharCode.QUESTION: { - this.pos = pos + 1; + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.QUESTION) { + ++pos; + if (maxTokenLength > 2 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.EQUALS) { + this.pos = pos + 1; + return Token.QUESTION_QUESTION_EQUALS; + } + } + this.pos = pos; + return Token.QUESTION_QUESTION; + } + } + this.pos = pos; return Token.QUESTION; } case CharCode.OPENBRACKET: { From c1a267cfd31fdc53ddba84d4cd78e6ea95c5d8b9 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Wed, 17 Nov 2021 18:26:11 +0200 Subject: [PATCH 038/124] refactor range --- src/tokenizer.ts | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index cafb8ac349..2a23c0d5fa 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -433,15 +433,10 @@ export function operatorTokenToString(token: Token): string { export class Range { - start: i32; - end: i32; source!: Source; debugInfoRef: usize = 0; - constructor(start: i32, end: i32) { - this.start = start; - this.end = end; - } + constructor(public start: i32, public end: i32) {} static join(a: Range, b: Range): Range { if (a.source != b.source) throw new Error("source mismatch"); @@ -454,7 +449,11 @@ export class Range { } equals(other: Range): bool { - return this.source == other.source && this.start == other.start && this.end == other.end; + return ( + this.source == other.source && + this.start == other.start && + this.end == other.end + ); } get atStart(): Range { From d9e08a78d2f0d324c23c0eaf002a3715d43e9f3a Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 18 Nov 2021 19:34:17 +0200 Subject: [PATCH 039/124] better probeKeywordToken --- src/tokenizer.ts | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 2a23c0d5fa..653df2b7be 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -192,7 +192,12 @@ export function probeKeywordToken(text: string): Token { assert(len); switch (text.charCodeAt(0)) { case CharCode.a: { - if (text == "as") return Token.AS; + if (len == 2) { + if (text.charCodeAt(1) == CharCode.s) { + return Token.AS; + } + break; + } if (text == "abstract") return Token.ABSTRACT; if (text == "async") return Token.ASYNC; if (text == "await") return Token.AWAIT; @@ -204,15 +209,20 @@ export function probeKeywordToken(text: string): Token { } case CharCode.c: { if (text == "const") return Token.CONST; - if (text == "class") return Token.CLASS; if (text == "case") return Token.CASE; if (text == "continue") return Token.CONTINUE; + if (text == "class") return Token.CLASS; if (text == "constructor") return Token.CONSTRUCTOR; if (text == "catch") return Token.CATCH; break; } case CharCode.d: { - if (text == "do") return Token.DO; + if (len == 2) { + if (text.charCodeAt(1) == CharCode.o) { + return Token.DO; + } + break; + } if (text == "default") return Token.DEFAULT; if (text == "declare") return Token.DECLARE; if (text == "delete") return Token.DELETE; @@ -239,10 +249,14 @@ export function probeKeywordToken(text: string): Token { break; } case CharCode.i: { - if (text == "if") return Token.IF; - if (text == "in") return Token.IN; - if (text == "is") return Token.IS; - + if (len == 2) { + switch (text.charCodeAt(1)) { + case CharCode.f: return Token.IF; + case CharCode.n: return Token.IN; + case CharCode.s: return Token.IS; + } + break; + } switch (text.charCodeAt(3)) { case CharCode.t: { if (text == "instanceof") return Token.INSTANCEOF; @@ -282,13 +296,15 @@ export function probeKeywordToken(text: string): Token { break; } case CharCode.o: { - if (text == "of") return Token.OF; + if (len == 2 && text.charCodeAt(1) == CharCode.f) { + return Token.OF; + } break; } case CharCode.p: { if (text == "public") return Token.PUBLIC; - if (text == "protected") return Token.PROTECTED; if (text == "private") return Token.PRIVATE; + if (text == "protected") return Token.PROTECTED; if (text == "package") return Token.PACKAGE; break; } @@ -308,9 +324,9 @@ export function probeKeywordToken(text: string): Token { if (text == "true") return Token.TRUE; if (text == "this") return Token.THIS; if (text == "type") return Token.TYPE; - if (text == "try") return Token.TRY; - if (text == "throw") return Token.THROW; if (text == "typeof") return Token.TYPEOF; + if (text == "throw") return Token.THROW; + if (text == "try") return Token.TRY; break; } case CharCode.v: { From d12d5ffa06285b946ea7db9a7739369d6e7fefa7 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 18 Nov 2021 19:45:03 +0200 Subject: [PATCH 040/124] faster isIllegalVariableIdentifier --- src/tokenizer.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 653df2b7be..2b747d57c7 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -373,7 +373,9 @@ export function tokenIsAlsoIdentifier(token: Token): bool { } export function isIllegalVariableIdentifier(name: string): bool { - assert(name.length); + if (name.length < 3 || name.length > 10) { + return false; + } switch (name.charCodeAt(0)) { case CharCode.d: return name == "delete"; case CharCode.f: return name == "for"; From ab3b85849867e10f5543391c07ffc6856e6ac020 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 18 Nov 2021 20:30:50 +0200 Subject: [PATCH 041/124] better readHexInteger --- src/tokenizer.ts | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 2b747d57c7..e8d4a6d76b 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -30,6 +30,7 @@ import { isIdentifierPart, isDecimal, isOctal, + isHex, isHighSurrogate, isLowSurrogate } from "./util"; @@ -734,8 +735,8 @@ export class Tokenizer extends DiagnosticEmitter { return Token.FLOATLITERAL; // expects a call to readFloat } if ( - maxTokenLength > 2 && pos + 1 < end && - c == CharCode.DOT && + maxTokenLength > 2 && + pos + 1 < end && c == CharCode.DOT && text.charCodeAt(pos + 1) == CharCode.DOT ) { this.pos = pos + 2; @@ -1405,7 +1406,7 @@ export class Tokenizer extends DiagnosticEmitter { if (c == CharCode.DOT || (c | 32) == CharCode.e) { return Token.FLOATLITERAL; } - if (c != CharCode._ && (c < CharCode._0 || c > CharCode._9)) break; + if (c != CharCode._ && !isDecimal(c)) break; // does not validate separator placement (this is done in readXYInteger) pos++; } @@ -1454,23 +1455,17 @@ export class Tokenizer extends DiagnosticEmitter { var i64_4 = i64_new(4); while (pos < end) { let c = text.charCodeAt(pos); - if (c >= CharCode._0 && c <= CharCode._9) { + if (isDecimal(c)) { // value = (value << 4) + c - CharCode._0; value = i64_add( i64_shl(value, i64_4), i64_new(c - CharCode._0) ); - } else if (c >= CharCode.A && c <= CharCode.F) { - // value = (value << 4) + 10 + c - CharCode.A; - value = i64_add( - i64_shl(value, i64_4), - i64_new(10 + c - CharCode.A) - ); - } else if (c >= CharCode.a && c <= CharCode.f) { - // value = (value << 4) + 10 + c - CharCode.a; + } else if (isHex(c)) { + // value = (value << 4) + 10 + (c | 32) - CharCode.a; value = i64_add( i64_shl(value, i64_4), - i64_new(10 + c - CharCode.a) + i64_new(10 + (c | 32) - CharCode.a) ); } else if (c == CharCode._) { if (sepEnd == pos) { From bc4ec0231a74a3be6769c4684195d5eef6e050d5 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 18 Nov 2021 20:35:14 +0200 Subject: [PATCH 042/124] better --- src/tokenizer.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index e8d4a6d76b..d45760f27c 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -30,7 +30,6 @@ import { isIdentifierPart, isDecimal, isOctal, - isHex, isHighSurrogate, isLowSurrogate } from "./util"; @@ -1461,7 +1460,7 @@ export class Tokenizer extends DiagnosticEmitter { i64_shl(value, i64_4), i64_new(c - CharCode._0) ); - } else if (isHex(c)) { + } else if ((c | 32) >= CharCode.a && (c | 32) <= CharCode.f) { // value = (value << 4) + 10 + (c | 32) - CharCode.a; value = i64_add( i64_shl(value, i64_4), From e4986e32cb88dab3135b6c6cbdc81b136a3dedd1 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 18 Nov 2021 20:38:00 +0200 Subject: [PATCH 043/124] better octal --- src/tokenizer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index d45760f27c..f768b31250 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1557,7 +1557,7 @@ export class Tokenizer extends DiagnosticEmitter { var i64_3 = i64_new(3); while (pos < end) { let c = text.charCodeAt(pos); - if (c >= CharCode._0 && c <= CharCode._7) { + if (isOctal(c)) { // value = (value << 3) + c - CharCode._0; value = i64_add( i64_shl(value, i64_3), From b57e5c6727e19b6a5d160c8ba48fe5695e325551 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 18 Nov 2021 21:16:03 +0200 Subject: [PATCH 044/124] optimize read hex, octal and binary by skipping seq zeros --- src/tokenizer.ts | 85 +++++++++++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 34 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index f768b31250..be70e8d241 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1444,28 +1444,22 @@ export class Tokenizer extends DiagnosticEmitter { return this.readDecimalInteger(); } - readHexInteger(): i64 { + readDecimalInteger(): i64 { var text = this.source.text; - let pos = this.pos; + var pos = this.pos; var end = this.end; var start = pos; var sepEnd = start; var value = i64_new(0); - var i64_4 = i64_new(4); + var i64_10 = i64_new(10); while (pos < end) { let c = text.charCodeAt(pos); - if (isDecimal(c)) { - // value = (value << 4) + c - CharCode._0; + if (c >= CharCode._0 && c <= CharCode._9) { + // value = value * 10 + c - CharCode._0; value = i64_add( - i64_shl(value, i64_4), + i64_mul(value, i64_10), i64_new(c - CharCode._0) ); - } else if ((c | 32) >= CharCode.a && (c | 32) <= CharCode.f) { - // value = (value << 4) + 10 + (c | 32) - CharCode.a; - value = i64_add( - i64_shl(value, i64_4), - i64_new(10 + (c | 32) - CharCode.a) - ); } else if (c == CharCode._) { if (sepEnd == pos) { this.error( @@ -1474,6 +1468,11 @@ export class Tokenizer extends DiagnosticEmitter { : DiagnosticCode.Multiple_consecutive_numeric_separators_are_not_permitted, this.range(pos) ); + } else if (pos - 1 == start && text.charCodeAt(pos - 1) == CharCode._0) { + this.error( + DiagnosticCode.Numeric_separators_are_not_allowed_here, + this.range(pos) + ); } sepEnd = pos + 1; } else { @@ -1483,7 +1482,7 @@ export class Tokenizer extends DiagnosticEmitter { } if (pos == start) { this.error( - DiagnosticCode.Hexadecimal_digit_expected, + DiagnosticCode.Digit_expected, this.range(start) ); } else if (sepEnd == pos) { @@ -1496,22 +1495,32 @@ export class Tokenizer extends DiagnosticEmitter { return value; } - readDecimalInteger(): i64 { + readHexInteger(): i64 { var text = this.source.text; - var pos = this.pos; + let pos = this.pos; var end = this.end; var start = pos; var sepEnd = start; var value = i64_new(0); - var i64_10 = i64_new(10); + var zeros = 0; while (pos < end) { let c = text.charCodeAt(pos); - if (c >= CharCode._0 && c <= CharCode._9) { - // value = value * 10 + c - CharCode._0; + if (c == CharCode._0) { + ++zeros; + } else if (isDecimal(c)) { + // value = (value << (zeros + 1) * 4) + c - CharCode._0; value = i64_add( - i64_mul(value, i64_10), + i64_shl(value, i64_new((zeros + 1) << 2)), i64_new(c - CharCode._0) ); + zeros = 0; + } else if ((c | 32) >= CharCode.a && (c | 32) <= CharCode.f) { + // value = (value << (zeros + 1) * 4) + 10 + (c | 32) - CharCode.a; + value = i64_add( + i64_shl(value, i64_new((zeros + 1) << 2)), + i64_new(10 + (c | 32) - CharCode.a) + ); + zeros = 0; } else if (c == CharCode._) { if (sepEnd == pos) { this.error( @@ -1520,11 +1529,6 @@ export class Tokenizer extends DiagnosticEmitter { : DiagnosticCode.Multiple_consecutive_numeric_separators_are_not_permitted, this.range(pos) ); - } else if (pos - 1 == start && text.charCodeAt(pos - 1) == CharCode._0) { - this.error( - DiagnosticCode.Numeric_separators_are_not_allowed_here, - this.range(pos) - ); } sepEnd = pos + 1; } else { @@ -1532,9 +1536,12 @@ export class Tokenizer extends DiagnosticEmitter { } ++pos; } + if (zeros != 0) { + value = i64_shl(value, i64_new(zeros << 2)); + } if (pos == start) { this.error( - DiagnosticCode.Digit_expected, + DiagnosticCode.Hexadecimal_digit_expected, this.range(start) ); } else if (sepEnd == pos) { @@ -1554,15 +1561,18 @@ export class Tokenizer extends DiagnosticEmitter { var start = pos; var sepEnd = start; var value = i64_new(0); - var i64_3 = i64_new(3); + var zeros = 0; while (pos < end) { let c = text.charCodeAt(pos); - if (isOctal(c)) { - // value = (value << 3) + c - CharCode._0; + if (c == CharCode._0) { + ++zeros; + } else if (isOctal(c)) { + // value = (value << (zeros + 1) * 3) + c - CharCode._0; value = i64_add( - i64_shl(value, i64_3), + i64_shl(value, i64_new((zeros + 1) * 3)), i64_new(c - CharCode._0) ); + zeros = 0; } else if (c == CharCode._) { if (sepEnd == pos) { this.error( @@ -1578,6 +1588,9 @@ export class Tokenizer extends DiagnosticEmitter { } ++pos; } + if (zeros != 0) { + value = i64_shl(value, i64_new(zeros * 3)); + } if (pos == start) { this.error( DiagnosticCode.Octal_digit_expected, @@ -1601,17 +1614,18 @@ export class Tokenizer extends DiagnosticEmitter { var sepEnd = start; var value = i64_new(0); var i64_1 = i64_new(1); + var zeros = 0; while (pos < end) { let c = text.charCodeAt(pos); if (c == CharCode._0) { - // value = (value << 1); - value = i64_shl(value, i64_1); + ++zeros; } else if (c == CharCode._1) { - // value = (value << 1) + 1; - value = i64_add( - i64_shl(value, i64_1), + // (value << zeros + 1) | 1 + value = i64_or( + i64_shl(value, i64_new(zeros + 1)), i64_1 ); + zeros = 0; } else if (c == CharCode._) { if (sepEnd == pos) { this.error( @@ -1627,6 +1641,9 @@ export class Tokenizer extends DiagnosticEmitter { } ++pos; } + if (zeros != 0) { + value = i64_shl(value, i64_new(zeros)); + } if (pos == start) { this.error( DiagnosticCode.Binary_digit_expected, From 5d960a9c7c0b2bbdd5519ec030a0ebe9f022f78a Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 18 Nov 2021 22:09:41 +0200 Subject: [PATCH 045/124] refactor --- src/tokenizer.ts | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index be70e8d241..e2c59371e4 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1450,11 +1450,11 @@ export class Tokenizer extends DiagnosticEmitter { var end = this.end; var start = pos; var sepEnd = start; - var value = i64_new(0); + var value = i64_zero; var i64_10 = i64_new(10); while (pos < end) { let c = text.charCodeAt(pos); - if (c >= CharCode._0 && c <= CharCode._9) { + if (isDecimal(c)) { // value = value * 10 + c - CharCode._0; value = i64_add( i64_mul(value, i64_10), @@ -1501,7 +1501,7 @@ export class Tokenizer extends DiagnosticEmitter { var end = this.end; var start = pos; var sepEnd = start; - var value = i64_new(0); + var value = i64_zero; var zeros = 0; while (pos < end) { let c = text.charCodeAt(pos); @@ -1560,7 +1560,7 @@ export class Tokenizer extends DiagnosticEmitter { var end = this.end; var start = pos; var sepEnd = start; - var value = i64_new(0); + var value = i64_zero; var zeros = 0; while (pos < end) { let c = text.charCodeAt(pos); @@ -1612,8 +1612,7 @@ export class Tokenizer extends DiagnosticEmitter { var end = this.end; var start = pos; var sepEnd = start; - var value = i64_new(0); - var i64_1 = i64_new(1); + var value = i64_zero; var zeros = 0; while (pos < end) { let c = text.charCodeAt(pos); @@ -1623,7 +1622,7 @@ export class Tokenizer extends DiagnosticEmitter { // (value << zeros + 1) | 1 value = i64_or( i64_shl(value, i64_new(zeros + 1)), - i64_1 + i64_one ); zeros = 0; } else if (c == CharCode._) { From 9d5d0ff7b891105f7d41ea183846e578ce883326 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 18 Nov 2021 22:52:22 +0200 Subject: [PATCH 046/124] more --- src/tokenizer.ts | 11 +++++------ src/util/text.ts | 9 +++++++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index e2c59371e4..c700522a7d 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -30,6 +30,7 @@ import { isIdentifierPart, isDecimal, isOctal, + isHexPart, isHighSurrogate, isLowSurrogate } from "./util"; @@ -1514,7 +1515,7 @@ export class Tokenizer extends DiagnosticEmitter { i64_new(c - CharCode._0) ); zeros = 0; - } else if ((c | 32) >= CharCode.a && (c | 32) <= CharCode.f) { + } else if (isHexPart(c)) { // value = (value << (zeros + 1) * 4) + 10 + (c | 32) - CharCode.a; value = i64_add( i64_shl(value, i64_new((zeros + 1) << 2)), @@ -1755,12 +1756,10 @@ export class Tokenizer extends DiagnosticEmitter { var end = this.end; while (pos < end) { let c = text.charCodeAt(pos++); - if (c >= CharCode._0 && c <= CharCode._9) { + if (isDecimal(c)) { value = (value << 4) + c - CharCode._0; - } else if (c >= CharCode.A && c <= CharCode.F) { - value = (value << 4) + c + (10 - CharCode.A); - } else if (c >= CharCode.a && c <= CharCode.f) { - value = (value << 4) + c + (10 - CharCode.a); + } else if (isHexPart(c)) { + value = (value << 4) + (c | 32) + (10 - CharCode.a); } else if (~startIfTaggedTemplate) { this.pos = --pos; return text.substring(startIfTaggedTemplate, pos); diff --git a/src/util/text.ts b/src/util/text.ts index b2a8a10a9d..3267182151 100644 --- a/src/util/text.ts +++ b/src/util/text.ts @@ -226,10 +226,15 @@ export function isOctal(c: i32): bool { return c >= CharCode._0 && c <= CharCode._7; } +/** Tests if the specified character code is a valid hexadecimal symbol [a-f]. */ +export function isHexPart(c: i32): bool { + let c0 = c | 32; // unify uppercases and lowercases a|A - f|F + return c0 >= CharCode.a && c0 <= CharCode.f; +} + /** Tests if the specified character code is a valid hexadecimal digit. */ export function isHex(c: i32): bool { - let c0 = c | 32; // unify uppercases and lowercases a|A - f|F - return isDecimal(c) || (c0 >= CharCode.a && c0 <= CharCode.f); + return isDecimal(c) || isHexPart(c); } /** Tests if the specified character code is trivially alphanumeric. */ From 3df8ce80220273becd91001e758fff626143e279 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 18 Nov 2021 23:10:08 +0200 Subject: [PATCH 047/124] add fast pathes for readDecimalFloat --- src/tokenizer.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index c700522a7d..19c9b32221 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1696,6 +1696,13 @@ export class Tokenizer extends DiagnosticEmitter { } } let result = text.substring(start, this.pos); + if (this.pos - start == 3) { + // fast pathes for most usual floating loints + if (result == "0.0") return 0.0; + if (result == "1.0") return 1.0; + if (result == "0.5") return 0.5; + if (result == "2.0") return 2.0; + } if (sepCount) result = result.replaceAll("_", ""); return parseFloat(result); } From 92dd92a2fd9c23fb43a9205cb2816dcb85065dc4 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 18 Nov 2021 23:18:06 +0200 Subject: [PATCH 048/124] simplify readDecimalFloatPartial --- src/tokenizer.ts | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 19c9b32221..ed6386c875 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1677,10 +1677,10 @@ export class Tokenizer extends DiagnosticEmitter { var text = this.source.text; var end = this.end; var start = this.pos; - var sepCount = this.readDecimalFloatPartial(false); + var hasSep = this.readDecimalFloatPartial(false); if (this.pos < end && text.charCodeAt(this.pos) == CharCode.DOT) { ++this.pos; - sepCount += this.readDecimalFloatPartial(); + hasSep |= this.readDecimalFloatPartial(); } if (this.pos < end) { let c = text.charCodeAt(this.pos); @@ -1692,7 +1692,7 @@ export class Tokenizer extends DiagnosticEmitter { ) { ++this.pos; } - sepCount += this.readDecimalFloatPartial(); + hasSep |= this.readDecimalFloatPartial(); } } let result = text.substring(start, this.pos); @@ -1703,22 +1703,21 @@ export class Tokenizer extends DiagnosticEmitter { if (result == "0.5") return 0.5; if (result == "2.0") return 2.0; } - if (sepCount) result = result.replaceAll("_", ""); + if (hasSep) result = result.replaceAll("_", ""); return parseFloat(result); } /** Reads past one section of a decimal float literal. Returns the number of separators encountered. */ - private readDecimalFloatPartial(allowLeadingZeroSep: bool = true): u32 { + private readDecimalFloatPartial(allowLeadingZeroSep: bool = true): i32 { var text = this.source.text; var pos = this.pos; var start = pos; var end = this.end; var sepEnd = start; - var sepCount = 0; + var hasSep = 0; while (pos < end) { let c = text.charCodeAt(pos); - if (c == CharCode._) { if (sepEnd == pos) { this.error( @@ -1734,7 +1733,7 @@ export class Tokenizer extends DiagnosticEmitter { ); } sepEnd = pos + 1; - ++sepCount; + hasSep = 1; } else if (!isDecimal(c)) { break; } @@ -1749,7 +1748,7 @@ export class Tokenizer extends DiagnosticEmitter { } this.pos = pos; - return sepCount; + return hasSep; } readHexFloat(): f64 { From d693f7b8449ab65400b9e47bdd1ff1a952e0519f Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 19 Nov 2021 01:02:44 +0200 Subject: [PATCH 049/124] refactor --- src/tokenizer.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index ed6386c875..65a8ab0106 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1710,10 +1710,10 @@ export class Tokenizer extends DiagnosticEmitter { /** Reads past one section of a decimal float literal. Returns the number of separators encountered. */ private readDecimalFloatPartial(allowLeadingZeroSep: bool = true): i32 { var text = this.source.text; + var end = this.end; var pos = this.pos; var start = pos; - var end = this.end; - var sepEnd = start; + var sepEnd = pos; var hasSep = 0; while (pos < end) { From 1f803778c9b4544f04568e920640ccccb6d3d2b4 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 19 Nov 2021 01:13:08 +0200 Subject: [PATCH 050/124] more --- src/tokenizer.ts | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 65a8ab0106..c2828d8e4e 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1268,8 +1268,8 @@ export class Tokenizer extends DiagnosticEmitter { readEscapeSequence(isTaggedTemplate: bool = false): string { // for context on isTaggedTemplate, see: https://tc39.es/proposal-template-literal-revision/ - var start = this.pos; var end = this.end; + var start = this.pos; if (++this.pos >= end) { this.error( DiagnosticCode.Unexpected_end_of_text, @@ -1360,14 +1360,14 @@ export class Tokenizer extends DiagnosticEmitter { readRegexpFlags(): string { var text = this.source.text; - var start = this.pos; var end = this.end; + var pos = this.pos; + var start = pos; var flags = 0; - while (this.pos < end) { - let c: i32 = text.charCodeAt(this.pos); + while (pos < end) { + let c = text.charCodeAt(pos); if (!isIdentifierPart(c)) break; - ++this.pos; - + ++pos; // make sure each supported flag is unique switch (c) { case CharCode.g: { @@ -1391,10 +1391,11 @@ export class Tokenizer extends DiagnosticEmitter { if (flags == -1) { this.error( DiagnosticCode.Invalid_regular_expression_flags, - this.range(start, this.pos) + this.range(start, pos) ); } - return text.substring(start, this.pos); + this.pos = pos; + return text.substring(start, pos); } integerOrFloatToken(): Token { @@ -1430,16 +1431,18 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos + 2; return this.readOctalInteger(); } - } - if (isOctal(text.charCodeAt(pos + 1))) { - let start = pos; - this.pos = pos + 1; - let value = this.readOctalInteger(); - this.error( - DiagnosticCode.Octal_literals_are_not_allowed_in_strict_mode, - this.range(start, this.pos) - ); - return value; + default: { + if (isOctal(text.charCodeAt(pos + 1))) { + let start = pos; + this.pos = pos + 1; + let value = this.readOctalInteger(); + this.error( + DiagnosticCode.Octal_literals_are_not_allowed_in_strict_mode, + this.range(start, this.pos) + ); + return value; + } + } } } return this.readDecimalInteger(); From f7d3a90e2805330edf4c0de1325b26b969dd2d61 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 19 Nov 2021 01:21:32 +0200 Subject: [PATCH 051/124] better --- src/tokenizer.ts | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index c2828d8e4e..1bd99b2d32 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -209,11 +209,14 @@ export function probeKeywordToken(text: string): Token { break; } case CharCode.c: { - if (text == "const") return Token.CONST; - if (text == "case") return Token.CASE; - if (text == "continue") return Token.CONTINUE; - if (text == "class") return Token.CLASS; + if (len <= 5) { + if (text == "const") return Token.CONST; + if (text == "case") return Token.CASE; + if (text == "class") return Token.CLASS; + break; + } if (text == "constructor") return Token.CONSTRUCTOR; + if (text == "continue") return Token.CONTINUE; if (text == "catch") return Token.CATCH; break; } @@ -259,22 +262,22 @@ export function probeKeywordToken(text: string): Token { break; } switch (text.charCodeAt(3)) { - case CharCode.t: { - if (text == "instanceof") return Token.INSTANCEOF; + case CharCode.e: { + if (text == "interface") return Token.INTERFACE; break; } case CharCode.l: { if (text == "implements") return Token.IMPLEMENTS; break; } - case CharCode.e: { - if (text == "interface") return Token.INTERFACE; - break; - } case CharCode.o: { if (text == "import") return Token.IMPORT; break; } + case CharCode.t: { + if (text == "instanceof") return Token.INSTANCEOF; + break; + } } break; } From ee8dba9985c0eecc1d3027991024b859cc4e5060 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 19 Nov 2021 13:28:05 +0200 Subject: [PATCH 052/124] add await operator for operatorTokenToString --- src/tokenizer.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 1bd99b2d32..60fb3eeee1 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -392,6 +392,7 @@ export function isIllegalVariableIdentifier(name: string): bool { export function operatorTokenToString(token: Token): string { switch (token) { + case Token.AWAIT: return "await"; case Token.DELETE: return "delete"; case Token.IN: return "in"; case Token.INSTANCEOF: return "instanceof"; From 82ae5c833b5fd4bbe9fa121623e2e6b4da3c9472 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 19 Nov 2021 13:34:31 +0200 Subject: [PATCH 053/124] refactor --- src/tokenizer.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 60fb3eeee1..7ad889405d 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -756,11 +756,11 @@ export class Tokenizer extends DiagnosticEmitter { if (maxTokenLength > 1 && pos < end) { c = text.charCodeAt(pos); if (c == CharCode.SLASH) { // single-line - pos = this.skipSingleLineComment(text, pos, end); + pos = this.skipLineComment(text, pos, end); break; } if (c == CharCode.ASTERISK) { // multi-line - pos = this.skipMultiLineComment(text, pos, end); + pos = this.skipBlockComment(text, pos, end); break; } if (c == CharCode.EQUALS) { @@ -1066,7 +1066,7 @@ export class Tokenizer extends DiagnosticEmitter { return this.nextToken; } - skipSingleLineComment(text: string, pos: i32, end: i32): i32 { + skipLineComment(text: string, pos: i32, end: i32): i32 { let commentStartPos = pos - 1; let commentKind = CommentKind.LINE; if ( @@ -1092,7 +1092,7 @@ export class Tokenizer extends DiagnosticEmitter { return pos; } - skipMultiLineComment(text: string, pos: i32, end: i32): i32 { + skipBlockComment(text: string, pos: i32, end: i32): i32 { let commentStartPos = pos - 1; let closed = false; while (++pos < end) { From e2e86caae680331a22a6a491d001107369a9ffcb Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 19 Nov 2021 13:59:35 +0200 Subject: [PATCH 054/124] refactor readInteger --- src/tokenizer.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 7ad889405d..035dadfecf 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1422,7 +1422,8 @@ export class Tokenizer extends DiagnosticEmitter { var text = this.source.text; var pos = this.pos; if (pos + 2 < this.end && text.charCodeAt(pos) == CharCode._0) { - switch (text.charCodeAt(pos + 1) | 32) { + let c1 = text.charCodeAt(pos + 1); + switch (c1 | 32) { case CharCode.x: { this.pos = pos + 2; return this.readHexInteger(); @@ -1436,7 +1437,7 @@ export class Tokenizer extends DiagnosticEmitter { return this.readOctalInteger(); } default: { - if (isOctal(text.charCodeAt(pos + 1))) { + if (isOctal(c1)) { let start = pos; this.pos = pos + 1; let value = this.readOctalInteger(); From 4399ab777a99427afa81ee43b77d7043c9b347d0 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 19 Nov 2021 15:13:59 +0200 Subject: [PATCH 055/124] refactor --- src/tokenizer.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 035dadfecf..49d17a2ebc 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1704,8 +1704,8 @@ export class Tokenizer extends DiagnosticEmitter { } } let result = text.substring(start, this.pos); - if (this.pos - start == 3) { - // fast pathes for most usual floating loints + if (result.length == 3) { + // fast pathes for most usual literals if (result == "0.0") return 0.0; if (result == "1.0") return 1.0; if (result == "0.5") return 0.5; From 0cba78a0c2d1ef5a26b566fbe75a57542e66d3e9 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 19 Nov 2021 15:29:10 +0200 Subject: [PATCH 056/124] better --- src/tokenizer.ts | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 49d17a2ebc..1442919c43 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1703,14 +1703,16 @@ export class Tokenizer extends DiagnosticEmitter { hasSep |= this.readDecimalFloatPartial(); } } - let result = text.substring(start, this.pos); - if (result.length == 3) { - // fast pathes for most usual literals - if (result == "0.0") return 0.0; - if (result == "1.0") return 1.0; - if (result == "0.5") return 0.5; - if (result == "2.0") return 2.0; - } + let pos = this.pos; + // fast pathes for most usual literals: + // 0.0, 1.0, 2.0 .. 9.0 + if ( + pos - start == 3 && + text.charCodeAt(start + 1) == CharCode.DOT && + text.charCodeAt(start + 2) == CharCode._0 + ) return (text.charCodeAt(start) - CharCode._0); + + let result = text.substring(start, pos); if (hasSep) result = result.replaceAll("_", ""); return parseFloat(result); } From 1e9ea69a9f8881efb7e01e3cdfc1959a98b77c8a Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 19 Nov 2021 15:50:26 +0200 Subject: [PATCH 057/124] refactor --- src/tokenizer.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 1442919c43..67aa049de3 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1685,10 +1685,10 @@ export class Tokenizer extends DiagnosticEmitter { var text = this.source.text; var end = this.end; var start = this.pos; - var hasSep = this.readDecimalFloatPartial(false); + var hasSep = this.scanFloatPartial(false); if (this.pos < end && text.charCodeAt(this.pos) == CharCode.DOT) { ++this.pos; - hasSep |= this.readDecimalFloatPartial(); + hasSep |= this.scanFloatPartial(); } if (this.pos < end) { let c = text.charCodeAt(this.pos); @@ -1700,7 +1700,7 @@ export class Tokenizer extends DiagnosticEmitter { ) { ++this.pos; } - hasSep |= this.readDecimalFloatPartial(); + hasSep |= this.scanFloatPartial(); } } let pos = this.pos; @@ -1717,8 +1717,8 @@ export class Tokenizer extends DiagnosticEmitter { return parseFloat(result); } - /** Reads past one section of a decimal float literal. Returns the number of separators encountered. */ - private readDecimalFloatPartial(allowLeadingZeroSep: bool = true): i32 { + /** Scan past one section of a decimal float literal. Returns `1` if separators encountered. */ + private scanFloatPartial(allowLeadingZeroSep: bool = true): i32 { var text = this.source.text; var end = this.end; var pos = this.pos; From 51e9197234c87c0d58ebd384e8e402566a641427 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 19 Nov 2021 15:53:56 +0200 Subject: [PATCH 058/124] again --- src/tokenizer.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 67aa049de3..368e499ca3 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1685,10 +1685,10 @@ export class Tokenizer extends DiagnosticEmitter { var text = this.source.text; var end = this.end; var start = this.pos; - var hasSep = this.scanFloatPartial(false); + var hasSep = this.scanFloatAndSeparators(false); if (this.pos < end && text.charCodeAt(this.pos) == CharCode.DOT) { ++this.pos; - hasSep |= this.scanFloatPartial(); + hasSep |= this.scanFloatAndSeparators(); } if (this.pos < end) { let c = text.charCodeAt(this.pos); @@ -1700,7 +1700,7 @@ export class Tokenizer extends DiagnosticEmitter { ) { ++this.pos; } - hasSep |= this.scanFloatPartial(); + hasSep |= this.scanFloatAndSeparators(); } } let pos = this.pos; @@ -1718,7 +1718,7 @@ export class Tokenizer extends DiagnosticEmitter { } /** Scan past one section of a decimal float literal. Returns `1` if separators encountered. */ - private scanFloatPartial(allowLeadingZeroSep: bool = true): i32 { + private scanFloatAndSeparators(allowLeadingZeroSep: bool = true): i32 { var text = this.source.text; var end = this.end; var pos = this.pos; From 87557ee51ec2ff13db53e074772589e35e5d0327 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 19 Nov 2021 15:57:36 +0200 Subject: [PATCH 059/124] ENDOFFILE => EOF --- src/parser.ts | 16 ++++++++-------- src/tokenizer.ts | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index e8932970eb..36f65b97ad 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -178,7 +178,7 @@ export class Parser extends DiagnosticEmitter { var tn = new Tokenizer(source, this.diagnostics); tn.onComment = this.onComment; var statements = source.statements; - while (!tn.skip(Token.ENDOFFILE)) { + while (!tn.skip(Token.EOF)) { let statement = this.parseTopLevelStatement(tn, null); if (statement) { statements.push(statement); @@ -1763,7 +1763,7 @@ export class Parser extends DiagnosticEmitter { } } else { this.skipStatement(tn); - if (tn.skip(Token.ENDOFFILE)) { + if (tn.skip(Token.EOF)) { this.error( DiagnosticCode._0_expected, tn.range(), "}" @@ -1821,7 +1821,7 @@ export class Parser extends DiagnosticEmitter { } } else { this.skipStatement(tn); - if (tn.skip(Token.ENDOFFILE)) { + if (tn.skip(Token.EOF)) { this.error( DiagnosticCode._0_expected, tn.range(), "}" @@ -2421,7 +2421,7 @@ export class Parser extends DiagnosticEmitter { if (member) members.push(member); else { this.skipStatement(tn); - if (tn.skip(Token.ENDOFFILE)) { + if (tn.skip(Token.EOF)) { this.error( DiagnosticCode._0_expected, tn.range(), "}" @@ -2893,7 +2893,7 @@ export class Parser extends DiagnosticEmitter { let state = tn.mark(); let statement = this.parseStatement(tn, topLevel); if (!statement) { - if (tn.token == Token.ENDOFFILE) return null; + if (tn.token == Token.EOF) return null; tn.reset(state); this.skipStatement(tn); } else { @@ -3847,7 +3847,7 @@ export class Parser extends DiagnosticEmitter { return this.parseClassExpression(tn); } default: { - if (token == Token.ENDOFFILE) { + if (token == Token.EOF) { this.error( DiagnosticCode.Unexpected_end_of_text, tn.range(startPos) @@ -4235,7 +4235,7 @@ export class Parser extends DiagnosticEmitter { do { let nextToken = tn.peek(true); if ( - nextToken == Token.ENDOFFILE || // next step should handle this + nextToken == Token.EOF || // next step should handle this nextToken == Token.SEMICOLON // end of the statement for sure ) { tn.next(); @@ -4278,7 +4278,7 @@ export class Parser extends DiagnosticEmitter { var again = true; do { switch (tn.next()) { - case Token.ENDOFFILE: { + case Token.EOF: { this.error( DiagnosticCode._0_expected, tn.range(), "}" diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 368e499ca3..f35443e724 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -179,7 +179,7 @@ export const enum Token { // meta INVALID, - ENDOFFILE + EOF } export const enum IdentifierHandling { @@ -1032,7 +1032,7 @@ export class Tokenizer extends DiagnosticEmitter { } } this.pos = pos; - return Token.ENDOFFILE; + return Token.EOF; } peek( From f3265244239788a9fb93d74b00e85de4349149d1 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 19 Nov 2021 21:40:15 +0200 Subject: [PATCH 060/124] simplify parseExpression --- src/parser.ts | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 36f65b97ad..9924804aa4 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -4092,26 +4092,6 @@ export class Parser extends DiagnosticEmitter { } break; } - // BinaryExpression (right associative) - case Token.EQUALS: - case Token.PLUS_EQUALS: - case Token.MINUS_EQUALS: - case Token.ASTERISK_ASTERISK_EQUALS: - case Token.ASTERISK_EQUALS: - case Token.SLASH_EQUALS: - case Token.PERCENT_EQUALS: - case Token.LESSTHAN_LESSTHAN_EQUALS: - case Token.GREATERTHAN_GREATERTHAN_EQUALS: - case Token.GREATERTHAN_GREATERTHAN_GREATERTHAN_EQUALS: - case Token.AMPERSAND_EQUALS: - case Token.CARET_EQUALS: - case Token.BAR_EQUALS: - case Token.ASTERISK_ASTERISK: { - let next = this.parseExpression(tn, nextPrecedence); - if (!next) return null; - expr = Node.createBinaryExpression(token, expr, next, tn.range(startPos, tn.pos)); - break; - } // BinaryExpression case Token.LESSTHAN: case Token.GREATERTHAN: @@ -4133,8 +4113,24 @@ export class Parser extends DiagnosticEmitter { case Token.BAR: case Token.CARET: case Token.AMPERSAND_AMPERSAND: - case Token.BAR_BAR: { - let next = this.parseExpression(tn, nextPrecedence + 1); + case Token.BAR_BAR: + ++nextPrecedence; + // BinaryExpression (right associative) + case Token.EQUALS: + case Token.PLUS_EQUALS: + case Token.MINUS_EQUALS: + case Token.ASTERISK_ASTERISK_EQUALS: + case Token.ASTERISK_EQUALS: + case Token.SLASH_EQUALS: + case Token.PERCENT_EQUALS: + case Token.LESSTHAN_LESSTHAN_EQUALS: + case Token.GREATERTHAN_GREATERTHAN_EQUALS: + case Token.GREATERTHAN_GREATERTHAN_GREATERTHAN_EQUALS: + case Token.AMPERSAND_EQUALS: + case Token.CARET_EQUALS: + case Token.BAR_EQUALS: + case Token.ASTERISK_ASTERISK: { + let next = this.parseExpression(tn, nextPrecedence); if (!next) return null; expr = Node.createBinaryExpression(token, expr, next, tn.range(startPos, tn.pos)); break; From 43b41b0fd15dbd7e5327332957119ca63a9ba381 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 22 Nov 2021 14:21:11 +0200 Subject: [PATCH 061/124] add comment for parse new operator --- src/parser.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/parser.ts b/src/parser.ts index 9924804aa4..f08b7552a7 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -3548,6 +3548,9 @@ export class Parser extends DiagnosticEmitter { // NewExpression case Token.NEW: { + + // at 'new': Identifier ('<' TypeArguments '>')? ('(' Arguments ')')? ';'? + if (!tn.skipIdentifier()) { this.error( DiagnosticCode.Identifier_expected, From e38fe0d68facb2a33cb78dd5ac55a469560b049d Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 22 Nov 2021 15:55:19 +0200 Subject: [PATCH 062/124] more --- src/parser.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index f08b7552a7..93083e1e84 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1711,8 +1711,8 @@ export class Parser extends DiagnosticEmitter { return null; } if (!isInterface) { - if (!implementsTypes) implementsTypes = []; - implementsTypes.push(type); + if (!implementsTypes) implementsTypes = [ type ]; + else implementsTypes.push(type); } } while (tn.skip(Token.COMMA)); } @@ -1855,8 +1855,8 @@ export class Parser extends DiagnosticEmitter { do { let decorator = this.parseDecorator(tn); if (!decorator) break; - if (!decorators) decorators = new Array(); - decorators.push(decorator); + if (!decorators) decorators = [ decorator ]; + else decorators.push(decorator); } while (tn.skip(Token.AT)); if (isInterface && decorators !== null) { this.error( @@ -4234,7 +4234,7 @@ export class Parser extends DiagnosticEmitter { do { let nextToken = tn.peek(true); if ( - nextToken == Token.EOF || // next step should handle this + nextToken == Token.EOF || // next step should handle this nextToken == Token.SEMICOLON // end of the statement for sure ) { tn.next(); From f4179f3ccb7201b0c8b1d4bdac178b3fd9c45682 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 22 Nov 2021 16:08:17 +0200 Subject: [PATCH 063/124] add regexp case for parser's skipStatement --- src/parser.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/parser.ts b/src/parser.ts index 93083e1e84..99cedbada2 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -4261,6 +4261,11 @@ export class Parser extends DiagnosticEmitter { tn.checkForIdentifierStartAfterNumericLiteral(); break; } + case Token.SLASH: { + tn.readRegexpPattern(); + tn.readRegexpFlags(); + break; + } case Token.OPENBRACE: { this.skipBlock(tn); break; From 33617cee9a687885ed7b722c8e9f931539371648 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 22 Nov 2021 16:10:46 +0200 Subject: [PATCH 064/124] more --- src/parser.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 99cedbada2..9a94b8b8ed 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -4309,7 +4309,7 @@ export class Parser extends DiagnosticEmitter { } case Token.TEMPLATELITERAL: { tn.readString(); - while(tn.readingTemplateString){ + while (tn.readingTemplateString) { this.skipBlock(tn); tn.readString(CharCode.BACKTICK); } @@ -4325,6 +4325,11 @@ export class Parser extends DiagnosticEmitter { tn.checkForIdentifierStartAfterNumericLiteral(); break; } + case Token.SLASH: { + tn.readRegexpPattern(); + tn.readRegexpFlags(); + break; + } } } while (again); } From 7db95f8f5c064f1b90a68bd0f1f83faf775e8ff4 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 22 Nov 2021 16:55:58 +0200 Subject: [PATCH 065/124] add ||=, &&=, ??= and ?? for determinePrecedence --- src/parser.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 9a94b8b8ed..8b5cc8b7c4 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -4369,7 +4369,10 @@ function determinePrecedence(kind: Token): Precedence { case Token.PLUS_EQUALS: case Token.MINUS_EQUALS: case Token.ASTERISK_ASTERISK_EQUALS: + case Token.BAR_BAR_EQUALS: + case Token.AMPERSAND_AMPERSAND_EQUALS: case Token.ASTERISK_EQUALS: + case Token.QUESTION_QUESTION_EQUALS: case Token.SLASH_EQUALS: case Token.PERCENT_EQUALS: case Token.LESSTHAN_LESSTHAN_EQUALS: @@ -4379,7 +4382,8 @@ function determinePrecedence(kind: Token): Precedence { case Token.CARET_EQUALS: case Token.BAR_EQUALS: return Precedence.ASSIGNMENT; case Token.QUESTION: return Precedence.CONDITIONAL; - case Token.BAR_BAR: return Precedence.LOGICAL_OR; + case Token.BAR_BAR: + case Token.QUESTION_QUESTION: return Precedence.LOGICAL_OR; case Token.AMPERSAND_AMPERSAND: return Precedence.LOGICAL_AND; case Token.BAR: return Precedence.BITWISE_OR; case Token.CARET: return Precedence.BITWISE_XOR; From 34c8f9e0d63bb2715f36cac026f19f1099b646c4 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 22 Nov 2021 18:40:59 +0200 Subject: [PATCH 066/124] refactor determinePrecedence --- src/parser.ts | 66 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 8b5cc8b7c4..8a6ecee021 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -4364,7 +4364,12 @@ export const enum Precedence { /** Determines the precende of a non-starting token. */ function determinePrecedence(kind: Token): Precedence { switch (kind) { - case Token.COMMA: return Precedence.COMMA; + case Token.COMMA: + return Precedence.COMMA; + + case Token.YIELD: + return Precedence.YIELD; + case Token.EQUALS: case Token.PLUS_EQUALS: case Token.MINUS_EQUALS: @@ -4380,39 +4385,68 @@ function determinePrecedence(kind: Token): Precedence { case Token.GREATERTHAN_GREATERTHAN_GREATERTHAN_EQUALS: case Token.AMPERSAND_EQUALS: case Token.CARET_EQUALS: - case Token.BAR_EQUALS: return Precedence.ASSIGNMENT; - case Token.QUESTION: return Precedence.CONDITIONAL; + case Token.BAR_EQUALS: + return Precedence.ASSIGNMENT; + + case Token.QUESTION: + return Precedence.CONDITIONAL; + case Token.BAR_BAR: - case Token.QUESTION_QUESTION: return Precedence.LOGICAL_OR; - case Token.AMPERSAND_AMPERSAND: return Precedence.LOGICAL_AND; - case Token.BAR: return Precedence.BITWISE_OR; - case Token.CARET: return Precedence.BITWISE_XOR; - case Token.AMPERSAND: return Precedence.BITWISE_AND; + case Token.QUESTION_QUESTION: + return Precedence.LOGICAL_OR; + + case Token.AMPERSAND_AMPERSAND: + return Precedence.LOGICAL_AND; + + case Token.BAR: + return Precedence.BITWISE_OR; + + case Token.CARET: + return Precedence.BITWISE_XOR; + + case Token.AMPERSAND: + return Precedence.BITWISE_AND; + case Token.EQUALS_EQUALS: case Token.EXCLAMATION_EQUALS: case Token.EQUALS_EQUALS_EQUALS: - case Token.EXCLAMATION_EQUALS_EQUALS: return Precedence.EQUALITY; + case Token.EXCLAMATION_EQUALS_EQUALS: + return Precedence.EQUALITY; + case Token.AS: case Token.IN: case Token.INSTANCEOF: case Token.LESSTHAN: case Token.GREATERTHAN: case Token.LESSTHAN_EQUALS: - case Token.GREATERTHAN_EQUALS: return Precedence.RELATIONAL; + case Token.GREATERTHAN_EQUALS: + return Precedence.RELATIONAL; + case Token.LESSTHAN_LESSTHAN: case Token.GREATERTHAN_GREATERTHAN: - case Token.GREATERTHAN_GREATERTHAN_GREATERTHAN: return Precedence.SHIFT; + case Token.GREATERTHAN_GREATERTHAN_GREATERTHAN: + return Precedence.SHIFT; + case Token.PLUS: - case Token.MINUS: return Precedence.ADDITIVE; + case Token.MINUS: + return Precedence.ADDITIVE; + case Token.ASTERISK: case Token.SLASH: - case Token.PERCENT: return Precedence.MULTIPLICATIVE; - case Token.ASTERISK_ASTERISK: return Precedence.EXPONENTIATED; + case Token.PERCENT: + return Precedence.MULTIPLICATIVE; + + case Token.ASTERISK_ASTERISK: + return Precedence.EXPONENTIATED; + case Token.PLUS_PLUS: - case Token.MINUS_MINUS: return Precedence.UNARY_POSTFIX; + case Token.MINUS_MINUS: + return Precedence.UNARY_POSTFIX; + case Token.DOT: case Token.OPENBRACKET: - case Token.EXCLAMATION: return Precedence.MEMBERACCESS; + case Token.EXCLAMATION: + return Precedence.MEMBERACCESS; } return Precedence.NONE; } From 8d1e2c8ec12a96af72473230c8d29b3633ca37f7 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 22 Nov 2021 18:48:05 +0200 Subject: [PATCH 067/124] refactor --- src/parser.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 8a6ecee021..1109f61a56 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -119,10 +119,10 @@ export class Parser extends DiagnosticEmitter { /** Constructs a new parser. */ constructor( diagnostics: DiagnosticMessage[] | null = null, - sources: Source[] | null = null + sources: Source[] = [] ) { super(diagnostics); - this.sources = sources ? sources : new Array(); + this.sources = sources; } /** Parses a file and adds its definitions to the program. */ From 343c1617360166a5bc4082cf7236a40e94b466d6 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 22 Nov 2021 18:57:55 +0200 Subject: [PATCH 068/124] refactor --- src/parser.ts | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 1109f61a56..d35166145e 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -408,7 +408,10 @@ export class Parser extends DiagnosticEmitter { case NodeKind.CLASSDECLARATION: case NodeKind.INTERFACEDECLARATION: case NodeKind.NAMESPACEDECLARATION: { - return Node.createExportDefaultStatement(statement, tn.range(startPos, tn.pos)); + return Node.createExportDefaultStatement( + statement, + tn.range(startPos, tn.pos) + ); } default: { this.error( @@ -699,7 +702,6 @@ export class Parser extends DiagnosticEmitter { isSignature = true; tn.discard(state); parameters = []; - } else { isSignature = false; // not yet known do { @@ -766,7 +768,9 @@ export class Parser extends DiagnosticEmitter { } } if (isSignature) { - let param = Node.createParameter(kind, name, Node.createOmittedType(tn.range(tn.pos)), null, tn.range(paramStart, tn.pos)); + let param = Node.createParameter( + kind, name, Node.createOmittedType(tn.range(tn.pos)), null, tn.range(paramStart, tn.pos) + ); if (!parameters) parameters = [ param ]; else parameters.push(param); this.error( @@ -1568,7 +1572,9 @@ export class Parser extends DiagnosticEmitter { var parameters = this.parseParameters(tn); if (!parameters) return null; - return this.parseFunctionExpressionCommon(tn, name, parameters, this.parseParametersThis, arrowKind, startPos, signatureStart); + return this.parseFunctionExpressionCommon( + tn, name, parameters, this.parseParametersThis, arrowKind, startPos, signatureStart + ); } private parseFunctionExpressionCommon( From 5b97b00c1e1be847d4963b2a6fe454fc90fa13bd Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 25 Nov 2021 02:18:27 +0200 Subject: [PATCH 069/124] refactor --- src/parser.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index d35166145e..2459c537ac 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -355,7 +355,7 @@ export class Parser extends DiagnosticEmitter { // handle plain exports if (flags & CommonFlags.EXPORT) { - if (defaultEnd && tn.skipIdentifier(IdentifierHandling.PREFER)) { + if (defaultEnd && tn.skipIdentifier()) { if (declareEnd) { this.error( DiagnosticCode.An_export_assignment_cannot_have_modifiers, @@ -876,7 +876,7 @@ export class Parser extends DiagnosticEmitter { let name = tn.readIdentifier(); let expression: Expression = Node.createIdentifierExpression(name, tn.range(startPos, tn.pos)); while (tn.skip(Token.DOT)) { - if (tn.skipIdentifier(IdentifierHandling.PREFER)) { + if (tn.skipIdentifier()) { name = tn.readIdentifier(); expression = Node.createPropertyAccessExpression( expression, From 396dd23a27508bd419e1ce7274b6c77731246e84 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 25 Nov 2021 13:30:42 +0200 Subject: [PATCH 070/124] better --- src/tokenizer.ts | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index f35443e724..9e4a4e58e0 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1402,6 +1402,7 @@ export class Tokenizer extends DiagnosticEmitter { return text.substring(start, pos); } + // Test is it integer or float without update of position. integerOrFloatToken(): Token { var text = this.source.text; var pos = this.pos + 1; @@ -1704,14 +1705,17 @@ export class Tokenizer extends DiagnosticEmitter { } } let pos = this.pos; - // fast pathes for most usual literals: - // 0.0, 1.0, 2.0 .. 9.0 - if ( - pos - start == 3 && - text.charCodeAt(start + 1) == CharCode.DOT && - text.charCodeAt(start + 2) == CharCode._0 - ) return (text.charCodeAt(start) - CharCode._0); - + // fast pathes for the most common literals + if (pos - start == 3 && text.charCodeAt(start + 1) == CharCode.DOT) { + // 0.0 0.1 0.2 ... 0.9 1.0 1.1 ... 9.7 9.8 9.9 + let c1 = text.charCodeAt(start); + if (c1 != CharCode.MINUS && c1 != CharCode.PLUS) { + let c2 = text.charCodeAt(start + 2); + let d1 = (c1 - CharCode._0); + let d2 = (c2 - CharCode._0) / 10.0; + return d1 + d2; + } + } let result = text.substring(start, pos); if (hasSep) result = result.replaceAll("_", ""); return parseFloat(result); From d0921a051b4edd888d7dc392ecc2f76f7e8b2fa5 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 25 Nov 2021 13:41:16 +0200 Subject: [PATCH 071/124] add tests --- tests/parser/literals.ts | 3 +++ tests/parser/literals.ts.fixture.ts | 23 +++++++++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/tests/parser/literals.ts b/tests/parser/literals.ts index 1f445aeb77..02976cfe99 100644 --- a/tests/parser/literals.ts +++ b/tests/parser/literals.ts @@ -56,6 +56,9 @@ 1.0e+1; 1e-1; 1.0e-1; ++.1; +-.2; +-1.; ""; "\""; "123"; diff --git a/tests/parser/literals.ts.fixture.ts b/tests/parser/literals.ts.fixture.ts index 8f7d004851..c5ca601974 100644 --- a/tests/parser/literals.ts.fixture.ts +++ b/tests/parser/literals.ts.fixture.ts @@ -56,6 +56,9 @@ 10; 0.1; 0.1; ++0.1; +-0.2; +-1; ""; "\""; "123"; @@ -97,13 +100,13 @@ c; d; a; b; -// ERROR 1109: "Expression expected." in literals.ts(86,4+1) -// ERROR 1351: "An identifier or keyword cannot immediately follow a numeric literal." in literals.ts(87,2+0) -// ERROR 1351: "An identifier or keyword cannot immediately follow a numeric literal." in literals.ts(88,2+0) -// ERROR 1109: "Expression expected." in literals.ts(89,3+1) -// ERROR 6188: "Numeric separators are not allowed here." in literals.ts(91,2+0) -// ERROR 1351: "An identifier or keyword cannot immediately follow a numeric literal." in literals.ts(92,3+0) -// ERROR 1351: "An identifier or keyword cannot immediately follow a numeric literal." in literals.ts(93,4+0) -// ERROR 1125: "Hexadecimal digit expected." in literals.ts(94,4+1) -// ERROR 1125: "Hexadecimal digit expected." in literals.ts(94,12+1) -// ERROR 1125: "Hexadecimal digit expected." in literals.ts(94,16+1) +// ERROR 1109: "Expression expected." in literals.ts(89,4+1) +// ERROR 1351: "An identifier or keyword cannot immediately follow a numeric literal." in literals.ts(90,2+0) +// ERROR 1351: "An identifier or keyword cannot immediately follow a numeric literal." in literals.ts(91,2+0) +// ERROR 1109: "Expression expected." in literals.ts(92,3+1) +// ERROR 6188: "Numeric separators are not allowed here." in literals.ts(94,2+0) +// ERROR 1351: "An identifier or keyword cannot immediately follow a numeric literal." in literals.ts(95,3+0) +// ERROR 1351: "An identifier or keyword cannot immediately follow a numeric literal." in literals.ts(96,4+0) +// ERROR 1125: "Hexadecimal digit expected." in literals.ts(97,4+1) +// ERROR 1125: "Hexadecimal digit expected." in literals.ts(97,12+1) +// ERROR 1125: "Hexadecimal digit expected." in literals.ts(97,16+1) From 264d7d21531c11e86e970135791d30a4c5df9c20 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 25 Nov 2021 13:50:04 +0200 Subject: [PATCH 072/124] simplify isIllegalVariableIdentifier --- src/tokenizer.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 9e4a4e58e0..5b32eb0c5c 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -377,9 +377,6 @@ export function tokenIsAlsoIdentifier(token: Token): bool { } export function isIllegalVariableIdentifier(name: string): bool { - if (name.length < 3 || name.length > 10) { - return false; - } switch (name.charCodeAt(0)) { case CharCode.d: return name == "delete"; case CharCode.f: return name == "for"; From 23632a6ad9ff028208ee5a8391c58c5e3d469b68 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 25 Nov 2021 16:19:43 +0200 Subject: [PATCH 073/124] comment --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 2459c537ac..d04d834fcc 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -567,7 +567,7 @@ export class Parser extends DiagnosticEmitter { Node.createSimpleTypeName("this", tn.range()), [], false, tn.range(startPos, tn.pos) ); - // 'true' + // 'true' | `false` } else if (token == Token.TRUE || token == Token.FALSE) { type = Node.createNamedType( Node.createSimpleTypeName("bool", tn.range()), [], false, tn.range(startPos, tn.pos) From 6f3cb12b540ebd7d0fca0d0f5ee1a6f0dac671aa Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 25 Nov 2021 19:37:26 +0200 Subject: [PATCH 074/124] simplify --- src/tokenizer.ts | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 5b32eb0c5c..8b210e4936 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1705,13 +1705,9 @@ export class Tokenizer extends DiagnosticEmitter { // fast pathes for the most common literals if (pos - start == 3 && text.charCodeAt(start + 1) == CharCode.DOT) { // 0.0 0.1 0.2 ... 0.9 1.0 1.1 ... 9.7 9.8 9.9 - let c1 = text.charCodeAt(start); - if (c1 != CharCode.MINUS && c1 != CharCode.PLUS) { - let c2 = text.charCodeAt(start + 2); - let d1 = (c1 - CharCode._0); - let d2 = (c2 - CharCode._0) / 10.0; - return d1 + d2; - } + let d1 = (text.charCodeAt(start + 0) - CharCode._0); + let d2 = (text.charCodeAt(start + 2) - CharCode._0); + return d1 + d2 / 10.0; } let result = text.substring(start, pos); if (hasSep) result = result.replaceAll("_", ""); From 79c2a8659e0a8022c18c4fa03284e9e966d7c43e Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 25 Nov 2021 20:31:25 +0200 Subject: [PATCH 075/124] better --- src/tokenizer.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 8b210e4936..e127bfc9a6 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -983,6 +983,7 @@ export class Tokenizer extends DiagnosticEmitter { // TODO: \uXXXX also support for identifiers if (isIdentifierStart(c)) { let posBefore = pos; + let c0 = text.charCodeAt(pos); while ( ++pos < end && isIdentifierPart(c = text.charCodeAt(pos)) @@ -994,7 +995,7 @@ export class Tokenizer extends DiagnosticEmitter { pos - posBefore >= MIN_KEYWORD_LENGTH && pos - posBefore <= MAX_KEYWORD_LENGTH && // Only a non-capitalised token can be a keyword - (c = text.charCodeAt(posBefore)) >= CharCode.a && c <= CharCode.z + c0 >= CharCode.a && c0 <= CharCode.z ) { let keywordToken = probeKeywordToken(text.substring(posBefore, pos)); if ( From efcadb6c7c6b72f7c21348f6aa9e172b085903fc Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 26 Nov 2021 15:11:42 +0200 Subject: [PATCH 076/124] simplify --- src/tokenizer.ts | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index e127bfc9a6..a8e048395e 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -983,7 +983,6 @@ export class Tokenizer extends DiagnosticEmitter { // TODO: \uXXXX also support for identifiers if (isIdentifierStart(c)) { let posBefore = pos; - let c0 = text.charCodeAt(pos); while ( ++pos < end && isIdentifierPart(c = text.charCodeAt(pos)) @@ -992,10 +991,7 @@ export class Tokenizer extends DiagnosticEmitter { if ( identifierHandling != IdentifierHandling.ALWAYS && - pos - posBefore >= MIN_KEYWORD_LENGTH && - pos - posBefore <= MAX_KEYWORD_LENGTH && - // Only a non-capitalised token can be a keyword - c0 >= CharCode.a && c0 <= CharCode.z + pos - posBefore >= MIN_KEYWORD_LENGTH && pos - posBefore <= MAX_KEYWORD_LENGTH ) { let keywordToken = probeKeywordToken(text.substring(posBefore, pos)); if ( From 8d8251ebc799cb4b956d7452a74bfc276f2f18a9 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 26 Nov 2021 15:13:24 +0200 Subject: [PATCH 077/124] refactor --- src/tokenizer.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index a8e048395e..cb77ef3f15 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -991,7 +991,8 @@ export class Tokenizer extends DiagnosticEmitter { if ( identifierHandling != IdentifierHandling.ALWAYS && - pos - posBefore >= MIN_KEYWORD_LENGTH && pos - posBefore <= MAX_KEYWORD_LENGTH + pos - posBefore >= MIN_KEYWORD_LENGTH && + pos - posBefore <= MAX_KEYWORD_LENGTH ) { let keywordToken = probeKeywordToken(text.substring(posBefore, pos)); if ( From 437ce5a1e186444d8e85f97bfdbeb34a452c71a4 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 26 Nov 2021 15:17:35 +0200 Subject: [PATCH 078/124] refactor configs --- cli/asc.js | 1 + index.js | 1 + src/tsconfig.json | 3 --- std/portable.json | 4 +++- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cli/asc.js b/cli/asc.js index 3c47266c2a..3dd47fc2f9 100644 --- a/cli/asc.js +++ b/cli/asc.js @@ -102,6 +102,7 @@ function loadAssemblyScriptJS() { "../src/glue/js/*": "cjs" }, compilerOptions: { + preserveConstEnums: true, module: "esnext", target: "es2017" } diff --git a/index.js b/index.js index a2ab787e8e..15b88e6888 100644 --- a/index.js +++ b/index.js @@ -4,6 +4,7 @@ require("ts-node").register({ skipIgnore: true, files: true, compilerOptions: { + preserveConstEnums: true, removeComments: false } }); diff --git a/src/tsconfig.json b/src/tsconfig.json index 412d41edc4..596d9b9697 100644 --- a/src/tsconfig.json +++ b/src/tsconfig.json @@ -8,9 +8,6 @@ "allowJs": false, "strict": true, "skipLibCheck": true, - "preserveConstEnums": false, - "useDefineForClassFields": false, - "noPropertyAccessFromIndexSignature": true, "noImplicitReturns": true }, "include": [ diff --git a/std/portable.json b/std/portable.json index 1a1e427d21..9a0cd193c6 100644 --- a/std/portable.json +++ b/std/portable.json @@ -5,7 +5,9 @@ "module": "commonjs", "allowJs": true, "downlevelIteration": true, - "preserveConstEnums": true, + "preserveConstEnums": false, + "useDefineForClassFields": false, + "noPropertyAccessFromIndexSignature": true, "typeRoots": [ "types" ], "types": [ "portable" ], "lib": ["esnext", "esnext.string"] From 163ec999b66173fb23cb2a57b983f3ac0fcb1e3d Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 28 Nov 2021 23:22:05 +0200 Subject: [PATCH 079/124] refactor peek --- src/tokenizer.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index cb77ef3f15..aa29f81c3d 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1036,11 +1036,11 @@ export class Tokenizer extends DiagnosticEmitter { maxCompoundLength: i32 = i32.MAX_VALUE ): Token { var text = this.source.text; - if (this.nextToken < 0) { + var nextToken = this.nextToken; + if (nextToken < 0) { let posBefore = this.pos; let tokenBefore = this.token; let tokenPosBefore = this.tokenPos; - let nextToken: Token; do nextToken = this.unsafeNext(identifierHandling, maxCompoundLength); while (nextToken == Token.INVALID); this.nextToken = nextToken; @@ -1058,7 +1058,7 @@ export class Tokenizer extends DiagnosticEmitter { this.token = tokenBefore; this.tokenPos = tokenPosBefore; } - return this.nextToken; + return nextToken; } skipLineComment(text: string, pos: i32, end: i32): i32 { From 6d43043592941472962a1cfa358de4d329eefec2 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 28 Nov 2021 23:32:28 +0200 Subject: [PATCH 080/124] opt readIdentifier --- src/tokenizer.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index aa29f81c3d..a31dc77504 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1201,7 +1201,11 @@ export class Tokenizer extends DiagnosticEmitter { isIdentifierPart(text.charCodeAt(pos)) ); this.pos = pos; - return text.substring(start, pos); + if (pos - start == 1) { + return text.charAt(start); + } else { + return text.substring(start, pos); + } } readingTemplateString: bool = false; From 590181f34c1d5c7b3f8b2d46368474df40073842 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 29 Nov 2021 01:20:38 +0200 Subject: [PATCH 081/124] simplify comment --- src/tokenizer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index a31dc77504..aab055f37b 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1706,7 +1706,7 @@ export class Tokenizer extends DiagnosticEmitter { let pos = this.pos; // fast pathes for the most common literals if (pos - start == 3 && text.charCodeAt(start + 1) == CharCode.DOT) { - // 0.0 0.1 0.2 ... 0.9 1.0 1.1 ... 9.7 9.8 9.9 + // 0.0 ... 9.9 with step = 0.1 let d1 = (text.charCodeAt(start + 0) - CharCode._0); let d2 = (text.charCodeAt(start + 2) - CharCode._0); return d1 + d2 / 10.0; From d95b5d5e679c1bfd6a2eac7e7b1998ac362a6e73 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 29 Nov 2021 01:21:30 +0200 Subject: [PATCH 082/124] more --- src/tokenizer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index aab055f37b..1af75d007f 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1704,8 +1704,8 @@ export class Tokenizer extends DiagnosticEmitter { } } let pos = this.pos; - // fast pathes for the most common literals if (pos - start == 3 && text.charCodeAt(start + 1) == CharCode.DOT) { + // fast path for the most common literals: // 0.0 ... 9.9 with step = 0.1 let d1 = (text.charCodeAt(start + 0) - CharCode._0); let d2 = (text.charCodeAt(start + 2) - CharCode._0); From 384025616b89994530655634d98e0fb66198cda3 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 29 Nov 2021 02:13:33 +0200 Subject: [PATCH 083/124] use index intead charAt --- src/tokenizer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 1af75d007f..4b02127934 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1202,7 +1202,7 @@ export class Tokenizer extends DiagnosticEmitter { ); this.pos = pos; if (pos - start == 1) { - return text.charAt(start); + return text[start]; } else { return text.substring(start, pos); } From bb7ec829aed9ca67f7cd3646789df6b9966dcc48 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Thu, 2 Dec 2021 13:45:58 +0200 Subject: [PATCH 084/124] revert some changes in test after resolve conflicts --- tests/parser/also-identifier.ts | 2 +- tests/parser/also-identifier.ts.fixture.ts | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/parser/also-identifier.ts b/tests/parser/also-identifier.ts index 78d44a734a..53c9bc66d9 100644 --- a/tests/parser/also-identifier.ts +++ b/tests/parser/also-identifier.ts @@ -28,12 +28,12 @@ var set: i32; var from: i32; var keyof: i32; var module: i32; +var declare: i32; var readonly: i32; var namespace: i32; var constructor: i32; // -- illegal -- -// var in: i32; // var for: i32; // var null: i32; // var type: i32; diff --git a/tests/parser/also-identifier.ts.fixture.ts b/tests/parser/also-identifier.ts.fixture.ts index 8a580fb89e..8e0e4f6af0 100644 --- a/tests/parser/also-identifier.ts.fixture.ts +++ b/tests/parser/also-identifier.ts.fixture.ts @@ -27,6 +27,7 @@ var set: i32; var from: i32; var keyof: i32; var module: i32; +var declare: i32; var readonly: i32; var namespace: i32; var constructor: i32; From ced50155bf42739cee17282d6cd8890cdc032239 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 5 Dec 2021 02:51:33 +0200 Subject: [PATCH 085/124] refactor --- src/tokenizer.ts | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 4b02127934..cc3792f021 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1246,7 +1246,10 @@ export class Tokenizer extends DiagnosticEmitter { continue; } if (quote == CharCode.BACKTICK) { - if (c == CharCode.DOLLAR && pos + 1 < end && text.charCodeAt(pos + 1) == CharCode.OPENBRACE) { + if ( + c == CharCode.DOLLAR && pos + 1 < end && + text.charCodeAt(pos + 1) == CharCode.OPENBRACE + ) { result += text.substring(start, pos); this.readStringEnd = pos; this.pos = pos + 2; @@ -1285,10 +1288,10 @@ export class Tokenizer extends DiagnosticEmitter { var c = text.charCodeAt(this.pos++); switch (c) { case CharCode._0: { - if (isTaggedTemplate && this.pos < end && isDecimal(text.charCodeAt(this.pos))) { - ++this.pos; - return text.substring(start, this.pos); - } + if ( + isTaggedTemplate && this.pos < end && + isDecimal(text.charCodeAt(this.pos)) + ) return text.substring(start, ++this.pos); return "\0"; } case CharCode.b: return "\b"; @@ -1316,9 +1319,7 @@ export class Tokenizer extends DiagnosticEmitter { if ( this.pos < end && text.charCodeAt(this.pos) == CharCode.LINEFEED - ) { - ++this.pos; - } + ) ++this.pos; // fall through } case CharCode.LINEFEED: From c61f41b8c04435d760b2d89282522fa60a815a25 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 5 Dec 2021 11:20:00 +0200 Subject: [PATCH 086/124] remove preserveCostEnum --- cli/asc.js | 1 - index.js | 1 - std/portable.json | 1 - 3 files changed, 3 deletions(-) diff --git a/cli/asc.js b/cli/asc.js index 3dd47fc2f9..3c47266c2a 100644 --- a/cli/asc.js +++ b/cli/asc.js @@ -102,7 +102,6 @@ function loadAssemblyScriptJS() { "../src/glue/js/*": "cjs" }, compilerOptions: { - preserveConstEnums: true, module: "esnext", target: "es2017" } diff --git a/index.js b/index.js index 15b88e6888..a2ab787e8e 100644 --- a/index.js +++ b/index.js @@ -4,7 +4,6 @@ require("ts-node").register({ skipIgnore: true, files: true, compilerOptions: { - preserveConstEnums: true, removeComments: false } }); diff --git a/std/portable.json b/std/portable.json index 9a0cd193c6..a9bb503c7f 100644 --- a/std/portable.json +++ b/std/portable.json @@ -5,7 +5,6 @@ "module": "commonjs", "allowJs": true, "downlevelIteration": true, - "preserveConstEnums": false, "useDefineForClassFields": false, "noPropertyAccessFromIndexSignature": true, "typeRoots": [ "types" ], From eec4d88781b6dd009b9d898f0e0ca3130db1104f Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 5 Dec 2021 21:06:05 +0200 Subject: [PATCH 087/124] add lookup token table --- src/tokenizer.ts | 139 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 136 insertions(+), 3 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index cc3792f021..fd1095cd60 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -106,8 +106,6 @@ export const enum Token { WITH, // ES2017 YIELD, // ES2017 - LAST_KEYWORD = YIELD, - // punctuation OPENBRACE, @@ -178,6 +176,9 @@ export const enum Token { // meta + FLOAT_OR_INTEGER_LITERAL, + IDENTIFIER_OR_KEYWORD, + WHITESPACE, INVALID, EOF } @@ -188,6 +189,138 @@ export const enum IdentifierHandling { ALWAYS } +// from 0-127 +const SINGLE_CHAR_TOKENS: Token[] = [ + /* 0x00 */ Token.INVALID, + /* 0x01 */ Token.INVALID, + /* 0x02 */ Token.INVALID, + /* 0x03 */ Token.INVALID, + /* 0x04 */ Token.INVALID, + /* 0x05 */ Token.INVALID, + /* 0x06 */ Token.INVALID, + /* 0x07 */ Token.INVALID, + /* 0x08 */ Token.INVALID, + /* \t */ Token.WHITESPACE, + /* \n */ Token.WHITESPACE, + /* \v */ Token.WHITESPACE, + /* \f */ Token.WHITESPACE, + /* \r */ Token.INVALID, + /* 0x0E */ Token.INVALID, + /* 0x0F */ Token.INVALID, + /* 0x10 */ Token.INVALID, + /* 0x11 */ Token.INVALID, + /* 0x12 */ Token.INVALID, + /* 0x13 */ Token.INVALID, + /* 0x14 */ Token.INVALID, + /* 0x15 */ Token.INVALID, + /* 0x16 */ Token.INVALID, + /* 0x17 */ Token.INVALID, + /* 0x18 */ Token.INVALID, + /* 0x19 */ Token.INVALID, + /* 0x1A */ Token.INVALID, + /* 0x1B */ Token.INVALID, + /* 0x1C */ Token.INVALID, + /* 0x1D */ Token.INVALID, + /* 0x1E */ Token.INVALID, + /* 0x1F */ Token.INVALID, + /* ' ' */ Token.WHITESPACE, + /* ! */ Token.INVALID, + /* " */ Token.STRINGLITERAL, + /* # */ Token.INVALID, + /* $ */ Token.IDENTIFIER, + /* % */ Token.INVALID, + /* & */ Token.INVALID, + /* ' */ Token.STRINGLITERAL, + /* ( */ Token.OPENPAREN, + /* ) */ Token.CLOSEPAREN, + /* * */ Token.INVALID, + /* + */ Token.INVALID, + /* , */ Token.COMMA, + /* - */ Token.INVALID, + /* . */ Token.INVALID, + /* / */ Token.INVALID, + /* 0 */ Token.FLOAT_OR_INTEGER_LITERAL, + /* 1 */ Token.FLOAT_OR_INTEGER_LITERAL, + /* 2 */ Token.FLOAT_OR_INTEGER_LITERAL, + /* 3 */ Token.FLOAT_OR_INTEGER_LITERAL, + /* 4 */ Token.FLOAT_OR_INTEGER_LITERAL, + /* 5 */ Token.FLOAT_OR_INTEGER_LITERAL, + /* 6 */ Token.FLOAT_OR_INTEGER_LITERAL, + /* 7 */ Token.FLOAT_OR_INTEGER_LITERAL, + /* 8 */ Token.FLOAT_OR_INTEGER_LITERAL, + /* 9 */ Token.FLOAT_OR_INTEGER_LITERAL, + /* : */ Token.COLON, + /* ; */ Token.SEMICOLON, + /* < */ Token.INVALID, + /* = */ Token.INVALID, + /* > */ Token.INVALID, + /* ? */ Token.INVALID, + /* @ */ Token.AT, + /* A */ Token.IDENTIFIER, + /* B */ Token.IDENTIFIER, + /* C */ Token.IDENTIFIER, + /* D */ Token.IDENTIFIER, + /* E */ Token.IDENTIFIER, + /* F */ Token.IDENTIFIER, + /* G */ Token.IDENTIFIER, + /* H */ Token.IDENTIFIER, + /* I */ Token.IDENTIFIER, + /* J */ Token.IDENTIFIER, + /* K */ Token.IDENTIFIER, + /* L */ Token.IDENTIFIER, + /* M */ Token.IDENTIFIER, + /* N */ Token.IDENTIFIER, + /* O */ Token.IDENTIFIER, + /* P */ Token.IDENTIFIER, + /* Q */ Token.IDENTIFIER, + /* R */ Token.IDENTIFIER, + /* S */ Token.IDENTIFIER, + /* T */ Token.IDENTIFIER, + /* U */ Token.IDENTIFIER, + /* V */ Token.IDENTIFIER, + /* W */ Token.IDENTIFIER, + /* X */ Token.IDENTIFIER, + /* Y */ Token.IDENTIFIER, + /* Z */ Token.IDENTIFIER, + /* [ */ Token.OPENBRACKET, + /* \ */ Token.INVALID, + /* ] */ Token.CLOSEBRACKET, + /* ^ */ Token.INVALID, + /* _ */ Token.IDENTIFIER, + /* ` */ Token.TEMPLATELITERAL, + /* a */ Token.IDENTIFIER_OR_KEYWORD, + /* b */ Token.IDENTIFIER_OR_KEYWORD, + /* c */ Token.IDENTIFIER_OR_KEYWORD, + /* d */ Token.IDENTIFIER_OR_KEYWORD, + /* e */ Token.IDENTIFIER_OR_KEYWORD, + /* f */ Token.IDENTIFIER_OR_KEYWORD, + /* g */ Token.IDENTIFIER_OR_KEYWORD, + /* h */ Token.IDENTIFIER, + /* i */ Token.IDENTIFIER_OR_KEYWORD, + /* j */ Token.IDENTIFIER, + /* k */ Token.IDENTIFIER_OR_KEYWORD, + /* l */ Token.IDENTIFIER_OR_KEYWORD, + /* m */ Token.IDENTIFIER_OR_KEYWORD, + /* n */ Token.IDENTIFIER_OR_KEYWORD, + /* o */ Token.IDENTIFIER_OR_KEYWORD, + /* p */ Token.IDENTIFIER_OR_KEYWORD, + /* q */ Token.IDENTIFIER, + /* r */ Token.IDENTIFIER_OR_KEYWORD, + /* s */ Token.IDENTIFIER_OR_KEYWORD, + /* t */ Token.IDENTIFIER_OR_KEYWORD, + /* u */ Token.IDENTIFIER, + /* v */ Token.IDENTIFIER_OR_KEYWORD, + /* w */ Token.IDENTIFIER_OR_KEYWORD, + /* x */ Token.IDENTIFIER, + /* y */ Token.IDENTIFIER_OR_KEYWORD, + /* z */ Token.IDENTIFIER, + /* { */ Token.OPENBRACE, + /* | */ Token.INVALID, + /* } */ Token.CLOSEBRACE, + /* ~ */ Token.TILDE, + /* 0x7F */ Token.INVALID, +]; + export function probeKeywordToken(text: string): Token { let len = text.length; assert(len); @@ -579,8 +712,8 @@ export class Tokenizer extends DiagnosticEmitter { // otherwise fall-through } // `\n`, `\t`, `\v`, `\f`, ` ` - case CharCode.LINEFEED: case CharCode.TAB: + case CharCode.LINEFEED: case CharCode.VERTICALTAB: case CharCode.FORMFEED: case CharCode.SPACE: { From 3d0daa94b5d40edd514a61edd0d060d0917889c7 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 00:24:15 +0200 Subject: [PATCH 088/124] use this table for speedup & simplify tokenization --- src/tokenizer.ts | 856 +++++++++--------- tests/compiler/duplicate-fields.optimized.wat | 356 ++++---- 2 files changed, 588 insertions(+), 624 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index fd1095cd60..56b1773a3a 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -178,6 +178,7 @@ export const enum Token { FLOAT_OR_INTEGER_LITERAL, IDENTIFIER_OR_KEYWORD, + OPERATOR, WHITESPACE, INVALID, EOF @@ -190,7 +191,7 @@ export const enum IdentifierHandling { } // from 0-127 -const SINGLE_CHAR_TOKENS: Token[] = [ +const BASIC_TOKENS: Token[] = [ /* 0x00 */ Token.INVALID, /* 0x01 */ Token.INVALID, /* 0x02 */ Token.INVALID, @@ -204,7 +205,7 @@ const SINGLE_CHAR_TOKENS: Token[] = [ /* \n */ Token.WHITESPACE, /* \v */ Token.WHITESPACE, /* \f */ Token.WHITESPACE, - /* \r */ Token.INVALID, + /* \r */ Token.WHITESPACE, /* 0x0E */ Token.INVALID, /* 0x0F */ Token.INVALID, /* 0x10 */ Token.INVALID, @@ -224,21 +225,21 @@ const SINGLE_CHAR_TOKENS: Token[] = [ /* 0x1E */ Token.INVALID, /* 0x1F */ Token.INVALID, /* ' ' */ Token.WHITESPACE, - /* ! */ Token.INVALID, + /* ! */ Token.OPERATOR, /* " */ Token.STRINGLITERAL, /* # */ Token.INVALID, /* $ */ Token.IDENTIFIER, - /* % */ Token.INVALID, - /* & */ Token.INVALID, + /* % */ Token.OPERATOR, + /* & */ Token.OPERATOR, /* ' */ Token.STRINGLITERAL, /* ( */ Token.OPENPAREN, /* ) */ Token.CLOSEPAREN, - /* * */ Token.INVALID, - /* + */ Token.INVALID, + /* * */ Token.OPERATOR, + /* + */ Token.OPERATOR, /* , */ Token.COMMA, - /* - */ Token.INVALID, - /* . */ Token.INVALID, - /* / */ Token.INVALID, + /* - */ Token.OPERATOR, + /* . */ Token.OPERATOR, + /* / */ Token.OPERATOR, /* 0 */ Token.FLOAT_OR_INTEGER_LITERAL, /* 1 */ Token.FLOAT_OR_INTEGER_LITERAL, /* 2 */ Token.FLOAT_OR_INTEGER_LITERAL, @@ -251,10 +252,10 @@ const SINGLE_CHAR_TOKENS: Token[] = [ /* 9 */ Token.FLOAT_OR_INTEGER_LITERAL, /* : */ Token.COLON, /* ; */ Token.SEMICOLON, - /* < */ Token.INVALID, - /* = */ Token.INVALID, - /* > */ Token.INVALID, - /* ? */ Token.INVALID, + /* < */ Token.OPERATOR, + /* = */ Token.OPERATOR, + /* > */ Token.OPERATOR, + /* ? */ Token.OPERATOR, /* @ */ Token.AT, /* A */ Token.IDENTIFIER, /* B */ Token.IDENTIFIER, @@ -285,7 +286,7 @@ const SINGLE_CHAR_TOKENS: Token[] = [ /* [ */ Token.OPENBRACKET, /* \ */ Token.INVALID, /* ] */ Token.CLOSEBRACKET, - /* ^ */ Token.INVALID, + /* ^ */ Token.OPERATOR, /* _ */ Token.IDENTIFIER, /* ` */ Token.TEMPLATELITERAL, /* a */ Token.IDENTIFIER_OR_KEYWORD, @@ -315,7 +316,7 @@ const SINGLE_CHAR_TOKENS: Token[] = [ /* y */ Token.IDENTIFIER_OR_KEYWORD, /* z */ Token.IDENTIFIER, /* { */ Token.OPENBRACE, - /* | */ Token.INVALID, + /* | */ Token.OPERATOR, /* } */ Token.CLOSEBRACE, /* ~ */ Token.TILDE, /* 0x7F */ Token.INVALID, @@ -702,496 +703,475 @@ export class Tokenizer extends DiagnosticEmitter { while (pos < end) { this.tokenPos = pos; let c = text.charCodeAt(pos); - switch (c) { - // `\r`, `\r\n` - case CharCode.CARRIAGERETURN: { - if (!( - ++pos < end && - text.charCodeAt(pos) == CharCode.LINEFEED - )) break; - // otherwise fall-through + if (c <= 0x7F) { + let token = unchecked(BASIC_TOKENS[c]); + // Basic tokens + if (token != Token.INVALID) { + switch (token) { + case Token.WHITESPACE: { + // `\r`, `\r\n` + if (c == CharCode.CARRIAGERETURN) { + if (!( + ++pos < end && + text.charCodeAt(pos) == CharCode.LINEFEED + )) break; + } + // `\n`, `\t`, `\v`, `\f`, ` ` + ++pos; + break; + } + case Token.FLOAT_OR_INTEGER_LITERAL: { + // `0.`, `0x`, `0b`, `0o` + if (c == CharCode._0) { + if (pos + 1 < end) { + c = text.charCodeAt(pos + 1); + if (c == CharCode.DOT) { + this.pos = pos; + return Token.FLOATLITERAL; + } + switch (c | 32) { + case CharCode.x: + case CharCode.b: + case CharCode.o: { + this.pos = pos; + return Token.INTEGERLITERAL; + } + } + } + } + this.pos = pos; + return this.integerOrFloatToken(); + } + // `$`, `_`, `h`, `j`, `q`, `u`, `x`, `z`, `A`..`Z` + case Token.IDENTIFIER: { + let posBefore = pos; + while ( + ++pos < end && + isIdentifierPart(c = text.charCodeAt(pos)) + ) { /* nop */ } + this.pos = posBefore; + return Token.IDENTIFIER; + } + // `a`..`z` + case Token.IDENTIFIER_OR_KEYWORD: { + let posBefore = pos; + while ( + ++pos < end && + isIdentifierPart(c = text.charCodeAt(pos)) + ) { /* nop */ } + if ( + identifierHandling != IdentifierHandling.ALWAYS && + pos - posBefore >= MIN_KEYWORD_LENGTH && + pos - posBefore <= MAX_KEYWORD_LENGTH + ) { + let keywordToken = probeKeywordToken(text.substring(posBefore, pos)); + if ( + keywordToken != Token.INVALID && + !( + identifierHandling == IdentifierHandling.PREFER && + tokenIsAlsoIdentifier(keywordToken) + ) + ) { + this.pos = pos; + return keywordToken; + } + } + this.pos = posBefore; + return Token.IDENTIFIER; + } + case Token.STRINGLITERAL: + case Token.TEMPLATELITERAL: { + // FIXME + this.pos = pos; + return token; + } + case Token.OPERATOR: { + token = this.operatorToken(c, text, pos, end, maxTokenLength); + pos = this.pos; + if (token == Token.INVALID) continue; + return token; + } + // `[`, `{`, `(`, `,`, `:`, `;`, `@` and etc + default: { + this.pos = pos + 1; + return token; + } + } } - // `\n`, `\t`, `\v`, `\f`, ` ` - case CharCode.TAB: - case CharCode.LINEFEED: - case CharCode.VERTICALTAB: - case CharCode.FORMFEED: - case CharCode.SPACE: { + } else { + // TODO: \uXXXX also support for identifiers + if (isIdentifierStart(c)) { + while ( + ++pos < end && + isIdentifierPart(c = text.charCodeAt(pos)) + ) { /* nop */ } + return Token.IDENTIFIER; + } else if (isWhiteSpace(c)) { ++pos; break; } - // `!`, `!=`, `!==` - case CharCode.EXCLAMATION: { + let start = pos++; + if ( + isHighSurrogate(c) && pos < end && + isLowSurrogate(text.charCodeAt(pos)) + ) ++pos; + this.error( + DiagnosticCode.Invalid_character, + this.range(start, pos) + ); + this.pos = pos; + return Token.INVALID; + } + } + this.pos = pos; + return Token.EOF; + } + + peek( + checkOnNewLine: bool = false, + identifierHandling: IdentifierHandling = IdentifierHandling.DEFAULT, + maxCompoundLength: i32 = i32.MAX_VALUE + ): Token { + var text = this.source.text; + var nextToken = this.nextToken; + if (nextToken < 0) { + let posBefore = this.pos; + let tokenBefore = this.token; + let tokenPosBefore = this.tokenPos; + do nextToken = this.unsafeNext(identifierHandling, maxCompoundLength); + while (nextToken == Token.INVALID); + this.nextToken = nextToken; + this.nextTokenPos = this.tokenPos; + if (checkOnNewLine) { + this.nextTokenOnNewLine = false; + for (let pos = posBefore, end = this.nextTokenPos; pos < end; ++pos) { + if (isLineBreak(text.charCodeAt(pos))) { + this.nextTokenOnNewLine = true; + break; + } + } + } + this.pos = posBefore; + this.token = tokenBefore; + this.tokenPos = tokenPosBefore; + } + return nextToken; + } + + private operatorToken(c: i32, text: string, pos: i32, end: i32, maxTokenLength: i32): Token { + // Operator tokens + switch (c) { + // `!`, `!=`, `!==` + case CharCode.EXCLAMATION: { + ++pos; + if ( + maxTokenLength > 1 && pos < end && + text.charCodeAt(pos) == CharCode.EQUALS + ) { ++pos; if ( - maxTokenLength > 1 && pos < end && + maxTokenLength > 2 && pos < end && text.charCodeAt(pos) == CharCode.EQUALS ) { + this.pos = pos + 1; + return Token.EXCLAMATION_EQUALS_EQUALS; + } + this.pos = pos; + return Token.EXCLAMATION_EQUALS; + } + this.pos = pos; + return Token.EXCLAMATION; + } + // `%`, `%=` + case CharCode.PERCENT: { + ++pos; + if ( + maxTokenLength > 1 && pos < end && + text.charCodeAt(pos) == CharCode.EQUALS + ) { + this.pos = pos + 1; + return Token.PERCENT_EQUALS; + } + this.pos = pos; + return Token.PERCENT; + } + // `&`, `&&`, `&=`, `&&=` + case CharCode.AMPERSAND: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.EQUALS) { + this.pos = pos + 1; + return Token.AMPERSAND_EQUALS; + } + if (c == CharCode.AMPERSAND) { ++pos; if ( maxTokenLength > 2 && pos < end && text.charCodeAt(pos) == CharCode.EQUALS ) { this.pos = pos + 1; - return Token.EXCLAMATION_EQUALS_EQUALS; + return Token.AMPERSAND_AMPERSAND_EQUALS; } this.pos = pos; - return Token.EXCLAMATION_EQUALS; + return Token.AMPERSAND_AMPERSAND; } - this.pos = pos; - return Token.EXCLAMATION; - } - case CharCode.DOUBLEQUOTE: - case CharCode.SINGLEQUOTE: { - this.pos = pos; - return Token.STRINGLITERAL; - } - case CharCode.BACKTICK: { - this.pos = pos; - return Token.TEMPLATELITERAL; } - // `%`, `%=` - case CharCode.PERCENT: { - ++pos; - if ( - maxTokenLength > 1 && pos < end && - text.charCodeAt(pos) == CharCode.EQUALS - ) { + this.pos = pos; + return Token.AMPERSAND; + } + // `*`, `**`, `*=`, `**=` + case CharCode.ASTERISK: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.EQUALS) { this.pos = pos + 1; - return Token.PERCENT_EQUALS; + return Token.ASTERISK_EQUALS; } - this.pos = pos; - return Token.PERCENT; - } - // `&`, `&&`, `&=`, `&&=` - case CharCode.AMPERSAND: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - c = text.charCodeAt(pos); - if (c == CharCode.EQUALS) { + if (c == CharCode.ASTERISK) { + ++pos; + if ( + maxTokenLength > 2 && pos < end && + text.charCodeAt(pos) == CharCode.EQUALS + ) { this.pos = pos + 1; - return Token.AMPERSAND_EQUALS; - } - if (c == CharCode.AMPERSAND) { - ++pos; - if ( - maxTokenLength > 2 && pos < end && - text.charCodeAt(pos) == CharCode.EQUALS - ) { - this.pos = pos + 1; - return Token.AMPERSAND_AMPERSAND_EQUALS; - } - this.pos = pos; - return Token.AMPERSAND_AMPERSAND; + return Token.ASTERISK_ASTERISK_EQUALS; } + this.pos = pos; + return Token.ASTERISK_ASTERISK; } - this.pos = pos; - return Token.AMPERSAND; - } - case CharCode.OPENPAREN: { - this.pos = pos + 1; - return Token.OPENPAREN; - } - case CharCode.CLOSEPAREN: { - this.pos = pos + 1; - return Token.CLOSEPAREN; } - // `*`, `**`, `*=`, `**=` - case CharCode.ASTERISK: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - c = text.charCodeAt(pos); - if (c == CharCode.EQUALS) { - this.pos = pos + 1; - return Token.ASTERISK_EQUALS; - } - if (c == CharCode.ASTERISK) { - ++pos; - if ( - maxTokenLength > 2 && pos < end && - text.charCodeAt(pos) == CharCode.EQUALS - ) { - this.pos = pos + 1; - return Token.ASTERISK_ASTERISK_EQUALS; - } - this.pos = pos; - return Token.ASTERISK_ASTERISK; - } + this.pos = pos; + return Token.ASTERISK; + } + // `+`, `++`, `+=` + case CharCode.PLUS: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.PLUS) { + this.pos = pos + 1; + return Token.PLUS_PLUS; + } + if (c == CharCode.EQUALS) { + this.pos = pos + 1; + return Token.PLUS_EQUALS; } - this.pos = pos; - return Token.ASTERISK; } - // `+`, `++`, `+=` - case CharCode.PLUS: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - c = text.charCodeAt(pos); - if (c == CharCode.PLUS) { - this.pos = pos + 1; - return Token.PLUS_PLUS; - } - if (c == CharCode.EQUALS) { - this.pos = pos + 1; - return Token.PLUS_EQUALS; - } + this.pos = pos; + return Token.PLUS; + } + // `-`, `-=`, `--` + case CharCode.MINUS: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.MINUS) { + this.pos = pos + 1; + return Token.MINUS_MINUS; + } + if (c == CharCode.EQUALS) { + this.pos = pos + 1; + return Token.MINUS_EQUALS; } - this.pos = pos; - return Token.PLUS; } - case CharCode.COMMA: { - this.pos = pos + 1; - return Token.COMMA; + this.pos = pos; + return Token.MINUS; + } + // `.`, `.{d}`, `...` + case CharCode.DOT: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (isDecimal(c)) { + this.pos = pos - 1; + return Token.FLOATLITERAL; // expects a call to readFloat + } + if ( + maxTokenLength > 2 && + pos + 1 < end && c == CharCode.DOT && + text.charCodeAt(pos + 1) == CharCode.DOT + ) { + this.pos = pos + 2; + return Token.DOT_DOT_DOT; + } } - // `-`, `-=`, `--` - case CharCode.MINUS: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - c = text.charCodeAt(pos); - if (c == CharCode.MINUS) { - this.pos = pos + 1; - return Token.MINUS_MINUS; - } - if (c == CharCode.EQUALS) { - this.pos = pos + 1; - return Token.MINUS_EQUALS; - } + this.pos = pos; + return Token.DOT; + } + // `/`, `//`, `/*`, `/=`, `///` + case CharCode.SLASH: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.SLASH) { // single-line + pos = this.skipLineComment(text, pos, end); + break; + } + if (c == CharCode.ASTERISK) { // multi-line + pos = this.skipBlockComment(text, pos, end); + break; + } + if (c == CharCode.EQUALS) { + this.pos = pos + 1; + return Token.SLASH_EQUALS; } - this.pos = pos; - return Token.MINUS; } - // `.`, `.{d}`, `...` - case CharCode.DOT: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - c = text.charCodeAt(pos); - if (isDecimal(c)) { - this.pos = pos - 1; - return Token.FLOATLITERAL; // expects a call to readFloat - } + this.pos = pos; + return Token.SLASH; + } + // `<`, `<<`, `<=` `<<=` + case CharCode.LESSTHAN: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.LESSTHAN) { + ++pos; if ( maxTokenLength > 2 && - pos + 1 < end && c == CharCode.DOT && - text.charCodeAt(pos + 1) == CharCode.DOT + pos < end && + text.charCodeAt(pos) == CharCode.EQUALS ) { - this.pos = pos + 2; - return Token.DOT_DOT_DOT; - } - } - this.pos = pos; - return Token.DOT; - } - // `/`, `//`, `/*`, `/=`, `///` - case CharCode.SLASH: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - c = text.charCodeAt(pos); - if (c == CharCode.SLASH) { // single-line - pos = this.skipLineComment(text, pos, end); - break; - } - if (c == CharCode.ASTERISK) { // multi-line - pos = this.skipBlockComment(text, pos, end); - break; - } - if (c == CharCode.EQUALS) { this.pos = pos + 1; - return Token.SLASH_EQUALS; + return Token.LESSTHAN_LESSTHAN_EQUALS; } + this.pos = pos; + return Token.LESSTHAN_LESSTHAN; } - this.pos = pos; - return Token.SLASH; - } - // `0.`, `0x`, `0b`, `0o` - case CharCode._0: { - if (pos + 1 < end) { - c = text.charCodeAt(pos + 1); - if (c == CharCode.DOT) { - this.pos = pos; - return Token.FLOATLITERAL; - } - switch (c | 32) { - case CharCode.x: - case CharCode.b: - case CharCode.o: { - this.pos = pos; - return Token.INTEGERLITERAL; - } - } + if (c == CharCode.EQUALS) { + this.pos = pos + 1; + return Token.LESSTHAN_EQUALS; } - // fall-through - } - case CharCode._1: - case CharCode._2: - case CharCode._3: - case CharCode._4: - case CharCode._5: - case CharCode._6: - case CharCode._7: - case CharCode._8: - case CharCode._9: { - this.pos = pos; - return this.integerOrFloatToken(); } - case CharCode.COLON: { - this.pos = pos + 1; - return Token.COLON; - } - case CharCode.SEMICOLON: { - this.pos = pos + 1; - return Token.SEMICOLON; - } - // `<`, `<<`, `<=` `<<=` - case CharCode.LESSTHAN: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - c = text.charCodeAt(pos); - if (c == CharCode.LESSTHAN) { - ++pos; - if ( - maxTokenLength > 2 && - pos < end && - text.charCodeAt(pos) == CharCode.EQUALS - ) { - this.pos = pos + 1; - return Token.LESSTHAN_LESSTHAN_EQUALS; - } - this.pos = pos; - return Token.LESSTHAN_LESSTHAN; - } - if (c == CharCode.EQUALS) { + this.pos = pos; + return Token.LESSTHAN; + } + // `=`, `==`, `===`, `=>` + case CharCode.EQUALS: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.EQUALS) { + ++pos; + if ( + maxTokenLength > 2 && + pos < end && + text.charCodeAt(pos) == CharCode.EQUALS + ) { this.pos = pos + 1; - return Token.LESSTHAN_EQUALS; + return Token.EQUALS_EQUALS_EQUALS; } + this.pos = pos; + return Token.EQUALS_EQUALS; } - this.pos = pos; - return Token.LESSTHAN; - } - // `=`, `==`, `===`, `=>` - case CharCode.EQUALS: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - c = text.charCodeAt(pos); - if (c == CharCode.EQUALS) { - ++pos; - if ( - maxTokenLength > 2 && - pos < end && - text.charCodeAt(pos) == CharCode.EQUALS - ) { - this.pos = pos + 1; - return Token.EQUALS_EQUALS_EQUALS; - } - this.pos = pos; - return Token.EQUALS_EQUALS; - } - if (c == CharCode.GREATERTHAN) { - this.pos = pos + 1; - return Token.EQUALS_GREATERTHAN; - } + if (c == CharCode.GREATERTHAN) { + this.pos = pos + 1; + return Token.EQUALS_GREATERTHAN; } - this.pos = pos; - return Token.EQUALS; } - // `>`, `>>`, `>>>`, `>=` `>>=`, `>>>=` - case CharCode.GREATERTHAN: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - c = text.charCodeAt(pos); - if (c == CharCode.GREATERTHAN) { - ++pos; - if (maxTokenLength > 2 && pos < end) { - c = text.charCodeAt(pos); - if (c == CharCode.GREATERTHAN) { - ++pos; - if ( - maxTokenLength > 3 && pos < end && - text.charCodeAt(pos) == CharCode.EQUALS - ) { - this.pos = pos + 1; - return Token.GREATERTHAN_GREATERTHAN_GREATERTHAN_EQUALS; - } - this.pos = pos; - return Token.GREATERTHAN_GREATERTHAN_GREATERTHAN; - } - if (c == CharCode.EQUALS) { + this.pos = pos; + return Token.EQUALS; + } + // `>`, `>>`, `>>>`, `>=` `>>=`, `>>>=` + case CharCode.GREATERTHAN: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.GREATERTHAN) { + ++pos; + if (maxTokenLength > 2 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.GREATERTHAN) { + ++pos; + if ( + maxTokenLength > 3 && pos < end && + text.charCodeAt(pos) == CharCode.EQUALS + ) { this.pos = pos + 1; - return Token.GREATERTHAN_GREATERTHAN_EQUALS; + return Token.GREATERTHAN_GREATERTHAN_GREATERTHAN_EQUALS; } + this.pos = pos; + return Token.GREATERTHAN_GREATERTHAN_GREATERTHAN; } - this.pos = pos; - return Token.GREATERTHAN_GREATERTHAN; - } - if (c == CharCode.EQUALS) { - this.pos = pos + 1; - return Token.GREATERTHAN_EQUALS; - } - } - this.pos = pos; - return Token.GREATERTHAN; - } - // `?`, `??`, `??=` - case CharCode.QUESTION: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - c = text.charCodeAt(pos); - if (c == CharCode.QUESTION) { - ++pos; - if (maxTokenLength > 2 && pos < end) { - c = text.charCodeAt(pos); - if (c == CharCode.EQUALS) { - this.pos = pos + 1; - return Token.QUESTION_QUESTION_EQUALS; - } + if (c == CharCode.EQUALS) { + this.pos = pos + 1; + return Token.GREATERTHAN_GREATERTHAN_EQUALS; } - this.pos = pos; - return Token.QUESTION_QUESTION; } + this.pos = pos; + return Token.GREATERTHAN_GREATERTHAN; } - this.pos = pos; - return Token.QUESTION; - } - case CharCode.OPENBRACKET: { - this.pos = pos + 1; - return Token.OPENBRACKET; - } - case CharCode.CLOSEBRACKET: { - this.pos = pos + 1; - return Token.CLOSEBRACKET; - } - // `^`, `^=` - case CharCode.CARET: { - ++pos; - if ( - maxTokenLength > 1 && pos < end && - text.charCodeAt(pos) == CharCode.EQUALS - ) { + if (c == CharCode.EQUALS) { this.pos = pos + 1; - return Token.CARET_EQUALS; + return Token.GREATERTHAN_EQUALS; } - this.pos = pos; - return Token.CARET; } - case CharCode.OPENBRACE: { - this.pos = pos + 1; - return Token.OPENBRACE; - } - // `|`, `||`, `|=`, `||=` - case CharCode.BAR: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - c = text.charCodeAt(pos); - if (c == CharCode.EQUALS) { - this.pos = pos + 1; - return Token.BAR_EQUALS; - } - if (c == CharCode.BAR) { - ++pos; - if ( - maxTokenLength > 2 && pos < end && - text.charCodeAt(pos) == CharCode.EQUALS - ) { + this.pos = pos; + return Token.GREATERTHAN; + } + // `?`, `??`, `??=` + case CharCode.QUESTION: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.QUESTION) { + ++pos; + if (maxTokenLength > 2 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.EQUALS) { this.pos = pos + 1; - return Token.BAR_BAR_EQUALS; + return Token.QUESTION_QUESTION_EQUALS; } - this.pos = pos; - return Token.BAR_BAR; } + this.pos = pos; + return Token.QUESTION_QUESTION; } - this.pos = pos; - return Token.BAR; - } - case CharCode.CLOSEBRACE: { - this.pos = pos + 1; - return Token.CLOSEBRACE; } - case CharCode.TILDE: { - this.pos = pos + 1; - return Token.TILDE; - } - case CharCode.AT: { + this.pos = pos; + return Token.QUESTION; + } + // `^`, `^=` + case CharCode.CARET: { + ++pos; + if ( + maxTokenLength > 1 && pos < end && + text.charCodeAt(pos) == CharCode.EQUALS + ) { this.pos = pos + 1; - return Token.AT; + return Token.CARET_EQUALS; } - default: { - // TODO: \uXXXX also support for identifiers - if (isIdentifierStart(c)) { - let posBefore = pos; - while ( - ++pos < end && - isIdentifierPart(c = text.charCodeAt(pos)) - ) { /* nop */ } - // TODO: check valid termination of identifier? - + this.pos = pos; + return Token.CARET; + } + // `|`, `||`, `|=`, `||=` + case CharCode.BAR: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.EQUALS) { + this.pos = pos + 1; + return Token.BAR_EQUALS; + } + if (c == CharCode.BAR) { + ++pos; if ( - identifierHandling != IdentifierHandling.ALWAYS && - pos - posBefore >= MIN_KEYWORD_LENGTH && - pos - posBefore <= MAX_KEYWORD_LENGTH + maxTokenLength > 2 && pos < end && + text.charCodeAt(pos) == CharCode.EQUALS ) { - let keywordToken = probeKeywordToken(text.substring(posBefore, pos)); - if ( - keywordToken != Token.INVALID && - !( - identifierHandling == IdentifierHandling.PREFER && - tokenIsAlsoIdentifier(keywordToken) - ) - ) { - this.pos = pos; - return keywordToken; - } + this.pos = pos + 1; + return Token.BAR_BAR_EQUALS; } - this.pos = posBefore; - return Token.IDENTIFIER; - } else if (isWhiteSpace(c)) { - ++pos; - break; + this.pos = pos; + return Token.BAR_BAR; } - let start = pos++; - if ( - isHighSurrogate(c) && pos < end && - isLowSurrogate(text.charCodeAt(pos)) - ) ++pos; - this.error( - DiagnosticCode.Invalid_character, - this.range(start, pos) - ); - this.pos = pos; - return Token.INVALID; } + this.pos = pos; + return Token.BAR; } } this.pos = pos; - return Token.EOF; - } - - peek( - checkOnNewLine: bool = false, - identifierHandling: IdentifierHandling = IdentifierHandling.DEFAULT, - maxCompoundLength: i32 = i32.MAX_VALUE - ): Token { - var text = this.source.text; - var nextToken = this.nextToken; - if (nextToken < 0) { - let posBefore = this.pos; - let tokenBefore = this.token; - let tokenPosBefore = this.tokenPos; - do nextToken = this.unsafeNext(identifierHandling, maxCompoundLength); - while (nextToken == Token.INVALID); - this.nextToken = nextToken; - this.nextTokenPos = this.tokenPos; - if (checkOnNewLine) { - this.nextTokenOnNewLine = false; - for (let pos = posBefore, end = this.nextTokenPos; pos < end; ++pos) { - if (isLineBreak(text.charCodeAt(pos))) { - this.nextTokenOnNewLine = true; - break; - } - } - } - this.pos = posBefore; - this.token = tokenBefore; - this.tokenPos = tokenPosBefore; - } - return nextToken; + return Token.INVALID; } skipLineComment(text: string, pos: i32, end: i32): i32 { diff --git a/tests/compiler/duplicate-fields.optimized.wat b/tests/compiler/duplicate-fields.optimized.wat index fbd19917c3..8b63f0ade2 100644 --- a/tests/compiler/duplicate-fields.optimized.wat +++ b/tests/compiler/duplicate-fields.optimized.wat @@ -56,18 +56,18 @@ local.tee $0 if local.get $0 - call $~lib/rt/itcms/__visit + call $byn-split-outlined-A$~lib/rt/itcms/__visit end global.get $duplicate-fields/raz local.tee $0 if local.get $0 - call $~lib/rt/itcms/__visit + call $byn-split-outlined-A$~lib/rt/itcms/__visit end i32.const 1248 - call $~lib/rt/itcms/__visit + call $byn-split-outlined-A$~lib/rt/itcms/__visit i32.const 1056 - call $~lib/rt/itcms/__visit + call $byn-split-outlined-A$~lib/rt/itcms/__visit global.get $~lib/rt/itcms/pinSpace local.tee $1 i32.load offset=4 @@ -238,30 +238,6 @@ local.get $0 i32.store offset=8 ) - (func $~lib/rt/itcms/__visit (param $0 i32) - local.get $0 - i32.eqz - if - return - end - global.get $~lib/rt/itcms/white - local.get $0 - i32.const 20 - i32.sub - local.tee $0 - i32.load offset=4 - i32.const 3 - i32.and - i32.eq - if - local.get $0 - call $~lib/rt/itcms/Object#makeGray - global.get $~lib/rt/itcms/visitCount - i32.const 1 - i32.add - global.set $~lib/rt/itcms/visitCount - end - ) (func $~lib/rt/tlsf/removeBlock (param $0 i32) (param $1 i32) (local $2 i32) (local $3 i32) @@ -328,11 +304,10 @@ local.tee $2 i32.const 16 i32.lt_u - i32.const 0 local.get $3 i32.const 23 i32.lt_u - select + i32.and i32.eqz if i32.const 0 @@ -593,11 +568,10 @@ local.tee $2 i32.const 16 i32.lt_u - i32.const 0 local.get $5 i32.const 23 i32.lt_u - select + i32.and i32.eqz if i32.const 0 @@ -780,8 +754,8 @@ (local $1 i32) memory.size local.tee $1 - i32.const 1 - i32.lt_s + i32.const 0 + i32.le_s if (result i32) i32.const 1 local.get $1 @@ -941,7 +915,11 @@ if local.get $0 i32.load - call $~lib/rt/itcms/__visit + local.tee $2 + if + local.get $2 + call $byn-split-outlined-A$~lib/rt/itcms/__visit + end local.get $0 i32.const 4 i32.add @@ -1054,7 +1032,7 @@ local.get $0 i32.const 4 i32.add - local.tee $1 + local.tee $0 i32.const 17980 i32.ge_u if @@ -1064,20 +1042,20 @@ call $~lib/rt/tlsf/initialize end global.get $~lib/rt/tlsf/ROOT - local.get $1 + local.get $0 i32.const 4 i32.sub - local.set $0 - local.get $1 + local.set $2 + local.get $0 i32.const 15 i32.and i32.const 1 - local.get $1 + local.get $0 select if (result i32) i32.const 1 else - local.get $0 + local.get $2 i32.load i32.const 1 i32.and @@ -1090,13 +1068,13 @@ call $~lib/builtins/abort unreachable end - local.get $0 - local.get $0 + local.get $2 + local.get $2 i32.load i32.const 1 i32.or i32.store - local.get $0 + local.get $2 call $~lib/rt/tlsf/insertBlock end end @@ -1117,56 +1095,53 @@ ) (func $~lib/rt/tlsf/searchBlock (param $0 i32) (param $1 i32) (result i32) (local $2 i32) + (local $3 i32) local.get $1 i32.const 256 i32.lt_u - if + if (result i32) local.get $1 i32.const 4 i32.shr_u - local.set $1 else + i32.const 31 + i32.const 1 + i32.const 27 local.get $1 - i32.const 536870910 - i32.lt_u - if - i32.const 1 - i32.const 27 - local.get $1 - i32.clz - i32.sub - i32.shl - local.get $1 - i32.add - i32.const 1 - i32.sub - local.set $1 - end + i32.clz + i32.sub + i32.shl local.get $1 - i32.const 31 + i32.add + i32.const 1 + i32.sub local.get $1 + local.get $1 + i32.const 536870910 + i32.lt_u + select + local.tee $1 i32.clz i32.sub - local.tee $2 + local.tee $3 + i32.const 7 + i32.sub + local.set $2 + local.get $1 + local.get $3 i32.const 4 i32.sub i32.shr_u i32.const 16 i32.xor - local.set $1 - local.get $2 - i32.const 7 - i32.sub - local.set $2 end - local.get $1 + local.tee $1 i32.const 16 i32.lt_u - i32.const 0 local.get $2 i32.const 23 i32.lt_u - select + i32.and i32.eqz if i32.const 0 @@ -1312,7 +1287,7 @@ call $~lib/rt/tlsf/initialize end global.get $~lib/rt/tlsf/ROOT - local.set $3 + local.set $5 local.get $0 i32.const 16 i32.add @@ -1327,47 +1302,28 @@ call $~lib/builtins/abort unreachable end - local.get $3 + local.get $5 + i32.const 12 + local.get $2 + i32.const 19 + i32.add + i32.const -16 + i32.and + i32.const 4 + i32.sub local.get $2 i32.const 12 i32.le_u - if (result i32) - i32.const 12 - else - local.get $2 - i32.const 19 - i32.add - i32.const -16 - i32.and - i32.const 4 - i32.sub - end - local.tee $5 + select + local.tee $3 call $~lib/rt/tlsf/searchBlock local.tee $2 i32.eqz if memory.size local.tee $2 - local.get $5 - i32.const 536870910 - i32.lt_u - if (result i32) - i32.const 1 - i32.const 27 - local.get $5 - i32.clz - i32.sub - i32.shl - i32.const 1 - i32.sub - local.get $5 - i32.add - else - local.get $5 - end i32.const 4 - local.get $3 + local.get $5 i32.load offset=1568 local.get $2 i32.const 16 @@ -1376,6 +1332,21 @@ i32.sub i32.ne i32.shl + i32.const 1 + i32.const 27 + local.get $3 + i32.clz + i32.sub + i32.shl + i32.const 1 + i32.sub + local.get $3 + i32.add + local.get $3 + local.get $3 + i32.const 536870910 + i32.lt_u + select i32.add i32.const 65535 i32.add @@ -1400,7 +1371,7 @@ unreachable end end - local.get $3 + local.get $5 local.get $2 i32.const 16 i32.shl @@ -1408,8 +1379,8 @@ i32.const 16 i32.shl call $~lib/rt/tlsf/addMemory - local.get $3 local.get $5 + local.get $3 call $~lib/rt/tlsf/searchBlock local.tee $2 i32.eqz @@ -1426,7 +1397,7 @@ i32.load i32.const -4 i32.and - local.get $5 + local.get $3 i32.lt_u if i32.const 0 @@ -1436,13 +1407,13 @@ call $~lib/builtins/abort unreachable end - local.get $3 + local.get $5 local.get $2 call $~lib/rt/tlsf/removeBlock local.get $2 i32.load - local.set $4 - local.get $5 + local.set $6 + local.get $3 i32.const 4 i32.add i32.const 15 @@ -1455,40 +1426,40 @@ call $~lib/builtins/abort unreachable end - local.get $4 + local.get $6 i32.const -4 i32.and - local.get $5 + local.get $3 i32.sub - local.tee $6 + local.tee $4 i32.const 16 i32.ge_u if local.get $2 - local.get $4 + local.get $6 i32.const 2 i32.and - local.get $5 + local.get $3 i32.or i32.store - local.get $5 + local.get $3 local.get $2 i32.const 4 i32.add i32.add - local.tee $4 - local.get $6 + local.tee $3 + local.get $4 i32.const 4 i32.sub i32.const 1 i32.or i32.store + local.get $5 local.get $3 - local.get $4 call $~lib/rt/tlsf/insertBlock else local.get $2 - local.get $4 + local.get $6 i32.const -2 i32.and i32.store @@ -1725,61 +1696,6 @@ end local.get $2 ) - (func $~lib/rt/itcms/__link (param $0 i32) (param $1 i32) - local.get $1 - i32.eqz - if - return - end - local.get $0 - i32.eqz - if - i32.const 0 - i32.const 1120 - i32.const 294 - i32.const 14 - call $~lib/builtins/abort - unreachable - end - global.get $~lib/rt/itcms/white - local.get $1 - i32.const 20 - i32.sub - local.tee $1 - i32.load offset=4 - i32.const 3 - i32.and - i32.eq - if - local.get $0 - i32.const 20 - i32.sub - i32.load offset=4 - i32.const 3 - i32.and - local.tee $0 - global.get $~lib/rt/itcms/white - i32.eqz - i32.eq - if - local.get $1 - call $~lib/rt/itcms/Object#makeGray - else - global.get $~lib/rt/itcms/state - i32.const 1 - i32.eq - i32.const 0 - local.get $0 - i32.const 3 - i32.eq - select - if - local.get $1 - call $~lib/rt/itcms/Object#makeGray - end - end - end - ) (func $~lib/rt/__visit_members (param $0 i32) (local $1 i32) block $folding-inner0 @@ -1814,7 +1730,7 @@ local.tee $1 if local.get $1 - call $~lib/rt/itcms/__visit + call $byn-split-outlined-A$~lib/rt/itcms/__visit end br $folding-inner0 end @@ -1831,7 +1747,7 @@ local.tee $0 if local.get $0 - call $~lib/rt/itcms/__visit + call $byn-split-outlined-A$~lib/rt/itcms/__visit end ) (func $~start @@ -2047,9 +1963,6 @@ local.get $0 i32.const 0 i32.store - local.get $0 - i32.const 0 - call $~lib/rt/itcms/__link global.get $~lib/memory/__stack_pointer local.tee $2 i32.const 4 @@ -2076,14 +1989,14 @@ i32.const 0 i32.store local.get $0 - i32.const 0 - call $~lib/rt/itcms/__link - local.get $0 local.get $1 i32.store - local.get $0 local.get $1 - call $~lib/rt/itcms/__link + if + local.get $0 + local.get $1 + call $byn-split-outlined-A$~lib/rt/itcms/__link + end global.get $~lib/memory/__stack_pointer i32.const 4 i32.add @@ -2094,9 +2007,12 @@ local.get $0 local.get $1 i32.store - local.get $0 local.get $1 - call $~lib/rt/itcms/__link + if + local.get $0 + local.get $1 + call $byn-split-outlined-A$~lib/rt/itcms/__link + end global.get $~lib/memory/__stack_pointer i32.const 4 i32.add @@ -2401,4 +2317,72 @@ call $~lib/builtins/abort unreachable ) + (func $byn-split-outlined-A$~lib/rt/itcms/__visit (param $0 i32) + global.get $~lib/rt/itcms/white + local.get $0 + i32.const 20 + i32.sub + local.tee $0 + i32.load offset=4 + i32.const 3 + i32.and + i32.eq + if + local.get $0 + call $~lib/rt/itcms/Object#makeGray + global.get $~lib/rt/itcms/visitCount + i32.const 1 + i32.add + global.set $~lib/rt/itcms/visitCount + end + ) + (func $byn-split-outlined-A$~lib/rt/itcms/__link (param $0 i32) (param $1 i32) + local.get $0 + i32.eqz + if + i32.const 0 + i32.const 1120 + i32.const 294 + i32.const 14 + call $~lib/builtins/abort + unreachable + end + global.get $~lib/rt/itcms/white + local.get $1 + i32.const 20 + i32.sub + local.tee $1 + i32.load offset=4 + i32.const 3 + i32.and + i32.eq + if + local.get $0 + i32.const 20 + i32.sub + i32.load offset=4 + i32.const 3 + i32.and + local.tee $0 + global.get $~lib/rt/itcms/white + i32.eqz + i32.eq + if + local.get $1 + call $~lib/rt/itcms/Object#makeGray + else + global.get $~lib/rt/itcms/state + i32.const 1 + i32.eq + local.get $0 + i32.const 3 + i32.eq + i32.and + if + local.get $1 + call $~lib/rt/itcms/Object#makeGray + end + end + end + ) ) From bf8c66ddc5281392681d79679ce141aed0aa7b17 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 00:41:28 +0200 Subject: [PATCH 089/124] refactor --- src/tokenizer.ts | 64 ++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 56b1773a3a..d860e5a603 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -720,38 +720,6 @@ export class Tokenizer extends DiagnosticEmitter { ++pos; break; } - case Token.FLOAT_OR_INTEGER_LITERAL: { - // `0.`, `0x`, `0b`, `0o` - if (c == CharCode._0) { - if (pos + 1 < end) { - c = text.charCodeAt(pos + 1); - if (c == CharCode.DOT) { - this.pos = pos; - return Token.FLOATLITERAL; - } - switch (c | 32) { - case CharCode.x: - case CharCode.b: - case CharCode.o: { - this.pos = pos; - return Token.INTEGERLITERAL; - } - } - } - } - this.pos = pos; - return this.integerOrFloatToken(); - } - // `$`, `_`, `h`, `j`, `q`, `u`, `x`, `z`, `A`..`Z` - case Token.IDENTIFIER: { - let posBefore = pos; - while ( - ++pos < end && - isIdentifierPart(c = text.charCodeAt(pos)) - ) { /* nop */ } - this.pos = posBefore; - return Token.IDENTIFIER; - } // `a`..`z` case Token.IDENTIFIER_OR_KEYWORD: { let posBefore = pos; @@ -779,6 +747,38 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = posBefore; return Token.IDENTIFIER; } + // `$`, `_`, `h`, `j`, `q`, `u`, `x`, `z`, `A`..`Z` + case Token.IDENTIFIER: { + let posBefore = pos; + while ( + ++pos < end && + isIdentifierPart(c = text.charCodeAt(pos)) + ) { /* nop */ } + this.pos = posBefore; + return Token.IDENTIFIER; + } + case Token.FLOAT_OR_INTEGER_LITERAL: { + // `0.`, `0x`, `0b`, `0o` + if (c == CharCode._0) { + if (pos + 1 < end) { + c = text.charCodeAt(pos + 1); + if (c == CharCode.DOT) { + this.pos = pos; + return Token.FLOATLITERAL; + } + switch (c | 32) { + case CharCode.x: + case CharCode.b: + case CharCode.o: { + this.pos = pos; + return Token.INTEGERLITERAL; + } + } + } + } + this.pos = pos; + return this.integerOrFloatToken(); + } case Token.STRINGLITERAL: case Token.TEMPLATELITERAL: { // FIXME From 233a01333af71914d1d0a52cac2468ac422d07b6 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 00:46:16 +0200 Subject: [PATCH 090/124] rearrange --- src/tokenizer.ts | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index d860e5a603..42d2fd9f90 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -720,6 +720,28 @@ export class Tokenizer extends DiagnosticEmitter { ++pos; break; } + case Token.STRINGLITERAL: + case Token.TEMPLATELITERAL: { + // FIXME + this.pos = pos; + return token; + } + case Token.OPERATOR: { + token = this.operatorToken(c, text, pos, end, maxTokenLength); + pos = this.pos; + if (token == Token.INVALID) continue; + return token; + } + // `$`, `_`, `h`, `j`, `q`, `u`, `x`, `z`, `A`..`Z` + case Token.IDENTIFIER: { + let posBefore = pos; + while ( + ++pos < end && + isIdentifierPart(c = text.charCodeAt(pos)) + ) { /* nop */ } + this.pos = posBefore; + return Token.IDENTIFIER; + } // `a`..`z` case Token.IDENTIFIER_OR_KEYWORD: { let posBefore = pos; @@ -747,16 +769,6 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = posBefore; return Token.IDENTIFIER; } - // `$`, `_`, `h`, `j`, `q`, `u`, `x`, `z`, `A`..`Z` - case Token.IDENTIFIER: { - let posBefore = pos; - while ( - ++pos < end && - isIdentifierPart(c = text.charCodeAt(pos)) - ) { /* nop */ } - this.pos = posBefore; - return Token.IDENTIFIER; - } case Token.FLOAT_OR_INTEGER_LITERAL: { // `0.`, `0x`, `0b`, `0o` if (c == CharCode._0) { @@ -779,18 +791,6 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return this.integerOrFloatToken(); } - case Token.STRINGLITERAL: - case Token.TEMPLATELITERAL: { - // FIXME - this.pos = pos; - return token; - } - case Token.OPERATOR: { - token = this.operatorToken(c, text, pos, end, maxTokenLength); - pos = this.pos; - if (token == Token.INVALID) continue; - return token; - } // `[`, `{`, `(`, `,`, `:`, `;`, `@` and etc default: { this.pos = pos + 1; From af33cd5c64922d6e8b1546c8d0380f3214e5aca8 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 02:08:39 +0200 Subject: [PATCH 091/124] simplify --- src/tokenizer.ts | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 42d2fd9f90..bb904987b5 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -176,7 +176,7 @@ export const enum Token { // meta - FLOAT_OR_INTEGER_LITERAL, + NUMERICLITERAL, IDENTIFIER_OR_KEYWORD, OPERATOR, WHITESPACE, @@ -240,16 +240,16 @@ const BASIC_TOKENS: Token[] = [ /* - */ Token.OPERATOR, /* . */ Token.OPERATOR, /* / */ Token.OPERATOR, - /* 0 */ Token.FLOAT_OR_INTEGER_LITERAL, - /* 1 */ Token.FLOAT_OR_INTEGER_LITERAL, - /* 2 */ Token.FLOAT_OR_INTEGER_LITERAL, - /* 3 */ Token.FLOAT_OR_INTEGER_LITERAL, - /* 4 */ Token.FLOAT_OR_INTEGER_LITERAL, - /* 5 */ Token.FLOAT_OR_INTEGER_LITERAL, - /* 6 */ Token.FLOAT_OR_INTEGER_LITERAL, - /* 7 */ Token.FLOAT_OR_INTEGER_LITERAL, - /* 8 */ Token.FLOAT_OR_INTEGER_LITERAL, - /* 9 */ Token.FLOAT_OR_INTEGER_LITERAL, + /* 0 */ Token.NUMERICLITERAL, + /* 1 */ Token.NUMERICLITERAL, + /* 2 */ Token.NUMERICLITERAL, + /* 3 */ Token.NUMERICLITERAL, + /* 4 */ Token.NUMERICLITERAL, + /* 5 */ Token.NUMERICLITERAL, + /* 6 */ Token.NUMERICLITERAL, + /* 7 */ Token.NUMERICLITERAL, + /* 8 */ Token.NUMERICLITERAL, + /* 9 */ Token.NUMERICLITERAL, /* : */ Token.COLON, /* ; */ Token.SEMICOLON, /* < */ Token.OPERATOR, @@ -720,9 +720,10 @@ export class Tokenizer extends DiagnosticEmitter { ++pos; break; } + // `$`, `_`, `h`, `j`, `q`, `u`, `x`, `z`, `A`..`Z` + case Token.IDENTIFIER: case Token.STRINGLITERAL: case Token.TEMPLATELITERAL: { - // FIXME this.pos = pos; return token; } @@ -732,16 +733,6 @@ export class Tokenizer extends DiagnosticEmitter { if (token == Token.INVALID) continue; return token; } - // `$`, `_`, `h`, `j`, `q`, `u`, `x`, `z`, `A`..`Z` - case Token.IDENTIFIER: { - let posBefore = pos; - while ( - ++pos < end && - isIdentifierPart(c = text.charCodeAt(pos)) - ) { /* nop */ } - this.pos = posBefore; - return Token.IDENTIFIER; - } // `a`..`z` case Token.IDENTIFIER_OR_KEYWORD: { let posBefore = pos; @@ -769,7 +760,7 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = posBefore; return Token.IDENTIFIER; } - case Token.FLOAT_OR_INTEGER_LITERAL: { + case Token.NUMERICLITERAL: { // `0.`, `0x`, `0b`, `0o` if (c == CharCode._0) { if (pos + 1 < end) { From a13414ae38547e021bdbe514c31f52f916eb44f9 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 02:13:27 +0200 Subject: [PATCH 092/124] more --- src/tokenizer.ts | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index bb904987b5..92d2586ce8 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -762,24 +762,19 @@ export class Tokenizer extends DiagnosticEmitter { } case Token.NUMERICLITERAL: { // `0.`, `0x`, `0b`, `0o` + this.pos = pos; if (c == CharCode._0) { if (pos + 1 < end) { c = text.charCodeAt(pos + 1); - if (c == CharCode.DOT) { - this.pos = pos; - return Token.FLOATLITERAL; - } + if (c == CharCode.DOT) return Token.FLOATLITERAL; switch (c | 32) { case CharCode.x: case CharCode.b: - case CharCode.o: { - this.pos = pos; + case CharCode.o: return Token.INTEGERLITERAL; - } } } } - this.pos = pos; return this.integerOrFloatToken(); } // `[`, `{`, `(`, `,`, `:`, `;`, `@` and etc From e06fa9ef79b3541a7adaebaecaac54535853ca8c Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 02:21:47 +0200 Subject: [PATCH 093/124] refactor --- src/tokenizer.ts | 67 ++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 92d2586ce8..0cb310bfb6 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -176,10 +176,10 @@ export const enum Token { // meta - NUMERICLITERAL, + DIGIT, + WHITESPACE, IDENTIFIER_OR_KEYWORD, OPERATOR, - WHITESPACE, INVALID, EOF } @@ -240,16 +240,16 @@ const BASIC_TOKENS: Token[] = [ /* - */ Token.OPERATOR, /* . */ Token.OPERATOR, /* / */ Token.OPERATOR, - /* 0 */ Token.NUMERICLITERAL, - /* 1 */ Token.NUMERICLITERAL, - /* 2 */ Token.NUMERICLITERAL, - /* 3 */ Token.NUMERICLITERAL, - /* 4 */ Token.NUMERICLITERAL, - /* 5 */ Token.NUMERICLITERAL, - /* 6 */ Token.NUMERICLITERAL, - /* 7 */ Token.NUMERICLITERAL, - /* 8 */ Token.NUMERICLITERAL, - /* 9 */ Token.NUMERICLITERAL, + /* 0 */ Token.DIGIT, + /* 1 */ Token.DIGIT, + /* 2 */ Token.DIGIT, + /* 3 */ Token.DIGIT, + /* 4 */ Token.DIGIT, + /* 5 */ Token.DIGIT, + /* 6 */ Token.DIGIT, + /* 7 */ Token.DIGIT, + /* 8 */ Token.DIGIT, + /* 9 */ Token.DIGIT, /* : */ Token.COLON, /* ; */ Token.SEMICOLON, /* < */ Token.OPERATOR, @@ -727,11 +727,23 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return token; } - case Token.OPERATOR: { - token = this.operatorToken(c, text, pos, end, maxTokenLength); - pos = this.pos; - if (token == Token.INVALID) continue; - return token; + // `0`..`9` + case Token.DIGIT: { + this.pos = pos; + // `0.`, `0x`, `0b`, `0o` + if (c == CharCode._0) { + if (pos + 1 < end) { + c = text.charCodeAt(pos + 1); + if (c == CharCode.DOT) return Token.FLOATLITERAL; + switch (c | 32) { + case CharCode.x: + case CharCode.b: + case CharCode.o: + return Token.INTEGERLITERAL; + } + } + } + return this.integerOrFloatToken(); } // `a`..`z` case Token.IDENTIFIER_OR_KEYWORD: { @@ -760,22 +772,11 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = posBefore; return Token.IDENTIFIER; } - case Token.NUMERICLITERAL: { - // `0.`, `0x`, `0b`, `0o` - this.pos = pos; - if (c == CharCode._0) { - if (pos + 1 < end) { - c = text.charCodeAt(pos + 1); - if (c == CharCode.DOT) return Token.FLOATLITERAL; - switch (c | 32) { - case CharCode.x: - case CharCode.b: - case CharCode.o: - return Token.INTEGERLITERAL; - } - } - } - return this.integerOrFloatToken(); + case Token.OPERATOR: { + token = this.operatorToken(c, text, pos, end, maxTokenLength); + pos = this.pos; + if (token == Token.INVALID) continue; + return token; } // `[`, `{`, `(`, `,`, `:`, `;`, `@` and etc default: { From 057840e8513c4aab313e2e9450663bc6ecf9a182 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 02:34:59 +0200 Subject: [PATCH 094/124] simplify --- src/tokenizer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 0cb310bfb6..0c1e89d293 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -750,7 +750,7 @@ export class Tokenizer extends DiagnosticEmitter { let posBefore = pos; while ( ++pos < end && - isIdentifierPart(c = text.charCodeAt(pos)) + isIdentifierPart(text.charCodeAt(pos)) ) { /* nop */ } if ( identifierHandling != IdentifierHandling.ALWAYS && From 02250c92a9dc2467fd8f3d02c4c7426ad3dc01e6 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 02:37:02 +0200 Subject: [PATCH 095/124] refactor --- src/tokenizer.ts | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 0c1e89d293..4a81212134 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -748,25 +748,26 @@ export class Tokenizer extends DiagnosticEmitter { // `a`..`z` case Token.IDENTIFIER_OR_KEYWORD: { let posBefore = pos; - while ( - ++pos < end && - isIdentifierPart(text.charCodeAt(pos)) - ) { /* nop */ } - if ( - identifierHandling != IdentifierHandling.ALWAYS && - pos - posBefore >= MIN_KEYWORD_LENGTH && - pos - posBefore <= MAX_KEYWORD_LENGTH - ) { - let keywordToken = probeKeywordToken(text.substring(posBefore, pos)); + if (identifierHandling != IdentifierHandling.ALWAYS) { + while ( + ++pos < end && + isIdentifierPart(text.charCodeAt(pos)) + ) { /* nop */ } if ( - keywordToken != Token.INVALID && - !( - identifierHandling == IdentifierHandling.PREFER && - tokenIsAlsoIdentifier(keywordToken) - ) + pos - posBefore >= MIN_KEYWORD_LENGTH && + pos - posBefore <= MAX_KEYWORD_LENGTH ) { - this.pos = pos; - return keywordToken; + let keywordToken = probeKeywordToken(text.substring(posBefore, pos)); + if ( + keywordToken != Token.INVALID && + !( + identifierHandling == IdentifierHandling.PREFER && + tokenIsAlsoIdentifier(keywordToken) + ) + ) { + this.pos = pos; + return keywordToken; + } } } this.pos = posBefore; From cb323217d5baf54639c4d7f1fc003fcfc4046bb9 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 02:51:12 +0200 Subject: [PATCH 096/124] fix --- src/tokenizer.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 4a81212134..2beb4038c8 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -789,10 +789,7 @@ export class Tokenizer extends DiagnosticEmitter { } else { // TODO: \uXXXX also support for identifiers if (isIdentifierStart(c)) { - while ( - ++pos < end && - isIdentifierPart(c = text.charCodeAt(pos)) - ) { /* nop */ } + this.pos = pos; return Token.IDENTIFIER; } else if (isWhiteSpace(c)) { ++pos; From 33bcf1fb4738701a27b2c25970e8a9abb6681511 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 03:13:10 +0200 Subject: [PATCH 097/124] simplify integerOrFloatToken --- src/tokenizer.ts | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 2beb4038c8..7f84f0fe86 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -585,6 +585,20 @@ export function operatorTokenToString(token: Token): string { } } +// Test is it integer or float without update of position. +function integerOrFloatToken(text: string, pos: i32, end: i32): Token { + while (pos < end) { + let c = text.charCodeAt(pos); + if (c == CharCode.DOT || (c | 32) == CharCode.e) { + return Token.FLOATLITERAL; + } + if (c != CharCode._ && !isDecimal(c)) break; + // does not validate separator placement (this is done in readXYInteger) + pos++; + } + return Token.INTEGERLITERAL; +} + export class Range { source!: Source; @@ -743,7 +757,7 @@ export class Tokenizer extends DiagnosticEmitter { } } } - return this.integerOrFloatToken(); + return integerOrFloatToken(text, pos + 1, end); } // `a`..`z` case Token.IDENTIFIER_OR_KEYWORD: { @@ -1500,23 +1514,6 @@ export class Tokenizer extends DiagnosticEmitter { return text.substring(start, pos); } - // Test is it integer or float without update of position. - integerOrFloatToken(): Token { - var text = this.source.text; - var pos = this.pos + 1; - var end = this.end; - while (pos < end) { - let c = text.charCodeAt(pos); - if (c == CharCode.DOT || (c | 32) == CharCode.e) { - return Token.FLOATLITERAL; - } - if (c != CharCode._ && !isDecimal(c)) break; - // does not validate separator placement (this is done in readXYInteger) - pos++; - } - return Token.INTEGERLITERAL; - } - readInteger(): i64 { var text = this.source.text; var pos = this.pos; From 6bee4dcd75f31a1909437c45cf8ca54d60b22b91 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 03:19:21 +0200 Subject: [PATCH 098/124] refactor --- src/tokenizer.ts | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 7f84f0fe86..6dbfc91fb4 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -771,16 +771,13 @@ export class Tokenizer extends DiagnosticEmitter { pos - posBefore >= MIN_KEYWORD_LENGTH && pos - posBefore <= MAX_KEYWORD_LENGTH ) { - let keywordToken = probeKeywordToken(text.substring(posBefore, pos)); - if ( - keywordToken != Token.INVALID && - !( - identifierHandling == IdentifierHandling.PREFER && - tokenIsAlsoIdentifier(keywordToken) - ) - ) { + let keyword = probeKeywordToken(text.substring(posBefore, pos)); + if (keyword != Token.INVALID && !( + identifierHandling == IdentifierHandling.PREFER && + tokenIsAlsoIdentifier(keyword) + )) { this.pos = pos; - return keywordToken; + return keyword; } } } From 273dc4397209d30b45492a305cf0c602afbd73f8 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 10:20:41 +0200 Subject: [PATCH 099/124] refactor. Simplify names --- src/tokenizer.ts | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 6dbfc91fb4..764b02d0e4 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -322,7 +322,7 @@ const BASIC_TOKENS: Token[] = [ /* 0x7F */ Token.INVALID, ]; -export function probeKeywordToken(text: string): Token { +export function scanKeyword(text: string): Token { let len = text.length; assert(len); switch (text.charCodeAt(0)) { @@ -585,8 +585,8 @@ export function operatorTokenToString(token: Token): string { } } -// Test is it integer or float without update of position. -function integerOrFloatToken(text: string, pos: i32, end: i32): Token { +// Scan and determine is it integer or float without update of position. +function scanNumber(text: string, pos: i32, end: i32): Token { while (pos < end) { let c = text.charCodeAt(pos); if (c == CharCode.DOT || (c | 32) == CharCode.e) { @@ -757,7 +757,7 @@ export class Tokenizer extends DiagnosticEmitter { } } } - return integerOrFloatToken(text, pos + 1, end); + return scanNumber(text, pos + 1, end); } // `a`..`z` case Token.IDENTIFIER_OR_KEYWORD: { @@ -771,7 +771,7 @@ export class Tokenizer extends DiagnosticEmitter { pos - posBefore >= MIN_KEYWORD_LENGTH && pos - posBefore <= MAX_KEYWORD_LENGTH ) { - let keyword = probeKeywordToken(text.substring(posBefore, pos)); + let keyword = scanKeyword(text.substring(posBefore, pos)); if (keyword != Token.INVALID && !( identifierHandling == IdentifierHandling.PREFER && tokenIsAlsoIdentifier(keyword) @@ -785,7 +785,7 @@ export class Tokenizer extends DiagnosticEmitter { return Token.IDENTIFIER; } case Token.OPERATOR: { - token = this.operatorToken(c, text, pos, end, maxTokenLength); + token = this.scanOperator(c, text, pos, end, maxTokenLength); pos = this.pos; if (token == Token.INVALID) continue; return token; @@ -854,7 +854,7 @@ export class Tokenizer extends DiagnosticEmitter { return nextToken; } - private operatorToken(c: i32, text: string, pos: i32, end: i32, maxTokenLength: i32): Token { + private scanOperator(c: i32, text: string, pos: i32, end: i32, maxTokenLength: i32): Token { // Operator tokens switch (c) { // `!`, `!=`, `!==` From 114ac1efe5f381c94142865e513aae6389819a80 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 10:29:14 +0200 Subject: [PATCH 100/124] refactor --- src/tokenizer.ts | 64 ++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 764b02d0e4..cafd94e4ce 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -585,20 +585,6 @@ export function operatorTokenToString(token: Token): string { } } -// Scan and determine is it integer or float without update of position. -function scanNumber(text: string, pos: i32, end: i32): Token { - while (pos < end) { - let c = text.charCodeAt(pos); - if (c == CharCode.DOT || (c | 32) == CharCode.e) { - return Token.FLOATLITERAL; - } - if (c != CharCode._ && !isDecimal(c)) break; - // does not validate separator placement (this is done in readXYInteger) - pos++; - } - return Token.INTEGERLITERAL; -} - export class Range { source!: Source; @@ -741,24 +727,10 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return token; } - // `0`..`9` - case Token.DIGIT: { - this.pos = pos; - // `0.`, `0x`, `0b`, `0o` - if (c == CharCode._0) { - if (pos + 1 < end) { - c = text.charCodeAt(pos + 1); - if (c == CharCode.DOT) return Token.FLOATLITERAL; - switch (c | 32) { - case CharCode.x: - case CharCode.b: - case CharCode.o: - return Token.INTEGERLITERAL; - } - } - } - return scanNumber(text, pos + 1, end); - } + // `0`..`9`, `0.`, `0x`, `0b`, `0o` + case Token.DIGIT: + return this.scanNumber(c, text, pos, end); + // `a`..`z` case Token.IDENTIFIER_OR_KEYWORD: { let posBefore = pos; @@ -854,6 +826,34 @@ export class Tokenizer extends DiagnosticEmitter { return nextToken; } + // Scan and determine is it integer or float without update of position. + private scanNumber(c: i32, text: string, pos: i32, end: i32): Token { + this.pos = pos++; + // `0.`, `0x`, `0b`, `0o` + if (c == CharCode._0) { + if (pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.DOT) return Token.FLOATLITERAL; + switch (c | 32) { + case CharCode.x: + case CharCode.b: + case CharCode.o: + return Token.INTEGERLITERAL; + } + } + } + while (pos < end) { + let c = text.charCodeAt(pos); + if (c == CharCode.DOT || (c | 32) == CharCode.e) { + return Token.FLOATLITERAL; + } + if (c != CharCode._ && !isDecimal(c)) break; + // does not validate separator placement (this is done in readXYInteger) + ++pos; + } + return Token.INTEGERLITERAL; + } + private scanOperator(c: i32, text: string, pos: i32, end: i32, maxTokenLength: i32): Token { // Operator tokens switch (c) { From 25991a8885eab99193f7d1ad286a7941b0c35997 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 10:38:44 +0200 Subject: [PATCH 101/124] refactor skipLineComment --- src/tokenizer.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index cafd94e4ce..ecad14fec1 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1171,14 +1171,14 @@ export class Tokenizer extends DiagnosticEmitter { } skipLineComment(text: string, pos: i32, end: i32): i32 { - let commentStartPos = pos - 1; - let commentKind = CommentKind.LINE; + let startPos = pos - 1; + let kind = CommentKind.LINE; if ( pos + 1 < end && text.charCodeAt(pos + 1) == CharCode.SLASH ) { ++pos; - commentKind = CommentKind.TRIPLE; + kind = CommentKind.TRIPLE; } while (++pos < end) { if (isLineBreak(text.charCodeAt(pos))) { @@ -1188,9 +1188,9 @@ export class Tokenizer extends DiagnosticEmitter { } if (this.onComment) { this.onComment( - commentKind, - text.substring(commentStartPos, pos), - this.range(commentStartPos, pos) + kind, + text.substring(startPos, pos), + this.range(startPos, pos) ); } return pos; From e1de8068e295acb99f0cb5c8a4b60e7622bd1611 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 10:40:44 +0200 Subject: [PATCH 102/124] refactor skipBlockComment --- src/tokenizer.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index ecad14fec1..0b3e487c4a 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1197,7 +1197,7 @@ export class Tokenizer extends DiagnosticEmitter { } skipBlockComment(text: string, pos: i32, end: i32): i32 { - let commentStartPos = pos - 1; + let startPos = pos - 1; let closed = false; while (++pos < end) { let c = text.charCodeAt(pos); @@ -1219,8 +1219,8 @@ export class Tokenizer extends DiagnosticEmitter { } else if (this.onComment) { this.onComment( CommentKind.BLOCK, - text.substring(commentStartPos, pos), - this.range(commentStartPos, pos) + text.substring(startPos, pos), + this.range(startPos, pos) ); } return pos; From 1e52754dce5bd505264c5e08120e0055af20a022 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 10:42:02 +0200 Subject: [PATCH 103/124] let -> var --- src/tokenizer.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 0b3e487c4a..e8306d6d65 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1171,8 +1171,8 @@ export class Tokenizer extends DiagnosticEmitter { } skipLineComment(text: string, pos: i32, end: i32): i32 { - let startPos = pos - 1; - let kind = CommentKind.LINE; + var startPos = pos - 1; + var kind = CommentKind.LINE; if ( pos + 1 < end && text.charCodeAt(pos + 1) == CharCode.SLASH @@ -1197,8 +1197,8 @@ export class Tokenizer extends DiagnosticEmitter { } skipBlockComment(text: string, pos: i32, end: i32): i32 { - let startPos = pos - 1; - let closed = false; + var startPos = pos - 1; + var closed = false; while (++pos < end) { let c = text.charCodeAt(pos); if ( From f3f309e56814671c6118e76d494c55093a834e35 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 11:31:00 +0200 Subject: [PATCH 104/124] refactor scanKeyword --- src/tokenizer.ts | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index e8306d6d65..fce69ffba9 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -328,9 +328,7 @@ export function scanKeyword(text: string): Token { switch (text.charCodeAt(0)) { case CharCode.a: { if (len == 2) { - if (text.charCodeAt(1) == CharCode.s) { - return Token.AS; - } + if (text.charCodeAt(1) == CharCode.s) return Token.AS; break; } if (text == "abstract") return Token.ABSTRACT; @@ -356,9 +354,7 @@ export function scanKeyword(text: string): Token { } case CharCode.d: { if (len == 2) { - if (text.charCodeAt(1) == CharCode.o) { - return Token.DO; - } + if (text.charCodeAt(1) == CharCode.o) return Token.DO; break; } if (text == "default") return Token.DEFAULT; @@ -434,9 +430,7 @@ export function scanKeyword(text: string): Token { break; } case CharCode.o: { - if (len == 2 && text.charCodeAt(1) == CharCode.f) { - return Token.OF; - } + if (len == 2 && text.charCodeAt(1) == CharCode.f) return Token.OF; break; } case CharCode.p: { From 2a18ccc6d6cc310227fcf6fd1e7288d1f1f752ef Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 11:45:38 +0200 Subject: [PATCH 105/124] refactor scanKeyword --- src/tokenizer.ts | 84 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 26 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index fce69ffba9..ec734f18cf 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -371,11 +371,28 @@ export function scanKeyword(text: string): Token { break; } case CharCode.f: { - if (text == "false") return Token.FALSE; - if (text == "function") return Token.FUNCTION; - if (text == "for") return Token.FOR; - if (text == "from") return Token.FROM; - if (text == "finally") return Token.FINALLY; + switch (text.charCodeAt(1)) { + case CharCode.a: { + if (text == "false") return Token.FALSE; + break; + } + case CharCode.u: { + if (text == "function") return Token.FUNCTION; + break; + } + case CharCode.o: { + if (text == "for") return Token.FOR; + break; + } + case CharCode.r: { + if (text == "from") return Token.FROM; + break; + } + case CharCode.i: { + if (text == "finally") return Token.FINALLY; + break; + } + } break; } case CharCode.g: { @@ -390,23 +407,24 @@ export function scanKeyword(text: string): Token { case CharCode.s: return Token.IS; } break; - } - switch (text.charCodeAt(3)) { - case CharCode.e: { - if (text == "interface") return Token.INTERFACE; - break; - } - case CharCode.l: { - if (text == "implements") return Token.IMPLEMENTS; - break; - } - case CharCode.o: { - if (text == "import") return Token.IMPORT; - break; - } - case CharCode.t: { - if (text == "instanceof") return Token.INSTANCEOF; - break; + } else if (len >= 6) { + switch (text.charCodeAt(3)) { + case CharCode.e: { + if (text == "interface") return Token.INTERFACE; + break; + } + case CharCode.l: { + if (text == "implements") return Token.IMPLEMENTS; + break; + } + case CharCode.o: { + if (text == "import") return Token.IMPORT; + break; + } + case CharCode.t: { + if (text == "instanceof") return Token.INSTANCEOF; + break; + } } } break; @@ -434,10 +452,24 @@ export function scanKeyword(text: string): Token { break; } case CharCode.p: { - if (text == "public") return Token.PUBLIC; - if (text == "private") return Token.PRIVATE; - if (text == "protected") return Token.PROTECTED; - if (text == "package") return Token.PACKAGE; + switch (text.charCodeAt(2)) { + case CharCode.b: { + if (text == "public") return Token.PUBLIC; + break; + } + case CharCode.i: { + if (text == "private") return Token.PRIVATE; + break; + } + case CharCode.o: { + if (text == "protected") return Token.PROTECTED; + break; + } + case CharCode.c: { + if (text == "package") return Token.PACKAGE; + break; + } + } break; } case CharCode.r: { From 23d6ad8ec77eb6366b781ca795baed91b7648de3 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 11:51:13 +0200 Subject: [PATCH 106/124] simplify --- src/tokenizer.ts | 35 ++++++++++----------- tests/parser/constructor.ts.fixture.ts | 5 ++- tests/parser/interface-errors.ts.fixture.ts | 6 ++-- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index ec734f18cf..9064cd1a44 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -407,24 +407,23 @@ export function scanKeyword(text: string): Token { case CharCode.s: return Token.IS; } break; - } else if (len >= 6) { - switch (text.charCodeAt(3)) { - case CharCode.e: { - if (text == "interface") return Token.INTERFACE; - break; - } - case CharCode.l: { - if (text == "implements") return Token.IMPLEMENTS; - break; - } - case CharCode.o: { - if (text == "import") return Token.IMPORT; - break; - } - case CharCode.t: { - if (text == "instanceof") return Token.INSTANCEOF; - break; - } + } + switch (text.charCodeAt(3)) { + case CharCode.e: { + if (text == "interface") return Token.INTERFACE; + break; + } + case CharCode.l: { + if (text == "implements") return Token.IMPLEMENTS; + break; + } + case CharCode.o: { + if (text == "import") return Token.IMPORT; + break; + } + case CharCode.t: { + if (text == "instanceof") return Token.INSTANCEOF; + break; } } break; diff --git a/tests/parser/constructor.ts.fixture.ts b/tests/parser/constructor.ts.fixture.ts index 481863215c..64114d6fc4 100644 --- a/tests/parser/constructor.ts.fixture.ts +++ b/tests/parser/constructor.ts.fixture.ts @@ -3,6 +3,5 @@ class MyClass { constructor(a: i32) {} constructor(a: i32, b: i32) {} } -class MyClassImplicit { - constructor(public a: i32, private readonly b: i32 = 2, c: i32 = 3) {} -} +class MyClassImplicit {} +// ERROR 1005: "')' expected." in constructor.ts(8,30+7) diff --git a/tests/parser/interface-errors.ts.fixture.ts b/tests/parser/interface-errors.ts.fixture.ts index bc1285e3d3..3cc433fbb7 100644 --- a/tests/parser/interface-errors.ts.fixture.ts +++ b/tests/parser/interface-errors.ts.fixture.ts @@ -10,7 +10,9 @@ interface Foo extends Bar { } interface Foo { a: i32; + private; b: i32; + protected; c: i32; c: i32; d: i32; @@ -20,8 +22,8 @@ interface Foo { // ERROR 1242: "'abstract' modifier can only appear on a class, method, or property declaration." in interface-errors.ts(1,1+8) // ERROR 1176: "Interface declaration cannot have 'implements' clause." in interface-errors.ts(8,15+10) // ERROR 1042: "'abstract' modifier cannot be used here." in interface-errors.ts(15,3+8) -// ERROR 1042: "'private' modifier cannot be used here." in interface-errors.ts(16,3+7) -// ERROR 1042: "'protected' modifier cannot be used here." in interface-errors.ts(17,3+9) +// ERROR 1110: "Type expected." in interface-errors.ts(16,3+7) +// ERROR 1110: "Type expected." in interface-errors.ts(17,3+9) // ERROR 1042: "'public' modifier cannot be used here." in interface-errors.ts(18,3+6) // ERROR 1042: "'static' modifier cannot be used here." in interface-errors.ts(19,3+6) // ERROR 230: "'constructor' keyword cannot be used here." in interface-errors.ts(20,3+11) From 9fab45e2252a15861b55994c607b3098260674f8 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 12:03:38 +0200 Subject: [PATCH 107/124] fix --- tests/parser/constructor.ts.fixture.ts | 5 +++-- tests/parser/interface-errors.ts.fixture.ts | 6 ++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/parser/constructor.ts.fixture.ts b/tests/parser/constructor.ts.fixture.ts index 64114d6fc4..481863215c 100644 --- a/tests/parser/constructor.ts.fixture.ts +++ b/tests/parser/constructor.ts.fixture.ts @@ -3,5 +3,6 @@ class MyClass { constructor(a: i32) {} constructor(a: i32, b: i32) {} } -class MyClassImplicit {} -// ERROR 1005: "')' expected." in constructor.ts(8,30+7) +class MyClassImplicit { + constructor(public a: i32, private readonly b: i32 = 2, c: i32 = 3) {} +} diff --git a/tests/parser/interface-errors.ts.fixture.ts b/tests/parser/interface-errors.ts.fixture.ts index 3cc433fbb7..bc1285e3d3 100644 --- a/tests/parser/interface-errors.ts.fixture.ts +++ b/tests/parser/interface-errors.ts.fixture.ts @@ -10,9 +10,7 @@ interface Foo extends Bar { } interface Foo { a: i32; - private; b: i32; - protected; c: i32; c: i32; d: i32; @@ -22,8 +20,8 @@ interface Foo { // ERROR 1242: "'abstract' modifier can only appear on a class, method, or property declaration." in interface-errors.ts(1,1+8) // ERROR 1176: "Interface declaration cannot have 'implements' clause." in interface-errors.ts(8,15+10) // ERROR 1042: "'abstract' modifier cannot be used here." in interface-errors.ts(15,3+8) -// ERROR 1110: "Type expected." in interface-errors.ts(16,3+7) -// ERROR 1110: "Type expected." in interface-errors.ts(17,3+9) +// ERROR 1042: "'private' modifier cannot be used here." in interface-errors.ts(16,3+7) +// ERROR 1042: "'protected' modifier cannot be used here." in interface-errors.ts(17,3+9) // ERROR 1042: "'public' modifier cannot be used here." in interface-errors.ts(18,3+6) // ERROR 1042: "'static' modifier cannot be used here." in interface-errors.ts(19,3+6) // ERROR 230: "'constructor' keyword cannot be used here." in interface-errors.ts(20,3+11) From d25c23b8fee530e13fa88abb27d5bb47957877b8 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 13:33:07 +0200 Subject: [PATCH 108/124] refactor --- src/tokenizer.ts | 190 +++++++++++++++++++++++++---------------------- 1 file changed, 102 insertions(+), 88 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 9064cd1a44..6799488e95 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -190,7 +190,7 @@ export const enum IdentifierHandling { ALWAYS } -// from 0-127 +// Classify single character tokens (0..127) const BASIC_TOKENS: Token[] = [ /* 0x00 */ Token.INVALID, /* 0x01 */ Token.INVALID, @@ -326,7 +326,7 @@ export function scanKeyword(text: string): Token { let len = text.length; assert(len); switch (text.charCodeAt(0)) { - case CharCode.a: { + case CharCode.a: if (len == 2) { if (text.charCodeAt(1) == CharCode.s) return Token.AS; break; @@ -335,24 +335,44 @@ export function scanKeyword(text: string): Token { if (text == "async") return Token.ASYNC; if (text == "await") return Token.AWAIT; break; - } - case CharCode.b: { + + case CharCode.b: if (text == "break") return Token.BREAK; break; - } - case CharCode.c: { - if (len <= 5) { - if (text == "const") return Token.CONST; - if (text == "case") return Token.CASE; - if (text == "class") return Token.CLASS; + + case CharCode.c: + if (len == 5) { + switch (text.charCodeAt(1)) { + case CharCode.o: + if (text == "const") return Token.CONST; + break; + + case CharCode.l: + if (text == "class") return Token.CLASS; + break; + + case CharCode.a: + if (text == "catch") return Token.CATCH; + break; + } break; } - if (text == "constructor") return Token.CONSTRUCTOR; - if (text == "continue") return Token.CONTINUE; - if (text == "catch") return Token.CATCH; + switch (text.charCodeAt(3)) { + case CharCode.e: + if (text == "case") return Token.CASE; + break; + + case CharCode.s: + if (text == "constructor") return Token.CONSTRUCTOR; + break; + + case CharCode.t: + if (text == "continue") return Token.CONTINUE; + break; + } break; - } - case CharCode.d: { + + case CharCode.d: if (len == 2) { if (text.charCodeAt(1) == CharCode.o) return Token.DO; break; @@ -362,44 +382,43 @@ export function scanKeyword(text: string): Token { if (text == "delete") return Token.DELETE; if (text == "debugger") return Token.DEBUGGER; break; - } - case CharCode.e: { + + case CharCode.e: if (text == "else") return Token.ELSE; if (text == "export") return Token.EXPORT; if (text == "enum") return Token.ENUM; if (text == "extends") return Token.EXTENDS; break; - } - case CharCode.f: { + + case CharCode.f: switch (text.charCodeAt(1)) { - case CharCode.a: { + case CharCode.a: if (text == "false") return Token.FALSE; break; - } - case CharCode.u: { + + case CharCode.u: if (text == "function") return Token.FUNCTION; break; - } - case CharCode.o: { + + case CharCode.o: if (text == "for") return Token.FOR; break; - } - case CharCode.r: { + + case CharCode.r: if (text == "from") return Token.FROM; break; - } - case CharCode.i: { + + case CharCode.i: if (text == "finally") return Token.FINALLY; break; - } } break; - } - case CharCode.g: { + + case CharCode.g: if (text == "get") return Token.GET; break; - } - case CharCode.i: { + + case CharCode.i: if (len == 2) { switch (text.charCodeAt(1)) { case CharCode.f: return Token.IF; @@ -409,81 +428,79 @@ export function scanKeyword(text: string): Token { break; } switch (text.charCodeAt(3)) { - case CharCode.e: { + case CharCode.e: if (text == "interface") return Token.INTERFACE; break; - } - case CharCode.l: { + + case CharCode.l: if (text == "implements") return Token.IMPLEMENTS; break; - } - case CharCode.o: { + + case CharCode.o: if (text == "import") return Token.IMPORT; break; - } - case CharCode.t: { + + case CharCode.t: if (text == "instanceof") return Token.INSTANCEOF; break; - } } break; - } - case CharCode.k: { + + case CharCode.k: if (text == "keyof") return Token.KEYOF; break; - } - case CharCode.l: { + + case CharCode.l: if (text == "let") return Token.LET; break; - } - case CharCode.m: { + + case CharCode.m: if (text == "module") return Token.MODULE; break; - } - case CharCode.n: { + + case CharCode.n: if (text == "null") return Token.NULL; if (text == "new") return Token.NEW; if (text == "namespace") return Token.NAMESPACE; break; - } - case CharCode.o: { + + case CharCode.o: if (len == 2 && text.charCodeAt(1) == CharCode.f) return Token.OF; break; - } - case CharCode.p: { + + case CharCode.p: switch (text.charCodeAt(2)) { - case CharCode.b: { + case CharCode.b: if (text == "public") return Token.PUBLIC; break; - } - case CharCode.i: { + + case CharCode.i: if (text == "private") return Token.PRIVATE; break; - } - case CharCode.o: { + + case CharCode.o: if (text == "protected") return Token.PROTECTED; break; - } - case CharCode.c: { + + case CharCode.c: if (text == "package") return Token.PACKAGE; break; - } } break; - } - case CharCode.r: { + + case CharCode.r: if (text == "return") return Token.RETURN; if (text == "readonly") return Token.READONLY; break; - } - case CharCode.s: { + + case CharCode.s: if (text == "switch") return Token.SWITCH; if (text == "static") return Token.STATIC; if (text == "set") return Token.SET; if (text == "super") return Token.SUPER; break; - } - case CharCode.t: { + + case CharCode.t: if (text == "true") return Token.TRUE; if (text == "this") return Token.THIS; if (text == "type") return Token.TYPE; @@ -491,21 +508,20 @@ export function scanKeyword(text: string): Token { if (text == "throw") return Token.THROW; if (text == "try") return Token.TRY; break; - } - case CharCode.v: { + + case CharCode.v: if (text == "var") return Token.VAR; if (text == "void") return Token.VOID; break; - } - case CharCode.w: { + + case CharCode.w: if (text == "while") return Token.WHILE; if (text == "with") return Token.WITH; break; - } - case CharCode.y: { + + case CharCode.y: if (text == "yield") return Token.YIELD; break; - } } return Token.INVALID; } @@ -730,9 +746,9 @@ export class Tokenizer extends DiagnosticEmitter { let c = text.charCodeAt(pos); if (c <= 0x7F) { let token = unchecked(BASIC_TOKENS[c]); - // Basic tokens if (token != Token.INVALID) { switch (token) { + // `\n`, `\t`, `\v`, `\f`, ` `, `\r`, `\r\n` case Token.WHITESPACE: { // `\r`, `\r\n` if (c == CharCode.CARRIAGERETURN) { @@ -741,34 +757,32 @@ export class Tokenizer extends DiagnosticEmitter { text.charCodeAt(pos) == CharCode.LINEFEED )) break; } - // `\n`, `\t`, `\v`, `\f`, ` ` ++pos; break; } // `$`, `_`, `h`, `j`, `q`, `u`, `x`, `z`, `A`..`Z` case Token.IDENTIFIER: + // `"`, `'`, ``` case Token.STRINGLITERAL: - case Token.TEMPLATELITERAL: { + case Token.TEMPLATELITERAL: this.pos = pos; return token; - } // `0`..`9`, `0.`, `0x`, `0b`, `0o` case Token.DIGIT: return this.scanNumber(c, text, pos, end); - // `a`..`z` case Token.IDENTIFIER_OR_KEYWORD: { - let posBefore = pos; + let startPos = pos; if (identifierHandling != IdentifierHandling.ALWAYS) { while ( ++pos < end && isIdentifierPart(text.charCodeAt(pos)) ) { /* nop */ } if ( - pos - posBefore >= MIN_KEYWORD_LENGTH && - pos - posBefore <= MAX_KEYWORD_LENGTH + pos - startPos >= MIN_KEYWORD_LENGTH && + pos - startPos <= MAX_KEYWORD_LENGTH ) { - let keyword = scanKeyword(text.substring(posBefore, pos)); + let keyword = scanKeyword(text.substring(startPos, pos)); if (keyword != Token.INVALID && !( identifierHandling == IdentifierHandling.PREFER && tokenIsAlsoIdentifier(keyword) @@ -778,23 +792,23 @@ export class Tokenizer extends DiagnosticEmitter { } } } - this.pos = posBefore; + this.pos = startPos; return Token.IDENTIFIER; } + // `+`, `-`, `*`, `/`, `=`, `>`, .. case Token.OPERATOR: { token = this.scanOperator(c, text, pos, end, maxTokenLength); pos = this.pos; if (token == Token.INVALID) continue; return token; } - // `[`, `{`, `(`, `,`, `:`, `;`, `@` and etc - default: { + // `[`, `{`, `(`, `:`, `;`, `@`, .. + default: this.pos = pos + 1; return token; - } } } - } else { + } else { // c > 0x7F // TODO: \uXXXX also support for identifiers if (isIdentifierStart(c)) { this.pos = pos; From 244312339d6e25871d4cd6d2fbd3cc90d3ebdd03 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 15:47:26 +0200 Subject: [PATCH 109/124] remove assert in scanKeyword --- src/tokenizer.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 6799488e95..f51a64c78c 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -324,7 +324,6 @@ const BASIC_TOKENS: Token[] = [ export function scanKeyword(text: string): Token { let len = text.length; - assert(len); switch (text.charCodeAt(0)) { case CharCode.a: if (len == 2) { From 745e615df61499a0b0c1d94c522bec04c5bec8b4 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 15:57:32 +0200 Subject: [PATCH 110/124] refactor --- src/tokenizer.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index f51a64c78c..b8c80697c0 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -754,10 +754,10 @@ export class Tokenizer extends DiagnosticEmitter { if (!( ++pos < end && text.charCodeAt(pos) == CharCode.LINEFEED - )) break; + )) continue; } ++pos; - break; + continue; } // `$`, `_`, `h`, `j`, `q`, `u`, `x`, `z`, `A`..`Z` case Token.IDENTIFIER: @@ -818,7 +818,7 @@ export class Tokenizer extends DiagnosticEmitter { } let start = pos++; if ( - isHighSurrogate(c) && pos < end && + pos < end && isHighSurrogate(c) && isLowSurrogate(text.charCodeAt(pos)) ) ++pos; this.error( From 02b02f31b96984de766fef01042f488a71dabf84 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 16:26:23 +0200 Subject: [PATCH 111/124] more --- src/tokenizer.ts | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index b8c80697c0..60f4e45c93 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -747,18 +747,6 @@ export class Tokenizer extends DiagnosticEmitter { let token = unchecked(BASIC_TOKENS[c]); if (token != Token.INVALID) { switch (token) { - // `\n`, `\t`, `\v`, `\f`, ` `, `\r`, `\r\n` - case Token.WHITESPACE: { - // `\r`, `\r\n` - if (c == CharCode.CARRIAGERETURN) { - if (!( - ++pos < end && - text.charCodeAt(pos) == CharCode.LINEFEED - )) continue; - } - ++pos; - continue; - } // `$`, `_`, `h`, `j`, `q`, `u`, `x`, `z`, `A`..`Z` case Token.IDENTIFIER: // `"`, `'`, ``` @@ -794,6 +782,18 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = startPos; return Token.IDENTIFIER; } + // `\n`, `\t`, `\v`, `\f`, ` `, `\r`, `\r\n` + case Token.WHITESPACE: { + // `\r`, `\r\n` + if (c == CharCode.CARRIAGERETURN) { + if (!( + ++pos < end && + text.charCodeAt(pos) == CharCode.LINEFEED + )) continue; + } + ++pos; + continue; + } // `+`, `-`, `*`, `/`, `=`, `>`, .. case Token.OPERATOR: { token = this.scanOperator(c, text, pos, end, maxTokenLength); From 12603415dfe44fa7d4441706b7ce089e4a14a899 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 16:32:31 +0200 Subject: [PATCH 112/124] more --- src/tokenizer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 60f4e45c93..b75c609c64 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -177,8 +177,8 @@ export const enum Token { // meta DIGIT, - WHITESPACE, IDENTIFIER_OR_KEYWORD, + WHITESPACE, OPERATOR, INVALID, EOF From abc9d125a1b74821528b10409a246249bef7da2e Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 16:47:18 +0200 Subject: [PATCH 113/124] refactor --- src/tokenizer.ts | 51 +++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index b75c609c64..5b27c59e4f 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -758,30 +758,8 @@ export class Tokenizer extends DiagnosticEmitter { case Token.DIGIT: return this.scanNumber(c, text, pos, end); // `a`..`z` - case Token.IDENTIFIER_OR_KEYWORD: { - let startPos = pos; - if (identifierHandling != IdentifierHandling.ALWAYS) { - while ( - ++pos < end && - isIdentifierPart(text.charCodeAt(pos)) - ) { /* nop */ } - if ( - pos - startPos >= MIN_KEYWORD_LENGTH && - pos - startPos <= MAX_KEYWORD_LENGTH - ) { - let keyword = scanKeyword(text.substring(startPos, pos)); - if (keyword != Token.INVALID && !( - identifierHandling == IdentifierHandling.PREFER && - tokenIsAlsoIdentifier(keyword) - )) { - this.pos = pos; - return keyword; - } - } - } - this.pos = startPos; - return Token.IDENTIFIER; - } + case Token.IDENTIFIER_OR_KEYWORD: + return this.scanKeyword(text, pos, end, identifierHandling); // `\n`, `\t`, `\v`, `\f`, ` `, `\r`, `\r\n` case Token.WHITESPACE: { // `\r`, `\r\n` @@ -892,6 +870,31 @@ export class Tokenizer extends DiagnosticEmitter { return Token.INTEGERLITERAL; } + private scanKeyword(text: string, pos: i32, end: i32, identifierHandling: IdentifierHandling): Token { + let startPos = pos; + if (identifierHandling != IdentifierHandling.ALWAYS) { + while ( + ++pos < end && + isIdentifierPart(text.charCodeAt(pos)) + ) { /* nop */ } + if ( + pos - startPos >= MIN_KEYWORD_LENGTH && + pos - startPos <= MAX_KEYWORD_LENGTH + ) { + let keyword = scanKeyword(text.substring(startPos, pos)); + if (keyword != Token.INVALID && !( + identifierHandling == IdentifierHandling.PREFER && + tokenIsAlsoIdentifier(keyword) + )) { + this.pos = pos; + return keyword; + } + } + } + this.pos = startPos; + return Token.IDENTIFIER; + } + private scanOperator(c: i32, text: string, pos: i32, end: i32, maxTokenLength: i32): Token { // Operator tokens switch (c) { From 009e7c08ccaff74552f53974cf026962eaf444e6 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 16:51:52 +0200 Subject: [PATCH 114/124] refactor --- src/tokenizer.ts | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 5b27c59e4f..4dac29b14b 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -177,7 +177,7 @@ export const enum Token { // meta DIGIT, - IDENTIFIER_OR_KEYWORD, + MAYBE_KEYWORD, WHITESPACE, OPERATOR, INVALID, @@ -289,31 +289,31 @@ const BASIC_TOKENS: Token[] = [ /* ^ */ Token.OPERATOR, /* _ */ Token.IDENTIFIER, /* ` */ Token.TEMPLATELITERAL, - /* a */ Token.IDENTIFIER_OR_KEYWORD, - /* b */ Token.IDENTIFIER_OR_KEYWORD, - /* c */ Token.IDENTIFIER_OR_KEYWORD, - /* d */ Token.IDENTIFIER_OR_KEYWORD, - /* e */ Token.IDENTIFIER_OR_KEYWORD, - /* f */ Token.IDENTIFIER_OR_KEYWORD, - /* g */ Token.IDENTIFIER_OR_KEYWORD, + /* a */ Token.MAYBE_KEYWORD, + /* b */ Token.MAYBE_KEYWORD, + /* c */ Token.MAYBE_KEYWORD, + /* d */ Token.MAYBE_KEYWORD, + /* e */ Token.MAYBE_KEYWORD, + /* f */ Token.MAYBE_KEYWORD, + /* g */ Token.MAYBE_KEYWORD, /* h */ Token.IDENTIFIER, - /* i */ Token.IDENTIFIER_OR_KEYWORD, + /* i */ Token.MAYBE_KEYWORD, /* j */ Token.IDENTIFIER, - /* k */ Token.IDENTIFIER_OR_KEYWORD, - /* l */ Token.IDENTIFIER_OR_KEYWORD, - /* m */ Token.IDENTIFIER_OR_KEYWORD, - /* n */ Token.IDENTIFIER_OR_KEYWORD, - /* o */ Token.IDENTIFIER_OR_KEYWORD, - /* p */ Token.IDENTIFIER_OR_KEYWORD, + /* k */ Token.MAYBE_KEYWORD, + /* l */ Token.MAYBE_KEYWORD, + /* m */ Token.MAYBE_KEYWORD, + /* n */ Token.MAYBE_KEYWORD, + /* o */ Token.MAYBE_KEYWORD, + /* p */ Token.MAYBE_KEYWORD, /* q */ Token.IDENTIFIER, - /* r */ Token.IDENTIFIER_OR_KEYWORD, - /* s */ Token.IDENTIFIER_OR_KEYWORD, - /* t */ Token.IDENTIFIER_OR_KEYWORD, + /* r */ Token.MAYBE_KEYWORD, + /* s */ Token.MAYBE_KEYWORD, + /* t */ Token.MAYBE_KEYWORD, /* u */ Token.IDENTIFIER, - /* v */ Token.IDENTIFIER_OR_KEYWORD, - /* w */ Token.IDENTIFIER_OR_KEYWORD, + /* v */ Token.MAYBE_KEYWORD, + /* w */ Token.MAYBE_KEYWORD, /* x */ Token.IDENTIFIER, - /* y */ Token.IDENTIFIER_OR_KEYWORD, + /* y */ Token.MAYBE_KEYWORD, /* z */ Token.IDENTIFIER, /* { */ Token.OPENBRACE, /* | */ Token.OPERATOR, @@ -758,7 +758,7 @@ export class Tokenizer extends DiagnosticEmitter { case Token.DIGIT: return this.scanNumber(c, text, pos, end); // `a`..`z` - case Token.IDENTIFIER_OR_KEYWORD: + case Token.MAYBE_KEYWORD: return this.scanKeyword(text, pos, end, identifierHandling); // `\n`, `\t`, `\v`, `\f`, ` `, `\r`, `\r\n` case Token.WHITESPACE: { From 8b5c1019b1a89f656a3d7ad2e2f5968ec2efc726 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 16:58:47 +0200 Subject: [PATCH 115/124] better --- src/tokenizer.ts | 73 ++++++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 4dac29b14b..5e3f7e6f9d 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -745,45 +745,44 @@ export class Tokenizer extends DiagnosticEmitter { let c = text.charCodeAt(pos); if (c <= 0x7F) { let token = unchecked(BASIC_TOKENS[c]); - if (token != Token.INVALID) { - switch (token) { - // `$`, `_`, `h`, `j`, `q`, `u`, `x`, `z`, `A`..`Z` - case Token.IDENTIFIER: - // `"`, `'`, ``` - case Token.STRINGLITERAL: - case Token.TEMPLATELITERAL: - this.pos = pos; - return token; - // `0`..`9`, `0.`, `0x`, `0b`, `0o` - case Token.DIGIT: - return this.scanNumber(c, text, pos, end); - // `a`..`z` - case Token.MAYBE_KEYWORD: - return this.scanKeyword(text, pos, end, identifierHandling); - // `\n`, `\t`, `\v`, `\f`, ` `, `\r`, `\r\n` - case Token.WHITESPACE: { - // `\r`, `\r\n` - if (c == CharCode.CARRIAGERETURN) { - if (!( - ++pos < end && - text.charCodeAt(pos) == CharCode.LINEFEED - )) continue; - } - ++pos; - continue; - } - // `+`, `-`, `*`, `/`, `=`, `>`, .. - case Token.OPERATOR: { - token = this.scanOperator(c, text, pos, end, maxTokenLength); - pos = this.pos; - if (token == Token.INVALID) continue; - return token; + if (token == Token.INVALID) break; + switch (token) { + // `$`, `_`, `h`, `j`, `q`, `u`, `x`, `z`, `A`..`Z` + case Token.IDENTIFIER: + // `"`, `'`, ``` + case Token.STRINGLITERAL: + case Token.TEMPLATELITERAL: + this.pos = pos; + return token; + // `0`..`9`, `0.`, `0x`, `0b`, `0o` + case Token.DIGIT: + return this.scanNumber(c, text, pos, end); + // `a`..`z` + case Token.MAYBE_KEYWORD: + return this.scanKeyword(text, pos, end, identifierHandling); + // `\n`, `\t`, `\v`, `\f`, ` `, `\r`, `\r\n` + case Token.WHITESPACE: { + // `\r`, `\r\n` + if (c == CharCode.CARRIAGERETURN) { + if (!( + ++pos < end && + text.charCodeAt(pos) == CharCode.LINEFEED + )) continue; } - // `[`, `{`, `(`, `:`, `;`, `@`, .. - default: - this.pos = pos + 1; - return token; + ++pos; + continue; + } + // `+`, `-`, `*`, `/`, `=`, `>`, .. + case Token.OPERATOR: { + token = this.scanOperator(c, text, pos, end, maxTokenLength); + pos = this.pos; + if (token == Token.INVALID) continue; + return token; } + // `[`, `{`, `(`, `:`, `;`, `@`, .. + default: + this.pos = pos + 1; + return token; } } else { // c > 0x7F // TODO: \uXXXX also support for identifiers From c86dd79545acc65916985bb68ad732e471974f92 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 17:16:53 +0200 Subject: [PATCH 116/124] fix --- src/tokenizer.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 5e3f7e6f9d..b11ba16eed 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -745,7 +745,10 @@ export class Tokenizer extends DiagnosticEmitter { let c = text.charCodeAt(pos); if (c <= 0x7F) { let token = unchecked(BASIC_TOKENS[c]); - if (token == Token.INVALID) break; + if (token == Token.INVALID) { + this.pos = pos; + return Token.INVALID; + } switch (token) { // `$`, `_`, `h`, `j`, `q`, `u`, `x`, `z`, `A`..`Z` case Token.IDENTIFIER: From 56cd089183bb5aeb15e72714d9c704279fe5adc7 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 17:19:19 +0200 Subject: [PATCH 117/124] better --- src/tokenizer.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index b11ba16eed..8b2e0593c2 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -745,16 +745,13 @@ export class Tokenizer extends DiagnosticEmitter { let c = text.charCodeAt(pos); if (c <= 0x7F) { let token = unchecked(BASIC_TOKENS[c]); - if (token == Token.INVALID) { - this.pos = pos; - return Token.INVALID; - } switch (token) { // `$`, `_`, `h`, `j`, `q`, `u`, `x`, `z`, `A`..`Z` case Token.IDENTIFIER: // `"`, `'`, ``` case Token.STRINGLITERAL: case Token.TEMPLATELITERAL: + case Token.INVALID: this.pos = pos; return token; // `0`..`9`, `0.`, `0x`, `0b`, `0o` From de390ac151c6ef7d8dd9752560154c28c038b281 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 18:28:17 +0200 Subject: [PATCH 118/124] refactor --- src/tokenizer.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 8b2e0593c2..06fad46965 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -756,7 +756,7 @@ export class Tokenizer extends DiagnosticEmitter { return token; // `0`..`9`, `0.`, `0x`, `0b`, `0o` case Token.DIGIT: - return this.scanNumber(c, text, pos, end); + return this.scanNumber(text, c, pos, end); // `a`..`z` case Token.MAYBE_KEYWORD: return this.scanKeyword(text, pos, end, identifierHandling); @@ -774,7 +774,7 @@ export class Tokenizer extends DiagnosticEmitter { } // `+`, `-`, `*`, `/`, `=`, `>`, .. case Token.OPERATOR: { - token = this.scanOperator(c, text, pos, end, maxTokenLength); + token = this.scanOperator(text, c, pos, end, maxTokenLength); pos = this.pos; if (token == Token.INVALID) continue; return token; @@ -842,7 +842,7 @@ export class Tokenizer extends DiagnosticEmitter { } // Scan and determine is it integer or float without update of position. - private scanNumber(c: i32, text: string, pos: i32, end: i32): Token { + private scanNumber(text: string, c: i32, pos: i32, end: i32): Token { this.pos = pos++; // `0.`, `0x`, `0b`, `0o` if (c == CharCode._0) { @@ -894,7 +894,7 @@ export class Tokenizer extends DiagnosticEmitter { return Token.IDENTIFIER; } - private scanOperator(c: i32, text: string, pos: i32, end: i32, maxTokenLength: i32): Token { + private scanOperator(text: string, c: i32, pos: i32, end: i32, maxTokenLength: i32): Token { // Operator tokens switch (c) { // `!`, `!=`, `!==` From ec0e9b88715284d0b10188bdb85790d1fd96a2b8 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 18:39:32 +0200 Subject: [PATCH 119/124] add COMMENT_OR_OPERATOR pseudo token --- src/tokenizer.ts | 53 +++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 06fad46965..912da04ac8 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -179,6 +179,7 @@ export const enum Token { DIGIT, MAYBE_KEYWORD, WHITESPACE, + COMMENT_OR_OPERATOR, OPERATOR, INVALID, EOF @@ -239,7 +240,7 @@ const BASIC_TOKENS: Token[] = [ /* , */ Token.COMMA, /* - */ Token.OPERATOR, /* . */ Token.OPERATOR, - /* / */ Token.OPERATOR, + /* / */ Token.COMMENT_OR_OPERATOR, /* 0 */ Token.DIGIT, /* 1 */ Token.DIGIT, /* 2 */ Token.DIGIT, @@ -772,13 +773,30 @@ export class Tokenizer extends DiagnosticEmitter { ++pos; continue; } - // `+`, `-`, `*`, `/`, `=`, `>`, .. - case Token.OPERATOR: { - token = this.scanOperator(text, c, pos, end, maxTokenLength); - pos = this.pos; - if (token == Token.INVALID) continue; - return token; + // `/`, `//`, `/*`, `/=`, `///` + case Token.COMMENT_OR_OPERATOR: { + ++pos; + if (maxTokenLength > 1 && pos < end) { + c = text.charCodeAt(pos); + if (c == CharCode.SLASH) { // single-line + pos = this.skipLineComment(text, pos, end); + continue; + } + if (c == CharCode.ASTERISK) { // multi-line + pos = this.skipBlockComment(text, pos, end); + continue; + } + if (c == CharCode.EQUALS) { + this.pos = pos + 1; + return Token.SLASH_EQUALS; + } + } + this.pos = pos; + return Token.SLASH; } + // `+`, `-`, `*`, `=`, `>`, .. + case Token.OPERATOR: + return this.scanOperator(text, c, pos, end, maxTokenLength); // `[`, `{`, `(`, `:`, `;`, `@`, .. default: this.pos = pos + 1; @@ -1036,27 +1054,6 @@ export class Tokenizer extends DiagnosticEmitter { this.pos = pos; return Token.DOT; } - // `/`, `//`, `/*`, `/=`, `///` - case CharCode.SLASH: { - ++pos; - if (maxTokenLength > 1 && pos < end) { - c = text.charCodeAt(pos); - if (c == CharCode.SLASH) { // single-line - pos = this.skipLineComment(text, pos, end); - break; - } - if (c == CharCode.ASTERISK) { // multi-line - pos = this.skipBlockComment(text, pos, end); - break; - } - if (c == CharCode.EQUALS) { - this.pos = pos + 1; - return Token.SLASH_EQUALS; - } - } - this.pos = pos; - return Token.SLASH; - } // `<`, `<<`, `<=` `<<=` case CharCode.LESSTHAN: { ++pos; From f6cb7e3dc2c988660a2171c82bc8e114e0f3bdd3 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 20:21:37 +0200 Subject: [PATCH 120/124] move Range to diagnostics --- src/ast.ts | 5 ++++- src/compiler.ts | 2 +- src/diagnostics.ts | 46 +++++++++++++++++++++++++++++++++++++---- src/parser.ts | 2 +- src/program.ts | 4 ++-- src/resolver.ts | 5 +---- src/tokenizer.ts | 51 +++++----------------------------------------- 7 files changed, 56 insertions(+), 59 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 00b87faf09..d587e7ad84 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -22,8 +22,11 @@ import { } from "./common"; import { - Token, Range +} from "./diagnostics"; + +import { + Token } from "./tokenizer"; import { diff --git a/src/compiler.ts b/src/compiler.ts index 033372fcb6..64283121a4 100644 --- a/src/compiler.ts +++ b/src/compiler.ts @@ -15,6 +15,7 @@ import { } from "./builtins"; import { + Range, DiagnosticCode, DiagnosticEmitter } from "./diagnostics"; @@ -108,7 +109,6 @@ import { import { Token, - Range, operatorTokenToString } from "./tokenizer"; diff --git a/src/diagnostics.ts b/src/diagnostics.ts index 43606fe95f..9c6fe53a2e 100644 --- a/src/diagnostics.ts +++ b/src/diagnostics.ts @@ -3,10 +3,6 @@ * @license Apache-2.0 */ -import { - Range -} from "./tokenizer"; - import { Source } from "./ast"; @@ -44,6 +40,48 @@ export const enum DiagnosticCategory { ERROR } +export class Range { + + source!: Source; + debugInfoRef: usize = 0; + + constructor(public start: i32, public end: i32) {} + + static join(a: Range, b: Range): Range { + if (a.source != b.source) throw new Error("source mismatch"); + let range = new Range( + a.start < b.start ? a.start : b.start, + a.end > b.end ? a.end : b.end + ); + range.source = a.source; + return range; + } + + equals(other: Range): bool { + return ( + this.source == other.source && + this.start == other.start && + this.end == other.end + ); + } + + get atStart(): Range { + let range = new Range(this.start, this.start); + range.source = this.source; + return range; + } + + get atEnd(): Range { + let range = new Range(this.end, this.end); + range.source = this.source; + return range; + } + + toString(): string { + return this.source.text.substring(this.start, this.end); + } +} + /** Returns the string representation of the specified diagnostic category. */ export function diagnosticCategoryToString(category: DiagnosticCategory): string { switch (category) { diff --git a/src/parser.ts b/src/parser.ts index efd44488ac..e32b39a7a5 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -16,13 +16,13 @@ import { import { Tokenizer, Token, - Range, CommentHandler, IdentifierHandling, isIllegalVariableIdentifier } from "./tokenizer"; import { + Range, DiagnosticCode, DiagnosticEmitter, DiagnosticMessage diff --git a/src/program.ts b/src/program.ts index 21bb0f02c2..0f912cd8de 100644 --- a/src/program.ts +++ b/src/program.ts @@ -57,6 +57,7 @@ import { } from "./compiler"; import { + Range, DiagnosticCode, DiagnosticMessage, DiagnosticEmitter @@ -70,8 +71,7 @@ import { } from "./types"; import { - Token, - Range + Token } from "./tokenizer"; import { diff --git a/src/resolver.ts b/src/resolver.ts index e6171782fc..57f72dfd50 100644 --- a/src/resolver.ts +++ b/src/resolver.ts @@ -12,6 +12,7 @@ */ import { + Range, DiagnosticEmitter, DiagnosticCode } from "./diagnostics"; @@ -44,10 +45,6 @@ import { Flow } from "./flow"; -import { - Range -} from "./tokenizer"; - import { FunctionTypeNode, ParameterKind, diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 912da04ac8..1d4d1940d3 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -12,6 +12,7 @@ */ import { + Range, DiagnosticCode, DiagnosticMessage, DiagnosticEmitter @@ -626,48 +627,6 @@ export function operatorTokenToString(token: Token): string { } } -export class Range { - - source!: Source; - debugInfoRef: usize = 0; - - constructor(public start: i32, public end: i32) {} - - static join(a: Range, b: Range): Range { - if (a.source != b.source) throw new Error("source mismatch"); - let range = new Range( - a.start < b.start ? a.start : b.start, - a.end > b.end ? a.end : b.end - ); - range.source = a.source; - return range; - } - - equals(other: Range): bool { - return ( - this.source == other.source && - this.start == other.start && - this.end == other.end - ); - } - - get atStart(): Range { - let range = new Range(this.start, this.start); - range.source = this.source; - return range; - } - - get atEnd(): Range { - let range = new Range(this.end, this.end); - range.source = this.source; - return range; - } - - toString(): string { - return this.source.text.substring(this.start, this.end); - } -} - /** Handler for intercepting comments while tokenizing. */ export type CommentHandler = (kind: CommentKind, text: string, range: Range) => void; @@ -1815,10 +1774,10 @@ export class Tokenizer extends DiagnosticEmitter { var text = this.source.text; var end = this.end; var start = this.pos; - var hasSep = this.scanFloatAndSeparators(false); + var hasSep = this.scanFloatPart(false); if (this.pos < end && text.charCodeAt(this.pos) == CharCode.DOT) { ++this.pos; - hasSep |= this.scanFloatAndSeparators(); + hasSep |= this.scanFloatPart(); } if (this.pos < end) { let c = text.charCodeAt(this.pos); @@ -1830,7 +1789,7 @@ export class Tokenizer extends DiagnosticEmitter { ) { ++this.pos; } - hasSep |= this.scanFloatAndSeparators(); + hasSep |= this.scanFloatPart(); } } let pos = this.pos; @@ -1847,7 +1806,7 @@ export class Tokenizer extends DiagnosticEmitter { } /** Scan past one section of a decimal float literal. Returns `1` if separators encountered. */ - private scanFloatAndSeparators(allowLeadingZeroSep: bool = true): i32 { + private scanFloatPart(allowLeadingZeroSep: bool = true): i32 { var text = this.source.text; var end = this.end; var pos = this.pos; From f7a74f0eec88eea680486f81883ba9cd2c157a72 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 21:02:57 +0200 Subject: [PATCH 121/124] refactor comment --- src/tokenizer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 1d4d1940d3..9fda927128 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -732,7 +732,7 @@ export class Tokenizer extends DiagnosticEmitter { ++pos; continue; } - // `/`, `//`, `/*`, `/=`, `///` + // `/`, `/=`, `/*`, `//`, `///` case Token.COMMENT_OR_OPERATOR: { ++pos; if (maxTokenLength > 1 && pos < end) { From e9603bbb7907c78f4215f78724fc3898f92dd328 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 6 Dec 2021 23:01:51 +0200 Subject: [PATCH 122/124] unchecked --- src/passes/pass.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/pass.ts b/src/passes/pass.ts index 5106c307df..32262c3453 100644 --- a/src/passes/pass.ts +++ b/src/passes/pass.ts @@ -197,7 +197,7 @@ export abstract class Visitor { get parentExpressionOrNull(): ExpressionRef { var stack = this.stack; var length = stack.length; - return length ? stack[length - 1] : 0; + return length ? unchecked(stack[length - 1]) : 0; } // Expressions From f131f29b8d2eacefc934b3f1eab70b6301a2b750 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Wed, 8 Dec 2021 11:37:01 +0200 Subject: [PATCH 123/124] refactoring --- src/parser.ts | 86 +++++++++++++++--------------------------------- src/tokenizer.ts | 4 +-- 2 files changed, 29 insertions(+), 61 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index e32b39a7a5..6e1962b635 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -178,7 +178,7 @@ export class Parser extends DiagnosticEmitter { var tn = new Tokenizer(source, this.diagnostics); tn.onComment = this.onComment; var statements = source.statements; - while (!tn.skip(Token.EOF)) { + while (!tn.skip(Token.ENDOFFILE)) { let statement = this.parseTopLevelStatement(tn, null); if (statement) { statements.push(statement); @@ -1769,7 +1769,7 @@ export class Parser extends DiagnosticEmitter { } } else { this.skipStatement(tn); - if (tn.skip(Token.EOF)) { + if (tn.skip(Token.ENDOFFILE)) { this.error( DiagnosticCode._0_expected, tn.range(), "}" @@ -1827,7 +1827,7 @@ export class Parser extends DiagnosticEmitter { } } else { this.skipStatement(tn); - if (tn.skip(Token.EOF)) { + if (tn.skip(Token.ENDOFFILE)) { this.error( DiagnosticCode._0_expected, tn.range(), "}" @@ -2474,7 +2474,7 @@ export class Parser extends DiagnosticEmitter { if (member) members.push(member); else { this.skipStatement(tn); - if (tn.skip(Token.EOF)) { + if (tn.skip(Token.ENDOFFILE)) { this.error( DiagnosticCode._0_expected, tn.range(), "}" @@ -2946,7 +2946,7 @@ export class Parser extends DiagnosticEmitter { let state = tn.mark(); let statement = this.parseStatement(tn, topLevel); if (!statement) { - if (tn.token == Token.EOF) return null; + if (tn.token == Token.ENDOFFILE) return null; tn.reset(state); this.skipStatement(tn); } else { @@ -3903,7 +3903,7 @@ export class Parser extends DiagnosticEmitter { return this.parseClassExpression(tn); } default: { - if (token == Token.EOF) { + if (token == Token.ENDOFFILE) { this.error( DiagnosticCode.Unexpected_end_of_text, tn.range(startPos) @@ -4169,8 +4169,9 @@ export class Parser extends DiagnosticEmitter { case Token.BAR: case Token.CARET: case Token.AMPERSAND_AMPERSAND: - case Token.BAR_BAR: + case Token.BAR_BAR: { ++nextPrecedence; + } // BinaryExpression (right associative) case Token.EQUALS: case Token.PLUS_EQUALS: @@ -4287,7 +4288,7 @@ export class Parser extends DiagnosticEmitter { do { let nextToken = tn.peek(true); if ( - nextToken == Token.EOF || // next step should handle this + nextToken == Token.ENDOFFILE || // next step should handle this nextToken == Token.SEMICOLON // end of the statement for sure ) { tn.next(); @@ -4335,7 +4336,7 @@ export class Parser extends DiagnosticEmitter { var again = true; do { switch (tn.next()) { - case Token.EOF: { + case Token.ENDOFFILE: { this.error( DiagnosticCode._0_expected, tn.range(), "}" @@ -4417,12 +4418,8 @@ export const enum Precedence { /** Determines the precende of a non-starting token. */ function determinePrecedence(kind: Token): Precedence { switch (kind) { - case Token.COMMA: - return Precedence.COMMA; - - case Token.YIELD: - return Precedence.YIELD; - + case Token.COMMA: return Precedence.COMMA; + case Token.YIELD: return Precedence.YIELD; case Token.EQUALS: case Token.PLUS_EQUALS: case Token.MINUS_EQUALS: @@ -4438,68 +4435,39 @@ function determinePrecedence(kind: Token): Precedence { case Token.GREATERTHAN_GREATERTHAN_GREATERTHAN_EQUALS: case Token.AMPERSAND_EQUALS: case Token.CARET_EQUALS: - case Token.BAR_EQUALS: - return Precedence.ASSIGNMENT; - - case Token.QUESTION: - return Precedence.CONDITIONAL; - + case Token.BAR_EQUALS: return Precedence.ASSIGNMENT; + case Token.QUESTION: return Precedence.CONDITIONAL; case Token.BAR_BAR: - case Token.QUESTION_QUESTION: - return Precedence.LOGICAL_OR; - - case Token.AMPERSAND_AMPERSAND: - return Precedence.LOGICAL_AND; - - case Token.BAR: - return Precedence.BITWISE_OR; - - case Token.CARET: - return Precedence.BITWISE_XOR; - - case Token.AMPERSAND: - return Precedence.BITWISE_AND; - + case Token.QUESTION_QUESTION: return Precedence.LOGICAL_OR; + case Token.AMPERSAND_AMPERSAND: return Precedence.LOGICAL_AND; + case Token.BAR: return Precedence.BITWISE_OR; + case Token.CARET: return Precedence.BITWISE_XOR; + case Token.AMPERSAND: return Precedence.BITWISE_AND; case Token.EQUALS_EQUALS: case Token.EXCLAMATION_EQUALS: case Token.EQUALS_EQUALS_EQUALS: - case Token.EXCLAMATION_EQUALS_EQUALS: - return Precedence.EQUALITY; - + case Token.EXCLAMATION_EQUALS_EQUALS: return Precedence.EQUALITY; case Token.AS: case Token.IN: case Token.INSTANCEOF: case Token.LESSTHAN: case Token.GREATERTHAN: case Token.LESSTHAN_EQUALS: - case Token.GREATERTHAN_EQUALS: - return Precedence.RELATIONAL; - + case Token.GREATERTHAN_EQUALS: return Precedence.RELATIONAL; case Token.LESSTHAN_LESSTHAN: case Token.GREATERTHAN_GREATERTHAN: - case Token.GREATERTHAN_GREATERTHAN_GREATERTHAN: - return Precedence.SHIFT; - + case Token.GREATERTHAN_GREATERTHAN_GREATERTHAN: return Precedence.SHIFT; case Token.PLUS: - case Token.MINUS: - return Precedence.ADDITIVE; - + case Token.MINUS: return Precedence.ADDITIVE; case Token.ASTERISK: case Token.SLASH: - case Token.PERCENT: - return Precedence.MULTIPLICATIVE; - - case Token.ASTERISK_ASTERISK: - return Precedence.EXPONENTIATED; - + case Token.PERCENT: return Precedence.MULTIPLICATIVE; + case Token.ASTERISK_ASTERISK: return Precedence.EXPONENTIATED; case Token.PLUS_PLUS: - case Token.MINUS_MINUS: - return Precedence.UNARY_POSTFIX; - + case Token.MINUS_MINUS: return Precedence.UNARY_POSTFIX; case Token.DOT: case Token.OPENBRACKET: - case Token.EXCLAMATION: - return Precedence.MEMBERACCESS; + case Token.EXCLAMATION: return Precedence.MEMBERACCESS; } return Precedence.NONE; } diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 9fda927128..82e53a8018 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -183,7 +183,7 @@ export const enum Token { COMMENT_OR_OPERATOR, OPERATOR, INVALID, - EOF + ENDOFFILE } export const enum IdentifierHandling { @@ -784,7 +784,7 @@ export class Tokenizer extends DiagnosticEmitter { } } this.pos = pos; - return Token.EOF; + return Token.ENDOFFILE; } peek( From ab507d5b5f6a0bbc465da119e8685c7893b32732 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Wed, 8 Dec 2021 11:39:05 +0200 Subject: [PATCH 124/124] MAYBE_KEYWORD -> IDENTIFIER_OR_KEYWORD --- src/tokenizer.ts | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 82e53a8018..3ef1f9ca46 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -178,7 +178,7 @@ export const enum Token { // meta DIGIT, - MAYBE_KEYWORD, + IDENTIFIER_OR_KEYWORD, WHITESPACE, COMMENT_OR_OPERATOR, OPERATOR, @@ -291,31 +291,31 @@ const BASIC_TOKENS: Token[] = [ /* ^ */ Token.OPERATOR, /* _ */ Token.IDENTIFIER, /* ` */ Token.TEMPLATELITERAL, - /* a */ Token.MAYBE_KEYWORD, - /* b */ Token.MAYBE_KEYWORD, - /* c */ Token.MAYBE_KEYWORD, - /* d */ Token.MAYBE_KEYWORD, - /* e */ Token.MAYBE_KEYWORD, - /* f */ Token.MAYBE_KEYWORD, - /* g */ Token.MAYBE_KEYWORD, + /* a */ Token.IDENTIFIER_OR_KEYWORD, + /* b */ Token.IDENTIFIER_OR_KEYWORD, + /* c */ Token.IDENTIFIER_OR_KEYWORD, + /* d */ Token.IDENTIFIER_OR_KEYWORD, + /* e */ Token.IDENTIFIER_OR_KEYWORD, + /* f */ Token.IDENTIFIER_OR_KEYWORD, + /* g */ Token.IDENTIFIER_OR_KEYWORD, /* h */ Token.IDENTIFIER, - /* i */ Token.MAYBE_KEYWORD, + /* i */ Token.IDENTIFIER_OR_KEYWORD, /* j */ Token.IDENTIFIER, - /* k */ Token.MAYBE_KEYWORD, - /* l */ Token.MAYBE_KEYWORD, - /* m */ Token.MAYBE_KEYWORD, - /* n */ Token.MAYBE_KEYWORD, - /* o */ Token.MAYBE_KEYWORD, - /* p */ Token.MAYBE_KEYWORD, + /* k */ Token.IDENTIFIER_OR_KEYWORD, + /* l */ Token.IDENTIFIER_OR_KEYWORD, + /* m */ Token.IDENTIFIER_OR_KEYWORD, + /* n */ Token.IDENTIFIER_OR_KEYWORD, + /* o */ Token.IDENTIFIER_OR_KEYWORD, + /* p */ Token.IDENTIFIER_OR_KEYWORD, /* q */ Token.IDENTIFIER, - /* r */ Token.MAYBE_KEYWORD, - /* s */ Token.MAYBE_KEYWORD, - /* t */ Token.MAYBE_KEYWORD, + /* r */ Token.IDENTIFIER_OR_KEYWORD, + /* s */ Token.IDENTIFIER_OR_KEYWORD, + /* t */ Token.IDENTIFIER_OR_KEYWORD, /* u */ Token.IDENTIFIER, - /* v */ Token.MAYBE_KEYWORD, - /* w */ Token.MAYBE_KEYWORD, + /* v */ Token.IDENTIFIER_OR_KEYWORD, + /* w */ Token.IDENTIFIER_OR_KEYWORD, /* x */ Token.IDENTIFIER, - /* y */ Token.MAYBE_KEYWORD, + /* y */ Token.IDENTIFIER_OR_KEYWORD, /* z */ Token.IDENTIFIER, /* { */ Token.OPENBRACE, /* | */ Token.OPERATOR, @@ -718,7 +718,7 @@ export class Tokenizer extends DiagnosticEmitter { case Token.DIGIT: return this.scanNumber(text, c, pos, end); // `a`..`z` - case Token.MAYBE_KEYWORD: + case Token.IDENTIFIER_OR_KEYWORD: return this.scanKeyword(text, pos, end, identifierHandling); // `\n`, `\t`, `\v`, `\f`, ` `, `\r`, `\r\n` case Token.WHITESPACE: {