From bb3cede04a6936018b1c88e499c18cdf6c6b3b0d Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 14 Nov 2021 16:00:18 +0200 Subject: [PATCH 1/7] refactor text utils --- src/util/binary.ts | 8 ++++---- src/util/text.ts | 41 ++++++++++++++++++++++------------------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/src/util/binary.ts b/src/util/binary.ts index ea31c07f2e..4346b42523 100644 --- a/src/util/binary.ts +++ b/src/util/binary.ts @@ -15,19 +15,19 @@ export function writeI8(value: i32, buffer: Uint8Array, offset: i32): void { /** Reads a 16-bit integer from the specified buffer. */ export function readI16(buffer: Uint8Array, offset: i32): i32 { - return i32(buffer[offset ]) + return i32(buffer[offset ]) << 0 | i32(buffer[offset + 1]) << 8; } /** Writes a 16-bit integer to the specified buffer. */ export function writeI16(value: i32, buffer: Uint8Array, offset: i32): void { - buffer[offset ] = value; + buffer[offset ] = value >>> 0; buffer[offset + 1] = value >>> 8; } /** Reads a 32-bit integer from the specified buffer. */ export function readI32(buffer: Uint8Array, offset: i32): i32 { - return i32(buffer[offset ]) + return i32(buffer[offset ]) << 0 | i32(buffer[offset + 1]) << 8 | i32(buffer[offset + 2]) << 16 | i32(buffer[offset + 3]) << 24; @@ -35,7 +35,7 @@ export function readI32(buffer: Uint8Array, offset: i32): i32 { /** Writes a 32-bit integer to the specified buffer. */ export function writeI32(value: i32, buffer: Uint8Array, offset: i32): void { - buffer[offset ] = value; + buffer[offset ] = value >>> 0; buffer[offset + 1] = value >>> 8; buffer[offset + 2] = value >>> 16; buffer[offset + 3] = value >>> 24; diff --git a/src/util/text.ts b/src/util/text.ts index 8df6c407a6..c1b33ec8e9 100644 --- a/src/util/text.ts +++ b/src/util/text.ts @@ -139,7 +139,7 @@ export const enum CharCode { } /** Tests if the specified character code is some sort of line break. */ -export function isLineBreak(c: CharCode): bool { +export function isLineBreak(c: u32): bool { switch (c) { case CharCode.LINEFEED: case CharCode.CARRIAGERETURN: @@ -154,7 +154,7 @@ export function isLineBreak(c: CharCode): bool { } /** Tests if the specified character code is some sort of white space. */ -export function isWhiteSpace(c: i32): bool { +export function isWhiteSpace(c: u32): bool { switch (c) { case CharCode.SPACE: case CharCode.TAB: @@ -175,42 +175,45 @@ export function isWhiteSpace(c: i32): bool { } } +export function isAlpha(c: u32): bool { + let c0 = c | 32; // unify uppercases and lowercases a|A - z|Z + return c0 >= CharCode.a && c0 <= CharCode.z; +} + /** Tests if the specified character code is a valid decimal digit. */ -export function isDecimalDigit(c: i32): bool { +export function isDecimalDigit(c: u32): bool { return c >= CharCode._0 && c <= CharCode._9; } /** Tests if the specified character code is a valid octal digit. */ -export function isOctalDigit(c: i32): bool { +export function isOctalDigit(c: u32): bool { return c >= CharCode._0 && c <= CharCode._7; } /** Tests if the specified character code is a valid hexadecimal digit. */ -export function isHexDigit(c: i32): bool { - return isDecimalDigit(c) || ((c | 32) >= CharCode.a && (c | 32) <= CharCode.f); +export function isHexDigit(c: u32): bool { + let c0 = c | 32; // unify uppercases and lowercases a|A - f|F + return isDecimalDigit(c) + || (c0 >= CharCode.a && c0 <= CharCode.f); } /** Tests if the specified character code is trivially alphanumeric. */ -export function isTrivialAlphanum(code: i32): bool { - return code >= CharCode.a && code <= CharCode.z - || code >= CharCode.A && code <= CharCode.Z - || code >= CharCode._0 && code <= CharCode._9; +export function isTrivialAlphanum(c: u32): bool { + return isAlpha(c) || isDecimalDigit(c); } /** Tests if the specified character code is a valid start of an identifier. */ -export function isIdentifierStart(c: i32): bool { - let c0 = c | 32; // unify uppercases and lowercases a|A - z|Z - return c0 >= CharCode.a && c0 <= CharCode.z +export function isIdentifierStart(c: u32): bool { + return isAlpha(c) || c == CharCode._ || c == CharCode.DOLLAR || c > 0x7F && isUnicodeIdentifierStart(c); } /** Tests if the specified character code is a valid part of an identifier. */ -export function isIdentifierPart(c: i32): bool { - const c0 = c | 32; // unify uppercases and lowercases a|A - z|Z - return c0 >= CharCode.a && c0 <= CharCode.z - || c >= CharCode._0 && c <= CharCode._9 +export function isIdentifierPart(c: u32): bool { + return isAlpha(c) + || isDecimalDigit(c) || c == CharCode._ || c == CharCode.DOLLAR || c > 0x7F && isUnicodeIdentifierPart(c); @@ -358,11 +361,11 @@ function lookupInUnicodeMap(code: u16, map: u16[]): bool { var lo = 0; var hi = map.length; - var mid: i32; + var mid: u32; var midVal: u16; while (lo + 1 < hi) { - mid = lo + ((hi - lo) >> 1); + mid = lo + ((hi - lo) >>> 1); mid -= (mid & 1); midVal = map[mid]; if (midVal <= code && code <= map[mid + 1]) { From 244e574aa6455625e06b9df10eddf084af0b87ea Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 14 Nov 2021 16:19:27 +0200 Subject: [PATCH 2/7] fix --- src/util/text.ts | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/util/text.ts b/src/util/text.ts index c1b33ec8e9..b61a00502d 100644 --- a/src/util/text.ts +++ b/src/util/text.ts @@ -139,7 +139,7 @@ export const enum CharCode { } /** Tests if the specified character code is some sort of line break. */ -export function isLineBreak(c: u32): bool { +export function isLineBreak(c: i32): bool { switch (c) { case CharCode.LINEFEED: case CharCode.CARRIAGERETURN: @@ -154,7 +154,7 @@ export function isLineBreak(c: u32): bool { } /** Tests if the specified character code is some sort of white space. */ -export function isWhiteSpace(c: u32): bool { +export function isWhiteSpace(c: i32): bool { switch (c) { case CharCode.SPACE: case CharCode.TAB: @@ -175,35 +175,35 @@ export function isWhiteSpace(c: u32): bool { } } -export function isAlpha(c: u32): bool { +export function isAlpha(c: i32): bool { let c0 = c | 32; // unify uppercases and lowercases a|A - z|Z return c0 >= CharCode.a && c0 <= CharCode.z; } /** Tests if the specified character code is a valid decimal digit. */ -export function isDecimalDigit(c: u32): bool { +export function isDecimalDigit(c: i32): bool { return c >= CharCode._0 && c <= CharCode._9; } /** Tests if the specified character code is a valid octal digit. */ -export function isOctalDigit(c: u32): bool { +export function isOctalDigit(c: i32): bool { return c >= CharCode._0 && c <= CharCode._7; } /** Tests if the specified character code is a valid hexadecimal digit. */ -export function isHexDigit(c: u32): bool { +export function isHexDigit(c: i32): bool { let c0 = c | 32; // unify uppercases and lowercases a|A - f|F return isDecimalDigit(c) || (c0 >= CharCode.a && c0 <= CharCode.f); } /** Tests if the specified character code is trivially alphanumeric. */ -export function isTrivialAlphanum(c: u32): bool { +export function isTrivialAlphanum(c: i32): bool { return isAlpha(c) || isDecimalDigit(c); } /** Tests if the specified character code is a valid start of an identifier. */ -export function isIdentifierStart(c: u32): bool { +export function isIdentifierStart(c: i32): bool { return isAlpha(c) || c == CharCode._ || c == CharCode.DOLLAR @@ -211,7 +211,7 @@ export function isIdentifierStart(c: u32): bool { } /** Tests if the specified character code is a valid part of an identifier. */ -export function isIdentifierPart(c: u32): bool { +export function isIdentifierPart(c: i32): bool { return isAlpha(c) || isDecimalDigit(c) || c == CharCode._ From 15e1b17b4307ef3117f2b5d9de154a84c39bc0c6 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 14 Nov 2021 17:19:30 +0200 Subject: [PATCH 3/7] more --- src/util/text.ts | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/util/text.ts b/src/util/text.ts index b61a00502d..e72730f801 100644 --- a/src/util/text.ts +++ b/src/util/text.ts @@ -207,7 +207,7 @@ export function isIdentifierStart(c: i32): bool { return isAlpha(c) || c == CharCode._ || c == CharCode.DOLLAR - || c > 0x7F && isUnicodeIdentifierStart(c); + || c >= 170 && c <= 65500 && isUnicodeIdentifierStart(c); } /** Tests if the specified character code is a valid part of an identifier. */ @@ -216,7 +216,7 @@ export function isIdentifierPart(c: i32): bool { || isDecimalDigit(c) || c == CharCode._ || c == CharCode.DOLLAR - || c > 0x7F && isUnicodeIdentifierPart(c); + || c >= 170 && c <= 65500 && isUnicodeIdentifierPart(c); } // storing as u16 to save memory @@ -357,8 +357,6 @@ const unicodeIdentifierPart: u16[] = [ ]; function lookupInUnicodeMap(code: u16, map: u16[]): bool { - if (code < map[0]) return false; - var lo = 0; var hi = map.length; var mid: u32; @@ -381,13 +379,11 @@ function lookupInUnicodeMap(code: u16, map: u16[]): bool { } function isUnicodeIdentifierStart(code: i32): bool { - return code < 170 || code > 65500 ? false : - lookupInUnicodeMap(code as u16, unicodeIdentifierStart); + return lookupInUnicodeMap(code as u16, unicodeIdentifierStart); } function isUnicodeIdentifierPart(code: i32): bool { - return code < 170 || code > 65500 ? false : - lookupInUnicodeMap(code as u16, unicodeIdentifierPart); + return lookupInUnicodeMap(code as u16, unicodeIdentifierPart); } const indentX1 = " "; From c8aa97d971d757a045b445b5d7ff9c3bb35e9475 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 14 Nov 2021 18:15:17 +0200 Subject: [PATCH 4/7] cleanup --- src/util/text.ts | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/util/text.ts b/src/util/text.ts index e72730f801..c7b0be6ae8 100644 --- a/src/util/text.ts +++ b/src/util/text.ts @@ -207,7 +207,8 @@ export function isIdentifierStart(c: i32): bool { return isAlpha(c) || c == CharCode._ || c == CharCode.DOLLAR - || c >= 170 && c <= 65500 && isUnicodeIdentifierStart(c); + || c >= 170 && c <= 65500 + && lookupInUnicodeMap(c as u16, unicodeIdentifierStart); } /** Tests if the specified character code is a valid part of an identifier. */ @@ -216,7 +217,8 @@ export function isIdentifierPart(c: i32): bool { || isDecimalDigit(c) || c == CharCode._ || c == CharCode.DOLLAR - || c >= 170 && c <= 65500 && isUnicodeIdentifierPart(c); + || c >= 170 && c <= 65500 + && lookupInUnicodeMap(c as u16, unicodeIdentifierPart); } // storing as u16 to save memory @@ -378,14 +380,6 @@ function lookupInUnicodeMap(code: u16, map: u16[]): bool { return false; } -function isUnicodeIdentifierStart(code: i32): bool { - return lookupInUnicodeMap(code as u16, unicodeIdentifierStart); -} - -function isUnicodeIdentifierPart(code: i32): bool { - return lookupInUnicodeMap(code as u16, unicodeIdentifierPart); -} - const indentX1 = " "; const indentX2 = " "; const indentX4 = " "; From 765a6124b73c295a516452dfbe09c7453d6787a6 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 14 Nov 2021 18:20:01 +0200 Subject: [PATCH 5/7] revert binary.ts changes --- src/util/binary.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/util/binary.ts b/src/util/binary.ts index 4346b42523..ea31c07f2e 100644 --- a/src/util/binary.ts +++ b/src/util/binary.ts @@ -15,19 +15,19 @@ export function writeI8(value: i32, buffer: Uint8Array, offset: i32): void { /** Reads a 16-bit integer from the specified buffer. */ export function readI16(buffer: Uint8Array, offset: i32): i32 { - return i32(buffer[offset ]) << 0 + return i32(buffer[offset ]) | i32(buffer[offset + 1]) << 8; } /** Writes a 16-bit integer to the specified buffer. */ export function writeI16(value: i32, buffer: Uint8Array, offset: i32): void { - buffer[offset ] = value >>> 0; + buffer[offset ] = value; buffer[offset + 1] = value >>> 8; } /** Reads a 32-bit integer from the specified buffer. */ export function readI32(buffer: Uint8Array, offset: i32): i32 { - return i32(buffer[offset ]) << 0 + return i32(buffer[offset ]) | i32(buffer[offset + 1]) << 8 | i32(buffer[offset + 2]) << 16 | i32(buffer[offset + 3]) << 24; @@ -35,7 +35,7 @@ export function readI32(buffer: Uint8Array, offset: i32): i32 { /** Writes a 32-bit integer to the specified buffer. */ export function writeI32(value: i32, buffer: Uint8Array, offset: i32): void { - buffer[offset ] = value >>> 0; + buffer[offset ] = value; buffer[offset + 1] = value >>> 8; buffer[offset + 2] = value >>> 16; buffer[offset + 3] = value >>> 24; From 521bdc61ae827eb18b862cc23f0ecb188d2a51ff Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 14 Nov 2021 18:25:49 +0200 Subject: [PATCH 6/7] more simpler names for helpers --- src/ast.ts | 4 ++-- src/tokenizer.ts | 14 +++++++------- src/util/text.ts | 16 +++++++--------- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 38114d8fae..25c37447ae 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -30,7 +30,7 @@ import { normalizePath, resolvePath, CharCode, - isTrivialAlphanum + isAlphaNum } from "./util"; import { @@ -2347,7 +2347,7 @@ export function mangleInternalPath(path: string): string { if (pos >= 0 && len - pos >= 2) { // at least one char plus dot let cur = pos; while (++cur < len) { - if (!isTrivialAlphanum(path.charCodeAt(cur))) { + if (!isAlphaNum(path.charCodeAt(cur))) { assert(false); // not a valid external path return path; } diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 5aec3bd243..56c1bcf5b4 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -28,8 +28,8 @@ import { isWhiteSpace, isIdentifierStart, isIdentifierPart, - isDecimalDigit, - isOctalDigit + isDecNum, + isOctNum } from "./util"; /** Named token types. */ @@ -703,7 +703,7 @@ export class Tokenizer extends DiagnosticEmitter { ++pos; if (maxTokenLength > 1 && pos < end) { let chr = text.charCodeAt(pos); - if (isDecimalDigit(chr)) { + if (isDecNum(chr)) { this.pos = pos - 1; return Token.FLOATLITERAL; // expects a call to readFloat } @@ -1177,7 +1177,7 @@ export class Tokenizer extends DiagnosticEmitter { var c = text.charCodeAt(this.pos++); switch (c) { case CharCode._0: { - if (isTaggedTemplate && this.pos < end && isDecimalDigit(text.charCodeAt(this.pos))) { + if (isTaggedTemplate && this.pos < end && isDecNum(text.charCodeAt(this.pos))) { ++this.pos; return text.substring(start, this.pos); } @@ -1331,7 +1331,7 @@ export class Tokenizer extends DiagnosticEmitter { return this.readOctalInteger(); } } - if (isOctalDigit(text.charCodeAt(pos + 1))) { + if (isOctNum(text.charCodeAt(pos + 1))) { let start = pos; this.pos = pos + 1; let value = this.readOctalInteger(); @@ -1578,7 +1578,7 @@ export class Tokenizer extends DiagnosticEmitter { if ( ++this.pos < end && (c = text.charCodeAt(this.pos)) == CharCode.MINUS || c == CharCode.PLUS && - isDecimalDigit(text.charCodeAt(this.pos + 1)) + isDecNum(text.charCodeAt(this.pos + 1)) ) { ++this.pos; } @@ -1618,7 +1618,7 @@ export class Tokenizer extends DiagnosticEmitter { } sepEnd = pos + 1; ++sepCount; - } else if (!isDecimalDigit(c)) { + } else if (!isDecNum(c)) { break; } ++pos; diff --git a/src/util/text.ts b/src/util/text.ts index c7b0be6ae8..e9bf2de392 100644 --- a/src/util/text.ts +++ b/src/util/text.ts @@ -181,25 +181,24 @@ export function isAlpha(c: i32): bool { } /** Tests if the specified character code is a valid decimal digit. */ -export function isDecimalDigit(c: i32): bool { +export function isDecNum(c: i32): bool { return c >= CharCode._0 && c <= CharCode._9; } /** Tests if the specified character code is a valid octal digit. */ -export function isOctalDigit(c: i32): bool { +export function isOctNum(c: i32): bool { return c >= CharCode._0 && c <= CharCode._7; } /** Tests if the specified character code is a valid hexadecimal digit. */ -export function isHexDigit(c: i32): bool { +export function isHexNum(c: i32): bool { let c0 = c | 32; // unify uppercases and lowercases a|A - f|F - return isDecimalDigit(c) - || (c0 >= CharCode.a && c0 <= CharCode.f); + return isDecNum(c) || (c0 >= CharCode.a && c0 <= CharCode.f); } /** Tests if the specified character code is trivially alphanumeric. */ -export function isTrivialAlphanum(c: i32): bool { - return isAlpha(c) || isDecimalDigit(c); +export function isAlphaNum(c: i32): bool { + return isAlpha(c) || isDecNum(c); } /** Tests if the specified character code is a valid start of an identifier. */ @@ -213,8 +212,7 @@ export function isIdentifierStart(c: i32): bool { /** Tests if the specified character code is a valid part of an identifier. */ export function isIdentifierPart(c: i32): bool { - return isAlpha(c) - || isDecimalDigit(c) + return isAlphaNum(c) || c == CharCode._ || c == CharCode.DOLLAR || c >= 170 && c <= 65500 From 3c31a7fc36bc842548739e4de560347d2fd195ec Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 15 Nov 2021 23:25:59 +0200 Subject: [PATCH 7/7] refactor --- src/ast.ts | 4 ++-- src/tokenizer.ts | 14 +++++++------- src/util/text.ts | 14 +++++++------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 25c37447ae..3f725bd24c 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -30,7 +30,7 @@ import { normalizePath, resolvePath, CharCode, - isAlphaNum + isAlphaOrDecimal } from "./util"; import { @@ -2347,7 +2347,7 @@ export function mangleInternalPath(path: string): string { if (pos >= 0 && len - pos >= 2) { // at least one char plus dot let cur = pos; while (++cur < len) { - if (!isAlphaNum(path.charCodeAt(cur))) { + if (!isAlphaOrDecimal(path.charCodeAt(cur))) { assert(false); // not a valid external path return path; } diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 56c1bcf5b4..3ca4f1cd71 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -28,8 +28,8 @@ import { isWhiteSpace, isIdentifierStart, isIdentifierPart, - isDecNum, - isOctNum + isDecimal, + isOctal } from "./util"; /** Named token types. */ @@ -703,7 +703,7 @@ export class Tokenizer extends DiagnosticEmitter { ++pos; if (maxTokenLength > 1 && pos < end) { let chr = text.charCodeAt(pos); - if (isDecNum(chr)) { + if (isDecimal(chr)) { this.pos = pos - 1; return Token.FLOATLITERAL; // expects a call to readFloat } @@ -1177,7 +1177,7 @@ export class Tokenizer extends DiagnosticEmitter { var c = text.charCodeAt(this.pos++); switch (c) { case CharCode._0: { - if (isTaggedTemplate && this.pos < end && isDecNum(text.charCodeAt(this.pos))) { + if (isTaggedTemplate && this.pos < end && isDecimal(text.charCodeAt(this.pos))) { ++this.pos; return text.substring(start, this.pos); } @@ -1331,7 +1331,7 @@ export class Tokenizer extends DiagnosticEmitter { return this.readOctalInteger(); } } - if (isOctNum(text.charCodeAt(pos + 1))) { + if (isOctal(text.charCodeAt(pos + 1))) { let start = pos; this.pos = pos + 1; let value = this.readOctalInteger(); @@ -1578,7 +1578,7 @@ export class Tokenizer extends DiagnosticEmitter { if ( ++this.pos < end && (c = text.charCodeAt(this.pos)) == CharCode.MINUS || c == CharCode.PLUS && - isDecNum(text.charCodeAt(this.pos + 1)) + isDecimal(text.charCodeAt(this.pos + 1)) ) { ++this.pos; } @@ -1618,7 +1618,7 @@ export class Tokenizer extends DiagnosticEmitter { } sepEnd = pos + 1; ++sepCount; - } else if (!isDecNum(c)) { + } else if (!isDecimal(c)) { break; } ++pos; diff --git a/src/util/text.ts b/src/util/text.ts index e9bf2de392..844bec41b2 100644 --- a/src/util/text.ts +++ b/src/util/text.ts @@ -181,24 +181,24 @@ export function isAlpha(c: i32): bool { } /** Tests if the specified character code is a valid decimal digit. */ -export function isDecNum(c: i32): bool { +export function isDecimal(c: i32): bool { return c >= CharCode._0 && c <= CharCode._9; } /** Tests if the specified character code is a valid octal digit. */ -export function isOctNum(c: i32): bool { +export function isOctal(c: i32): bool { return c >= CharCode._0 && c <= CharCode._7; } /** Tests if the specified character code is a valid hexadecimal digit. */ -export function isHexNum(c: i32): bool { +export function isHex(c: i32): bool { let c0 = c | 32; // unify uppercases and lowercases a|A - f|F - return isDecNum(c) || (c0 >= CharCode.a && c0 <= CharCode.f); + return isDecimal(c) || (c0 >= CharCode.a && c0 <= CharCode.f); } /** Tests if the specified character code is trivially alphanumeric. */ -export function isAlphaNum(c: i32): bool { - return isAlpha(c) || isDecNum(c); +export function isAlphaOrDecimal(c: i32): bool { + return isAlpha(c) || isDecimal(c); } /** Tests if the specified character code is a valid start of an identifier. */ @@ -212,7 +212,7 @@ export function isIdentifierStart(c: i32): bool { /** Tests if the specified character code is a valid part of an identifier. */ export function isIdentifierPart(c: i32): bool { - return isAlphaNum(c) + return isAlphaOrDecimal(c) || c == CharCode._ || c == CharCode.DOLLAR || c >= 170 && c <= 65500