diff --git a/src/language/ast.ts b/src/language/ast.ts index 77cdf06de5..62ddf24c6b 100644 --- a/src/language/ast.ts +++ b/src/language/ast.ts @@ -96,7 +96,6 @@ export class Token { end: number, line: number, column: number, - prev: Token | null, value?: string, ) { this.kind = kind; @@ -105,7 +104,7 @@ export class Token { this.line = line; this.column = column; this.value = value as string; - this.prev = prev; + this.prev = null; this.next = null; } diff --git a/src/language/lexer.ts b/src/language/lexer.ts index c435a02bd8..44d69d33d2 100644 --- a/src/language/lexer.ts +++ b/src/language/lexer.ts @@ -38,7 +38,7 @@ export class Lexer { lineStart: number; constructor(source: Source) { - const startOfFileToken = new Token(TokenKind.SOF, 0, 0, 0, 0, null); + const startOfFileToken = new Token(TokenKind.SOF, 0, 0, 0, 0); this.source = source; this.lastToken = startOfFileToken; @@ -64,8 +64,17 @@ export class Lexer { let token = this.token; if (token.kind !== TokenKind.EOF) { do { - // @ts-expect-error next is only mutable during parsing, so we cast to allow this. - token = token.next ?? (token.next = readToken(this, token)); + if (token.next) { + token = token.next; + } else { + // Read the next token and form a link in the token linked-list. + const nextToken = readNextToken(this, token.end); + // @ts-expect-error next is only mutable during parsing. + token.next = nextToken; + // @ts-expect-error prev is only mutable during parsing. + nextToken.prev = token; + token = nextToken; + } } while (token.kind === TokenKind.COMMENT); } return token; @@ -107,6 +116,21 @@ function printCharCode(code: number): string { ); } +/** + * Create a token with line and column location information. + */ +function createToken( + lexer: Lexer, + kind: TokenKindEnum, + start: number, + end: number, + value?: string, +): Token { + const line = lexer.line; + const col = 1 + start - lexer.lineStart; + return new Token(kind, start, end, line, col, value); +} + /** * Gets the next token from the source starting at the given position. * @@ -114,158 +138,97 @@ function printCharCode(code: number): string { * punctuators immediately or calls the appropriate helper function for more * complicated tokens. */ -function readToken(lexer: Lexer, prev: Token): Token { - const source = lexer.source; - const body = source.body; +function readNextToken(lexer: Lexer, start: number): Token { + const body = lexer.source.body; const bodyLength = body.length; - let pos = prev.end; - while (pos < bodyLength) { - const code = body.charCodeAt(pos); - - const line = lexer.line; - const col = 1 + pos - lexer.lineStart; + let position = start; + while (position < bodyLength) { + const code = body.charCodeAt(position); // SourceCharacter switch (code) { case 0xfeff: // - case 9: // \t - case 32: // - case 44: // , - ++pos; + case 0x0009: // \t + case 0x0020: // + case 0x002c: // , + ++position; continue; - case 10: // \n - ++pos; + case 0x000a: // \n + ++position; ++lexer.line; - lexer.lineStart = pos; + lexer.lineStart = position; continue; - case 13: // \r - if (body.charCodeAt(pos + 1) === 10) { - pos += 2; + case 0x000d: // \r + if (body.charCodeAt(position + 1) === 0x000a) { + position += 2; } else { - ++pos; + ++position; } ++lexer.line; - lexer.lineStart = pos; + lexer.lineStart = position; continue; - case 33: // ! - return new Token(TokenKind.BANG, pos, pos + 1, line, col, prev); - case 35: // # - return readComment(source, pos, line, col, prev); - case 36: // $ - return new Token(TokenKind.DOLLAR, pos, pos + 1, line, col, prev); - case 38: // & - return new Token(TokenKind.AMP, pos, pos + 1, line, col, prev); - case 40: // ( - return new Token(TokenKind.PAREN_L, pos, pos + 1, line, col, prev); - case 41: // ) - return new Token(TokenKind.PAREN_R, pos, pos + 1, line, col, prev); - case 46: // . + case 0x0021: // ! + return createToken(lexer, TokenKind.BANG, position, position + 1); + case 0x0023: // # + return readComment(lexer, position); + case 0x0024: // $ + return createToken(lexer, TokenKind.DOLLAR, position, position + 1); + case 0x0026: // & + return createToken(lexer, TokenKind.AMP, position, position + 1); + case 0x0028: // ( + return createToken(lexer, TokenKind.PAREN_L, position, position + 1); + case 0x0029: // ) + return createToken(lexer, TokenKind.PAREN_R, position, position + 1); + case 0x002e: // . if ( - body.charCodeAt(pos + 1) === 46 && - body.charCodeAt(pos + 2) === 46 + body.charCodeAt(position + 1) === 0x002e && + body.charCodeAt(position + 2) === 0x002e ) { - return new Token(TokenKind.SPREAD, pos, pos + 3, line, col, prev); + return createToken(lexer, TokenKind.SPREAD, position, position + 3); } break; - case 58: // : - return new Token(TokenKind.COLON, pos, pos + 1, line, col, prev); - case 61: // = - return new Token(TokenKind.EQUALS, pos, pos + 1, line, col, prev); - case 64: // @ - return new Token(TokenKind.AT, pos, pos + 1, line, col, prev); - case 91: // [ - return new Token(TokenKind.BRACKET_L, pos, pos + 1, line, col, prev); - case 93: // ] - return new Token(TokenKind.BRACKET_R, pos, pos + 1, line, col, prev); - case 123: // { - return new Token(TokenKind.BRACE_L, pos, pos + 1, line, col, prev); - case 124: // | - return new Token(TokenKind.PIPE, pos, pos + 1, line, col, prev); - case 125: // } - return new Token(TokenKind.BRACE_R, pos, pos + 1, line, col, prev); - case 34: // " + case 0x003a: // : + return createToken(lexer, TokenKind.COLON, position, position + 1); + case 0x003d: // = + return createToken(lexer, TokenKind.EQUALS, position, position + 1); + case 0x0040: // @ + return createToken(lexer, TokenKind.AT, position, position + 1); + case 0x005b: // [ + return createToken(lexer, TokenKind.BRACKET_L, position, position + 1); + case 0x005d: // ] + return createToken(lexer, TokenKind.BRACKET_R, position, position + 1); + case 0x007b: // { + return createToken(lexer, TokenKind.BRACE_L, position, position + 1); + case 0x007c: // | + return createToken(lexer, TokenKind.PIPE, position, position + 1); + case 0x007d: // } + return createToken(lexer, TokenKind.BRACE_R, position, position + 1); + case 0x0022: // " if ( - body.charCodeAt(pos + 1) === 34 && - body.charCodeAt(pos + 2) === 34 + body.charCodeAt(position + 1) === 0x0022 && + body.charCodeAt(position + 2) === 0x0022 ) { - return readBlockString(source, pos, line, col, prev, lexer); + return readBlockString(lexer, position); } - return readString(source, pos, line, col, prev); - case 45: // - - case 48: // 0 - case 49: // 1 - case 50: // 2 - case 51: // 3 - case 52: // 4 - case 53: // 5 - case 54: // 6 - case 55: // 7 - case 56: // 8 - case 57: // 9 - return readNumber(source, pos, code, line, col, prev); - case 65: // A - case 66: // B - case 67: // C - case 68: // D - case 69: // E - case 70: // F - case 71: // G - case 72: // H - case 73: // I - case 74: // J - case 75: // K - case 76: // L - case 77: // M - case 78: // N - case 79: // O - case 80: // P - case 81: // Q - case 82: // R - case 83: // S - case 84: // T - case 85: // U - case 86: // V - case 87: // W - case 88: // X - case 89: // Y - case 90: // Z - case 95: // _ - case 97: // a - case 98: // b - case 99: // c - case 100: // d - case 101: // e - case 102: // f - case 103: // g - case 104: // h - case 105: // i - case 106: // j - case 107: // k - case 108: // l - case 109: // m - case 110: // n - case 111: // o - case 112: // p - case 113: // q - case 114: // r - case 115: // s - case 116: // t - case 117: // u - case 118: // v - case 119: // w - case 120: // x - case 121: // y - case 122: // z - return readName(source, pos, line, col, prev); + return readString(lexer, position); } - throw syntaxError(source, pos, unexpectedCharacterMessage(code)); + // IntValue | FloatValue + // 0-9 | - + if (isDigit(code) || code === 0x002d) { + return readNumber(lexer, position, code); + } + + // Name + if (isNameStart(code)) { + return readName(lexer, position); + } + + throw syntaxError(lexer.source, position, unexpectedCharacterMessage(code)); } - const line = lexer.line; - const col = 1 + pos - lexer.lineStart; - return new Token(TokenKind.EOF, bodyLength, bodyLength, line, col, prev); + return createToken(lexer, TokenKind.EOF, bodyLength, bodyLength); } /** @@ -276,7 +239,7 @@ function unexpectedCharacterMessage(code: number): string { return `Cannot contain the invalid character ${printCharCode(code)}.`; } - if (code === 39) { + if (code === 0x0027) { // ' return 'Unexpected single quote character (\'), did you mean to use a double quote (")?'; } @@ -289,14 +252,8 @@ function unexpectedCharacterMessage(code: number): string { * * #[\u0009\u0020-\uFFFF]* */ -function readComment( - source: Source, - start: number, - line: number, - col: number, - prev: Token | null, -): Token { - const body = source.body; +function readComment(lexer: Lexer, start: number): Token { + const body = lexer.source.body; let code; let position = start; @@ -308,13 +265,11 @@ function readComment( (code > 0x001f || code === 0x0009) ); - return new Token( + return createToken( + lexer, TokenKind.COMMENT, start, position, - line, - col, - prev, body.slice(start + 1, position), ); } @@ -326,77 +281,68 @@ function readComment( * Int: -?(0|[1-9][0-9]*) * Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)? */ -function readNumber( - source: Source, - start: number, - firstCode: number, - line: number, - col: number, - prev: Token | null, -): Token { - const body = source.body; +function readNumber(lexer: Lexer, start: number, firstCode: number): Token { + const body = lexer.source.body; let code = firstCode; let position = start; let isFloat = false; - if (code === 45) { + if (code === 0x002d) { // - code = body.charCodeAt(++position); } - if (code === 48) { + if (code === 0x0030) { // 0 code = body.charCodeAt(++position); - if (code >= 48 && code <= 57) { + if (isDigit(code)) { throw syntaxError( - source, + lexer.source, position, `Invalid number, unexpected digit after 0: ${printCharCode(code)}.`, ); } } else { - position = readDigits(source, position, code); + position = readDigits(lexer, position, code); code = body.charCodeAt(position); } - if (code === 46) { + if (code === 0x002e) { // . isFloat = true; code = body.charCodeAt(++position); - position = readDigits(source, position, code); + position = readDigits(lexer, position, code); code = body.charCodeAt(position); } - if (code === 69 || code === 101) { + if (code === 0x0045 || code === 0x0065) { // E e isFloat = true; code = body.charCodeAt(++position); - if (code === 43 || code === 45) { + if (code === 0x002b || code === 0x002d) { // + - code = body.charCodeAt(++position); } - position = readDigits(source, position, code); + position = readDigits(lexer, position, code); code = body.charCodeAt(position); } // Numbers cannot be followed by . or NameStart - if (code === 46 || isNameStart(code)) { + if (code === 0x002e || isNameStart(code)) { throw syntaxError( - source, + lexer.source, position, `Invalid number, expected digit but got: ${printCharCode(code)}.`, ); } - return new Token( + return createToken( + lexer, isFloat ? TokenKind.FLOAT : TokenKind.INT, start, position, - line, - col, - prev, body.slice(start, position), ); } @@ -404,22 +350,27 @@ function readNumber( /** * Returns the new position in the source after reading digits. */ -function readDigits(source: Source, start: number, firstCode: number): number { - const body = source.body; +function readDigits(lexer: Lexer, start: number, firstCode: number): number { + if (!isDigit(firstCode)) { + throw syntaxError( + lexer.source, + start, + `Invalid number, expected digit but got: ${printCharCode(firstCode)}.`, + ); + } + + const body = lexer.source.body; let position = start; let code = firstCode; - if (code >= 48 && code <= 57) { - // 0 - 9 - do { - code = body.charCodeAt(++position); - } while (code >= 48 && code <= 57); // 0 - 9 - return position; - } - throw syntaxError( - source, - position, - `Invalid number, expected digit but got: ${printCharCode(code)}.`, - ); + do { + code = body.charCodeAt(++position); + } while (isDigit(code)); + return position; +} + +// 0 - 9 +function isDigit(code: number): boolean { + return code >= 0x0030 && code <= 0x0039; } /** @@ -427,14 +378,8 @@ function readDigits(source: Source, start: number, firstCode: number): number { * * "([^"\\\u000A\u000D]|(\\(u[0-9a-fA-F]{4}|["\\/bfnrt])))*" */ -function readString( - source: Source, - start: number, - line: number, - col: number, - prev: Token | null, -): Token { - const body = source.body; +function readString(lexer: Lexer, start: number): Token { + const body = lexer.source.body; let position = start + 1; let chunkStart = position; let code = 0; @@ -448,59 +393,51 @@ function readString( code !== 0x000d ) { // Closing Quote (") - if (code === 34) { + if (code === 0x0022) { value += body.slice(chunkStart, position); - return new Token( - TokenKind.STRING, - start, - position + 1, - line, - col, - prev, - value, - ); + return createToken(lexer, TokenKind.STRING, start, position + 1, value); } // SourceCharacter if (code < 0x0020 && code !== 0x0009) { throw syntaxError( - source, + lexer.source, position, `Invalid character within String: ${printCharCode(code)}.`, ); } ++position; - if (code === 92) { + if (code === 0x005c) { // \ value += body.slice(chunkStart, position - 1); code = body.charCodeAt(position); switch (code) { - case 34: + case 0x0022: value += '"'; break; - case 47: + case 0x002f: value += '/'; break; - case 92: + case 0x005c: value += '\\'; break; - case 98: + case 0x0062: value += '\b'; break; - case 102: + case 0x0066: value += '\f'; break; - case 110: + case 0x006e: value += '\n'; break; - case 114: + case 0x0072: value += '\r'; break; - case 116: + case 0x0074: value += '\t'; break; - case 117: { + case 0x0075: { // uXXXX const charCode = uniCharCode( body.charCodeAt(position + 1), @@ -511,7 +448,7 @@ function readString( if (charCode < 0) { const invalidSequence = body.slice(position + 1, position + 5); throw syntaxError( - source, + lexer.source, position, `Invalid character escape sequence: \\u${invalidSequence}.`, ); @@ -522,7 +459,7 @@ function readString( } default: throw syntaxError( - source, + lexer.source, position, `Invalid character escape sequence: \\${String.fromCharCode( code, @@ -534,7 +471,7 @@ function readString( } } - throw syntaxError(source, position, 'Unterminated string.'); + throw syntaxError(lexer.source, position, 'Unterminated string.'); } /** @@ -542,15 +479,8 @@ function readString( * * """("?"?(\\"""|\\(?!=""")|[^"\\]))*""" */ -function readBlockString( - source: Source, - start: number, - line: number, - col: number, - prev: Token | null, - lexer: Lexer, -): Token { - const body = source.body; +function readBlockString(lexer: Lexer, start: number): Token { + const body = lexer.source.body; let position = start + 3; let chunkStart = position; let code = 0; @@ -559,18 +489,16 @@ function readBlockString( while (position < body.length && !isNaN((code = body.charCodeAt(position)))) { // Closing Triple-Quote (""") if ( - code === 34 && - body.charCodeAt(position + 1) === 34 && - body.charCodeAt(position + 2) === 34 + code === 0x0022 && + body.charCodeAt(position + 1) === 0x0022 && + body.charCodeAt(position + 2) === 0x0022 ) { rawValue += body.slice(chunkStart, position); - return new Token( + return createToken( + lexer, TokenKind.BLOCK_STRING, start, position + 3, - line, - col, - prev, dedentBlockStringValue(rawValue), ); } @@ -583,20 +511,20 @@ function readBlockString( code !== 0x000d ) { throw syntaxError( - source, + lexer.source, position, `Invalid character within String: ${printCharCode(code)}.`, ); } - if (code === 10) { + if (code === 0x000a) { // new line ++position; ++lexer.line; lexer.lineStart = position; - } else if (code === 13) { + } else if (code === 0x000d) { // carriage return - if (body.charCodeAt(position + 1) === 10) { + if (body.charCodeAt(position + 1) === 0x000a) { position += 2; } else { ++position; @@ -605,10 +533,10 @@ function readBlockString( lexer.lineStart = position; } else if ( // Escape Triple-Quote (\""") - code === 92 && - body.charCodeAt(position + 1) === 34 && - body.charCodeAt(position + 2) === 34 && - body.charCodeAt(position + 3) === 34 + code === 0x005c && + body.charCodeAt(position + 1) === 0x0022 && + body.charCodeAt(position + 2) === 0x0022 && + body.charCodeAt(position + 3) === 0x0022 ) { rawValue += body.slice(chunkStart, position) + '"""'; position += 4; @@ -618,7 +546,7 @@ function readBlockString( } } - throw syntaxError(source, position, 'Unterminated string.'); + throw syntaxError(lexer.source, position, 'Unterminated string.'); } /** @@ -646,12 +574,12 @@ function uniCharCode(a: number, b: number, c: number, d: number): number { * Returns -1 on error. */ function char2hex(a: number): number { - return a >= 48 && a <= 57 - ? a - 48 // 0-9 - : a >= 65 && a <= 70 - ? a - 55 // A-F - : a >= 97 && a <= 102 - ? a - 87 // a-f + return isDigit(a) + ? a - 0x0030 // 0-9 + : a >= 0x0041 && a <= 0x0046 + ? a - 0x0037 // A-F + : a >= 0x0061 && a <= 0x0066 + ? a - 0x0057 // a-f : -1; } @@ -660,41 +588,35 @@ function char2hex(a: number): number { * * [_A-Za-z][_0-9A-Za-z]* */ -function readName( - source: Source, - start: number, - line: number, - col: number, - prev: Token | null, -): Token { - const body = source.body; +function readName(lexer: Lexer, start: number): Token { + const body = lexer.source.body; const bodyLength = body.length; let position = start + 1; let code = 0; while ( position !== bodyLength && !isNaN((code = body.charCodeAt(position))) && - (code === 95 || // _ - (code >= 48 && code <= 57) || // 0-9 - (code >= 65 && code <= 90) || // A-Z - (code >= 97 && code <= 122)) // a-z + (code === 0x005f || // _ + isDigit(code) || + (code >= 0x0041 && code <= 0x005a) || // A-Z + (code >= 0x0061 && code <= 0x007a)) // a-z ) { ++position; } - return new Token( + return createToken( + lexer, TokenKind.NAME, start, position, - line, - col, - prev, body.slice(start, position), ); } -// _ A-Z a-z +// a-z | A-Z | _ function isNameStart(code: number): boolean { return ( - code === 95 || (code >= 65 && code <= 90) || (code >= 97 && code <= 122) + (code >= 0x0061 && code <= 0x007a) || + (code >= 0x0041 && code <= 0x005a) || + code === 0x005f ); } diff --git a/src/language/parser.ts b/src/language/parser.ts index 660fc906c1..f2807b5c1f 100644 --- a/src/language/parser.ts +++ b/src/language/parser.ts @@ -570,8 +570,8 @@ export class Parser { case TokenKind.DOLLAR: if (isConst) { this.expectToken(TokenKind.DOLLAR); - const varName = this.expectOptionalToken(TokenKind.NAME)?.value; - if (varName != null) { + if (this._lexer.token.kind === TokenKind.NAME) { + const varName = this._lexer.token.value; throw syntaxError( this._lexer.source, token.start, @@ -1395,23 +1395,23 @@ export class Parser { } /** - * If the next token is of the given kind, return that token after advancing the lexer. - * Otherwise, do not change the parser state and return undefined. + * If the next token is of the given kind, return "true" after advancing the lexer. + * Otherwise, do not change the parser state and return "false". */ - expectOptionalToken(kind: TokenKindEnum): Maybe { + expectOptionalToken(kind: TokenKindEnum): boolean { const token = this._lexer.token; if (token.kind === kind) { this._lexer.advance(); - return token; + return true; } - return undefined; + return false; } /** * If the next token is a given keyword, advance the lexer. * Otherwise, do not change the parser state and throw an error. */ - expectKeyword(value: string) { + expectKeyword(value: string): void { const token = this._lexer.token; if (token.kind === TokenKind.NAME && token.value === value) { this._lexer.advance();