From 666ebe4bd4cd6543a947afa9dbd24bf0066651eb Mon Sep 17 00:00:00 2001
From: Andreas Marek <andimarek@fastmail.fm>
Date: Sun, 9 Feb 2020 04:35:53 +1100
Subject: [PATCH 1/6] add full unicode support

---
 src/language/__tests__/lexer-test.js | 33 +++++++++++
 src/language/lexer.js                | 88 +++++++++++++++++++++++-----
 2 files changed, 105 insertions(+), 16 deletions(-)

diff --git a/src/language/__tests__/lexer-test.js b/src/language/__tests__/lexer-test.js
index 2d6e6b11fc..0fa3053ef1 100644
--- a/src/language/__tests__/lexer-test.js
+++ b/src/language/__tests__/lexer-test.js
@@ -268,6 +268,20 @@ describe('Lexer', () => {
       end: 34,
       value: 'unicode \u1234\u5678\u90AB\uCDEF',
     });
+
+    expect(lexOne('"string with unicode code point outside BMP 😀"')).to.contain({
+      kind: TokenKind.STRING,
+      start: 0,
+      end: 47,
+      value: 'string with unicode code point outside BMP 😀',
+    });
+
+    expect(lexOne('"string with unicode code point outside BMP escaped \\uD83D\\uDE00"')).to.contain({
+      kind: TokenKind.STRING,
+      start: 0,
+      end: 65,
+      value: 'string with unicode code point outside BMP escaped 😀',
+    });
   });
 
   it('lex reports useful string errors', () => {
@@ -353,6 +367,17 @@ describe('Lexer', () => {
       message: 'Syntax Error: Invalid character escape sequence: \\uXXXF.',
       locations: [{ line: 1, column: 7 }],
     });
+
+    expectSyntaxError('"bad \\uDEAD esc"').to.deep.equal({
+      message: 'Syntax Error: Invalid surrogate pair escape sequence: \\uDEAD.',
+      locations: [{ line: 1, column: 7 }],
+    });
+
+    expectSyntaxError('"bad \\uD83D\\uDBFF esc"').to.deep.equal({
+      message: 'Syntax Error: Invalid surrogate pair escape sequence: \\uD83D\\uDBFF.',
+      locations: [{ line: 1, column: 7 }],
+    });
+
   });
 
   it('lexes block strings', () => {
@@ -412,6 +437,14 @@ describe('Lexer', () => {
       value: 'unescaped \\n\\r\\b\\t\\f\\u1234',
     });
 
+    expect(lexOne('"""unescaped unicode outside BMP 😀"""')).to.contain({
+      kind: TokenKind.BLOCK_STRING,
+      start: 0,
+      end: 38,
+      value: 'unescaped unicode outside BMP 😀',
+    });
+
+
     expect(lexOne('"""slashes \\\\ \\/"""')).to.contain({
       kind: TokenKind.BLOCK_STRING,
       start: 0,
diff --git a/src/language/lexer.js b/src/language/lexer.js
index b6ab501308..5c969fc9fa 100644
--- a/src/language/lexer.js
+++ b/src/language/lexer.js
@@ -511,22 +511,9 @@ function readString(source, start, line, col, prev): Token {
           break;
         case 117: {
           // uXXXX
-          const charCode = uniCharCode(
-            body.charCodeAt(position + 1),
-            body.charCodeAt(position + 2),
-            body.charCodeAt(position + 3),
-            body.charCodeAt(position + 4),
-          );
-          if (charCode < 0) {
-            const invalidSequence = body.slice(position + 1, position + 5);
-            throw syntaxError(
-              source,
-              position,
-              `Invalid character escape sequence: \\u${invalidSequence}.`,
-            );
-          }
-          value += String.fromCharCode(charCode);
-          position += 4;
+          const convertedEscape = convertUnicodeEscape(source, body, position);
+          value += convertedEscape.value;
+          position += convertedEscape.positionIncrease;
           break;
         }
         default:
@@ -546,6 +533,75 @@ function readString(source, start, line, col, prev): Token {
   throw syntaxError(source, position, 'Unterminated string.');
 }
 
+function convertUnicodeEscape(source, body, position) {
+  const charCode = uniCharCode(
+    body.charCodeAt(position + 1),
+    body.charCodeAt(position + 2),
+    body.charCodeAt(position + 3),
+    body.charCodeAt(position + 4),
+  );
+  if (charCode < 0) {
+    const invalidSequence = body.slice(position + 1, position + 5);
+    throw syntaxError(
+      source,
+      position,
+      `Invalid character escape sequence: \\u${invalidSequence}.`,
+    );
+  }
+
+  let value;
+  let positionIncrease;
+  // String.fromCharCode doesn't fail for invalid surrogate pairs, therefore
+  // it is manually verified here
+  if (isTrailingSurrogate(charCode)) {
+    const invalidSequence = body.slice(position + 1, position + 5);
+    throw syntaxError(
+      source,
+      position,
+      `Invalid surrogate pair escape sequence: \\u${invalidSequence}.`,
+    );
+  }
+  if (isLeadingSurrogate(charCode)) {
+    if (body.charCodeAt(position + 5) !== 92 ||
+      body.charCodeAt(position + 6) !== 117) {
+      const invalidSequence = body.slice(position + 1, position + 7);
+      throw syntaxError(
+        source,
+        position,
+        `Invalid surrogate pair escape sequence: \\u${invalidSequence}.`,
+      );
+    }
+    const trailingSurrogate = uniCharCode(
+      body.charCodeAt(position + 7),
+      body.charCodeAt(position + 8),
+      body.charCodeAt(position + 9),
+      body.charCodeAt(position + 10),
+    );
+    if (!isTrailingSurrogate(trailingSurrogate)) {
+      const invalidSequence = body.slice(position + 1, position + 11);
+      throw syntaxError(
+        source,
+        position,
+        `Invalid surrogate pair escape sequence: \\u${invalidSequence}.`,
+      );
+    }
+    value = String.fromCharCode(charCode, trailingSurrogate);
+    positionIncrease = 10;
+  } else {
+    value = String.fromCharCode(charCode);
+    positionIncrease = 4;
+  }
+  return { value, positionIncrease };
+}
+
+function isLeadingSurrogate(charCode) {
+  return 0xD800 <= charCode && charCode <= 0xDBFF;
+}
+
+function isTrailingSurrogate(charCode) {
+  return 0xDC00 <= charCode && charCode <= 0xDFFF;
+}
+
 /**
  * Reads a block string token from the source file.
  *

From 2ca116526e0894f3c23d761f2af87d631fad259d Mon Sep 17 00:00:00 2001
From: Andreas Marek <andimarek@fastmail.fm>
Date: Sun, 9 Feb 2020 04:45:14 +1100
Subject: [PATCH 2/6] apply prettier

---
 src/language/__tests__/lexer-test.js | 15 ++++++++++-----
 src/language/lexer.js                | 10 ++++++----
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/src/language/__tests__/lexer-test.js b/src/language/__tests__/lexer-test.js
index 0fa3053ef1..819d5a0a45 100644
--- a/src/language/__tests__/lexer-test.js
+++ b/src/language/__tests__/lexer-test.js
@@ -269,14 +269,20 @@ describe('Lexer', () => {
       value: 'unicode \u1234\u5678\u90AB\uCDEF',
     });
 
-    expect(lexOne('"string with unicode code point outside BMP 😀"')).to.contain({
+    expect(
+      lexOne('"string with unicode code point outside BMP 😀"'),
+    ).to.contain({
       kind: TokenKind.STRING,
       start: 0,
       end: 47,
       value: 'string with unicode code point outside BMP 😀',
     });
 
-    expect(lexOne('"string with unicode code point outside BMP escaped \\uD83D\\uDE00"')).to.contain({
+    expect(
+      lexOne(
+        '"string with unicode code point outside BMP escaped \\uD83D\\uDE00"',
+      ),
+    ).to.contain({
       kind: TokenKind.STRING,
       start: 0,
       end: 65,
@@ -374,10 +380,10 @@ describe('Lexer', () => {
     });
 
     expectSyntaxError('"bad \\uD83D\\uDBFF esc"').to.deep.equal({
-      message: 'Syntax Error: Invalid surrogate pair escape sequence: \\uD83D\\uDBFF.',
+      message:
+        'Syntax Error: Invalid surrogate pair escape sequence: \\uD83D\\uDBFF.',
       locations: [{ line: 1, column: 7 }],
     });
-
   });
 
   it('lexes block strings', () => {
@@ -444,7 +450,6 @@ describe('Lexer', () => {
       value: 'unescaped unicode outside BMP 😀',
     });
 
-
     expect(lexOne('"""slashes \\\\ \\/"""')).to.contain({
       kind: TokenKind.BLOCK_STRING,
       start: 0,
diff --git a/src/language/lexer.js b/src/language/lexer.js
index 5c969fc9fa..191696f6ca 100644
--- a/src/language/lexer.js
+++ b/src/language/lexer.js
@@ -562,8 +562,10 @@ function convertUnicodeEscape(source, body, position) {
     );
   }
   if (isLeadingSurrogate(charCode)) {
-    if (body.charCodeAt(position + 5) !== 92 ||
-      body.charCodeAt(position + 6) !== 117) {
+    if (
+      body.charCodeAt(position + 5) !== 92 ||
+      body.charCodeAt(position + 6) !== 117
+    ) {
       const invalidSequence = body.slice(position + 1, position + 7);
       throw syntaxError(
         source,
@@ -595,11 +597,11 @@ function convertUnicodeEscape(source, body, position) {
 }
 
 function isLeadingSurrogate(charCode) {
-  return 0xD800 <= charCode && charCode <= 0xDBFF;
+  return 0xd800 <= charCode && charCode <= 0xdbff;
 }
 
 function isTrailingSurrogate(charCode) {
-  return 0xDC00 <= charCode && charCode <= 0xDFFF;
+  return 0xdc00 <= charCode && charCode <= 0xdfff;
 }
 
 /**

From 386788f3b40752a1166878fbe4a72de47baf9a77 Mon Sep 17 00:00:00 2001
From: Andreas Marek <andimarek@fastmail.fm>
Date: Sun, 9 Feb 2020 11:25:25 +1100
Subject: [PATCH 3/6] fix test

---
 src/language/lexer.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/language/lexer.js b/src/language/lexer.js
index 191696f6ca..96e78b15ce 100644
--- a/src/language/lexer.js
+++ b/src/language/lexer.js
@@ -597,11 +597,11 @@ function convertUnicodeEscape(source, body, position) {
 }
 
 function isLeadingSurrogate(charCode) {
-  return 0xd800 <= charCode && charCode <= 0xdbff;
+  return charCode >= 0xd800 && charCode <= 0xdbff;
 }
 
 function isTrailingSurrogate(charCode) {
-  return 0xdc00 <= charCode && charCode <= 0xdfff;
+  return charCode >= 0xdc00 && charCode <= 0xdfff;
 }
 
 /**

From b437de925a0729f24e995d5ee5a84ae334ae7dd5 Mon Sep 17 00:00:00 2001
From: Andreas Marek <andimarek@fastmail.fm>
Date: Sun, 9 Feb 2020 11:34:33 +1100
Subject: [PATCH 4/6] more tests

---
 src/language/__tests__/lexer-test.js | 44 ++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/src/language/__tests__/lexer-test.js b/src/language/__tests__/lexer-test.js
index 819d5a0a45..0bcdfa7b76 100644
--- a/src/language/__tests__/lexer-test.js
+++ b/src/language/__tests__/lexer-test.js
@@ -288,6 +288,50 @@ describe('Lexer', () => {
       end: 65,
       value: 'string with unicode code point outside BMP escaped 😀',
     });
+
+    expect(
+      lexOne(
+        '"string with unicode code point outside BMP escaped \\uD800\\uDC00"',
+      ),
+    ).to.contain({
+      kind: TokenKind.STRING,
+      start: 0,
+      end: 65,
+      value: 'string with unicode code point outside BMP escaped \uD800\uDC00',
+    });
+
+    expect(
+      lexOne(
+        '"string with unicode code point outside BMP escaped \\uDBFF\\uDC00"',
+      ),
+    ).to.contain({
+      kind: TokenKind.STRING,
+      start: 0,
+      end: 65,
+      value: 'string with unicode code point outside BMP escaped \uDBFF\uDC00',
+    });
+
+    expect(
+      lexOne(
+        '"string with unicode code point outside BMP escaped \\uDBFF\\uDFFF"',
+      ),
+    ).to.contain({
+      kind: TokenKind.STRING,
+      start: 0,
+      end: 65,
+      value: 'string with unicode code point outside BMP escaped \uDBFF\uDFFF',
+    });
+
+    expect(
+      lexOne(
+        '"string with unicode code point outside BMP escaped \\uD800\\uDFFF"',
+      ),
+    ).to.contain({
+      kind: TokenKind.STRING,
+      start: 0,
+      end: 65,
+      value: 'string with unicode code point outside BMP escaped \uD800\uDFFF',
+    });
   });
 
   it('lex reports useful string errors', () => {

From f41b3c7755b2af14dd007baea2f0cd1bd9dcc54e Mon Sep 17 00:00:00 2001
From: Andreas Marek <andimarek@fastmail.fm>
Date: Wed, 19 Feb 2020 18:50:26 +1100
Subject: [PATCH 5/6] add test

---
 src/language/__tests__/lexer-test.js | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/language/__tests__/lexer-test.js b/src/language/__tests__/lexer-test.js
index 0bcdfa7b76..f826e8bba6 100644
--- a/src/language/__tests__/lexer-test.js
+++ b/src/language/__tests__/lexer-test.js
@@ -423,6 +423,12 @@ describe('Lexer', () => {
       locations: [{ line: 1, column: 7 }],
     });
 
+    expectSyntaxError('"bad \\uD83D\\noEscape"').to.deep.equal({
+      message:
+        'Syntax Error: Invalid surrogate pair escape sequence: \\uD83D\\n.',
+      locations: [{ line: 1, column: 7 }],
+    });
+
     expectSyntaxError('"bad \\uD83D\\uDBFF esc"').to.deep.equal({
       message:
         'Syntax Error: Invalid surrogate pair escape sequence: \\uD83D\\uDBFF.',

From 01f055a13f6c9286f5366ea59a2ed25edc69443b Mon Sep 17 00:00:00 2001
From: Lee Byron <lee.byron@robinhood.com>
Date: Fri, 23 Apr 2021 01:09:35 -0700
Subject: [PATCH 6/6] Add full unicode spec change support

* Requires surrogate pairs, regardless of whether they are escaped
* Support braced unicode escapes
* Improved error messages with more tests
---
 src/language/__tests__/lexer-test.js | 133 +++++++++++--
 src/language/lexer.js                | 275 +++++++++++++++------------
 2 files changed, 271 insertions(+), 137 deletions(-)

diff --git a/src/language/__tests__/lexer-test.js b/src/language/__tests__/lexer-test.js
index 98c25e8387..28f75eeb81 100644
--- a/src/language/__tests__/lexer-test.js
+++ b/src/language/__tests__/lexer-test.js
@@ -263,6 +263,31 @@ describe('Lexer', () => {
       value: 'unicode \u1234\u5678\u90AB\uCDEF',
     });
 
+    expect(lexOne('"unicode \\u{1234}\\u{5678}\\u{90AB}\\u{CDEF}"')).to.contain(
+      {
+        kind: TokenKind.STRING,
+        start: 0,
+        end: 42,
+        value: 'unicode \u1234\u5678\u90AB\uCDEF',
+      },
+    );
+
+    expect(
+      lexOne('"string with unicode escape outside BMP \\u{1F600}"'),
+    ).to.contain({
+      kind: TokenKind.STRING,
+      start: 0,
+      end: 50,
+      value: 'string with unicode escape outside BMP 😀',
+    });
+
+    expect(lexOne('"unicode \\u{10FFFF}"')).to.contain({
+      kind: TokenKind.STRING,
+      start: 0,
+      end: 20,
+      value: 'unicode \u{10FFFF}',
+    });
+
     expect(
       lexOne('"string with unicode code point outside BMP 😀"'),
     ).to.contain({
@@ -378,55 +403,135 @@ describe('Lexer', () => {
     });
 
     expectSyntaxError('"bad \\z esc"').to.deep.equal({
-      message: 'Syntax Error: Invalid character escape sequence: \\z.',
+      message: 'Syntax Error: Invalid character escape sequence: "\\z".',
       locations: [{ line: 1, column: 7 }],
     });
 
     expectSyntaxError('"bad \\x esc"').to.deep.equal({
-      message: 'Syntax Error: Invalid character escape sequence: \\x.',
+      message: 'Syntax Error: Invalid character escape sequence: "\\x".',
       locations: [{ line: 1, column: 7 }],
     });
 
     expectSyntaxError('"bad \\u1 esc"').to.deep.equal({
-      message: 'Syntax Error: Invalid character escape sequence: \\u1 es.',
+      message: 'Syntax Error: Invalid Unicode escape sequence: "\\u1 es".',
+      locations: [{ line: 1, column: 7 }],
+    });
+
+    expectSyntaxError('"bad \\u1"').to.deep.equal({
+      message: 'Syntax Error: Invalid Unicode escape sequence: "\\u1".',
       locations: [{ line: 1, column: 7 }],
     });
 
     expectSyntaxError('"bad \\u0XX1 esc"').to.deep.equal({
-      message: 'Syntax Error: Invalid character escape sequence: \\u0XX1.',
+      message: 'Syntax Error: Invalid Unicode escape sequence: "\\u0XX1".',
       locations: [{ line: 1, column: 7 }],
     });
 
     expectSyntaxError('"bad \\uXXXX esc"').to.deep.equal({
-      message: 'Syntax Error: Invalid character escape sequence: \\uXXXX.',
+      message: 'Syntax Error: Invalid Unicode escape sequence: "\\uXXXX".',
       locations: [{ line: 1, column: 7 }],
     });
 
     expectSyntaxError('"bad \\uFXXX esc"').to.deep.equal({
-      message: 'Syntax Error: Invalid character escape sequence: \\uFXXX.',
+      message: 'Syntax Error: Invalid Unicode escape sequence: "\\uFXXX".',
       locations: [{ line: 1, column: 7 }],
     });
 
     expectSyntaxError('"bad \\uXXXF esc"').to.deep.equal({
-      message: 'Syntax Error: Invalid character escape sequence: \\uXXXF.',
+      message: 'Syntax Error: Invalid Unicode escape sequence: "\\uXXXF".',
       locations: [{ line: 1, column: 7 }],
     });
 
-    expectSyntaxError('"bad \\uDEAD esc"').to.deep.equal({
-      message: 'Syntax Error: Invalid surrogate pair escape sequence: \\uDEAD.',
+    expectSyntaxError('"bad \\u{} esc"').to.deep.equal({
+      message: 'Syntax Error: Invalid Unicode escape sequence: "\\u{}".',
       locations: [{ line: 1, column: 7 }],
     });
 
-    expectSyntaxError('"bad \\uD83D\\noEscape"').to.deep.equal({
-      message:
-        'Syntax Error: Invalid surrogate pair escape sequence: \\uD83D\\n.',
+    expectSyntaxError('"bad \\u{XXXF} esc"').to.deep.equal({
+      message: 'Syntax Error: Invalid Unicode escape sequence: "\\u{XXXF}".',
+      locations: [{ line: 1, column: 7 }],
+    });
+
+    expectSyntaxError('"bad \\u{XXXF esc"').to.deep.equal({
+      message: 'Syntax Error: Invalid Unicode escape sequence: "\\u{XXXF es".',
       locations: [{ line: 1, column: 7 }],
     });
 
+    expectSyntaxError('"bad \\u{X"').to.deep.equal({
+      message: 'Syntax Error: Invalid Unicode escape sequence: "\\u{X".',
+      locations: [{ line: 1, column: 7 }],
+    });
+
+    expectSyntaxError('"bad \\u{XXXF e}scape"').to.deep.equal({
+      message: 'Syntax Error: Invalid Unicode escape sequence: "\\u{XXXF e}".',
+      locations: [{ line: 1, column: 7 }],
+    });
+
+    expectSyntaxError('"bad \\u{110000} esc"').to.deep.equal({
+      message: 'Syntax Error: Undefined Unicode code-point: "\\u{110000}".',
+      locations: [{ line: 1, column: 7 }],
+    });
+
+    expectSyntaxError('"bad \uDEAD esc"').to.deep.equal({
+      message: 'Syntax Error: Invalid low surrogate within String: "\\uDEAD".',
+      locations: [{ line: 1, column: 6 }],
+    });
+
+    expectSyntaxError('"bad \\uDEAD esc"').to.deep.equal({
+      message: 'Syntax Error: Invalid low surrogate within String: "\\uDEAD".',
+      locations: [{ line: 1, column: 6 }],
+    });
+
+    expectSyntaxError('"bad \\u{DEAD} esc"').to.deep.equal({
+      message: 'Syntax Error: Invalid low surrogate within String: "\\uDEAD".',
+      locations: [{ line: 1, column: 6 }],
+    });
+
+    expectSyntaxError('"bad \uD83D esc"').to.deep.equal({
+      message:
+        'Syntax Error: Invalid high surrogate "\\uD83D" followed by a non-low surrogate " " in String.',
+      locations: [{ line: 1, column: 6 }],
+    });
+
+    expectSyntaxError('"bad \\uD83D esc"').to.deep.equal({
+      message:
+        'Syntax Error: Invalid high surrogate "\\uD83D" followed by a non-low surrogate " " in String.',
+      locations: [{ line: 1, column: 6 }],
+    });
+
+    expectSyntaxError('"bad \\u{D83D} esc"').to.deep.equal({
+      message:
+        'Syntax Error: Invalid high surrogate "\\uD83D" followed by a non-low surrogate " " in String.',
+      locations: [{ line: 1, column: 6 }],
+    });
+
+    expectSyntaxError('"bad \uD83D\uDBFF esc"').to.deep.equal({
+      message:
+        'Syntax Error: Invalid high surrogate "\\uD83D" followed by a non-low surrogate "\\uDBFF" in String.',
+      locations: [{ line: 1, column: 6 }],
+    });
+
     expectSyntaxError('"bad \\uD83D\\uDBFF esc"').to.deep.equal({
       message:
-        'Syntax Error: Invalid surrogate pair escape sequence: \\uD83D\\uDBFF.',
-      locations: [{ line: 1, column: 7 }],
+        'Syntax Error: Invalid high surrogate "\\uD83D" followed by a non-low surrogate "\\uDBFF" in String.',
+      locations: [{ line: 1, column: 6 }],
+    });
+
+    expectSyntaxError('"bad \uD83D\\uDBFF esc"').to.deep.equal({
+      message:
+        'Syntax Error: Invalid high surrogate "\\uD83D" followed by a non-low surrogate "\\uDBFF" in String.',
+      locations: [{ line: 1, column: 6 }],
+    });
+
+    expectSyntaxError('"bad \\uD83D\uDBFF esc"').to.deep.equal({
+      message:
+        'Syntax Error: Invalid high surrogate "\\uD83D" followed by a non-low surrogate "\\uDBFF" in String.',
+      locations: [{ line: 1, column: 6 }],
+    });
+
+    expectSyntaxError('"bad \\uD83D\\escape"').to.deep.equal({
+      message: 'Syntax Error: Invalid character escape sequence: "\\e".',
+      locations: [{ line: 1, column: 13 }],
     });
   });
 
diff --git a/src/language/lexer.js b/src/language/lexer.js
index ec91f2248e..2b09f9008d 100644
--- a/src/language/lexer.js
+++ b/src/language/lexer.js
@@ -425,7 +425,7 @@ function readDigits(source: Source, start: number, firstCode: number): number {
 /**
  * Reads a string token from the source file.
  *
- * "([^"\\\u000A\u000D]|(\\(u[0-9a-fA-F]{4}|["\\/bfnrt])))*"
+ * "([^"\\\u000A\u000D]|(\\(u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{1,6}\})|["\\/bfnrt])))*"
  */
 function readString(
   source: Source,
@@ -439,6 +439,7 @@ function readString(
   let chunkStart = position;
   let code = 0;
   let value = '';
+  let isSurrogatePair = false;
 
   while (
     position < body.length &&
@@ -470,129 +471,173 @@ function readString(
       );
     }
 
-    ++position;
+    let codeSize;
+    // Escape Sequence (\)
     if (code === 92) {
-      // \
-      value += body.slice(chunkStart, position - 1);
-      code = body.charCodeAt(position);
-      switch (code) {
-        case 34:
-          value += '"';
-          break;
-        case 47:
-          value += '/';
-          break;
-        case 92:
-          value += '\\';
-          break;
-        case 98:
-          value += '\b';
-          break;
-        case 102:
-          value += '\f';
-          break;
-        case 110:
-          value += '\n';
-          break;
-        case 114:
-          value += '\r';
-          break;
-        case 116:
-          value += '\t';
-          break;
-        case 117: {
-          // uXXXX
-          const convertedEscape = convertUnicodeEscape(source, body, position);
-          value += convertedEscape.value;
-          position += convertedEscape.positionIncrease;
-          break;
-        }
-        default:
-          throw syntaxError(
-            source,
-            position,
-            `Invalid character escape sequence: \\${String.fromCharCode(
-              code,
-            )}.`,
-          );
+      value += body.slice(chunkStart, position);
+      const escape = readEscapeSequence(source, position);
+      code = escape.code;
+      codeSize = escape.size;
+      value += escape.value;
+      chunkStart = position + codeSize;
+    } else {
+      codeSize = 1;
+    }
+
+    // Surrogate Pairs
+    // The specification semantics call for replacing surrogate pairs with valid
+    // non-BMP Unicode code points. However since JS strings encode non-BMP code
+    // points as surrogate pairs anyhow, this simply validates those pairs.
+    if (code >= 0xd800 && code <= 0xdbff) {
+      let nextCode = body.charCodeAt(position + codeSize);
+      if (nextCode === 92) {
+        nextCode = readEscapeSequence(source, position + codeSize).code;
       }
-      ++position;
-      chunkStart = position;
+      // A High Surrogate must be followed by a Low Surrogate.
+      if (nextCode < 0xdc00 || nextCode > 0xdfff) {
+        throw syntaxError(
+          source,
+          position,
+          `Invalid high surrogate ${printCharCode(
+            code,
+          )} followed by a non-low surrogate ${printCharCode(
+            nextCode,
+          )} in String.`,
+        );
+      }
+      isSurrogatePair = true;
+    } else if (code >= 0xdc00 && code <= 0xdfff) {
+      // A Low Surrogate must follow a High Surrogate.
+      if (!isSurrogatePair) {
+        throw syntaxError(
+          source,
+          position,
+          `Invalid low surrogate within String: ${printCharCode(code)}.`,
+        );
+      }
+      isSurrogatePair = false;
     }
+
+    position += codeSize;
   }
 
   throw syntaxError(source, position, 'Unterminated string.');
 }
 
-function convertUnicodeEscape(source, body, position) {
-  const charCode = uniCharCode(
-    body.charCodeAt(position + 1),
-    body.charCodeAt(position + 2),
-    body.charCodeAt(position + 3),
-    body.charCodeAt(position + 4),
-  );
-  if (charCode < 0) {
-    const invalidSequence = body.slice(position + 1, position + 5);
-    throw syntaxError(
-      source,
-      position,
-      `Invalid character escape sequence: \\u${invalidSequence}.`,
-    );
-  }
+// The code-point, lexed size, and string value of an escape sequence.
+type EscapeSequence = {| code: number, size: number, value: string |};
 
-  let value;
-  let positionIncrease;
-  // String.fromCharCode doesn't fail for invalid surrogate pairs, therefore
-  // it is manually verified here
-  if (isTrailingSurrogate(charCode)) {
-    const invalidSequence = body.slice(position + 1, position + 5);
-    throw syntaxError(
-      source,
-      position,
-      `Invalid surrogate pair escape sequence: \\u${invalidSequence}.`,
-    );
+/**
+ * | Escaped Character | Code Point | Character Name               |
+ * | ----------------- | ---------- | ---------------------------- |
+ * | {`"`}             | U+0022     | double quote                 |
+ * | {`\`}             | U+005C     | reverse solidus (back slash) |
+ * | {`/`}             | U+002F     | solidus (forward slash)      |
+ * | {`b`}             | U+0008     | backspace                    |
+ * | {`f`}             | U+000C     | form feed                    |
+ * | {`n`}             | U+000A     | line feed (new line)         |
+ * | {`r`}             | U+000D     | carriage return              |
+ * | {`t`}             | U+0009     | horizontal tab               |
+ */
+function readEscapeSequence(source: Source, pos: number): EscapeSequence {
+  const escapedCode = source.body.charCodeAt(pos + 1);
+  switch (escapedCode) {
+    case 34: // \"
+      return { code: 0x0022, size: 2, value: '"' };
+    case 47: // \/ is U+002F (solidus)
+      return { code: 0x002f, size: 2, value: '/' };
+    case 92: // \\ is U+005C (reverse solidus)
+      return { code: 0x005c, size: 2, value: '\\' };
+    case 98: // \b
+      return { code: 0x0008, size: 2, value: '\b' };
+    case 102: // \f
+      return { code: 0x000c, size: 2, value: '\f' };
+    case 110: // \n
+      return { code: 0x000a, size: 2, value: '\n' };
+    case 114: // \r
+      return { code: 0x000d, size: 2, value: '\r' };
+    case 116: // \t
+      return { code: 0x0009, size: 2, value: '\t' };
+    case 117: // \u
+      return readEscapedUnicode(source, pos);
   }
-  if (isLeadingSurrogate(charCode)) {
-    if (
-      body.charCodeAt(position + 5) !== 92 ||
-      body.charCodeAt(position + 6) !== 117
-    ) {
-      const invalidSequence = body.slice(position + 1, position + 7);
-      throw syntaxError(
-        source,
-        position,
-        `Invalid surrogate pair escape sequence: \\u${invalidSequence}.`,
-      );
+  throw syntaxError(
+    source,
+    pos + 1,
+    `Invalid character escape sequence: "${source.body.slice(pos, pos + 2)}".`,
+  );
+}
+
+function readEscapedUnicode(source: Source, pos: number): EscapeSequence {
+  const body = source.body;
+  let code = 0;
+  let size = 2;
+  // Braced (variable-width) form: char code 123 is "{" right after "\u".
+  if (body.charCodeAt(pos + 2) === 123) {
+    size++;
+    // "\u{" + at most 6 hex digits + "}" can span no more than 10 chars.
+    while (size < 10) {
+      const charCode = body.charCodeAt(pos + size++);
+      // An end quote (34) cuts the escape short: break with an invalid code.
+      if (charCode === 34) {
+        size--;
+        code = -1;
+        break;
+      }
+      // End brace "}" to complete the code.
+      if (charCode === 125) {
+        // If the size is only 4, the escape found no hex digits.
+        if (size === 4) {
+          code = -1;
+        }
+        break;
+      } else if (size === 10) {
+        // If this is the 10th char which is not a brace, it's an invalid code.
+        code = -1;
+      } else {
+        // Append this hex digit to the code point.
+        code = (code << 4) | char2hex(charCode);
+      }
     }
-    const trailingSurrogate = uniCharCode(
-      body.charCodeAt(position + 7),
-      body.charCodeAt(position + 8),
-      body.charCodeAt(position + 9),
-      body.charCodeAt(position + 10),
-    );
-    if (!isTrailingSurrogate(trailingSurrogate)) {
-      const invalidSequence = body.slice(position + 1, position + 11);
+    // Unicode code points must be <= U+10FFFF
+    if (code > 0x10ffff) {
       throw syntaxError(
         source,
-        position,
-        `Invalid surrogate pair escape sequence: \\u${invalidSequence}.`,
+        pos + 1,
+        `Undefined Unicode code-point: "${body.slice(pos, pos + size)}".`,
       );
     }
-    value = String.fromCharCode(charCode, trailingSurrogate);
-    positionIncrease = 10;
   } else {
-    value = String.fromCharCode(charCode);
-    positionIncrease = 4;
+    // Fixed-width form: "\u" plus exactly 4 hex digits spans 6 chars.
+    while (size < 6) {
+      const charCode = body.charCodeAt(pos + size++);
+      // An end quote (34) cuts the escape short: break with an invalid code.
+      if (charCode === 34) {
+        size--;
+        code = -1;
+        break;
+      }
+      // Append this hex digit to the code point.
+      code = (code << 4) | char2hex(charCode);
+    }
   }
-  return { value, positionIncrease };
-}
-
-function isLeadingSurrogate(charCode) {
-  return charCode >= 0xd800 && charCode <= 0xdbff;
-}
-
-function isTrailingSurrogate(charCode) {
-  return charCode >= 0xdc00 && charCode <= 0xdfff;
+  // Negative if the escape was cut short, empty, too long, or not all hex.
+  if (code < 0) {
+    throw syntaxError(
+      source,
+      pos + 1,
+      `Invalid Unicode escape sequence: "${body.slice(pos, pos + size)}".`,
+    );
+  }
+  // JS strings encode astral code points as surrogate pairs.
+  const value =
+    code <= 0xffff
+      ? String.fromCharCode(code)
+      : String.fromCharCode(
+          0xd800 | ((code - 0x10000) >> 10), // High Surrogate
+          0xdc00 | ((code - 0x10000) & 0x3ff), // Low Surrogate
+        );
+  return { code, size, value };
 }
 
 /**
@@ -679,22 +724,6 @@ function readBlockString(
   throw syntaxError(source, position, 'Unterminated string.');
 }
 
-/**
- * Converts four hexadecimal chars to the integer that the
- * string represents. For example, uniCharCode('0','0','0','f')
- * will return 15, and uniCharCode('0','0','f','f') returns 255.
- *
- * Returns a negative number on error, if a char was invalid.
- *
- * This is implemented by noting that char2hex() returns -1 on error,
- * which means the result of ORing the char2hex() will also be negative.
- */
-function uniCharCode(a: number, b: number, c: number, d: number): number {
-  return (
-    (char2hex(a) << 12) | (char2hex(b) << 8) | (char2hex(c) << 4) | char2hex(d)
-  );
-}
-
 /**
  * Converts a hex character to its integer value.
  * '0' becomes 0, '9' becomes 9