From 089c28d81385a1f0a6f2c2c9e36e79f76366ab14 Mon Sep 17 00:00:00 2001
From: James Phillips <jamesdphillips@gmail.com>
Date: Mon, 11 Dec 2017 20:40:23 -0800
Subject: [PATCH 1/3] Add support for block strings into lexer

http://facebook.github.io/graphql/draft/#sec-String-Value
http://facebook.github.io/graphql/draft/#BlockStringValue()
https://github.com/graphql/graphql-js/blob/v0.11.7/src/language/lexer.js

Signed-off-by: James Phillips <jamesdphillips@gmail.com>
---
 language/lexer/lexer.go      | 147 ++++++++++++++++++++++++++++++++++-
 language/lexer/lexer_test.go |  99 +++++++++++++++++++++++
 2 files changed, 242 insertions(+), 4 deletions(-)

diff --git a/language/lexer/lexer.go b/language/lexer/lexer.go
index 865c9d6e..b370bcad 100644
--- a/language/lexer/lexer.go
+++ b/language/lexer/lexer.go
@@ -3,6 +3,8 @@ package lexer
 import (
 	"bytes"
 	"fmt"
+	"regexp"
+	"strings"
 	"unicode/utf8"
 
 	"github.com/graphql-go/graphql/gqlerrors"
@@ -28,6 +30,7 @@ const (
 	INT
 	FLOAT
 	STRING
+	BLOCK_STRING
 )
 
 var TokenKind map[int]int
@@ -54,6 +57,7 @@ func init() {
 	TokenKind[INT] = INT
 	TokenKind[FLOAT] = FLOAT
 	TokenKind[STRING] = STRING
+	TokenKind[BLOCK_STRING] = BLOCK_STRING
 	tokenDescription[TokenKind[EOF]] = "EOF"
 	tokenDescription[TokenKind[BANG]] = "!"
 	tokenDescription[TokenKind[DOLLAR]] = "$"
@@ -72,6 +76,7 @@ func init() {
 	tokenDescription[TokenKind[INT]] = "Int"
 	tokenDescription[TokenKind[FLOAT]] = "Float"
 	tokenDescription[TokenKind[STRING]] = "String"
+	tokenDescription[TokenKind[BLOCK_STRING]] = "BlockString"
 }
 
 // Token is a representation of a lexed Token. Value only appears for non-punctuation
@@ -303,6 +308,135 @@ func readString(s *source.Source, start int) (Token, error) {
 	return makeToken(TokenKind[STRING], start, position+1, value), nil
 }
 
+// readBlockString reads a block string token (""" ... """) from the source.
+//
+// start is the byte offset of the opening triple quote. The returned token's
+// Value is the quoted text run through blockStringValue. A syntax error is
+// returned for a disallowed control character or a missing closing quote.
+//
+// """("?"?(\\"""|\\(?!=""")|[^"\\]))*"""
+func readBlockString(s *source.Source, start int) (Token, error) {
+	body := s.Body
+	position := start + 3
+	runePosition := start + 3
+	chunkStart := position
+	var valueBuffer bytes.Buffer
+
+	for position < len(body) {
+		code, n := runeAt(body, position)
+
+		// Closing Triple-Quote (""")
+		if code == '"' {
+			x, _ := runeAt(body, position+1)
+			y, _ := runeAt(body, position+2)
+			if x == '"' && y == '"' {
+				valueBuffer.Write(body[chunkStart:position])
+				value := blockStringValue(valueBuffer.String())
+				return makeToken(TokenKind[BLOCK_STRING], start, position+3, value), nil
+			}
+		}
+
+		// SourceCharacter
+		if code < 0x0020 &&
+			code != 0x0009 &&
+			code != 0x000a &&
+			code != 0x000d {
+			return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character within String: %v.`, printCharCode(code)))
+		}
+
+		// Escape Triple-Quote (\""")
+		if code == '\\' { // \
+			x, _ := runeAt(body, position+1)
+			y, _ := runeAt(body, position+2)
+			z, _ := runeAt(body, position+3)
+			if x == '"' && y == '"' && z == '"' {
+				// Write the chunk and the unescaped quotes separately; append on
+				// a sub-slice of body would overwrite the source bytes in place.
+				valueBuffer.Write(body[chunkStart:position])
+				valueBuffer.WriteString(`"""`)
+				position += 4     // account for `"""` characters
+				runePosition += 4 // "       "   "     "
+				chunkStart = position
+				continue
+			}
+		}
+
+		position += n
+		runePosition++
+	}
+
+	return Token{}, gqlerrors.NewSyntaxError(s, runePosition, "Unterminated string.")
+}
+
+// splitLinesRegex matches one line terminator: CRLF, LF or CR.
+var splitLinesRegex = regexp.MustCompile("\r\n|[\n\r]")
+
+// blockStringValue implements the GraphQL spec's BlockStringValue() algorithm.
+//
+// Produces the value of a block string from its parsed raw value, similar to
+// Coffeescript's block string, Python's docstring trim or Ruby's strip_heredoc:
+// common indentation and leading/trailing blank lines are removed and the
+// remaining lines are joined with "\n". An all-blank input yields "".
+//
+// Spec: http://facebook.github.io/graphql/draft/#BlockStringValue()
+// Heavily borrows from: https://github.com/graphql/graphql-js/blob/8e0c599ceccfa8c40d6edf3b72ee2a71490b10e0/src/language/blockStringValue.js
+func blockStringValue(in string) string {
+	// Expand a block string's raw value into independent lines.
+	lines := splitLinesRegex.Split(in, -1)
+
+	// Find the smallest indentation among the non-blank lines after the first.
+	commonIndent := -1
+	for i := 1; i < len(lines); i++ {
+		line := lines[i]
+		indent := leadingWhitespaceLen(line)
+		if indent < len(line) && (commonIndent == -1 || indent < commonIndent) {
+			commonIndent = indent
+			if commonIndent == 0 {
+				break
+			}
+		}
+	}
+	if commonIndent > 0 {
+		for i, line := range lines {
+			lines[i] = line[commonIndent:]
+		}
+	}
+
+	// Remove leading blank lines. The len(lines) > 0 guard ends the scan once
+	// every line has been dropped, which happens for an empty or
+	// whitespace-only block string; indexing lines[0] without the guard would
+	// panic in that case.
+	for len(lines) > 0 && lineIsBlank(lines[0]) {
+		lines = lines[1:]
+	}
+
+	// Remove trailing blank lines. The same guard applies; when nothing is
+	// left the join below yields the empty string.
+	for len(lines) > 0 && lineIsBlank(lines[len(lines)-1]) {
+		lines = lines[:len(lines)-1]
+	}
+
+	// Return a string of the lines joined with U+000A.
+	return strings.Join(lines, "\n")
+}
+
+// leadingWhitespaceLen reports how many space or tab characters start the
+// given line; counting stops at the first character that is neither.
+func leadingWhitespaceLen(in string) (n int) {
+	for _, r := range in {
+		if r != ' ' && r != '\t' {
+			return n
+		}
+		n++
+	}
+	return n
+}
+
+// lineIsBlank reports whether the line is empty or holds only spaces and tabs.
+func lineIsBlank(in string) bool {
+	return len(in) == leadingWhitespaceLen(in)
+}
+
 // Converts four hexidecimal chars to the integer that the
 // string represents. For example, uniCharCode('0','0','0','f')
 // will return 15, and uniCharCode('0','0','f','f') returns 255.
@@ -425,11 +559,16 @@ func readToken(s *source.Source, fromPosition int) (Token, error) {
 		return token, nil
 	// "
 	case '"':
-		token, err := readString(s, position)
-		if err != nil {
-			return token, err
+		var token Token
+		var err error
+		x, _ := runeAt(body, position+1)
+		y, _ := runeAt(body, position+2)
+		if x == '"' && y == '"' {
+			token, err = readBlockString(s, position)
+		} else {
+			token, err = readString(s, position)
 		}
-		return token, nil
+		return token, err
 	}
 	description := fmt.Sprintf("Unexpected character %v.", printCharCode(code))
 	return Token{}, gqlerrors.NewSyntaxError(s, runePosition, description)
diff --git a/language/lexer/lexer_test.go b/language/lexer/lexer_test.go
index ac59c846..2376b4ca 100644
--- a/language/lexer/lexer_test.go
+++ b/language/lexer/lexer_test.go
@@ -447,6 +447,105 @@ func TestLexer_ReportsUsefulStringErrors(t *testing.T) {
 	}
 }
 
+func TestLexer_LexesBlockStrings(t *testing.T) {
+	tests := []Test{
+		{ // single-line block string
+			Body: `"""simple"""`,
+			Expected: Token{
+				Kind:  TokenKind[BLOCK_STRING],
+				Start: 0,
+				End:   12,
+				Value: "simple",
+			},
+		},
+		{ // surrounding spaces on a single line are kept
+			Body: `""" white space """`,
+			Expected: Token{
+				Kind:  TokenKind[BLOCK_STRING],
+				Start: 0,
+				End:   19,
+				Value: " white space ",
+			},
+		},
+		{ // several block strings in the body; the expected token is the first
+			Body: `
+				"""  white space """
+				"""  white space  """
+				"""  white space """
+			`,
+			Expected: Token{
+				Kind:  TokenKind[BLOCK_STRING],
+				Start: 5,
+				End:   25,
+				Value: "  white space ",
+			},
+		},
+		{
+			Body: `"""contains " quote"""`,
+			Expected: Token{
+				Kind:  TokenKind[BLOCK_STRING],
+				Start: 0,
+				End:   22,
+				Value: `contains " quote`,
+			},
+		},
+		{ // the escaped triple quote \""" is unescaped to """
+			Body: `"""contains \""" triplequote"""`,
+			Expected: Token{
+				Kind:  TokenKind[BLOCK_STRING],
+				Start: 0,
+				End:   31,
+				Value: `contains """ triplequote`,
+			},
+		},
+		{ // an embedded LF is preserved
+			Body: "\"\"\"multi\nline\"\"\"",
+			Expected: Token{
+				Kind:  TokenKind[BLOCK_STRING],
+				Start: 0,
+				End:   16,
+				Value: "multi\nline",
+			},
+		},
+		{ // CR and CRLF line terminators are normalized to LF
+			Body: "\"\"\"multi\rline\r\nnormalized\"\"\"",
+			Expected: Token{
+				Kind:  TokenKind[BLOCK_STRING],
+				Start: 0,
+				End:   28,
+				Value: "multi\nline\nnormalized",
+			},
+		},
+		{ // backslash sequences other than \""" stay literal
+			Body: "\"\"\"unescaped \\n\\r\\b\\t\\f\\u1234\"\"\"",
+			Expected: Token{
+				Kind:  TokenKind[BLOCK_STRING],
+				Start: 0,
+				End:   32,
+				Value: "unescaped \\n\\r\\b\\t\\f\\u1234",
+			},
+		},
+		{ // doubled backslash and \/ stay literal
+			Body: "\"\"\"slashes \\\\ \\/\"\"\"",
+			Expected: Token{
+				Kind:  TokenKind[BLOCK_STRING],
+				Start: 0,
+				End:   19,
+				Value: "slashes \\\\ \\/",
+			},
+		},
+	}
+	for _, test := range tests {
+		token, err := Lex(&source.Source{Body: []byte(test.Body)})(0)
+		if err != nil {
+			t.Errorf("unexpected error: %v", err)
+		}
+		if !reflect.DeepEqual(token, test.Expected) {
+			t.Errorf("unexpected token, expected: %v, got: %v", test.Expected, token)
+		}
+	}
+}
+
 func TestLexer_LexesNumbers(t *testing.T) {
 	tests := []Test{
 		{

From 0629778c1ca33c71c855e658bc5936c1a06b4967 Mon Sep 17 00:00:00 2001
From: James Phillips <jamesdphillips@gmail.com>
Date: Mon, 11 Dec 2017 21:42:49 -0800
Subject: [PATCH 2/3] Add support for block strings into parser

Signed-off-by: James Phillips <jamesdphillips@gmail.com>
---
 language/lexer/lexer_test.go | 47 ++++++++++++++++++++++++++++++++++++
 language/parser/parser.go    |  2 ++
 2 files changed, 49 insertions(+)

diff --git a/language/lexer/lexer_test.go b/language/lexer/lexer_test.go
index 2376b4ca..6a9627f8 100644
--- a/language/lexer/lexer_test.go
+++ b/language/lexer/lexer_test.go
@@ -546,6 +546,53 @@ func TestLexer_LexesBlockStrings(t *testing.T) {
 	}
 }
 
+func TestLexer_ReportsUsefulBlockStringErrors(t *testing.T) {
+	tests := []Test{
+		{
+			Body: `"""`,
+			Expected: `Syntax Error GraphQL (1:4) Unterminated string.
+
+1: """
+      ^
+`,
+		},
+		{
+			Body: `"""no end quote`,
+			Expected: `Syntax Error GraphQL (1:16) Unterminated string.
+
+1: """no end quote
+                  ^
+`,
+		},
+		{
+			Body: "\"\"\"contains unescaped \u0007 control char\"\"\"",
+			Expected: `Syntax Error GraphQL (1:23) Invalid character within String: "\\u0007".
+
+1: """contains unescaped \u0007 control char"""
+                         ^
+`,
+		},
+		{
+			Body: "\"\"\"null-byte is not \u0000 end of file\"\"\"",
+			Expected: `Syntax Error GraphQL (1:21) Invalid character within String: "\\u0000".
+
+1: """null-byte is not \u0000 end of file"""
+                       ^
+`,
+		},
+	}
+	for _, test := range tests {
+		_, err := Lex(createSource(test.Body))(0)
+		if err == nil {
+			t.Errorf("unexpected nil error\nexpected:\n%v\n\ngot:\n%v", test.Expected, err)
+			continue // err is nil, so calling err.Error() below would panic
+		}
+		if err.Error() != test.Expected {
+			t.Errorf("unexpected error.\nexpected:\n%v\n\ngot:\n%v", test.Expected, err.Error())
+		}
+	}
+}
+
 func TestLexer_LexesNumbers(t *testing.T) {
 	tests := []Test{
 		{
diff --git a/language/parser/parser.go b/language/parser/parser.go
index 92cf7ac6..29b68b59 100644
--- a/language/parser/parser.go
+++ b/language/parser/parser.go
@@ -635,6 +635,8 @@ func parseValueLiteral(parser *Parser, isConst bool) (ast.Value, error) {
 			Value: token.Value,
 			Loc:   loc(parser, token.Start),
 		}), nil
+	case lexer.TokenKind[lexer.BLOCK_STRING]:
+		fallthrough
 	case lexer.TokenKind[lexer.STRING]:
 		if err := advance(parser); err != nil {
 			return nil, err

From af754f4d221922086b48e689d5b4ea8188153716 Mon Sep 17 00:00:00 2001
From: James Phillips <jamesdphillips@gmail.com>
Date: Tue, 12 Dec 2017 08:43:17 -0800
Subject: [PATCH 3/3] Avoid panic by not exceeding bounds of string

Signed-off-by: James Phillips <jamesdphillips@gmail.com>
---
 language/lexer/lexer.go      |  3 +++
 language/lexer/lexer_test.go | 16 ++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/language/lexer/lexer.go b/language/lexer/lexer.go
index b370bcad..62589f4e 100644
--- a/language/lexer/lexer.go
+++ b/language/lexer/lexer.go
@@ -395,6 +395,9 @@ func blockStringValue(in string) string {
 	}
 	if commonIndent > 0 {
 		for i, line := range lines {
+			if i == 0 || commonIndent > len(line) {
+				continue // first line keeps its text; shorter lines are blank
+			}
 			lines[i] = line[commonIndent:]
 		}
 	}
diff --git a/language/lexer/lexer_test.go b/language/lexer/lexer_test.go
index 6a9627f8..b32cd76e 100644
--- a/language/lexer/lexer_test.go
+++ b/language/lexer/lexer_test.go
@@ -480,6 +480,22 @@ func TestLexer_LexesBlockStrings(t *testing.T) {
 				Value: "  white space ",
 			},
 		},
+		{
+			Body: `
+				"""
+						my great description
+						spans multiple lines
+
+						with breaks
+				"""
+			`,
+			Expected: Token{
+				Kind:  TokenKind[BLOCK_STRING],
+				Start: 5,
+				End:   89,
+				Value: "my great description\nspans multiple lines\n\nwith breaks",
+			},
+		},
 		{
 			Body: `"""contains " quote"""`,
 			Expected: Token{