From 089c28d81385a1f0a6f2c2c9e36e79f76366ab14 Mon Sep 17 00:00:00 2001 From: James Phillips Date: Mon, 11 Dec 2017 20:40:23 -0800 Subject: [PATCH 1/3] Add support for block strings into lexer http://facebook.github.io/graphql/draft/#sec-String-Value http://facebook.github.io/graphql/draft/#BlockStringValue() https://github.com/graphql/graphql-js/blob/v0.11.7/src/language/lexer.js Signed-off-by: James Phillips --- language/lexer/lexer.go | 147 ++++++++++++++++++++++++++++++++++- language/lexer/lexer_test.go | 99 +++++++++++++++++++++++ 2 files changed, 242 insertions(+), 4 deletions(-) diff --git a/language/lexer/lexer.go b/language/lexer/lexer.go index 865c9d6e..b370bcad 100644 --- a/language/lexer/lexer.go +++ b/language/lexer/lexer.go @@ -3,6 +3,8 @@ package lexer import ( "bytes" "fmt" + "regexp" + "strings" "unicode/utf8" "github.com/graphql-go/graphql/gqlerrors" @@ -28,6 +30,7 @@ const ( INT FLOAT STRING + BLOCK_STRING ) var TokenKind map[int]int @@ -54,6 +57,7 @@ func init() { TokenKind[INT] = INT TokenKind[FLOAT] = FLOAT TokenKind[STRING] = STRING + TokenKind[BLOCK_STRING] = BLOCK_STRING tokenDescription[TokenKind[EOF]] = "EOF" tokenDescription[TokenKind[BANG]] = "!" tokenDescription[TokenKind[DOLLAR]] = "$" @@ -72,6 +76,7 @@ func init() { tokenDescription[TokenKind[INT]] = "Int" tokenDescription[TokenKind[FLOAT]] = "Float" tokenDescription[TokenKind[STRING]] = "String" + tokenDescription[TokenKind[BLOCK_STRING]] = "BlockString" } // Token is a representation of a lexed Token. Value only appears for non-punctuation @@ -303,6 +308,135 @@ func readString(s *source.Source, start int) (Token, error) { return makeToken(TokenKind[STRING], start, position+1, value), nil } +// readBlockString reads a block string token from the source file. 
+//
+// """("?"?(\\"""|\\(?!""")|[^"\\]))*"""
+func readBlockString(s *source.Source, start int) (Token, error) {
+	body := s.Body
+	position := start + 3
+	runePosition := start + 3
+	chunkStart := position
+	var valueBuffer bytes.Buffer
+
+	for {
+		// Stop if we've reached the end of the buffer
+		if position >= len(body) {
+			break
+		}
+
+		code, n := runeAt(body, position)
+
+		// Closing Triple-Quote (""")
+		if code == '"' {
+			x, _ := runeAt(body, position+1)
+			y, _ := runeAt(body, position+2)
+			if x == '"' && y == '"' {
+				stringContent := body[chunkStart:position]
+				valueBuffer.Write(stringContent)
+				value := blockStringValue(valueBuffer.String())
+				return makeToken(TokenKind[BLOCK_STRING], start, position+3, value), nil
+			}
+		}
+
+		// SourceCharacter
+		if code < 0x0020 &&
+			code != 0x0009 &&
+			code != 0x000a &&
+			code != 0x000d {
+			return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character within String: %v.`, printCharCode(code)))
+		}
+
+		// Escape Triple-Quote (\"""): emit """ via the buffer; appending to a slice of body would overwrite the source bytes.
+		if code == '\\' { // \
+			x, _ := runeAt(body, position+1)
+			y, _ := runeAt(body, position+2)
+			z, _ := runeAt(body, position+3)
+			if x == '"' && y == '"' && z == '"' {
+				valueBuffer.Write(body[chunkStart:position])
+				valueBuffer.WriteString(`"""`)
+				position += 4     // account for `"""` characters
+				runePosition += 4 // " " " "
+				chunkStart = position
+				continue
+			}
+		}
+
+		position += n
+		runePosition++
+	}
+
+	return Token{}, gqlerrors.NewSyntaxError(s, runePosition, "Unterminated string.")
+}
+
+var splitLinesRegex = regexp.MustCompile("\r\n|[\n\r]")
+
+// This implements the GraphQL spec's BlockStringValue() static algorithm.
+//
+// Produces the value of a block string from its parsed raw value, similar to
+// Coffeescript's block string, Python's docstring trim or Ruby's strip_heredoc.
+//
+// Spec: http://facebook.github.io/graphql/draft/#BlockStringValue()
+// Heavily borrows from: https://github.com/graphql/graphql-js/blob/8e0c599ceccfa8c40d6edf3b72ee2a71490b10e0/src/language/blockStringValue.js
+func blockStringValue(in string) string {
+	// Expand a block string's raw value into independent lines.
+	lines := splitLinesRegex.Split(in, -1)
+
+	// Remove common indentation from all lines but first
+	commonIndent := -1
+	for i := 1; i < len(lines); i++ {
+		line := lines[i]
+		indent := leadingWhitespaceLen(line)
+		if indent < len(line) && (commonIndent == -1 || indent < commonIndent) {
+			commonIndent = indent
+			if commonIndent == 0 {
+				break
+			}
+		}
+	}
+	if commonIndent > 0 {
+		for i, line := range lines {
+			lines[i] = line[commonIndent:]
+		}
+	}
+
+	// Remove leading blank lines; stop once none are left so an empty or all-blank value cannot index past the slice.
+	for {
+		if len(lines) == 0 || !lineIsBlank(lines[0]) {
+			break
+		}
+		lines = lines[1:]
+	}
+
+	// Remove trailing blank lines; i is -1 when every line was blank and already removed above.
+	for {
+		i := len(lines) - 1
+		if i < 0 || !lineIsBlank(lines[i]) {
+			break
+		}
+		lines = lines[:i]
+	}
+
+	// Return a string of the lines joined with U+000A.
+	return strings.Join(lines, "\n")
+}
+
+// leadingWhitespaceLen returns count of whitespace characters on given line.
+func leadingWhitespaceLen(in string) (n int) {
+	for _, ch := range in {
+		if ch == ' ' || ch == '\t' {
+			n++
+		} else {
+			break
+		}
+	}
+	return
+}
+
+// lineIsBlank returns true when given line has no content.
+func lineIsBlank(in string) bool {
+	return leadingWhitespaceLen(in) == len(in)
+}
+
 // Converts four hexidecimal chars to the integer that the
 // string represents. For example, uniCharCode('0','0','0','f')
 // will return 15, and uniCharCode('0','0','f','f') returns 255.
@@ -425,11 +559,16 @@ func readToken(s *source.Source, fromPosition int) (Token, error) { return token, nil // " case '"': - token, err := readString(s, position) - if err != nil { - return token, err + var token Token + var err error + x, _ := runeAt(body, position+1) + y, _ := runeAt(body, position+2) + if x == '"' && y == '"' { + token, err = readBlockString(s, position) + } else { + token, err = readString(s, position) } - return token, nil + return token, err } description := fmt.Sprintf("Unexpected character %v.", printCharCode(code)) return Token{}, gqlerrors.NewSyntaxError(s, runePosition, description) diff --git a/language/lexer/lexer_test.go b/language/lexer/lexer_test.go index ac59c846..2376b4ca 100644 --- a/language/lexer/lexer_test.go +++ b/language/lexer/lexer_test.go @@ -447,6 +447,105 @@ func TestLexer_ReportsUsefulStringErrors(t *testing.T) { } } +func TestLexer_LexesBlockStrings(t *testing.T) { + tests := []Test{ + { + Body: `"""simple"""`, + Expected: Token{ + Kind: TokenKind[BLOCK_STRING], + Start: 0, + End: 12, + Value: "simple", + }, + }, + { + Body: `""" white space """`, + Expected: Token{ + Kind: TokenKind[BLOCK_STRING], + Start: 0, + End: 19, + Value: " white space ", + }, + }, + { + Body: ` + """ white space """ + """ white space """ + """ white space """ + `, + Expected: Token{ + Kind: TokenKind[BLOCK_STRING], + Start: 5, + End: 25, + Value: " white space ", + }, + }, + { + Body: `"""contains " quote"""`, + Expected: Token{ + Kind: TokenKind[BLOCK_STRING], + Start: 0, + End: 22, + Value: `contains " quote`, + }, + }, + { + Body: `"""contains \""" triplequote"""`, + Expected: Token{ + Kind: TokenKind[BLOCK_STRING], + Start: 0, + End: 31, + Value: `contains """ triplequote`, + }, + }, + { + Body: "\"\"\"multi\nline\"\"\"", + Expected: Token{ + Kind: TokenKind[BLOCK_STRING], + Start: 0, + End: 16, + Value: "multi\nline", + }, + }, + { + Body: "\"\"\"multi\rline\r\nnormalized\"\"\"", + Expected: Token{ + Kind: TokenKind[BLOCK_STRING], + 
Start: 0, + End: 28, + Value: "multi\nline\nnormalized", + }, + }, + { + Body: "\"\"\"unescaped \\n\\r\\b\\t\\f\\u1234\"\"\"", + Expected: Token{ + Kind: TokenKind[BLOCK_STRING], + Start: 0, + End: 32, + Value: "unescaped \\n\\r\\b\\t\\f\\u1234", + }, + }, + { + Body: "\"\"\"slashes \\\\ \\/\"\"\"", + Expected: Token{ + Kind: TokenKind[BLOCK_STRING], + Start: 0, + End: 19, + Value: "slashes \\\\ \\/", + }, + }, + } + for _, test := range tests { + token, err := Lex(&source.Source{Body: []byte(test.Body)})(0) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !reflect.DeepEqual(token, test.Expected) { + t.Errorf("unexpected token, expected: %v, got: %v", test.Expected, token) + } + } +} + func TestLexer_LexesNumbers(t *testing.T) { tests := []Test{ { From 0629778c1ca33c71c855e658bc5936c1a06b4967 Mon Sep 17 00:00:00 2001 From: James Phillips Date: Mon, 11 Dec 2017 21:42:49 -0800 Subject: [PATCH 2/3] Add support for block strings into parser Signed-off-by: James Phillips --- language/lexer/lexer_test.go | 47 ++++++++++++++++++++++++++++++++++++ language/parser/parser.go | 2 ++ 2 files changed, 49 insertions(+) diff --git a/language/lexer/lexer_test.go b/language/lexer/lexer_test.go index 2376b4ca..6a9627f8 100644 --- a/language/lexer/lexer_test.go +++ b/language/lexer/lexer_test.go @@ -546,6 +546,53 @@ func TestLexer_LexesBlockStrings(t *testing.T) { } } +func TestLexer_ReportsUsefulBlockStringErrors(t *testing.T) { + tests := []Test{ + { + Body: `"""`, + Expected: `Syntax Error GraphQL (1:4) Unterminated string. + +1: """ + ^ +`, + }, + { + Body: `"""no end quote`, + Expected: `Syntax Error GraphQL (1:16) Unterminated string. + +1: """no end quote + ^ +`, + }, + { + Body: "\"\"\"contains unescaped \u0007 control char\"\"\"", + Expected: `Syntax Error GraphQL (1:23) Invalid character within String: "\\u0007". 
+ +1: """contains unescaped \u0007 control char""" + ^ +`, + }, + { + Body: "\"\"\"null-byte is not \u0000 end of file\"\"\"", + Expected: `Syntax Error GraphQL (1:21) Invalid character within String: "\\u0000". + +1: """null-byte is not \u0000 end of file""" + ^ +`, + }, + } + for _, test := range tests { + _, err := Lex(createSource(test.Body))(0) + if err == nil { + t.Errorf("unexpected nil error\nexpected:\n%v\n\ngot:\n%v", test.Expected, err) + } + + if err.Error() != test.Expected { + t.Errorf("unexpected error.\nexpected:\n%v\n\ngot:\n%v", test.Expected, err.Error()) + } + } +} + func TestLexer_LexesNumbers(t *testing.T) { tests := []Test{ { diff --git a/language/parser/parser.go b/language/parser/parser.go index 92cf7ac6..29b68b59 100644 --- a/language/parser/parser.go +++ b/language/parser/parser.go @@ -635,6 +635,8 @@ func parseValueLiteral(parser *Parser, isConst bool) (ast.Value, error) { Value: token.Value, Loc: loc(parser, token.Start), }), nil + case lexer.TokenKind[lexer.BLOCK_STRING]: + fallthrough case lexer.TokenKind[lexer.STRING]: if err := advance(parser); err != nil { return nil, err From af754f4d221922086b48e689d5b4ea8188153716 Mon Sep 17 00:00:00 2001 From: James Phillips Date: Tue, 12 Dec 2017 08:43:17 -0800 Subject: [PATCH 3/3] Avoid panic by not exceeding bounds of string.. 
Signed-off-by: James Phillips --- language/lexer/lexer.go | 3 +++ language/lexer/lexer_test.go | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/language/lexer/lexer.go b/language/lexer/lexer.go index b370bcad..62589f4e 100644 --- a/language/lexer/lexer.go +++ b/language/lexer/lexer.go @@ -395,6 +395,9 @@ func blockStringValue(in string) string { } if commonIndent > 0 { for i, line := range lines { + if commonIndent > len(line) { + continue + } lines[i] = line[commonIndent:] } } diff --git a/language/lexer/lexer_test.go b/language/lexer/lexer_test.go index 6a9627f8..b32cd76e 100644 --- a/language/lexer/lexer_test.go +++ b/language/lexer/lexer_test.go @@ -480,6 +480,22 @@ func TestLexer_LexesBlockStrings(t *testing.T) { Value: " white space ", }, }, + { + Body: ` + """ + my great description + spans multiple lines + + with breaks + """ + `, + Expected: Token{ + Kind: TokenKind[BLOCK_STRING], + Start: 5, + End: 89, + Value: "my great description\nspans multiple lines\n\nwith breaks", + }, + }, { Body: `"""contains " quote"""`, Expected: Token{