Skip to content

Commit a43b60f

Browse files
Remove some unnecessary copying from lexer (#9961)
* Reduce copying and allocations in lexing * Simplify and improve perf of eval * Don't create temporary array in discardInput since blit handles overlapping memory * Fix review comments and update a new method * rename methods * remove "new"
1 parent 29b833d commit a43b60f

File tree

4 files changed

+31
-30
lines changed

4 files changed

+31
-30
lines changed

src/absil/illex.fsl

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ open FSharp.Compiler.AbstractIL.Internal.AsciiParser
1616
open FSharp.Compiler.AbstractIL.Internal.AsciiConstants
1717

1818

19-
let lexeme (lexbuf : LexBuffer<char>) = new System.String(lexbuf.Lexeme)
19+
let lexeme (lexbuf : LexBuffer<char>) = LexBuffer<char>.LexemeString lexbuf
20+
let lexemeChar (lexbuf : LexBuffer<char>) n = lexbuf.LexemeChar n
2021

2122
let unexpectedChar _lexbuf =
2223
raise Parsing.RecoverableParseError ;;
@@ -79,12 +80,7 @@ let kwdInstrTable =
7980

8081
let kwdOrInstr s = (Lazy.force kwdInstrTable).[s] (* words *)
8182

82-
let eval = function
83-
| '0' -> 0 | '1' -> 1 | '2' -> 2 | '3' -> 3 | '4' -> 4 | '5' -> 5
84-
| '6' -> 6 | '7' -> 7 | '8' -> 8 | '9' -> 9
85-
| 'A' -> 10 | 'B' -> 11 | 'C' -> 12 | 'D' -> 13 | 'E' -> 14 | 'F' -> 15
86-
| 'a' -> 10 | 'b' -> 11 | 'c' -> 12 | 'd' -> 13 | 'e' -> 14 | 'f' -> 15
87-
| _ -> failwith "bad hexbyte"
83+
let evalDigit ch = (int ch) - (int '0')
8884

8985
let kwdOrInstrOrId s = match (Lazy.force kwdInstrTable).TryFind s with Some v -> v | _ -> VAL_ID s
9086

@@ -119,21 +115,21 @@ rule token = parse
119115
(* The problem is telling an integer-followed-by-ellipses from a floating-point-number-followed-by-dots *)
120116

121117
| ((['0'-'9']) | (['0'-'9']['0'-'9']['0'-'9']+)) "..."
122-
{ let b = lexeme lexbuf in
123-
VAL_INT32_ELIPSES(int32(String.sub b 0 (String.length b - 3))) }
118+
{ let b = lexbuf.LexemeView in
119+
VAL_INT32_ELIPSES(int32(b.Slice(0, (b.Length - 3)).ToString())) }
124120
| ['0'-'9' 'A'-'F' 'a'-'f' ] ['0'-'9' 'A'-'F' 'a'-'f' ]
125-
{ let c1 = String.get (lexeme lexbuf) 0 in
126-
let c2 = String.get (lexeme lexbuf) 1 in
121+
{ let c1 = (lexemeChar lexbuf 0) in
122+
let c2 = (lexemeChar lexbuf 1) in
127123
if c1 >= '0' && c1 <= '9' && c2 >= '0' && c2 <= '9' then
128-
VAL_INT64(int64 (10*eval c1 + eval c2) )
124+
VAL_INT64(int64 (10*evalDigit c1 + evalDigit c2) )
129125
else VAL_ID(lexeme lexbuf) }
130126
| '0' 'x' ['0'-'9' 'a'-'f' 'A'-'F']+
131127
{ VAL_INT64(int64(lexeme lexbuf)) }
132128
| "FFFFFF" ['0'-'9' 'A'-'F' 'a'-'f' ] ['0'-'9' 'A'-'F' 'a'-'f' ]
133-
{ let c1 = (lexeme lexbuf).[6] in
134-
let c2 = (lexeme lexbuf).[7] in
129+
{ let c1 = (lexemeChar lexbuf 6) in
130+
let c2 = (lexemeChar lexbuf 7) in
135131
if c1 >= '0' && c1 <= '9' && c2 >= '0' && c2 <= '9' then
136-
VAL_INT64(int64 (10*eval c1 + eval c2))
132+
VAL_INT64(int64 (10*evalDigit c1 + evalDigit c2))
137133
else VAL_ID(lexeme lexbuf) }
138134

139135
| '-' ['0'-'9']+

src/fsharp/lex.fsl

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,10 @@ module Ranges =
4646
/// Get string from lexbuf
4747
let lexeme (lexbuf : UnicodeLexing.Lexbuf) = UnicodeLexing.Lexbuf.LexemeString lexbuf
4848

49-
/// Trim n chars from both side of a string
50-
let trimBoth (s:string) n m = s.Substring(n, s.Length - (n+m))
51-
5249
/// Trim n chars from the left and m chars from the right of lexbuf, return string
53-
let lexemeTrimBoth lexbuf n m = trimBoth (lexeme lexbuf) n m
50+
let lexemeTrimBoth (lexbuf : UnicodeLexing.Lexbuf) (n:int) (m:int) =
51+
let s = lexbuf.LexemeView
52+
s.Slice(n, s.Length - (n+m)).ToString()
5453

5554
/// Trim n chars from the right of lexbuf, return string
5655
let lexemeTrimRight lexbuf n = lexemeTrimBoth lexbuf 0 n
@@ -122,9 +121,9 @@ let lexemeTrimRightToInt32 args lexbuf n =
122121
// Checks
123122

124123
let checkExprOp (lexbuf:UnicodeLexing.Lexbuf) =
125-
if String.contains (lexeme lexbuf) ':' then
124+
if lexbuf.LexemeContains ':' then
126125
deprecatedWithError (FSComp.SR.lexCharNotAllowedInOperatorNames(":")) lexbuf.LexemeRange
127-
if String.contains (lexeme lexbuf) '$' then
126+
if lexbuf.LexemeContains '$' then
128127
deprecatedWithError (FSComp.SR.lexCharNotAllowedInOperatorNames("$")) lexbuf.LexemeRange
129128

130129
let unexpectedChar lexbuf =

src/utils/prim-lexing.fs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -194,13 +194,11 @@ namespace Internal.Utilities.Text.Lexing
194194
let mutable startPos = Position.Empty
195195
let mutable endPos = Position.Empty
196196

197-
// Throw away all the input besides the lexeme
197+
// Throw away all the input besides the lexeme, which is placed at the start of the buffer
198198
let discardInput () =
199-
let keep = Array.sub buffer bufferScanStart bufferScanLength
200-
let nkeep = keep.Length
201-
Array.blit keep 0 buffer 0 nkeep
199+
Array.blit buffer bufferScanStart buffer 0 bufferScanLength
202200
bufferScanStart <- 0
203-
bufferMaxScanLength <- nkeep
201+
bufferMaxScanLength <- bufferScanLength
204202

205203
member lexbuf.EndOfScan () : int =
206204
//Printf.eprintf "endOfScan, lexBuffer.lexemeLength = %d\n" lexBuffer.lexemeLength;
@@ -221,7 +219,9 @@ namespace Internal.Utilities.Text.Lexing
221219
with get() = endPos
222220
and set b = endPos <- b
223221

224-
member lexbuf.Lexeme = Array.sub buffer bufferScanStart lexemeLength
222+
member lexbuf.LexemeView = System.ReadOnlySpan<'Char>(buffer, bufferScanStart, lexemeLength)
223+
member lexbuf.LexemeChar n = buffer.[n+bufferScanStart]
224+
member lexbuf.LexemeContains (c:'Char) = array.IndexOf(buffer, c, bufferScanStart, lexemeLength) >= 0
225225
member lexbuf.BufferLocalStore = (context :> IDictionary<_,_>)
226226
member lexbuf.LexemeLength with get() : int = lexemeLength and set v = lexemeLength <- v
227227
member lexbuf.Buffer with get() : 'Char[] = buffer and set v = buffer <- v

src/utils/prim-lexing.fsi

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,12 +96,18 @@ type internal LexBuffer<'Char> =
9696
/// The end position for the lexeme.
9797
member EndPos: Position with get,set
9898

99-
/// The matched string.
100-
member Lexeme: 'Char []
99+
/// The currently matched text as a Span; it is only valid until the lexer is advanced
100+
member LexemeView: System.ReadOnlySpan<'Char>
101+
102+
/// Get a single character of the matched string
103+
member LexemeChar: int -> 'Char
104+
105+
/// Determine if Lexeme contains a specific character
106+
member LexemeContains: 'Char -> bool
101107

102108
/// Fast helper to turn the matched characters into a string, avoiding an intermediate array.
103109
static member LexemeString : LexBuffer<char> -> string
104-
110+
105111
/// Dynamically typed, non-lexically scoped parameter table.
106112
member BufferLocalStore : IDictionary<string,obj>
107113

0 commit comments

Comments
 (0)