diff --git a/src/absil/illex.fsl b/src/absil/illex.fsl index 149cd087b91..1516e02df4b 100644 --- a/src/absil/illex.fsl +++ b/src/absil/illex.fsl @@ -16,7 +16,8 @@ open FSharp.Compiler.AbstractIL.Internal.AsciiParser open FSharp.Compiler.AbstractIL.Internal.AsciiConstants -let lexeme (lexbuf : LexBuffer) = new System.String(lexbuf.Lexeme) +let lexeme (lexbuf : LexBuffer) = LexBuffer.LexemeString lexbuf +let lexemeChar (lexbuf : LexBuffer) n = lexbuf.LexemeChar n let unexpectedChar _lexbuf = raise Parsing.RecoverableParseError ;; @@ -79,12 +80,7 @@ let kwdInstrTable = let kwdOrInstr s = (Lazy.force kwdInstrTable).[s] (* words *) -let eval = function - | '0' -> 0 | '1' -> 1 | '2' -> 2 | '3' -> 3 | '4' -> 4 | '5' -> 5 - | '6' -> 6 | '7' -> 7 | '8' -> 8 | '9' -> 9 - | 'A' -> 10 | 'B' -> 11 | 'C' -> 12 | 'D' -> 13 | 'E' -> 14 | 'F' -> 15 - | 'a' -> 10 | 'b' -> 11 | 'c' -> 12 | 'd' -> 13 | 'e' -> 14 | 'f' -> 15 - | _ -> failwith "bad hexbyte" +let evalDigit ch = (int ch) - (int '0') let kwdOrInstrOrId s = match (Lazy.force kwdInstrTable).TryFind s with Some v -> v | _ -> VAL_ID s @@ -119,21 +115,21 @@ rule token = parse (* The problem is telling an integer-followed-by-ellipses from a floating-point-nubmer-followed-by-dots *) | ((['0'-'9']) | (['0'-'9']['0'-'9']['0'-'9']+)) "..." - { let b = lexeme lexbuf in - VAL_INT32_ELIPSES(int32(String.sub b 0 (String.length b - 3))) } + { let b = lexbuf.LexemeView in + VAL_INT32_ELIPSES(int32(b.Slice(0, (b.Length - 3)).ToString())) } | ['0'-'9' 'A'-'F' 'a'-'f' ] ['0'-'9' 'A'-'F' 'a'-'f' ] - { let c1 = String.get (lexeme lexbuf) 0 in - let c2 = String.get (lexeme lexbuf) 1 in + { let c1 = (lexemeChar lexbuf 0) in + let c2 = (lexemeChar lexbuf 1) in if c1 >= '0' && c1 <= '9' && c2 >= '0' && c2 <= '9' then - VAL_INT64(int64 (10*eval c1 + eval c2) ) + VAL_INT64(int64 (10*evalDigit c1 + evalDigit c2) ) else VAL_ID(lexeme lexbuf) } | '0' 'x' ['0'-'9' 'a'-'f' 'A'-'F']+ { VAL_INT64(int64(lexeme lexbuf)) } | "FFFFFF" ['0'-'9' 'A'-'F' 'a'-'f' ] ['0'-'9' 'A'-'F' 'a'-'f' ] - { let c1 = (lexeme lexbuf).[6] in - let c2 = (lexeme lexbuf).[7] in + { let c1 = (lexemeChar lexbuf 6) in + let c2 = (lexemeChar lexbuf 7) in if c1 >= '0' && c1 <= '9' && c2 >= '0' && c2 <= '9' then - VAL_INT64(int64 (10*eval c1 + eval c2)) + VAL_INT64(int64 (10*evalDigit c1 + evalDigit c2)) else VAL_ID(lexeme lexbuf) } | '-' ['0'-'9']+ diff --git a/src/fsharp/lex.fsl b/src/fsharp/lex.fsl index f5679b2f585..bbfe3c3c933 100644 --- a/src/fsharp/lex.fsl +++ b/src/fsharp/lex.fsl @@ -46,11 +46,10 @@ module Ranges = /// Get string from lexbuf let lexeme (lexbuf : UnicodeLexing.Lexbuf) = UnicodeLexing.Lexbuf.LexemeString lexbuf -/// Trim n chars from both side of a string -let trimBoth (s:string) n m = s.Substring(n, s.Length - (n+m)) - /// Trim n chars from both sides of lexbuf, return string -let lexemeTrimBoth lexbuf n m = trimBoth (lexeme lexbuf) n m +let lexemeTrimBoth (lexbuf : UnicodeLexing.Lexbuf) (n:int) (m:int) = + let s = lexbuf.LexemeView + s.Slice(n, s.Length - (n+m)).ToString() /// Trim n chars from the right of lexbuf, return string let lexemeTrimRight lexbuf n = lexemeTrimBoth lexbuf 0 n @@ -122,9 +121,9 @@ let lexemeTrimRightToInt32 args lexbuf n = // Checks let checkExprOp (lexbuf:UnicodeLexing.Lexbuf) = - if String.contains (lexeme lexbuf) ':' then + if lexbuf.LexemeContains ':' then deprecatedWithError (FSComp.SR.lexCharNotAllowedInOperatorNames(":")) lexbuf.LexemeRange - if String.contains (lexeme lexbuf) '$' then + if lexbuf.LexemeContains '$' then deprecatedWithError (FSComp.SR.lexCharNotAllowedInOperatorNames("$")) lexbuf.LexemeRange let unexpectedChar lexbuf = diff --git a/src/utils/prim-lexing.fs b/src/utils/prim-lexing.fs index 151052b5f80..401c9852fe0 100644 --- a/src/utils/prim-lexing.fs +++ b/src/utils/prim-lexing.fs @@ -194,13 +194,11 @@ namespace Internal.Utilities.Text.Lexing let mutable startPos = Position.Empty let mutable endPos = Position.Empty - // Throw away all the input besides the lexeme + // Throw away all the input besides the lexeme, which is placed at start of buffer let discardInput () = - let keep = Array.sub buffer bufferScanStart bufferScanLength - let nkeep = keep.Length - Array.blit keep 0 buffer 0 nkeep + Array.blit buffer bufferScanStart buffer 0 bufferScanLength bufferScanStart <- 0 - bufferMaxScanLength <- nkeep + bufferMaxScanLength <- bufferScanLength member lexbuf.EndOfScan () : int = //Printf.eprintf "endOfScan, lexBuffer.lexemeLength = %d\n" lexBuffer.lexemeLength; @@ -221,7 +219,9 @@ namespace Internal.Utilities.Text.Lexing with get() = endPos and set b = endPos <- b - member lexbuf.Lexeme = Array.sub buffer bufferScanStart lexemeLength + member lexbuf.LexemeView = System.ReadOnlySpan<'Char>(buffer, bufferScanStart, lexemeLength) + member lexbuf.LexemeChar n = buffer.[n+bufferScanStart] + member lexbuf.LexemeContains (c:'Char) = array.IndexOf(buffer, c, bufferScanStart, lexemeLength) >= 0 member lexbuf.BufferLocalStore = (context :> IDictionary<_,_>) member lexbuf.LexemeLength with get() : int = lexemeLength and set v = lexemeLength <- v member lexbuf.Buffer with get() : 'Char[] = buffer and set v = buffer <- v diff --git a/src/utils/prim-lexing.fsi b/src/utils/prim-lexing.fsi index b0579d71e2f..957fb698cb3 100644 --- a/src/utils/prim-lexing.fsi +++ b/src/utils/prim-lexing.fsi @@ -96,12 +96,18 @@ type internal LexBuffer<'Char> = /// The end position for the lexeme. member EndPos: Position with get,set - /// The matched string. - member Lexeme: 'Char [] + /// The currently matched text as a Span, it is only valid until the lexer is advanced + member LexemeView: System.ReadOnlySpan<'Char> + + /// Get single character of matched string + member LexemeChar: int -> 'Char + + /// Determine if Lexeme contains a specific character + member LexemeContains: 'Char -> bool /// Fast helper to turn the matched characters into a string, avoiding an intermediate array. static member LexemeString : LexBuffer -> string - + /// Dynamically typed, non-lexically scoped parameter table. member BufferLocalStore : IDictionary