diff --git a/buildtools/fslex/Arg.fs b/buildtools/fslex/Arg.fs index b1131625cf3..d6f8ed790e3 100644 --- a/buildtools/fslex/Arg.fs +++ b/buildtools/fslex/Arg.fs @@ -1,11 +1,6 @@ // (c) Microsoft Corporation 2005-2009. -#if INTERNALIZED_FSLEXYACC_RUNTIME -namespace Internal.Utilities -#else -namespace Microsoft.FSharp.Text -#endif - +namespace FSharp.Text type ArgType = | ClearArg of bool ref @@ -35,17 +30,17 @@ exception HelpText of string [] type ArgParser() = static let getUsage specs u = - let sbuf = new System.Text.StringBuilder 100 + let sbuf = System.Text.StringBuilder 100 let pstring (s:string) = sbuf.Append s |> ignore let pendline s = pstring s; pstring "\n" pendline u; List.iter (fun (arg:ArgInfo) -> match arg.Name, arg.ArgType, arg.HelpText with - | (s, (UnitArg _ | SetArg _ | ClearArg _), helpText) -> pstring "\t"; pstring s; pstring ": "; pendline helpText - | (s, StringArg _, helpText) -> pstring "\t"; pstring s; pstring " : "; pendline helpText - | (s, IntArg _, helpText) -> pstring "\t"; pstring s; pstring " : "; pendline helpText - | (s, FloatArg _, helpText) -> pstring "\t"; pstring s; pstring " : "; pendline helpText - | (s, RestArg _, helpText) -> pstring "\t"; pstring s; pstring " ...: "; pendline helpText) + | s, (UnitArg _ | SetArg _ | ClearArg _), helpText -> pstring "\t"; pstring s; pstring ": "; pendline helpText + | s, StringArg _, helpText -> pstring "\t"; pstring s; pstring " : "; pendline helpText + | s, IntArg _, helpText -> pstring "\t"; pstring s; pstring " : "; pendline helpText + | s, FloatArg _, helpText -> pstring "\t"; pstring s; pstring " : "; pendline helpText + | s, RestArg _, helpText -> pstring "\t"; pstring s; pstring " ...: "; pendline helpText) specs; pstring "\t"; pstring "--help"; pstring ": "; pendline "display this list of options"; pstring "\t"; pstring "-help"; pstring ": "; pendline "display this list of options"; @@ -53,20 +48,20 @@ type ArgParser() = static member 
ParsePartial(cursor,argv,arguments:seq,?otherArgs,?usageText) = - let otherArgs = defaultArg otherArgs (fun _ -> ()) + let other = defaultArg otherArgs (fun _ -> ()) let usageText = defaultArg usageText "" let nargs = Array.length argv incr cursor; - let arguments = arguments |> Seq.toList - let specs = arguments |> List.map (fun (arg:ArgInfo) -> arg.Name, arg.ArgType) + let argSpecs = arguments |> Seq.toList + let specs = argSpecs |> List.map (fun (arg:ArgInfo) -> arg.Name, arg.ArgType) while !cursor < nargs do let arg = argv.[!cursor] let rec findMatchingArg args = match args with - | ((s, action) :: _) when s = arg -> + | (s, action) :: _ when s = arg -> let getSecondArg () = if !cursor + 1 >= nargs then - raise(Bad("option "+s+" needs an argument.\n"+getUsage arguments usageText)); + raise(Bad("option "+s+" needs an argument.\n"+getUsage argSpecs usageText)); argv.[!cursor+1] match action with @@ -85,40 +80,40 @@ type ArgParser() = cursor := !cursor + 2 | IntArg f -> let arg2 = getSecondArg () - let arg2 = try int32 arg2 with _ -> raise(Bad(getUsage arguments usageText)) in + let arg2 = try int32 arg2 with _ -> raise(Bad(getUsage argSpecs usageText)) in f arg2; cursor := !cursor + 2; | FloatArg f -> let arg2 = getSecondArg() - let arg2 = try float arg2 with _ -> raise(Bad(getUsage arguments usageText)) in + let arg2 = try float arg2 with _ -> raise(Bad(getUsage argSpecs usageText)) in f arg2; cursor := !cursor + 2; | RestArg f -> incr cursor; while !cursor < nargs do - f (argv.[!cursor]); + f argv.[!cursor]; incr cursor; - | (_ :: more) -> findMatchingArg more + | _ :: more -> findMatchingArg more | [] -> if arg = "-help" || arg = "--help" || arg = "/help" || arg = "/help" || arg = "/?" 
then - raise (HelpText (getUsage arguments usageText)) + raise (HelpText (getUsage argSpecs usageText)) // Note: for '/abc/def' does not count as an argument // Note: '/abc' does - elif arg.Length>0 && (arg.[0] = '-' || (arg.[0] = '/' && not (arg.Length > 1 && arg.[1..].Contains ("/")))) then - raise (Bad ("unrecognized argument: "+ arg + "\n" + getUsage arguments usageText)) + elif arg.Length>0 && (arg.[0] = '-' || (arg.[0] = '/' && not (arg.Length > 1 && arg.[1..].Contains "/"))) then + raise (Bad ("unrecognized argument: "+ arg + "\n" + getUsage argSpecs usageText)) else - otherArgs arg; + other arg; incr cursor findMatchingArg specs - static member Usage (arguments,?usage) = + static member Usage (arguments, ?usage) = let usage = defaultArg usage "" System.Console.Error.WriteLine (getUsage (Seq.toList arguments) usage) #if FX_NO_COMMAND_LINE_ARGS #else - static member Parse (arguments,?otherArgs,?usageText) = + static member Parse (arguments, ?otherArgs,?usageText) = let current = ref 0 let argv = System.Environment.GetCommandLineArgs() try ArgParser.ParsePartial (current, argv, arguments, ?otherArgs=otherArgs, ?usageText=usageText) @@ -128,6 +123,6 @@ type ArgParser() = System.Console.Error.WriteLine h; System.Console.Error.Flush(); System.Environment.Exit(1); - | e -> + | _ -> reraise() #endif diff --git a/buildtools/fslex/Arg.fsi b/buildtools/fslex/Arg.fsi index 367f69f959f..b5203999928 100644 --- a/buildtools/fslex/Arg.fsi +++ b/buildtools/fslex/Arg.fsi @@ -1,11 +1,7 @@ // (c) Microsoft Corporation 2005-2009. /// A simple command-line argument processor. -#if INTERNALIZED_FSLEXYACC_RUNTIME -namespace Internal.Utilities -#else -namespace Microsoft.FSharp.Text -#endif +namespace FSharp.Text /// The spec value describes the action of the argument, /// and whether it expects a following parameter. 
@@ -37,7 +33,7 @@ type ArgParser = [] static member ParsePartial: cursor: int ref * argv: string[] * arguments:seq * ?otherArgs: (string -> unit) * ?usageText:string -> unit - /// Parse the arguments given by System.Environment.GetEnvironmentVariables() + /// Parse the arguments given by System.Environment.GetCommandLineArgs() /// according to the argument processing specifications "specs". /// Args begin with "-". Non-arguments are passed to "f" in /// order. "use" is printed as part of the usage line if an error occurs. diff --git a/buildtools/fslex/Lexing.fs b/buildtools/fslex/Lexing.fs index 760ace5a932..40aacdcac96 100644 --- a/buildtools/fslex/Lexing.fs +++ b/buildtools/fslex/Lexing.fs @@ -1,423 +1,456 @@ // (c) Microsoft Corporation 2005-2009. +module FSharp.Text.Lexing #nowarn "47" // recursive initialization of LexBuffer +open System.Collections.Generic -#if INTERNALIZED_FSLEXYACC_RUNTIME -namespace Internal.Utilities.Text.Lexing - -#else -namespace Microsoft.FSharp.Text.Lexing -#endif - - open System.Collections.Generic - - // REVIEW: This type showed up on a parsing-intensive performance measurement. Consider whether it can be a struct-record later when we have this feature. 
-jomo -#if INTERNALIZED_FSLEXYACC_RUNTIME - type internal Position = -#else - type Position = -#endif - { pos_fname : string; - pos_lnum : int; -#if INTERNALIZED_FSLEXYACC_RUNTIME - pos_orig_lnum : int; -#endif - pos_bol : int; - pos_cnum : int; } - member x.FileName = x.pos_fname - member x.Line = x.pos_lnum -#if INTERNALIZED_FSLEXYACC_RUNTIME - member x.OriginalLine = x.pos_orig_lnum -#endif - member x.Char = x.pos_cnum - member x.AbsoluteOffset = x.pos_cnum - member x.StartOfLine = x.pos_bol - member x.StartOfLineAbsoluteOffset = x.pos_bol - member x.Column = x.pos_cnum - x.pos_bol - member pos.NextLine = - { pos with -#if INTERNALIZED_FSLEXYACC_RUNTIME - pos_orig_lnum = pos.OriginalLine + 1; -#endif - pos_lnum = pos.Line+1; - pos_bol = pos.AbsoluteOffset } - member pos.EndOfToken(n) = {pos with pos_cnum=pos.pos_cnum + n } - member pos.AsNewLinePos() = pos.NextLine - member pos.ShiftColumnBy(by) = {pos with pos_cnum = pos.pos_cnum + by} - static member Empty = - { pos_fname=""; - pos_lnum= 0; -#if INTERNALIZED_FSLEXYACC_RUNTIME - pos_orig_lnum = 0; -#endif - pos_bol= 0; - pos_cnum=0 } - static member FirstLine(filename) = - { pos_fname=filename; -#if INTERNALIZED_FSLEXYACC_RUNTIME - pos_orig_lnum = 1; -#endif - pos_lnum= 1; - pos_bol= 0; - pos_cnum=0 } - -#if INTERNALIZED_FSLEXYACC_RUNTIME - type internal LexBufferFiller<'char> = -#else - type LexBufferFiller<'char> = -#endif - { fillSync : (LexBuffer<'char> -> unit) option - fillAsync : (LexBuffer<'char> -> Async) option } - - and [] -#if INTERNALIZED_FSLEXYACC_RUNTIME - internal LexBuffer<'char>(filler: LexBufferFiller<'char>) as this = -#else - LexBuffer<'char>(filler: LexBufferFiller<'char>) as this = -#endif - let context = new Dictionary(1) in - let extendBufferSync = (fun () -> match filler.fillSync with Some refill -> refill this | None -> invalidOp "attempt to read synchronously from an asynchronous lex buffer") - let extendBufferAsync = (fun () -> match filler.fillAsync with Some refill -> refill this 
| None -> invalidOp "attempt to read asynchronously from a synchronous lex buffer") - let mutable buffer=[||]; - /// number of valid charactes beyond bufferScanStart - let mutable bufferMaxScanLength=0; - /// count into the buffer when scanning - let mutable bufferScanStart=0; - /// number of characters scanned so far - let mutable bufferScanLength=0; - /// length of the scan at the last accepting state - let mutable lexemeLength=0; - /// action related to the last accepting state - let mutable bufferAcceptAction=0; - let mutable eof = false; - let mutable startPos = Position.Empty ; - let mutable endPos = Position.Empty - - // Throw away all the input besides the lexeme - - let discardInput () = - let keep = Array.sub buffer bufferScanStart bufferScanLength - let nkeep = keep.Length - Array.blit keep 0 buffer 0 nkeep; - bufferScanStart <- 0; - bufferMaxScanLength <- nkeep - - - member lexbuf.EndOfScan () : int = - // Printf.eprintf "endOfScan, lexBuffer.lexemeLength = %d\n" lexBuffer.lexemeLength; - if bufferAcceptAction < 0 then - failwith "unrecognized input" - - // Printf.printf "endOfScan %d state %d on unconsumed input '%c' (%d)\n" a s (Char.chr inp) inp; - // Printf.eprintf "accept, lexeme = %s\n" (lexeme lexBuffer); - lexbuf.StartPos <- endPos; - lexbuf.EndPos <- endPos.EndOfToken(lexbuf.LexemeLength); - bufferAcceptAction - - member lexbuf.StartPos - with get() = startPos - and set(b) = startPos <- b - - member lexbuf.EndPos - with get() = endPos - and set(b) = endPos <- b - - member lexbuf.Lexeme = Array.sub buffer bufferScanStart lexemeLength - member lexbuf.LexemeChar(n) = buffer.[n+bufferScanStart] - - member lexbuf.BufferLocalStore = (context :> IDictionary<_,_>) - member lexbuf.LexemeLength with get() : int = lexemeLength and set v = lexemeLength <- v - member internal lexbuf.Buffer with get() : 'char[] = buffer and set v = buffer <- v - member internal lexbuf.BufferMaxScanLength with get() = bufferMaxScanLength and set v = bufferMaxScanLength <- v - 
member internal lexbuf.BufferScanLength with get() = bufferScanLength and set v = bufferScanLength <- v - member internal lexbuf.BufferScanStart with get() : int = bufferScanStart and set v = bufferScanStart <- v - member internal lexbuf.BufferAcceptAction with get() = bufferAcceptAction and set v = bufferAcceptAction <- v - member internal lexbuf.RefillBuffer = extendBufferSync - member internal lexbuf.AsyncRefillBuffer = extendBufferAsync - - static member LexemeString(lexbuf:LexBuffer) = - new System.String(lexbuf.Buffer,lexbuf.BufferScanStart,lexbuf.LexemeLength) - - member lexbuf.IsPastEndOfStream - with get() = eof - and set(b) = eof <- b - - member lexbuf.DiscardInput() = discardInput () - - member x.BufferScanPos = bufferScanStart + bufferScanLength - - member lexbuf.EnsureBufferSize n = - if lexbuf.BufferScanPos + n >= buffer.Length then - let repl = Array.zeroCreate (lexbuf.BufferScanPos + n) - Array.blit buffer bufferScanStart repl bufferScanStart bufferScanLength; - buffer <- repl - - static member FromReadFunctions (syncRead : ('char[] * int * int -> int) option, asyncRead : ('char[] * int * int -> Async) option) : LexBuffer<'char> = - let extension= Array.zeroCreate 4096 - let fillers = - { fillSync = - match syncRead with - | None -> None - | Some read -> - Some (fun lexBuffer -> - let n = read(extension,0,extension.Length) - lexBuffer.EnsureBufferSize n; - Array.blit extension 0 lexBuffer.Buffer lexBuffer.BufferScanPos n; - lexBuffer.BufferMaxScanLength <- lexBuffer.BufferScanLength + n); - fillAsync = - match asyncRead with - | None -> None - | Some read -> - Some (fun lexBuffer -> - async { - let! 
n = read(extension,0,extension.Length) - lexBuffer.EnsureBufferSize n; - Array.blit extension 0 lexBuffer.Buffer lexBuffer.BufferScanPos n; - lexBuffer.BufferMaxScanLength <- lexBuffer.BufferScanLength + n }) } - new LexBuffer<_>(fillers) - - // A full type signature is required on this method because it is used at more specific types within its own scope - static member FromFunction (f : 'char[] * int * int -> int) : LexBuffer<'char> = LexBuffer<_>.FromReadFunctions(Some(f),None) - static member FromAsyncFunction (f : 'char[] * int * int -> Async) : LexBuffer<'char> = LexBuffer<_>.FromReadFunctions(None,Some(f)) - - static member FromCharFunction f : LexBuffer = - LexBuffer.FromFunction(fun (buff,start,len) -> - let buff2 = Array.zeroCreate len - let n = f buff2 len - Array.blit buff2 0 buff start len - n) - static member FromByteFunction f : LexBuffer = - LexBuffer.FromFunction(fun (buff,start,len) -> - let buff2 = Array.zeroCreate len - let n = f buff2 len - Array.blit buff2 0 buff start len - n) - - // A full type signature is required on this method because it is used at more specific types within its own scope - static member FromArray (s: 'char[]) : LexBuffer<'char> = - let lexBuffer = - new LexBuffer<_> - { fillSync = Some (fun _ -> ()); - fillAsync = Some (fun _ -> async { return () }) } - let buffer = Array.copy s - lexBuffer.Buffer <- buffer; - lexBuffer.BufferMaxScanLength <- buffer.Length; - lexBuffer - - static member FromBytes (arr) = LexBuffer.FromArray(arr) - static member FromChars (arr) = LexBuffer.FromArray(arr) - static member FromString (s:string) = LexBuffer.FromChars (s.ToCharArray()) - - static member FromTextReader (tr:System.IO.TextReader) : LexBuffer = - LexBuffer.FromFunction(tr.Read) - - static member FromBinaryReader (br:System.IO.BinaryReader) : LexBuffer = - LexBuffer.FromFunction(br.Read) - - static member FromStream (stream:System.IO.Stream) : LexBuffer = - LexBuffer.FromReadFunctions(Some(stream.Read),Some(fun (buf,offset,len) -> 
stream.AsyncRead(buf,offset=offset,count=len))) - - module GenericImplFragments = - let startInterpret(lexBuffer:LexBuffer<_>)= - lexBuffer.BufferScanStart <- lexBuffer.BufferScanStart + lexBuffer.LexemeLength; - lexBuffer.BufferMaxScanLength <- lexBuffer.BufferMaxScanLength - lexBuffer.LexemeLength; - lexBuffer.BufferScanLength <- 0; - lexBuffer.LexemeLength <- 0; - lexBuffer.BufferAcceptAction <- -1; - - let afterRefill (trans: uint16[] array,sentinel,lexBuffer:LexBuffer<_>,scanUntilSentinel,endOfScan,state,eofPos) = - // end of file occurs if we couldn't extend the buffer - if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then - let snew = int trans.[state].[eofPos] // == EOF - if snew = sentinel then - endOfScan() - else - if lexBuffer.IsPastEndOfStream then failwith "End of file on lexing stream"; - lexBuffer.IsPastEndOfStream <- true; - // Printf.printf "state %d --> %d on eof\n" state snew; - scanUntilSentinel(lexBuffer,snew) - else - scanUntilSentinel(lexBuffer, state) +// REVIEW: This type showed up on a parsing-intensive performance measurement. Consider whether it can be a struct-record later when we have this feature. 
-jomo +[] +type Position = + { pos_fname : string + pos_lnum : int + pos_orig_lnum : int + pos_bol : int + pos_cnum : int } - let onAccept (lexBuffer:LexBuffer<_>,a) = - lexBuffer.LexemeLength <- lexBuffer.BufferScanLength; - lexBuffer.BufferAcceptAction <- a; + member pos.FileName = pos.pos_fname - open GenericImplFragments + member pos.Line = pos.pos_lnum - [] -#if INTERNALIZED_FSLEXYACC_RUNTIME - type internal AsciiTables(trans: uint16[] array, accept: uint16[]) = -#else - type AsciiTables(trans: uint16[] array, accept: uint16[]) = -#endif - let rec scanUntilSentinel(lexBuffer, state) = - let sentinel = 255 * 256 + 255 - // Return an endOfScan after consuming the input - let a = int accept.[state] - if a <> sentinel then - onAccept (lexBuffer,a) - - if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then - lexBuffer.DiscardInput(); - lexBuffer.RefillBuffer (); - // end of file occurs if we couldn't extend the buffer - afterRefill (trans,sentinel,lexBuffer,scanUntilSentinel,lexBuffer.EndOfScan,state,256 (* == EOF *) ) - else - // read a character - end the scan if there are no further transitions - let inp = int(lexBuffer.Buffer.[lexBuffer.BufferScanPos]) - let snew = int trans.[state].[inp] - if snew = sentinel then - lexBuffer.EndOfScan() - else - lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1; - // Printf.printf "state %d --> %d on '%c' (%d)\n" state snew (Char.chr inp) inp; - scanUntilSentinel(lexBuffer, snew) - - /// Interpret tables for an ascii lexer generated by fslex. - member tables.Interpret(initialState,lexBuffer : LexBuffer) = - startInterpret(lexBuffer) - scanUntilSentinel(lexBuffer, initialState) + member pos.OriginalLine = pos.pos_orig_lnum - /// Interpret tables for an ascii lexer generated by fslex. 
- member tables.AsyncInterpret(initialState,lexBuffer : LexBuffer) = - - let rec scanUntilSentinel(lexBuffer,state) : Async = - async { - let sentinel = 255 * 256 + 255 - // Return an endOfScan after consuming the input - let a = int accept.[state] - if a <> sentinel then - onAccept (lexBuffer,a) - - if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then - lexBuffer.DiscardInput(); - do! lexBuffer.AsyncRefillBuffer (); - // end of file occurs if we couldn't extend the buffer - return! afterRefill (trans,sentinel,lexBuffer,scanUntilSentinel,endOfScan,state,256 (* == EOF *) ) - else - // read a character - end the scan if there are no further transitions - let inp = int(lexBuffer.Buffer.[lexBuffer.BufferScanPos]) - let snew = int trans.[state].[inp] - if snew = sentinel then - return! endOfScan() - else - lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1; - return! scanUntilSentinel(lexBuffer,snew) - } - and endOfScan() = - async { return lexBuffer.EndOfScan() } - startInterpret(lexBuffer) - scanUntilSentinel(lexBuffer, initialState) - - - static member Create(trans,accept) = new AsciiTables(trans,accept) - - [] -#if INTERNALIZED_FSLEXYACC_RUNTIME - type internal UnicodeTables(trans: uint16[] array, accept: uint16[]) = -#else - type UnicodeTables(trans: uint16[] array, accept: uint16[]) = -#endif + member pos.Char = pos.pos_cnum + + member pos.AbsoluteOffset = pos.pos_cnum + + member pos.StartOfLine = pos.pos_bol + + member pos.StartOfLineAbsoluteOffset = pos.pos_bol + + member pos.Column = pos.pos_cnum - pos.pos_bol + + member pos.NextLine = + let pos = pos + { pos with + pos_orig_lnum = pos.OriginalLine + 1 + pos_lnum = pos.Line+1 + pos_bol = pos.AbsoluteOffset } + + member pos.EndOfToken(n) = + let pos = pos + {pos with pos_cnum=pos.pos_cnum + n } + + member pos.AsNewLinePos() = pos.NextLine + + member pos.ShiftColumnBy(by) = + let pos = pos + {pos with pos_cnum = pos.pos_cnum + by} + + static member Empty = + { pos_fname="" + pos_lnum= 0 + 
pos_orig_lnum = 0 + pos_bol= 0 + pos_cnum=0 } + + static member FirstLine(filename) = + { pos_fname=filename + pos_orig_lnum = 1 + pos_lnum= 1 + pos_bol= 0 + pos_cnum=0 } + +type LexBufferFiller<'char> = + { fillSync : (LexBuffer<'char> -> unit) option + fillAsync : (LexBuffer<'char> -> Async) option } + +and [] + LexBuffer<'char>(filler: LexBufferFiller<'char>) as this = + let context = Dictionary(1) in + let extendBufferSync = (fun () -> match filler.fillSync with Some refill -> refill this | None -> invalidOp "attempt to read synchronously from an asynchronous lex buffer") + let extendBufferAsync = (fun () -> match filler.fillAsync with Some refill -> refill this | None -> invalidOp "attempt to read asynchronously from a synchronous lex buffer") + let mutable buffer=[||] + + /// number of valid charactes beyond bufferScanStart + let mutable bufferMaxScanLength=0 + + /// count into the buffer when scanning + let mutable bufferScanStart=0 + + /// number of characters scanned so far + let mutable bufferScanLength=0 + + /// length of the scan at the last accepting state + let mutable lexemeLength=0 + + /// action related to the last accepting state + let mutable bufferAcceptAction=0 + + let mutable eof = false + + let mutable startPos = Position.Empty + + let mutable endPos = Position.Empty + + /// Throw away all the input besides the lexeme + let discardInput () = + Array.blit buffer bufferScanStart buffer 0 bufferScanLength + bufferScanStart <- 0 + bufferMaxScanLength <- bufferScanLength + + member lexbuf.EndOfScan () : int = + // Printf.eprintf "endOfScan, lexBuffer.lexemeLength = %d\n" lexBuffer.lexemeLength + if bufferAcceptAction < 0 then + failwith "unrecognized input" + + // Printf.printf "endOfScan %d state %d on unconsumed input '%c' (%d)\n" a s (Char.chr inp) inp + // Printf.eprintf "accept, lexeme = %s\n" (lexeme lexBuffer) + lexbuf.StartPos <- endPos + lexbuf.EndPos <- endPos.EndOfToken(lexbuf.LexemeLength) + bufferAcceptAction + + member _.StartPos + 
with get() = startPos + and set b = startPos <- b + + member _.EndPos + with get() = endPos + and set b = endPos <- b + + member _.Lexeme = Array.sub buffer bufferScanStart lexemeLength + + member _.LexemeChar n = buffer.[n+bufferScanStart] + + member _.BufferLocalStore = (context :> IDictionary<_, _>) + + member _.LexemeLength + with get() : int = lexemeLength + and set v = lexemeLength <- v + + member internal _.Buffer + with get() : 'char[] = buffer + and set v = buffer <- v + + member internal _.BufferMaxScanLength + with get() = bufferMaxScanLength + and set v = bufferMaxScanLength <- v + + member internal _.BufferScanLength + with get() = bufferScanLength + and set v = bufferScanLength <- v + + member internal _.BufferScanStart + with get() : int = bufferScanStart + and set v = bufferScanStart <- v + + member internal _.BufferAcceptAction + with get() = bufferAcceptAction + and set v = bufferAcceptAction <- v + + member internal _.RefillBuffer = extendBufferSync + + member internal _.AsyncRefillBuffer = extendBufferAsync + + static member LexemeString(lexbuf:LexBuffer) = + System.String(lexbuf.Buffer, lexbuf.BufferScanStart, lexbuf.LexemeLength) + + member _.IsPastEndOfStream + with get() = eof + and set b = eof <- b + + member _.DiscardInput() = discardInput () + + member _.BufferScanPos = bufferScanStart + bufferScanLength + + member lexbuf.EnsureBufferSize n = + if lexbuf.BufferScanPos + n >= buffer.Length then + let repl = Array.zeroCreate (lexbuf.BufferScanPos + n) + Array.blit buffer bufferScanStart repl bufferScanStart bufferScanLength + buffer <- repl + + static member FromReadFunctions (syncRead : ('char[] * int * int -> int) option, asyncRead : ('char[] * int * int -> Async) option) : LexBuffer<'char> = + let extension= Array.zeroCreate 4096 + let fillers = + { fillSync = + match syncRead with + | None -> None + | Some read -> + Some (fun lexBuffer -> + let n = read(extension, 0, extension.Length) + lexBuffer.EnsureBufferSize n + Array.blit 
extension 0 lexBuffer.Buffer lexBuffer.BufferScanPos n + lexBuffer.BufferMaxScanLength <- lexBuffer.BufferScanLength + n) + fillAsync = + match asyncRead with + | None -> None + | Some read -> + Some (fun lexBuffer -> + async { + let! n = read(extension, 0, extension.Length) + lexBuffer.EnsureBufferSize n + Array.blit extension 0 lexBuffer.Buffer lexBuffer.BufferScanPos n + lexBuffer.BufferMaxScanLength <- lexBuffer.BufferScanLength + n }) } + LexBuffer<_>(fillers) + + // A full type signature is required on this method because it is used at more specific types within its own scope + static member FromFunction (f : 'char[] * int * int -> int) : LexBuffer<'char> = + LexBuffer<_>.FromReadFunctions(Some(f), None) + + static member FromAsyncFunction (f : 'char[] * int * int -> Async) : LexBuffer<'char> = + LexBuffer<_>.FromReadFunctions(None, Some(f)) + + static member FromCharFunction f : LexBuffer = + LexBuffer.FromFunction(fun (buff, start, len) -> + let buff2 = Array.zeroCreate len + let n = f buff2 len + Array.blit buff2 0 buff start len + n) + + static member FromByteFunction f : LexBuffer = + LexBuffer.FromFunction(fun (buff, start, len) -> + let buff2 = Array.zeroCreate len + let n = f buff2 len + Array.blit buff2 0 buff start len + n) + + // A full type signature is required on this method because it is used at more specific types within its own scope + static member FromArray (s: 'char[]) : LexBuffer<'char> = + let lexBuffer = + LexBuffer<_> + { fillSync = Some (fun _ -> ()) + fillAsync = Some (fun _ -> async { return () }) } + lexBuffer.Buffer <- s + lexBuffer.BufferMaxScanLength <- s.Length + lexBuffer + + static member FromBytes arr = + LexBuffer.FromArray(Array.copy arr) + + static member FromChars arr = + LexBuffer.FromArray(Array.copy arr) + + static member FromString (s:string) = + LexBuffer.FromArray (s.ToCharArray()) + + static member FromTextReader (tr:System.IO.TextReader) : LexBuffer = + LexBuffer.FromReadFunctions(Some tr.Read, Some (tr.ReadAsync 
>> Async.AwaitTask)) + + static member FromBinaryReader (br:System.IO.BinaryReader) : LexBuffer = + LexBuffer.FromFunction(br.Read) + + static member FromStream (stream:System.IO.Stream) : LexBuffer = + LexBuffer.FromReadFunctions(Some(stream.Read), Some(fun (buf, offset, len) -> stream.AsyncRead(buf, offset=offset, count=len))) + +module GenericImplFragments = + let startInterpret(lexBuffer:LexBuffer<_>)= + lexBuffer.BufferScanStart <- lexBuffer.BufferScanStart + lexBuffer.LexemeLength + lexBuffer.BufferMaxScanLength <- lexBuffer.BufferMaxScanLength - lexBuffer.LexemeLength + lexBuffer.BufferScanLength <- 0 + lexBuffer.LexemeLength <- 0 + lexBuffer.BufferAcceptAction <- -1 + + let afterRefill (trans: uint16[] array, sentinel, lexBuffer:LexBuffer<_>, scanUntilSentinel, endOfScan, state, eofPos) = + // end of file occurs if we couldn't extend the buffer + if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then + let snew = int trans.[state].[eofPos] // == EOF + if snew = sentinel then + endOfScan() + else + if lexBuffer.IsPastEndOfStream then failwith "End of file on lexing stream" + lexBuffer.IsPastEndOfStream <- true + // Printf.printf "state %d --> %d on eof\n" state snew + scanUntilSentinel(lexBuffer, snew) + else + scanUntilSentinel(lexBuffer, state) + + let onAccept (lexBuffer:LexBuffer<_>, a) = + lexBuffer.LexemeLength <- lexBuffer.BufferScanLength + lexBuffer.BufferAcceptAction <- a + +open GenericImplFragments + +[] +type AsciiTables(trans: uint16[] array, accept: uint16[]) = + let rec scanUntilSentinel(lexBuffer, state) = let sentinel = 255 * 256 + 255 - let numUnicodeCategories = 30 - let numLowUnicodeChars = 128 - let numSpecificUnicodeChars = (trans.[0].Length - 1 - numLowUnicodeChars - numUnicodeCategories)/2 - let lookupUnicodeCharacters (state,inp: char) = - let inpAsInt = int inp - // Is it a fast ASCII character? 
- if inpAsInt < numLowUnicodeChars then - int trans.[state].[inpAsInt] + // Return an endOfScan after consuming the input + let a = int accept.[state] + if a <> sentinel then + onAccept (lexBuffer, a) + + if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then + lexBuffer.DiscardInput() + lexBuffer.RefillBuffer () + // end of file occurs if we couldn't extend the buffer + afterRefill (trans, sentinel, lexBuffer, scanUntilSentinel, lexBuffer.EndOfScan, state, 256 (* == EOF *) ) + else + // read a character - end the scan if there are no further transitions + let inp = int(lexBuffer.Buffer.[lexBuffer.BufferScanPos]) + let snew = int trans.[state].[inp] + if snew = sentinel then + lexBuffer.EndOfScan() else - // Search for a specific unicode character - let baseForSpecificUnicodeChars = numLowUnicodeChars - let rec loop i = - if i >= numSpecificUnicodeChars then - // OK, if we failed then read the 'others' entry in the alphabet, - // which covers all Unicode characters not covered in other - // ways - let baseForUnicodeCategories = numLowUnicodeChars+numSpecificUnicodeChars*2 - let unicodeCategory = System.Globalization.CharUnicodeInfo.GetUnicodeCategory(inp) - //System.Console.WriteLine("inp = {0}, unicodeCategory = {1}", [| box inp; box unicodeCategory |]); - int trans.[state].[baseForUnicodeCategories + int32 unicodeCategory] - else - // This is the specific unicode character - let c = char (int trans.[state].[baseForSpecificUnicodeChars+i*2]) - //System.Console.WriteLine("c = {0}, inp = {1}, i = {2}", [| box c; box inp; box i |]); - // OK, have we found the entry for a specific unicode character? - if c = inp - then int trans.[state].[baseForSpecificUnicodeChars+i*2+1] - else loop(i+1) + lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1 + // Printf.printf "state %d --> %d on '%c' (%d)\n" state snew (Char.chr inp) inp + scanUntilSentinel(lexBuffer, snew) + + /// Interpret tables for an ascii lexer generated by fslex. 
+ member tables.Interpret(initialState, lexBuffer : LexBuffer) = + startInterpret(lexBuffer) + scanUntilSentinel(lexBuffer, initialState) + + /// Interpret tables for an ascii lexer generated by fslex. + member tables.AsyncInterpret(initialState, lexBuffer : LexBuffer) = + + let rec scanUntilSentinel(lexBuffer, state) : Async = + async { + let sentinel = 255 * 256 + 255 + // Return an endOfScan after consuming the input + let a = int accept.[state] + if a <> sentinel then + onAccept (lexBuffer, a) - loop 0 - let eofPos = numLowUnicodeChars + 2*numSpecificUnicodeChars + numUnicodeCategories + if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then + lexBuffer.DiscardInput() + do! lexBuffer.AsyncRefillBuffer () + // end of file occurs if we couldn't extend the buffer + return! afterRefill (trans, sentinel, lexBuffer, scanUntilSentinel, endOfScan, state, 256 (* == EOF *) ) + else + // read a character - end the scan if there are no further transitions + let inp = int(lexBuffer.Buffer.[lexBuffer.BufferScanPos]) + let snew = int trans.[state].[inp] + if snew = sentinel then + return! endOfScan() + else + lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1 + return! scanUntilSentinel(lexBuffer, snew) + } + + and endOfScan() = + async { return lexBuffer.EndOfScan() } + + startInterpret(lexBuffer) + + scanUntilSentinel(lexBuffer, initialState) + + + static member Create(trans, accept) = AsciiTables(trans, accept) + +[] +type UnicodeTables(trans: uint16[] array, accept: uint16[]) = + let sentinel = 255 * 256 + 255 + let numUnicodeCategories = 30 + let numLowUnicodeChars = 128 + let numSpecificUnicodeChars = (trans.[0].Length - 1 - numLowUnicodeChars - numUnicodeCategories)/2 + let lookupUnicodeCharacters (state, inp: char) = + let inpAsInt = int inp + // Is it a fast ASCII character? 
+ if inpAsInt < numLowUnicodeChars then + int trans.[state].[inpAsInt] + else + // Search for a specific unicode character + let baseForSpecificUnicodeChars = numLowUnicodeChars + let rec loop i = + if i >= numSpecificUnicodeChars then + // OK, if we failed then read the 'others' entry in the alphabet, + // which covers all Unicode characters not covered in other + // ways + let baseForUnicodeCategories = numLowUnicodeChars+numSpecificUnicodeChars*2 + let unicodeCategory = System.Globalization.CharUnicodeInfo.GetUnicodeCategory(inp) + //System.Console.WriteLine("inp = {0}, unicodeCategory = {1}", [| box inp; box unicodeCategory |]) + int trans.[state].[baseForUnicodeCategories + int32 unicodeCategory] + else + // This is the specific unicode character + let c = char (int trans.[state].[baseForSpecificUnicodeChars+i*2]) + //System.Console.WriteLine("c = {0}, inp = {1}, i = {2}", [| box c; box inp; box i |]) + // OK, have we found the entry for a specific unicode character? + if c = inp + then int trans.[state].[baseForSpecificUnicodeChars+i*2+1] + else loop(i+1) + + loop 0 + let eofPos = numLowUnicodeChars + 2*numSpecificUnicodeChars + numUnicodeCategories + + let rec scanUntilSentinel(lexBuffer, state) = + // Return an endOfScan after consuming the input + let a = int accept.[state] + if a <> sentinel then + onAccept(lexBuffer, a) - let rec scanUntilSentinel(lexBuffer,state) = - // Return an endOfScan after consuming the input - let a = int accept.[state] - if a <> sentinel then - onAccept(lexBuffer,a) + if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then + lexBuffer.DiscardInput() + lexBuffer.RefillBuffer () + // end of file occurs if we couldn't extend the buffer + afterRefill (trans, sentinel, lexBuffer, scanUntilSentinel, lexBuffer.EndOfScan, state, eofPos) + else + // read a character - end the scan if there are no further transitions + let inp = lexBuffer.Buffer.[lexBuffer.BufferScanPos] - if lexBuffer.BufferScanLength = 
lexBuffer.BufferMaxScanLength then - lexBuffer.DiscardInput(); - lexBuffer.RefillBuffer (); - // end of file occurs if we couldn't extend the buffer - afterRefill (trans,sentinel,lexBuffer,scanUntilSentinel,lexBuffer.EndOfScan,state,eofPos) - else - // read a character - end the scan if there are no further transitions - let inp = lexBuffer.Buffer.[lexBuffer.BufferScanPos] - - // Find the new state - let snew = lookupUnicodeCharacters (state,inp) + // Find the new state + let snew = lookupUnicodeCharacters (state, inp) - if snew = sentinel then - lexBuffer.EndOfScan() - else - lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1; - // Printf.printf "state %d --> %d on '%c' (%d)\n" s snew (char inp) inp; - scanUntilSentinel(lexBuffer,snew) - - // Each row for the Unicode table has format - // 128 entries for ASCII characters - // A variable number of 2*UInt16 entries for SpecificUnicodeChars - // 30 entries, one for each UnicodeCategory - // 1 entry for EOF - - member tables.Interpret(initialState,lexBuffer : LexBuffer) = - startInterpret(lexBuffer) - scanUntilSentinel(lexBuffer, initialState) - - member tables.AsyncInterpret(initialState,lexBuffer : LexBuffer) = - - let rec scanUntilSentinel(lexBuffer, state) = - async { - // Return an endOfScan after consuming the input - let a = int accept.[state] - if a <> sentinel then - onAccept(lexBuffer,a) + if snew = sentinel then + lexBuffer.EndOfScan() + else + lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1 + // Printf.printf "state %d --> %d on '%c' (%d)\n" s snew (char inp) inp + scanUntilSentinel(lexBuffer, snew) + + // Each row for the Unicode table has format + // 128 entries for ASCII characters + // A variable number of 2*UInt16 entries for SpecificUnicodeChars + // 30 entries, one for each UnicodeCategory + // 1 entry for EOF + + member tables.Interpret(initialState, lexBuffer : LexBuffer) = + startInterpret(lexBuffer) + scanUntilSentinel(lexBuffer, initialState) + + member 
tables.AsyncInterpret(initialState, lexBuffer : LexBuffer) = + + let rec scanUntilSentinel(lexBuffer, state) = + async { + // Return an endOfScan after consuming the input + let a = int accept.[state] + if a <> sentinel then + onAccept(lexBuffer, a) + + if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then + lexBuffer.DiscardInput() + lexBuffer.RefillBuffer () + // end of file occurs if we couldn't extend the buffer + return! afterRefill (trans, sentinel, lexBuffer, scanUntilSentinel, endOfScan, state, eofPos) + else + // read a character - end the scan if there are no further transitions + let inp = lexBuffer.Buffer.[lexBuffer.BufferScanPos] - if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then - lexBuffer.DiscardInput(); - lexBuffer.RefillBuffer (); - // end of file occurs if we couldn't extend the buffer - return! afterRefill (trans,sentinel,lexBuffer,scanUntilSentinel,endOfScan,state,eofPos) - else - // read a character - end the scan if there are no further transitions - let inp = lexBuffer.Buffer.[lexBuffer.BufferScanPos] - - // Find the new state - let snew = lookupUnicodeCharacters (state,inp) - - if snew = sentinel then - return! endOfScan() - else - lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1; - return! scanUntilSentinel(lexBuffer, snew) - } - and endOfScan() = - async { return lexBuffer.EndOfScan() } - startInterpret(lexBuffer) - scanUntilSentinel(lexBuffer, initialState) - - static member Create(trans,accept) = new UnicodeTables(trans,accept) + // Find the new state + let snew = lookupUnicodeCharacters (state, inp) + + if snew = sentinel then + return! endOfScan() + else + lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1 + return! 
scanUntilSentinel(lexBuffer, snew) + } + and endOfScan() = + async { return lexBuffer.EndOfScan() } + startInterpret(lexBuffer) + scanUntilSentinel(lexBuffer, initialState) + + static member Create(trans, accept) = UnicodeTables(trans, accept) + +open System.IO + +let UnicodeFileAsLexbuf (filename,codePage : int option) : FileStream * StreamReader * LexBuffer = + // Use the .NET functionality to auto-detect the unicode encoding + // It also presents the bytes read to the lexer in UTF8 decoded form + let stream = new FileStream(filename,FileMode.Open,FileAccess.Read,FileShare.Read) + let reader = + match codePage with + | None -> new StreamReader(stream,true) + | Some n -> new StreamReader(stream,System.Text.Encoding.GetEncoding(n)) + let lexbuf = LexBuffer.FromFunction(reader.Read) + lexbuf.EndPos <- Position.FirstLine(filename) + stream, reader, lexbuf \ No newline at end of file diff --git a/buildtools/fslex/Lexing.fsi b/buildtools/fslex/Lexing.fsi index e31ad411aa9..866ba6a1e56 100644 --- a/buildtools/fslex/Lexing.fsi +++ b/buildtools/fslex/Lexing.fsi @@ -5,147 +5,160 @@ // (c) Microsoft Corporation 2005-2008. //=========================================================================== -#if INTERNALIZED_FSLEXYACC_RUNTIME -namespace Internal.Utilities.Text.Lexing -#else -namespace Microsoft.FSharp.Text.Lexing -#endif +module FSharp.Text.Lexing open System.Collections.Generic /// Position information stored for lexing tokens -// -// Note: this is an OCaml compat record type. 
-#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal Position = -#else -type Position = -#endif - { /// The file name for the position - pos_fname: string; +[] +type Position = + { + /// The file name for the position + pos_fname: string + /// The line number for the position - pos_lnum: int; -#if INTERNALIZED_FSLEXYACC_RUNTIME + pos_lnum: int + /// The line number for the position in the original source file - pos_orig_lnum : int; -#endif + pos_orig_lnum : int + /// The absolute offset of the beginning of the line - pos_bol: int; + pos_bol: int + /// The absolute offset of the column for the position - pos_cnum: int; } + pos_cnum: int + } + /// The file name associated with the input stream. member FileName : string - /// The line number in the input stream, assuming fresh positions have been updated + + /// The line number in the input stream, assuming fresh positions have been updated /// using AsNewLinePos() and by modifying the EndPos property of the LexBuffer. member Line : int -#if INTERNALIZED_FSLEXYACC_RUNTIME - /// The line number for the position in the input stream, assuming fresh positions have been updated + + /// The line number for the position in the input stream, assuming fresh positions have been updated /// using AsNewLinePos() member OriginalLine : int -#endif - [] + + [] member Char : int + /// The character number in the input stream member AbsoluteOffset : int + /// Return absolute offset of the start of the line marked by the position member StartOfLineAbsoluteOffset : int + /// Return the column number marked by the position, i.e. 
the difference between the AbsoluteOffset and the StartOfLineAbsoluteOffset member Column : int + // Given a position just beyond the end of a line, return a position at the start of the next line - member NextLine : Position - + member NextLine : Position + /// Given a position at the start of a token of length n, return a position just beyond the end of the token member EndOfToken: n:int -> Position + /// Gives a position shifted by specified number of characters member ShiftColumnBy: by:int -> Position - - [] + + [] member AsNewLinePos : unit -> Position - - /// Get an arbitrary position, with the empty string as filename, and + + /// Get an arbitrary position, with the empty string as filename, and static member Empty : Position /// Get a position corresponding to the first line (line number 1) in a given file static member FirstLine : filename:string -> Position - + [] -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal LexBuffer<'char> = -#else /// Input buffers consumed by lexers generated by fslex.exe type LexBuffer<'char> = -#endif /// The start position for the lexeme member StartPos: Position with get,set + /// The end position for the lexeme member EndPos: Position with get,set - /// The matched string + + /// The matched string member Lexeme: 'char array - + /// Fast helper to turn the matched characters into a string, avoiding an intermediate array static member LexemeString : LexBuffer -> string - - /// The length of the matched string + + /// The length of the matched string member LexemeLength: int - /// Fetch a particular character in the matched string + + /// Fetch a particular character in the matched string member LexemeChar: int -> 'char /// Dynamically typed, non-lexically scoped parameter table member BufferLocalStore : IDictionary - + /// True if the refill of the buffer ever failed , or if explicitly set to true. 
member IsPastEndOfStream: bool with get,set - /// Remove all input, though don't discard the current lexeme + + /// Remove all input, though don't discard the current lexeme member DiscardInput: unit -> unit /// Create a lex buffer suitable for byte lexing that reads characters from the given array static member FromBytes: byte[] -> LexBuffer + /// Create a lex buffer suitable for Unicode lexing that reads characters from the given array static member FromChars: char[] -> LexBuffer + /// Create a lex buffer suitable for Unicode lexing that reads characters from the given string static member FromString: string -> LexBuffer + /// Create a lex buffer that reads character or byte inputs by using the given function static member FromFunction: ('char[] * int * int -> int) -> LexBuffer<'char> + /// Create a lex buffer that asynchronously reads character or byte inputs by using the given function static member FromAsyncFunction: ('char[] * int * int -> Async) -> LexBuffer<'char> - [.FromFunction instead")>] static member FromCharFunction: (char[] -> int -> int) -> LexBuffer + [.FromFunction instead")>] static member FromByteFunction: (byte[] -> int -> int) -> LexBuffer /// Create a lex buffer suitable for use with a Unicode lexer that reads character inputs from the given text reader static member FromTextReader: System.IO.TextReader -> LexBuffer + /// Create a lex buffer suitable for use with ASCII byte lexing that reads byte inputs from the given binary reader static member FromBinaryReader: System.IO.BinaryReader -> LexBuffer -/// The type of tables for an ascii lexer generated by fslex. +/// The type of tables for an ascii lexer generated by fslex. [] -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal AsciiTables = -#else type AsciiTables = -#endif static member Create : uint16[] array * uint16[] -> AsciiTables - /// Interpret tables for an ascii lexer generated by fslex. + + /// Interpret tables for an ascii lexer generated by fslex. 
member Interpret: initialState:int * LexBuffer -> int + + [] /// Interpret tables for an ascii lexer generated by fslex, processing input asynchronously member AsyncInterpret: initialState:int * LexBuffer -> Async -/// The type of tables for an unicode lexer generated by fslex. +/// The type of tables for an unicode lexer generated by fslex. [] -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal UnicodeTables = -#else type UnicodeTables = -#endif + static member Create : uint16[] array * uint16[] -> UnicodeTables - /// Interpret tables for a unicode lexer generated by fslex. + + /// Interpret tables for a unicode lexer generated by fslex. member Interpret: initialState:int * LexBuffer -> int + [] /// Interpret tables for a unicode lexer generated by fslex, processing input asynchronously member AsyncInterpret: initialState:int * LexBuffer -> Async + +/// Standard utility to create a Unicode LexBuffer +/// +/// One small annoyance is that LexBuffers and not IDisposable. This means +/// we can't just return the LexBuffer object, since the file it wraps wouldn't +/// get closed when we're finished with the LexBuffer. Hence we return the stream, +/// the reader and the LexBuffer. The caller should dispose the first two when done. +val UnicodeFileAsLexbuf: string * int option -> System.IO.FileStream * System.IO.StreamReader * LexBuffer \ No newline at end of file diff --git a/buildtools/fslex/Parsing.fs b/buildtools/fslex/Parsing.fs index 01dccfb6101..f66aa7a77f4 100644 --- a/buildtools/fslex/Parsing.fs +++ b/buildtools/fslex/Parsing.fs @@ -1,87 +1,76 @@ // (c) Microsoft Corporation 2005-2009. 
-#if INTERNALIZED_FSLEXYACC_RUNTIME +namespace FSharp.Text.Parsing +open FSharp.Text.Lexing -namespace Internal.Utilities.Text.Parsing -open Internal.Utilities -open Internal.Utilities.Text.Lexing - -#else -namespace Microsoft.FSharp.Text.Parsing -open Microsoft.FSharp.Text.Lexing -#endif - - - -open System open System.Collections.Generic -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal IParseState = -#else type IParseState = -#endif abstract InputRange: int -> Position * Position + abstract InputEndPosition: int -> Position + abstract InputStartPosition: int -> Position + abstract ResultRange: Position * Position + abstract GetInput: int -> obj + abstract ParserLocalStore : IDictionary + abstract RaiseError<'b> : unit -> 'b //------------------------------------------------------------------------- // This context is passed to the error reporter when a syntax error occurs [] -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal ParseErrorContext<'tok> -#else type ParseErrorContext<'tok> -#endif (//lexbuf: LexBuffer<_>, stateStack:int list, parseState: IParseState, reduceTokens: int list, currentToken: 'tok option, reducibleProductions: int list list, - shiftableTokens: int list , + shiftableTokens: int list, message : string) = - //member x.LexBuffer = lexbuf - member x.StateStack = stateStack - member x.ReduceTokens = reduceTokens - member x.CurrentToken = currentToken - member x.ParseState = parseState - member x.ReducibleProductions = reducibleProductions - member x.ShiftTokens = shiftableTokens - member x.Message = message + + member _.StateStack = stateStack + + member _.ReduceTokens = reduceTokens + + member _.CurrentToken = currentToken + + member _.ParseState = parseState + + member _.ReducibleProductions = reducibleProductions + + member _.ShiftTokens = shiftableTokens + + member _.Message = message //------------------------------------------------------------------------- // This is the data structure emitted as code by FSYACC. 
-#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal Tables<'tok> = -#else type Tables<'tok> = -#endif - { reductions: (IParseState -> obj) array; - endOfInputTag: int; - tagOfToken: 'tok -> int; - dataOfToken: 'tok -> obj; - actionTableElements: uint16[]; - actionTableRowOffsets: uint16[]; - reductionSymbolCounts: uint16[]; - immediateActions: uint16[]; - gotos: uint16[]; - sparseGotoTableRowOffsets: uint16[]; - stateToProdIdxsTableElements: uint16[]; - stateToProdIdxsTableRowOffsets: uint16[]; - productionToNonTerminalTable: uint16[]; + { reductions: (IParseState -> obj)[] + endOfInputTag: int + tagOfToken: 'tok -> int + dataOfToken: 'tok -> obj + actionTableElements: uint16[] + actionTableRowOffsets: uint16[] + reductionSymbolCounts: uint16[] + immediateActions: uint16[] + gotos: uint16[] + sparseGotoTableRowOffsets: uint16[] + stateToProdIdxsTableElements: uint16[] + stateToProdIdxsTableRowOffsets: uint16[] + productionToNonTerminalTable: uint16[] /// For fsyacc.exe, this entry is filled in by context from the generated parser file. 
If no 'parse_error' function /// is defined by the user then ParseHelpers.parse_error is used by default (ParseHelpers is opened /// at the top of the generated parser file) - parseError: ParseErrorContext<'tok> -> unit; - numTerminals: int; + parseError: ParseErrorContext<'tok> -> unit + numTerminals: int tagOfErrorTerminal: int } //------------------------------------------------------------------------- @@ -90,11 +79,7 @@ type Tables<'tok> = // This type is in System.dll so for the moment we can't use it in FSharp.Core.dll //type Stack<'a> = System.Collections.Generic.Stack<'a> -#if INTERNALIZED_FSLEXYACC_RUNTIME -type Stack<'a>(n) = -#else type internal Stack<'a>(n) = -#endif let mutable contents = Array.zeroCreate<'a>(n) let mutable count = 0 @@ -102,16 +87,16 @@ type internal Stack<'a>(n) = let oldSize = Array.length contents if newSize > oldSize then let old = contents - contents <- Array.zeroCreate (max newSize (oldSize * 2)); - Array.blit old 0 contents 0 count; + contents <- Array.zeroCreate (max newSize (oldSize * 2)) + Array.blit old 0 contents 0 count member buf.Count = count member buf.Pop() = count <- count - 1 member buf.Peep() = contents.[count - 1] member buf.Top(n) = [ for x in contents.[max 0 (count-n)..count - 1] -> x ] |> List.rev member buf.Push(x) = - buf.Ensure(count + 1); - contents.[count] <- x; + buf.Ensure(count + 1) + contents.[count] <- x count <- count + 1 member buf.IsEmpty = (count = 0) @@ -132,11 +117,7 @@ module Flags = let mutable debug = false #endif -#if INTERNALIZED_FSLEXYACC_RUNTIME -module internal Implementation = -#else module Implementation = -#endif // Definitions shared with fsyacc let anyMarker = 0xffff @@ -153,7 +134,7 @@ module Implementation = // Read the tables written by FSYACC. 
type AssocTable(elemTab:uint16[], offsetTab:uint16[]) = - let cache = new Dictionary<_,_>(2000) + let cache = Dictionary<_,_>(2000) member t.readAssoc (minElemNum,maxElemNum,defaultValueOfAssoc,keyToFind) = // do a binary chop on the table @@ -164,10 +145,12 @@ module Implementation = let x = int elemTab.[elemNumber*2] if keyToFind = x then int elemTab.[elemNumber*2+1] - elif keyToFind < x then t.readAssoc (minElemNum ,elemNumber,defaultValueOfAssoc,keyToFind) - else t.readAssoc (elemNumber+1,maxElemNum,defaultValueOfAssoc,keyToFind) + elif keyToFind < x then + t.readAssoc (minElemNum,elemNumber,defaultValueOfAssoc,keyToFind) + else + t.readAssoc (elemNumber+1,maxElemNum,defaultValueOfAssoc,keyToFind) - member t.Read(rowNumber ,keyToFind) = + member t.Read(rowNumber,keyToFind) = // First check the sparse lookaside table // Performance note: without this lookaside table the binary chop in readAssoc @@ -191,7 +174,7 @@ module Implementation = // Read all entries in the association table // Used during error recovery to find all valid entries in the table - member x.ReadAll(n) = + member _.ReadAll(n) = let headOfTable = int offsetTab.[n] let firstElemNumber = headOfTable + 1 let numberOfElementsInAssoc = int32 elemTab.[headOfTable*2] @@ -202,7 +185,7 @@ module Implementation = type IdxToIdxListTable(elemTab:uint16[], offsetTab:uint16[]) = // Read all entries in a row of the table - member x.ReadAll(n) = + member _.ReadAll(n) = let headOfTable = int offsetTab.[n] let firstElemNumber = headOfTable + 1 let numberOfElements = int32 elemTab.[headOfTable] @@ -217,22 +200,31 @@ module Implementation = val value: obj val startPos: Position val endPos: Position - new(value,startPos,endPos) = { value=value; startPos=startPos;endPos=endPos } + + new(value,startPos,endPos) = { value=value; startPos=startPos; endPos=endPos } let interpret (tables: Tables<'tok>) lexer (lexbuf : LexBuffer<_>) initialState = - let localStore = new Dictionary() in - localStore.["LexBuffer"] <- lexbuf; 
+ let localStore = Dictionary() in + localStore.["LexBuffer"] <- lexbuf #if __DEBUG - if Flags.debug then System.Console.WriteLine("\nParser: interpret tables"); + if Flags.debug then System.Console.WriteLine("\nParser: interpret tables") #endif - let stateStack : Stack = new Stack<_>(100) - stateStack.Push(initialState); - let valueStack = new Stack(100) + let stateStack : Stack = Stack<_>(100) + + stateStack.Push(initialState) + + let valueStack = Stack(100) + let mutable haveLookahead = false + let mutable lookaheadToken = Unchecked.defaultof<'tok> + let mutable lookaheadEndPos = Unchecked.defaultof + let mutable lookaheadStartPos = Unchecked.defaultof + let mutable finished = false + // After an error occurs, we suppress errors until we've shifted three tokens in a row. let mutable errorSuppressionCountDown = 0 @@ -243,27 +235,31 @@ module Implementation = // where consuming one EOF to trigger an error doesn't result in overall parse failure // catastrophe and the loss of intermediate results. 
// + let mutable inEofCountDown = false + let mutable eofCountDown = 20 // Number of EOFs to supply at the end for error recovery + // The 100 here means a maximum of 100 elements for each rule - let ruleStartPoss = (Array.zeroCreate 100 : Position array) - let ruleEndPoss = (Array.zeroCreate 100 : Position array) - let ruleValues = (Array.zeroCreate 100 : obj array) - let lhsPos = (Array.zeroCreate 2 : Position array) + let ruleStartPoss = (Array.zeroCreate 100 : Position[]) + let ruleEndPoss = (Array.zeroCreate 100 : Position[]) + let ruleValues = (Array.zeroCreate 100 : obj[]) + let lhsPos = (Array.zeroCreate 2 : Position[]) + let reductions = tables.reductions - let actionTable = new AssocTable(tables.actionTableElements, tables.actionTableRowOffsets) - let gotoTable = new AssocTable(tables.gotos, tables.sparseGotoTableRowOffsets) - let stateToProdIdxsTable = new IdxToIdxListTable(tables.stateToProdIdxsTableElements, tables.stateToProdIdxsTableRowOffsets) + let actionTable = AssocTable(tables.actionTableElements, tables.actionTableRowOffsets) + let gotoTable = AssocTable(tables.gotos, tables.sparseGotoTableRowOffsets) + let stateToProdIdxsTable = IdxToIdxListTable(tables.stateToProdIdxsTableElements, tables.stateToProdIdxsTableRowOffsets) let parseState = { new IParseState with - member p.InputRange(n) = ruleStartPoss.[n-1], ruleEndPoss.[n-1]; - member p.InputStartPosition(n) = ruleStartPoss.[n-1] - member p.InputEndPosition(n) = ruleEndPoss.[n-1]; - member p.GetInput(n) = ruleValues.[n-1]; - member p.ResultRange = (lhsPos.[0], lhsPos.[1]); - member p.ParserLocalStore = (localStore :> IDictionary<_,_>); - member p.RaiseError() = raise RecoverableParseError (* NOTE: this binding tests the fairly complex logic associated with an object expression implementing a generic abstract method *) + member _.InputRange(n) = ruleStartPoss.[n-1], ruleEndPoss.[n-1] + member _.InputStartPosition(n) = ruleStartPoss.[n-1] + member _.InputEndPosition(n) = ruleEndPoss.[n-1] + 
member _.GetInput(n) = ruleValues.[n-1] + member _.ResultRange = (lhsPos.[0], lhsPos.[1]) + member _.ParserLocalStore = (localStore :> IDictionary<_,_>) + member _.RaiseError() = raise RecoverableParseError (* NOTE: this binding tests the fairly complex logic associated with an object expression implementing a generic abstract method *) } #if __DEBUG @@ -275,22 +271,22 @@ module Implementation = // Pop the stack until we can shift the 'error' token. If 'tokenOpt' is given // then keep popping until we can shift both the 'error' token and the token in 'tokenOpt'. // This is used at end-of-file to make sure we can shift both the 'error' token and the 'EOF' token. - let rec popStackUntilErrorShifted(tokenOpt) = + let rec popStackUntilErrorShifted tokenOpt = // Keep popping the stack until the "error" terminal is shifted #if __DEBUG - if Flags.debug then System.Console.WriteLine("popStackUntilErrorShifted"); + if Flags.debug then System.Console.WriteLine("popStackUntilErrorShifted") #endif if stateStack.IsEmpty then #if __DEBUG if Flags.debug then - System.Console.WriteLine("state stack empty during error recovery - generating parse error"); + System.Console.WriteLine("state stack empty during error recovery - generating parse error") #endif - failwith "parse error"; + failwith "parse error" let currState = stateStack.Peep() #if __DEBUG if Flags.debug then - System.Console.WriteLine("In state {0} during error recovery", currState); + System.Console.WriteLine("In state {0} during error recovery", currState) #endif let action = actionTable.Read(currState, tables.tagOfErrorTerminal) @@ -303,22 +299,22 @@ module Implementation = actionKind (actionTable.Read(nextState, tables.tagOfToken(token))) = shiftFlag) then #if __DEBUG - if Flags.debug then System.Console.WriteLine("shifting error, continuing with error recovery"); + if Flags.debug then System.Console.WriteLine("shifting error, continuing with error recovery") #endif let nextState = actionValue action // The "error" 
non terminal needs position information, though it tends to be unreliable. // Use the StartPos/EndPos from the lex buffer - valueStack.Push(ValueInfo(box (), lexbuf.StartPos, lexbuf.EndPos)); + valueStack.Push(ValueInfo(box (), lexbuf.StartPos, lexbuf.EndPos)) stateStack.Push(nextState) else if valueStack.IsEmpty then - failwith "parse error"; + failwith "parse error" #if __DEBUG if Flags.debug then - System.Console.WriteLine("popping stack during error recovery"); + System.Console.WriteLine("popping stack during error recovery") #endif - valueStack.Pop(); - stateStack.Pop(); + valueStack.Pop() + stateStack.Pop() popStackUntilErrorShifted(tokenOpt) while not finished do @@ -350,7 +346,7 @@ module Implementation = lookaheadToken <- lexer lexbuf lookaheadStartPos <- lexbuf.StartPos lookaheadEndPos <- lexbuf.EndPos - haveLookahead <- true; + haveLookahead <- true let tag = if haveLookahead then tables.tagOfToken lookaheadToken @@ -362,17 +358,17 @@ module Implementation = let kind = actionKind action if kind = shiftFlag then ( if errorSuppressionCountDown > 0 then - errorSuppressionCountDown <- errorSuppressionCountDown - 1; + errorSuppressionCountDown <- errorSuppressionCountDown - 1 #if __DEBUG - if Flags.debug then Console.WriteLine("shifting, reduced errorRecoverylevel to {0}\n", errorSuppressionCountDown); + if Flags.debug then Console.WriteLine("shifting, reduced errorRecoverylevel to {0}\n", errorSuppressionCountDown) #endif let nextState = actionValue action - if not haveLookahead then failwith "shift on end of input!"; + if not haveLookahead then failwith "shift on end of input!" 
let data = tables.dataOfToken lookaheadToken - valueStack.Push(ValueInfo(data, lookaheadStartPos, lookaheadEndPos)); - stateStack.Push(nextState); + valueStack.Push(ValueInfo(data, lookaheadStartPos, lookaheadEndPos)) + stateStack.Push(nextState) #if __DEBUG - if Flags.debug then Console.WriteLine("shift/consume input {0}, shift to state {1}", report haveLookahead lookaheadToken, nextState); + if Flags.debug then Console.WriteLine("shift/consume input {0}, shift to state {1}", report haveLookahead lookaheadToken, nextState) #endif haveLookahead <- false @@ -382,27 +378,27 @@ module Implementation = let n = int tables.reductionSymbolCounts.[prod] // pop the symbols, populate the values and populate the locations #if __DEBUG - if Flags.debug then Console.Write("reduce popping {0} values/states, lookahead {1}", n, report haveLookahead lookaheadToken); + if Flags.debug then Console.Write("reduce popping {0} values/states, lookahead {1}", n, report haveLookahead lookaheadToken) #endif - lhsPos.[0] <- Position.Empty; - lhsPos.[1] <- Position.Empty; + lhsPos.[0] <- Position.Empty + lhsPos.[1] <- Position.Empty for i = 0 to n - 1 do - if valueStack.IsEmpty then failwith "empty symbol stack"; + if valueStack.IsEmpty then failwith "empty symbol stack" let topVal = valueStack.Peep() - valueStack.Pop(); - stateStack.Pop(); - ruleValues.[(n-i)-1] <- topVal.value; - ruleStartPoss.[(n-i)-1] <- topVal.startPos; - ruleEndPoss.[(n-i)-1] <- topVal.endPos; - if lhsPos.[1] = Position.Empty then lhsPos.[1] <- topVal.endPos; + valueStack.Pop() + stateStack.Pop() + ruleValues.[(n-i)-1] <- topVal.value + ruleStartPoss.[(n-i)-1] <- topVal.startPos + ruleEndPoss.[(n-i)-1] <- topVal.endPos + if lhsPos.[1] = Position.Empty then lhsPos.[1] <- topVal.endPos if not (topVal.startPos = Position.Empty) then lhsPos.[0] <- topVal.startPos - done; + done try - // Printf.printf "reduce %d\n" prod; + // Printf.printf "reduce %d\n" prod let redResult = reduction parseState - 
valueStack.Push(ValueInfo(redResult, lhsPos.[0], lhsPos.[1])); + valueStack.Push(ValueInfo(redResult, lhsPos.[0], lhsPos.[1])) let currState = stateStack.Peep() let newGotoState = gotoTable.Read(int tables.productionToNonTerminalTable.[prod], currState) stateStack.Push(newGotoState) @@ -411,23 +407,23 @@ module Implementation = #endif with | Accept res -> - finished <- true; + finished <- true valueStack.Push(ValueInfo(res, lhsPos.[0], lhsPos.[1])) | RecoverableParseError -> #if __DEBUG - if Flags.debug then Console.WriteLine("RecoverableParseErrorException...\n"); + if Flags.debug then Console.WriteLine("RecoverableParseErrorException...\n") #endif - popStackUntilErrorShifted(None); + popStackUntilErrorShifted(None) // User code raised a Parse_error. Don't report errors again until three tokens have been shifted errorSuppressionCountDown <- 3 elif kind = errorFlag then ( #if __DEBUG - if Flags.debug then Console.Write("ErrorFlag... "); + if Flags.debug then Console.Write("ErrorFlag... ") #endif // Silently discard inputs and don't report errors // until three tokens in a row have been shifted #if __DEBUG - if Flags.debug then printfn "error on token '%A' " (if haveLookahead then Some(lookaheadToken) else None); + if Flags.debug then printfn "error on token '%A' " (if haveLookahead then Some(lookaheadToken) else None) #endif if errorSuppressionCountDown > 0 then // If we're in the end-of-file count down then we're very keen to 'Accept'. @@ -435,16 +431,16 @@ module Implementation = // and an EOF token. 
if inEofCountDown && eofCountDown < 10 then #if __DEBUG - if Flags.debug then printfn "poppin stack, lokking to shift both 'error' and that token, during end-of-file error recovery" ; + if Flags.debug then printfn "poppin stack, lokking to shift both 'error' and that token, during end-of-file error recovery" #endif - popStackUntilErrorShifted(if haveLookahead then Some(lookaheadToken) else None); + popStackUntilErrorShifted(if haveLookahead then Some(lookaheadToken) else None) // If we don't haveLookahead then the end-of-file count down is over and we have no further options. if not haveLookahead then failwith "parse error: unexpected end of file" #if __DEBUG - if Flags.debug then printfn "discarding token '%A' during error suppression" (if haveLookahead then Some(lookaheadToken) else None); + if Flags.debug then printfn "discarding token '%A' during error suppression" (if haveLookahead then Some(lookaheadToken) else None) #endif // Discard the token haveLookahead <- false @@ -454,10 +450,10 @@ module Implementation = let currentToken = if haveLookahead then Some(lookaheadToken) else None let actions,defaultAction = actionTable.ReadAll(state) - let explicit = Set.ofList [ for (tag,_action) in actions -> tag ] + let explicit = Set.ofList [ for tag,_action in actions -> tag ] let shiftableTokens = - [ for (tag,action) in actions do + [ for tag,action in actions do if (actionKind action) = shiftFlag then yield tag if actionKind defaultAction = shiftFlag then @@ -471,7 +467,7 @@ module Implementation = yield stateToProdIdxsTable.ReadAll(state) ] let reduceTokens = - [ for (tag,action) in actions do + [ for tag,action in actions do if actionKind(action) = reduceFlag then yield tag if actionKind(defaultAction) = reduceFlag then @@ -480,35 +476,27 @@ module Implementation = yield tag ] in //let activeRules = stateStack |> List.iter (fun state -> let errorContext = new ParseErrorContext<'tok>(stateStack,parseState, reduceTokens,currentToken,reducibleProductions, 
shiftableTokens, "syntax error") - tables.parseError(errorContext); - popStackUntilErrorShifted(None); - errorSuppressionCountDown <- 3; + tables.parseError(errorContext) + popStackUntilErrorShifted(None) + errorSuppressionCountDown <- 3 #if __DEBUG - if Flags.debug then System.Console.WriteLine("generated syntax error and shifted error token, haveLookahead = {0}\n", haveLookahead); + if Flags.debug then System.Console.WriteLine("generated syntax error and shifted error token, haveLookahead = {0}\n", haveLookahead) #endif ) ) elif kind = acceptFlag then finished <- true #if __DEBUG else - if Flags.debug then System.Console.WriteLine("ALARM!!! drop through case in parser"); + if Flags.debug then System.Console.WriteLine("ALARM!!! drop through case in parser") #endif - done; + done // OK, we're done - read off the overall generated value valueStack.Peep().value -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal Tables<'tok> with -#else type Tables<'tok> with -#endif - member tables.Interpret (lexer,lexbuf,initialState) = - Implementation.interpret tables lexer lexbuf initialState + member tables.Interpret (lexer,lexbuf,startState) = + Implementation.interpret tables lexer lexbuf startState -#if INTERNALIZED_FSLEXYACC_RUNTIME -module internal ParseHelpers = -#else module ParseHelpers = -#endif let parse_error (_s:string) = () let parse_error_rich = (None : (ParseErrorContext<_> -> unit) option) diff --git a/buildtools/fslex/Parsing.fsi b/buildtools/fslex/Parsing.fsi index f4d12606462..e4e7329441a 100644 --- a/buildtools/fslex/Parsing.fsi +++ b/buildtools/fslex/Parsing.fsi @@ -2,129 +2,132 @@ // (c) Microsoft Corporation 2005-2009. 
//========================================================================= -#if INTERNALIZED_FSLEXYACC_RUNTIME -namespace Internal.Utilities.Text.Parsing -open Internal.Utilities -open Internal.Utilities.Text.Lexing -#else -namespace Microsoft.FSharp.Text.Parsing -open Microsoft.FSharp.Text.Lexing -#endif +namespace FSharp.Text.Parsing +open FSharp.Text.Lexing open System.Collections.Generic -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal IParseState = -#else /// The information accessible via the parseState value within parser actions. type IParseState = -#endif /// Get the start and end position for the terminal or non-terminal at a given index matched by the production abstract InputRange: index:int -> Position * Position + /// Get the end position for the terminal or non-terminal at a given index matched by the production abstract InputEndPosition: int -> Position + /// Get the start position for the terminal or non-terminal at a given index matched by the production abstract InputStartPosition: int -> Position + /// Get the full range of positions matched by the production abstract ResultRange: Position * Position + /// Get the value produced by the terminal or non-terminal at the given position abstract GetInput : int -> obj + /// Get the store of local values associated with this parser // Dynamically typed, non-lexically scoped local store abstract ParserLocalStore : IDictionary + /// Raise an error in this parse context abstract RaiseError<'b> : unit -> 'b [] -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal ParseErrorContext<'tok> = -#else /// The context provided when a parse error occurs type ParseErrorContext<'tok> = -#endif /// The stack of state indexes active at the parse error member StateStack : int list + /// The state active at the parse error member ParseState : IParseState + /// The tokens that would cause a reduction at the parse error member ReduceTokens: int list + /// The stack of productions that would be reduced at the parse error 
member ReducibleProductions : int list list + /// The token that caused the parse error member CurrentToken : 'tok option + /// The token that would cause a shift at the parse error member ShiftTokens : int list + /// The message associated with the parse error member Message : string /// Tables generated by fsyacc -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal Tables<'tok> = -#else /// The type of the tables contained in a file produced by the fsyacc.exe parser generator. type Tables<'tok> = -#endif - { /// The reduction table + { + /// The reduction table reductions: (IParseState -> obj) array ; + /// The token number indicating the end of input endOfInputTag: int; + /// A function to compute the tag of a token tagOfToken: 'tok -> int; + /// A function to compute the data carried by a token dataOfToken: 'tok -> obj; + /// The sparse action table elements actionTableElements: uint16[]; + /// The sparse action table row offsets actionTableRowOffsets: uint16[]; + /// The number of symbols for each reduction reductionSymbolCounts: uint16[]; + /// The immediate action table immediateActions: uint16[]; + /// The sparse goto table gotos: uint16[]; + /// The sparse goto table row offsets sparseGotoTableRowOffsets: uint16[]; + /// The sparse table for the productions active for each state stateToProdIdxsTableElements: uint16[]; + /// The sparse table offsets for the productions active for each state stateToProdIdxsTableRowOffsets: uint16[]; + /// This table is logically part of the Goto table productionToNonTerminalTable: uint16[]; + /// This function is used to hold the user specified "parse_error" or "parse_error_rich" functions parseError: ParseErrorContext<'tok> -> unit; + /// The total number of terminals numTerminals: int; + /// The tag of the error terminal - tagOfErrorTerminal: int } + tagOfErrorTerminal: int + } /// Interpret the parser table taking input from the given lexer, using the given lex buffer, and the given start state. 
/// Returns an object indicating the final synthesized value for the parse. - member Interpret : lexer:(LexBuffer<'char> -> 'tok) * lexbuf:LexBuffer<'char> * initialState:int -> obj + member Interpret : lexer:(LexBuffer<'char> -> 'tok) * lexbuf:LexBuffer<'char> * startState:int -> obj -#if INTERNALIZED_FSLEXYACC_RUNTIME -exception internal Accept of obj -exception internal RecoverableParseError -#else /// Indicates an accept action has occured exception Accept of obj /// Indicates a parse error has occured and parse recovery is in progress exception RecoverableParseError -#endif #if __DEBUG module internal Flags = val mutable debug : bool #endif -#if INTERNALIZED_FSLEXYACC_RUNTIME -module internal ParseHelpers = -#else /// Helpers used by generated parsers. module ParseHelpers = -#endif /// The default implementation of the parse_error_rich function val parse_error_rich: (ParseErrorContext<'tok> -> unit) option + /// The default implementation of the parse_error function val parse_error: string -> unit diff --git a/buildtools/fslex/fslex.fs b/buildtools/fslex/fslex.fs index 98966fdfaf3..0769f022f39 100644 --- a/buildtools/fslex/fslex.fs +++ b/buildtools/fslex/fslex.fs @@ -1,220 +1,84 @@ -// (c) Microsoft Corporation 2005-2009. +// (c) Microsoft Corporation 2005-2009. -module internal FsLexYacc.FsLex.Driver +module FsLexYacc.FsLex.Program -open FsLexYacc.FsLex open FsLexYacc.FsLex.AST -open FsLexYacc.FsLex.Parser +open FsLexYacc.FsLex.Driver open Printf -open Internal.Utilities -open Internal.Utilities.Text.Lexing -open System -open System.Collections.Generic -open System.IO +open FSharp.Text +open System.IO -//------------------------------------------------------------------ -// This code is duplicated from Microsoft.FSharp.Compiler.UnicodeLexing - -type Lexbuf = LexBuffer - -/// Standard utility to create a Unicode LexBuffer -/// -/// One small annoyance is that LexBuffers and not IDisposable. 
This means -/// we can't just return the LexBuffer object, since the file it wraps wouldn't -/// get closed when we're finished with the LexBuffer. Hence we return the stream, -/// the reader and the LexBuffer. The caller should dispose the first two when done. -let UnicodeFileAsLexbuf (filename,codePage : int option) : FileStream * StreamReader * Lexbuf = - // Use the .NET functionality to auto-detect the unicode encoding - // It also presents the bytes read to the lexer in UTF8 decoded form - let stream = new FileStream(filename,FileMode.Open,FileAccess.Read,FileShare.Read) - let reader = - match codePage with - | None -> new StreamReader(stream,true) - | Some n -> new StreamReader(stream,System.Text.Encoding.GetEncoding(n)) - let lexbuf = LexBuffer.FromFunction(reader.Read) - lexbuf.EndPos <- Position.FirstLine(filename) - stream, reader, lexbuf - //------------------------------------------------------------------ // This is the program proper -let input = ref None -let out = ref None -let inputCodePage = ref None -let light = ref None - +let mutable input = None +let mutable out = None +let mutable inputCodePage = None +let mutable light = None +let mutable modname = None +let mutable internal_module = false let mutable lexlib = "FSharp.Text.Lexing" +let mutable unicode = false +let mutable caseInsensitive = false let usage = - [ ArgInfo ("-o", ArgType.String (fun s -> out := Some s), "Name the output file.") - ArgInfo ("--codepage", ArgType.Int (fun i -> inputCodePage := Some i), "Assume input lexer specification file is encoded with the given codepage.") - ArgInfo ("--light", ArgType.Unit (fun () -> light := Some true), "(ignored)") - ArgInfo ("--light-off", ArgType.Unit (fun () -> light := Some false), "Add #light \"off\" to the top of the generated file") + [ ArgInfo ("-o", ArgType.String (fun s -> out <- Some s), "Name the output file.") + ArgInfo ("--module", ArgType.String (fun s -> modname <- Some s), "Define the F# module name to host the generated 
parser."); + ArgInfo ("--internal", ArgType.Unit (fun () -> internal_module <- true), "Generate an internal module"); + ArgInfo ("--codepage", ArgType.Int (fun i -> inputCodePage <- Some i), "Assume input lexer specification file is encoded with the given codepage.") + ArgInfo ("--light", ArgType.Unit (fun () -> light <- Some true), "(ignored)") + ArgInfo ("--light-off", ArgType.Unit (fun () -> light <- Some false), "Add #light \"off\" to the top of the generated file") ArgInfo ("--lexlib", ArgType.String (fun s -> lexlib <- s), "Specify the namespace for the implementation of the lexer table interpreter (default FSharp.Text.Lexing)") - ArgInfo ("--unicode", ArgType.Set unicode, "Produce a lexer for use with 16-bit unicode characters.") + ArgInfo ("--unicode", ArgType.Unit (fun () -> unicode <- true), "Produce a lexer for use with 16-bit unicode characters.") + ArgInfo ("-i", ArgType.Unit (fun () -> caseInsensitive <- true), "Produce a case-insensitive lexer.") ] -let _ = ArgParser.Parse(usage, (fun x -> match !input with Some _ -> failwith "more than one input given" | None -> input := Some x), "fslex ") - -let outputInt (os: TextWriter) (n:int) = os.Write(string n) - -let outputCodedUInt16 (os: #TextWriter) (n:int) = - os.Write n - os.Write "us; " - -let sentinel = 255 * 256 + 255 - -let lineCount = ref 0 -let cfprintfn (os: #TextWriter) fmt = Printf.kfprintf (fun () -> incr lineCount; os.WriteLine()) os fmt - -let main() = - try - let filename = (match !input with Some x -> x | None -> failwith "no input given") - let domain = if !unicode then "Unicode" else "Ascii" - let spec = - let stream,reader,lexbuf = UnicodeFileAsLexbuf(filename, !inputCodePage) - use stream = stream - use reader = reader - try - Parser.spec Lexer.token lexbuf - with e -> - eprintf "%s(%d,%d): error: %s" filename lexbuf.StartPos.Line lexbuf.StartPos.Column - (match e with - | Failure s -> s +let _ = ArgParser.Parse(usage, (fun x -> match input with Some _ -> failwith "more than one input 
given" | None -> input <- Some x), "fslex ") + +let compileSpec (spec: Spec) (ctx: ParseContext) = + let perRuleData, dfaNodes = Compile ctx spec + let dfaNodes = dfaNodes |> List.sortBy (fun n -> n.Id) + perRuleData, dfaNodes + +let main() = + try + let filename = (match input with Some x -> x | None -> failwith "no input given") + let parseContext = + { unicode = unicode + caseInsensitive = caseInsensitive } + let spec = + match readSpecFromFile filename inputCodePage with + | Ok spec -> spec + | Error (e, line, column) -> + eprintf "%s(%d,%d): error: %s" filename line column + (match e with + | Failure s -> s | _ -> e.Message) - exit 1 - printfn "compiling to dfas (can take a while...)" - let perRuleData, dfaNodes = AST.Compile spec - let dfaNodes = dfaNodes |> List.sortBy (fun n -> n.Id) + exit 1 + printfn "compiling to dfas (can take a while...)" + let perRuleData, dfaNodes = compileSpec spec parseContext printfn "%d states" dfaNodes.Length - printfn "writing output" - - let output = - match !out with - | Some x -> x - | _ -> - Path.Combine (Path.GetDirectoryName filename,Path.GetFileNameWithoutExtension(filename)) + ".fs" - use os = System.IO.File.CreateText output - - if (!light = Some(false)) || (!light = None && (Path.HasExtension(output) && Path.GetExtension(output) = ".ml")) then - cfprintfn os "#light \"off\"" - - let printLinesIfCodeDefined (code,pos:Position) = - if pos <> Position.Empty // If bottom code is unspecified, then position is empty. 
- then - cfprintfn os "# %d \"%s\"" pos.Line pos.FileName - cfprintfn os "%s" code - - printLinesIfCodeDefined spec.TopCode - let code = fst spec.TopCode - lineCount := !lineCount + code.Replace("\r","").Split([| '\n' |]).Length - cfprintfn os "# %d \"%s\"" !lineCount output - - cfprintfn os "let trans : uint16[] array = " - cfprintfn os " [| " - if !unicode then - let specificUnicodeChars = GetSpecificUnicodeChars() - // This emits a (numLowUnicodeChars+NumUnicodeCategories+(2*#specificUnicodeChars)+1) * #states array of encoded UInt16 values - - // Each row for the Unicode table has format - // 128 entries for ASCII characters - // A variable number of 2*UInt16 entries for SpecificUnicodeChars - // 30 entries, one for each UnicodeCategory - // 1 entry for EOF - // - // Each entry is an encoded UInt16 value indicating the next state to transition to for this input. - // - // For the SpecificUnicodeChars the entries are char/next-state pairs. - for state in dfaNodes do - cfprintfn os " (* State %d *)" state.Id - fprintf os " [| " - let trans = - let dict = new Dictionary<_,_>() - state.Transitions |> List.iter dict.Add - dict - let emit n = - if trans.ContainsKey(n) then - outputCodedUInt16 os trans.[n].Id - else - outputCodedUInt16 os sentinel - for i = 0 to numLowUnicodeChars-1 do - let c = char i - emit (EncodeChar c) - for c in specificUnicodeChars do - outputCodedUInt16 os (int c) - emit (EncodeChar c) - for i = 0 to NumUnicodeCategories-1 do - emit (EncodeUnicodeCategoryIndex i) - emit Eof - cfprintfn os "|];" - done - - else - // Each row for the ASCII table has format - // 256 entries for ASCII characters - // 1 entry for EOF - // - // Each entry is an encoded UInt16 value indicating the next state to transition to for this input. 
- - // This emits a (256+1) * #states array of encoded UInt16 values - for state in dfaNodes do - cfprintfn os " (* State %d *)" state.Id - fprintf os " [|" - let trans = - let dict = new Dictionary<_,_>() - state.Transitions |> List.iter dict.Add - dict - let emit n = - if trans.ContainsKey(n) then - outputCodedUInt16 os trans.[n].Id - else - outputCodedUInt16 os sentinel - for i = 0 to 255 do - let c = char i - emit (EncodeChar c) - emit Eof - cfprintfn os "|];" - done - - cfprintfn os " |] " - - fprintf os "let actions : uint16[] = [|" - for state in dfaNodes do - if state.Accepted.Length > 0 then - outputCodedUInt16 os (snd state.Accepted.Head) - else - outputCodedUInt16 os sentinel - done - cfprintfn os "|]" - cfprintfn os "let _fslex_tables = %s.%sTables.Create(trans,actions)" lexlib domain - - cfprintfn os "let rec _fslex_dummy () = _fslex_dummy() " - - // These actions push the additional start state and come first, because they are then typically inlined into later - // rules. This means more tailcalls are taken as direct branches, increasing efficiency and - // improving stack usage on platforms that do not take tailcalls. 
- for ((startNode, actions),(ident,args,_)) in List.zip perRuleData spec.Rules do - cfprintfn os "// Rule %s" ident - cfprintfn os "and %s %s lexbuf =" ident (String.Join(" ",Array.ofList args)) - cfprintfn os " match _fslex_tables.Interpret(%d,lexbuf) with" startNode.Id - actions |> Seq.iteri (fun i (code:string, pos) -> - cfprintfn os " | %d -> ( " i - cfprintfn os "# %d \"%s\"" pos.Line pos.FileName - let lines = code.Split([| '\r'; '\n' |], StringSplitOptions.RemoveEmptyEntries) - for line in lines do - cfprintfn os " %s" line - cfprintfn os "# %d \"%s\"" !lineCount output - cfprintfn os " )") - cfprintfn os " | _ -> failwith \"%s\"" ident - - cfprintfn os "" - - printLinesIfCodeDefined spec.BottomCode - cfprintfn os "# 3000000 \"%s\"" output - - with e -> + printfn "writing output" + + let output = + match out with + | Some x -> x + | _ -> Path.ChangeExtension(filename, ".fs") + + let state : GeneratorState = + { inputFileName = filename + outputFileName = output + inputCodePage = inputCodePage |> Option.map System.Text.Encoding.GetEncoding |> Option.defaultValue System.Text.Encoding.UTF8 + generatedModuleName = modname + disableLightMode = light + generateInternalModule = internal_module + lexerLibraryName = lexlib + domain = if unicode then Unicode else ASCII } + writeSpecToFile state spec perRuleData dfaNodes + + with e -> eprintf "FSLEX: error FSL000: %s" (match e with Failure s -> s | e -> e.ToString()) exit 1 diff --git a/buildtools/fslex/fslex.fsproj b/buildtools/fslex/fslex.fsproj index 8577bf4e3af..b069eb6d53e 100644 --- a/buildtools/fslex/fslex.fsproj +++ b/buildtools/fslex/fslex.fsproj @@ -3,7 +3,6 @@ Exe net7.0 - INTERNALIZED_FSLEXYACC_RUNTIME;$(DefineConstants) true false @@ -18,6 +17,7 @@ + diff --git a/buildtools/fslex/fslexast.fs b/buildtools/fslex/fslexast.fs index 89cbdc4dfaf..fef6ac65ead 100644 --- a/buildtools/fslex/fslexast.fs +++ b/buildtools/fslex/fslexast.fs @@ -1,160 +1,182 @@ (* (c) Microsoft Corporation 2005-2008. 
*) -module internal FsLexYacc.FsLex.AST +module FsLexYacc.FsLex.AST open System.Collections.Generic -open FSharp.Text -open Microsoft.FSharp.Collections -open Internal.Utilities -open Internal.Utilities.Text.Lexing - -let (|KeyValue|) (kvp:KeyValuePair<_,_>) = kvp.Key,kvp.Value +open System.Globalization +open FSharp.Text.Lexing type Ident = string type Code = string * Position + +type ParseContext = { + unicode : bool + caseInsensitive: bool +} + +type Parser<'t> = ParseContext -> 't + type Alphabet = uint32 let Eof : Alphabet = 0xFFFFFFFEu let Epsilon : Alphabet = 0xFFFFFFFFu - -let unicode = ref false - -let unicodeCategories = - dict - [| "Pe", System.Globalization.UnicodeCategory.ClosePunctuation; // (Pe) - "Pc", System.Globalization.UnicodeCategory.ConnectorPunctuation; // (Pc) - "Cc", System.Globalization.UnicodeCategory.Control; // (Cc) - "Sc", System.Globalization.UnicodeCategory.CurrencySymbol; // (Sc) - "Pd", System.Globalization.UnicodeCategory.DashPunctuation; // (Pd) - "Nd", System.Globalization.UnicodeCategory.DecimalDigitNumber; // (Nd) - "Me", System.Globalization.UnicodeCategory.EnclosingMark; // (Me) - "Pf", System.Globalization.UnicodeCategory.FinalQuotePunctuation; // (Pf) - "Cf", enum 15; //System.Globalization.UnicodeCategory.Format; // (Cf) - "Pi", System.Globalization.UnicodeCategory.InitialQuotePunctuation; // (Pi) - "Nl", System.Globalization.UnicodeCategory.LetterNumber; // (Nl) - "Zl", System.Globalization.UnicodeCategory.LineSeparator; // (Zl) - "Ll", System.Globalization.UnicodeCategory.LowercaseLetter; // (Ll) - "Sm", System.Globalization.UnicodeCategory.MathSymbol; // (Sm) - "Lm", System.Globalization.UnicodeCategory.ModifierLetter; // (Lm) - "Sk", System.Globalization.UnicodeCategory.ModifierSymbol; // (Sk) - "Mn", System.Globalization.UnicodeCategory.NonSpacingMark; // (Mn) - "Ps", System.Globalization.UnicodeCategory.OpenPunctuation; // (Ps) - "Lo", System.Globalization.UnicodeCategory.OtherLetter; // (Lo) - "Cn", 
System.Globalization.UnicodeCategory.OtherNotAssigned; // (Cn) - "No", System.Globalization.UnicodeCategory.OtherNumber; // (No) - "Po", System.Globalization.UnicodeCategory.OtherPunctuation; // (Po) - "So", System.Globalization.UnicodeCategory.OtherSymbol; // (So) - "Zp", System.Globalization.UnicodeCategory.ParagraphSeparator; // (Zp) - "Co", System.Globalization.UnicodeCategory.PrivateUse; // (Co) - "Zs", System.Globalization.UnicodeCategory.SpaceSeparator; // (Zs) - "Mc", System.Globalization.UnicodeCategory.SpacingCombiningMark; // (Mc) - "Cs", System.Globalization.UnicodeCategory.Surrogate; // (Cs) - "Lt", System.Globalization.UnicodeCategory.TitlecaseLetter; // (Lt) - "Lu", System.Globalization.UnicodeCategory.UppercaseLetter; // (Lu) +let unicodeCategories = + dict + [| "Pe", UnicodeCategory.ClosePunctuation; // (Pe) + "Pc", UnicodeCategory.ConnectorPunctuation; // (Pc) + "Cc", UnicodeCategory.Control; // (Cc) + "Sc", UnicodeCategory.CurrencySymbol; // (Sc) + "Pd", UnicodeCategory.DashPunctuation; // (Pd) + "Nd", UnicodeCategory.DecimalDigitNumber; // (Nd) + "Me", UnicodeCategory.EnclosingMark; // (Me) + "Pf", UnicodeCategory.FinalQuotePunctuation; // (Pf) + "Cf", UnicodeCategory.Format; // (Cf) + "Pi", UnicodeCategory.InitialQuotePunctuation; // (Pi) + "Nl", UnicodeCategory.LetterNumber; // (Nl) + "Zl", UnicodeCategory.LineSeparator; // (Zl) + "Ll", UnicodeCategory.LowercaseLetter; // (Ll) + "Sm", UnicodeCategory.MathSymbol; // (Sm) + "Lm", UnicodeCategory.ModifierLetter; // (Lm) + "Sk", UnicodeCategory.ModifierSymbol; // (Sk) + "Mn", UnicodeCategory.NonSpacingMark; // (Mn) + "Ps", UnicodeCategory.OpenPunctuation; // (Ps) + "Lo", UnicodeCategory.OtherLetter; // (Lo) + "Cn", UnicodeCategory.OtherNotAssigned; // (Cn) + "No", UnicodeCategory.OtherNumber; // (No) + "Po", UnicodeCategory.OtherPunctuation; // (Po) + "So", UnicodeCategory.OtherSymbol; // (So) + "Zp", UnicodeCategory.ParagraphSeparator; // (Zp) + "Co", UnicodeCategory.PrivateUse; // (Co) + "Zs", 
UnicodeCategory.SpaceSeparator; // (Zs) + "Mc", UnicodeCategory.SpacingCombiningMark; // (Mc) + "Cs", UnicodeCategory.Surrogate; // (Cs) + "Lt", UnicodeCategory.TitlecaseLetter; // (Lt) + "Lu", UnicodeCategory.UppercaseLetter; // (Lu) |] let NumUnicodeCategories = unicodeCategories.Count -let _ = assert (NumUnicodeCategories = 30) // see table interpreter +assert (NumUnicodeCategories = 30) // see table interpreter let encodedUnicodeCategoryBase = 0xFFFFFF00u + let EncodeUnicodeCategoryIndex(idx:int) = encodedUnicodeCategoryBase + uint32 idx -let EncodeUnicodeCategory(s:string) = - if not (!unicode) then - failwith "unicode category classes may only be used if --unicode is specified"; - if unicodeCategories.ContainsKey(s) then +let EncodeUnicodeCategory s: Parser = fun ctx -> + if not ctx.unicode then + failwith "unicode category classes may only be used if --unicode is specified" + if unicodeCategories.ContainsKey(s) then EncodeUnicodeCategoryIndex (int32 unicodeCategories.[s]) else failwithf "invalid Unicode category: '%s'" s +let TryDecodeUnicodeCategory(x:Alphabet) : UnicodeCategory option = + let maybeUnicodeCategory = x - encodedUnicodeCategoryBase |> int32 |> enum + if UnicodeCategory.IsDefined(typeof, maybeUnicodeCategory) then + Some maybeUnicodeCategory + else + None + +let (|UnicodeCategoryAP|_|) (x:Alphabet) = TryDecodeUnicodeCategory x + let IsUnicodeCategory(x:Alphabet) = (encodedUnicodeCategoryBase <= x) && (x < encodedUnicodeCategoryBase + uint32 NumUnicodeCategories) let UnicodeCategoryIndex(x:Alphabet) = (x - encodedUnicodeCategoryBase) let numLowUnicodeChars = 128 -let _ = assert (numLowUnicodeChars = 128) // see table interpreter -let specificUnicodeChars = new Dictionary<_,_>() -let specificUnicodeCharsDecode = new Dictionary<_,_>() -let EncodeChar(c:char) = +assert (numLowUnicodeChars = 128) // see table interpreter +let specificUnicodeChars = Dictionary() +let specificUnicodeCharsDecode = Dictionary() + +let TryEncodeChar(c:char): Parser = 
fun ctx -> let x = System.Convert.ToUInt32 c - if !unicode then - if x < uint32 numLowUnicodeChars then x - else + if ctx.unicode then + if x < uint32 numLowUnicodeChars then Some x + else if not(specificUnicodeChars.ContainsKey(c)) then - let idx = uint32 numLowUnicodeChars + uint32 specificUnicodeChars.Count + let idx = uint32 numLowUnicodeChars + uint32 specificUnicodeChars.Count specificUnicodeChars.[c] <- idx specificUnicodeCharsDecode.[idx] <- c - specificUnicodeChars.[c] - else - if x >= 256u then failwithf "the Unicode character '%c' may not be used unless --unicode is specified" c; - x -let DecodeChar(x:Alphabet) = - if !unicode then + Some specificUnicodeChars.[c] + else + if x >= 256u + then None + else Some x + +let EncodeChar(c:char) : Parser = fun ctx -> + TryEncodeChar c ctx + |> Option.defaultWith (fun () -> failwithf "the Unicode character '0x%x' may not be used unless --unicode is specified" <| int c) + +let DecodeChar(x:Alphabet): Parser = fun ctx -> + if ctx.unicode then if x < uint32 numLowUnicodeChars then System.Convert.ToChar x else specificUnicodeCharsDecode.[x] else - if x >= 256u then failwithf "the Unicode character '%x' may not be used unless --unicode is specified" x; + if x >= 256u then failwithf "the Unicode character '0x%x' may not be used unless --unicode is specified" x System.Convert.ToChar x - - + + let NumSpecificUnicodeChars() = specificUnicodeChars.Count -let GetSpecificUnicodeChars() = - specificUnicodeChars - |> Seq.sortBy (fun (KeyValue(k,v)) -> v) - |> Seq.map (fun (KeyValue(k,v)) -> k) - -let GetSingleCharAlphabet() = - if !unicode - then Set.ofList [ for c in 0..numLowUnicodeChars-1 do yield (char c) - for c in GetSpecificUnicodeChars() do yield c ] - else Set.ofList [ for x in 0..255 -> (char x) ] - -let GetAlphabet() = - if !unicode - then Set.ofList [ for c in GetSingleCharAlphabet() do yield EncodeChar c +let GetSpecificUnicodeChars() = + specificUnicodeChars + |> Seq.sortBy (fun (KeyValue(_,v)) -> v) + |> Seq.map 
(fun (KeyValue(k,_)) -> k) + +let GetSingleCharAlphabet: Parser> = fun ctx -> + if ctx.unicode + then Set.ofList [ yield! { char 0 .. char <| numLowUnicodeChars-1 } + yield! GetSpecificUnicodeChars() ] + else Set.ofList [ char 0 .. char 255 ] + +let GetAlphabet: Parser> = fun ctx -> + if ctx.unicode + then Set.ofList [ for c in GetSingleCharAlphabet ctx do yield EncodeChar c ctx for uc in 0 .. NumUnicodeCategories-1 do yield EncodeUnicodeCategoryIndex uc ] - else Set.ofList [ for c in GetSingleCharAlphabet() do yield EncodeChar c ] + else GetSingleCharAlphabet ctx |> Seq.map (fun c -> EncodeChar c ctx) |> set + - //let DecodeAlphabet (x:Alphabet) = System.Convert.ToChar(x) (* -for i in 0 .. 65535 do +for i in 0 .. 65535 do let c = char i - if System.Char.GetUnicodeCategory c = System.Globalization.UnicodeCategory.PrivateUse then + if System.Char.GetUnicodeCategory c = System.Globalization.UnicodeCategory.PrivateUse then printfn "i = %x" i *) -type Spec = - { TopCode: Code; - Macros: (Ident * Regexp) list; - Rules: (Ident * Ident list * Clause list) list; - BottomCode: Code } -and Clause = Regexp * Code -and Regexp = - | Alt of Regexp list +type Input = + | Alphabet of Parser + | UnicodeCategory of string + | Any + | NotCharSet of Parser> +type Regexp = + | Alt of Parser | Seq of Regexp list | Inp of Input | Star of Regexp | Macro of Ident -and Input = - | Alphabet of Alphabet - | UnicodeCategory of string - | Any - | NotCharSet of Set - -type NodeId = int - -type NfaNode = - { Id: NodeId; - Name: string; - Transitions: Dictionary; +type Clause = Regexp * Code + +type Rule = Ident * Ident list * Clause list +type Macro = Ident * Regexp + +type Spec = + { TopCode: Code + Macros: Macro list + Rules: Rule list + BottomCode: Code } + +type NodeId = int + +type NfaNode = + { Id: NodeId + Name: string + Transitions: Dictionary Accepted: (int * int) list } -type DfaNode = - { Id: int; - Name: string; - mutable Transitions: (Alphabet * DfaNode) list; +type DfaNode = + { Id: 
int + Name: string + mutable Transitions: (Alphabet * DfaNode) list Accepted: (int * int) list } type MultiMap<'a,'b> = Dictionary<'a,'b list> @@ -163,246 +185,254 @@ let LookupMultiMap (trDict:MultiMap<_,_>) a = let AddToMultiMap (trDict:MultiMap<_,_>) a b = let prev = LookupMultiMap trDict a - trDict.[a] <- b :: prev + trDict.[a] <- b::prev -type NfaNodeMap() = - let map = new Dictionary(100) - member x.Item with get(nid) = map.[nid] +type NfaNodeMap() = + let map = Dictionary(100) + member x.Item with get nid = map.[nid] member x.Count = map.Count - member x.NewNfaNode(trs,ac) = + member x.NewNfaNode(trs,ac) = let nodeId = map.Count+1 // ID zero is reserved - let trDict = new Dictionary<_,_>(List.length trs) - for (a,b) in trs do + let trDict = Dictionary<_,_>(List.length trs) + for a,b in trs do AddToMultiMap trDict a b - + let node : NfaNode = {Id=nodeId; Name=string nodeId; Transitions=trDict; Accepted=ac} - map.[nodeId] <-node; + map.[nodeId] <-node node -let LexerStateToNfa (macros: Map) (clauses: Clause list) = +let LexerStateToNfa ctx (macros: Map) (clauses: Clause list) = + + /// Table allocating node ids + let nfaNodeMap = NfaNodeMap() - /// Table allocating node ids - let nfaNodeMap = new NfaNodeMap() - /// Compile a regular expression into the NFA - let rec CompileRegexp re dest = - match re with - | Alt res -> - let trs = res |> List.map (fun re -> (Epsilon,CompileRegexp re dest)) + let rec CompileRegexp re dest = + match re with + | Alt res -> + let trs = res ctx |> List.map (fun re -> (Epsilon,CompileRegexp re dest)) nfaNodeMap.NewNfaNode(trs,[]) - | Seq res -> - List.foldBack (CompileRegexp) res dest - | Inp (Alphabet c) -> - nfaNodeMap.NewNfaNode([(c, dest)],[]) - - | Star re -> + | Seq res -> + List.foldBack CompileRegexp res dest + | Inp (Alphabet c) -> + let c = c ctx + match c with + | c when not ctx.caseInsensitive || c = Eof -> + nfaNodeMap.NewNfaNode([(c, dest)],[]) + | UnicodeCategoryAP uc -> + let allCasedCategories = 
[UnicodeCategory.UppercaseLetter; UnicodeCategory.LowercaseLetter; UnicodeCategory.TitlecaseLetter] + let isCasedLetterCategory = allCasedCategories |> Seq.contains uc + if isCasedLetterCategory then + let trs = allCasedCategories |> List.map (fun x -> (EncodeUnicodeCategoryIndex (int32 x) ,dest)) + nfaNodeMap.NewNfaNode(trs,[]) + else + nfaNodeMap.NewNfaNode([(c, dest)],[]) + | c -> + let decodedChar = DecodeChar c ctx + let trs = + [ + System.Char.ToLowerInvariant decodedChar + System.Char.ToUpperInvariant decodedChar + ] + |> List.distinct + |> List.choose (fun letter -> TryEncodeChar letter ctx) + |> List.map (fun encodedLetter -> (encodedLetter, dest)) + nfaNodeMap.NewNfaNode(trs,[]) + | Star re -> let nfaNode = nfaNodeMap.NewNfaNode([(Epsilon, dest)],[]) let sre = CompileRegexp re nfaNode AddToMultiMap nfaNode.Transitions Epsilon sre nfaNodeMap.NewNfaNode([(Epsilon,sre); (Epsilon,dest)],[]) - | Macro m -> - if not (macros.ContainsKey(m)) then failwith ("The macro "+m+" is not defined"); - CompileRegexp (macros.[m]) dest + | Macro m -> + if not <| macros.ContainsKey(m) then failwithf "The macro %s is not defined" m + CompileRegexp macros.[m] dest // These cases unwind the difficult cases in the syntax that rely on knowing the // entire alphabet. // // Note we've delayed the expension of these until we've worked out all the 'special' Unicode characters // mentioned in the entire lexer spec, i.e. we wait until GetAlphabet returns a reliable and stable answer. 
- | Inp (UnicodeCategory uc) -> - let re = Alt([ yield Inp(Alphabet(EncodeUnicodeCategory uc)) - // Also include any specific characters in this category - for c in GetSingleCharAlphabet() do - if System.Char.GetUnicodeCategory(c) = unicodeCategories.[uc] then - yield Inp(Alphabet(EncodeChar(c))) ]) + | Inp (UnicodeCategory uc) -> + let re = Alt(fun ctx -> + [ yield Inp(Alphabet(EncodeUnicodeCategory uc)) + // Also include any specific characters in this category + for c in GetSingleCharAlphabet ctx do + if System.Char.GetUnicodeCategory(c) = unicodeCategories.[uc] then + yield Inp(Alphabet(EncodeChar c)) ]) CompileRegexp re dest - | Inp Any -> - let re = Alt([ for n in GetAlphabet() do yield Inp(Alphabet(n)) ]) + | Inp Any -> + let re = Alt(fun ctx -> + [ for n in GetAlphabet ctx do yield Inp(Alphabet(fun _ -> n)) ] + ) CompileRegexp re dest - | Inp (NotCharSet chars) -> - let re = Alt [ // Include any characters from those in the alphabet besides those that are not immediately excluded - for c in GetSingleCharAlphabet() do - let ec = EncodeChar c - if not (chars.Contains(ec)) then - yield Inp(Alphabet(ec)) - - // Include all unicode categories - // That is, negations _only_ exclude precisely the given set of characters. You can't - // exclude whole classes of characters as yet - if !unicode then - let ucs = chars |> Set.map(DecodeChar >> System.Char.GetUnicodeCategory) - for KeyValue(nm,uc) in unicodeCategories do - //if ucs.Contains(uc) then - // do printfn "warning: the unicode category '\\%s' ('%s') is automatically excluded by this character set negation. Consider adding this to the negation." nm (uc.ToString()) - // yield! 
[] - //else - yield Inp(Alphabet(EncodeUnicodeCategory nm)) - ] + | Inp (NotCharSet chars) -> + let chars = chars ctx + let re = Alt(fun ctx -> + [ // Include any characters from those in the alphabet besides those that are not immediately excluded + for c in GetSingleCharAlphabet ctx do + let ec = EncodeChar c ctx + if not (chars.Contains(ec)) then + yield Inp(Alphabet(fun _ -> ec)) + + // Include all unicode categories + // That is, negations _only_ exclude precisely the given set of characters. You can't + // exclude whole classes of characters as yet + if ctx.unicode then + let _ = chars |> Set.map(fun c -> DecodeChar c ctx |> System.Char.GetUnicodeCategory) + for KeyValue(nm,_) in unicodeCategories do + //if ucs.Contains(uc) then + // printfn "warning: the unicode category '\\%s' ('%O') is automatically excluded by this character set negation. Consider adding this to the negation." nm uc + //else + yield Inp(Alphabet(EncodeUnicodeCategory nm)) + ] + ) CompileRegexp re dest - let actions = new System.Collections.Generic.List<_>() - + let actions = List() + /// Compile an acceptance of a regular expression into the NFA - let sTrans macros nodeId (regexp,code) = + let sTrans _ nodeId (regexp,code) = let actionId = actions.Count actions.Add(code) let sAccept = nfaNodeMap.NewNfaNode([],[(nodeId,actionId)]) - CompileRegexp regexp sAccept + CompileRegexp regexp sAccept - let trs = clauses |> List.mapi (fun n x -> (Epsilon,sTrans macros n x)) + let trs = clauses |> List.mapi (fun n x -> (Epsilon,sTrans macros n x)) let nfaStartNode = nfaNodeMap.NewNfaNode(trs,[]) nfaStartNode,(actions |> Seq.readonly), nfaNodeMap // TODO: consider a better representation here. 
-type internal NfaNodeIdSetBuilder = HashSet +type NfaNodeIdSetBuilder = HashSet -type internal NfaNodeIdSet(nodes: NfaNodeIdSetBuilder) = +type NfaNodeIdSet(nodes: NfaNodeIdSetBuilder) = // BEWARE: the next line is performance critical - let s = nodes |> Seq.toArray |> (fun arr -> Array.sortInPlaceWith compare arr; arr) // 19 + let s = nodes |> Seq.toArray + do Array.sortInPlaceWith compare s // 19 - // These are all surprisingly slower: - //let s = nodes |> Seq.toArray |> Array.sort + // These are all surprisingly slower (because they create two arrays): + //let s = nodes |> Seq.toArray |> Array.sort //let s = nodes |> Seq.toArray |> Array.sortWith compare // 76 //let s = nodes |> Seq.toArray |> (fun arr -> Array.sortInPlace arr; arr) // 76 member x.Representation = s - member x.Elements = s + member x.Elements = s member x.Fold f z = Array.fold f z s interface System.IComparable with - member x.CompareTo(y:obj) = - let y = (y :?> NfaNodeIdSet) - let xr = x.Representation - let yr = y.Representation - let c = compare xr.Length yr.Length - if c <> 0 then c else - let n = yr.Length - let rec go i = - if i >= n then 0 else - let c = compare xr.[i] yr.[i] - if c <> 0 then c else - go (i+1) - go 0 + member x.CompareTo(y:obj) = + Array.compareWith compare x.Representation (y :?> NfaNodeIdSet).Representation override x.Equals(y:obj) = match y with - | :? NfaNodeIdSet as y -> + | :? 
NfaNodeIdSet as y -> let xr = x.Representation let yr = y.Representation let n = yr.Length - xr.Length = n && - (let rec go i = (i < n) && xr.[i] = yr.[i] && go (i+1) - go 0) + let rec go i = (i >= n) || (xr.[i] = yr.[i] && go (i+1)) + xr.Length = n && go 0 | _ -> false override x.GetHashCode() = hash s member x.IsEmpty = (s.Length = 0) - member x.Iterate f = s |> Array.iter f + member x.Iterate f = Array.iter f s type NodeSetSet = Set -let newDfaNodeId = - let i = ref 0 +let newDfaNodeId = + let i = ref 0 fun () -> let res = !i in incr i; res - -let NfaToDfa (nfaNodeMap:NfaNodeMap) nfaStartNode = - let numNfaNodes = nfaNodeMap.Count - let rec EClosure1 (acc:NfaNodeIdSetBuilder) (n:NfaNode) = - if not (acc.Contains n.Id) then - acc.Add n.Id |> ignore; + +let NfaToDfa (nfaNodeMap:NfaNodeMap) nfaStartNode = + let rec EClosure1 (acc:NfaNodeIdSetBuilder) (n:NfaNode) = + if not (acc.Contains(n.Id)) then + acc.Add(n.Id) |> ignore if n.Transitions.ContainsKey(Epsilon) then - match n.Transitions.[Epsilon] with + match n.Transitions.[Epsilon] with | [] -> () // this Clause is an optimization - the list is normally empty - | tr -> + | tr -> //printfn "n.Id = %A, #Epsilon = %d" n.Id tr.Length - tr |> List.iter (EClosure1 acc) + tr |> List.iter (EClosure1 acc) - let EClosure (moves:list) = - let acc = new NfaNodeIdSetBuilder(HashIdentity.Structural) + let EClosure (moves:list) = + let acc = NfaNodeIdSetBuilder(HashIdentity.Structural) for i in moves do - EClosure1 acc nfaNodeMap.[i]; - new NfaNodeIdSet(acc) + EClosure1 acc nfaNodeMap.[i] + NfaNodeIdSet(acc) // Compute all the immediate one-step moves for a set of NFA states, as a dictionary // mapping inputs to destination lists - let ComputeMoves (nset:NfaNodeIdSet) = - let moves = new MultiMap<_,_>() - nset.Iterate(fun nodeId -> - for (KeyValue(inp,dests)) in nfaNodeMap.[nodeId].Transitions do - if inp <> Epsilon then - match dests with + let ComputeMoves (nset:NfaNodeIdSet) = + let moves = MultiMap<_,_>() + nset.Iterate(fun 
nodeId -> + for KeyValue(inp,dests) in nfaNodeMap.[nodeId].Transitions do + if inp <> Epsilon then + match dests with | [] -> () // this Clause is an optimization - the list is normally empty | tr -> tr |> List.iter(fun dest -> AddToMultiMap moves inp dest.Id)) moves - let acc = new NfaNodeIdSetBuilder(HashIdentity.Structural) - EClosure1 acc nfaStartNode; - let nfaSet0 = new NfaNodeIdSet(acc) + let acc = NfaNodeIdSetBuilder(HashIdentity.Structural) + EClosure1 acc nfaStartNode + let nfaSet0 = NfaNodeIdSet(acc) - let dfaNodes = ref (Map.empty) + let dfaNodes = Dictionary() - let GetDfaNode nfaSet = - if (!dfaNodes).ContainsKey(nfaSet) then - (!dfaNodes).[nfaSet] - else + let GetDfaNode nfaSet = + if dfaNodes.ContainsKey(nfaSet) then + dfaNodes.[nfaSet] + else let dfaNode = - { Id= newDfaNodeId(); - Name = nfaSet.Fold (fun s nid -> nfaNodeMap.[nid].Name+"-"+s) ""; - Transitions=[]; - Accepted= nfaSet.Elements + { Id = newDfaNodeId() + Name = nfaSet.Fold (fun s nid -> nfaNodeMap.[nid].Name+"-"+s) "" + Transitions = [] + Accepted= nfaSet.Elements |> Seq.map (fun nid -> nfaNodeMap.[nid].Accepted) |> List.concat } - //Printf.printfn "id = %d" dfaNode.Id; + //printfn "id = %d" dfaNode.Id - dfaNodes := (!dfaNodes).Add(nfaSet,dfaNode); + dfaNodes.Add(nfaSet,dfaNode) dfaNode - - let workList = ref [nfaSet0] - let doneSet = ref Set.empty - - //let count = ref 0 - let rec Loop () = - match !workList with - | [] -> () - | nfaSet :: t -> - workList := t; - if (!doneSet).Contains(nfaSet) then - Loop () - else - let moves = ComputeMoves nfaSet - for (KeyValue(inp,movesForInput)) in moves do - assert (inp <> Epsilon); - let moveSet = EClosure movesForInput; - if not moveSet.IsEmpty then - //incr count - let dfaNode = GetDfaNode nfaSet - dfaNode.Transitions <- (inp, GetDfaNode moveSet) :: dfaNode.Transitions; - (* Printf.printf "%d (%s) : %s --> %d (%s)\n" dfaNode.Id dfaNode.Name (match inp with EncodeChar c -> String.make 1 c | LEof -> "eof") moveSetDfaNode.Id 
moveSetDfaNode.Name;*) - workList := moveSet :: !workList; - - doneSet := (!doneSet).Add(nfaSet); - - - Loop() - Loop(); - //Printf.printfn "count = %d" !count; + + let workList = Stack() + workList.Push nfaSet0 + let doneSet = HashSet() + + //let count = ref 0 + while workList.Count <> 0 do + let nfaSet = workList.Pop() + if not <| doneSet.Contains(nfaSet) then + let moves = ComputeMoves nfaSet + for KeyValue(inp,movesForInput) in moves do + assert (inp <> Epsilon) + let moveSet = EClosure movesForInput + if not moveSet.IsEmpty then + //incr count + let dfaNode = GetDfaNode nfaSet + dfaNode.Transitions <- (inp, GetDfaNode moveSet) :: dfaNode.Transitions + // printf "%d (%s) : %s --> %d (%s)\n" dfaNode.Id dfaNode.Name (match inp with EncodeChar c -> String.make 1 c | LEof -> "eof") moveSetDfaNode.Id moveSetDfaNode.Name + workList.Push(moveSet) + + doneSet.Add(nfaSet) |> ignore + + //printfn "count = %d" !count let ruleStartNode = GetDfaNode nfaSet0 - let ruleNodes = - (!dfaNodes) - |> Seq.map (fun kvp -> kvp.Value) + let ruleNodes = + dfaNodes + |> Seq.map (fun kvp -> kvp.Value) |> Seq.toList |> List.sortBy (fun s -> s.Id) ruleStartNode,ruleNodes -let Compile spec = +let Compile ctx spec = + let macros = Map.ofList spec.Macros List.foldBack - (fun (name,args,clauses) (perRuleData,dfaNodes) -> - let nfa, actions, nfaNodeMap = LexerStateToNfa (Map.ofList spec.Macros) clauses + (fun (_,_,clauses) (perRuleData,dfaNodes) -> + let nfa, actions, nfaNodeMap = LexerStateToNfa ctx macros clauses let ruleStartNode, ruleNodes = NfaToDfa nfaNodeMap nfa - //Printf.printfn "name = %s, ruleStartNode = %O" name ruleStartNode.Id; + //printfn "name = %s, ruleStartNode = %O" name ruleStartNode.Id (ruleStartNode,actions) :: perRuleData, ruleNodes @ dfaNodes) spec.Rules ([],[]) diff --git a/buildtools/fslex/fslexdriver.fs b/buildtools/fslex/fslexdriver.fs new file mode 100644 index 00000000000..b0b24e78524 --- /dev/null +++ b/buildtools/fslex/fslexdriver.fs @@ -0,0 +1,205 @@ +module 
FsLexYacc.FsLex.Driver + +open FsLexYacc.FsLex.AST +open System +open System.IO +open FSharp.Text.Lexing +open System.Collections.Generic + +type Domain = Unicode | ASCII + +/// Wraps the inputs to the code generator +type GeneratorState = + { inputFileName: string + outputFileName: string + inputCodePage: System.Text.Encoding + generatedModuleName: string option + disableLightMode: bool option + generateInternalModule: bool + lexerLibraryName: string + domain : Domain } + +type PerRuleData = list> +type DfaNodes = list + +type Writer(fileName) = + let os = File.CreateText fileName + let mutable lineCount = 0 + let incr () = + lineCount <- lineCount + 1 + + member x.writeLine fmt = + Printf.kfprintf (fun () -> incr(); os.WriteLine()) os fmt + + member x.write fmt = + Printf.fprintf os fmt + + member x.writeCode (code, pos: Position) = + if pos <> Position.Empty // If bottom code is unspecified, then position is empty. + then + x.writeLine "# %d \"%s\"" pos.Line pos.FileName + x.writeLine "%s" code + let numLines = code.Replace("\r","").Split([| '\n' |]).Length + lineCount <- lineCount + numLines + x.writeLine "# %d \"%s\"" lineCount fileName + + member x.LineCount = lineCount + + member x.WriteUint16 (n: int) = + os.Write n; + os.Write "us;" + + interface IDisposable with + member x.Dispose() = os.Dispose() + +let sentinel = 255 * 256 + 255 + +let readSpecFromFile fileName codePage = + let stream,reader,lexbuf = UnicodeFileAsLexbuf(fileName, codePage) + use stream = stream + use reader = reader + try + let spec = Parser.spec Lexer.token lexbuf + Ok spec + with e -> + (e, lexbuf.StartPos.Line, lexbuf.StartPos.Column) + |> Error + +let writeLightMode lightModeDisabled (fileName: string) (writer: Writer) = + if lightModeDisabled = Some false || (lightModeDisabled = None && (Path.HasExtension(fileName) && Path.GetExtension(fileName) = ".ml")) + then + writer.write "#light \"off\"" + +let writeModuleExpression genModuleName isInternal (writer: Writer) = + match 
genModuleName with + | None -> () + | Some s -> + let internal_tag = if isInternal then "internal " else "" + writer.writeLine "module %s%s" internal_tag s + +let writeTopCode code (writer: Writer) = writer.writeCode code + +let writeUnicodeTranslationArray dfaNodes domain (writer: Writer) = + let parseContext = + { unicode = match domain with | Unicode -> true | ASCII -> false + caseInsensitive = false } + writer.writeLine "let trans : uint16[] array = " + writer.writeLine " [| " + match domain with + | Unicode -> + let specificUnicodeChars = GetSpecificUnicodeChars() + // This emits a (numLowUnicodeChars+NumUnicodeCategories+(2*#specificUnicodeChars)+1) * #states array of encoded UInt16 values + + // Each row for the Unicode table has format + // 128 entries for ASCII characters + // A variable number of 2*UInt16 entries for SpecificUnicodeChars + // 30 entries, one for each UnicodeCategory + // 1 entry for EOF + // + // Each entry is an encoded UInt16 value indicating the next state to transition to for this input. + // + // For the SpecificUnicodeChars the entries are char/next-state pairs. + for state in dfaNodes do + writer.writeLine " (* State %d *)" state.Id + writer.write " [| " + let trans = + let dict = Dictionary() + state.Transitions |> List.iter dict.Add + dict + let emit n = + if trans.ContainsKey(n) then + writer.WriteUint16 trans.[n].Id + else + writer.WriteUint16 sentinel + for i = 0 to numLowUnicodeChars-1 do + let c = char i + emit (EncodeChar c parseContext) + for c in specificUnicodeChars do + writer.WriteUint16 (int c) + emit (EncodeChar c parseContext) + for i = 0 to NumUnicodeCategories-1 do + emit (EncodeUnicodeCategoryIndex i) + emit Eof + writer.writeLine "|];" + done + + | ASCII -> + // Each row for the ASCII table has format + // 256 entries for ASCII characters + // 1 entry for EOF + // + // Each entry is an encoded UInt16 value indicating the next state to transition to for this input. 
+ + // This emits a (256+1) * #states array of encoded UInt16 values + for state in dfaNodes do + writer.writeLine " (* State %d *)" state.Id + writer.write " [|" + let trans = + let dict = Dictionary() + state.Transitions |> List.iter dict.Add + dict + let emit n = + if trans.ContainsKey(n) then + writer.WriteUint16 trans.[n].Id + else + writer.WriteUint16 sentinel + for i = 0 to 255 do + let c = char i + emit (EncodeChar c parseContext) + emit Eof + writer.writeLine "|];" + done + + writer.writeLine " |] " + +let writeUnicodeActionsArray dfaNodes (writer: Writer) = + writer.write "let actions : uint16[] = [|" + for state in dfaNodes do + if state.Accepted.Length > 0 then + writer.WriteUint16 (snd state.Accepted.Head) + else + writer.WriteUint16 sentinel + done + writer.writeLine "|]" + +let writeUnicodeTables lexerLibraryName domain dfaNodes (writer: Writer) = + writeUnicodeTranslationArray dfaNodes domain writer + writeUnicodeActionsArray dfaNodes writer + writer.writeLine "let _fslex_tables = %s.%sTables.Create(trans,actions)" lexerLibraryName (match domain with | Unicode -> "Unicode" | ASCII -> "Ascii") + +let writeRules (rules: Rule list) (perRuleData: PerRuleData) outputFileName (writer: Writer) = + writer.writeLine "let rec _fslex_dummy () = _fslex_dummy() " + + // These actions push the additional start state and come first, because they are then typically inlined into later + // rules. This means more tailcalls are taken as direct branches, increasing efficiency and + // improving stack usage on platforms that do not take tailcalls. 
+ for (startNode, actions),(ident,args,_) in List.zip perRuleData rules do + writer.writeLine "// Rule %s" ident + writer.writeLine "and %s %s lexbuf =" ident (String.Join(" ", Array.ofList args)) + writer.writeLine " match _fslex_tables.Interpret(%d,lexbuf) with" startNode.Id + actions |> Seq.iteri (fun i (code:string, pos) -> + writer.writeLine " | %d -> ( " i + writer.writeLine "# %d \"%s\"" pos.Line pos.FileName + let lines = code.Split([| '\r'; '\n' |], StringSplitOptions.RemoveEmptyEntries) + for line in lines do + writer.writeLine " %s" line + writer.writeLine "# %d \"%s\"" writer.LineCount outputFileName + writer.writeLine " )") + writer.writeLine " | _ -> failwith \"%s\"" ident + + writer.writeLine "" + +let writeBottomCode code (writer: Writer) = writer.writeCode code + +let writeFooter outputFileName (writer: Writer) = writer.writeLine "# 3000000 \"%s\"" outputFileName + +let writeSpecToFile (state: GeneratorState) (spec: Spec) (perRuleData: PerRuleData) (dfaNodes: DfaNodes) = + use writer = new Writer(state.outputFileName) + writeLightMode state.disableLightMode state.outputFileName writer + writeModuleExpression state.generatedModuleName state.generateInternalModule writer + writeTopCode spec.TopCode writer + writeUnicodeTables state.lexerLibraryName state.domain dfaNodes writer + writeRules spec.Rules perRuleData state.outputFileName writer + writeBottomCode spec.BottomCode writer + writeFooter state.outputFileName writer + () \ No newline at end of file diff --git a/buildtools/fslex/fslexlex.fs b/buildtools/fslex/fslexlex.fs index b29bd10f84c..6738cdb1224 100644 --- a/buildtools/fslex/fslexlex.fs +++ b/buildtools/fslex/fslexlex.fs @@ -2,12 +2,12 @@ (* (c) Microsoft Corporation 2005-2008. 
*) -module internal FsLexYacc.FsLex.Lexer +module FsLexYacc.FsLex.Lexer open FsLexYacc.FsLex.AST open FsLexYacc.FsLex.Parser -open Internal.Utilities -open Internal.Utilities.Text.Lexing +open FSharp.Text +open FSharp.Text.Lexing open System.Text let escape c = @@ -340,69 +340,59 @@ let trans : uint16[] array = [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 92us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 65535us; 65535us; 65535us; 65535us; 92us; 65535us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 92us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; |]; |] let actions : uint16[] = [|65535us; 7us; 1us; 7us; 3us; 7us; 7us; 5us; 6us; 7us; 6us; 4us; 3us; 2us; 65535us; 65535us; 0us; 65535us; 5us; 1us; 2us; 5us; 3us; 4us; 5us; 3us; 2us; 0us; 65535us; 0us; 1us; 8us; 3us; 4us; 8us; 5us; 8us; 7us; 8us; 6us; 6us; 5us; 4us; 2us; 65535us; 7us; 3us; 4us; 7us; 5us; 6us; 7us; 5us; 4us; 0us; 65535us; 1us; 65535us; 65535us; 2us; 65535us; 15us; 15us; 15us; 15us; 15us; 31us; 11us; 12us; 13us; 14us; 31us; 
15us; 16us; 17us; 18us; 19us; 20us; 21us; 22us; 23us; 24us; 25us; 26us; 27us; 28us; 31us; 31us; 32us; 30us; 30us; 29us; 15us; 14us; 13us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 10us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 9us; 65535us; 65535us; 65535us; 65535us; 8us; 65535us; 65535us; 7us; 65535us; 65535us; 6us; 5us; 15us; 4us; 15us; 3us; 15us; 2us; 15us; 15us; 15us; 1us; 15us; 15us; 0us; |] -let _fslex_tables = Internal.Utilities.Text.Lexing.UnicodeTables.Create(trans,actions) +let _fslex_tables = FSharp.Text.Lexing.UnicodeTables.Create(trans,actions) let rec _fslex_dummy () = _fslex_dummy() -(* Rule token *) -and token (lexbuf : Internal.Utilities.Text.Lexing.LexBuffer<_>) = _fslex_token 60 lexbuf -(* Rule string *) -and string p buff (lexbuf : Internal.Utilities.Text.Lexing.LexBuffer<_>) = _fslex_string p buff 44 lexbuf -(* Rule code *) -and code p buff (lexbuf : Internal.Utilities.Text.Lexing.LexBuffer<_>) = _fslex_code p buff 28 lexbuf -(* Rule codestring *) -and codestring buff (lexbuf : Internal.Utilities.Text.Lexing.LexBuffer<_>) = _fslex_codestring buff 17 lexbuf -(* Rule comment *) -and comment p (lexbuf : Internal.Utilities.Text.Lexing.LexBuffer<_>) = _fslex_comment p 0 lexbuf -(* Rule token *) -and _fslex_token _fslex_state lexbuf = - match _fslex_tables.Interpret(_fslex_state,lexbuf) with +// Rule token +and token lexbuf = + match _fslex_tables.Interpret(60,lexbuf) with | 0 -> ( # 76 "fslexlex.fsl" RULE -# 361 "fslexlex.fs" +# 351 "fslexlex.fs" ) | 1 -> ( # 77 "fslexlex.fsl" PARSE -# 366 "fslexlex.fs" +# 356 "fslexlex.fs" ) | 2 -> ( # 78 "fslexlex.fsl" EOF -# 371 "fslexlex.fs" +# 361 "fslexlex.fs" ) | 3 -> ( # 79 "fslexlex.fsl" LET -# 376 "fslexlex.fs" +# 366 "fslexlex.fs" ) | 4 -> ( # 80 "fslexlex.fsl" AND -# 381 "fslexlex.fs" +# 371 "fslexlex.fs" ) | 5 -> ( # 82 "fslexlex.fsl" let s = lexeme lexbuf in CHAR (if s.[1] = '\\' then escape s.[2] else s.[1]) -# 387 "fslexlex.fs" +# 
377 "fslexlex.fs" ) | 6 -> ( # 86 "fslexlex.fsl" let s = lexeme lexbuf in CHAR (trigraph s.[2] s.[3] s.[4]) -# 393 "fslexlex.fs" +# 383 "fslexlex.fs" ) | 7 -> ( # 90 "fslexlex.fsl" let s = lexeme lexbuf in CHAR (hexgraph s.[3] s.[4]) -# 399 "fslexlex.fs" +# 389 "fslexlex.fs" ) | 8 -> ( # 94 "fslexlex.fsl" let s = lexeme lexbuf in CHAR (unicodegraph_short s.[3..6]) -# 405 "fslexlex.fs" +# 395 "fslexlex.fs" ) | 9 -> ( # 98 "fslexlex.fsl" @@ -410,13 +400,13 @@ and _fslex_token _fslex_state lexbuf = match (unicodegraph_long s.[3..10]) with | None, c -> CHAR(c) | Some _ , _ -> failwith "Unicode characters needing surrogate pairs are not yet supported by this tool" -# 413 "fslexlex.fs" +# 403 "fslexlex.fs" ) | 10 -> ( # 104 "fslexlex.fsl" let s = (lexeme lexbuf).[2..3] in UNICODE_CATEGORY (s) -# 419 "fslexlex.fs" +# 409 "fslexlex.fs" ) | 11 -> ( # 107 "fslexlex.fsl" @@ -425,172 +415,172 @@ and _fslex_token _fslex_state lexbuf = // adjust the first line to get even indentation for all lines w.r.t. 
the left hand margin buff.Append (String.replicate (lexbuf.StartPos.Column+1) " ") |> ignore; code p buff lexbuf -# 428 "fslexlex.fs" +# 418 "fslexlex.fs" ) | 12 -> ( # 113 "fslexlex.fsl" string lexbuf.StartPos (new StringBuilder 100) lexbuf -# 433 "fslexlex.fs" +# 423 "fslexlex.fs" ) | 13 -> ( # 115 "fslexlex.fsl" token lexbuf -# 438 "fslexlex.fs" +# 428 "fslexlex.fs" ) | 14 -> ( # 116 "fslexlex.fsl" newline lexbuf; token lexbuf -# 443 "fslexlex.fs" +# 433 "fslexlex.fs" ) | 15 -> ( # 117 "fslexlex.fsl" IDENT (lexeme lexbuf) -# 448 "fslexlex.fs" +# 438 "fslexlex.fs" ) | 16 -> ( # 118 "fslexlex.fsl" BAR -# 453 "fslexlex.fs" +# 443 "fslexlex.fs" ) | 17 -> ( # 119 "fslexlex.fsl" DOT -# 458 "fslexlex.fs" +# 448 "fslexlex.fs" ) | 18 -> ( # 120 "fslexlex.fsl" PLUS -# 463 "fslexlex.fs" +# 453 "fslexlex.fs" ) | 19 -> ( # 121 "fslexlex.fsl" STAR -# 468 "fslexlex.fs" +# 458 "fslexlex.fs" ) | 20 -> ( # 122 "fslexlex.fsl" QMARK -# 473 "fslexlex.fs" +# 463 "fslexlex.fs" ) | 21 -> ( # 123 "fslexlex.fsl" EQUALS -# 478 "fslexlex.fs" +# 468 "fslexlex.fs" ) | 22 -> ( # 124 "fslexlex.fsl" LBRACK -# 483 "fslexlex.fs" +# 473 "fslexlex.fs" ) | 23 -> ( # 125 "fslexlex.fsl" RBRACK -# 488 "fslexlex.fs" +# 478 "fslexlex.fs" ) | 24 -> ( # 126 "fslexlex.fsl" LPAREN -# 493 "fslexlex.fs" +# 483 "fslexlex.fs" ) | 25 -> ( # 127 "fslexlex.fsl" RPAREN -# 498 "fslexlex.fs" +# 488 "fslexlex.fs" ) | 26 -> ( # 128 "fslexlex.fsl" UNDERSCORE -# 503 "fslexlex.fs" +# 493 "fslexlex.fs" ) | 27 -> ( # 129 "fslexlex.fsl" HAT -# 508 "fslexlex.fs" +# 498 "fslexlex.fs" ) | 28 -> ( # 130 "fslexlex.fsl" DASH -# 513 "fslexlex.fs" +# 503 "fslexlex.fs" ) | 29 -> ( # 131 "fslexlex.fsl" ignore(comment lexbuf.StartPos lexbuf); token lexbuf -# 518 "fslexlex.fs" +# 508 "fslexlex.fs" ) | 30 -> ( # 132 "fslexlex.fsl" token lexbuf -# 523 "fslexlex.fs" +# 513 "fslexlex.fs" ) | 31 -> ( # 133 "fslexlex.fsl" unexpected_char lexbuf -# 528 "fslexlex.fs" +# 518 "fslexlex.fs" ) | 32 -> ( # 134 "fslexlex.fsl" EOF -# 533 "fslexlex.fs" 
+# 523 "fslexlex.fs" ) | _ -> failwith "token" -(* Rule string *) -and _fslex_string p buff _fslex_state lexbuf = - match _fslex_tables.Interpret(_fslex_state,lexbuf) with +// Rule string +and string p buff lexbuf = + match _fslex_tables.Interpret(44,lexbuf) with | 0 -> ( # 136 "fslexlex.fsl" newline lexbuf; string p buff lexbuf -# 542 "fslexlex.fs" +# 532 "fslexlex.fs" ) | 1 -> ( # 138 "fslexlex.fsl" let _ = buff.Append (escape (lexeme lexbuf).[1]) in string p buff lexbuf -# 548 "fslexlex.fs" +# 538 "fslexlex.fs" ) | 2 -> ( # 141 "fslexlex.fsl" let s = lexeme lexbuf in let _ = buff.Append (trigraph s.[1] s.[2] s.[3]) in string p buff lexbuf -# 555 "fslexlex.fs" +# 545 "fslexlex.fs" ) | 3 -> ( # 144 "fslexlex.fsl" STRING (buff.ToString()) -# 560 "fslexlex.fs" +# 550 "fslexlex.fs" ) | 4 -> ( # 145 "fslexlex.fsl" newline lexbuf; let _ = buff.Append System.Environment.NewLine in string p buff lexbuf -# 567 "fslexlex.fs" +# 557 "fslexlex.fs" ) | 5 -> ( # 149 "fslexlex.fsl" let _ = buff.Append (lexeme lexbuf) in string p buff lexbuf -# 573 "fslexlex.fs" +# 563 "fslexlex.fs" ) | 6 -> ( # 151 "fslexlex.fsl" failwith (Printf.sprintf "end of file in string started at (%d,%d)" p.pos_lnum (p.pos_cnum - p.pos_bol)) -# 578 "fslexlex.fs" +# 568 "fslexlex.fs" ) | 7 -> ( # 152 "fslexlex.fsl" let _ = buff.Append (lexeme lexbuf).[0] in string p buff lexbuf -# 584 "fslexlex.fs" +# 574 "fslexlex.fs" ) | _ -> failwith "string" -(* Rule code *) -and _fslex_code p buff _fslex_state lexbuf = - match _fslex_tables.Interpret(_fslex_state,lexbuf) with +// Rule code +and code p buff lexbuf = + match _fslex_tables.Interpret(28,lexbuf) with | 0 -> ( # 155 "fslexlex.fsl" CODE (buff.ToString(), p) -# 593 "fslexlex.fs" +# 583 "fslexlex.fs" ) | 1 -> ( # 156 "fslexlex.fsl" @@ -598,137 +588,137 @@ and _fslex_code p buff _fslex_state lexbuf = ignore(code p buff lexbuf); let _ = buff.Append "}" in code p buff lexbuf -# 601 "fslexlex.fs" +# 591 "fslexlex.fs" ) | 2 -> ( # 161 "fslexlex.fsl" let _ = 
buff.Append (lexeme lexbuf) in code p buff lexbuf -# 607 "fslexlex.fs" +# 597 "fslexlex.fs" ) | 3 -> ( # 163 "fslexlex.fsl" let _ = buff.Append (lexeme lexbuf) in ignore(codestring buff lexbuf); code p buff lexbuf -# 614 "fslexlex.fs" +# 604 "fslexlex.fs" ) | 4 -> ( # 166 "fslexlex.fsl" newline lexbuf; let _ = buff.Append System.Environment.NewLine in code p buff lexbuf -# 621 "fslexlex.fs" +# 611 "fslexlex.fs" ) | 5 -> ( # 170 "fslexlex.fsl" let _ = buff.Append (lexeme lexbuf) in code p buff lexbuf -# 627 "fslexlex.fs" +# 617 "fslexlex.fs" ) | 6 -> ( # 173 "fslexlex.fsl" let _ = buff.Append (lexeme lexbuf) in code p buff lexbuf -# 633 "fslexlex.fs" +# 623 "fslexlex.fs" ) | 7 -> ( # 175 "fslexlex.fsl" EOF -# 638 "fslexlex.fs" +# 628 "fslexlex.fs" ) | 8 -> ( # 176 "fslexlex.fsl" let _ = buff.Append (lexeme lexbuf).[0] in code p buff lexbuf -# 644 "fslexlex.fs" +# 634 "fslexlex.fs" ) | _ -> failwith "code" -(* Rule codestring *) -and _fslex_codestring buff _fslex_state lexbuf = - match _fslex_tables.Interpret(_fslex_state,lexbuf) with +// Rule codestring +and codestring buff lexbuf = + match _fslex_tables.Interpret(17,lexbuf) with | 0 -> ( # 181 "fslexlex.fsl" let _ = buff.Append (lexeme lexbuf) in codestring buff lexbuf -# 654 "fslexlex.fs" +# 644 "fslexlex.fs" ) | 1 -> ( # 183 "fslexlex.fsl" let _ = buff.Append (lexeme lexbuf) in buff.ToString() -# 660 "fslexlex.fs" +# 650 "fslexlex.fs" ) | 2 -> ( # 185 "fslexlex.fsl" newline lexbuf; let _ = buff.Append System.Environment.NewLine in codestring buff lexbuf -# 667 "fslexlex.fs" +# 657 "fslexlex.fs" ) | 3 -> ( # 189 "fslexlex.fsl" let _ = buff.Append (lexeme lexbuf) in codestring buff lexbuf -# 673 "fslexlex.fs" +# 663 "fslexlex.fs" ) | 4 -> ( # 191 "fslexlex.fsl" failwith "unterminated string in code" -# 678 "fslexlex.fs" +# 668 "fslexlex.fs" ) | 5 -> ( # 192 "fslexlex.fsl" let _ = buff.Append (lexeme lexbuf).[0] in codestring buff lexbuf -# 684 "fslexlex.fs" +# 674 "fslexlex.fs" ) | _ -> failwith "codestring" -(* 
Rule comment *) -and _fslex_comment p _fslex_state lexbuf = - match _fslex_tables.Interpret(_fslex_state,lexbuf) with +// Rule comment +and comment p lexbuf = + match _fslex_tables.Interpret(0,lexbuf) with | 0 -> ( # 196 "fslexlex.fsl" comment p lexbuf -# 693 "fslexlex.fs" +# 683 "fslexlex.fs" ) | 1 -> ( # 197 "fslexlex.fsl" ignore(try string lexbuf.StartPos (new StringBuilder 100) lexbuf with Failure s -> failwith (s + "\n" + Printf.sprintf "error while processing string nested in comment started at (%d,%d)" p.pos_lnum (p.pos_cnum - p.pos_bol))); comment p lexbuf -# 700 "fslexlex.fs" +# 690 "fslexlex.fs" ) | 2 -> ( # 200 "fslexlex.fsl" ignore(try comment p lexbuf with Failure s -> failwith (s + "\n" + Printf.sprintf "error while processing nested comment started at (%d,%d)" p.pos_lnum (p.pos_cnum - p.pos_bol))); comment p lexbuf -# 706 "fslexlex.fs" +# 696 "fslexlex.fs" ) | 3 -> ( # 202 "fslexlex.fsl" newline lexbuf; comment p lexbuf -# 711 "fslexlex.fs" +# 701 "fslexlex.fs" ) | 4 -> ( # 203 "fslexlex.fsl" () -# 716 "fslexlex.fs" +# 706 "fslexlex.fs" ) | 5 -> ( # 204 "fslexlex.fsl" failwith (Printf.sprintf "end of file in comment started at (%d,%d)" p.pos_lnum (p.pos_cnum - p.pos_bol)) -# 721 "fslexlex.fs" +# 711 "fslexlex.fs" ) | 6 -> ( # 205 "fslexlex.fsl" comment p lexbuf -# 726 "fslexlex.fs" +# 716 "fslexlex.fs" ) | 7 -> ( # 206 "fslexlex.fsl" comment p lexbuf -# 731 "fslexlex.fs" +# 721 "fslexlex.fs" ) | _ -> failwith "comment" diff --git a/buildtools/fslex/fslexpars.fs b/buildtools/fslex/fslexpars.fs index 87268dcb7fc..55b628b47ee 100644 --- a/buildtools/fslex/fslexpars.fs +++ b/buildtools/fslex/fslexpars.fs @@ -1,8 +1,8 @@ // Implementation file for parser generated by fsyacc -module internal FsLexYacc.FsLex.Parser +module FsLexYacc.FsLex.Parser #nowarn "64";; // turn off warnings that type variables used in production annotations are instantiated to concrete type -open Internal.Utilities.Text.Lexing -open Internal.Utilities.Text.Parsing.ParseHelpers +open 
FSharp.Text.Lexing +open FSharp.Text.Parsing.ParseHelpers # 1 "fslexpars.fsy" (* (c) Microsoft Corporation 2005-2008. *) @@ -244,389 +244,394 @@ let _fsyacc_productionToNonTerminalTable = [|0us; 1us; 2us; 2us; 3us; 3us; 4us; let _fsyacc_immediateActions = [|65535us; 49152us; 65535us; 65535us; 65535us; 65535us; 16385us; 16386us; 65535us; 16389us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 16391us; 65535us; 65535us; 65535us; 65535us; 65535us; 16393us; 65535us; 16395us; 16397us; 65535us; 65535us; 16398us; 65535us; 16400us; 16401us; 16402us; 16403us; 16404us; 16405us; 16406us; 65535us; 65535us; 65535us; 16408us; 16409us; 16410us; 65535us; 65535us; 16412us; 65535us; 65535us; 16413us; 65535us; 65535us; 16414us; 65535us; 65535us; 16416us; 65535us; |] let _fsyacc_reductions () = [| # 246 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : AST.Spec)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> AST.Spec in Microsoft.FSharp.Core.Operators.box ( ( - raise (Internal.Utilities.Text.Parsing.Accept(Microsoft.FSharp.Core.Operators.box _1)) + raise (FSharp.Text.Parsing.Accept(Microsoft.FSharp.Core.Operators.box _1)) ) - : '_startspec)); + : 'gentype__startspec)); # 255 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'codeopt)) in - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'Macros)) in - let _4 = (let data = parseState.GetInput(4) in (Microsoft.FSharp.Core.Operators.unbox data : 'Rules)) in - let _5 = (let data = parseState.GetInput(5) in (Microsoft.FSharp.Core.Operators.unbox data : 'codeopt)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_codeopt in + let 
_2 = parseState.GetInput(2) :?> 'gentype_Macros in + let _4 = parseState.GetInput(4) :?> 'gentype_Rules in + let _5 = parseState.GetInput(5) :?> 'gentype_codeopt in Microsoft.FSharp.Core.Operators.box ( ( -# 24 "fslexpars.fsy" - { TopCode=_1;Macros=_2;Rules=_4;BottomCode=_5 } +# 25 "fslexpars.fsy" + + { TopCode=_1;Macros=_2;Rules=_4;BottomCode=_5 } ) -# 24 "fslexpars.fsy" +# 25 "fslexpars.fsy" : AST.Spec)); -# 269 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : AST.Code)) in +# 270 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> AST.Code in Microsoft.FSharp.Core.Operators.box ( ( -# 25 "fslexpars.fsy" - _1 +# 30 "fslexpars.fsy" + _1 ) -# 25 "fslexpars.fsy" - : 'codeopt)); -# 280 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> +# 30 "fslexpars.fsy" + : 'gentype_codeopt)); +# 281 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> Microsoft.FSharp.Core.Operators.box ( ( -# 25 "fslexpars.fsy" - "", (parseState.ResultRange |> fst) +# 31 "fslexpars.fsy" + "", (parseState.ResultRange |> fst) ) -# 25 "fslexpars.fsy" - : 'codeopt)); -# 290 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> +# 31 "fslexpars.fsy" + : 'gentype_codeopt)); +# 291 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> Microsoft.FSharp.Core.Operators.box ( ( -# 26 "fslexpars.fsy" - [] +# 34 "fslexpars.fsy" + [] ) -# 26 "fslexpars.fsy" - : 'Macros)); -# 300 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'macro)) in - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'Macros)) in +# 34 "fslexpars.fsy" + : 'gentype_Macros)); +# 301 
"fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_macro in + let _2 = parseState.GetInput(2) :?> 'gentype_Macros in Microsoft.FSharp.Core.Operators.box ( ( -# 26 "fslexpars.fsy" - _1 :: _2 +# 35 "fslexpars.fsy" + + _1 :: _2 ) -# 26 "fslexpars.fsy" - : 'Macros)); -# 312 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : string)) in - let _4 = (let data = parseState.GetInput(4) in (Microsoft.FSharp.Core.Operators.unbox data : 'regexp)) in +# 35 "fslexpars.fsy" + : 'gentype_Macros)); +# 314 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _2 = parseState.GetInput(2) :?> string in + let _4 = parseState.GetInput(4) :?> 'gentype_regexp in Microsoft.FSharp.Core.Operators.box ( ( -# 27 "fslexpars.fsy" - (_2, _4) +# 40 "fslexpars.fsy" + + (_2, _4) ) -# 27 "fslexpars.fsy" - : 'macro)); -# 324 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'rule)) in - let _3 = (let data = parseState.GetInput(3) in (Microsoft.FSharp.Core.Operators.unbox data : 'Rules)) in +# 40 "fslexpars.fsy" + : 'gentype_macro)); +# 327 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_rule in + let _3 = parseState.GetInput(3) :?> 'gentype_Rules in Microsoft.FSharp.Core.Operators.box ( ( -# 28 "fslexpars.fsy" - _1 :: _3 +# 45 "fslexpars.fsy" + + _1 :: _3 ) -# 28 "fslexpars.fsy" - : 'Rules)); -# 336 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'rule)) in +# 45 "fslexpars.fsy" + : 'gentype_Rules)); +# 340 "fslexpars.fs" + (fun (parseState : 
FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_rule in Microsoft.FSharp.Core.Operators.box ( ( -# 28 "fslexpars.fsy" - [_1] +# 48 "fslexpars.fsy" + [_1] ) -# 28 "fslexpars.fsy" - : 'Rules)); -# 347 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : string)) in - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'args)) in - let _5 = (let data = parseState.GetInput(5) in (Microsoft.FSharp.Core.Operators.unbox data : 'optbar)) in - let _6 = (let data = parseState.GetInput(6) in (Microsoft.FSharp.Core.Operators.unbox data : 'clauses)) in +# 48 "fslexpars.fsy" + : 'gentype_Rules)); +# 351 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> string in + let _2 = parseState.GetInput(2) :?> 'gentype_args in + let _5 = parseState.GetInput(5) :?> 'gentype_optbar in + let _6 = parseState.GetInput(6) :?> 'gentype_clauses in Microsoft.FSharp.Core.Operators.box ( ( -# 29 "fslexpars.fsy" - (_1,_2,_6) +# 51 "fslexpars.fsy" + + (_1,_2,_6) ) -# 29 "fslexpars.fsy" - : 'rule)); -# 361 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> +# 51 "fslexpars.fsy" + : 'gentype_rule)); +# 366 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> Microsoft.FSharp.Core.Operators.box ( ( -# 30 "fslexpars.fsy" - [] +# 56 "fslexpars.fsy" + [] ) -# 30 "fslexpars.fsy" - : 'args)); -# 371 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : string)) in - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'args)) in +# 56 "fslexpars.fsy" + : 'gentype_args)); +# 376 "fslexpars.fs" + (fun (parseState : 
FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> string in + let _2 = parseState.GetInput(2) :?> 'gentype_args in Microsoft.FSharp.Core.Operators.box ( ( -# 30 "fslexpars.fsy" - _1 :: _2 +# 57 "fslexpars.fsy" + _1 :: _2 ) -# 30 "fslexpars.fsy" - : 'args)); -# 383 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> +# 57 "fslexpars.fsy" + : 'gentype_args)); +# 388 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> Microsoft.FSharp.Core.Operators.box ( ( -# 31 "fslexpars.fsy" - +# 60 "fslexpars.fsy" + ) -# 31 "fslexpars.fsy" - : 'optbar)); -# 393 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> +# 60 "fslexpars.fsy" + : 'gentype_optbar)); +# 398 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> Microsoft.FSharp.Core.Operators.box ( ( -# 31 "fslexpars.fsy" - +# 61 "fslexpars.fsy" + ) -# 31 "fslexpars.fsy" - : 'optbar)); -# 403 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'clause)) in - let _3 = (let data = parseState.GetInput(3) in (Microsoft.FSharp.Core.Operators.unbox data : 'clauses)) in +# 61 "fslexpars.fsy" + : 'gentype_optbar)); +# 408 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_clause in + let _3 = parseState.GetInput(3) :?> 'gentype_clauses in Microsoft.FSharp.Core.Operators.box ( ( -# 32 "fslexpars.fsy" - _1 :: _3 +# 64 "fslexpars.fsy" + _1 :: _3 ) -# 32 "fslexpars.fsy" - : 'clauses)); -# 415 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'clause)) in +# 64 "fslexpars.fsy" + : 'gentype_clauses)); +# 420 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = 
parseState.GetInput(1) :?> 'gentype_clause in Microsoft.FSharp.Core.Operators.box ( ( -# 32 "fslexpars.fsy" - [_1] +# 65 "fslexpars.fsy" + [_1] ) -# 32 "fslexpars.fsy" - : 'clauses)); -# 426 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'regexp)) in - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : AST.Code)) in +# 65 "fslexpars.fsy" + : 'gentype_clauses)); +# 431 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_regexp in + let _2 = parseState.GetInput(2) :?> AST.Code in Microsoft.FSharp.Core.Operators.box ( ( -# 33 "fslexpars.fsy" - _1, _2 +# 68 "fslexpars.fsy" + _1, _2 ) -# 33 "fslexpars.fsy" - : 'clause)); -# 438 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : char)) in +# 68 "fslexpars.fsy" + : 'gentype_clause)); +# 443 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> char in Microsoft.FSharp.Core.Operators.box ( ( -# 35 "fslexpars.fsy" - Inp(Alphabet(EncodeChar _1)) +# 71 "fslexpars.fsy" + Inp(Alphabet(EncodeChar _1)) ) -# 35 "fslexpars.fsy" - : 'regexp)); -# 449 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : string)) in +# 71 "fslexpars.fsy" + : 'gentype_regexp)); +# 454 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> string in Microsoft.FSharp.Core.Operators.box ( ( -# 36 "fslexpars.fsy" - Inp(UnicodeCategory _1) +# 72 "fslexpars.fsy" + Inp(UnicodeCategory _1) ) -# 36 "fslexpars.fsy" - : 'regexp)); -# 460 "fslexpars.fs" - (fun 
(parseState : Internal.Utilities.Text.Parsing.IParseState) -> +# 72 "fslexpars.fsy" + : 'gentype_regexp)); +# 465 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> Microsoft.FSharp.Core.Operators.box ( ( -# 37 "fslexpars.fsy" - Inp(Alphabet(Eof)) +# 73 "fslexpars.fsy" + Inp(Alphabet(fun ctx -> Eof)) ) -# 37 "fslexpars.fsy" - : 'regexp)); -# 470 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> +# 73 "fslexpars.fsy" + : 'gentype_regexp)); +# 475 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> Microsoft.FSharp.Core.Operators.box ( ( -# 38 "fslexpars.fsy" +# 74 "fslexpars.fsy" Inp Any ) -# 38 "fslexpars.fsy" - : 'regexp)); -# 480 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : string)) in +# 74 "fslexpars.fsy" + : 'gentype_regexp)); +# 485 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> string in Microsoft.FSharp.Core.Operators.box ( ( -# 39 "fslexpars.fsy" - Seq([ for n in 0 .. _1.Length - 1 -> Inp(Alphabet(EncodeChar _1.[n]))]) +# 75 "fslexpars.fsy" + Seq([ for n in 0 .. 
_1.Length - 1 -> Inp(Alphabet(EncodeChar _1.[n]))]) ) -# 39 "fslexpars.fsy" - : 'regexp)); -# 491 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : string)) in +# 75 "fslexpars.fsy" + : 'gentype_regexp)); +# 496 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> string in Microsoft.FSharp.Core.Operators.box ( ( -# 40 "fslexpars.fsy" +# 76 "fslexpars.fsy" Macro(_1) ) -# 40 "fslexpars.fsy" - : 'regexp)); -# 502 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'regexp)) in - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'regexp)) in +# 76 "fslexpars.fsy" + : 'gentype_regexp)); +# 507 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_regexp in + let _2 = parseState.GetInput(2) :?> 'gentype_regexp in Microsoft.FSharp.Core.Operators.box ( ( -# 41 "fslexpars.fsy" +# 77 "fslexpars.fsy" Seq[_1;_2] ) -# 41 "fslexpars.fsy" - : 'regexp)); -# 514 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'regexp)) in +# 77 "fslexpars.fsy" + : 'gentype_regexp)); +# 519 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_regexp in Microsoft.FSharp.Core.Operators.box ( ( -# 42 "fslexpars.fsy" +# 78 "fslexpars.fsy" Seq[_1;Star _1] ) -# 42 "fslexpars.fsy" - : 'regexp)); -# 525 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'regexp)) in +# 78 
"fslexpars.fsy" + : 'gentype_regexp)); +# 530 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_regexp in Microsoft.FSharp.Core.Operators.box ( ( -# 43 "fslexpars.fsy" +# 79 "fslexpars.fsy" Star _1 ) -# 43 "fslexpars.fsy" - : 'regexp)); -# 536 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'regexp)) in +# 79 "fslexpars.fsy" + : 'gentype_regexp)); +# 541 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_regexp in Microsoft.FSharp.Core.Operators.box ( ( -# 44 "fslexpars.fsy" - Alt[Seq[];_1] +# 80 "fslexpars.fsy" + Alt(fun ctx -> [Seq[];_1]) ) -# 44 "fslexpars.fsy" - : 'regexp)); -# 547 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'regexp)) in - let _3 = (let data = parseState.GetInput(3) in (Microsoft.FSharp.Core.Operators.unbox data : 'regexp)) in +# 80 "fslexpars.fsy" + : 'gentype_regexp)); +# 552 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_regexp in + let _3 = parseState.GetInput(3) :?> 'gentype_regexp in Microsoft.FSharp.Core.Operators.box ( ( -# 45 "fslexpars.fsy" - Alt[_1;_3] +# 81 "fslexpars.fsy" + Alt(fun ctx -> [_1;_3]) ) -# 45 "fslexpars.fsy" - : 'regexp)); -# 559 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'regexp)) in +# 81 "fslexpars.fsy" + : 'gentype_regexp)); +# 564 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _2 = parseState.GetInput(2) :?> 'gentype_regexp in Microsoft.FSharp.Core.Operators.box ( ( -# 46 
"fslexpars.fsy" +# 82 "fslexpars.fsy" _2 ) -# 46 "fslexpars.fsy" - : 'regexp)); -# 570 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'charset)) in +# 82 "fslexpars.fsy" + : 'gentype_regexp)); +# 575 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _2 = parseState.GetInput(2) :?> 'gentype_charset in Microsoft.FSharp.Core.Operators.box ( ( -# 47 "fslexpars.fsy" - Alt [ for c in _2 -> Inp(Alphabet(c)) ] +# 83 "fslexpars.fsy" + Alt (fun ctx -> [ for c in (_2 ctx) -> Inp(Alphabet(fun ctx -> c)) ]) ) -# 47 "fslexpars.fsy" - : 'regexp)); -# 581 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _3 = (let data = parseState.GetInput(3) in (Microsoft.FSharp.Core.Operators.unbox data : 'charset)) in +# 83 "fslexpars.fsy" + : 'gentype_regexp)); +# 586 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _3 = parseState.GetInput(3) :?> 'gentype_charset in Microsoft.FSharp.Core.Operators.box ( ( -# 48 "fslexpars.fsy" - Inp(NotCharSet(_3)) +# 84 "fslexpars.fsy" + Inp(NotCharSet(fun ctx -> _3 ctx)) ) -# 48 "fslexpars.fsy" - : 'regexp)); -# 592 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : char)) in +# 84 "fslexpars.fsy" + : 'gentype_regexp)); +# 597 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> char in Microsoft.FSharp.Core.Operators.box ( ( -# 51 "fslexpars.fsy" - Set.singleton(EncodeChar _1) +# 87 "fslexpars.fsy" + fun ctx -> Set.singleton(EncodeChar _1 ctx) ) -# 51 "fslexpars.fsy" - : 'charset)); -# 603 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in 
(Microsoft.FSharp.Core.Operators.unbox data : char)) in - let _3 = (let data = parseState.GetInput(3) in (Microsoft.FSharp.Core.Operators.unbox data : char)) in +# 87 "fslexpars.fsy" + : 'gentype_charset)); +# 608 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> char in + let _3 = parseState.GetInput(3) :?> char in Microsoft.FSharp.Core.Operators.box ( ( -# 52 "fslexpars.fsy" - Set.ofSeq [ for c in _1 .. _3 -> EncodeChar c ] +# 88 "fslexpars.fsy" + fun ctx -> Set.ofSeq [ for c in _1 .. _3 -> EncodeChar c ctx ] ) -# 52 "fslexpars.fsy" - : 'charset)); -# 615 "fslexpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'charset)) in - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'charset)) in +# 88 "fslexpars.fsy" + : 'gentype_charset)); +# 620 "fslexpars.fs" + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_charset in + let _2 = parseState.GetInput(2) :?> 'gentype_charset in Microsoft.FSharp.Core.Operators.box ( ( -# 53 "fslexpars.fsy" - Set.union _1 _2 +# 89 "fslexpars.fsy" + fun ctx -> Set.union (_1 ctx) (_2 ctx) ) -# 53 "fslexpars.fsy" - : 'charset)); +# 89 "fslexpars.fsy" + : 'gentype_charset)); |] -# 628 "fslexpars.fs" -let tables () : Internal.Utilities.Text.Parsing.Tables<_> = +# 633 "fslexpars.fs" +let tables : FSharp.Text.Parsing.Tables<_> = { reductions= _fsyacc_reductions (); endOfInputTag = _fsyacc_endOfInputTag; tagOfToken = tagOfToken; @@ -640,12 +645,12 @@ let tables () : Internal.Utilities.Text.Parsing.Tables<_> = gotos = _fsyacc_gotos; sparseGotoTableRowOffsets = _fsyacc_sparseGotoTableRowOffsets; tagOfErrorTerminal = _fsyacc_tagOfErrorTerminal; - parseError = (fun (ctxt:Internal.Utilities.Text.Parsing.ParseErrorContext<_>) -> + parseError = (fun 
(ctxt:FSharp.Text.Parsing.ParseErrorContext<_>) -> match parse_error_rich with | Some f -> f ctxt | None -> parse_error ctxt.Message); numTerminals = 26; productionToNonTerminalTable = _fsyacc_productionToNonTerminalTable } -let engine lexer lexbuf startState = (tables ()).Interpret(lexer, lexbuf, startState) +let engine lexer lexbuf startState = tables.Interpret(lexer, lexbuf, startState) let spec lexer lexbuf : AST.Spec = - Microsoft.FSharp.Core.Operators.unbox ((tables ()).Interpret(lexer, lexbuf, 0)) + engine lexer lexbuf 0 :?> _ diff --git a/buildtools/fsyacc/Arg.fs b/buildtools/fsyacc/Arg.fs index b1131625cf3..d6f8ed790e3 100644 --- a/buildtools/fsyacc/Arg.fs +++ b/buildtools/fsyacc/Arg.fs @@ -1,11 +1,6 @@ // (c) Microsoft Corporation 2005-2009. -#if INTERNALIZED_FSLEXYACC_RUNTIME -namespace Internal.Utilities -#else -namespace Microsoft.FSharp.Text -#endif - +namespace FSharp.Text type ArgType = | ClearArg of bool ref @@ -35,17 +30,17 @@ exception HelpText of string [] type ArgParser() = static let getUsage specs u = - let sbuf = new System.Text.StringBuilder 100 + let sbuf = System.Text.StringBuilder 100 let pstring (s:string) = sbuf.Append s |> ignore let pendline s = pstring s; pstring "\n" pendline u; List.iter (fun (arg:ArgInfo) -> match arg.Name, arg.ArgType, arg.HelpText with - | (s, (UnitArg _ | SetArg _ | ClearArg _), helpText) -> pstring "\t"; pstring s; pstring ": "; pendline helpText - | (s, StringArg _, helpText) -> pstring "\t"; pstring s; pstring " : "; pendline helpText - | (s, IntArg _, helpText) -> pstring "\t"; pstring s; pstring " : "; pendline helpText - | (s, FloatArg _, helpText) -> pstring "\t"; pstring s; pstring " : "; pendline helpText - | (s, RestArg _, helpText) -> pstring "\t"; pstring s; pstring " ...: "; pendline helpText) + | s, (UnitArg _ | SetArg _ | ClearArg _), helpText -> pstring "\t"; pstring s; pstring ": "; pendline helpText + | s, StringArg _, helpText -> pstring "\t"; pstring s; pstring " : "; pendline helpText + | 
s, IntArg _, helpText -> pstring "\t"; pstring s; pstring " : "; pendline helpText + | s, FloatArg _, helpText -> pstring "\t"; pstring s; pstring " : "; pendline helpText + | s, RestArg _, helpText -> pstring "\t"; pstring s; pstring " ...: "; pendline helpText) specs; pstring "\t"; pstring "--help"; pstring ": "; pendline "display this list of options"; pstring "\t"; pstring "-help"; pstring ": "; pendline "display this list of options"; @@ -53,20 +48,20 @@ type ArgParser() = static member ParsePartial(cursor,argv,arguments:seq,?otherArgs,?usageText) = - let otherArgs = defaultArg otherArgs (fun _ -> ()) + let other = defaultArg otherArgs (fun _ -> ()) let usageText = defaultArg usageText "" let nargs = Array.length argv incr cursor; - let arguments = arguments |> Seq.toList - let specs = arguments |> List.map (fun (arg:ArgInfo) -> arg.Name, arg.ArgType) + let argSpecs = arguments |> Seq.toList + let specs = argSpecs |> List.map (fun (arg:ArgInfo) -> arg.Name, arg.ArgType) while !cursor < nargs do let arg = argv.[!cursor] let rec findMatchingArg args = match args with - | ((s, action) :: _) when s = arg -> + | (s, action) :: _ when s = arg -> let getSecondArg () = if !cursor + 1 >= nargs then - raise(Bad("option "+s+" needs an argument.\n"+getUsage arguments usageText)); + raise(Bad("option "+s+" needs an argument.\n"+getUsage argSpecs usageText)); argv.[!cursor+1] match action with @@ -85,40 +80,40 @@ type ArgParser() = cursor := !cursor + 2 | IntArg f -> let arg2 = getSecondArg () - let arg2 = try int32 arg2 with _ -> raise(Bad(getUsage arguments usageText)) in + let arg2 = try int32 arg2 with _ -> raise(Bad(getUsage argSpecs usageText)) in f arg2; cursor := !cursor + 2; | FloatArg f -> let arg2 = getSecondArg() - let arg2 = try float arg2 with _ -> raise(Bad(getUsage arguments usageText)) in + let arg2 = try float arg2 with _ -> raise(Bad(getUsage argSpecs usageText)) in f arg2; cursor := !cursor + 2; | RestArg f -> incr cursor; while !cursor < nargs do - f 
(argv.[!cursor]); + f argv.[!cursor]; incr cursor; - | (_ :: more) -> findMatchingArg more + | _ :: more -> findMatchingArg more | [] -> if arg = "-help" || arg = "--help" || arg = "/help" || arg = "/help" || arg = "/?" then - raise (HelpText (getUsage arguments usageText)) + raise (HelpText (getUsage argSpecs usageText)) // Note: for '/abc/def' does not count as an argument // Note: '/abc' does - elif arg.Length>0 && (arg.[0] = '-' || (arg.[0] = '/' && not (arg.Length > 1 && arg.[1..].Contains ("/")))) then - raise (Bad ("unrecognized argument: "+ arg + "\n" + getUsage arguments usageText)) + elif arg.Length>0 && (arg.[0] = '-' || (arg.[0] = '/' && not (arg.Length > 1 && arg.[1..].Contains "/"))) then + raise (Bad ("unrecognized argument: "+ arg + "\n" + getUsage argSpecs usageText)) else - otherArgs arg; + other arg; incr cursor findMatchingArg specs - static member Usage (arguments,?usage) = + static member Usage (arguments, ?usage) = let usage = defaultArg usage "" System.Console.Error.WriteLine (getUsage (Seq.toList arguments) usage) #if FX_NO_COMMAND_LINE_ARGS #else - static member Parse (arguments,?otherArgs,?usageText) = + static member Parse (arguments, ?otherArgs,?usageText) = let current = ref 0 let argv = System.Environment.GetCommandLineArgs() try ArgParser.ParsePartial (current, argv, arguments, ?otherArgs=otherArgs, ?usageText=usageText) @@ -128,6 +123,6 @@ type ArgParser() = System.Console.Error.WriteLine h; System.Console.Error.Flush(); System.Environment.Exit(1); - | e -> + | _ -> reraise() #endif diff --git a/buildtools/fsyacc/Arg.fsi b/buildtools/fsyacc/Arg.fsi index 367f69f959f..b5203999928 100644 --- a/buildtools/fsyacc/Arg.fsi +++ b/buildtools/fsyacc/Arg.fsi @@ -1,11 +1,7 @@ // (c) Microsoft Corporation 2005-2009. /// A simple command-line argument processor. 
-#if INTERNALIZED_FSLEXYACC_RUNTIME -namespace Internal.Utilities -#else -namespace Microsoft.FSharp.Text -#endif +namespace FSharp.Text /// The spec value describes the action of the argument, /// and whether it expects a following parameter. @@ -37,7 +33,7 @@ type ArgParser = [] static member ParsePartial: cursor: int ref * argv: string[] * arguments:seq * ?otherArgs: (string -> unit) * ?usageText:string -> unit - /// Parse the arguments given by System.Environment.GetEnvironmentVariables() + /// Parse the arguments given by System.Environment.GetCommandLineArgs() /// according to the argument processing specifications "specs". /// Args begin with "-". Non-arguments are passed to "f" in /// order. "use" is printed as part of the usage line if an error occurs. diff --git a/buildtools/fsyacc/Lexing.fs b/buildtools/fsyacc/Lexing.fs index 760ace5a932..40aacdcac96 100644 --- a/buildtools/fsyacc/Lexing.fs +++ b/buildtools/fsyacc/Lexing.fs @@ -1,423 +1,456 @@ // (c) Microsoft Corporation 2005-2009. +module FSharp.Text.Lexing #nowarn "47" // recursive initialization of LexBuffer +open System.Collections.Generic -#if INTERNALIZED_FSLEXYACC_RUNTIME -namespace Internal.Utilities.Text.Lexing - -#else -namespace Microsoft.FSharp.Text.Lexing -#endif - - open System.Collections.Generic - - // REVIEW: This type showed up on a parsing-intensive performance measurement. Consider whether it can be a struct-record later when we have this feature. 
-jomo -#if INTERNALIZED_FSLEXYACC_RUNTIME - type internal Position = -#else - type Position = -#endif - { pos_fname : string; - pos_lnum : int; -#if INTERNALIZED_FSLEXYACC_RUNTIME - pos_orig_lnum : int; -#endif - pos_bol : int; - pos_cnum : int; } - member x.FileName = x.pos_fname - member x.Line = x.pos_lnum -#if INTERNALIZED_FSLEXYACC_RUNTIME - member x.OriginalLine = x.pos_orig_lnum -#endif - member x.Char = x.pos_cnum - member x.AbsoluteOffset = x.pos_cnum - member x.StartOfLine = x.pos_bol - member x.StartOfLineAbsoluteOffset = x.pos_bol - member x.Column = x.pos_cnum - x.pos_bol - member pos.NextLine = - { pos with -#if INTERNALIZED_FSLEXYACC_RUNTIME - pos_orig_lnum = pos.OriginalLine + 1; -#endif - pos_lnum = pos.Line+1; - pos_bol = pos.AbsoluteOffset } - member pos.EndOfToken(n) = {pos with pos_cnum=pos.pos_cnum + n } - member pos.AsNewLinePos() = pos.NextLine - member pos.ShiftColumnBy(by) = {pos with pos_cnum = pos.pos_cnum + by} - static member Empty = - { pos_fname=""; - pos_lnum= 0; -#if INTERNALIZED_FSLEXYACC_RUNTIME - pos_orig_lnum = 0; -#endif - pos_bol= 0; - pos_cnum=0 } - static member FirstLine(filename) = - { pos_fname=filename; -#if INTERNALIZED_FSLEXYACC_RUNTIME - pos_orig_lnum = 1; -#endif - pos_lnum= 1; - pos_bol= 0; - pos_cnum=0 } - -#if INTERNALIZED_FSLEXYACC_RUNTIME - type internal LexBufferFiller<'char> = -#else - type LexBufferFiller<'char> = -#endif - { fillSync : (LexBuffer<'char> -> unit) option - fillAsync : (LexBuffer<'char> -> Async) option } - - and [] -#if INTERNALIZED_FSLEXYACC_RUNTIME - internal LexBuffer<'char>(filler: LexBufferFiller<'char>) as this = -#else - LexBuffer<'char>(filler: LexBufferFiller<'char>) as this = -#endif - let context = new Dictionary(1) in - let extendBufferSync = (fun () -> match filler.fillSync with Some refill -> refill this | None -> invalidOp "attempt to read synchronously from an asynchronous lex buffer") - let extendBufferAsync = (fun () -> match filler.fillAsync with Some refill -> refill this 
| None -> invalidOp "attempt to read asynchronously from a synchronous lex buffer") - let mutable buffer=[||]; - /// number of valid charactes beyond bufferScanStart - let mutable bufferMaxScanLength=0; - /// count into the buffer when scanning - let mutable bufferScanStart=0; - /// number of characters scanned so far - let mutable bufferScanLength=0; - /// length of the scan at the last accepting state - let mutable lexemeLength=0; - /// action related to the last accepting state - let mutable bufferAcceptAction=0; - let mutable eof = false; - let mutable startPos = Position.Empty ; - let mutable endPos = Position.Empty - - // Throw away all the input besides the lexeme - - let discardInput () = - let keep = Array.sub buffer bufferScanStart bufferScanLength - let nkeep = keep.Length - Array.blit keep 0 buffer 0 nkeep; - bufferScanStart <- 0; - bufferMaxScanLength <- nkeep - - - member lexbuf.EndOfScan () : int = - // Printf.eprintf "endOfScan, lexBuffer.lexemeLength = %d\n" lexBuffer.lexemeLength; - if bufferAcceptAction < 0 then - failwith "unrecognized input" - - // Printf.printf "endOfScan %d state %d on unconsumed input '%c' (%d)\n" a s (Char.chr inp) inp; - // Printf.eprintf "accept, lexeme = %s\n" (lexeme lexBuffer); - lexbuf.StartPos <- endPos; - lexbuf.EndPos <- endPos.EndOfToken(lexbuf.LexemeLength); - bufferAcceptAction - - member lexbuf.StartPos - with get() = startPos - and set(b) = startPos <- b - - member lexbuf.EndPos - with get() = endPos - and set(b) = endPos <- b - - member lexbuf.Lexeme = Array.sub buffer bufferScanStart lexemeLength - member lexbuf.LexemeChar(n) = buffer.[n+bufferScanStart] - - member lexbuf.BufferLocalStore = (context :> IDictionary<_,_>) - member lexbuf.LexemeLength with get() : int = lexemeLength and set v = lexemeLength <- v - member internal lexbuf.Buffer with get() : 'char[] = buffer and set v = buffer <- v - member internal lexbuf.BufferMaxScanLength with get() = bufferMaxScanLength and set v = bufferMaxScanLength <- v - 
member internal lexbuf.BufferScanLength with get() = bufferScanLength and set v = bufferScanLength <- v - member internal lexbuf.BufferScanStart with get() : int = bufferScanStart and set v = bufferScanStart <- v - member internal lexbuf.BufferAcceptAction with get() = bufferAcceptAction and set v = bufferAcceptAction <- v - member internal lexbuf.RefillBuffer = extendBufferSync - member internal lexbuf.AsyncRefillBuffer = extendBufferAsync - - static member LexemeString(lexbuf:LexBuffer) = - new System.String(lexbuf.Buffer,lexbuf.BufferScanStart,lexbuf.LexemeLength) - - member lexbuf.IsPastEndOfStream - with get() = eof - and set(b) = eof <- b - - member lexbuf.DiscardInput() = discardInput () - - member x.BufferScanPos = bufferScanStart + bufferScanLength - - member lexbuf.EnsureBufferSize n = - if lexbuf.BufferScanPos + n >= buffer.Length then - let repl = Array.zeroCreate (lexbuf.BufferScanPos + n) - Array.blit buffer bufferScanStart repl bufferScanStart bufferScanLength; - buffer <- repl - - static member FromReadFunctions (syncRead : ('char[] * int * int -> int) option, asyncRead : ('char[] * int * int -> Async) option) : LexBuffer<'char> = - let extension= Array.zeroCreate 4096 - let fillers = - { fillSync = - match syncRead with - | None -> None - | Some read -> - Some (fun lexBuffer -> - let n = read(extension,0,extension.Length) - lexBuffer.EnsureBufferSize n; - Array.blit extension 0 lexBuffer.Buffer lexBuffer.BufferScanPos n; - lexBuffer.BufferMaxScanLength <- lexBuffer.BufferScanLength + n); - fillAsync = - match asyncRead with - | None -> None - | Some read -> - Some (fun lexBuffer -> - async { - let! 
n = read(extension,0,extension.Length) - lexBuffer.EnsureBufferSize n; - Array.blit extension 0 lexBuffer.Buffer lexBuffer.BufferScanPos n; - lexBuffer.BufferMaxScanLength <- lexBuffer.BufferScanLength + n }) } - new LexBuffer<_>(fillers) - - // A full type signature is required on this method because it is used at more specific types within its own scope - static member FromFunction (f : 'char[] * int * int -> int) : LexBuffer<'char> = LexBuffer<_>.FromReadFunctions(Some(f),None) - static member FromAsyncFunction (f : 'char[] * int * int -> Async) : LexBuffer<'char> = LexBuffer<_>.FromReadFunctions(None,Some(f)) - - static member FromCharFunction f : LexBuffer = - LexBuffer.FromFunction(fun (buff,start,len) -> - let buff2 = Array.zeroCreate len - let n = f buff2 len - Array.blit buff2 0 buff start len - n) - static member FromByteFunction f : LexBuffer = - LexBuffer.FromFunction(fun (buff,start,len) -> - let buff2 = Array.zeroCreate len - let n = f buff2 len - Array.blit buff2 0 buff start len - n) - - // A full type signature is required on this method because it is used at more specific types within its own scope - static member FromArray (s: 'char[]) : LexBuffer<'char> = - let lexBuffer = - new LexBuffer<_> - { fillSync = Some (fun _ -> ()); - fillAsync = Some (fun _ -> async { return () }) } - let buffer = Array.copy s - lexBuffer.Buffer <- buffer; - lexBuffer.BufferMaxScanLength <- buffer.Length; - lexBuffer - - static member FromBytes (arr) = LexBuffer.FromArray(arr) - static member FromChars (arr) = LexBuffer.FromArray(arr) - static member FromString (s:string) = LexBuffer.FromChars (s.ToCharArray()) - - static member FromTextReader (tr:System.IO.TextReader) : LexBuffer = - LexBuffer.FromFunction(tr.Read) - - static member FromBinaryReader (br:System.IO.BinaryReader) : LexBuffer = - LexBuffer.FromFunction(br.Read) - - static member FromStream (stream:System.IO.Stream) : LexBuffer = - LexBuffer.FromReadFunctions(Some(stream.Read),Some(fun (buf,offset,len) -> 
stream.AsyncRead(buf,offset=offset,count=len))) - - module GenericImplFragments = - let startInterpret(lexBuffer:LexBuffer<_>)= - lexBuffer.BufferScanStart <- lexBuffer.BufferScanStart + lexBuffer.LexemeLength; - lexBuffer.BufferMaxScanLength <- lexBuffer.BufferMaxScanLength - lexBuffer.LexemeLength; - lexBuffer.BufferScanLength <- 0; - lexBuffer.LexemeLength <- 0; - lexBuffer.BufferAcceptAction <- -1; - - let afterRefill (trans: uint16[] array,sentinel,lexBuffer:LexBuffer<_>,scanUntilSentinel,endOfScan,state,eofPos) = - // end of file occurs if we couldn't extend the buffer - if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then - let snew = int trans.[state].[eofPos] // == EOF - if snew = sentinel then - endOfScan() - else - if lexBuffer.IsPastEndOfStream then failwith "End of file on lexing stream"; - lexBuffer.IsPastEndOfStream <- true; - // Printf.printf "state %d --> %d on eof\n" state snew; - scanUntilSentinel(lexBuffer,snew) - else - scanUntilSentinel(lexBuffer, state) +// REVIEW: This type showed up on a parsing-intensive performance measurement. Consider whether it can be a struct-record later when we have this feature. 
-jomo +[] +type Position = + { pos_fname : string + pos_lnum : int + pos_orig_lnum : int + pos_bol : int + pos_cnum : int } - let onAccept (lexBuffer:LexBuffer<_>,a) = - lexBuffer.LexemeLength <- lexBuffer.BufferScanLength; - lexBuffer.BufferAcceptAction <- a; + member pos.FileName = pos.pos_fname - open GenericImplFragments + member pos.Line = pos.pos_lnum - [] -#if INTERNALIZED_FSLEXYACC_RUNTIME - type internal AsciiTables(trans: uint16[] array, accept: uint16[]) = -#else - type AsciiTables(trans: uint16[] array, accept: uint16[]) = -#endif - let rec scanUntilSentinel(lexBuffer, state) = - let sentinel = 255 * 256 + 255 - // Return an endOfScan after consuming the input - let a = int accept.[state] - if a <> sentinel then - onAccept (lexBuffer,a) - - if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then - lexBuffer.DiscardInput(); - lexBuffer.RefillBuffer (); - // end of file occurs if we couldn't extend the buffer - afterRefill (trans,sentinel,lexBuffer,scanUntilSentinel,lexBuffer.EndOfScan,state,256 (* == EOF *) ) - else - // read a character - end the scan if there are no further transitions - let inp = int(lexBuffer.Buffer.[lexBuffer.BufferScanPos]) - let snew = int trans.[state].[inp] - if snew = sentinel then - lexBuffer.EndOfScan() - else - lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1; - // Printf.printf "state %d --> %d on '%c' (%d)\n" state snew (Char.chr inp) inp; - scanUntilSentinel(lexBuffer, snew) - - /// Interpret tables for an ascii lexer generated by fslex. - member tables.Interpret(initialState,lexBuffer : LexBuffer) = - startInterpret(lexBuffer) - scanUntilSentinel(lexBuffer, initialState) + member pos.OriginalLine = pos.pos_orig_lnum - /// Interpret tables for an ascii lexer generated by fslex. 
- member tables.AsyncInterpret(initialState,lexBuffer : LexBuffer) = - - let rec scanUntilSentinel(lexBuffer,state) : Async = - async { - let sentinel = 255 * 256 + 255 - // Return an endOfScan after consuming the input - let a = int accept.[state] - if a <> sentinel then - onAccept (lexBuffer,a) - - if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then - lexBuffer.DiscardInput(); - do! lexBuffer.AsyncRefillBuffer (); - // end of file occurs if we couldn't extend the buffer - return! afterRefill (trans,sentinel,lexBuffer,scanUntilSentinel,endOfScan,state,256 (* == EOF *) ) - else - // read a character - end the scan if there are no further transitions - let inp = int(lexBuffer.Buffer.[lexBuffer.BufferScanPos]) - let snew = int trans.[state].[inp] - if snew = sentinel then - return! endOfScan() - else - lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1; - return! scanUntilSentinel(lexBuffer,snew) - } - and endOfScan() = - async { return lexBuffer.EndOfScan() } - startInterpret(lexBuffer) - scanUntilSentinel(lexBuffer, initialState) - - - static member Create(trans,accept) = new AsciiTables(trans,accept) - - [] -#if INTERNALIZED_FSLEXYACC_RUNTIME - type internal UnicodeTables(trans: uint16[] array, accept: uint16[]) = -#else - type UnicodeTables(trans: uint16[] array, accept: uint16[]) = -#endif + member pos.Char = pos.pos_cnum + + member pos.AbsoluteOffset = pos.pos_cnum + + member pos.StartOfLine = pos.pos_bol + + member pos.StartOfLineAbsoluteOffset = pos.pos_bol + + member pos.Column = pos.pos_cnum - pos.pos_bol + + member pos.NextLine = + let pos = pos + { pos with + pos_orig_lnum = pos.OriginalLine + 1 + pos_lnum = pos.Line+1 + pos_bol = pos.AbsoluteOffset } + + member pos.EndOfToken(n) = + let pos = pos + {pos with pos_cnum=pos.pos_cnum + n } + + member pos.AsNewLinePos() = pos.NextLine + + member pos.ShiftColumnBy(by) = + let pos = pos + {pos with pos_cnum = pos.pos_cnum + by} + + static member Empty = + { pos_fname="" + pos_lnum= 0 + 
pos_orig_lnum = 0 + pos_bol= 0 + pos_cnum=0 } + + static member FirstLine(filename) = + { pos_fname=filename + pos_orig_lnum = 1 + pos_lnum= 1 + pos_bol= 0 + pos_cnum=0 } + +type LexBufferFiller<'char> = + { fillSync : (LexBuffer<'char> -> unit) option + fillAsync : (LexBuffer<'char> -> Async) option } + +and [] + LexBuffer<'char>(filler: LexBufferFiller<'char>) as this = + let context = Dictionary(1) in + let extendBufferSync = (fun () -> match filler.fillSync with Some refill -> refill this | None -> invalidOp "attempt to read synchronously from an asynchronous lex buffer") + let extendBufferAsync = (fun () -> match filler.fillAsync with Some refill -> refill this | None -> invalidOp "attempt to read asynchronously from a synchronous lex buffer") + let mutable buffer=[||] + + /// number of valid charactes beyond bufferScanStart + let mutable bufferMaxScanLength=0 + + /// count into the buffer when scanning + let mutable bufferScanStart=0 + + /// number of characters scanned so far + let mutable bufferScanLength=0 + + /// length of the scan at the last accepting state + let mutable lexemeLength=0 + + /// action related to the last accepting state + let mutable bufferAcceptAction=0 + + let mutable eof = false + + let mutable startPos = Position.Empty + + let mutable endPos = Position.Empty + + /// Throw away all the input besides the lexeme + let discardInput () = + Array.blit buffer bufferScanStart buffer 0 bufferScanLength + bufferScanStart <- 0 + bufferMaxScanLength <- bufferScanLength + + member lexbuf.EndOfScan () : int = + // Printf.eprintf "endOfScan, lexBuffer.lexemeLength = %d\n" lexBuffer.lexemeLength + if bufferAcceptAction < 0 then + failwith "unrecognized input" + + // Printf.printf "endOfScan %d state %d on unconsumed input '%c' (%d)\n" a s (Char.chr inp) inp + // Printf.eprintf "accept, lexeme = %s\n" (lexeme lexBuffer) + lexbuf.StartPos <- endPos + lexbuf.EndPos <- endPos.EndOfToken(lexbuf.LexemeLength) + bufferAcceptAction + + member _.StartPos + 
with get() = startPos + and set b = startPos <- b + + member _.EndPos + with get() = endPos + and set b = endPos <- b + + member _.Lexeme = Array.sub buffer bufferScanStart lexemeLength + + member _.LexemeChar n = buffer.[n+bufferScanStart] + + member _.BufferLocalStore = (context :> IDictionary<_, _>) + + member _.LexemeLength + with get() : int = lexemeLength + and set v = lexemeLength <- v + + member internal _.Buffer + with get() : 'char[] = buffer + and set v = buffer <- v + + member internal _.BufferMaxScanLength + with get() = bufferMaxScanLength + and set v = bufferMaxScanLength <- v + + member internal _.BufferScanLength + with get() = bufferScanLength + and set v = bufferScanLength <- v + + member internal _.BufferScanStart + with get() : int = bufferScanStart + and set v = bufferScanStart <- v + + member internal _.BufferAcceptAction + with get() = bufferAcceptAction + and set v = bufferAcceptAction <- v + + member internal _.RefillBuffer = extendBufferSync + + member internal _.AsyncRefillBuffer = extendBufferAsync + + static member LexemeString(lexbuf:LexBuffer) = + System.String(lexbuf.Buffer, lexbuf.BufferScanStart, lexbuf.LexemeLength) + + member _.IsPastEndOfStream + with get() = eof + and set b = eof <- b + + member _.DiscardInput() = discardInput () + + member _.BufferScanPos = bufferScanStart + bufferScanLength + + member lexbuf.EnsureBufferSize n = + if lexbuf.BufferScanPos + n >= buffer.Length then + let repl = Array.zeroCreate (lexbuf.BufferScanPos + n) + Array.blit buffer bufferScanStart repl bufferScanStart bufferScanLength + buffer <- repl + + static member FromReadFunctions (syncRead : ('char[] * int * int -> int) option, asyncRead : ('char[] * int * int -> Async) option) : LexBuffer<'char> = + let extension= Array.zeroCreate 4096 + let fillers = + { fillSync = + match syncRead with + | None -> None + | Some read -> + Some (fun lexBuffer -> + let n = read(extension, 0, extension.Length) + lexBuffer.EnsureBufferSize n + Array.blit 
extension 0 lexBuffer.Buffer lexBuffer.BufferScanPos n + lexBuffer.BufferMaxScanLength <- lexBuffer.BufferScanLength + n) + fillAsync = + match asyncRead with + | None -> None + | Some read -> + Some (fun lexBuffer -> + async { + let! n = read(extension, 0, extension.Length) + lexBuffer.EnsureBufferSize n + Array.blit extension 0 lexBuffer.Buffer lexBuffer.BufferScanPos n + lexBuffer.BufferMaxScanLength <- lexBuffer.BufferScanLength + n }) } + LexBuffer<_>(fillers) + + // A full type signature is required on this method because it is used at more specific types within its own scope + static member FromFunction (f : 'char[] * int * int -> int) : LexBuffer<'char> = + LexBuffer<_>.FromReadFunctions(Some(f), None) + + static member FromAsyncFunction (f : 'char[] * int * int -> Async) : LexBuffer<'char> = + LexBuffer<_>.FromReadFunctions(None, Some(f)) + + static member FromCharFunction f : LexBuffer = + LexBuffer.FromFunction(fun (buff, start, len) -> + let buff2 = Array.zeroCreate len + let n = f buff2 len + Array.blit buff2 0 buff start len + n) + + static member FromByteFunction f : LexBuffer = + LexBuffer.FromFunction(fun (buff, start, len) -> + let buff2 = Array.zeroCreate len + let n = f buff2 len + Array.blit buff2 0 buff start len + n) + + // A full type signature is required on this method because it is used at more specific types within its own scope + static member FromArray (s: 'char[]) : LexBuffer<'char> = + let lexBuffer = + LexBuffer<_> + { fillSync = Some (fun _ -> ()) + fillAsync = Some (fun _ -> async { return () }) } + lexBuffer.Buffer <- s + lexBuffer.BufferMaxScanLength <- s.Length + lexBuffer + + static member FromBytes arr = + LexBuffer.FromArray(Array.copy arr) + + static member FromChars arr = + LexBuffer.FromArray(Array.copy arr) + + static member FromString (s:string) = + LexBuffer.FromArray (s.ToCharArray()) + + static member FromTextReader (tr:System.IO.TextReader) : LexBuffer = + LexBuffer.FromReadFunctions(Some tr.Read, Some (tr.ReadAsync 
>> Async.AwaitTask)) + + static member FromBinaryReader (br:System.IO.BinaryReader) : LexBuffer = + LexBuffer.FromFunction(br.Read) + + static member FromStream (stream:System.IO.Stream) : LexBuffer = + LexBuffer.FromReadFunctions(Some(stream.Read), Some(fun (buf, offset, len) -> stream.AsyncRead(buf, offset=offset, count=len))) + +module GenericImplFragments = + let startInterpret(lexBuffer:LexBuffer<_>)= + lexBuffer.BufferScanStart <- lexBuffer.BufferScanStart + lexBuffer.LexemeLength + lexBuffer.BufferMaxScanLength <- lexBuffer.BufferMaxScanLength - lexBuffer.LexemeLength + lexBuffer.BufferScanLength <- 0 + lexBuffer.LexemeLength <- 0 + lexBuffer.BufferAcceptAction <- -1 + + let afterRefill (trans: uint16[] array, sentinel, lexBuffer:LexBuffer<_>, scanUntilSentinel, endOfScan, state, eofPos) = + // end of file occurs if we couldn't extend the buffer + if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then + let snew = int trans.[state].[eofPos] // == EOF + if snew = sentinel then + endOfScan() + else + if lexBuffer.IsPastEndOfStream then failwith "End of file on lexing stream" + lexBuffer.IsPastEndOfStream <- true + // Printf.printf "state %d --> %d on eof\n" state snew + scanUntilSentinel(lexBuffer, snew) + else + scanUntilSentinel(lexBuffer, state) + + let onAccept (lexBuffer:LexBuffer<_>, a) = + lexBuffer.LexemeLength <- lexBuffer.BufferScanLength + lexBuffer.BufferAcceptAction <- a + +open GenericImplFragments + +[] +type AsciiTables(trans: uint16[] array, accept: uint16[]) = + let rec scanUntilSentinel(lexBuffer, state) = let sentinel = 255 * 256 + 255 - let numUnicodeCategories = 30 - let numLowUnicodeChars = 128 - let numSpecificUnicodeChars = (trans.[0].Length - 1 - numLowUnicodeChars - numUnicodeCategories)/2 - let lookupUnicodeCharacters (state,inp: char) = - let inpAsInt = int inp - // Is it a fast ASCII character? 
- if inpAsInt < numLowUnicodeChars then - int trans.[state].[inpAsInt] + // Return an endOfScan after consuming the input + let a = int accept.[state] + if a <> sentinel then + onAccept (lexBuffer, a) + + if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then + lexBuffer.DiscardInput() + lexBuffer.RefillBuffer () + // end of file occurs if we couldn't extend the buffer + afterRefill (trans, sentinel, lexBuffer, scanUntilSentinel, lexBuffer.EndOfScan, state, 256 (* == EOF *) ) + else + // read a character - end the scan if there are no further transitions + let inp = int(lexBuffer.Buffer.[lexBuffer.BufferScanPos]) + let snew = int trans.[state].[inp] + if snew = sentinel then + lexBuffer.EndOfScan() else - // Search for a specific unicode character - let baseForSpecificUnicodeChars = numLowUnicodeChars - let rec loop i = - if i >= numSpecificUnicodeChars then - // OK, if we failed then read the 'others' entry in the alphabet, - // which covers all Unicode characters not covered in other - // ways - let baseForUnicodeCategories = numLowUnicodeChars+numSpecificUnicodeChars*2 - let unicodeCategory = System.Globalization.CharUnicodeInfo.GetUnicodeCategory(inp) - //System.Console.WriteLine("inp = {0}, unicodeCategory = {1}", [| box inp; box unicodeCategory |]); - int trans.[state].[baseForUnicodeCategories + int32 unicodeCategory] - else - // This is the specific unicode character - let c = char (int trans.[state].[baseForSpecificUnicodeChars+i*2]) - //System.Console.WriteLine("c = {0}, inp = {1}, i = {2}", [| box c; box inp; box i |]); - // OK, have we found the entry for a specific unicode character? - if c = inp - then int trans.[state].[baseForSpecificUnicodeChars+i*2+1] - else loop(i+1) + lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1 + // Printf.printf "state %d --> %d on '%c' (%d)\n" state snew (Char.chr inp) inp + scanUntilSentinel(lexBuffer, snew) + + /// Interpret tables for an ascii lexer generated by fslex. 
+ member tables.Interpret(initialState, lexBuffer : LexBuffer) = + startInterpret(lexBuffer) + scanUntilSentinel(lexBuffer, initialState) + + /// Interpret tables for an ascii lexer generated by fslex. + member tables.AsyncInterpret(initialState, lexBuffer : LexBuffer) = + + let rec scanUntilSentinel(lexBuffer, state) : Async = + async { + let sentinel = 255 * 256 + 255 + // Return an endOfScan after consuming the input + let a = int accept.[state] + if a <> sentinel then + onAccept (lexBuffer, a) - loop 0 - let eofPos = numLowUnicodeChars + 2*numSpecificUnicodeChars + numUnicodeCategories + if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then + lexBuffer.DiscardInput() + do! lexBuffer.AsyncRefillBuffer () + // end of file occurs if we couldn't extend the buffer + return! afterRefill (trans, sentinel, lexBuffer, scanUntilSentinel, endOfScan, state, 256 (* == EOF *) ) + else + // read a character - end the scan if there are no further transitions + let inp = int(lexBuffer.Buffer.[lexBuffer.BufferScanPos]) + let snew = int trans.[state].[inp] + if snew = sentinel then + return! endOfScan() + else + lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1 + return! scanUntilSentinel(lexBuffer, snew) + } + + and endOfScan() = + async { return lexBuffer.EndOfScan() } + + startInterpret(lexBuffer) + + scanUntilSentinel(lexBuffer, initialState) + + + static member Create(trans, accept) = AsciiTables(trans, accept) + +[] +type UnicodeTables(trans: uint16[] array, accept: uint16[]) = + let sentinel = 255 * 256 + 255 + let numUnicodeCategories = 30 + let numLowUnicodeChars = 128 + let numSpecificUnicodeChars = (trans.[0].Length - 1 - numLowUnicodeChars - numUnicodeCategories)/2 + let lookupUnicodeCharacters (state, inp: char) = + let inpAsInt = int inp + // Is it a fast ASCII character? 
+ if inpAsInt < numLowUnicodeChars then + int trans.[state].[inpAsInt] + else + // Search for a specific unicode character + let baseForSpecificUnicodeChars = numLowUnicodeChars + let rec loop i = + if i >= numSpecificUnicodeChars then + // OK, if we failed then read the 'others' entry in the alphabet, + // which covers all Unicode characters not covered in other + // ways + let baseForUnicodeCategories = numLowUnicodeChars+numSpecificUnicodeChars*2 + let unicodeCategory = System.Globalization.CharUnicodeInfo.GetUnicodeCategory(inp) + //System.Console.WriteLine("inp = {0}, unicodeCategory = {1}", [| box inp; box unicodeCategory |]) + int trans.[state].[baseForUnicodeCategories + int32 unicodeCategory] + else + // This is the specific unicode character + let c = char (int trans.[state].[baseForSpecificUnicodeChars+i*2]) + //System.Console.WriteLine("c = {0}, inp = {1}, i = {2}", [| box c; box inp; box i |]) + // OK, have we found the entry for a specific unicode character? + if c = inp + then int trans.[state].[baseForSpecificUnicodeChars+i*2+1] + else loop(i+1) + + loop 0 + let eofPos = numLowUnicodeChars + 2*numSpecificUnicodeChars + numUnicodeCategories + + let rec scanUntilSentinel(lexBuffer, state) = + // Return an endOfScan after consuming the input + let a = int accept.[state] + if a <> sentinel then + onAccept(lexBuffer, a) - let rec scanUntilSentinel(lexBuffer,state) = - // Return an endOfScan after consuming the input - let a = int accept.[state] - if a <> sentinel then - onAccept(lexBuffer,a) + if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then + lexBuffer.DiscardInput() + lexBuffer.RefillBuffer () + // end of file occurs if we couldn't extend the buffer + afterRefill (trans, sentinel, lexBuffer, scanUntilSentinel, lexBuffer.EndOfScan, state, eofPos) + else + // read a character - end the scan if there are no further transitions + let inp = lexBuffer.Buffer.[lexBuffer.BufferScanPos] - if lexBuffer.BufferScanLength = 
lexBuffer.BufferMaxScanLength then - lexBuffer.DiscardInput(); - lexBuffer.RefillBuffer (); - // end of file occurs if we couldn't extend the buffer - afterRefill (trans,sentinel,lexBuffer,scanUntilSentinel,lexBuffer.EndOfScan,state,eofPos) - else - // read a character - end the scan if there are no further transitions - let inp = lexBuffer.Buffer.[lexBuffer.BufferScanPos] - - // Find the new state - let snew = lookupUnicodeCharacters (state,inp) + // Find the new state + let snew = lookupUnicodeCharacters (state, inp) - if snew = sentinel then - lexBuffer.EndOfScan() - else - lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1; - // Printf.printf "state %d --> %d on '%c' (%d)\n" s snew (char inp) inp; - scanUntilSentinel(lexBuffer,snew) - - // Each row for the Unicode table has format - // 128 entries for ASCII characters - // A variable number of 2*UInt16 entries for SpecificUnicodeChars - // 30 entries, one for each UnicodeCategory - // 1 entry for EOF - - member tables.Interpret(initialState,lexBuffer : LexBuffer) = - startInterpret(lexBuffer) - scanUntilSentinel(lexBuffer, initialState) - - member tables.AsyncInterpret(initialState,lexBuffer : LexBuffer) = - - let rec scanUntilSentinel(lexBuffer, state) = - async { - // Return an endOfScan after consuming the input - let a = int accept.[state] - if a <> sentinel then - onAccept(lexBuffer,a) + if snew = sentinel then + lexBuffer.EndOfScan() + else + lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1 + // Printf.printf "state %d --> %d on '%c' (%d)\n" s snew (char inp) inp + scanUntilSentinel(lexBuffer, snew) + + // Each row for the Unicode table has format + // 128 entries for ASCII characters + // A variable number of 2*UInt16 entries for SpecificUnicodeChars + // 30 entries, one for each UnicodeCategory + // 1 entry for EOF + + member tables.Interpret(initialState, lexBuffer : LexBuffer) = + startInterpret(lexBuffer) + scanUntilSentinel(lexBuffer, initialState) + + member 
tables.AsyncInterpret(initialState, lexBuffer : LexBuffer) = + + let rec scanUntilSentinel(lexBuffer, state) = + async { + // Return an endOfScan after consuming the input + let a = int accept.[state] + if a <> sentinel then + onAccept(lexBuffer, a) + + if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then + lexBuffer.DiscardInput() + lexBuffer.RefillBuffer () + // end of file occurs if we couldn't extend the buffer + return! afterRefill (trans, sentinel, lexBuffer, scanUntilSentinel, endOfScan, state, eofPos) + else + // read a character - end the scan if there are no further transitions + let inp = lexBuffer.Buffer.[lexBuffer.BufferScanPos] - if lexBuffer.BufferScanLength = lexBuffer.BufferMaxScanLength then - lexBuffer.DiscardInput(); - lexBuffer.RefillBuffer (); - // end of file occurs if we couldn't extend the buffer - return! afterRefill (trans,sentinel,lexBuffer,scanUntilSentinel,endOfScan,state,eofPos) - else - // read a character - end the scan if there are no further transitions - let inp = lexBuffer.Buffer.[lexBuffer.BufferScanPos] - - // Find the new state - let snew = lookupUnicodeCharacters (state,inp) - - if snew = sentinel then - return! endOfScan() - else - lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1; - return! scanUntilSentinel(lexBuffer, snew) - } - and endOfScan() = - async { return lexBuffer.EndOfScan() } - startInterpret(lexBuffer) - scanUntilSentinel(lexBuffer, initialState) - - static member Create(trans,accept) = new UnicodeTables(trans,accept) + // Find the new state + let snew = lookupUnicodeCharacters (state, inp) + + if snew = sentinel then + return! endOfScan() + else + lexBuffer.BufferScanLength <- lexBuffer.BufferScanLength + 1 + return! 
scanUntilSentinel(lexBuffer, snew) + } + and endOfScan() = + async { return lexBuffer.EndOfScan() } + startInterpret(lexBuffer) + scanUntilSentinel(lexBuffer, initialState) + + static member Create(trans, accept) = UnicodeTables(trans, accept) + +open System.IO + +let UnicodeFileAsLexbuf (filename,codePage : int option) : FileStream * StreamReader * LexBuffer = + // Use the .NET functionality to auto-detect the unicode encoding + // It also presents the bytes read to the lexer in UTF8 decoded form + let stream = new FileStream(filename,FileMode.Open,FileAccess.Read,FileShare.Read) + let reader = + match codePage with + | None -> new StreamReader(stream,true) + | Some n -> new StreamReader(stream,System.Text.Encoding.GetEncoding(n)) + let lexbuf = LexBuffer.FromFunction(reader.Read) + lexbuf.EndPos <- Position.FirstLine(filename) + stream, reader, lexbuf \ No newline at end of file diff --git a/buildtools/fsyacc/Lexing.fsi b/buildtools/fsyacc/Lexing.fsi index e31ad411aa9..866ba6a1e56 100644 --- a/buildtools/fsyacc/Lexing.fsi +++ b/buildtools/fsyacc/Lexing.fsi @@ -5,147 +5,160 @@ // (c) Microsoft Corporation 2005-2008. //=========================================================================== -#if INTERNALIZED_FSLEXYACC_RUNTIME -namespace Internal.Utilities.Text.Lexing -#else -namespace Microsoft.FSharp.Text.Lexing -#endif +module FSharp.Text.Lexing open System.Collections.Generic /// Position information stored for lexing tokens -// -// Note: this is an OCaml compat record type. 
-#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal Position = -#else -type Position = -#endif - { /// The file name for the position - pos_fname: string; +[] +type Position = + { + /// The file name for the position + pos_fname: string + /// The line number for the position - pos_lnum: int; -#if INTERNALIZED_FSLEXYACC_RUNTIME + pos_lnum: int + /// The line number for the position in the original source file - pos_orig_lnum : int; -#endif + pos_orig_lnum : int + /// The absolute offset of the beginning of the line - pos_bol: int; + pos_bol: int + /// The absolute offset of the column for the position - pos_cnum: int; } + pos_cnum: int + } + /// The file name associated with the input stream. member FileName : string - /// The line number in the input stream, assuming fresh positions have been updated + + /// The line number in the input stream, assuming fresh positions have been updated /// using AsNewLinePos() and by modifying the EndPos property of the LexBuffer. member Line : int -#if INTERNALIZED_FSLEXYACC_RUNTIME - /// The line number for the position in the input stream, assuming fresh positions have been updated + + /// The line number for the position in the input stream, assuming fresh positions have been updated /// using AsNewLinePos() member OriginalLine : int -#endif - [] + + [] member Char : int + /// The character number in the input stream member AbsoluteOffset : int + /// Return absolute offset of the start of the line marked by the position member StartOfLineAbsoluteOffset : int + /// Return the column number marked by the position, i.e. 
the difference between the AbsoluteOffset and the StartOfLineAbsoluteOffset member Column : int + // Given a position just beyond the end of a line, return a position at the start of the next line - member NextLine : Position - + member NextLine : Position + /// Given a position at the start of a token of length n, return a position just beyond the end of the token member EndOfToken: n:int -> Position + /// Gives a position shifted by specified number of characters member ShiftColumnBy: by:int -> Position - - [] + + [] member AsNewLinePos : unit -> Position - - /// Get an arbitrary position, with the empty string as filename, and + + /// Get an arbitrary position, with the empty string as filename, and static member Empty : Position /// Get a position corresponding to the first line (line number 1) in a given file static member FirstLine : filename:string -> Position - + [] -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal LexBuffer<'char> = -#else /// Input buffers consumed by lexers generated by fslex.exe type LexBuffer<'char> = -#endif /// The start position for the lexeme member StartPos: Position with get,set + /// The end position for the lexeme member EndPos: Position with get,set - /// The matched string + + /// The matched string member Lexeme: 'char array - + /// Fast helper to turn the matched characters into a string, avoiding an intermediate array static member LexemeString : LexBuffer -> string - - /// The length of the matched string + + /// The length of the matched string member LexemeLength: int - /// Fetch a particular character in the matched string + + /// Fetch a particular character in the matched string member LexemeChar: int -> 'char /// Dynamically typed, non-lexically scoped parameter table member BufferLocalStore : IDictionary - + /// True if the refill of the buffer ever failed , or if explicitly set to true. 
member IsPastEndOfStream: bool with get,set - /// Remove all input, though don't discard the current lexeme + + /// Remove all input, though don't discard the current lexeme member DiscardInput: unit -> unit /// Create a lex buffer suitable for byte lexing that reads characters from the given array static member FromBytes: byte[] -> LexBuffer + /// Create a lex buffer suitable for Unicode lexing that reads characters from the given array static member FromChars: char[] -> LexBuffer + /// Create a lex buffer suitable for Unicode lexing that reads characters from the given string static member FromString: string -> LexBuffer + /// Create a lex buffer that reads character or byte inputs by using the given function static member FromFunction: ('char[] * int * int -> int) -> LexBuffer<'char> + /// Create a lex buffer that asynchronously reads character or byte inputs by using the given function static member FromAsyncFunction: ('char[] * int * int -> Async) -> LexBuffer<'char> - [.FromFunction instead")>] static member FromCharFunction: (char[] -> int -> int) -> LexBuffer + [.FromFunction instead")>] static member FromByteFunction: (byte[] -> int -> int) -> LexBuffer /// Create a lex buffer suitable for use with a Unicode lexer that reads character inputs from the given text reader static member FromTextReader: System.IO.TextReader -> LexBuffer + /// Create a lex buffer suitable for use with ASCII byte lexing that reads byte inputs from the given binary reader static member FromBinaryReader: System.IO.BinaryReader -> LexBuffer -/// The type of tables for an ascii lexer generated by fslex. +/// The type of tables for an ascii lexer generated by fslex. [] -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal AsciiTables = -#else type AsciiTables = -#endif static member Create : uint16[] array * uint16[] -> AsciiTables - /// Interpret tables for an ascii lexer generated by fslex. + + /// Interpret tables for an ascii lexer generated by fslex. 
member Interpret: initialState:int * LexBuffer -> int + + [] /// Interpret tables for an ascii lexer generated by fslex, processing input asynchronously member AsyncInterpret: initialState:int * LexBuffer -> Async -/// The type of tables for an unicode lexer generated by fslex. +/// The type of tables for an unicode lexer generated by fslex. [] -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal UnicodeTables = -#else type UnicodeTables = -#endif + static member Create : uint16[] array * uint16[] -> UnicodeTables - /// Interpret tables for a unicode lexer generated by fslex. + + /// Interpret tables for a unicode lexer generated by fslex. member Interpret: initialState:int * LexBuffer -> int + [] /// Interpret tables for a unicode lexer generated by fslex, processing input asynchronously member AsyncInterpret: initialState:int * LexBuffer -> Async + +/// Standard utility to create a Unicode LexBuffer +/// +/// One small annoyance is that LexBuffers and not IDisposable. This means +/// we can't just return the LexBuffer object, since the file it wraps wouldn't +/// get closed when we're finished with the LexBuffer. Hence we return the stream, +/// the reader and the LexBuffer. The caller should dispose the first two when done. +val UnicodeFileAsLexbuf: string * int option -> System.IO.FileStream * System.IO.StreamReader * LexBuffer \ No newline at end of file diff --git a/buildtools/fsyacc/Parsing.fs b/buildtools/fsyacc/Parsing.fs index 01dccfb6101..f66aa7a77f4 100644 --- a/buildtools/fsyacc/Parsing.fs +++ b/buildtools/fsyacc/Parsing.fs @@ -1,87 +1,76 @@ // (c) Microsoft Corporation 2005-2009. 
-#if INTERNALIZED_FSLEXYACC_RUNTIME +namespace FSharp.Text.Parsing +open FSharp.Text.Lexing -namespace Internal.Utilities.Text.Parsing -open Internal.Utilities -open Internal.Utilities.Text.Lexing - -#else -namespace Microsoft.FSharp.Text.Parsing -open Microsoft.FSharp.Text.Lexing -#endif - - - -open System open System.Collections.Generic -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal IParseState = -#else type IParseState = -#endif abstract InputRange: int -> Position * Position + abstract InputEndPosition: int -> Position + abstract InputStartPosition: int -> Position + abstract ResultRange: Position * Position + abstract GetInput: int -> obj + abstract ParserLocalStore : IDictionary + abstract RaiseError<'b> : unit -> 'b //------------------------------------------------------------------------- // This context is passed to the error reporter when a syntax error occurs [] -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal ParseErrorContext<'tok> -#else type ParseErrorContext<'tok> -#endif (//lexbuf: LexBuffer<_>, stateStack:int list, parseState: IParseState, reduceTokens: int list, currentToken: 'tok option, reducibleProductions: int list list, - shiftableTokens: int list , + shiftableTokens: int list, message : string) = - //member x.LexBuffer = lexbuf - member x.StateStack = stateStack - member x.ReduceTokens = reduceTokens - member x.CurrentToken = currentToken - member x.ParseState = parseState - member x.ReducibleProductions = reducibleProductions - member x.ShiftTokens = shiftableTokens - member x.Message = message + + member _.StateStack = stateStack + + member _.ReduceTokens = reduceTokens + + member _.CurrentToken = currentToken + + member _.ParseState = parseState + + member _.ReducibleProductions = reducibleProductions + + member _.ShiftTokens = shiftableTokens + + member _.Message = message //------------------------------------------------------------------------- // This is the data structure emitted as code by FSYACC. 
-#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal Tables<'tok> = -#else type Tables<'tok> = -#endif - { reductions: (IParseState -> obj) array; - endOfInputTag: int; - tagOfToken: 'tok -> int; - dataOfToken: 'tok -> obj; - actionTableElements: uint16[]; - actionTableRowOffsets: uint16[]; - reductionSymbolCounts: uint16[]; - immediateActions: uint16[]; - gotos: uint16[]; - sparseGotoTableRowOffsets: uint16[]; - stateToProdIdxsTableElements: uint16[]; - stateToProdIdxsTableRowOffsets: uint16[]; - productionToNonTerminalTable: uint16[]; + { reductions: (IParseState -> obj)[] + endOfInputTag: int + tagOfToken: 'tok -> int + dataOfToken: 'tok -> obj + actionTableElements: uint16[] + actionTableRowOffsets: uint16[] + reductionSymbolCounts: uint16[] + immediateActions: uint16[] + gotos: uint16[] + sparseGotoTableRowOffsets: uint16[] + stateToProdIdxsTableElements: uint16[] + stateToProdIdxsTableRowOffsets: uint16[] + productionToNonTerminalTable: uint16[] /// For fsyacc.exe, this entry is filled in by context from the generated parser file. 
If no 'parse_error' function /// is defined by the user then ParseHelpers.parse_error is used by default (ParseHelpers is opened /// at the top of the generated parser file) - parseError: ParseErrorContext<'tok> -> unit; - numTerminals: int; + parseError: ParseErrorContext<'tok> -> unit + numTerminals: int tagOfErrorTerminal: int } //------------------------------------------------------------------------- @@ -90,11 +79,7 @@ type Tables<'tok> = // This type is in System.dll so for the moment we can't use it in FSharp.Core.dll //type Stack<'a> = System.Collections.Generic.Stack<'a> -#if INTERNALIZED_FSLEXYACC_RUNTIME -type Stack<'a>(n) = -#else type internal Stack<'a>(n) = -#endif let mutable contents = Array.zeroCreate<'a>(n) let mutable count = 0 @@ -102,16 +87,16 @@ type internal Stack<'a>(n) = let oldSize = Array.length contents if newSize > oldSize then let old = contents - contents <- Array.zeroCreate (max newSize (oldSize * 2)); - Array.blit old 0 contents 0 count; + contents <- Array.zeroCreate (max newSize (oldSize * 2)) + Array.blit old 0 contents 0 count member buf.Count = count member buf.Pop() = count <- count - 1 member buf.Peep() = contents.[count - 1] member buf.Top(n) = [ for x in contents.[max 0 (count-n)..count - 1] -> x ] |> List.rev member buf.Push(x) = - buf.Ensure(count + 1); - contents.[count] <- x; + buf.Ensure(count + 1) + contents.[count] <- x count <- count + 1 member buf.IsEmpty = (count = 0) @@ -132,11 +117,7 @@ module Flags = let mutable debug = false #endif -#if INTERNALIZED_FSLEXYACC_RUNTIME -module internal Implementation = -#else module Implementation = -#endif // Definitions shared with fsyacc let anyMarker = 0xffff @@ -153,7 +134,7 @@ module Implementation = // Read the tables written by FSYACC. 
type AssocTable(elemTab:uint16[], offsetTab:uint16[]) = - let cache = new Dictionary<_,_>(2000) + let cache = Dictionary<_,_>(2000) member t.readAssoc (minElemNum,maxElemNum,defaultValueOfAssoc,keyToFind) = // do a binary chop on the table @@ -164,10 +145,12 @@ module Implementation = let x = int elemTab.[elemNumber*2] if keyToFind = x then int elemTab.[elemNumber*2+1] - elif keyToFind < x then t.readAssoc (minElemNum ,elemNumber,defaultValueOfAssoc,keyToFind) - else t.readAssoc (elemNumber+1,maxElemNum,defaultValueOfAssoc,keyToFind) + elif keyToFind < x then + t.readAssoc (minElemNum,elemNumber,defaultValueOfAssoc,keyToFind) + else + t.readAssoc (elemNumber+1,maxElemNum,defaultValueOfAssoc,keyToFind) - member t.Read(rowNumber ,keyToFind) = + member t.Read(rowNumber,keyToFind) = // First check the sparse lookaside table // Performance note: without this lookaside table the binary chop in readAssoc @@ -191,7 +174,7 @@ module Implementation = // Read all entries in the association table // Used during error recovery to find all valid entries in the table - member x.ReadAll(n) = + member _.ReadAll(n) = let headOfTable = int offsetTab.[n] let firstElemNumber = headOfTable + 1 let numberOfElementsInAssoc = int32 elemTab.[headOfTable*2] @@ -202,7 +185,7 @@ module Implementation = type IdxToIdxListTable(elemTab:uint16[], offsetTab:uint16[]) = // Read all entries in a row of the table - member x.ReadAll(n) = + member _.ReadAll(n) = let headOfTable = int offsetTab.[n] let firstElemNumber = headOfTable + 1 let numberOfElements = int32 elemTab.[headOfTable] @@ -217,22 +200,31 @@ module Implementation = val value: obj val startPos: Position val endPos: Position - new(value,startPos,endPos) = { value=value; startPos=startPos;endPos=endPos } + + new(value,startPos,endPos) = { value=value; startPos=startPos; endPos=endPos } let interpret (tables: Tables<'tok>) lexer (lexbuf : LexBuffer<_>) initialState = - let localStore = new Dictionary() in - localStore.["LexBuffer"] <- lexbuf; 
+ let localStore = Dictionary<string,obj>() in + localStore.["LexBuffer"] <- lexbuf #if __DEBUG - if Flags.debug then System.Console.WriteLine("\nParser: interpret tables"); + if Flags.debug then System.Console.WriteLine("\nParser: interpret tables") #endif - let stateStack : Stack<int> = new Stack<_>(100) - stateStack.Push(initialState); - let valueStack = new Stack<ValueInfo>(100) + let stateStack : Stack<int> = Stack<_>(100) + + stateStack.Push(initialState) + + let valueStack = Stack<ValueInfo>(100) + let mutable haveLookahead = false + let mutable lookaheadToken = Unchecked.defaultof<'tok> + let mutable lookaheadEndPos = Unchecked.defaultof<Position> + let mutable lookaheadStartPos = Unchecked.defaultof<Position> + let mutable finished = false + // After an error occurs, we suppress errors until we've shifted three tokens in a row. let mutable errorSuppressionCountDown = 0
// + let mutable inEofCountDown = false + let mutable eofCountDown = 20 // Number of EOFs to supply at the end for error recovery + // The 100 here means a maximum of 100 elements for each rule - let ruleStartPoss = (Array.zeroCreate 100 : Position array) - let ruleEndPoss = (Array.zeroCreate 100 : Position array) - let ruleValues = (Array.zeroCreate 100 : obj array) - let lhsPos = (Array.zeroCreate 2 : Position array) + let ruleStartPoss = (Array.zeroCreate 100 : Position[]) + let ruleEndPoss = (Array.zeroCreate 100 : Position[]) + let ruleValues = (Array.zeroCreate 100 : obj[]) + let lhsPos = (Array.zeroCreate 2 : Position[]) + let reductions = tables.reductions - let actionTable = new AssocTable(tables.actionTableElements, tables.actionTableRowOffsets) - let gotoTable = new AssocTable(tables.gotos, tables.sparseGotoTableRowOffsets) - let stateToProdIdxsTable = new IdxToIdxListTable(tables.stateToProdIdxsTableElements, tables.stateToProdIdxsTableRowOffsets) + let actionTable = AssocTable(tables.actionTableElements, tables.actionTableRowOffsets) + let gotoTable = AssocTable(tables.gotos, tables.sparseGotoTableRowOffsets) + let stateToProdIdxsTable = IdxToIdxListTable(tables.stateToProdIdxsTableElements, tables.stateToProdIdxsTableRowOffsets) let parseState = { new IParseState with - member p.InputRange(n) = ruleStartPoss.[n-1], ruleEndPoss.[n-1]; - member p.InputStartPosition(n) = ruleStartPoss.[n-1] - member p.InputEndPosition(n) = ruleEndPoss.[n-1]; - member p.GetInput(n) = ruleValues.[n-1]; - member p.ResultRange = (lhsPos.[0], lhsPos.[1]); - member p.ParserLocalStore = (localStore :> IDictionary<_,_>); - member p.RaiseError() = raise RecoverableParseError (* NOTE: this binding tests the fairly complex logic associated with an object expression implementing a generic abstract method *) + member _.InputRange(n) = ruleStartPoss.[n-1], ruleEndPoss.[n-1] + member _.InputStartPosition(n) = ruleStartPoss.[n-1] + member _.InputEndPosition(n) = ruleEndPoss.[n-1] + 
member _.GetInput(n) = ruleValues.[n-1] + member _.ResultRange = (lhsPos.[0], lhsPos.[1]) + member _.ParserLocalStore = (localStore :> IDictionary<_,_>) + member _.RaiseError() = raise RecoverableParseError (* NOTE: this binding tests the fairly complex logic associated with an object expression implementing a generic abstract method *) } #if __DEBUG @@ -275,22 +271,22 @@ module Implementation = // Pop the stack until we can shift the 'error' token. If 'tokenOpt' is given // then keep popping until we can shift both the 'error' token and the token in 'tokenOpt'. // This is used at end-of-file to make sure we can shift both the 'error' token and the 'EOF' token. - let rec popStackUntilErrorShifted(tokenOpt) = + let rec popStackUntilErrorShifted tokenOpt = // Keep popping the stack until the "error" terminal is shifted #if __DEBUG - if Flags.debug then System.Console.WriteLine("popStackUntilErrorShifted"); + if Flags.debug then System.Console.WriteLine("popStackUntilErrorShifted") #endif if stateStack.IsEmpty then #if __DEBUG if Flags.debug then - System.Console.WriteLine("state stack empty during error recovery - generating parse error"); + System.Console.WriteLine("state stack empty during error recovery - generating parse error") #endif - failwith "parse error"; + failwith "parse error" let currState = stateStack.Peep() #if __DEBUG if Flags.debug then - System.Console.WriteLine("In state {0} during error recovery", currState); + System.Console.WriteLine("In state {0} during error recovery", currState) #endif let action = actionTable.Read(currState, tables.tagOfErrorTerminal) @@ -303,22 +299,22 @@ module Implementation = actionKind (actionTable.Read(nextState, tables.tagOfToken(token))) = shiftFlag) then #if __DEBUG - if Flags.debug then System.Console.WriteLine("shifting error, continuing with error recovery"); + if Flags.debug then System.Console.WriteLine("shifting error, continuing with error recovery") #endif let nextState = actionValue action // The "error" 
non terminal needs position information, though it tends to be unreliable. // Use the StartPos/EndPos from the lex buffer - valueStack.Push(ValueInfo(box (), lexbuf.StartPos, lexbuf.EndPos)); + valueStack.Push(ValueInfo(box (), lexbuf.StartPos, lexbuf.EndPos)) stateStack.Push(nextState) else if valueStack.IsEmpty then - failwith "parse error"; + failwith "parse error" #if __DEBUG if Flags.debug then - System.Console.WriteLine("popping stack during error recovery"); + System.Console.WriteLine("popping stack during error recovery") #endif - valueStack.Pop(); - stateStack.Pop(); + valueStack.Pop() + stateStack.Pop() popStackUntilErrorShifted(tokenOpt) while not finished do @@ -350,7 +346,7 @@ module Implementation = lookaheadToken <- lexer lexbuf lookaheadStartPos <- lexbuf.StartPos lookaheadEndPos <- lexbuf.EndPos - haveLookahead <- true; + haveLookahead <- true let tag = if haveLookahead then tables.tagOfToken lookaheadToken @@ -362,17 +358,17 @@ module Implementation = let kind = actionKind action if kind = shiftFlag then ( if errorSuppressionCountDown > 0 then - errorSuppressionCountDown <- errorSuppressionCountDown - 1; + errorSuppressionCountDown <- errorSuppressionCountDown - 1 #if __DEBUG - if Flags.debug then Console.WriteLine("shifting, reduced errorRecoverylevel to {0}\n", errorSuppressionCountDown); + if Flags.debug then Console.WriteLine("shifting, reduced errorRecoverylevel to {0}\n", errorSuppressionCountDown) #endif let nextState = actionValue action - if not haveLookahead then failwith "shift on end of input!"; + if not haveLookahead then failwith "shift on end of input!" 
let data = tables.dataOfToken lookaheadToken - valueStack.Push(ValueInfo(data, lookaheadStartPos, lookaheadEndPos)); - stateStack.Push(nextState); + valueStack.Push(ValueInfo(data, lookaheadStartPos, lookaheadEndPos)) + stateStack.Push(nextState) #if __DEBUG - if Flags.debug then Console.WriteLine("shift/consume input {0}, shift to state {1}", report haveLookahead lookaheadToken, nextState); + if Flags.debug then Console.WriteLine("shift/consume input {0}, shift to state {1}", report haveLookahead lookaheadToken, nextState) #endif haveLookahead <- false @@ -382,27 +378,27 @@ module Implementation = let n = int tables.reductionSymbolCounts.[prod] // pop the symbols, populate the values and populate the locations #if __DEBUG - if Flags.debug then Console.Write("reduce popping {0} values/states, lookahead {1}", n, report haveLookahead lookaheadToken); + if Flags.debug then Console.Write("reduce popping {0} values/states, lookahead {1}", n, report haveLookahead lookaheadToken) #endif - lhsPos.[0] <- Position.Empty; - lhsPos.[1] <- Position.Empty; + lhsPos.[0] <- Position.Empty + lhsPos.[1] <- Position.Empty for i = 0 to n - 1 do - if valueStack.IsEmpty then failwith "empty symbol stack"; + if valueStack.IsEmpty then failwith "empty symbol stack" let topVal = valueStack.Peep() - valueStack.Pop(); - stateStack.Pop(); - ruleValues.[(n-i)-1] <- topVal.value; - ruleStartPoss.[(n-i)-1] <- topVal.startPos; - ruleEndPoss.[(n-i)-1] <- topVal.endPos; - if lhsPos.[1] = Position.Empty then lhsPos.[1] <- topVal.endPos; + valueStack.Pop() + stateStack.Pop() + ruleValues.[(n-i)-1] <- topVal.value + ruleStartPoss.[(n-i)-1] <- topVal.startPos + ruleEndPoss.[(n-i)-1] <- topVal.endPos + if lhsPos.[1] = Position.Empty then lhsPos.[1] <- topVal.endPos if not (topVal.startPos = Position.Empty) then lhsPos.[0] <- topVal.startPos - done; + done try - // Printf.printf "reduce %d\n" prod; + // Printf.printf "reduce %d\n" prod let redResult = reduction parseState - 
valueStack.Push(ValueInfo(redResult, lhsPos.[0], lhsPos.[1])); + valueStack.Push(ValueInfo(redResult, lhsPos.[0], lhsPos.[1])) let currState = stateStack.Peep() let newGotoState = gotoTable.Read(int tables.productionToNonTerminalTable.[prod], currState) stateStack.Push(newGotoState) @@ -411,23 +407,23 @@ module Implementation = #endif with | Accept res -> - finished <- true; + finished <- true valueStack.Push(ValueInfo(res, lhsPos.[0], lhsPos.[1])) | RecoverableParseError -> #if __DEBUG - if Flags.debug then Console.WriteLine("RecoverableParseErrorException...\n"); + if Flags.debug then Console.WriteLine("RecoverableParseErrorException...\n") #endif - popStackUntilErrorShifted(None); + popStackUntilErrorShifted(None) // User code raised a Parse_error. Don't report errors again until three tokens have been shifted errorSuppressionCountDown <- 3 elif kind = errorFlag then ( #if __DEBUG - if Flags.debug then Console.Write("ErrorFlag... "); + if Flags.debug then Console.Write("ErrorFlag... ") #endif // Silently discard inputs and don't report errors // until three tokens in a row have been shifted #if __DEBUG - if Flags.debug then printfn "error on token '%A' " (if haveLookahead then Some(lookaheadToken) else None); + if Flags.debug then printfn "error on token '%A' " (if haveLookahead then Some(lookaheadToken) else None) #endif if errorSuppressionCountDown > 0 then // If we're in the end-of-file count down then we're very keen to 'Accept'. @@ -435,16 +431,16 @@ module Implementation = // and an EOF token. 
if inEofCountDown && eofCountDown < 10 then #if __DEBUG - if Flags.debug then printfn "poppin stack, lokking to shift both 'error' and that token, during end-of-file error recovery" ; + if Flags.debug then printfn "popping stack, looking to shift both 'error' and that token, during end-of-file error recovery" #endif - popStackUntilErrorShifted(if haveLookahead then Some(lookaheadToken) else None); + popStackUntilErrorShifted(if haveLookahead then Some(lookaheadToken) else None) // If we don't haveLookahead then the end-of-file count down is over and we have no further options. if not haveLookahead then failwith "parse error: unexpected end of file" #if __DEBUG - if Flags.debug then printfn "discarding token '%A' during error suppression" (if haveLookahead then Some(lookaheadToken) else None); + if Flags.debug then printfn "discarding token '%A' during error suppression" (if haveLookahead then Some(lookaheadToken) else None) #endif // Discard the token haveLookahead <- false @@ -454,10 +450,10 @@ let currentToken = if haveLookahead then Some(lookaheadToken) else None let actions,defaultAction = actionTable.ReadAll(state) - let explicit = Set.ofList [ for (tag,_action) in actions -> tag ] + let explicit = Set.ofList [ for tag,_action in actions -> tag ] let shiftableTokens = - [ for (tag,action) in actions do + [ for tag,action in actions do if (actionKind action) = shiftFlag then yield tag if actionKind defaultAction = shiftFlag then
shiftableTokens, "syntax error") - tables.parseError(errorContext); - popStackUntilErrorShifted(None); - errorSuppressionCountDown <- 3; + tables.parseError(errorContext) + popStackUntilErrorShifted(None) + errorSuppressionCountDown <- 3 #if __DEBUG - if Flags.debug then System.Console.WriteLine("generated syntax error and shifted error token, haveLookahead = {0}\n", haveLookahead); + if Flags.debug then System.Console.WriteLine("generated syntax error and shifted error token, haveLookahead = {0}\n", haveLookahead) #endif ) ) elif kind = acceptFlag then finished <- true #if __DEBUG else - if Flags.debug then System.Console.WriteLine("ALARM!!! drop through case in parser"); + if Flags.debug then System.Console.WriteLine("ALARM!!! drop through case in parser") #endif - done; + done // OK, we're done - read off the overall generated value valueStack.Peep().value -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal Tables<'tok> with -#else type Tables<'tok> with -#endif - member tables.Interpret (lexer,lexbuf,initialState) = - Implementation.interpret tables lexer lexbuf initialState + member tables.Interpret (lexer,lexbuf,startState) = + Implementation.interpret tables lexer lexbuf startState -#if INTERNALIZED_FSLEXYACC_RUNTIME -module internal ParseHelpers = -#else module ParseHelpers = -#endif let parse_error (_s:string) = () let parse_error_rich = (None : (ParseErrorContext<_> -> unit) option) diff --git a/buildtools/fsyacc/Parsing.fsi b/buildtools/fsyacc/Parsing.fsi index f4d12606462..e4e7329441a 100644 --- a/buildtools/fsyacc/Parsing.fsi +++ b/buildtools/fsyacc/Parsing.fsi @@ -2,129 +2,132 @@ // (c) Microsoft Corporation 2005-2009. 
//========================================================================= -#if INTERNALIZED_FSLEXYACC_RUNTIME -namespace Internal.Utilities.Text.Parsing -open Internal.Utilities -open Internal.Utilities.Text.Lexing -#else -namespace Microsoft.FSharp.Text.Parsing -open Microsoft.FSharp.Text.Lexing -#endif +namespace FSharp.Text.Parsing +open FSharp.Text.Lexing open System.Collections.Generic -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal IParseState = -#else /// The information accessible via the parseState value within parser actions. type IParseState = -#endif /// Get the start and end position for the terminal or non-terminal at a given index matched by the production abstract InputRange: index:int -> Position * Position + /// Get the end position for the terminal or non-terminal at a given index matched by the production abstract InputEndPosition: int -> Position + /// Get the start position for the terminal or non-terminal at a given index matched by the production abstract InputStartPosition: int -> Position + /// Get the full range of positions matched by the production abstract ResultRange: Position * Position + /// Get the value produced by the terminal or non-terminal at the given position abstract GetInput : int -> obj + /// Get the store of local values associated with this parser // Dynamically typed, non-lexically scoped local store abstract ParserLocalStore : IDictionary<string,obj> + /// Raise an error in this parse context abstract RaiseError<'b> : unit -> 'b [<Sealed>] -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal ParseErrorContext<'tok> = -#else /// The context provided when a parse error occurs type ParseErrorContext<'tok> = -#endif /// The stack of state indexes active at the parse error member StateStack : int list + /// The state active at the parse error member ParseState : IParseState + /// The tokens that would cause a reduction at the parse error member ReduceTokens: int list + /// The stack of productions that would be reduced at the parse error
member ReducibleProductions : int list list + /// The token that caused the parse error member CurrentToken : 'tok option + /// The token that would cause a shift at the parse error member ShiftTokens : int list + /// The message associated with the parse error member Message : string /// Tables generated by fsyacc -#if INTERNALIZED_FSLEXYACC_RUNTIME -type internal Tables<'tok> = -#else /// The type of the tables contained in a file produced by the fsyacc.exe parser generator. type Tables<'tok> = -#endif - { /// The reduction table + { + /// The reduction table reductions: (IParseState -> obj) array ; + /// The token number indicating the end of input endOfInputTag: int; + /// A function to compute the tag of a token tagOfToken: 'tok -> int; + /// A function to compute the data carried by a token dataOfToken: 'tok -> obj; + /// The sparse action table elements actionTableElements: uint16[]; + /// The sparse action table row offsets actionTableRowOffsets: uint16[]; + /// The number of symbols for each reduction reductionSymbolCounts: uint16[]; + /// The immediate action table immediateActions: uint16[]; + /// The sparse goto table gotos: uint16[]; + /// The sparse goto table row offsets sparseGotoTableRowOffsets: uint16[]; + /// The sparse table for the productions active for each state stateToProdIdxsTableElements: uint16[]; + /// The sparse table offsets for the productions active for each state stateToProdIdxsTableRowOffsets: uint16[]; + /// This table is logically part of the Goto table productionToNonTerminalTable: uint16[]; + /// This function is used to hold the user specified "parse_error" or "parse_error_rich" functions parseError: ParseErrorContext<'tok> -> unit; + /// The total number of terminals numTerminals: int; + /// The tag of the error terminal - tagOfErrorTerminal: int } + tagOfErrorTerminal: int + } /// Interpret the parser table taking input from the given lexer, using the given lex buffer, and the given start state. 
/// Returns an object indicating the final synthesized value for the parse. - member Interpret : lexer:(LexBuffer<'char> -> 'tok) * lexbuf:LexBuffer<'char> * initialState:int -> obj + member Interpret : lexer:(LexBuffer<'char> -> 'tok) * lexbuf:LexBuffer<'char> * startState:int -> obj -#if INTERNALIZED_FSLEXYACC_RUNTIME -exception internal Accept of obj -exception internal RecoverableParseError -#else /// Indicates an accept action has occured exception Accept of obj /// Indicates a parse error has occured and parse recovery is in progress exception RecoverableParseError -#endif #if __DEBUG module internal Flags = val mutable debug : bool #endif -#if INTERNALIZED_FSLEXYACC_RUNTIME -module internal ParseHelpers = -#else /// Helpers used by generated parsers. module ParseHelpers = -#endif /// The default implementation of the parse_error_rich function val parse_error_rich: (ParseErrorContext<'tok> -> unit) option + /// The default implementation of the parse_error function val parse_error: string -> unit diff --git a/buildtools/fsyacc/fsyacc.fs b/buildtools/fsyacc/fsyacc.fs index 41d816794d9..057ac708dfe 100644 --- a/buildtools/fsyacc/fsyacc.fs +++ b/buildtools/fsyacc/fsyacc.fs @@ -1,531 +1,88 @@ (* (c) Microsoft Corporation 2005-2008. *) -module internal FsLexYacc.FsYacc.Driver +module FsLexYacc.FsYacc.Program -open System.IO -open System.Collections.Generic open Printf -open Internal.Utilities -open Internal.Utilities.Text.Lexing - -open FsLexYacc.FsYacc +open FSharp.Text open FsLexYacc.FsYacc.AST - -//------------------------------------------------------------------ -// This code is duplicated from Microsoft.FSharp.Compiler.UnicodeLexing - -type Lexbuf = LexBuffer - -/// Standard utility to create a Unicode LexBuffer -/// -/// One small annoyance is that LexBuffers and not IDisposable. This means -/// we can't just return the LexBuffer object, since the file it wraps wouldn't -/// get closed when we're finished with the LexBuffer. 
Hence we return the stream, -/// the reader and the LexBuffer. The caller should dispose the first two when done. -let UnicodeFileAsLexbuf (filename,codePage : int option) : FileStream * StreamReader * Lexbuf = - // Use the .NET functionality to auto-detect the unicode encoding - // It also uses Lexing.from_text_reader to present the bytes read to the lexer in UTF8 decoded form - let stream = new FileStream(filename,FileMode.Open,FileAccess.Read,FileShare.Read) - let reader = - match codePage with - | None -> new StreamReader(stream,true) - | Some n -> new StreamReader(stream,System.Text.Encoding.GetEncoding(n)) - let lexbuf = LexBuffer.FromFunction(reader.Read) - lexbuf.EndPos <- Position.FirstLine(filename); - stream, reader, lexbuf +open FsLexYacc.FsYacc.Driver //------------------------------------------------------------------ // This is the program proper -let input = ref None -let modname= ref None -let internal_module = ref false -let opens= ref [] -let out = ref None -let tokenize = ref false -let compat = ref false -let log = ref false -let light = ref None -let inputCodePage = ref None +let mutable input = None +let mutable modname = None +let mutable internal_module = false +let mutable opens = [] +let mutable out = None +let mutable tokenize = false +let mutable compat = false +let mutable log = false +let mutable light = None +let mutable inputCodePage = None let mutable lexlib = "FSharp.Text.Lexing" let mutable parslib = "FSharp.Text.Parsing" let usage = - [ ArgInfo("-o", ArgType.String (fun s -> out := Some s), "Name the output file."); - ArgInfo("-v", ArgType.Unit (fun () -> log := true), "Produce a listing file."); - ArgInfo("--module", ArgType.String (fun s -> modname := Some s), "Define the F# module name to host the generated parser."); - ArgInfo("--internal", ArgType.Unit (fun () -> internal_module := true), "Generate an internal module"); - ArgInfo("--open", ArgType.String (fun s -> opens := !opens @ [s]), "Add the given module to the list of 
those to open in both the generated signature and implementation."); - ArgInfo("--light", ArgType.Unit (fun () -> light := Some true), "(ignored)"); - ArgInfo("--light-off", ArgType.Unit (fun () -> light := Some false), "Add #light \"off\" to the top of the generated file"); - ArgInfo("--ml-compatibility", ArgType.Set compat, "Support the use of the global state from the 'Parsing' module in FSharp.PowerPack.dll."); - ArgInfo("--tokens", ArgType.Set tokenize, "Simply tokenize the specification file itself."); + [ ArgInfo("-o", ArgType.String (fun s -> out <- Some s), "Name the output file."); + ArgInfo("-v", ArgType.Unit (fun () -> log <- true), "Produce a listing file."); + ArgInfo("--module", ArgType.String (fun s -> modname <- Some s), "Define the F# module name to host the generated parser."); + ArgInfo("--internal", ArgType.Unit (fun () -> internal_module <- true), "Generate an internal module"); + ArgInfo("--open", ArgType.String (fun s -> opens <- opens @ [s]), "Add the given module to the list of those to open in both the generated signature and implementation."); + ArgInfo("--light", ArgType.Unit (fun () -> light <- Some true), "(ignored)"); + ArgInfo("--light-off", ArgType.Unit (fun () -> light <- Some false), "Add #light \"off\" to the top of the generated file"); + ArgInfo("--ml-compatibility", ArgType.Unit (fun _ -> compat <- true), "Support the use of the global state from the 'Parsing' module in FSharp.PowerPack.dll."); + ArgInfo("--tokens", ArgType.Unit (fun _ -> tokenize <- true), "Simply tokenize the specification file itself."); ArgInfo("--lexlib", ArgType.String (fun s -> lexlib <- s), "Specify the namespace for the implementation of the lexer (default: FSharp.Text.Lexing)"); ArgInfo("--parslib", ArgType.String (fun s -> parslib <- s), "Specify the namespace for the implementation of the parser table interpreter (default: FSharp.Text.Parsing)"); - ArgInfo("--codepage", ArgType.Int (fun i -> inputCodePage := Some i), "Assume input lexer 
specification file is encoded with the given codepage."); ] - -let _ = ArgParser.Parse(usage,(fun x -> match !input with Some _ -> failwith "more than one input given" | None -> input := Some x),"fsyacc ") - -let output_int (os: #TextWriter) (n:int) = os.Write(string n) + ArgInfo("--codepage", ArgType.Int (fun i -> inputCodePage <- Some i), "Assume input lexer specification file is encoded with the given codepage."); ] -let outputCodedUInt16 (os: #TextWriter) (n:int) = - os.Write n; - os.Write "us; "; - -let shiftFlag = 0x0000 -let reduceFlag = 0x4000 -let errorFlag = 0x8000 -let acceptFlag = 0xc000 -let actionMask = 0xc000 - -let anyMarker = 0xffff - -let actionCoding action = - match action with - | Accept -> acceptFlag - | Shift n -> shiftFlag ||| n - | Reduce n -> reduceFlag ||| n - | Error -> errorFlag +let _ = ArgParser.Parse(usage,(fun x -> match input with Some _ -> failwith "more than one input given" | None -> input <- Some x),"fsyacc ") let main() = - let filename = (match !input with Some x -> x | None -> failwith "no input given") in + let filename = (match input with Some x -> x | None -> failwith "no input given") in + if tokenize then printTokens filename inputCodePage + let spec = - let stream,reader,lexbuf = UnicodeFileAsLexbuf(filename, !inputCodePage) - use stream = stream - use reader = reader - - try - if !tokenize then begin - while true do - printf "tokenize - getting one token"; - let t = Lexer.token lexbuf in - (*F# printf "tokenize - got %s" (Parser.token_to_string t); F#*) - if t = Parser.EOF then exit 0; - done; - end; - - Parser.spec Lexer.token lexbuf - with e -> - eprintf "%s(%d,%d): error: %s" filename lexbuf.StartPos.Line lexbuf.StartPos.Column e.Message; - exit 1 in - - let has_extension (s:string) = - (s.Length >= 1 && s.[s.Length - 1] = '.') - || Path.HasExtension(s) - - let chop_extension (s:string) = - if not (has_extension s) then invalidArg "s" "the file name does not have an extension" - Path.Combine (Path.GetDirectoryName 
s,Path.GetFileNameWithoutExtension(s)) + match readSpecFromFile filename inputCodePage with + | Ok spec -> spec + | Result.Error (e, line, col) -> + eprintf "%s(%d,%d): error: %s" filename line col e.Message + exit 1 - let checkSuffix (x:string) (y:string) = x.EndsWith(y) - - let output = match !out with Some x -> x | _ -> chop_extension filename + (if checkSuffix filename ".mly" then ".ml" else ".fs") in - let outputi = match !out with Some x -> chop_extension x + (if checkSuffix x ".ml" then ".mli" else ".fsi") | _ -> chop_extension filename + (if checkSuffix filename ".mly" then ".mli" else ".fsi") in - let outputo = - if !log then Some (match !out with Some x -> chop_extension x + ".fsyacc.output" | _ -> chop_extension filename + ".fsyacc.output") - else None - - use os = (File.CreateText output :> TextWriter) - use osi = (File.CreateText outputi :> TextWriter) - - let lineCountOutput = ref 0 - let lineCountSignature = ref 0 - let cos = (os,lineCountOutput) - let cosi = (osi,lineCountSignature) - let cprintf (os:TextWriter,lineCount) fmt = Printf.fprintf os fmt - let cprintfn (os:TextWriter,lineCount) fmt = Printf.kfprintf (fun () -> incr lineCount; os.WriteLine()) os fmt - - let logf = - match outputo with - | None -> (fun f -> ()) - | Some filename -> - let oso = (File.CreateText filename :> TextWriter) - (fun f -> f oso) - - logf (fun oso -> fprintfn oso " Output file describing compiled parser placed in %s and %s" output outputi); - - printfn " building tables"; - let spec1 = ProcessParserSpecAst spec - let (prods,states, startStates,actionTable,immediateActionTable,gotoTable,endOfInputTerminalIdx,errorTerminalIdx,nonTerminals) = - CompilerLalrParserSpec logf spec1 - - let (code,pos) = spec.Header - printfn " %d states" states.Length; - printfn " %d nonterminals" gotoTable.[0].Length; - printfn " %d terminals" actionTable.[0].Length; - printfn " %d productions" prods.Length; - printfn " #rows in action table: %d" actionTable.Length; + use logger = match 
logFileName(filename, out, log) with + | Some outputLogName -> new FileLogger(outputLogName) :> Logger + | None -> new NullLogger() :> Logger + let compiledSpec = compileSpec spec logger + printfn " building tables" + printfn " %d states" compiledSpec.states.Length; + printfn " %d nonterminals" compiledSpec.gotoTable.[0].Length; + printfn " %d terminals" compiledSpec.actionTable.[0].Length; + printfn " %d productions" compiledSpec.prods.Length; + printfn " #rows in action table: %d" compiledSpec.actionTable.Length; (* printfn "#unique rows in action table: %d" (List.length (Array.foldBack (fun row acc -> insert (Array.to_list row) acc) actionTable [])); printfn "maximum #different actions per state: %d" (Array.foldBack (fun row acc ->max (List.length (List.foldBack insert (Array.to_list row) [])) acc) actionTable 0); printfn "average #different actions per state: %d" ((Array.foldBack (fun row acc -> (List.length (List.foldBack insert (Array.to_list row) [])) + acc) actionTable 0) / (Array.length states)); *) - - cprintfn cos "// Implementation file for parser generated by fsyacc"; - cprintfn cosi "// Signature file for parser generated by fsyacc"; - - if (!light = Some(false)) || (!light = None && checkSuffix output ".ml") then - cprintfn cos "#light \"off\""; - cprintfn cosi "#light \"off\""; - - match !modname with - | None -> () - | Some s -> - match !internal_module with - | true -> - cprintfn cos "module internal %s" s; - cprintfn cosi "module internal %s" s; - | false -> - cprintfn cos "module %s" s; - cprintfn cosi "module %s" s; - - cprintfn cos "#nowarn \"64\";; // turn off warnings that type variables used in production annotations are instantiated to concrete type"; - - for s in !opens do - cprintfn cos "open %s" s; - cprintfn cosi "open %s" s; - - cprintfn cos "open %s" lexlib; - cprintfn cos "open %s.ParseHelpers" parslib; - if !compat then - cprintfn cos "open Microsoft.FSharp.Compatibility.OCaml.Parsing"; - - cprintfn cos "# %d \"%s\"" pos.pos_lnum 
pos.pos_fname; - cprintfn cos "%s" code; - lineCountOutput := !lineCountOutput + code.Replace("\r","").Split([| '\n' |]).Length; - - cprintfn cos "# %d \"%s\"" !lineCountOutput output; - // Print the datatype for the tokens - cprintfn cos "// This type is the type of tokens accepted by the parser"; - for out in [cos;cosi] do - cprintfn out "type token = "; - for id,typ in spec.Tokens do - match typ with - | None -> cprintfn out " | %s" id - | Some ty -> cprintfn out " | %s of (%s)" id ty; - - // Print the datatype for the token names - cprintfn cos "// This type is used to give symbolic names to token indexes, useful for error messages"; - for out in [cos;cosi] do - cprintfn out "type tokenId = "; - for id,typ in spec.Tokens do - cprintfn out " | TOKEN_%s" id; - cprintfn out " | TOKEN_end_of_input"; - cprintfn out " | TOKEN_error"; - - cprintfn cos "// This type is used to give symbolic names to token indexes, useful for error messages"; - for out in [cos;cosi] do - cprintfn out "type nonTerminalId = "; - for nt in nonTerminals do - cprintfn out " | NONTERM_%s" nt; - - cprintfn cos ""; - cprintfn cos "// This function maps tokens to integer indexes"; - cprintfn cos "let tagOfToken (t:token) = "; - cprintfn cos " match t with"; - spec.Tokens |> List.iteri (fun i (id,typ) -> - cprintfn cos " | %s %s -> %d " id (match typ with Some _ -> "_" | None -> "") i); - cprintfn cosi "/// This function maps tokens to integer indexes"; - cprintfn cosi "val tagOfToken: token -> int"; - - cprintfn cos ""; - cprintfn cos "// This function maps integer indexes to symbolic token ids"; - cprintfn cos "let tokenTagToTokenId (tokenIdx:int) = "; - cprintfn cos " match tokenIdx with"; - spec.Tokens |> List.iteri (fun i (id,typ) -> - cprintfn cos " | %d -> TOKEN_%s " i id) - cprintfn cos " | %d -> TOKEN_end_of_input" endOfInputTerminalIdx; - cprintfn cos " | %d -> TOKEN_error" errorTerminalIdx; - cprintfn cos " | _ -> failwith \"tokenTagToTokenId: bad token\"" - - cprintfn cosi ""; - 
cprintfn cosi "/// This function maps integer indexes to symbolic token ids"; - cprintfn cosi "val tokenTagToTokenId: int -> tokenId"; - - cprintfn cos ""; - cprintfn cos "/// This function maps production indexes returned in syntax errors to strings representing the non terminal that would be produced by that production"; - cprintfn cos "let prodIdxToNonTerminal (prodIdx:int) = "; - cprintfn cos " match prodIdx with"; - prods |> Array.iteri (fun i (nt,ntIdx,syms,code) -> - cprintfn cos " | %d -> NONTERM_%s " i nt); - cprintfn cos " | _ -> failwith \"prodIdxToNonTerminal: bad production index\"" - - cprintfn cosi ""; - cprintfn cosi "/// This function maps production indexes returned in syntax errors to strings representing the non terminal that would be produced by that production"; - cprintfn cosi "val prodIdxToNonTerminal: int -> nonTerminalId"; - - cprintfn cos ""; - cprintfn cos "let _fsyacc_endOfInputTag = %d " endOfInputTerminalIdx; - cprintfn cos "let _fsyacc_tagOfErrorTerminal = %d" errorTerminalIdx; - cprintfn cos ""; - cprintfn cos "// This function gets the name of a token as a string"; - cprintfn cos "let token_to_string (t:token) = "; - cprintfn cos " match t with "; - spec.Tokens |> List.iteri (fun i (id,typ) -> - cprintfn cos " | %s %s -> \"%s\" " id (match typ with Some _ -> "_" | None -> "") id); - - cprintfn cosi ""; - cprintfn cosi "/// This function gets the name of a token as a string"; - cprintfn cosi "val token_to_string: token -> string"; - - cprintfn cos ""; - cprintfn cos "// This function gets the data carried by a token as an object"; - cprintfn cos "let _fsyacc_dataOfToken (t:token) = "; - cprintfn cos " match t with "; - - for (id,typ) in spec.Tokens do - cprintfn cos " | %s %s -> %s " - id - (match typ with Some _ -> "_fsyacc_x" | None -> "") - (match typ with Some _ -> "Microsoft.FSharp.Core.Operators.box _fsyacc_x" | None -> "(null : System.Object)") - - let tychar = "'cty" - - for (key,_) in spec.Types |> Seq.countBy fst |> 
Seq.filter (fun (_,n) -> n > 1) do - failwithf "%s is given multiple %%type declarations" key; - - for (key,_) in spec.Tokens |> Seq.countBy fst |> Seq.filter (fun (_,n) -> n > 1) do - failwithf "%s is given %%token declarations" key - - let types = Map.ofList spec.Types - let tokens = Map.ofList spec.Tokens - let nStates = states.Length - begin - cprintf cos "let _fsyacc_gotos = [| " ; - let numGotoNonTerminals = gotoTable.[0].Length - let gotoIndexes = Array.create numGotoNonTerminals 0 - let gotoTableCurrIndex = ref 0 in - for j = 0 to numGotoNonTerminals-1 do - gotoIndexes.[j] <- !gotoTableCurrIndex; - - (* Count the number of entries in the association table. *) - let count = ref 0 in - for i = 0 to nStates - 1 do - let goto = gotoTable.[i].[j] - match goto with - | None -> () - | Some _ -> incr count - - (* Write the head of the table (i.e. the number of entries and the default value) *) - gotoTableCurrIndex := !gotoTableCurrIndex + 1; - outputCodedUInt16 os !count; - outputCodedUInt16 os anyMarker; - - (* Write the pairs of entries in incremental order by key *) - (* This lets us implement the lookup by a binary chop. *) - for i = 0 to nStates - 1 do - let goto = gotoTable.[i].[j] - match goto with - | None -> () - | Some n -> - gotoTableCurrIndex := !gotoTableCurrIndex + 1; - outputCodedUInt16 os i; - outputCodedUInt16 os n; - cprintfn cos "|]" ; - (* Output offsets into gotos table where the gotos for a particular nonterminal begin *) - cprintf cos "let _fsyacc_sparseGotoTableRowOffsets = [|" ; - for j = 0 to numGotoNonTerminals-1 do - outputCodedUInt16 os gotoIndexes.[j]; - cprintfn cos "|]" ; - end; - - begin - cprintf cos "let _fsyacc_stateToProdIdxsTableElements = [| " ; - let indexes = Array.create states.Length 0 - let currIndex = ref 0 - for j = 0 to states.Length - 1 do - let state = states.[j] - indexes.[j] <- !currIndex; - - (* Write the head of the table (i.e. 
the number of entries) *) - outputCodedUInt16 os state.Length; - currIndex := !currIndex + state.Length + 1; - - (* Write the pairs of entries in incremental order by key *) - (* This lets us implement the lookup by a binary chop. *) - for prodIdx in state do - outputCodedUInt16 os prodIdx; - cprintfn cos "|]" ; - (* Output offsets into gotos table where the gotos for a particular nonterminal begin *) - cprintf cos "let _fsyacc_stateToProdIdxsTableRowOffsets = [|" ; - for idx in indexes do - outputCodedUInt16 os idx; - cprintfn cos "|]" ; - end; - - begin - let numActionRows = (Array.length actionTable) - let maxActionColumns = Array.length actionTable.[0] - cprintfn cos "let _fsyacc_action_rows = %d" numActionRows; - cprintf cos "let _fsyacc_actionTableElements = [|" ; - let actionIndexes = Array.create numActionRows 0 - - let actionTableCurrIndex = ref 0 - for i = 0 to nStates-1 do - actionIndexes.[i] <- !actionTableCurrIndex; - let actions = actionTable.[i] - let terminalsByAction = new Dictionary<_,int list>(10) - let countPerAction = new Dictionary<_,_>(10) - for terminal = 0 to actions.Length - 1 do - let action = snd actions.[terminal] - if terminalsByAction.ContainsKey action then - terminalsByAction.[action] <- terminal :: terminalsByAction.[action] ; - else - terminalsByAction.[action] <- [terminal]; - if countPerAction.ContainsKey action then - countPerAction.[action] <- countPerAction.[action]+1 - else - countPerAction.[action] <- 1 - - let mostCommonAction = - let mostCommon = ref Error - let max = ref 0 - for (KeyValue(x,y)) in countPerAction do - if y > !max then (mostCommon := x; max := y) - !mostCommon - - (* Count the number of entries in the association table. *) - let count = ref 0 - for (KeyValue(action,terminals)) in terminalsByAction do - for terminals in terminals do - if action <> mostCommonAction then - incr count; - - (* Write the head of the table (i.e. 
the number of entries and the default value) *) - actionTableCurrIndex := !actionTableCurrIndex + 1; - outputCodedUInt16 os !count; - outputCodedUInt16 os (actionCoding mostCommonAction); - - (* Write the pairs of entries in incremental order by key *) - (* This lets us implement the lookup by a binary chop. *) - for terminal = 0 to Array.length actions-1 do - let action = snd actions.[terminal] in - if action <> mostCommonAction then ( - actionTableCurrIndex := !actionTableCurrIndex + 1; - outputCodedUInt16 os terminal; - outputCodedUInt16 os (actionCoding action); - ); - cprintfn cos "|]" ; - (* Output offsets into actions table where the actions for a particular nonterminal begin *) - cprintf cos "let _fsyacc_actionTableRowOffsets = [|" ; - for j = 0 to numActionRows-1 do - cprintf cos "%a" outputCodedUInt16 actionIndexes.[j]; - cprintfn cos "|]" ; - - end; - begin - cprintf cos "let _fsyacc_reductionSymbolCounts = [|" ; - for nt,ntIdx,syms,code in prods do - cprintf cos "%a" outputCodedUInt16 (List.length syms); - cprintfn cos "|]" ; - end; - begin - cprintf cos "let _fsyacc_productionToNonTerminalTable = [|" ; - for nt,ntIdx,syms,code in prods do - cprintf cos "%a" outputCodedUInt16 ntIdx; - cprintfn cos "|]" ; - end; - begin - cprintf cos "let _fsyacc_immediateActions = [|" ; - for prodIdx in immediateActionTable do - match prodIdx with - | None -> cprintf cos "%a" outputCodedUInt16 anyMarker (* NONE REP *) - | Some act -> cprintf cos "%a" outputCodedUInt16 (actionCoding act) - cprintfn cos "|]" ; - end; - - let getType nt = if types.ContainsKey nt then types.[nt] else "'"+nt - begin - cprintf cos "let _fsyacc_reductions () =" ; - cprintfn cos " [| " ; - for nt,ntIdx,syms,code in prods do - cprintfn cos "# %d \"%s\"" !lineCountOutput output; - cprintfn cos " (fun (parseState : %s.IParseState) ->" parslib - if !compat then - cprintfn cos " Parsing.set_parse_state parseState;" - syms |> List.iteri (fun i sym -> - let tyopt = - match sym with - | Terminal t -> - 
if tokens.ContainsKey t then - tokens.[t] - else None - | NonTerminal nt -> Some (getType nt) - match tyopt with - | Some ty -> cprintfn cos " let _%d = (let data = parseState.GetInput(%d) in (Microsoft.FSharp.Core.Operators.unbox data : %s)) in" (i+1) (i+1) ty - | None -> ()) - cprintfn cos " Microsoft.FSharp.Core.Operators.box" - cprintfn cos " ("; - cprintfn cos " ("; - match code with - | Some (_,pos) -> cprintfn cos "# %d \"%s\"" pos.pos_lnum pos.pos_fname - | None -> () - match code with - | Some (code,_) -> - let dollar = ref false in - let c = code |> String.collect (fun c -> - if not !dollar && c = '$' then (dollar := true; "") - elif !dollar && c >= '0' && c <= '9' then (dollar := false; "_"+new System.String(c,1)) - elif !dollar then (dollar := false; "$"+new System.String(c,1)) - else new System.String(c,1)) - let lines = c.Split([| '\r'; '\n' |], System.StringSplitOptions.RemoveEmptyEntries); - for line in lines do - cprintfn cos " %s" line; - if !dollar then os.Write '$' - | None -> - cprintfn cos " raise (%s.Accept(Microsoft.FSharp.Core.Operators.box _1))" parslib - cprintfn cos " )"; - // Place the line count back for the type constraint - match code with - | Some (_,pos) -> cprintfn cos "# %d \"%s\"" pos.pos_lnum pos.pos_fname - | None -> () - cprintfn cos " : %s));" (if types.ContainsKey nt then types.[nt] else "'"+nt); - done; - cprintfn cos "|]" ; - end; - cprintfn cos "# %d \"%s\"" !lineCountOutput output; - cprintfn cos "let tables () : %s.Tables<_> = " parslib - cprintfn cos " { reductions= _fsyacc_reductions ();" - cprintfn cos " endOfInputTag = _fsyacc_endOfInputTag;" - cprintfn cos " tagOfToken = tagOfToken;" - cprintfn cos " dataOfToken = _fsyacc_dataOfToken; " - cprintfn cos " actionTableElements = _fsyacc_actionTableElements;" - cprintfn cos " actionTableRowOffsets = _fsyacc_actionTableRowOffsets;" - cprintfn cos " stateToProdIdxsTableElements = _fsyacc_stateToProdIdxsTableElements;" - cprintfn cos " stateToProdIdxsTableRowOffsets = 
_fsyacc_stateToProdIdxsTableRowOffsets;" - cprintfn cos " reductionSymbolCounts = _fsyacc_reductionSymbolCounts;" - cprintfn cos " immediateActions = _fsyacc_immediateActions;" - cprintfn cos " gotos = _fsyacc_gotos;" - cprintfn cos " sparseGotoTableRowOffsets = _fsyacc_sparseGotoTableRowOffsets;" - cprintfn cos " tagOfErrorTerminal = _fsyacc_tagOfErrorTerminal;" - cprintfn cos " parseError = (fun (ctxt:%s.ParseErrorContext<_>) -> " parslib - cprintfn cos " match parse_error_rich with " - cprintfn cos " | Some f -> f ctxt" - cprintfn cos " | None -> parse_error ctxt.Message);" - - cprintfn cos " numTerminals = %d;" (Array.length actionTable.[0]); - cprintfn cos " productionToNonTerminalTable = _fsyacc_productionToNonTerminalTable }" - cprintfn cos "let engine lexer lexbuf startState = (tables ()).Interpret(lexer, lexbuf, startState)" - - for (id,startState) in List.zip spec.StartSymbols startStates do - if not (types.ContainsKey id) then - failwith ("a %type declaration is required for for start token "+id); - let ty = types.[id] in - cprintfn cos "let %s lexer lexbuf : %s =" id ty; - cprintfn cos " Microsoft.FSharp.Core.Operators.unbox ((tables ()).Interpret(lexer, lexbuf, %d))" startState - - for id in spec.StartSymbols do - if not (types.ContainsKey id) then - failwith ("a %type declaration is required for start token "+id); - let ty = types.[id] in - cprintfn cosi "val %s : (%s.LexBuffer<%s> -> token) -> %s.LexBuffer<%s> -> (%s) " id lexlib tychar lexlib tychar ty; - - logf (fun oso -> oso.Close()) + let generatorState: GeneratorState = + { GeneratorState.Default with + input = filename + output = out + logger = logger + light = light + modname = modname + internal_module = internal_module + opens = opens + lexlib = lexlib + parslib = parslib + compat = compat } + writeSpecToFile generatorState spec compiledSpec let result = try main() with e -> - eprintf "FSYACC: error FSY000: %s" (match e with Failure s -> s | e -> e.Message); + eprintf "FSYACC: error FSY000: 
%s\n%s" (match e with Failure s -> s | e -> e.Message) e.StackTrace; exit 1 diff --git a/buildtools/fsyacc/fsyacc.fsproj b/buildtools/fsyacc/fsyacc.fsproj index e3a4b88a3a0..eccbfb76b07 100644 --- a/buildtools/fsyacc/fsyacc.fsproj +++ b/buildtools/fsyacc/fsyacc.fsproj @@ -3,7 +3,6 @@ Exe net7.0 - INTERNALIZED_FSLEXYACC_RUNTIME;$(DefineConstants) true false @@ -18,6 +17,7 @@ + diff --git a/buildtools/fsyacc/fsyaccast.fs b/buildtools/fsyacc/fsyaccast.fs index 4880ca5f0bf..3f81e725556 100644 --- a/buildtools/fsyacc/fsyaccast.fs +++ b/buildtools/fsyacc/fsyaccast.fs @@ -1,6 +1,6 @@ // (c) Microsoft Corporation 2005-2007. -module internal FsLexYacc.FsYacc.AST +module FsLexYacc.FsYacc.AST #nowarn "62" // This construct is for ML compatibility. @@ -9,8 +9,7 @@ open System open System.Collections.Generic open Printf open Microsoft.FSharp.Collections -open Internal.Utilities -open Internal.Utilities.Text.Lexing +open FSharp.Text.Lexing /// An active pattern that should be in the F# standard library let (|KeyValue|) (kvp:KeyValuePair<_,_>) = kvp.Key,kvp.Value @@ -18,17 +17,17 @@ let (|KeyValue|) (kvp:KeyValuePair<_,_>) = kvp.Key,kvp.Value type Identifier = string type Code = string * Position +type Associativity = LeftAssoc | RightAssoc | NonAssoc +type Rule = Rule of Identifier list * Identifier option * Code option type ParserSpec= { Header : Code; Tokens : (Identifier * string option) list; Types : (Identifier * string) list; - Associativities: (Identifier * Associativity) list list; + Associativities: (Identifier * Associativity) list list; // suggest to do: (Associativity * Identifier list) list StartSymbols : Identifier list; Rules : (Identifier * Rule list) list } -and Rule = Rule of Identifier list * Identifier option * Code option -and Associativity = LeftAssoc | RightAssoc | NonAssoc type Terminal = string type NonTerminal = string @@ -75,17 +74,17 @@ type ProcessedParserSpec = let ProcessParserSpecAst (spec: ParserSpec) = let explicitPrecInfo = spec.Associativities 
- |> List.mapi (fun n precSpecs -> precSpecs |> List.map (fun (precSym, assoc) -> precSym,ExplicitPrec (assoc, 10000 - n))) + |> List.mapi (fun n precSpecs -> precSpecs |> List.map (fun (precSym, assoc) -> precSym,ExplicitPrec (assoc, 9999 - n))) |> List.concat - for (key,_) in explicitPrecInfo |> Seq.countBy fst |> Seq.filter (fun (_,n) -> n > 1) do + for key,_ in explicitPrecInfo |> Seq.countBy fst |> Seq.filter (fun (_,n) -> n > 1) do failwithf "%s is given two associativities" key let explicitPrecInfo = explicitPrecInfo |> Map.ofList let implicitSymPrecInfo = NoPrecedence - let terminals = List.map fst spec.Tokens @ ["error"]in + let terminals = List.map fst spec.Tokens @ ["error"] let terminalSet = Set.ofList terminals let IsTerminal z = terminalSet.Contains(z) let prec_of_terminal sym implicitPrecInfo = @@ -94,14 +93,14 @@ let ProcessParserSpecAst (spec: ParserSpec) = let mkSym s = if IsTerminal s then Terminal s else NonTerminal s let prods = - spec.Rules |> List.mapi (fun i (nonterm,rules) -> - rules |> List.mapi (fun j (Rule(syms,precsym,code)) -> + spec.Rules |> List.mapi (fun _ (nonterm,rules) -> + rules |> List.mapi (fun _ (Rule(syms,precsym,code)) -> let precInfo = let precsym = List.foldBack (fun x acc -> match acc with Some _ -> acc | None -> match x with z when IsTerminal z -> Some z | _ -> acc) syms precsym let implicitPrecInfo = NoPrecedence match precsym with | None -> implicitPrecInfo - | Some sym -> if explicitPrecInfo.ContainsKey(sym) then explicitPrecInfo.[sym] else implicitPrecInfo + | Some sym -> prec_of_terminal sym None Production(nonterm, precInfo, List.map mkSym syms, code))) |> List.concat let nonTerminals = List.map fst spec.Rules @@ -110,7 +109,7 @@ let ProcessParserSpecAst (spec: ParserSpec) = if nt <> "error" && not (nonTerminalSet.Contains(nt)) then failwith (sprintf "NonTerminal '%s' has no productions" nt) - for (Production(nt,_,syms,_)) in prods do + for Production(_,_,syms,_) in prods do for sym in syms do match sym with | 
NonTerminal nt -> @@ -120,7 +119,7 @@ let ProcessParserSpecAst (spec: ParserSpec) = if spec.StartSymbols= [] then (failwith "at least one %start declaration is required"); - for (nt,_) in spec.Types do + for nt,_ in spec.Types do checkNonTerminal nt; let terminals = terminals |> List.map (fun t -> (t,prec_of_terminal t None)) @@ -176,7 +175,7 @@ type NonTerminalIndex = int type SymbolIndex = int let PTerminal(i:TerminalIndex) : SymbolIndex = -i-1 let PNonTerminal(i:NonTerminalIndex) : SymbolIndex = i -let (|PTerminal|PNonTerminal|) x = if x < 0 then PTerminal (-(x+1)) else PNonTerminal x +let (|PTerminal|PNonTerminal|) x = if x < 0 then PTerminal (-x-1) else PNonTerminal x type SymbolIndexes = SymbolIndex list @@ -211,7 +210,7 @@ let ProcessWorkList start f = let rec loop() = match !work with | [] -> () - | x :: t -> + | x::t -> work := t; f queueWork x; loop() @@ -227,14 +226,14 @@ let LeastFixedPoint f set = /// A general standard memoization utility. Be sure to apply to only one (function) argument to build the /// residue function! 
let Memoize f = - let t = new Dictionary<_,_>(1000) + let t = Dictionary(1000) fun x -> let ok,v = t.TryGetValue(x) if ok then v else let res = f x in t.[x] <- res; res /// A standard utility to create a dictionary from a list of pairs let CreateDictionary xs = - let dict = new Dictionary<_,_>() + let dict = Dictionary() for x,y in xs do dict.Add(x,y) dict @@ -277,7 +276,7 @@ type ProductionTable(ntTab:NonTerminalTable, termTab:TerminalTable, nonTerminals let c = Array.ofList (List.map (fun (_,Production(_,prec,_,_)) -> prec) prodsWithIdxs) let productions = nonTerminals - |> List.map(fun nt -> (ntTab.ToIndex nt, List.choose (fun (i,Production(nt2,prec,syms,_)) -> if nt2=nt then Some i else None) prodsWithIdxs)) + |> List.map(fun nt -> (ntTab.ToIndex nt, List.choose (fun (i,Production(nt2,_,_,_)) -> if nt2=nt then Some i else None) prodsWithIdxs)) |> CreateDictionary member prodTab.Symbols(i) = a.[i] @@ -285,12 +284,12 @@ type ProductionTable(ntTab:NonTerminalTable, termTab:TerminalTable, nonTerminals member prodTab.Precedence(i) = c.[i] member prodTab.Symbol i n = let syms = prodTab.Symbols i - if n >= syms.Length then None else Some (syms.[n]) + if n >= syms.Length then None else Some syms.[n] member prodTab.Productions = productions /// A mutable table maping kernels to sets of lookahead tokens type LookaheadTable() = - let t = new Dictionary>() + let t = Dictionary>() member table.Add(x,y) = let prev = if t.ContainsKey(x) then t.[x] else Set.empty t.[x] <- prev.Add(y) @@ -314,9 +313,9 @@ type KernelTable(kernels) = /// Hold the results of cpmuting the LALR(1) closure of an LR(0) kernel type Closure1Table() = - let t = new Dictionary>() + let t = Dictionary>() member table.Add(a,b) = - if not (t.ContainsKey(a)) then t.[a] <- new HashSet<_>(HashIdentity.Structural) + if not (t.ContainsKey(a)) then t.[a] <- HashSet<_>(HashIdentity.Structural) t.[a].Add(b) member table.Count = t.Count member table.IEnumerable = (t :> seq<_>) @@ -325,9 +324,9 @@ type Closure1Table() 
= /// A mutable table giving a lookahead set Set for each kernel. The terminals represent the /// "spontaneous" items for the kernel. TODO: document this more w.r.t. the Dragon book. type SpontaneousTable() = - let t = new Dictionary>() + let t = Dictionary>() member table.Add(a,b) = - if not (t.ContainsKey(a)) then t.[a] <- new HashSet<_>(HashIdentity.Structural) + if not (t.ContainsKey(a)) then t.[a] <- HashSet<_>(HashIdentity.Structural) t.[a].Add(b) member table.Count = t.Count member table.IEnumerable = (t :> seq<_>) @@ -335,20 +334,35 @@ type SpontaneousTable() = /// A mutable table giving a Set for each kernel. The kernels represent the /// "propagate" items for the kernel. TODO: document this more w.r.t. the Dragon book. type PropagateTable() = - let t = new Dictionary>() + let t = Dictionary>() member table.Add(a,b) = - if not (t.ContainsKey(a)) then t.[a] <- new HashSet(HashIdentity.Structural) + if not (t.ContainsKey(a)) then t.[a] <- HashSet(HashIdentity.Structural) t.[a].Add(b) member table.Item - with get(a) = + with get a = let ok,v = t.TryGetValue(a) if ok then v :> seq<_> else Seq.empty member table.Count = t.Count +type Prod = NonTerminal * int * Symbols * option +type ActionTable = (PrecedenceInfo * Action) array array + +type CompiledSpec = + { prods: Prod [] + states: int list [] + startStates: int list + actionTable: ActionTable + immediateActionTable: Action option [] + gotoTable: int option [] [] + endOfInputTerminalIdx: int + errorTerminalIdx: int + nonTerminals: string list + } + /// Compile a pre-processed LALR parser spec to tables following the Dragon book algorithm -let CompilerLalrParserSpec logf (spec : ProcessedParserSpec) = - let stopWatch = new System.Diagnostics.Stopwatch() +let CompilerLalrParserSpec logf (spec : ProcessedParserSpec): CompiledSpec = + let stopWatch = System.Diagnostics.Stopwatch() let reportTime() = printfn " time: %A" stopWatch.Elapsed; stopWatch.Reset(); stopWatch.Start() stopWatch.Start() @@ -400,7 +414,7 @@ 
let CompilerLalrParserSpec logf (spec : ProcessedParserSpec) = let rhs = Array.toList (prodTab.Symbols prodIdx) let rec place l = match l with - | (yi :: t) -> + | yi::t -> res := List.choose (function None -> None | Some a -> Some (PNonTerminal nonTermX,Some a)) @@ -424,11 +438,11 @@ let CompilerLalrParserSpec logf (spec : ProcessedParserSpec) = /// have an empty token in the first set then we have to iterate through those. let ComputeFirstSetOfTokenList = Memoize (fun (str,term) -> - let acc = new System.Collections.Generic.List<_>() + let acc = System.Collections.Generic.List<_>() let rec add l = match l with | [] -> acc.Add(term) - | sym :: moreSyms -> + | sym::moreSyms -> let firstSetOfSym = computedFirstTable.[sym] firstSetOfSym |> Set.iter (function None -> () | Some v -> acc.Add(v)) if firstSetOfSym.Contains(None) then add moreSyms @@ -519,7 +533,7 @@ let CompilerLalrParserSpec logf (spec : ProcessedParserSpec) = fprintf os " gotos:" fprintf os "%a" OutputGotos d) m - let OutputLalrTables os (prods,states, startStates,actionTable,immediateActionTable,gotoTable,endOfInputTerminalIdx,errorTerminalIdx) = + let OutputLalrTables os (_,states, startStates,actionTable,immediateActionTable,gotoTable,_,_) = let combined = Array.ofList (List.map2 (fun x (y,(z,w)) -> x,y,z,w) (Array.toList states) (List.zip (Array.toList actionTable) (List.zip (Array.toList immediateActionTable) (Array.toList gotoTable)))) fprintfn os "------------------------"; fprintfn os "states = "; @@ -559,7 +573,7 @@ let CompilerLalrParserSpec logf (spec : ProcessedParserSpec) = // Input is kernel, output is kernel let ComputeGotosOfKernel iset sym = let isetClosure = ComputeClosure0 iset - let acc = new System.Collections.Generic.List<_>(10) + let acc = System.Collections.Generic.List<_>(10) isetClosure |> Set.iter (fun item0 -> match rsym_of_item0 item0 with | Some sym2 when sym = sym2 -> acc.Add(advance_of_item0 item0) @@ -587,7 +601,7 @@ let CompilerLalrParserSpec logf (spec : 
ProcessedParserSpec) = reportTime(); printf "building kernel table..."; stdout.Flush(); // Give an index to each LR(0) kernel, and from now on refer to them only by index - let kernelTab = new KernelTable(kernels) + let kernelTab = KernelTable(kernels) let startKernelIdxs = List.map kernelTab.Index startKernels let startKernelItemIdxs = List.map2 (fun a b -> KernelItemIdx(a,b)) startKernelIdxs startItems @@ -617,7 +631,7 @@ let CompilerLalrParserSpec logf (spec : ProcessedParserSpec) = // add [B --> .g, b] to I let ComputeClosure1 iset = - let acc = new Closure1Table() + let acc = Closure1Table() ProcessWorkList iset (fun addToWorkList (item0,pretokens:Set) -> pretokens |> Set.iter (fun pretoken -> if not (acc.Contains(item0,pretoken)) then @@ -625,7 +639,7 @@ let CompilerLalrParserSpec logf (spec : ProcessedParserSpec) = let rsyms = rsyms_of_item0 item0 if rsyms.Length > 0 then match rsyms.[0] with - | (PNonTerminal ntB) -> + | PNonTerminal ntB -> let firstSet = ComputeFirstSetOfTokenList (Array.toList rsyms.[1..],pretoken) for prodIdx in prodTab.Productions.[ntB] do addToWorkList (prodIdx_to_item0 prodIdx,firstSet) @@ -657,8 +671,8 @@ let CompilerLalrParserSpec logf (spec : ProcessedParserSpec) = let closure1OfItem0WithDummy = Memoize (fun item0 -> ComputeClosure1 [(item0,Set.ofList [dummyLookaheadIdx])]) - let spontaneous = new SpontaneousTable() - let propagate = new PropagateTable() + let spontaneous = SpontaneousTable() + let propagate = PropagateTable() let count = ref 0 for kernelIdx in kernelTab.Indexes do @@ -669,7 +683,7 @@ let CompilerLalrParserSpec logf (spec : ProcessedParserSpec) = let item0Idx = KernelItemIdx(kernelIdx,item0) let jset = closure1OfItem0WithDummy item0 //printf "#jset = %d\n" jset.Count; stdout.Flush(); - for (KeyValue(closureItem0, lookaheadTokens)) in jset.IEnumerable do + for KeyValue(closureItem0, lookaheadTokens) in jset.IEnumerable do incr count match rsym_of_item0 closureItem0 with | None -> () @@ -701,11 +715,11 @@ let 
CompilerLalrParserSpec logf (spec : ProcessedParserSpec) = let initialWork = [ for idx in startKernelItemIdxs do yield (idx,endOfInputTerminalIdx) - for (KeyValue(kernelItemIdx,lookaheads)) in spontaneous.IEnumerable do + for KeyValue(kernelItemIdx,lookaheads) in spontaneous.IEnumerable do for lookahead in lookaheads do yield (kernelItemIdx,lookahead) ] - let acc = new LookaheadTable() + let acc = LookaheadTable() // Compute the closure ProcessWorkList initialWork @@ -729,7 +743,7 @@ let CompilerLalrParserSpec logf (spec : ProcessedParserSpec) = // printf "DEBUG: state %d: adding action for %s, precNew = %a, actionNew = %a\n" kernelIdx (termTab.OfIndex termIdx) outputPrec precNew OutputAction actionNew; // We add in order of precedence - however the precedences may be the same, and we give warnings when rpecedence resolution is based on implicit file orderings - let (precSoFar, actionSoFar) as itemSoFar = arr.[termIdx] + let _, actionSoFar as itemSoFar = arr.[termIdx] // printf "DEBUG: state %d: adding action for %s, precNew = %a, precSoFar = %a, actionSoFar = %a\n" kernelIdx (termTab.OfIndex termIdx) outputPrec precNew outputPrec precSoFar OutputAction actionSoFar; // if compare_prec precSoFar precNew = -1 then failwith "addResolvingPrecedence"; @@ -764,17 +778,17 @@ let CompilerLalrParserSpec logf (spec : ProcessedParserSpec) = let a2n, astr2 = reportAction x2 printfn " %s/%s error at state %d on terminal %s between %s and %s - assuming the former because %s" a1n a2n kernelIdx (termTab.OfIndex termIdx) astr1 astr2 reason match itemSoFar,itemNew with - | (_,Shift s1),(_, Shift s2) -> + | (_,Shift _),(_, Shift _) -> if actionSoFar <> actionNew then reportConflict itemSoFar itemNew "internal error" itemSoFar - | (((precShift,Shift sIdx) as shiftItem), - ((precReduce,Reduce prodIdx) as reduceItem)) - | (((precReduce,Reduce prodIdx) as reduceItem), - ((precShift,Shift sIdx) as shiftItem)) -> + | (precShift,Shift _ as shiftItem, + (precReduce,Reduce _ as reduceItem)) + 
| (precReduce,Reduce _ as reduceItem, + (precShift,Shift _ as shiftItem)) -> match precReduce, precShift with - | (ExplicitPrec (_,p1), ExplicitPrec(assocNew,p2)) -> + | ExplicitPrec (_,p1), ExplicitPrec(assocNew,p2) -> if p1 < p2 then shiftItem elif p1 > p2 then reduceItem else @@ -789,7 +803,7 @@ let CompilerLalrParserSpec logf (spec : ProcessedParserSpec) = reportConflict shiftItem reduceItem "we prefer shift when unable to compare precedences" incr shiftReduceConflicts; shiftItem - | ((_,Reduce prodIdx1),(_, Reduce prodIdx2)) -> + | (_,Reduce prodIdx1),(_, Reduce prodIdx2) -> "we prefer the rule earlier in the file" |> if prodIdx1 < prodIdx2 then reportConflict itemSoFar itemNew else reportConflict itemNew itemSoFar incr reduceReduceConflicts; @@ -813,9 +827,9 @@ let CompilerLalrParserSpec logf (spec : ProcessedParserSpec) = yield (item0,lookaheads) ] |> ComputeClosure1 - for (KeyValue(item0,lookaheads)) in items.IEnumerable do + for KeyValue(item0,lookaheads) in items.IEnumerable do - let nonTermA = ntIdx_of_item0 item0 + let _ = ntIdx_of_item0 item0 match rsym_of_item0 item0 with | Some (PTerminal termIdx) -> let action = @@ -890,18 +904,22 @@ let CompilerLalrParserSpec logf (spec : ProcessedParserSpec) = /// The final results let states = kernels |> Array.ofList - let prods = Array.ofList (List.map (fun (Production(nt,prec,syms,code)) -> (nt, ntTab.ToIndex nt, syms,code)) prods) + let prods = Array.ofList (List.map (fun (Production(nt,_,syms,code)) -> (nt, ntTab.ToIndex nt, syms,code)) prods) logf (fun logStream -> printfn "writing tables to log"; stdout.Flush(); OutputLalrTables logStream (prods, states, startKernelIdxs, actionTable, immediateActionTable, gotoTable, (termTab.ToIndex endOfInputTerminal), errorTerminalIdx)); let states = states |> Array.map (Set.toList >> List.map prodIdx_of_item0) - (prods, states, startKernelIdxs, - actionTable, immediateActionTable, gotoTable, - (termTab.ToIndex endOfInputTerminal), - errorTerminalIdx, nonTerminals) - + { 
prods = prods + states = states + startStates = startKernelIdxs + actionTable = actionTable + immediateActionTable = immediateActionTable + gotoTable = gotoTable + endOfInputTerminalIdx = termTab.ToIndex endOfInputTerminal + errorTerminalIdx = errorTerminalIdx + nonTerminals = nonTerminals } (* Some examples for testing *) diff --git a/buildtools/fsyacc/fsyaccdriver.fs b/buildtools/fsyacc/fsyaccdriver.fs new file mode 100644 index 00000000000..b249e0e60d7 --- /dev/null +++ b/buildtools/fsyacc/fsyaccdriver.fs @@ -0,0 +1,547 @@ +module FsLexYacc.FsYacc.Driver + +open System +open System.IO +open FSharp.Text.Lexing +open FsLexYacc.FsYacc +open FsLexYacc.FsYacc.AST +open Printf +open System.Collections.Generic + +let has_extension (s:string) = + (s.Length >= 1 && s.[s.Length - 1] = '.') + || Path.HasExtension(s) + +let chop_extension (s:string) = + if not (has_extension s) then invalidArg "s" "the file name does not have an extension" + Path.Combine (Path.GetDirectoryName s,Path.GetFileNameWithoutExtension(s)) + +let checkSuffix (x:string) (y:string) = x.EndsWith(y) + +let readSpecFromFile fileName codePage = + let stream,reader,lexbuf = UnicodeFileAsLexbuf(fileName, codePage) + use stream = stream + use reader = reader + try + let spec = Parser.spec Lexer.token lexbuf + Ok spec + with e -> + (e, lexbuf.StartPos.Line, lexbuf.StartPos.Column) + |> Result.Error + +let printTokens filename codePage = + let stream,reader,lexbuf = UnicodeFileAsLexbuf(filename, codePage) + use stream = stream + use reader = reader + + try + while true do + printf "tokenize - getting one token"; + let t = Lexer.token lexbuf in + (*F# printf "tokenize - got %s" (Parser.token_to_string t); F#*) + if t = Parser.EOF then exit 0 + with e -> + eprintf "%s(%d,%d): error: %s" filename lexbuf.StartPos.Line lexbuf.StartPos.Column e.Message; + exit 1 + +let logFileName (input: string, out: string option, log: bool) = + if log then Some (match out with Some x -> chop_extension x + ".fsyacc.output" | _ -> 
chop_extension input + ".fsyacc.output") + else None + +let deriveOutputFileNames (filename, out: string option) = + let output = match out with Some x -> x | _ -> chop_extension filename + (if checkSuffix filename ".mly" then ".ml" else ".fs") in + let outputi = match out with Some x -> chop_extension x + (if checkSuffix x ".ml" then ".mli" else ".fsi") | _ -> chop_extension filename + (if checkSuffix filename ".mly" then ".mli" else ".fsi") in + output, outputi + +type Logger = + inherit IDisposable + + abstract member LogStream: (TextWriter -> 'a) -> 'a + abstract member Log: TextWriterFormat<'a> -> 'a + abstract member LogString: string -> unit + +type FileLogger (outputFileLog) = + let osl = File.CreateText outputFileLog :> TextWriter + + interface Logger with + member x.LogStream f = f osl + + member x.Log format = fprintfn osl format + + member x.LogString msg = fprintfn osl "%s" msg + + interface IDisposable with + member _.Dispose() = + osl.Dispose() + +type NullLogger () = + interface Logger with + member x.LogStream f = + f TextWriter.Null + member x.Log f = + fprintfn TextWriter.Null f + member x.LogString _ = () + + interface IDisposable with member _.Dispose () = () + +type Writer(outputFileName, outputFileInterface) = + let os = File.CreateText outputFileName :> TextWriter + let mutable outputLineCount = 0 + let osi = File.CreateText outputFileInterface :> TextWriter + let mutable interfaceLineCount = 0 + + member x.Write format = + fprintf os format + + member x.WriteLine format = + kfprintf (fun _ -> + outputLineCount <- outputLineCount + 1 + os.WriteLine () + ) os format + + member x.WriteUInt16 (i: int) = fprintf os "%dus;" i + + member x.WriteCode (code, pos) = + x.WriteLine "# %d \"%s\"" pos.pos_lnum pos.pos_fname + x.WriteLine "%s" code + let codeLines = code.Replace("\r","").Split([| '\n' |]).Length + outputLineCount <- outputLineCount + codeLines + x.WriteLine "# %d \"%s\"" outputLineCount outputFileName + + member x.OutputLineCount = 
outputLineCount + + member x.WriteInterface format = + fprintf osi format + + member x.WriteLineInterface format = + kfprintf (fun _ -> + interfaceLineCount <- interfaceLineCount + 1 + osi.WriteLine () + ) osi format + + member x.InterfaceLineCount = interfaceLineCount + + + + interface IDisposable with + member x.Dispose () = + os.Dispose() + osi.Dispose() + + +// This is to avoid name conflicts against keywords. +let generic_nt_name nt = "'gentype_" + nt +let anyMarker = 0xffff + +let actionCoding = + let shiftFlag = 0x0000 + let reduceFlag = 0x4000 + let errorFlag = 0x8000 + let acceptFlag = 0xc000 + function + | Accept -> acceptFlag + | Shift n -> shiftFlag ||| n + | Reduce n -> reduceFlag ||| n + | Error -> errorFlag + +type GeneratorState = + { input: string + output: string option + logger: Logger + light: bool option + modname: string option + internal_module: bool + opens: string list + lexlib: string + parslib: string + compat: bool + generate_nonterminal_name: Identifier -> string + map_action_to_int: Action -> int + anyMarker: int } + with + static member Default = + { input = "" + output = None + logger = new NullLogger() + light = None + modname = None + internal_module = false + opens = [] + lexlib = "" + parslib = "" + compat = false + generate_nonterminal_name = generic_nt_name + map_action_to_int = actionCoding + anyMarker = anyMarker } + +let writeSpecToFile (generatorState: GeneratorState) (spec: ParserSpec) (compiledSpec: CompiledSpec) = + let output, outputi = deriveOutputFileNames (generatorState.input, generatorState.output) + generatorState.logger.Log " Output file describing compiled parser placed in %s and %s" output outputi + use writer = new Writer(output, outputi) + writer.WriteLine "// Implementation file for parser generated by fsyacc"; + writer.WriteLineInterface "// Signature file for parser generated by fsyacc"; + + if (generatorState.light = Some(false)) || (generatorState.light = None && checkSuffix output ".ml") then + 
writer.WriteLine "#light \"off\""; + writer.WriteLineInterface "#light \"off\""; + + match generatorState.modname with + | None -> () + | Some s -> + match generatorState.internal_module with + | true -> + writer.WriteLine "module internal %s" s; + writer.WriteLineInterface "module internal %s" s; + | false -> + writer.WriteLine "module %s" s; + writer.WriteLineInterface "module %s" s; + + writer.WriteLine "#nowarn \"64\";; // turn off warnings that type variables used in production annotations are instantiated to concrete type"; + + for s in generatorState.opens do + writer.WriteLine "open %s" s; + writer.WriteLineInterface "open %s" s; + + writer.WriteLine "open %s" generatorState.lexlib; + writer.WriteLine "open %s.ParseHelpers" generatorState.parslib; + if generatorState.compat then + writer.WriteLine "open Microsoft.FSharp.Compatibility.OCaml.Parsing"; + + writer.WriteCode spec.Header + + // Print the datatype for the tokens + writer.WriteLine "// This type is the type of tokens accepted by the parser"; + + writer.WriteLine "type token = "; + writer.WriteLineInterface "type token = "; + for id,typ in spec.Tokens do + match typ with + | None -> + writer.WriteLine " | %s" id + writer.WriteLineInterface " | %s" id + | Some ty -> + writer.WriteLine " | %s of (%s)" id ty + writer.WriteLineInterface " | %s of (%s)" id ty + + // Print the datatype for the token names + writer.WriteLine "// This type is used to give symbolic names to token indexes, useful for error messages"; + writer.WriteLine "type tokenId = "; + writer.WriteLineInterface "type tokenId = "; + for id,_ in spec.Tokens do + writer.WriteLine " | TOKEN_%s" id; + writer.WriteLineInterface " | TOKEN_%s" id; + writer.WriteLine " | TOKEN_end_of_input"; + writer.WriteLineInterface " | TOKEN_end_of_input"; + writer.WriteLine " | TOKEN_error"; + writer.WriteLineInterface " | TOKEN_error"; + + writer.WriteLine "// This type is used to give symbolic names to token indexes, useful for error messages"; + 
writer.WriteLine "type nonTerminalId = "; + writer.WriteLineInterface "type nonTerminalId = "; + for nt in compiledSpec.nonTerminals do + writer.WriteLine " | NONTERM_%s" nt; + writer.WriteLineInterface " | NONTERM_%s" nt; + + + writer.WriteLine ""; + writer.WriteLine "// This function maps tokens to integer indexes"; + writer.WriteLine "let tagOfToken (t:token) = "; + writer.WriteLine " match t with"; + spec.Tokens |> List.iteri (fun i (id,typ) -> + writer.WriteLine " | %s %s -> %d " id (match typ with Some _ -> "_" | None -> "") i); + writer.WriteLineInterface "/// This function maps tokens to integer indexes"; + writer.WriteLineInterface "val tagOfToken: token -> int"; + + writer.WriteLine ""; + writer.WriteLine "// This function maps integer indexes to symbolic token ids"; + writer.WriteLine "let tokenTagToTokenId (tokenIdx:int) = "; + writer.WriteLine " match tokenIdx with"; + spec.Tokens |> List.iteri (fun i (id,_) -> writer.WriteLine " | %d -> TOKEN_%s " i id) + writer.WriteLine " | %d -> TOKEN_end_of_input" compiledSpec.endOfInputTerminalIdx; + writer.WriteLine " | %d -> TOKEN_error" compiledSpec.errorTerminalIdx; + writer.WriteLine " | _ -> failwith \"tokenTagToTokenId: bad token\"" + + writer.WriteLineInterface ""; + writer.WriteLineInterface "/// This function maps integer indexes to symbolic token ids"; + writer.WriteLineInterface "val tokenTagToTokenId: int -> tokenId"; + + writer.WriteLine ""; + writer.WriteLine "/// This function maps production indexes returned in syntax errors to strings representing the non terminal that would be produced by that production"; + writer.WriteLine "let prodIdxToNonTerminal (prodIdx:int) = "; + writer.WriteLine " match prodIdx with"; + compiledSpec.prods |> Array.iteri (fun i (nt,_,_,_) -> writer.WriteLine " | %d -> NONTERM_%s " i nt); + writer.WriteLine " | _ -> failwith \"prodIdxToNonTerminal: bad production index\"" + + writer.WriteLineInterface ""; + writer.WriteLineInterface "/// This function maps production 
indexes returned in syntax errors to strings representing the non terminal that would be produced by that production"; + writer.WriteLineInterface "val prodIdxToNonTerminal: int -> nonTerminalId"; + + writer.WriteLine ""; + writer.WriteLine "let _fsyacc_endOfInputTag = %d " compiledSpec.endOfInputTerminalIdx; + writer.WriteLine "let _fsyacc_tagOfErrorTerminal = %d" compiledSpec.errorTerminalIdx; + writer.WriteLine ""; + writer.WriteLine "// This function gets the name of a token as a string"; + writer.WriteLine "let token_to_string (t:token) = "; + writer.WriteLine " match t with "; + spec.Tokens |> List.iteri (fun _ (id,typ) -> writer.WriteLine " | %s %s -> \"%s\" " id (match typ with Some _ -> "_" | None -> "") id); + + writer.WriteLineInterface ""; + writer.WriteLineInterface "/// This function gets the name of a token as a string"; + writer.WriteLineInterface "val token_to_string: token -> string"; + + writer.WriteLine ""; + writer.WriteLine "// This function gets the data carried by a token as an object"; + writer.WriteLine "let _fsyacc_dataOfToken (t:token) = "; + writer.WriteLine " match t with "; + + for id,typ in spec.Tokens do + writer.WriteLine " | %s %s -> %s " + id + (match typ with Some _ -> "_fsyacc_x" | None -> "") + (match typ with Some _ -> "Microsoft.FSharp.Core.Operators.box _fsyacc_x" | None -> "(null : System.Object)") + + let tychar = "'cty" + + for key,_ in spec.Types |> Seq.countBy fst |> Seq.filter (fun (_,n) -> n > 1) do + failwithf "%s is given multiple %%type declarations" key; + + for key,_ in spec.Tokens |> Seq.countBy fst |> Seq.filter (fun (_,n) -> n > 1) do + failwithf "%s is given %%token declarations" key + + let types = Map.ofList spec.Types + let tokens = Map.ofList spec.Tokens + + let nStates = compiledSpec.states.Length + begin + writer.Write "let _fsyacc_gotos = [| " ; + let numGotoNonTerminals = compiledSpec.gotoTable.[0].Length + let gotoIndexes = Array.create numGotoNonTerminals 0 + let mutable gotoTableCurrIndex = 0 + 
for j = 0 to numGotoNonTerminals-1 do + gotoIndexes.[j] <- gotoTableCurrIndex + + (* Count the number of entries in the association table. *) + let mutable count = 0 + for i = 0 to nStates - 1 do + let goto = compiledSpec.gotoTable.[i].[j] + match goto with + | None -> () + | Some _ -> count <- count + 1 + + (* Write the head of the table (i.e. the number of entries and the default value) *) + gotoTableCurrIndex <- gotoTableCurrIndex + 1 + writer.WriteUInt16 count + writer.WriteUInt16 generatorState.anyMarker + + (* Write the pairs of entries in incremental order by key *) + (* This lets us implement the lookup by a binary chop. *) + for i = 0 to nStates - 1 do + let goto = compiledSpec.gotoTable.[i].[j] + match goto with + | None -> () + | Some n -> + gotoTableCurrIndex <- gotoTableCurrIndex + 1 + writer.WriteUInt16 i + writer.WriteUInt16 n + writer.WriteLine "|]" ; + (* Output offsets into gotos table where the gotos for a particular nonterminal begin *) + writer.Write "let _fsyacc_sparseGotoTableRowOffsets = [|" ; + for j = 0 to numGotoNonTerminals-1 do + writer.WriteUInt16 gotoIndexes.[j] + writer.WriteLine "|]" + end; + + begin + writer.Write "let _fsyacc_stateToProdIdxsTableElements = [| " ; + let indexes = Array.create compiledSpec.states.Length 0 + let mutable currIndex = 0 + for j = 0 to compiledSpec.states.Length - 1 do + let state = compiledSpec.states.[j] + indexes.[j] <- currIndex; + + (* Write the head of the table (i.e. the number of entries) *) + writer.WriteUInt16 state.Length; + currIndex <- currIndex + state.Length + 1 + + (* Write the pairs of entries in incremental order by key *) + (* This lets us implement the lookup by a binary chop. 
*) + for prodIdx in state do + writer.WriteUInt16 prodIdx + writer.WriteLine "|]" ; + (* Output offsets into gotos table where the gotos for a particular nonterminal begin *) + writer.Write "let _fsyacc_stateToProdIdxsTableRowOffsets = [|" ; + for idx in indexes do + writer.WriteUInt16 idx; + writer.WriteLine "|]" ; + end; + + begin + let numActionRows = (Array.length compiledSpec.actionTable) + let _ = Array.length compiledSpec.actionTable.[0] + writer.WriteLine "let _fsyacc_action_rows = %d" numActionRows; + writer.Write "let _fsyacc_actionTableElements = [|" ; + let actionIndexes = Array.create numActionRows 0 + + let mutable actionTableCurrIndex = 0 + for i = 0 to nStates-1 do + actionIndexes.[i] <- actionTableCurrIndex; + let actions = compiledSpec.actionTable.[i] + let terminalsByAction = Dictionary<_,int list>(10) + let countPerAction = Dictionary<_,_>(10) + for terminal = 0 to actions.Length - 1 do + let action = snd actions.[terminal] + if terminalsByAction.ContainsKey action then + terminalsByAction.[action] <- terminal :: terminalsByAction.[action] ; + else + terminalsByAction.[action] <- [terminal]; + if countPerAction.ContainsKey action then + countPerAction.[action] <- countPerAction.[action]+1 + else + countPerAction.[action] <- 1 + + let mostCommonAction = + let mostCommon = ref Error + let max = ref 0 + for KeyValue(x,y) in countPerAction do + if y > !max then (mostCommon := x; max := y) + !mostCommon + + (* Count the number of entries in the association table. *) + let count = ref 0 + for KeyValue(action,terminals) in terminalsByAction do + for terminal in terminals do + if action <> mostCommonAction then + incr count; + + (* Write the head of the table (i.e. 
the number of entries and the default value) *) + actionTableCurrIndex <- actionTableCurrIndex + 1; + writer.WriteUInt16 !count; + writer.WriteUInt16 (generatorState.map_action_to_int mostCommonAction); + + (* Write the pairs of entries in incremental order by key *) + (* This lets us implement the lookup by a binary chop. *) + for terminal = 0 to Array.length actions-1 do + let action = snd actions.[terminal] in + if action <> mostCommonAction then ( + actionTableCurrIndex <- actionTableCurrIndex + 1; + writer.WriteUInt16 terminal; + writer.WriteUInt16 (generatorState.map_action_to_int action); + ); + writer.WriteLine "|]" ; + (* Output offsets into actions table where the actions for a particular nonterminal begin *) + writer.Write "let _fsyacc_actionTableRowOffsets = [|" ; + for j = 0 to numActionRows-1 do + writer.WriteUInt16 actionIndexes.[j]; + writer.WriteLine "|]" ; + + end; + begin + writer.Write "let _fsyacc_reductionSymbolCounts = [|" ; + for nt,ntIdx,syms,code in compiledSpec.prods do + writer.WriteUInt16 (List.length syms); + writer.WriteLine "|]" ; + end; + begin + writer.Write "let _fsyacc_productionToNonTerminalTable = [|" ; + for nt,ntIdx,syms,code in compiledSpec.prods do + writer.WriteUInt16 ntIdx; + writer.WriteLine "|]" ; + end; + begin + writer.Write "let _fsyacc_immediateActions = [|" ; + for prodIdx in compiledSpec.immediateActionTable do + match prodIdx with + | None -> writer.WriteUInt16 generatorState.anyMarker (* NONE REP *) + | Some act -> writer.WriteUInt16 (generatorState.map_action_to_int act) + writer.WriteLine "|]" ; + end; + + let getType nt = if types.ContainsKey nt then types.[nt] else generatorState.generate_nonterminal_name nt + begin + writer.WriteLine "let _fsyacc_reductions = lazy [|" + for nt,ntIdx,syms,code in compiledSpec.prods do + writer.WriteLine "# %d \"%s\"" writer.OutputLineCount output; + writer.WriteLine " (fun (parseState : %s.IParseState) ->" generatorState.parslib + if generatorState.compat then + 
writer.WriteLine " Parsing.set_parse_state parseState;" + syms |> List.iteri (fun i sym -> + let tyopt = + match sym with + | Terminal t -> + if tokens.ContainsKey t then + tokens.[t] + else None + | NonTerminal nt -> Some (getType nt) + match tyopt with + | Some ty -> writer.WriteLine " let _%d = parseState.GetInput(%d) :?> %s in" (i+1) (i+1) ty + | None -> ()) + writer.WriteLine " Microsoft.FSharp.Core.Operators.box" + writer.WriteLine " ("; + writer.WriteLine " ("; + match code with + | Some (_,pos) -> writer.WriteLine "# %d \"%s\"" pos.pos_lnum pos.pos_fname + | None -> () + match code with + | Some (code,_) -> + let dollar = ref false in + let c = code |> String.collect (fun c -> + if not !dollar && c = '$' then (dollar := true; "") + elif !dollar && c >= '0' && c <= '9' then (dollar := false; "_"+String(c,1)) + elif !dollar then (dollar := false; "$"+String(c,1)) + else String(c,1)) + let lines = c.Split([| '\r'; '\n' |], StringSplitOptions.RemoveEmptyEntries); + for line in lines do + writer.WriteLine " %s" line; + if !dollar then writer.Write "$" + | None -> + writer.WriteLine " raise (%s.Accept(Microsoft.FSharp.Core.Operators.box _1))" generatorState.parslib + writer.WriteLine " )"; + // Place the line count back for the type constraint + match code with + | Some (_,pos) -> writer.WriteLine "# %d \"%s\"" pos.pos_lnum pos.pos_fname + | None -> () + writer.WriteLine " : %s));" (if types.ContainsKey nt then types.[nt] else generatorState.generate_nonterminal_name nt); + done; + writer.WriteLine "|]" ; + end; + writer.WriteLine "# %d \"%s\"" writer.OutputLineCount output; + writer.WriteLine "let tables : %s.Tables<_> = " generatorState.parslib + writer.WriteLine " { reductions = _fsyacc_reductions.Value;" + writer.WriteLine " endOfInputTag = _fsyacc_endOfInputTag;" + writer.WriteLine " tagOfToken = tagOfToken;" + writer.WriteLine " dataOfToken = _fsyacc_dataOfToken; " + writer.WriteLine " actionTableElements = _fsyacc_actionTableElements;" + writer.WriteLine " 
actionTableRowOffsets = _fsyacc_actionTableRowOffsets;" + writer.WriteLine " stateToProdIdxsTableElements = _fsyacc_stateToProdIdxsTableElements;" + writer.WriteLine " stateToProdIdxsTableRowOffsets = _fsyacc_stateToProdIdxsTableRowOffsets;" + writer.WriteLine " reductionSymbolCounts = _fsyacc_reductionSymbolCounts;" + writer.WriteLine " immediateActions = _fsyacc_immediateActions;" + writer.WriteLine " gotos = _fsyacc_gotos;" + writer.WriteLine " sparseGotoTableRowOffsets = _fsyacc_sparseGotoTableRowOffsets;" + writer.WriteLine " tagOfErrorTerminal = _fsyacc_tagOfErrorTerminal;" + writer.WriteLine " parseError = (fun (ctxt:%s.ParseErrorContext<_>) -> " generatorState.parslib + writer.WriteLine " match parse_error_rich with " + writer.WriteLine " | Some f -> f ctxt" + writer.WriteLine " | None -> parse_error ctxt.Message);" + + writer.WriteLine " numTerminals = %d;" (Array.length compiledSpec.actionTable.[0]); + writer.WriteLine " productionToNonTerminalTable = _fsyacc_productionToNonTerminalTable }" + writer.WriteLine "let engine lexer lexbuf startState = tables.Interpret(lexer, lexbuf, startState)" + + for id,startState in List.zip spec.StartSymbols compiledSpec.startStates do + if not (types.ContainsKey id) then + failwith ("a %type declaration is required for for start token "+id); + let ty = types.[id] in + writer.WriteLine "let %s lexer lexbuf : %s =" id ty; + writer.WriteLine " engine lexer lexbuf %d :?> _" startState + + for id in spec.StartSymbols do + if not (types.ContainsKey id) then + failwith ("a %type declaration is required for start token "+id); + let ty = types.[id] in + writer.WriteLineInterface "val %s : (%s.LexBuffer<%s> -> token) -> %s.LexBuffer<%s> -> (%s) " id generatorState.lexlib tychar generatorState.lexlib tychar ty; + + +let compileSpec (spec: ParserSpec) (logger: Logger) = + let spec1 = ProcessParserSpecAst spec + CompilerLalrParserSpec logger.LogStream spec1 diff --git a/buildtools/fsyacc/fsyacclex.fs b/buildtools/fsyacc/fsyacclex.fs 
index a035f6fe012..5b99ce61cdd 100644 --- a/buildtools/fsyacc/fsyacclex.fs +++ b/buildtools/fsyacc/fsyacclex.fs @@ -2,12 +2,12 @@ (* (c) Microsoft Corporation 2005-2008. *) -module internal FsLexYacc.FsYacc.Lexer +module FsLexYacc.FsYacc.Lexer open FsLexYacc.FsYacc.AST open FsLexYacc.FsYacc.Parser open System.Text -open Internal.Utilities.Text.Lexing +open FSharp.Text.Lexing let lexeme (lexbuf : LexBuffer) = new System.String(lexbuf.Lexeme) let newline (lexbuf:LexBuffer<_>) = lexbuf.EndPos <- lexbuf.EndPos.NextLine @@ -290,92 +290,80 @@ let trans : uint16[] array = [| 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 130us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 130us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 129us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 
|]; |] let actions : uint16[] = [|65535us; 5us; 1us; 5us; 5us; 3us; 4us; 5us; 4us; 2us; 1us; 0us; 65535us; 5us; 1us; 2us; 5us; 3us; 4us; 5us; 3us; 2us; 0us; 65535us; 0us; 1us; 2us; 8us; 8us; 4us; 5us; 8us; 7us; 8us; 6us; 6us; 5us; 65535us; 65535us; 65535us; 3us; 2us; 65535us; 7us; 1us; 7us; 2us; 7us; 7us; 5us; 6us; 7us; 65535us; 65535us; 65535us; 4us; 3us; 3us; 2us; 1us; 0us; 65535us; 0us; 1us; 2us; 65535us; 22us; 17us; 11us; 12us; 13us; 14us; 15us; 16us; 22us; 17us; 18us; 22us; 21us; 22us; 23us; 19us; 20us; 20us; 17us; 16us; 15us; 17us; 17us; 17us; 10us; 0us; 1us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 65535us; 9us; 65535us; 65535us; 65535us; 8us; 65535us; 65535us; 7us; 65535us; 65535us; 5us; 65535us; 65535us; 65535us; 4us; 65535us; 65535us; 65535us; 65535us; 6us; 65535us; 65535us; 65535us; 3us; 2us; 65535us; |] -let _fslex_tables = Internal.Utilities.Text.Lexing.UnicodeTables.Create(trans,actions) +let _fslex_tables = FSharp.Text.Lexing.UnicodeTables.Create(trans,actions) let rec _fslex_dummy () = _fslex_dummy() -(* Rule token *) -and token (lexbuf : Internal.Utilities.Text.Lexing.LexBuffer<_>) = _fslex_token 65 lexbuf -(* Rule fs_type *) -and fs_type (lexbuf : Internal.Utilities.Text.Lexing.LexBuffer<_>) = _fslex_fs_type 61 lexbuf -(* Rule header *) -and header p buff (lexbuf : Internal.Utilities.Text.Lexing.LexBuffer<_>) = _fslex_header p buff 42 lexbuf -(* Rule code *) -and code p buff (lexbuf : Internal.Utilities.Text.Lexing.LexBuffer<_>) = _fslex_code p buff 23 lexbuf -(* Rule codestring *) -and codestring buff (lexbuf : Internal.Utilities.Text.Lexing.LexBuffer<_>) = _fslex_codestring buff 12 lexbuf -(* Rule comment *) -and comment (lexbuf : Internal.Utilities.Text.Lexing.LexBuffer<_>) = _fslex_comment 0 lexbuf -(* Rule token *) -and _fslex_token _fslex_state lexbuf = - match _fslex_tables.Interpret(_fslex_state,lexbuf) with +// Rule token +and token lexbuf = + match 
_fslex_tables.Interpret(65,lexbuf) with | 0 -> ( # 35 "fsyacclex.fsl" let p = lexbuf.StartPos in header p (new StringBuilder 100) lexbuf -# 313 "fsyacclex.fs" +# 301 "fsyacclex.fs" ) | 1 -> ( # 36 "fsyacclex.fsl" PERCENT_PERCENT -# 318 "fsyacclex.fs" +# 306 "fsyacclex.fs" ) | 2 -> ( # 37 "fsyacclex.fsl" typeDepth := 1; startPos := lexbuf.StartPos; clearBuf(); TOKEN (fs_type lexbuf) -# 323 "fsyacclex.fs" +# 311 "fsyacclex.fs" ) | 3 -> ( # 38 "fsyacclex.fsl" TOKEN (None) -# 328 "fsyacclex.fs" +# 316 "fsyacclex.fs" ) | 4 -> ( # 39 "fsyacclex.fsl" START -# 333 "fsyacclex.fs" +# 321 "fsyacclex.fs" ) | 5 -> ( # 40 "fsyacclex.fsl" PREC -# 338 "fsyacclex.fs" +# 326 "fsyacclex.fs" ) | 6 -> ( # 41 "fsyacclex.fsl" typeDepth := 1; startPos := lexbuf.StartPos; clearBuf(); TYPE (match fs_type lexbuf with Some x -> x | None -> failwith "gettype") -# 343 "fsyacclex.fs" +# 331 "fsyacclex.fs" ) | 7 -> ( # 42 "fsyacclex.fsl" LEFT -# 348 "fsyacclex.fs" +# 336 "fsyacclex.fs" ) | 8 -> ( # 43 "fsyacclex.fsl" RIGHT -# 353 "fsyacclex.fs" +# 341 "fsyacclex.fs" ) | 9 -> ( # 44 "fsyacclex.fsl" NONASSOC -# 358 "fsyacclex.fs" +# 346 "fsyacclex.fs" ) | 10 -> ( # 45 "fsyacclex.fsl" ERROR -# 363 "fsyacclex.fs" +# 351 "fsyacclex.fs" ) | 11 -> ( # 46 "fsyacclex.fsl" LESS -# 368 "fsyacclex.fs" +# 356 "fsyacclex.fs" ) | 12 -> ( # 47 "fsyacclex.fsl" GREATER -# 373 "fsyacclex.fs" +# 361 "fsyacclex.fs" ) | 13 -> ( # 48 "fsyacclex.fsl" SEMI -# 378 "fsyacclex.fs" +# 366 "fsyacclex.fs" ) | 14 -> ( # 49 "fsyacclex.fsl" @@ -384,61 +372,61 @@ and _fslex_token _fslex_state lexbuf = // adjust the first line to get even indentation for all lines w.r.t. 
the left hand margin buff.Append (String.replicate (lexbuf.StartPos.Column+1) " ") |> ignore; code p buff lexbuf -# 387 "fsyacclex.fs" +# 375 "fsyacclex.fs" ) | 15 -> ( # 54 "fsyacclex.fsl" token lexbuf -# 392 "fsyacclex.fs" +# 380 "fsyacclex.fs" ) | 16 -> ( # 55 "fsyacclex.fsl" newline lexbuf; token lexbuf -# 397 "fsyacclex.fs" +# 385 "fsyacclex.fs" ) | 17 -> ( # 56 "fsyacclex.fsl" IDENT (lexeme lexbuf) -# 402 "fsyacclex.fs" +# 390 "fsyacclex.fs" ) | 18 -> ( # 57 "fsyacclex.fsl" BAR -# 407 "fsyacclex.fs" +# 395 "fsyacclex.fs" ) | 19 -> ( # 58 "fsyacclex.fsl" ignore(comment lexbuf); token lexbuf -# 412 "fsyacclex.fs" +# 400 "fsyacclex.fs" ) | 20 -> ( # 59 "fsyacclex.fsl" token lexbuf -# 417 "fsyacclex.fs" +# 405 "fsyacclex.fs" ) | 21 -> ( # 60 "fsyacclex.fsl" COLON -# 422 "fsyacclex.fs" +# 410 "fsyacclex.fs" ) | 22 -> ( # 61 "fsyacclex.fsl" unexpected_char lexbuf -# 427 "fsyacclex.fs" +# 415 "fsyacclex.fs" ) | 23 -> ( # 62 "fsyacclex.fsl" EOF -# 432 "fsyacclex.fs" +# 420 "fsyacclex.fs" ) | _ -> failwith "token" -(* Rule fs_type *) -and _fslex_fs_type _fslex_state lexbuf = - match _fslex_tables.Interpret(_fslex_state,lexbuf) with +// Rule fs_type +and fs_type lexbuf = + match _fslex_tables.Interpret(61,lexbuf) with | 0 -> ( # 65 "fsyacclex.fsl" incr typeDepth; appendBuf(lexeme lexbuf); fs_type lexbuf -# 441 "fsyacclex.fs" +# 429 "fsyacclex.fs" ) | 1 -> ( # 67 "fsyacclex.fsl" @@ -446,73 +434,73 @@ and _fslex_fs_type _fslex_state lexbuf = if !typeDepth = 0 then Some(string str_buf) else appendBuf(lexeme lexbuf); fs_type lexbuf -# 449 "fsyacclex.fs" +# 437 "fsyacclex.fs" ) | 2 -> ( # 71 "fsyacclex.fsl" appendBuf(lexeme lexbuf); fs_type lexbuf -# 454 "fsyacclex.fs" +# 442 "fsyacclex.fs" ) | _ -> failwith "fs_type" -(* Rule header *) -and _fslex_header p buff _fslex_state lexbuf = - match _fslex_tables.Interpret(_fslex_state,lexbuf) with +// Rule header +and header p buff lexbuf = + match _fslex_tables.Interpret(42,lexbuf) with | 0 -> ( # 74 "fsyacclex.fsl" HEADER 
(buff.ToString(), p) -# 463 "fsyacclex.fs" +# 451 "fsyacclex.fs" ) | 1 -> ( # 75 "fsyacclex.fsl" newline lexbuf; ignore <| buff.Append System.Environment.NewLine; header p buff lexbuf -# 470 "fsyacclex.fs" +# 458 "fsyacclex.fs" ) | 2 -> ( # 79 "fsyacclex.fsl" ignore <| buff.Append (lexeme lexbuf); header p buff lexbuf -# 476 "fsyacclex.fs" +# 464 "fsyacclex.fs" ) | 3 -> ( # 82 "fsyacclex.fsl" ignore <| buff.Append (lexeme lexbuf); header p buff lexbuf -# 482 "fsyacclex.fs" +# 470 "fsyacclex.fs" ) | 4 -> ( # 85 "fsyacclex.fsl" ignore <| buff.Append (lexeme lexbuf); header p buff lexbuf -# 488 "fsyacclex.fs" +# 476 "fsyacclex.fs" ) | 5 -> ( # 88 "fsyacclex.fsl" ignore <| buff.Append (lexeme lexbuf); ignore(codestring buff lexbuf); header p buff lexbuf -# 495 "fsyacclex.fs" +# 483 "fsyacclex.fs" ) | 6 -> ( # 91 "fsyacclex.fsl" EOF -# 500 "fsyacclex.fs" +# 488 "fsyacclex.fs" ) | 7 -> ( # 92 "fsyacclex.fsl" ignore <| buff.Append(lexeme lexbuf).[0]; header p buff lexbuf -# 506 "fsyacclex.fs" +# 494 "fsyacclex.fs" ) | _ -> failwith "header" -(* Rule code *) -and _fslex_code p buff _fslex_state lexbuf = - match _fslex_tables.Interpret(_fslex_state,lexbuf) with +// Rule code +and code p buff lexbuf = + match _fslex_tables.Interpret(23,lexbuf) with | 0 -> ( # 95 "fsyacclex.fsl" CODE (buff.ToString(), p) -# 515 "fsyacclex.fs" +# 503 "fsyacclex.fs" ) | 1 -> ( # 96 "fsyacclex.fsl" @@ -520,124 +508,124 @@ and _fslex_code p buff _fslex_state lexbuf = ignore(code p buff lexbuf); ignore <| buff.Append "}"; code p buff lexbuf -# 523 "fsyacclex.fs" +# 511 "fsyacclex.fs" ) | 2 -> ( # 100 "fsyacclex.fsl" newline lexbuf; ignore <| buff.Append System.Environment.NewLine; code p buff lexbuf -# 530 "fsyacclex.fs" +# 518 "fsyacclex.fs" ) | 3 -> ( # 104 "fsyacclex.fsl" ignore <| buff.Append (lexeme lexbuf); code p buff lexbuf -# 536 "fsyacclex.fs" +# 524 "fsyacclex.fs" ) | 4 -> ( # 106 "fsyacclex.fsl" ignore <| buff.Append (lexeme lexbuf); ignore(codestring buff lexbuf); code p buff lexbuf 
-# 543 "fsyacclex.fs" +# 531 "fsyacclex.fs" ) | 5 -> ( # 110 "fsyacclex.fsl" ignore <| buff.Append (lexeme lexbuf); code p buff lexbuf -# 549 "fsyacclex.fs" +# 537 "fsyacclex.fs" ) | 6 -> ( # 113 "fsyacclex.fsl" ignore <| buff.Append (lexeme lexbuf); code p buff lexbuf -# 555 "fsyacclex.fs" +# 543 "fsyacclex.fs" ) | 7 -> ( # 115 "fsyacclex.fsl" EOF -# 560 "fsyacclex.fs" +# 548 "fsyacclex.fs" ) | 8 -> ( # 116 "fsyacclex.fsl" ignore <| buff.Append(lexeme lexbuf).[0]; code p buff lexbuf -# 566 "fsyacclex.fs" +# 554 "fsyacclex.fs" ) | _ -> failwith "code" -(* Rule codestring *) -and _fslex_codestring buff _fslex_state lexbuf = - match _fslex_tables.Interpret(_fslex_state,lexbuf) with +// Rule codestring +and codestring buff lexbuf = + match _fslex_tables.Interpret(12,lexbuf) with | 0 -> ( # 122 "fsyacclex.fsl" ignore <| buff.Append (lexeme lexbuf); codestring buff lexbuf -# 576 "fsyacclex.fs" +# 564 "fsyacclex.fs" ) | 1 -> ( # 124 "fsyacclex.fsl" ignore <| buff.Append (lexeme lexbuf); buff.ToString() -# 582 "fsyacclex.fs" +# 570 "fsyacclex.fs" ) | 2 -> ( # 126 "fsyacclex.fsl" newline lexbuf; ignore <| buff.Append System.Environment.NewLine; codestring buff lexbuf -# 589 "fsyacclex.fs" +# 577 "fsyacclex.fs" ) | 3 -> ( # 130 "fsyacclex.fsl" ignore <| buff.Append (lexeme lexbuf); codestring buff lexbuf -# 595 "fsyacclex.fs" +# 583 "fsyacclex.fs" ) | 4 -> ( # 132 "fsyacclex.fsl" failwith "unterminated string in code" -# 600 "fsyacclex.fs" +# 588 "fsyacclex.fs" ) | 5 -> ( # 133 "fsyacclex.fsl" ignore <| buff.Append(lexeme lexbuf).[0]; codestring buff lexbuf -# 606 "fsyacclex.fs" +# 594 "fsyacclex.fs" ) | _ -> failwith "codestring" -(* Rule comment *) -and _fslex_comment _fslex_state lexbuf = - match _fslex_tables.Interpret(_fslex_state,lexbuf) with +// Rule comment +and comment lexbuf = + match _fslex_tables.Interpret(0,lexbuf) with | 0 -> ( # 138 "fsyacclex.fsl" ignore(comment lexbuf); comment lexbuf -# 615 "fsyacclex.fs" +# 603 "fsyacclex.fs" ) | 1 -> ( # 139 
"fsyacclex.fsl" newline lexbuf; comment lexbuf -# 620 "fsyacclex.fs" +# 608 "fsyacclex.fs" ) | 2 -> ( # 140 "fsyacclex.fsl" () -# 625 "fsyacclex.fs" +# 613 "fsyacclex.fs" ) | 3 -> ( # 141 "fsyacclex.fsl" failwith "end of file in comment" -# 630 "fsyacclex.fs" +# 618 "fsyacclex.fs" ) | 4 -> ( # 142 "fsyacclex.fsl" comment lexbuf -# 635 "fsyacclex.fs" +# 623 "fsyacclex.fs" ) | 5 -> ( # 143 "fsyacclex.fsl" comment lexbuf -# 640 "fsyacclex.fs" +# 628 "fsyacclex.fs" ) | _ -> failwith "comment" diff --git a/buildtools/fsyacc/fsyaccpars.fs b/buildtools/fsyacc/fsyaccpars.fs index c6d614f2694..504e1cb55ed 100644 --- a/buildtools/fsyacc/fsyaccpars.fs +++ b/buildtools/fsyacc/fsyaccpars.fs @@ -1,8 +1,8 @@ // Implementation file for parser generated by fsyacc -module internal FsLexYacc.FsYacc.Parser +module FsLexYacc.FsYacc.Parser #nowarn "64";; // turn off warnings that type variables used in production annotations are instantiated to concrete type -open Internal.Utilities.Text.Lexing -open Internal.Utilities.Text.Parsing.ParseHelpers +open FSharp.Text.Lexing +open FSharp.Text.Parsing.ParseHelpers # 1 "fsyaccpars.fsy" (* (c) Microsoft Corporation 2005-2008. 
*) @@ -214,19 +214,19 @@ let _fsyacc_productionToNonTerminalTable = [|0us; 1us; 2us; 2us; 3us; 3us; 4us; let _fsyacc_immediateActions = [|65535us; 49152us; 65535us; 65535us; 65535us; 16385us; 16386us; 65535us; 16389us; 65535us; 16390us; 65535us; 16391us; 65535us; 16392us; 65535us; 16393us; 65535us; 16394us; 65535us; 16395us; 65535us; 16396us; 65535us; 16398us; 65535us; 65535us; 65535us; 65535us; 16400us; 16402us; 16404us; 65535us; 65535us; 16405us; 65535us; 65535us; 16407us; 65535us; 16408us; 65535us; 16409us; 65535us; 16412us; |] let _fsyacc_reductions () = [| # 216 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : AST.ParserSpec)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> AST.ParserSpec in Microsoft.FSharp.Core.Operators.box ( ( - raise (Internal.Utilities.Text.Parsing.Accept(Microsoft.FSharp.Core.Operators.box _1)) + raise (FSharp.Text.Parsing.Accept(Microsoft.FSharp.Core.Operators.box _1)) ) - : '_startspec)); + : 'gentype__startspec)); # 225 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'headeropt)) in - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'decls)) in - let _4 = (let data = parseState.GetInput(4) in (Microsoft.FSharp.Core.Operators.unbox data : 'rules)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_headeropt in + let _2 = parseState.GetInput(2) :?> 'gentype_decls in + let _4 = parseState.GetInput(4) :?> 'gentype_rules in Microsoft.FSharp.Core.Operators.box ( ( @@ -236,8 +236,8 @@ let _fsyacc_reductions () = [| # 25 "fsyaccpars.fsy" : AST.ParserSpec)); # 238 "fsyaccpars.fs" - (fun (parseState : 
Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : AST.Code)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> AST.Code in Microsoft.FSharp.Core.Operators.box ( ( @@ -245,9 +245,9 @@ let _fsyacc_reductions () = [| _1 ) # 29 "fsyaccpars.fsy" - : 'headeropt)); + : 'gentype_headeropt)); # 249 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> + (fun (parseState : FSharp.Text.Parsing.IParseState) -> Microsoft.FSharp.Core.Operators.box ( ( @@ -255,9 +255,9 @@ let _fsyacc_reductions () = [| "", (parseState.ResultRange |> fst) ) # 31 "fsyaccpars.fsy" - : 'headeropt)); + : 'gentype_headeropt)); # 259 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> + (fun (parseState : FSharp.Text.Parsing.IParseState) -> Microsoft.FSharp.Core.Operators.box ( ( @@ -265,11 +265,11 @@ let _fsyacc_reductions () = [| [] ) # 34 "fsyaccpars.fsy" - : 'decls)); + : 'gentype_decls)); # 269 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'decl)) in - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'decls)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_decl in + let _2 = parseState.GetInput(2) :?> 'gentype_decls in Microsoft.FSharp.Core.Operators.box ( ( @@ -277,11 +277,11 @@ let _fsyacc_reductions () = [| _1 :: _2 ) # 35 "fsyaccpars.fsy" - : 'decls)); + : 'gentype_decls)); # 281 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : string option)) in - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox 
data : 'idents)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> string option in + let _2 = parseState.GetInput(2) :?> 'gentype_idents in Microsoft.FSharp.Core.Operators.box ( ( @@ -289,11 +289,11 @@ let _fsyacc_reductions () = [| (fun x -> {x with Tokens = x.Tokens @ (List.map (fun x -> (x,_1)) _2)}) ) # 38 "fsyaccpars.fsy" - : 'decl)); + : 'gentype_decl)); # 293 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : string)) in - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'idents)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> string in + let _2 = parseState.GetInput(2) :?> 'gentype_idents in Microsoft.FSharp.Core.Operators.box ( ( @@ -301,10 +301,10 @@ let _fsyacc_reductions () = [| (fun x -> {x with Types = x.Types @ (List.map (fun x -> (x,_1)) _2)} ) ) # 39 "fsyaccpars.fsy" - : 'decl)); + : 'gentype_decl)); # 305 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'idents)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _2 = parseState.GetInput(2) :?> 'gentype_idents in Microsoft.FSharp.Core.Operators.box ( ( @@ -312,10 +312,10 @@ let _fsyacc_reductions () = [| (fun x -> {x with StartSymbols = x.StartSymbols @ _2} ) ) # 40 "fsyaccpars.fsy" - : 'decl)); + : 'gentype_decl)); # 316 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'idents)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _2 = parseState.GetInput(2) :?> 'gentype_idents in Microsoft.FSharp.Core.Operators.box ( ( @@ -323,10 +323,10 @@ 
let _fsyacc_reductions () = [| (fun x -> {x with Associativities = x.Associativities @ [(List.map (fun x -> (x,LeftAssoc)) _2)]} ) ) # 41 "fsyaccpars.fsy" - : 'decl)); + : 'gentype_decl)); # 327 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'idents)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _2 = parseState.GetInput(2) :?> 'gentype_idents in Microsoft.FSharp.Core.Operators.box ( ( @@ -334,10 +334,10 @@ let _fsyacc_reductions () = [| (fun x -> {x with Associativities = x.Associativities @ [(List.map (fun x -> (x,RightAssoc)) _2)]} ) ) # 42 "fsyaccpars.fsy" - : 'decl)); + : 'gentype_decl)); # 338 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'idents)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _2 = parseState.GetInput(2) :?> 'gentype_idents in Microsoft.FSharp.Core.Operators.box ( ( @@ -345,11 +345,11 @@ let _fsyacc_reductions () = [| (fun x -> {x with Associativities = x.Associativities @ [(List.map (fun x -> (x,NonAssoc)) _2)]} ) ) # 43 "fsyaccpars.fsy" - : 'decl)); + : 'gentype_decl)); # 349 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : string)) in - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'idents)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> string in + let _2 = parseState.GetInput(2) :?> 'gentype_idents in Microsoft.FSharp.Core.Operators.box ( ( @@ -357,9 +357,9 @@ let _fsyacc_reductions () = [| _1 :: _2 ) # 45 "fsyaccpars.fsy" - : 'idents)); + : 'gentype_idents)); # 361 "fsyaccpars.fs" - (fun (parseState : 
Internal.Utilities.Text.Parsing.IParseState) -> + (fun (parseState : FSharp.Text.Parsing.IParseState) -> Microsoft.FSharp.Core.Operators.box ( ( @@ -367,11 +367,11 @@ let _fsyacc_reductions () = [| [] ) # 45 "fsyaccpars.fsy" - : 'idents)); + : 'gentype_idents)); # 371 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'rule)) in - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'rules)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_rule in + let _2 = parseState.GetInput(2) :?> 'gentype_rules in Microsoft.FSharp.Core.Operators.box ( ( @@ -379,10 +379,10 @@ let _fsyacc_reductions () = [| _1 :: _2 ) # 46 "fsyaccpars.fsy" - : 'rules)); + : 'gentype_rules)); # 383 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'rule)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_rule in Microsoft.FSharp.Core.Operators.box ( ( @@ -390,13 +390,13 @@ let _fsyacc_reductions () = [| [_1] ) # 46 "fsyaccpars.fsy" - : 'rules)); + : 'gentype_rules)); # 394 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : string)) in - let _3 = (let data = parseState.GetInput(3) in (Microsoft.FSharp.Core.Operators.unbox data : 'optbar)) in - let _4 = (let data = parseState.GetInput(4) in (Microsoft.FSharp.Core.Operators.unbox data : 'clauses)) in - let _5 = (let data = parseState.GetInput(5) in (Microsoft.FSharp.Core.Operators.unbox data : 'optsemi)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> string in + let 
_3 = parseState.GetInput(3) :?> 'gentype_optbar in + let _4 = parseState.GetInput(4) :?> 'gentype_clauses in + let _5 = parseState.GetInput(5) :?> 'gentype_optsemi in Microsoft.FSharp.Core.Operators.box ( ( @@ -404,9 +404,9 @@ let _fsyacc_reductions () = [| (_1,_4) ) # 47 "fsyaccpars.fsy" - : 'rule)); + : 'gentype_rule)); # 408 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> + (fun (parseState : FSharp.Text.Parsing.IParseState) -> Microsoft.FSharp.Core.Operators.box ( ( @@ -414,9 +414,9 @@ let _fsyacc_reductions () = [| ) # 48 "fsyaccpars.fsy" - : 'optbar)); + : 'gentype_optbar)); # 418 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> + (fun (parseState : FSharp.Text.Parsing.IParseState) -> Microsoft.FSharp.Core.Operators.box ( ( @@ -424,9 +424,9 @@ let _fsyacc_reductions () = [| ) # 48 "fsyaccpars.fsy" - : 'optbar)); + : 'gentype_optbar)); # 428 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> + (fun (parseState : FSharp.Text.Parsing.IParseState) -> Microsoft.FSharp.Core.Operators.box ( ( @@ -434,9 +434,9 @@ let _fsyacc_reductions () = [| ) # 49 "fsyaccpars.fsy" - : 'optsemi)); + : 'gentype_optsemi)); # 438 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> + (fun (parseState : FSharp.Text.Parsing.IParseState) -> Microsoft.FSharp.Core.Operators.box ( ( @@ -444,11 +444,11 @@ let _fsyacc_reductions () = [| ) # 49 "fsyaccpars.fsy" - : 'optsemi)); + : 'gentype_optsemi)); # 448 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'clause)) in - let _3 = (let data = parseState.GetInput(3) in (Microsoft.FSharp.Core.Operators.unbox data : 'clauses)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_clause in + let _3 = parseState.GetInput(3) :?> 
'gentype_clauses in Microsoft.FSharp.Core.Operators.box ( ( @@ -456,10 +456,10 @@ let _fsyacc_reductions () = [| _1 :: _3 ) # 50 "fsyaccpars.fsy" - : 'clauses)); + : 'gentype_clauses)); # 460 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'clause)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_clause in Microsoft.FSharp.Core.Operators.box ( ( @@ -467,12 +467,12 @@ let _fsyacc_reductions () = [| [_1] ) # 50 "fsyaccpars.fsy" - : 'clauses)); + : 'gentype_clauses)); # 471 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : 'syms)) in - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'optprec)) in - let _3 = (let data = parseState.GetInput(3) in (Microsoft.FSharp.Core.Operators.unbox data : AST.Code)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> 'gentype_syms in + let _2 = parseState.GetInput(2) :?> 'gentype_optprec in + let _3 = parseState.GetInput(3) :?> AST.Code in Microsoft.FSharp.Core.Operators.box ( ( @@ -480,11 +480,11 @@ let _fsyacc_reductions () = [| Rule(_1,_2,Some _3) ) # 51 "fsyaccpars.fsy" - : 'clause)); + : 'gentype_clause)); # 484 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _1 = (let data = parseState.GetInput(1) in (Microsoft.FSharp.Core.Operators.unbox data : string)) in - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'syms)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _1 = parseState.GetInput(1) :?> string in + let _2 = parseState.GetInput(2) :?> 'gentype_syms in Microsoft.FSharp.Core.Operators.box ( ( @@ -492,10 +492,10 @@ 
let _fsyacc_reductions () = [| _1 :: _2 ) # 52 "fsyaccpars.fsy" - : 'syms)); + : 'gentype_syms)); # 496 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : 'syms)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _2 = parseState.GetInput(2) :?> 'gentype_syms in Microsoft.FSharp.Core.Operators.box ( ( @@ -503,9 +503,9 @@ let _fsyacc_reductions () = [| "error" :: _2 ) # 52 "fsyaccpars.fsy" - : 'syms)); + : 'gentype_syms)); # 507 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> + (fun (parseState : FSharp.Text.Parsing.IParseState) -> Microsoft.FSharp.Core.Operators.box ( ( @@ -513,9 +513,9 @@ let _fsyacc_reductions () = [| [] ) # 52 "fsyaccpars.fsy" - : 'syms)); + : 'gentype_syms)); # 517 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> + (fun (parseState : FSharp.Text.Parsing.IParseState) -> Microsoft.FSharp.Core.Operators.box ( ( @@ -523,10 +523,10 @@ let _fsyacc_reductions () = [| None ) # 53 "fsyaccpars.fsy" - : 'optprec)); + : 'gentype_optprec)); # 527 "fsyaccpars.fs" - (fun (parseState : Internal.Utilities.Text.Parsing.IParseState) -> - let _2 = (let data = parseState.GetInput(2) in (Microsoft.FSharp.Core.Operators.unbox data : string)) in + (fun (parseState : FSharp.Text.Parsing.IParseState) -> + let _2 = parseState.GetInput(2) :?> string in Microsoft.FSharp.Core.Operators.box ( ( @@ -534,10 +534,10 @@ let _fsyacc_reductions () = [| Some _2 ) # 53 "fsyaccpars.fsy" - : 'optprec)); + : 'gentype_optprec)); |] # 539 "fsyaccpars.fs" -let tables () : Internal.Utilities.Text.Parsing.Tables<_> = +let tables : FSharp.Text.Parsing.Tables<_> = { reductions= _fsyacc_reductions (); endOfInputTag = _fsyacc_endOfInputTag; tagOfToken = tagOfToken; @@ -551,12 +551,12 @@ let tables () : Internal.Utilities.Text.Parsing.Tables<_> = gotos = _fsyacc_gotos; 
sparseGotoTableRowOffsets = _fsyacc_sparseGotoTableRowOffsets; tagOfErrorTerminal = _fsyacc_tagOfErrorTerminal; - parseError = (fun (ctxt:Internal.Utilities.Text.Parsing.ParseErrorContext<_>) -> + parseError = (fun (ctxt:FSharp.Text.Parsing.ParseErrorContext<_>) -> match parse_error_rich with | Some f -> f ctxt | None -> parse_error ctxt.Message); numTerminals = 21; productionToNonTerminalTable = _fsyacc_productionToNonTerminalTable } -let engine lexer lexbuf startState = (tables ()).Interpret(lexer, lexbuf, startState) +let engine lexer lexbuf startState = tables.Interpret(lexer, lexbuf, startState) let spec lexer lexbuf : AST.ParserSpec = - Microsoft.FSharp.Core.Operators.unbox ((tables ()).Interpret(lexer, lexbuf, 0)) + engine lexer lexbuf 0 :?> _