Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 56 additions & 58 deletions src/buildtools/fslex/fslex.fs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ open System.Collections.Generic
open System.IO

//------------------------------------------------------------------
// This code is duplicated from FSharp.Compiler.UnicodeLexing
// This code is duplicated from Microsoft.FSharp.Compiler.UnicodeLexing

type Lexbuf = LexBuffer<char>

Expand All @@ -32,7 +32,7 @@ let UnicodeFileAsLexbuf (filename,codePage : int option) : FileStream * StreamRe
| None -> new StreamReader(stream,true)
| Some n -> new StreamReader(stream,System.Text.Encoding.GetEncoding(n))
let lexbuf = LexBuffer.FromFunction(reader.Read)
lexbuf.EndPos <- Position.FirstLine(filename);
lexbuf.EndPos <- Position.FirstLine(filename)
stream, reader, lexbuf

//------------------------------------------------------------------
Expand All @@ -43,32 +43,31 @@ let out = ref None
let inputCodePage = ref None
let light = ref None

let mutable lexlib = "Microsoft.FSharp.Text.Lexing"
let mutable lexlib = "FSharp.Text.Lexing"

let usage =
[ ArgInfo ("-o", ArgType.String (fun s -> out := Some s), "Name the output file.");
ArgInfo ("--codepage", ArgType.Int (fun i -> inputCodePage := Some i), "Assume input lexer specification file is encoded with the given codepage.");
ArgInfo ("--light", ArgType.Unit (fun () -> light := Some true), "(ignored)");
ArgInfo ("--light-off", ArgType.Unit (fun () -> light := Some false), "Add #light \"off\" to the top of the generated file");
ArgInfo ("--lexlib", ArgType.String (fun s -> lexlib <- s), "Specify the namespace for the implementation of the lexer table interpreter (default Microsoft.FSharp.Text.Lexing)");
ArgInfo ("--unicode", ArgType.Set unicode, "Produce a lexer for use with 16-bit unicode characters.");
[ ArgInfo ("-o", ArgType.String (fun s -> out := Some s), "Name the output file.")
ArgInfo ("--codepage", ArgType.Int (fun i -> inputCodePage := Some i), "Assume input lexer specification file is encoded with the given codepage.")
ArgInfo ("--light", ArgType.Unit (fun () -> light := Some true), "(ignored)")
ArgInfo ("--light-off", ArgType.Unit (fun () -> light := Some false), "Add #light \"off\" to the top of the generated file")
ArgInfo ("--lexlib", ArgType.String (fun s -> lexlib <- s), "Specify the namespace for the implementation of the lexer table interpreter (default FSharp.Text.Lexing)")
ArgInfo ("--unicode", ArgType.Set unicode, "Produce a lexer for use with 16-bit unicode characters.")
]

let _ = ArgParser.Parse(usage, (fun x -> match !input with Some _ -> failwith "more than one input given" | None -> input := Some x), "fslex <filename>")

let outputInt (os: TextWriter) (n:int) = os.Write(string n)

let outputCodedUInt16 (os: #TextWriter) (n:int) =
os.Write n;
os.Write "us; ";
os.Write n
os.Write "us; "

let sentinel = 255 * 256 + 255

let lineCount = ref 0
let cfprintfn (os: #TextWriter) fmt = Printf.kfprintf (fun () -> incr lineCount; os.WriteLine()) os fmt

[<EntryPoint>]
let main(args: string[]) =
let main() =
try
let filename = (match !input with Some x -> x | None -> failwith "no input given")
let domain = if !unicode then "Unicode" else "Ascii"
Expand All @@ -82,14 +81,14 @@ let main(args: string[]) =
eprintf "%s(%d,%d): error: %s" filename lexbuf.StartPos.Line lexbuf.StartPos.Column
(match e with
| Failure s -> s
| _ -> e.Message);
| _ -> e.Message)
exit 1
printfn "compiling to dfas (can take a while...)";
printfn "compiling to dfas (can take a while...)"
let perRuleData, dfaNodes = AST.Compile spec
let dfaNodes = dfaNodes |> List.sortBy (fun n -> n.Id)

printfn "%d states" dfaNodes.Length;
printfn "writing output";
printfn "%d states" dfaNodes.Length
printfn "writing output"

let output =
match !out with
Expand All @@ -99,21 +98,21 @@ let main(args: string[]) =
use os = System.IO.File.CreateText output

if (!light = Some(false)) || (!light = None && (Path.HasExtension(output) && Path.GetExtension(output) = ".ml")) then
cfprintfn os "#light \"off\"";
cfprintfn os "#light \"off\""

let printLinesIfCodeDefined (code,pos:Position) =
if pos <> Position.Empty // If bottom code is unspecified, then position is empty.
then
cfprintfn os "# %d \"%s\"" pos.Line pos.FileName;
cfprintfn os "%s" code;
cfprintfn os "# %d \"%s\"" pos.Line pos.FileName
cfprintfn os "%s" code

printLinesIfCodeDefined spec.TopCode
let code = fst spec.TopCode
lineCount := !lineCount + code.Replace("\r","").Split([| '\n' |]).Length;
cfprintfn os "# %d \"%s\"" !lineCount output;
lineCount := !lineCount + code.Replace("\r","").Split([| '\n' |]).Length
cfprintfn os "# %d \"%s\"" !lineCount output

cfprintfn os "let trans : uint16[] array = ";
cfprintfn os " [| ";
cfprintfn os "let trans : uint16[] array = "
cfprintfn os " [| "
if !unicode then
let specificUnicodeChars = GetSpecificUnicodeChars()
// This emits a (numLowUnicodeChars+NumUnicodeCategories+(2*#specificUnicodeChars)+1) * #states array of encoded UInt16 values
Expand All @@ -128,8 +127,8 @@ let main(args: string[]) =
//
// For the SpecificUnicodeChars the entries are char/next-state pairs.
for state in dfaNodes do
cfprintfn os " (* State %d *)" state.Id;
fprintf os " [| ";
cfprintfn os " (* State %d *)" state.Id
fprintf os " [| "
let trans =
let dict = new Dictionary<_,_>()
state.Transitions |> List.iter dict.Add
Expand All @@ -141,15 +140,15 @@ let main(args: string[]) =
outputCodedUInt16 os sentinel
for i = 0 to numLowUnicodeChars-1 do
let c = char i
emit (EncodeChar c);
emit (EncodeChar c)
for c in specificUnicodeChars do
outputCodedUInt16 os (int c);
emit (EncodeChar c);
outputCodedUInt16 os (int c)
emit (EncodeChar c)
for i = 0 to NumUnicodeCategories-1 do
emit (EncodeUnicodeCategoryIndex i);
emit Eof;
emit (EncodeUnicodeCategoryIndex i)
emit Eof
cfprintfn os "|];"
done;
done

else
// Each row for the ASCII table has format
Expand All @@ -160,8 +159,8 @@ let main(args: string[]) =

// This emits a (256+1) * #states array of encoded UInt16 values
for state in dfaNodes do
cfprintfn os " (* State %d *)" state.Id;
fprintf os " [|";
cfprintfn os " (* State %d *)" state.Id
fprintf os " [|"
let trans =
let dict = new Dictionary<_,_>()
state.Transitions |> List.iter dict.Add
Expand All @@ -173,52 +172,51 @@ let main(args: string[]) =
outputCodedUInt16 os sentinel
for i = 0 to 255 do
let c = char i
emit (EncodeChar c);
emit Eof;
emit (EncodeChar c)
emit Eof
cfprintfn os "|];"
done;
done

cfprintfn os " |] ";
cfprintfn os " |] "

fprintf os "let actions : uint16[] = [|";
fprintf os "let actions : uint16[] = [|"
for state in dfaNodes do
if state.Accepted.Length > 0 then
outputCodedUInt16 os (snd state.Accepted.Head)
else
outputCodedUInt16 os sentinel
done;
cfprintfn os "|]";
cfprintfn os "let _fslex_tables = %s.%sTables.Create(trans,actions)" lexlib domain;
done
cfprintfn os "|]"
cfprintfn os "let _fslex_tables = %s.%sTables.Create(trans,actions)" lexlib domain

cfprintfn os "let rec _fslex_dummy () = _fslex_dummy() ";
cfprintfn os "let rec _fslex_dummy () = _fslex_dummy() "

// These actions push the additional start state and come first, because they are then typically inlined into later
// rules. This means more tailcalls are taken as direct branches, increasing efficiency and
// improving stack usage on platforms that do not take tailcalls.
for ((startNode, actions),(ident,args,_)) in List.zip perRuleData spec.Rules do
cfprintfn os "(* Rule %s *)" ident;
cfprintfn os "and %s %s (lexbuf : %s.LexBuffer<_>) = _fslex_%s %s %d lexbuf" ident (String.Join(" ",Array.ofList args)) lexlib ident (String.Join(" ",Array.ofList args)) startNode.Id;
for ((startNode, actions),(ident,args,_)) in List.zip perRuleData spec.Rules do
cfprintfn os "(* Rule %s *)" ident;
cfprintfn os "and _fslex_%s %s _fslex_state lexbuf =" ident (String.Join(" ",Array.ofList args));
cfprintfn os " match _fslex_tables.Interpret(_fslex_state,lexbuf) with" ;
actions |> Seq.iteri (fun i (code,pos) ->
cfprintfn os " | %d -> ( " i;
cfprintfn os "# %d \"%s\"" pos.Line pos.FileName;
cfprintfn os "// Rule %s" ident
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This must be the change where it fixes the issue.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes that's right

cfprintfn os "and %s %s lexbuf =" ident (String.Join(" ",Array.ofList args))
cfprintfn os " match _fslex_tables.Interpret(%d,lexbuf) with" startNode.Id
actions |> Seq.iteri (fun i (code:string, pos) ->
cfprintfn os " | %d -> ( " i
cfprintfn os "# %d \"%s\"" pos.Line pos.FileName
let lines = code.Split([| '\r'; '\n' |], StringSplitOptions.RemoveEmptyEntries)
for line in lines do
cfprintfn os " %s" line;
cfprintfn os "# %d \"%s\"" !lineCount output;
cfprintfn os " %s" line
cfprintfn os "# %d \"%s\"" !lineCount output
cfprintfn os " )")
cfprintfn os " | _ -> failwith \"%s\"" ident


cfprintfn os "";
cfprintfn os ""

printLinesIfCodeDefined spec.BottomCode
cfprintfn os "# 3000000 \"%s\"" output;
0
cfprintfn os "# 3000000 \"%s\"" output

with e ->
eprintf "FSLEX: error FSL000: %s" (match e with Failure s -> s | e -> e.ToString());
eprintf "FSLEX: error FSL000: %s" (match e with Failure s -> s | e -> e.ToString())
exit 1


let result = main()
2 changes: 1 addition & 1 deletion src/buildtools/fslex/fslexast.fs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
module internal FsLexYacc.FsLex.AST

open System.Collections.Generic
open Microsoft.FSharp.Text
open FSharp.Text
open Microsoft.FSharp.Collections
open Internal.Utilities
open Internal.Utilities.Text.Lexing
Expand Down
22 changes: 10 additions & 12 deletions src/buildtools/fsyacc/fsyacc.fs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ open FsLexYacc.FsYacc
open FsLexYacc.FsYacc.AST

//------------------------------------------------------------------
// This code is duplicated from FSharp.Compiler.UnicodeLexing
// This code is duplicated from Microsoft.FSharp.Compiler.UnicodeLexing

type Lexbuf = LexBuffer<char>

Expand Down Expand Up @@ -47,8 +47,8 @@ let compat = ref false
let log = ref false
let light = ref None
let inputCodePage = ref None
let mutable lexlib = "Microsoft.FSharp.Text.Lexing"
let mutable parslib = "Microsoft.FSharp.Text.Parsing"
let mutable lexlib = "FSharp.Text.Lexing"
let mutable parslib = "FSharp.Text.Parsing"

let usage =
[ ArgInfo("-o", ArgType.String (fun s -> out := Some s), "Name the output file.");
Expand All @@ -60,8 +60,8 @@ let usage =
ArgInfo("--light-off", ArgType.Unit (fun () -> light := Some false), "Add #light \"off\" to the top of the generated file");
ArgInfo("--ml-compatibility", ArgType.Set compat, "Support the use of the global state from the 'Parsing' module in FSharp.PowerPack.dll.");
ArgInfo("--tokens", ArgType.Set tokenize, "Simply tokenize the specification file itself.");
ArgInfo("--lexlib", ArgType.String (fun s -> lexlib <- s), "Specify the namespace for the implementation of the lexer (default: Microsoft.FSharp.Text.Lexing)");
ArgInfo("--parslib", ArgType.String (fun s -> parslib <- s), "Specify the namespace for the implementation of the parser table interpreter (default: Microsoft.FSharp.Text.Parsing)");
ArgInfo("--lexlib", ArgType.String (fun s -> lexlib <- s), "Specify the namespace for the implementation of the lexer (default: FSharp.Text.Lexing)");
ArgInfo("--parslib", ArgType.String (fun s -> parslib <- s), "Specify the namespace for the implementation of the parser table interpreter (default: FSharp.Text.Parsing)");
ArgInfo("--codepage", ArgType.Int (fun i -> inputCodePage := Some i), "Assume input lexer specification file is encoded with the given codepage."); ]

let _ = ArgParser.Parse(usage,(fun x -> match !input with Some _ -> failwith "more than one input given" | None -> input := Some x),"fsyacc <filename>")
Expand Down Expand Up @@ -523,11 +523,9 @@ let main() =

logf (fun oso -> oso.Close())

[<EntryPoint>]
let result(args: string[]) =
try
main()
0
let result =
try main()
with e ->
eprintf "FSYACC: error FSY000: %s" (match e with Failure s -> s | e -> e.Message);
1
eprintf "FSYACC: error FSY000: %s" (match e with Failure s -> s | e -> e.Message);
exit 1

6 changes: 3 additions & 3 deletions src/buildtools/fsyacc/fsyaccast.fs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ type Symbols = Symbol list
//---------------------------------------------------------------------
// Output Raw Parser Spec AST

let StringOfSym sym = match sym with Terminal s -> "'" + s + "'" | NonTerminal s -> s
let StringOfSym sym = match sym with Terminal s -> "'" ^ s ^ "'" | NonTerminal s -> s

let OutputSym os sym = fprintf os "%s" (StringOfSym sym)

Expand Down Expand Up @@ -353,7 +353,7 @@ let CompilerLalrParserSpec logf (spec : ProcessedParserSpec) =
stopWatch.Start()

// Augment the grammar
let fakeStartNonTerminals = spec.StartSymbols |> List.map(fun nt -> "_start" + nt)
let fakeStartNonTerminals = spec.StartSymbols |> List.map(fun nt -> "_start"^nt)
let nonTerminals = [email protected]
let endOfInputTerminal = "$$"
let dummyLookahead = "#"
Expand Down Expand Up @@ -466,7 +466,7 @@ let CompilerLalrParserSpec logf (spec : ProcessedParserSpec) =
let IsStartItem item0 = fakeStartNonTerminalsSet.Contains(ntIdx_of_item0 item0)
let IsKernelItem item0 = (IsStartItem item0 || dotIdx_of_item0 item0 <> 0)

let StringOfSym sym = match sym with PTerminal s -> "'" + termTab.OfIndex s + "'" | PNonTerminal s -> ntTab.OfIndex s
let StringOfSym sym = match sym with PTerminal s -> "'" ^ termTab.OfIndex s ^ "'" | PNonTerminal s -> ntTab.OfIndex s

let OutputSym os sym = fprintf os "%s" (StringOfSym sym)

Expand Down