diff --git a/CHANGES.md b/CHANGES.md index c0802098b0..8359b0ea42 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,6 +5,9 @@ ## Bug fixes * Compiler: put custom header at the top of the output file (fix #1441) * Compiler (js parser): fix parsing of js labels (fix #1440) +* Compiler: fix simplification of js code using let and const +* Compiler: reduce memory consumption when parsing js +* Compiler: parsing js can return a list of tokens; the list was sometimes incorrect # 5.1.1 (2023-03-15) - Lille ## Bug fixes diff --git a/compiler/lib/js_parser.mly b/compiler/lib/js_parser.mly index a3f1040805..f6d634ff5b 100644 --- a/compiler/lib/js_parser.mly +++ b/compiler/lib/js_parser.mly @@ -208,7 +208,7 @@ T_BACKQUOTE (* Rules type decl *) (*************************************************************************) -%start <[ `Annot of Js_token.Annot.t * Parse_info.t | `Item of Javascript.statement * Javascript.location] list > program +%start <(Lexing.position * (Javascript.statement * Javascript.location)) list > program %start standalone_expression %% @@ -248,8 +248,7 @@ annot: | a=TAnnot { a, pi $symbolstartpos } module_item: - | item { `Item $1 } - | annot { `Annot $1 } + | item { $symbolstartpos, $1 } (*************************************************************************) (* statement *) diff --git a/compiler/lib/js_traverse.ml b/compiler/lib/js_traverse.ml index 413e2f3604..0b422d7df4 100644 --- a/compiler/lib/js_traverse.ml +++ b/compiler/lib/js_traverse.ml @@ -1419,7 +1419,7 @@ class simpl = , Some (Expression_statement (EBin (Eq, v2, e2)), _) ) when Poly.(v1 = v2) -> (Expression_statement (EBin (Eq, v1, ECond (cond, e1, e2))), loc) :: rem - | Variable_statement ((Var as k), l1) -> + | Variable_statement (((Var | Let | Const) as k), l1) -> let x = List.map l1 ~f:(function | DeclPattern _ as d -> Variable_statement (k, [ d ]), loc diff --git a/compiler/lib/linker.ml b/compiler/lib/linker.ml index b39cdea75d..a33687e5f2 100644 --- a/compiler/lib/linker.ml +++ b/compiler/lib/linker.ml @@ -68,7 +68,7 @@ module Named_value : sig end = struct class traverse_and_find_named_values all = object - inherit Js_traverse.map as self + inherit Js_traverse.iter as self method expression x = let open Javascript in @@ -83,7 +83,7 @@ end = struct let find_all code = let all = ref StringSet.empty in let p = new traverse_and_find_named_values all in - ignore (p#program code); + p#program code; !all end diff --git a/compiler/lib/parse_js.ml b/compiler/lib/parse_js.ml index c327317cf0..3e34a084e5 100644 --- a/compiler/lib/parse_js.ml +++ b/compiler/lib/parse_js.ml @@ -29,9 +29,11 @@ module Lexer : sig val curr_pos : t -> Lexing.position - val token : t -> Js_token.t * (Lexing.position * Lexing.position) + val token : t -> Js_token.t * Lexing.position * Lexing.position - val lex_as_regexp : t -> Js_token.t * (Lexing.position * Lexing.position) + val lex_as_regexp : t -> Js_token.t * Lexing.position * Lexing.position + + val rollback : t -> unit val dummy_pos : Lexing.position end = struct @@ -96,333 +98,408 @@ end = struct let env, res = Flow_lexer.lex t.env in t.env <- env; let tok = Flow_lexer.Lex_result.token res in - let pos = Flow_lexer.Lex_result.loc res in + let p1, p2 = Flow_lexer.Lex_result.loc res in report_errors res; - tok, pos + tok, p1, p2 + + let rollback t = Sedlexing.rollback t.l let lex_as_regexp (t : t) = Sedlexing.rollback t.l; let env, res = Flow_lexer.regexp t.env in t.env <- env; let tok = Flow_lexer.Lex_result.token res in - let pos = Flow_lexer.Lex_result.loc res in + let p1, p2 =
Flow_lexer.Lex_result.loc res in report_errors res; - tok, pos + tok, p1, p2 end exception Parsing_error of Parse_info.t -let matching_token (o : Js_token.t) (c : Js_token.t) = - match o, c with - | T_LPAREN, T_RPAREN | T_LBRACKET, T_RBRACKET | T_LCURLY, T_RCURLY -> true +let is_comment = function + | (Js_token.TComment _ | TAnnot _ | TCommentLineDirective _), _, _ -> true | _ -> false -module Tokens : sig - type elt = Js_token.t * (Lexing.position * Lexing.position) +module State : sig + type token = Js_token.t * Lexing.position * Lexing.position + + module Cursor : sig + type 'a t + + val insert_token : 'a t -> token -> 'a t + + val replace_token : 'a t -> token -> 'a t + + val last_token : 'a t -> (token * 'a t) option + + val rewind_block : 'a t -> (token * 'a t) option + end + + type 'a t + + val save_checkpoint : 'a t -> 'a t - type +'a t + val cursor : 'a t -> 'a Cursor.t - val add : elt -> 'a -> 'a t -> 'a t + val checkpoint : 'a t -> 'a Js_parser.MenhirInterpreter.checkpoint - val last : 'a t -> elt option + val offer : 'a t -> token -> 'a t - val last' : 'a t -> (elt * 'a t * 'a) option + val finalize_error : 'a t -> 'a t - val empty : 'a t + val try_recover : 'a Cursor.t -> 'a t - val all : 'a t -> (Js_token.t * Parse_info.t) list + val create : 'a Js_parser.MenhirInterpreter.checkpoint -> 'a t + + val all_tokens : 'a t -> token list end = struct - type elt = Js_token.t * (Lexing.position * Lexing.position) + type token = Js_token.t * Lexing.position * Lexing.position - type 'a t = (elt * 'a) list + type 'a checkpoint = 'a Js_parser.MenhirInterpreter.checkpoint - let empty = [] + type 'a w = + | Start of 'a checkpoint + | Checkpoint of 'a checkpoint * 'a w + | Token of token * 'a w - let add elt data t = (elt, data) :: t + module Cursor = struct + type 'a t = 'a w * token list - let rec last = function - | [] -> None - | (((Js_token.TComment _ | TCommentLineDirective _), _), _) :: l -> last l - | (x, _) :: _ -> Some x + let last_token ((h, next) : _ t) : (_ * _ t) option = + let rec find next = function + | Start _ -> None + | Checkpoint (_, t) -> find next t + | Token (tok, t) -> + if is_comment tok then find (tok :: next) t else Some (tok, (t, tok :: next)) + in + find next h + + let replace_token ((h, next) : _ t) tok : _ t = + match next with + | [] -> assert false + | _ :: next -> h, tok :: next + + let insert_token ((h, next) : _ t) tok : _ t = h, tok :: next + + let rewind_block : 'a t -> (token * 'a t) option = + fun h -> + let rec rewind (stack : Js_token.t list) (h : _ t) = + match last_token h with + | None -> None + | Some (((tok, _, _) as tok'), h) -> ( + match tok, stack with + | (T_RPAREN | T_RCURLY | T_RBRACKET), _ -> + let stack = tok :: stack in + rewind stack h + | T_LPAREN, [ T_RPAREN ] | T_LBRACKET, [ T_RBRACKET ] | T_LCURLY, [ T_RCURLY ] + -> Some (tok', h) + | T_LPAREN, T_RPAREN :: stack + | T_LBRACKET, T_RBRACKET :: stack + | T_LCURLY, T_RCURLY :: stack -> rewind stack h + | T_LPAREN, _ | T_LBRACKET, _ | T_LCURLY, _ -> assert false + | _, [] -> None + | _, (_ :: _ as stack) -> rewind stack h) + in + rewind [] h + end - let rec last' = function - | [] -> None - | (((Js_token.TComment _ | TCommentLineDirective _), _), _) :: l -> last' l - | (x, data) :: l -> Some (x, l, data) + type 'a t = + { checkpoint : 'a checkpoint + ; history : 'a w + ; next : token list + } - let all t_rev = List.rev_map t_rev ~f:(fun ((t, (p, _)), _) -> t, Parse_info.t_of_pos p) + let save_checkpoint { checkpoint; history; next } = + { checkpoint; history = Checkpoint (checkpoint, 
history); next } + + let cursor { history; next; _ } = history, next + + let rec advance t = + match (t : _ Js_parser.MenhirInterpreter.checkpoint) with + | Shifting _ | AboutToReduce _ -> advance (Js_parser.MenhirInterpreter.resume t) + | InputNeeded _ | Accepted _ | HandlingError _ | Rejected -> t + + let create checkpoint = { checkpoint; history = Start checkpoint; next = [] } + + let checkpoint { checkpoint; _ } = checkpoint + + let offer { checkpoint; history; next } tok : _ t = + match (checkpoint : _ checkpoint) with + | Accepted _ -> assert false + | Rejected | HandlingError _ -> { checkpoint; history; next = tok :: next } + | Shifting _ | AboutToReduce _ -> assert false + | InputNeeded _ -> ( + if is_comment tok + then { checkpoint; history = Token (tok, history); next } + else + let new_checkpoint = + advance (Js_parser.MenhirInterpreter.offer checkpoint tok) + in + match (new_checkpoint : 'a checkpoint) with + | Shifting _ | AboutToReduce _ -> assert false + | Rejected | Accepted _ | InputNeeded _ -> + let history = + match tok with + | T_VIRTUAL_SEMICOLON, _, _ -> + let rec insert = function + | Start _ as start -> Token (tok, start) + | Checkpoint (_, x) -> insert x + | Token (inner_tok, tail) as x -> + if is_comment inner_tok + then Token (inner_tok, insert tail) + else Token (tok, x) + in + insert history + | _ -> Token (tok, history) + in + { checkpoint = new_checkpoint; history; next } + | HandlingError _ -> + { checkpoint = new_checkpoint + ; history = Token (tok, Checkpoint (checkpoint, history)) + ; next + }) + + let try_recover ((h, next) : _ Cursor.t) = + let rec compute (t : _ w) = + match t with + | Start env -> advance env + | Checkpoint (env, _) -> advance env + | Token (tok, t) -> ( + if is_comment tok + then compute t + else + match compute t with + | InputNeeded _ as checkpoint -> + advance (Js_parser.MenhirInterpreter.offer checkpoint tok) + | Shifting _ | AboutToReduce _ -> assert false + | Accepted _ | Rejected | HandlingError _ -> assert false) + in + let checkpoint = compute h in + List.fold_left next ~init:{ checkpoint; history = h; next = [] } ~f:(fun t tok -> + offer t tok) + + let finalize_error { checkpoint; history; next } = + let rec loop (t : _ Js_parser.MenhirInterpreter.checkpoint) = + match t with + | HandlingError _ | Shifting _ | AboutToReduce _ -> + loop (Js_parser.MenhirInterpreter.resume t) + | Accepted _ | InputNeeded _ -> assert false + | Rejected -> t + in + { checkpoint = loop checkpoint; history; next } + + let all_tokens { history; _ } = + let rec collect acc t = + match t with + | Start _ -> acc + | Checkpoint (_, tail) -> collect acc tail + | Token (tok, tail) -> collect (tok :: acc) tail + in + collect [] history end -let parse_aux the_parser (lexbuf : Lexer.t) = - let init = the_parser (Lexer.curr_pos lexbuf) in - let fol prev (_, (c, _)) = - match Tokens.last prev with - | None -> true - | Some (_, (_, p)) -> c.Lexing.pos_lnum <> p.Lexing.pos_lnum - in - let rec loop_error prev checkpoint = - let module I = Js_parser.MenhirInterpreter in - match checkpoint with - | I.InputNeeded _env -> - let checkpoint = - I.offer checkpoint (Js_token.T_EOF, Lexer.curr_pos lexbuf, Lexer.curr_pos lexbuf) - in - loop_error prev checkpoint - | I.Shifting _ | I.AboutToReduce _ -> loop_error prev (I.resume checkpoint) - | I.Accepted _ -> assert false - | I.Rejected -> `Error prev - | I.HandlingError _ -> loop_error prev (I.resume checkpoint) - in - let parse_annot s = - match String.drop_prefix ~prefix:"//" s with - | None -> None - | Some s -> ( - 
let buf = Lexing.from_string s in - try - match Annot_parser.annot Annot_lexer.main buf with - | `Requires l -> Some (`Requires l) - | `Provides (n, k, ka) -> Some (`Provides (n, k, ka)) - | `Version l -> Some (`Version l) - | `Weakdef -> Some `Weakdef - | `Always -> Some `Always - | `If name -> Some (`If name) - | `Ifnot name -> Some (`Ifnot name) - | `Alias name -> Some (`Alias name) - with - | Not_found -> None - | _ -> None) - in - let rec loop prev buffer checkpoint = - let module I = Js_parser.MenhirInterpreter in - match checkpoint with - | I.InputNeeded _env -> - let token, buffer, prev = - match Tokens.last prev with - | Some ((Js_token.T_EOF, _) as last) -> last, buffer, prev - | _ -> - let read_tok buffer lexbuf = - match buffer with - | [] -> buffer, Lexer.token lexbuf - | x :: xs -> xs, x - in - let rec read_one prev buffer (lexbuf : Lexer.t) = - let buffer, t = read_tok buffer lexbuf in - match t with - | (TCommentLineDirective _, _) as t -> - let prev = Tokens.add t checkpoint prev in - read_one prev buffer lexbuf - | (TComment s, loc) as t -> - if fol prev t - then - match parse_annot s with - | None -> - let prev = Tokens.add t checkpoint prev in - read_one prev buffer lexbuf - | Some annot -> - let t = Js_token.TAnnot (s, annot), loc in - t, buffer, prev - else - let prev = Tokens.add t checkpoint prev in - read_one prev buffer lexbuf - | t -> t, buffer, prev - in - let t, buffer, prev = read_one prev buffer lexbuf in - let (t, pos), buffer = - match Tokens.last prev, t with - (* restricted productions - * 7.9.1 - 3 - * When, as the program is parsed from left to right, a token is encountered - * that is allowed by some production of the grammar, but the production - * is a restricted production and the token would be the first token for a - * terminal or nonterminal immediately following the annotation [no LineTerminator here] - * within the restricted production (and therefore such a token is called a restricted token), - * and the restricted token is separated from the previous token by at least - * one LineTerminator, then a semicolon is automatically inserted before the - * restricted token. *) - | ( Some ((T_RETURN | T_CONTINUE | T_BREAK | T_THROW | T_YIELD), _) - , (((T_SEMICOLON | T_VIRTUAL_SEMICOLON), _) as t) ) -> t, buffer - | Some ((T_RETURN | T_CONTINUE | T_BREAK | T_THROW | T_YIELD), _), t - when fol prev t -> - let buffer = t :: buffer in - (T_VIRTUAL_SEMICOLON, (Lexer.dummy_pos, Lexer.dummy_pos)), buffer - (* The practical effect of these restricted productions is as follows: - * When a ++ or -- token is encountered where the parser would treat it - * as a postfix operator, and at least one LineTerminator occurred between - * the preceding token and the ++ or -- token, then a semicolon is automatically - * inserted before the ++ or -- token. 
*) - | _, ((T_DECR, pos) as tok) when not (fol prev tok) -> - (Js_token.T_DECR_NB, pos), buffer - | _, ((T_INCR, pos) as tok) when not (fol prev tok) -> - (Js_token.T_INCR_NB, pos), buffer - | _, ((((T_DIV | T_DIV_ASSIGN) as t), ((start_pos, _) as _pos)) as tok) - -> ( - if I.acceptable checkpoint t start_pos - then tok, buffer - else - match buffer with - | [] -> Lexer.lex_as_regexp lexbuf, buffer - | _ -> - (* Trying to lex token differently, not allowed *) tok, buffer) - | _, t -> t, buffer - in - (t, pos), buffer, prev - in - let t, (pos_start, pos_stop) = token in - let prev = Tokens.add token checkpoint prev in - let checkpoint = I.offer checkpoint (t, pos_start, pos_stop) in - loop prev buffer checkpoint - | I.Shifting _ | I.AboutToReduce _ -> loop prev buffer (I.resume checkpoint) - | I.Accepted v -> `Ok (v, prev) - | I.Rejected -> `Error prev - | I.HandlingError _env -> ( - (* 7.9.1 - 1 *) - (* When, as the program is parsed from left to right, a token (called the offending token) - is encountered that is not allowed by any production of the grammar, then a semicolon - is automatically inserted before the offending token if one or more of the following - conditions is true: - - The offending token is }. - - The offending token is separated from the previous - token by at least one LineTerminator. *) - - (* 7.9.1 - 2 *) - (* When, as the program is parsed from left to right, the end of the input stream of tokens *) - (* is encountered and the parser is unable to parse the input token stream as a single *) - (* complete ECMAScript Program, then a semicolon is automatically inserted at the end *) - let to_ident (t, loc) = - let name = Js_token.to_string t in - Js_token.T_IDENTIFIER (Stdlib.Utf8_string.of_string_exn name, name), loc - in - let rec rewind stack buffer prev = - match Tokens.last' prev with - | None -> None - | Some (((tok, loc) as tok'), prev, checkpoint) -> ( - match tok, stack with - | (T_RPAREN | T_RCURLY | T_RBRACKET), _ -> - let buffer = tok' :: buffer in - let stack = tok :: stack in - rewind stack buffer prev - | ((T_LPAREN | T_LCURLY | T_LBRACKET) as o), c :: stack -> ( - if not (matching_token o c) - then None - else - match stack with - | [] -> Some (loc, prev, buffer, checkpoint) - | _ -> - let buffer = tok' :: buffer in - rewind stack buffer prev) - | _, stack -> - let buffer = tok' :: buffer in - rewind stack buffer prev) - in - let end_of_do_whle prev = - match rewind [ T_RPAREN ] [] prev with +let parse_annot s = + match String.drop_prefix ~prefix:"//" s with + | None -> None + | Some s -> ( + let buf = Lexing.from_string s in + try Some (Annot_parser.annot Annot_lexer.main buf) with + | Not_found -> None + | _ -> None) + +let rec nl_separated prev ((_, c, _) as ctok) = + match State.Cursor.last_token prev with + | None -> true + | Some ((T_VIRTUAL_SEMICOLON, _, _), prev) -> nl_separated prev ctok + | Some ((_, _, p2), _) -> c.Lexing.pos_lnum <> p2.Lexing.pos_lnum + +let acceptable checkpoint token = + let module I = Js_parser.MenhirInterpreter in + let checkpoint = State.checkpoint checkpoint in + I.acceptable checkpoint token Lexer.dummy_pos + +let semicolon = Js_token.T_VIRTUAL_SEMICOLON, Lexer.dummy_pos, Lexer.dummy_pos + +let rec offer_one t (lexbuf : Lexer.t) = + let tok = Lexer.token lexbuf in + match tok with + | TCommentLineDirective _, _, _ -> + let t = State.offer t tok in + offer_one t lexbuf + | (TComment s, p1, p2) as tok -> + let tok = + match parse_annot s with + | None -> tok + | Some a -> TAnnot (s, a), p1, p2 + in + let t = State.offer 
t tok in + offer_one t lexbuf + | _ -> + let t = + match tok with + | T_LPAREN, _, _ when acceptable t T_LPAREN_ARROW -> State.save_checkpoint t + | _ -> t + in + let h = State.cursor t in + let tok = + (* restricted productions + * 7.9.1 - 3 + * When, as the program is parsed from left to right, a token is encountered + * that is allowed by some production of the grammar, but the production + * is a restricted production and the token would be the first token for a + * terminal or nonterminal immediately following the annotation [no LineTerminator here] + * within the restricted production (and therefore such a token is called a restricted token), + * and the restricted token is separated from the previous token by at least + * one LineTerminator, then a semicolon is automatically inserted before the + * restricted token. *) + match State.Cursor.last_token h, tok with + | ( Some (((T_RETURN | T_CONTINUE | T_BREAK | T_THROW | T_YIELD), _, _), _) + , (((T_SEMICOLON | T_VIRTUAL_SEMICOLON), _, _) as tok) ) -> tok + | Some (((T_RETURN | T_CONTINUE | T_BREAK | T_THROW | T_YIELD), _, _), _), _ + when nl_separated h tok -> + Lexer.rollback lexbuf; + semicolon + (* The practical effect of these restricted productions is as follows: + * When a ++ or -- token is encountered where the parser would treat it + * as a postfix operator, and at least one LineTerminator occurred between + * the preceding token and the ++ or -- token, then a semicolon is automatically + * inserted before the ++ or -- token. *) + | _, ((T_DECR, p1, p2) as tok) when not (nl_separated h tok) -> + Js_token.T_DECR_NB, p1, p2 + | _, ((T_INCR, p1, p2) as tok) when not (nl_separated h tok) -> + Js_token.T_INCR_NB, p1, p2 + | _, ((((T_DIV | T_DIV_ASSIGN) as tok), _, _) as tok_and_pos) -> + if acceptable t tok then tok_and_pos else Lexer.lex_as_regexp lexbuf + | _ -> tok + in + State.offer t tok + +let dummy_ident = + let dummy = "" in + Js_token.T_IDENTIFIER (Stdlib.Utf8_string.of_string_exn dummy, dummy) + +let token_to_ident (t, p1, p2) = + let name = Js_token.to_string t in + Js_token.T_IDENTIFIER (Stdlib.Utf8_string.of_string_exn name, name), p1, p2 + +let end_of_do_whle prev = + match State.Cursor.rewind_block prev with + | None -> false + | Some ((T_LPAREN, _, _), prev) -> ( + match State.Cursor.last_token prev with + | None -> false + | Some ((T_WHILE, _, _), prev) -> ( + match State.Cursor.last_token prev with | None -> false - | Some (_, prev, _, _) -> ( - match Tokens.last' prev with + | Some ((T_SEMICOLON, _, _), prev) -> ( + match State.Cursor.last_token prev with | None -> false - | Some ((T_WHILE, _), prev, _checkpoint) -> ( - match Tokens.last' prev with + | Some ((T_DO, _, _), _) -> true + | Some (_, _) -> false) + | Some ((T_RCURLY, _, _), _) -> ( + match State.Cursor.rewind_block prev with + | None -> false + | Some ((T_LCURLY, _, _), prev) -> ( + match State.Cursor.last_token prev with | None -> false - | Some ((T_SEMICOLON, _), prev, _checkpoint) -> ( - match Tokens.last' prev with - | None -> false - | Some ((T_DO, _), _, _) -> true - | Some (_, _, _) -> false) - | Some ((T_RCURLY, _), prev, _checkpoint) -> ( - match rewind [ T_RCURLY ] [] prev with - | None -> false - | Some (_, prev, _, _) -> ( - match Tokens.last' prev with - | None -> false - | Some ((T_DO, _), _, _) -> true - | Some (_, _, _) -> false)) - | Some (_, _, _) -> false) - | Some (_, _, _) -> false) - in - let kind = - match Tokens.last' prev with - | None | Some ((T_VIRTUAL_SEMICOLON, _), _, _) -> `None + | Some ((T_DO, _, _), _) -> true + | 
Some (_, _) -> false) + | Some _ -> assert false) + | Some (_, _) -> false) + | Some (_, _) -> false) + | Some _ -> assert false + +let recover error_checkpoint previous_checkpoint = + (* 7.9.1 - 1 *) + (* When, as the program is parsed from left to right, a token (called the offending token) is encountered that is not allowed by any production of the grammar, then a semicolon is automatically inserted before the offending token if one or more of the following conditions is true: - The offending token is }. - The offending token is separated from the previous token by at least one LineTerminator. *) + + (* 7.9.1 - 2 *) + (* When, as the program is parsed from left to right, the end of the input stream of tokens *) + (* is encountered and the parser is unable to parse the input token stream as a single *) + (* complete ECMAScript Program, then a semicolon is automatically inserted at the end *) + match State.Cursor.last_token (State.cursor error_checkpoint) with + | None -> error_checkpoint + | Some (offending_token, rest) -> ( + match State.Cursor.last_token rest with + | None -> error_checkpoint + | Some ((last_token, _, _), _) -> ( + match offending_token with + | T_VIRTUAL_SEMICOLON, _, _ -> error_checkpoint (* contextually allowed as identifiers, namely await and yield; *) - | Some ((((T_YIELD | T_AWAIT), _) as tok), rest, checkpoint) - when I.acceptable checkpoint (fst (to_ident tok)) Lexer.dummy_pos -> - `Replace (to_ident tok, rest, checkpoint) - | Some (((T_RCURLY, _) as tok), rest, checkpoint) - when I.acceptable checkpoint Js_token.T_VIRTUAL_SEMICOLON Lexer.dummy_pos -> - `Semi_colon (tok, rest, checkpoint) - | Some (((T_EOF, _) as tok), rest, checkpoint) - when I.acceptable checkpoint Js_token.T_VIRTUAL_SEMICOLON Lexer.dummy_pos -> - `Semi_colon (tok, rest, checkpoint) - | Some (((T_ARROW, _) as tok), prev, checkpoint) when not (fol prev tok) -> - `Arrow (tok, prev, checkpoint) - | Some (last, rest, checkpoint) -> ( - match Tokens.last' rest with - | Some ((T_VIRTUAL_SEMICOLON, _), _, _) -> `None - | (Some _ | None) - when fol rest last - && I.acceptable - checkpoint - Js_token.T_VIRTUAL_SEMICOLON - Lexer.dummy_pos -> `Semi_colon (last, rest, checkpoint) - | Some ((T_RPAREN, _), rest, _) + | (T_YIELD | T_AWAIT), _, _ when acceptable previous_checkpoint dummy_ident -> + State.Cursor.replace_token rest (token_to_ident offending_token) + |> State.try_recover + | T_RCURLY, _, _ + when acceptable previous_checkpoint Js_token.T_VIRTUAL_SEMICOLON -> + State.Cursor.insert_token rest semicolon |> State.try_recover + | T_EOF, _, _ when acceptable previous_checkpoint Js_token.T_VIRTUAL_SEMICOLON + -> State.Cursor.insert_token rest semicolon |> State.try_recover + | (T_ARROW, _, _) as tok when not (nl_separated rest tok) -> ( + (* Restart parsing from the opening parens, patching the + token to be T_LPAREN_ARROW to help the parser *) + match last_token with + | T_RPAREN -> ( + match State.Cursor.rewind_block rest with + | Some ((T_LPAREN, p1, p2), prev) -> + State.Cursor.replace_token prev (T_LPAREN_ARROW, p1, p2) + |> State.try_recover + | Some _ -> assert false + | None -> error_checkpoint) + | _ -> error_checkpoint) + | last -> ( + match last_token with + | T_VIRTUAL_SEMICOLON -> error_checkpoint + | _ + when nl_separated rest last + && acceptable previous_checkpoint Js_token.T_VIRTUAL_SEMICOLON -> + State.Cursor.insert_token rest semicolon |> State.try_recover + | T_RPAREN when end_of_do_whle rest - && I.acceptable - checkpoint - Js_token.T_VIRTUAL_SEMICOLON -
Lexer.dummy_pos -> `Semi_colon (last, rest, checkpoint) - | _ -> `None) - in - - let drop_annot_or_error () = - match Tokens.last' prev with - | Some ((TAnnot (s, _), pos), prev, checkpoint) -> - let t = Js_token.TComment s, pos in - let prev = Tokens.add t checkpoint prev in - loop prev buffer checkpoint - | _ -> loop_error prev (I.resume checkpoint) - in - match kind with - | `None -> drop_annot_or_error () - | `Arrow (tok, prev, _checkpoint) -> ( - (* Restart parsing from the openning parens, patching the - token to be T_LPAREN_ARROW to help the parser *) - let buffer = tok :: buffer in - let err () = loop_error prev (I.resume checkpoint) in - match Tokens.last' prev with - | Some (((T_RPAREN, _) as tok), prev, _) -> ( - let buffer = tok :: buffer in - match rewind [ T_RPAREN ] buffer prev with - | None -> err () - | Some (loc, prev, buffer, checkpoint) -> - let buffer = (Js_token.T_LPAREN_ARROW, loc) :: buffer in - loop prev buffer checkpoint) - | Some _ | None -> err ()) - | `Replace (t, prev, checkpoint) -> - let checkpoint = - let t, pos = t in - I.offer checkpoint (t, fst pos, snd pos) - in - let prev = Tokens.add t checkpoint prev in - loop prev buffer checkpoint - | `Semi_colon (tok, prev, checkpoint) -> - let buffer = tok :: buffer in - let t = Js_token.T_VIRTUAL_SEMICOLON, (Lexer.dummy_pos, Lexer.dummy_pos) in - let checkpoint = - let t, pos = t in - I.offer checkpoint (t, fst pos, snd pos) - in - let prev = Tokens.add t checkpoint prev in - loop prev buffer checkpoint) + && acceptable previous_checkpoint Js_token.T_VIRTUAL_SEMICOLON -> + State.Cursor.insert_token rest semicolon |> State.try_recover + | _ -> error_checkpoint))) + +let parse_aux the_parser (lexbuf : Lexer.t) = + let init = the_parser (Lexer.curr_pos lexbuf) in + let rec loop_error checkpoint = + match (State.checkpoint checkpoint : _ Js_parser.MenhirInterpreter.checkpoint) with + | InputNeeded _ | Shifting _ | AboutToReduce _ | Accepted _ -> assert false + | Rejected -> `Error checkpoint + | HandlingError _ -> loop_error checkpoint + in + let rec loop checkpoint previous_checkpoint = + match (State.checkpoint checkpoint : _ Js_parser.MenhirInterpreter.checkpoint) with + | Shifting _ | AboutToReduce _ -> assert false + | Accepted v -> `Ok (v, checkpoint) + | Rejected -> loop_error checkpoint + | InputNeeded _env -> + let previous_checkpoint = checkpoint in + let new_checkpoint = offer_one checkpoint lexbuf in + loop new_checkpoint previous_checkpoint + | HandlingError _env -> ( + let error_checkpoint = checkpoint in + let new_checkpoint = recover error_checkpoint previous_checkpoint in + match State.checkpoint new_checkpoint with + | HandlingError _ -> ( + let checkpoint = State.finalize_error new_checkpoint in + match State.checkpoint checkpoint with + | Rejected -> `Error checkpoint + | _ -> assert false) + | _ -> loop new_checkpoint new_checkpoint) in - match loop Tokens.empty [] init with + let checkpoint = State.create init in + match loop checkpoint checkpoint with | `Ok x -> x - | `Error toks -> - let rec pi last = - match Tokens.last' last with - | None -> Parse_info.zero - | Some ((_, (p, _)), rest, _) -> - if Poly.(p = Lexer.dummy_pos) then pi rest else Parse_info.t_of_pos p - in - raise (Parsing_error (pi toks)) + | `Error t -> ( + match State.Cursor.last_token (State.cursor t) with + | None -> assert false + | Some ((_, p, _), _) -> raise (Parsing_error (Parse_info.t_of_pos p))) let fail_early = object @@ -431,36 +508,46 @@ let fail_early = method early_error p = raise (Parsing_error p.loc) end -let 
check_program p = - List.iter p ~f:(function - | `Annot _ -> () - | `Item p -> fail_early#program [ p ]) +let check_program p = List.iter p ~f:(function _, p -> fail_early#program [ p ]) let parse' lex = let p, toks = parse_aux Js_parser.Incremental.program lex in check_program p; + let toks = State.all_tokens toks in + let take_annot_before = + let toks_r = ref toks in + let rec loop start_pos acc (toks : (Js_token.t * _ * _) list) = + match toks with + | [] -> assert false + | (TAnnot a, p1, _) :: xs -> loop start_pos ((a, Parse_info.t_of_pos p1) :: acc) xs + | ((TComment _ | TCommentLineDirective _), _, _) :: xs -> loop start_pos acc xs + | (_, p1, _p2) :: xs -> + if p1.Lexing.pos_cnum = start_pos.Lexing.pos_cnum + then ( + toks_r := toks; + List.rev acc) + else loop start_pos [] xs + in + fun start_pos -> loop start_pos [] !toks_r + in + let p = List.map p ~f:(fun (start_pos, s) -> take_annot_before start_pos, s) in let groups = - List.group p ~f:(fun a pred -> - match pred, a with - | `Item _, `Annot _ -> false - | `Annot _, `Annot _ -> true - | `Item _, `Item _ -> true - | `Annot _, `Item _ -> true) + List.group p ~f:(fun a _pred -> + match a with + | [], _ -> true + | _ :: _, _ -> false) in let p = - List.map groups ~f:(fun g -> - List.partition_map g ~f:(function - | `Annot a -> `Fst a - | `Item i -> `Snd i)) + List.map groups ~f:(function + | [] -> assert false + | (annot, _) :: _ as l -> annot, List.map l ~f:snd) in - p, Tokens.all toks + p, toks let parse lex = let p, _ = parse_aux Js_parser.Incremental.program lex in check_program p; - List.filter_map p ~f:(function - | `Item i -> Some i - | `Annot _ -> None) + List.map p ~f:(fun (_, x) -> x) let parse_expr lex = let expr, _ = parse_aux Js_parser.Incremental.standalone_expression lex in diff --git a/compiler/lib/parse_js.mli b/compiler/lib/parse_js.mli index a6b380725f..ff47ffb967 100644 --- a/compiler/lib/parse_js.mli +++ b/compiler/lib/parse_js.mli @@ -34,6 +34,6 @@ val parse : Lexer.t -> Javascript.program val parse' : Lexer.t -> ((Js_token.Annot.t * Parse_info.t) list * Javascript.program) list - * (Js_token.t * Parse_info.t) list + * (Js_token.t * Lexing.position * Lexing.position) list val parse_expr : Lexer.t -> Javascript.expression diff --git a/compiler/lib/source_map.ml b/compiler/lib/source_map.ml index 0d8c8a2566..1e0ce6429e 100644 --- a/compiler/lib/source_map.ml +++ b/compiler/lib/source_map.ml @@ -104,7 +104,7 @@ let string_of_mapping mapping = let l = (c.gen_col - !gen_col) :: - (if c.ori_source = -1 + (if c.ori_source < 0 then [] else (c.ori_source - !ori_source) @@ -119,7 +119,7 @@ let string_of_mapping mapping = [ n - n' ])) in gen_col := c.gen_col; - if c.ori_source <> -1 + if c.ori_source >= 0 then ( ori_source := c.ori_source; ori_line := c.ori_line; @@ -199,7 +199,9 @@ let mapping_of_string str = let maps ~sources_offset ~names_offset x = let gen_line = x.gen_line in - let ori_source = x.ori_source + sources_offset in + let ori_source = + if x.ori_source < 0 then x.ori_source else x.ori_source + sources_offset + in let ori_name = match x.ori_name with | None -> None diff --git a/compiler/tests-compiler/js_parser_printer.ml b/compiler/tests-compiler/js_parser_printer.ml index 053d7c110f..70ace1cf9f 100644 --- a/compiler/tests-compiler/js_parser_printer.ml +++ b/compiler/tests-compiler/js_parser_printer.ml @@ -652,9 +652,9 @@ let check_vs_string s toks = in let rec loop offset pos = function | [] -> space pos (String.length s) - | (Js_token.T_VIRTUAL_SEMICOLON, _) :: rest -> loop offset pos rest - | 
((Js_token.T_STRING (_, codepoint_len) as x), pi) :: rest -> - let { Parse_info.idx = codepoint_idx; _ } = pi in + | (Js_token.T_VIRTUAL_SEMICOLON, _, _) :: rest -> loop offset pos rest + | ((Js_token.T_STRING (_, codepoint_len) as x), p1, _p2) :: rest -> + let { Parse_info.idx = codepoint_idx; _ } = Parse_info.t_of_pos p1 in let bytes_idx = codepoint_idx - offset in let bytes_len = let bytes_len = ref 0 in @@ -679,8 +679,8 @@ let check_vs_string s toks = a b); loop offset (bytes_idx + bytes_len + 1) rest - | (x, pi) :: rest -> - let { Parse_info.idx; _ } = pi in + | (x, p1, _p2) :: rest -> + let { Parse_info.idx; _ } = Parse_info.t_of_pos p1 in let idx = idx - offset in let str = Js_token.to_string x in space pos idx; @@ -706,8 +706,9 @@ let parse_print_token ?(invalid = false) ?(extra = false) s = let prev = ref 0 in let rec loop tokens = match tokens with - | [ (Js_token.T_EOF, _) ] | [] -> Printf.printf "\n" - | (tok, pos) :: xs -> + | [ (Js_token.T_EOF, _, _) ] | [] -> Printf.printf "\n" + | (tok, p1, _p2) :: xs -> + let pos = Parse_info.t_of_pos p1 in let s = if extra then Js_token.to_string_extra tok else Js_token.to_string tok in (match !prev <> pos.Parse_info.line && pos.Parse_info.line <> 0 with | true -> Printf.printf "\n%2d: " pos.Parse_info.line @@ -828,7 +829,7 @@ let%expect_test "multiline comments" = /* test */ 42 /* test */ |}; [%expect {| - 2: 0:/* test */, 11:42, 14:/* test */, 0:;, |}]; + 2: 0:/* test */, 11:42, 0:;, 14:/* test */, |}]; parse_print_token {| 42 /* @@ -838,11 +839,11 @@ let%expect_test "multiline comments" = 42 |}; [%expect {| - 2: 4:42, + 2: 4:42, 0:;, 3: 4:/* " - */, 0:;, + */, 7: 4:42, 0:;, |}] let%expect_test "++--" = @@ -911,6 +912,9 @@ a:while(true){ a = b + c (d + e).print() + do { x } while (true) y + do ; while (true) y + |}; [%expect {| @@ -935,7 +939,9 @@ a:while(true){ 23: 4:a (identifier), 6:=, 8:b (identifier), 0:; (virtual), 24: 4:++ (INCR), 6:c (identifier), 0:; (virtual), 26: 4:a (identifier), 6:=, 8:b (identifier), 10:+, 12:c (identifier), - 27: 4:(, 5:d (identifier), 7:+, 9:e (identifier), 10:), 11:., 12:print (identifier), 17:(, 18:), 0:; (virtual), |}] + 27: 4:(, 5:d (identifier), 7:+, 9:e (identifier), 10:), 11:., 12:print (identifier), 17:(, 18:), 0:; (virtual), + 29: 4:do, 7:{, 9:x (identifier), 0:; (virtual), 11:}, 13:while, 19:(, 20:true, 24:), 0:; (virtual), 26:y (identifier), 0:; (virtual), + 30: 4:do, 7:;, 9:while, 15:(, 16:true, 20:), 0:; (virtual), 22:y (identifier), 0:; (virtual), |}] let%expect_test _ = parse_print_token @@ -997,7 +1003,7 @@ c 6: 0://Another comment, 7: 0:a (identifier), 0:; (virtual), 8: 0:if, 3:(, 4:a (identifier), 5:), 7:{, - 9: 0://Provides: test, + 9: 0://Provides: test(annot), 10: 0:b (identifier), 0:; (virtual), 11: 0:}, 12: 0://Provides: test(annot), diff --git a/compiler/tests-sourcemap/dump.reference b/compiler/tests-sourcemap/dump.reference index d241c27442..d9a8cf150c 100644 --- a/compiler/tests-sourcemap/dump.reference +++ b/compiler/tests-sourcemap/dump.reference @@ -5,5 +5,3 @@ b.ml:1:10 -> 17: function f(x){<>return x - 1 | 0;} b.ml:1:6 -> 24: function f(x){return <>x - 1 | 0;} b.ml:1:15 -> 34: function f(x){return x - 1 | 0;<>} b.ml:1:4 -> 23: var Testlib_B = [0, <>f]; -a.ml:-1:-1 -> 3: <>function caml_call1(f, a0){ -a.ml:-1:-1 -> 2: <>}
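Note (illustrative, not part of the patch): the rewritten parse_js.ml combines two mechanisms — eager insertion of virtual semicolons for the ECMAScript 7.9.1-3 restricted productions (offer_one / nl_separated), and a backwards bracket-matching walk over the token history during error recovery (State.Cursor.rewind_block). The standalone OCaml sketch below models both on a simplified token type; tok, the int line numbers, and the function names insert_restricted / rewind_block are hypothetical stand-ins for Js_token.t, Lexing.position, and the patch's own helpers.

(* Standalone sketch; [tok] stands in for Js_token.t, the paired [int]
   for the line component of a Lexing.position. *)
type tok =
  | T_RETURN
  | T_IDENT of string
  | T_LPAREN
  | T_RPAREN
  | T_VIRTUAL_SEMICOLON

(* 7.9.1 - 3: a line break after [return] (or the other restricted
   keywords) forces a virtual semicolon, so "return\nx" parses as
   "return; x".  This is what [offer_one] checks with [nl_separated]. *)
let rec insert_restricted = function
  | (T_RETURN, l1) :: ((_, l2) :: _ as rest) when l2 <> l1 ->
      (T_RETURN, l1) :: (T_VIRTUAL_SEMICOLON, l1) :: insert_restricted rest
  | t :: rest -> t :: insert_restricted rest
  | [] -> []

(* Simplified [State.Cursor.rewind_block], parentheses only: walk the
   token history (most recent first) back to the "(" matching the ")"
   the history ends with, tracking nesting depth on the way. *)
let rewind_block history =
  let rec rewind depth = function
    | [] -> None
    | (T_RPAREN, _) :: rest -> rewind (depth + 1) rest
    | ((T_LPAREN, _) as opener) :: rest ->
        if depth = 1
        then Some (opener, rest)
        else if depth > 1
        then rewind (depth - 1) rest
        else None
    | _ :: rest -> if depth = 0 then None else rewind depth rest
  in
  rewind 0 history

let () =
  (* "return" on line 1, "x" on line 2: a semicolon is inserted. *)
  assert (
    insert_restricted [ T_RETURN, 1; T_IDENT "x", 2 ]
    = [ T_RETURN, 1; T_VIRTUAL_SEMICOLON, 1; T_IDENT "x", 2 ]);
  (* History of "while ( true )", most recent token first. *)
  assert (
    rewind_block [ T_RPAREN, 1; T_IDENT "true", 1; T_LPAREN, 1; T_IDENT "while", 1 ]
    = Some ((T_LPAREN, 1), [ T_IDENT "while", 1 ]))

In the patch itself the history additionally carries Menhir checkpoints, so after rewinding, recovery can resume parsing from the matched opener — for example re-offering the saved T_LPAREN as T_LPAREN_ARROW when a stray => is encountered.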