diff --git a/CHANGES.md b/CHANGES.md index d83ce384f6..cb4f762d79 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ ## Features/Changes * Compiler: change control-flow compilation strategy (#1496) * Compiler: Dead code elimination of unused references (#2076) +* Compiler: reduce memory consumption (#1516) * Lib: add download attribute to anchor element * Misc: switch CI to OCaml 5.1 * Misc: preliminary support for OCaml 5.2 diff --git a/compiler/bin-js_of_ocaml/cmd_arg.ml b/compiler/bin-js_of_ocaml/cmd_arg.ml index fd9657b54a..2077da2674 100644 --- a/compiler/bin-js_of_ocaml/cmd_arg.ml +++ b/compiler/bin-js_of_ocaml/cmd_arg.ml @@ -365,7 +365,7 @@ let options = let t = Term.( const build_t - $ Jsoo_cmdline.Arg.t + $ Lazy.force Jsoo_cmdline.Arg.t $ set_param $ set_env $ dynlink @@ -604,7 +604,7 @@ let options_runtime_only = let t = Term.( const build_t - $ Jsoo_cmdline.Arg.t + $ Lazy.force Jsoo_cmdline.Arg.t $ toplevel $ no_cmis $ set_param diff --git a/compiler/bin-js_of_ocaml/link.ml b/compiler/bin-js_of_ocaml/link.ml index 4025734010..090913d20b 100644 --- a/compiler/bin-js_of_ocaml/link.ml +++ b/compiler/bin-js_of_ocaml/link.ml @@ -126,7 +126,7 @@ let options = let t = Term.( const build_t - $ Jsoo_cmdline.Arg.t + $ Lazy.force Jsoo_cmdline.Arg.t $ no_sourcemap $ sourcemap $ sourcemap_inline_in_js diff --git a/compiler/bin-jsoo_minify/cmd_arg.ml b/compiler/bin-jsoo_minify/cmd_arg.ml index 0260359a82..743cbe0e21 100644 --- a/compiler/bin-jsoo_minify/cmd_arg.ml +++ b/compiler/bin-jsoo_minify/cmd_arg.ml @@ -42,7 +42,9 @@ let options = let build_t common files output_file use_stdin = `Ok { common; use_stdin; output_file; files } in - let t = Term.(const build_t $ Jsoo_cmdline.Arg.t $ files $ output_file $ use_stdin) in + let t = + Term.(const build_t $ Lazy.force Jsoo_cmdline.Arg.t $ files $ output_file $ use_stdin) + in Term.ret t let info = diff --git a/compiler/lib-cmdline/arg.ml b/compiler/lib-cmdline/arg.ml index aa793f1d4a..925dc0f8aa 100644 --- a/compiler/lib-cmdline/arg.ml +++ b/compiler/lib-cmdline/arg.ml @@ -35,28 +35,31 @@ type t = } let debug = - let doc = "enable debug [$(docv)]." in - let all = List.map (Debug.available ()) ~f:(fun s -> s, s) in - let arg = - Arg.(value & opt_all (list (enum all)) [] & info [ "debug" ] ~docv:"SECTION" ~doc) - in - Term.(const List.flatten $ arg) + lazy + (let doc = "enable debug [$(docv)]." in + let all = List.map (Debug.available ()) ~f:(fun s -> s, s) in + let arg = + Arg.(value & opt_all (list (enum all)) [] & info [ "debug" ] ~docv:"SECTION" ~doc) + in + Term.(const List.flatten $ arg)) let enable = - let doc = "Enable optimization [$(docv)]." in - let all = List.map (Config.Flag.available ()) ~f:(fun s -> s, s) in - let arg = - Arg.(value & opt_all (list (enum all)) [] & info [ "enable" ] ~docv:"OPT" ~doc) - in - Term.(const List.flatten $ arg) + lazy + (let doc = "Enable optimization [$(docv)]." in + let all = List.map (Config.Flag.available ()) ~f:(fun s -> s, s) in + let arg = + Arg.(value & opt_all (list (enum all)) [] & info [ "enable" ] ~docv:"OPT" ~doc) + in + Term.(const List.flatten $ arg)) let disable = - let doc = "Disable optimization [$(docv)]." in - let all = List.map (Config.Flag.available ()) ~f:(fun s -> s, s) in - let arg = - Arg.(value & opt_all (list (enum all)) [] & info [ "disable" ] ~docv:"OPT" ~doc) - in - Term.(const List.flatten $ arg) + lazy + (let doc = "Disable optimization [$(docv)]." 
in + let all = List.map (Config.Flag.available ()) ~f:(fun s -> s, s) in + let arg = + Arg.(value & opt_all (list (enum all)) [] & info [ "disable" ] ~docv:"OPT" ~doc) + in + Term.(const List.flatten $ arg)) let pretty = let doc = "Pretty print the output." in @@ -86,31 +89,32 @@ let custom_header = Arg.(value & opt (some string) None & info [ "custom-header" ] ~doc) let t = - Term.( - const (fun debug enable disable pretty debuginfo noinline quiet werror c_header -> - let enable = if pretty then "pretty" :: enable else enable in - let enable = if debuginfo then "debuginfo" :: enable else enable in - let disable = if noinline then "inline" :: disable else disable in - let disable_if_pretty name disable = - if pretty && not (List.mem name ~set:enable) then name :: disable else disable - in - let disable = disable_if_pretty "shortvar" disable in - let disable = disable_if_pretty "share" disable in - { debug = { enable = debug; disable = [] } - ; optim = { enable; disable } - ; quiet - ; werror - ; custom_header = c_header - }) - $ debug - $ enable - $ disable - $ pretty - $ debuginfo - $ noinline - $ is_quiet - $ is_werror - $ custom_header) + lazy + Term.( + const (fun debug enable disable pretty debuginfo noinline quiet werror c_header -> + let enable = if pretty then "pretty" :: enable else enable in + let enable = if debuginfo then "debuginfo" :: enable else enable in + let disable = if noinline then "inline" :: disable else disable in + let disable_if_pretty name disable = + if pretty && not (List.mem name ~set:enable) then name :: disable else disable + in + let disable = disable_if_pretty "shortvar" disable in + let disable = disable_if_pretty "share" disable in + { debug = { enable = debug; disable = [] } + ; optim = { enable; disable } + ; quiet + ; werror + ; custom_header = c_header + }) + $ Lazy.force debug + $ Lazy.force enable + $ Lazy.force disable + $ pretty + $ debuginfo + $ noinline + $ is_quiet + $ is_werror + $ custom_header) let on_off on off t = List.iter ~f:on t.enable; diff --git a/compiler/lib-cmdline/arg.mli b/compiler/lib-cmdline/arg.mli index dfa35da022..295f58ac72 100644 --- a/compiler/lib-cmdline/arg.mli +++ b/compiler/lib-cmdline/arg.mli @@ -30,6 +30,6 @@ type t = ; custom_header : string option } -val t : t Cmdliner.Term.t +val t : t Cmdliner.Term.t Lazy.t val eval : t -> unit diff --git a/compiler/lib/flow_lexer.ml b/compiler/lib/flow_lexer.ml index cd3257bab0..db8ca644da 100644 --- a/compiler/lib/flow_lexer.ml +++ b/compiler/lib/flow_lexer.ml @@ -14,17 +14,6 @@ module Lex_mode = struct | REGEXP end -module Loc = struct - (* line numbers are 1-indexed; column numbers are 0-indexed *) - - type t = - { source : string option - ; start : Lexing.position - ; _end : Lexing.position - } - [@@ocaml.warning "-69"] -end - module Parse_error = struct type t = | Unexpected of string @@ -45,28 +34,20 @@ module Lex_env = struct type lex_state = { lex_errors_acc : (Loc.t * Parse_error.t) list } [@@ocaml.unboxed] type t = - { lex_source : string option - ; lex_lb : Sedlexing.lexbuf + { lex_lb : Sedlexing.lexbuf ; lex_state : lex_state ; lex_mode_stack : Lex_mode.t list + ; lex_last_loc : Loc.t ref } [@@ocaml.warning "-69"] - let source env = env.lex_source - let empty_lex_state = { lex_errors_acc = [] } let create lex_lb = - let s, _ = Sedlexing.lexing_positions lex_lb in - let lex_source = - match s.pos_fname with - | "" -> None - | s -> Some s - in - { lex_source - ; lex_lb + { lex_lb ; lex_state = empty_lex_state ; lex_mode_stack = [ Lex_mode.NORMAL ] + ; lex_last_loc = 
ref (Loc.create Lexing.dummy_pos Lexing.dummy_pos) } end @@ -84,7 +65,7 @@ let pop_mode env = module Lex_result = struct type t = { lex_token : Js_token.t - ; lex_loc : Lexing.position * Lexing.position + ; lex_loc : Loc.t ; lex_errors : (Loc.t * Parse_error.t) list } [@@ocaml.warning "-69"] @@ -239,9 +220,9 @@ let is_valid_identifier_name s = | js_id_start, Star js_id_continue, eof -> true | _ -> false -let loc_of_lexbuf env (lexbuf : Sedlexing.lexbuf) = +let loc_of_lexbuf _env (lexbuf : Sedlexing.lexbuf) = let start_offset, stop_offset = Sedlexing.lexing_positions lexbuf in - { Loc.source = Lex_env.source env; start = start_offset; _end = stop_offset } + Loc.create start_offset stop_offset let lex_error (env : Lex_env.t) loc err : Lex_env.t = let lex_errors_acc = (loc, err) :: env.lex_state.lex_errors_acc in @@ -821,12 +802,14 @@ let wrap f = let start, _ = Sedlexing.lexing_positions env.Lex_env.lex_lb in let t = f env env.Lex_env.lex_lb in let _, stop = Sedlexing.lexing_positions env.Lex_env.lex_lb in - t, (start, stop) + t, Loc.create ~last_line:(Loc.line_end' !(env.lex_last_loc)) start stop in let rec helper comments env = Sedlexing.start env.Lex_env.lex_lb; - match f env with - | Token (env, t), lex_loc -> + let res, lex_loc = f env in + match res with + | Token (env, t) -> + env.lex_last_loc := lex_loc; let lex_token = t in let lex_errors_acc = env.lex_state.lex_errors_acc in if lex_errors_acc = [] @@ -834,14 +817,15 @@ let wrap f = else ( { env with lex_state = Lex_env.empty_lex_state } , { Lex_result.lex_token; lex_loc; lex_errors = List.rev lex_errors_acc } ) - | Comment (env, comment), lex_loc -> + | Comment (env, comment) -> + env.lex_last_loc := lex_loc; let lex_errors_acc = env.lex_state.lex_errors_acc in ( env , { Lex_result.lex_token = TComment comment ; lex_loc ; lex_errors = List.rev lex_errors_acc } ) - | Continue env, _ -> helper comments env + | Continue env -> helper comments env in fun env -> helper [] env diff --git a/compiler/lib/flow_lexer.mli b/compiler/lib/flow_lexer.mli index 0c1d278aeb..e91d29ab8e 100644 --- a/compiler/lib/flow_lexer.mli +++ b/compiler/lib/flow_lexer.mli @@ -18,14 +18,6 @@ module Parse_error : sig val to_string : t -> string end -module Loc : sig - type t = - { source : string option - ; start : Lexing.position - ; _end : Lexing.position - } -end - module Lex_env : sig type t @@ -37,7 +29,7 @@ module Lex_result : sig val token : t -> Js_token.t - val loc : t -> Lexing.position * Lexing.position + val loc : t -> Loc.t val errors : t -> (Loc.t * Parse_error.t) list end diff --git a/compiler/lib/js_parser.mly b/compiler/lib/js_parser.mly index b409f84b67..96bb64e399 100644 --- a/compiler/lib/js_parser.mly +++ b/compiler/lib/js_parser.mly @@ -217,19 +217,22 @@ T_BACKQUOTE (* Macros *) (*************************************************************************) -listc(X): +listc_rev(X): | X { [$1] } - | listc(X) "," X { $1 @ [$3] } + | listc_rev(X) "," X { $3 :: $1 } -listc_with_empty_trail(X): - | e=elision { (List.map (fun () -> None) e) } - | x=X e=elision { Some x :: (List.map (fun () -> None) e) } - | listc_with_empty_trail(X) x=X e=elision { $1 @ [Some x] @ (List.map (fun () -> None) e) } +%inline listc(X): + | listc_rev(X) { List.rev $1 } + +listc_with_empty_trail_rev(X): + | e=elision { (List.rev_map (fun () -> None) e) } + | x=X e=elision { List.rev_append (List.rev_map (fun () -> None) e) [ Some x ] } + | listc_with_empty_trail_rev(X) x=X e=elision { List.rev_append (List.rev_map (fun () -> None) e) (Some x :: $1) } listc_with_empty(X): | 
X { [ Some $1 ] } - | listc_with_empty_trail(X) { $1 } - | listc_with_empty_trail(X) X { $1 @ [Some $2 ] } + | listc_with_empty_trail_rev(X) { List.rev $1 } + | listc_with_empty_trail_rev(X) X { List.rev ((Some $2) :: $1) } optl(X): | (* empty *) { [] } | X { $1 } diff --git a/compiler/lib/js_traverse.ml b/compiler/lib/js_traverse.ml index 295b7e3435..7ee45bf611 100644 --- a/compiler/lib/js_traverse.ml +++ b/compiler/lib/js_traverse.ml @@ -779,18 +779,33 @@ class free = } method use_var x = - let n = try IdentMap.find x !count with Not_found -> 0 in - count := IdentMap.add x (succ n) !count; + count := + IdentMap.update + x + (function + | None -> Some 1 + | Some n -> Some (succ n)) + !count; state_ <- { state_ with use = IdentSet.add x state_.use } method def_var x = - let n = try IdentMap.find x !count with Not_found -> 0 in - count := IdentMap.add x (succ n) !count; + count := + IdentMap.update + x + (function + | None -> Some 1 + | Some n -> Some (succ n)) + !count; state_ <- { state_ with def_var = IdentSet.add x state_.def_var } method def_local x = - let n = try IdentMap.find x !count with Not_found -> 0 in - count := IdentMap.add x (succ n) !count; + count := + IdentMap.update + x + (function + | None -> Some 1 + | Some n -> Some (succ n)) + !count; state_ <- { state_ with def_local = IdentSet.add x state_.def_local } method fun_decl (k, params, body, nid) = diff --git a/compiler/lib/loc.ml b/compiler/lib/loc.ml new file mode 100644 index 0000000000..6ae745a224 --- /dev/null +++ b/compiler/lib/loc.ml @@ -0,0 +1,88 @@ +type line = + { pos_fname : string + ; pos_lnum : int + ; pos_bol : int + } + +type t = + | SameLine of + { line : line + ; cnum_start : int + ; offset : int + } + | MultiLine of + { line_start : line + ; cnum_start : int + ; line_end : line + ; offset : int + } + +let filename = function + | SameLine { line = { pos_fname; _ }; _ } -> pos_fname + | MultiLine { line_start = { pos_fname; _ }; _ } -> pos_fname + +let line' = function + | SameLine { line; _ } -> line + | MultiLine { line_start; _ } -> line_start + +let line_end' = function + | SameLine { line; _ } -> line + | MultiLine { line_end; _ } -> line_end + +let cnum = function + | SameLine { cnum_start; _ } -> cnum_start + | MultiLine { cnum_start; _ } -> cnum_start + +let line t = (line' t).pos_lnum + +let line_end t = (line_end' t).pos_lnum + +let column = function + | SameLine { line = { pos_bol; _ }; cnum_start; _ } -> cnum_start - pos_bol + | MultiLine { line_start = { pos_bol; _ }; cnum_start; _ } -> cnum_start - pos_bol + +let dummy_line = { pos_fname = ""; pos_lnum = 0; pos_bol = 0 } + +let dummy = SameLine { line = dummy_line; cnum_start = 0; offset = 0 } + +let create ?(last_line = dummy_line) (p1 : Lexing.position) (p2 : Lexing.position) : t = + if p1.pos_fname = p2.pos_fname && p1.pos_lnum = p2.pos_lnum && p1.pos_bol = p2.pos_bol + then + let line = + if last_line.pos_fname == p1.pos_fname + && last_line.pos_lnum = p1.pos_lnum + && last_line.pos_bol = p1.pos_bol + then last_line + else { pos_fname = p1.pos_fname; pos_lnum = p1.pos_lnum; pos_bol = p1.pos_bol } + in + SameLine { line; cnum_start = p1.pos_cnum; offset = p2.pos_cnum - p1.pos_cnum } + else + let line_start = + { pos_fname = p1.pos_fname; pos_lnum = p1.pos_lnum; pos_bol = p1.pos_bol } + in + let line_end = + { pos_fname = p2.pos_fname; pos_lnum = p2.pos_lnum; pos_bol = p2.pos_bol } + in + MultiLine + { line_start + ; cnum_start = p1.pos_cnum + ; line_end + ; offset = p2.pos_cnum - p1.pos_cnum + } + +let p1 : t -> Lexing.position = function 
+ | SameLine { line = { pos_fname; pos_lnum; pos_bol }; cnum_start; offset = _ } -> + { pos_bol; pos_lnum; pos_fname; pos_cnum = cnum_start } + | MultiLine + { line_start = { pos_fname; pos_lnum; pos_bol } + ; cnum_start + ; line_end = _ + ; offset = _ + } -> { pos_bol; pos_lnum; pos_fname; pos_cnum = cnum_start } + +let p2 : t -> Lexing.position = function + | SameLine { line = { pos_fname; pos_lnum; pos_bol }; cnum_start; offset } -> + { pos_bol; pos_lnum; pos_fname; pos_cnum = cnum_start + offset } + | MultiLine + { line_end = { pos_fname; pos_lnum; pos_bol }; line_start = _; cnum_start; offset } + -> { pos_bol; pos_lnum; pos_fname; pos_cnum = cnum_start + offset } diff --git a/compiler/lib/parse_js.ml b/compiler/lib/parse_js.ml index 39af863a4c..d45d0c2372 100644 --- a/compiler/lib/parse_js.ml +++ b/compiler/lib/parse_js.ml @@ -29,9 +29,9 @@ module Lexer : sig val curr_pos : t -> Lexing.position - val token : t -> Js_token.t * Lexing.position * Lexing.position + val token : t -> Js_token.t * Loc.t - val lex_as_regexp : t -> Js_token.t * Lexing.position * Lexing.position + val lex_as_regexp : t -> Js_token.t * Loc.t val rollback : t -> unit @@ -74,20 +74,8 @@ end = struct | [] -> () | l -> List.iter l ~f:(fun (loc, e) -> - let loc = - match loc.Flow_lexer.Loc.source with - | None -> - Printf.sprintf - "%d:%d" - loc.start.pos_lnum - (loc.start.pos_cnum - loc.start.pos_bol) - | Some f -> - Printf.sprintf - "%s:%d:%d" - f - loc.start.pos_lnum - (loc.start.pos_cnum - loc.start.pos_bol) - in + let f = Loc.filename loc in + let loc = Printf.sprintf "%s:%d:%d" f (Loc.line loc) (Loc.column loc) in Printf.eprintf "Lexer error: %s: %s\n" @@ -98,9 +86,9 @@ end = struct let env, res = Flow_lexer.lex t.env in t.env <- env; let tok = Flow_lexer.Lex_result.token res in - let p1, p2 = Flow_lexer.Lex_result.loc res in + let loc = Flow_lexer.Lex_result.loc res in report_errors res; - tok, p1, p2 + tok, loc let rollback t = Sedlexing.rollback t.l @@ -109,30 +97,30 @@ end = struct let env, res = Flow_lexer.regexp t.env in t.env <- env; let tok = Flow_lexer.Lex_result.token res in - let p1, p2 = Flow_lexer.Lex_result.loc res in + let loc = Flow_lexer.Lex_result.loc res in report_errors res; - tok, p1, p2 + tok, loc end exception Parsing_error of Parse_info.t let is_comment = function - | (Js_token.TComment _ | TAnnot _ | TCommentLineDirective _), _, _ -> true + | Js_token.TComment _ | TAnnot _ | TCommentLineDirective _ -> true | _ -> false module State : sig - type token = Js_token.t * Lexing.position * Lexing.position + type token = Js_token.t * Loc.t module Cursor : sig type 'a t - val insert_token : 'a t -> token -> 'a t + val insert_token : 'a t -> Js_token.t -> Loc.t -> 'a t - val replace_token : 'a t -> token -> 'a t + val replace_token : 'a t -> Js_token.t -> Loc.t -> 'a t - val last_token : 'a t -> (token * 'a t) option + val last_token : 'a t -> (Js_token.t * Loc.t * 'a t) option - val rewind_block : 'a t -> (token * 'a t) option + val rewind_block : 'a t -> (Js_token.t * Loc.t * 'a t) option end type 'a t @@ -143,7 +131,7 @@ module State : sig val checkpoint : 'a t -> 'a Js_parser.MenhirInterpreter.checkpoint - val offer : 'a t -> token -> 'a t + val offer : 'a t -> Js_token.t -> Loc.t -> 'a t val finalize_error : 'a t -> 'a t @@ -153,46 +141,48 @@ module State : sig val all_tokens : 'a t -> token list end = struct - type token = Js_token.t * Lexing.position * Lexing.position + type token = Js_token.t * Loc.t type 'a checkpoint = 'a Js_parser.MenhirInterpreter.checkpoint type 'a w = | Start of 'a 
checkpoint | Checkpoint of 'a checkpoint * 'a w - | Token of token * 'a w + | Token of Js_token.t * Loc.t * 'a w module Cursor = struct type 'a t = 'a w * token list - let last_token ((h, next) : _ t) : (_ * _ t) option = + let last_token ((h, next) : _ t) : (_ * _ * _ t) option = let rec find next = function | Start _ -> None | Checkpoint (_, t) -> find next t - | Token (tok, t) -> - if is_comment tok then find (tok :: next) t else Some (tok, (t, tok :: next)) + | Token (tok, loc, t) -> + if is_comment tok + then find ((tok, loc) :: next) t + else Some (tok, loc, (t, (tok, loc) :: next)) in find next h - let replace_token ((h, next) : _ t) tok : _ t = + let replace_token ((h, next) : _ t) tok loc : _ t = match next with | [] -> assert false - | _ :: next -> h, tok :: next + | _ :: next -> h, (tok, loc) :: next - let insert_token ((h, next) : _ t) tok : _ t = h, tok :: next + let insert_token ((h, next) : _ t) tok loc : _ t = h, (tok, loc) :: next - let rewind_block : 'a t -> (token * 'a t) option = + let rewind_block : 'a t -> (Js_token.t * Loc.t * 'a t) option = fun h -> let rec rewind (stack : Js_token.t list) (h : _ t) = match last_token h with | None -> None - | Some (((tok, _, _) as tok'), h) -> ( + | Some (tok, loc, h) -> ( match tok, stack with | (T_RPAREN | T_RCURLY | T_RBRACKET), _ -> let stack = tok :: stack in rewind stack h | T_LPAREN, [ T_RPAREN ] | T_LBRACKET, [ T_RBRACKET ] | T_LCURLY, [ T_RCURLY ] - -> Some (tok', h) + -> Some (tok, loc, h) | T_LPAREN, T_RPAREN :: stack | T_LBRACKET, T_RBRACKET :: stack | T_LCURLY, T_RCURLY :: stack -> rewind stack h @@ -223,39 +213,40 @@ end = struct let checkpoint { checkpoint; _ } = checkpoint - let offer { checkpoint; history; next } tok : _ t = + let offer { checkpoint; history; next } tok loc : _ t = match (checkpoint : _ checkpoint) with | Accepted _ -> assert false - | Rejected | HandlingError _ -> { checkpoint; history; next = tok :: next } + | Rejected | HandlingError _ -> { checkpoint; history; next = (tok, loc) :: next } | Shifting _ | AboutToReduce _ -> assert false | InputNeeded _ -> ( if is_comment tok - then { checkpoint; history = Token (tok, history); next } + then { checkpoint; history = Token (tok, loc, history); next } else let new_checkpoint = - advance (Js_parser.MenhirInterpreter.offer checkpoint tok) + advance + (Js_parser.MenhirInterpreter.offer checkpoint (tok, Loc.p1 loc, Loc.p2 loc)) in match (new_checkpoint : 'a checkpoint) with | Shifting _ | AboutToReduce _ -> assert false | Rejected | Accepted _ | InputNeeded _ -> let history = match tok with - | T_VIRTUAL_SEMICOLON, _, _ -> + | T_VIRTUAL_SEMICOLON -> let rec insert = function - | Start _ as start -> Token (tok, start) + | Start _ as start -> Token (tok, loc, start) | Checkpoint (_, x) -> insert x - | Token (inner_tok, tail) as x -> + | Token (inner_tok, loc', tail) as x -> if is_comment inner_tok - then Token (inner_tok, insert tail) - else Token (tok, x) + then Token (inner_tok, loc', insert tail) + else Token (tok, loc, x) in insert history - | _ -> Token (tok, history) + | _ -> Token (tok, loc, history) in { checkpoint = new_checkpoint; history; next } | HandlingError _ -> { checkpoint = new_checkpoint - ; history = Token (tok, Checkpoint (checkpoint, history)) + ; history = Token (tok, loc, Checkpoint (checkpoint, history)) ; next }) @@ -264,19 +255,24 @@ end = struct match t with | Start env -> advance env | Checkpoint (env, _) -> advance env - | Token (tok, t) -> ( + | Token (tok, loc, t) -> ( if is_comment tok then compute t else match compute t 
with | InputNeeded _ as checkpoint -> - advance (Js_parser.MenhirInterpreter.offer checkpoint tok) + advance + (Js_parser.MenhirInterpreter.offer + checkpoint + (tok, Loc.p1 loc, Loc.p2 loc)) | Shifting _ | AboutToReduce _ -> assert false | Accepted _ | Rejected | HandlingError _ -> assert false) in let checkpoint = compute h in - List.fold_left next ~init:{ checkpoint; history = h; next = [] } ~f:(fun t tok -> - offer t tok) + List.fold_left + next + ~init:{ checkpoint; history = h; next = [] } + ~f:(fun t (tok, loc) -> offer t tok loc) let finalize_error { checkpoint; history; next } = let rec loop (t : _ Js_parser.MenhirInterpreter.checkpoint) = @@ -293,7 +289,7 @@ end = struct match t with | Start _ -> acc | Checkpoint (_, tail) -> collect acc tail - | Token (tok, tail) -> collect (tok :: acc) tail + | Token (tok, loc, tail) -> collect ((tok, loc) :: acc) tail in collect [] history end @@ -307,41 +303,43 @@ let parse_annot s = | Not_found -> None | _ -> None) -let rec nl_separated prev ((_, c, _) as ctok) = +let rec nl_separated prev loc' = match State.Cursor.last_token prev with | None -> true - | Some ((T_VIRTUAL_SEMICOLON, _, _), prev) -> nl_separated prev ctok - | Some ((_, _, p2), _) -> c.Lexing.pos_lnum <> p2.Lexing.pos_lnum + | Some (T_VIRTUAL_SEMICOLON, _, prev) -> nl_separated prev loc' + | Some (_, loc, _) -> Loc.line loc' <> Loc.line_end loc let acceptable checkpoint token = let module I = Js_parser.MenhirInterpreter in let checkpoint = State.checkpoint checkpoint in I.acceptable checkpoint token Lexer.dummy_pos -let semicolon = Js_token.T_VIRTUAL_SEMICOLON, Lexer.dummy_pos, Lexer.dummy_pos +let semicolon = Js_token.T_VIRTUAL_SEMICOLON + +let dummy_loc = Loc.create Lexer.dummy_pos Lexer.dummy_pos let rec offer_one t (lexbuf : Lexer.t) = - let tok = Lexer.token lexbuf in + let tok, loc = Lexer.token lexbuf in match tok with - | TCommentLineDirective _, _, _ -> - let t = State.offer t tok in + | TCommentLineDirective _ -> + let t = State.offer t tok loc in offer_one t lexbuf - | (TComment s, p1, p2) as tok -> + | TComment s as tok -> let tok = match parse_annot s with | None -> tok - | Some a -> TAnnot (s, a), p1, p2 + | Some a -> TAnnot (s, a) in - let t = State.offer t tok in + let t = State.offer t tok loc in offer_one t lexbuf | _ -> let t = match tok with - | T_LPAREN, _, _ when acceptable t T_LPAREN_ARROW -> State.save_checkpoint t + | T_LPAREN when acceptable t T_LPAREN_ARROW -> State.save_checkpoint t | _ -> t in let h = State.cursor t in - let tok = + let tok, loc = (* restricted productions * 7.9.1 - 3 * When, as the program is parsed from left to right, a token is encountered @@ -353,67 +351,66 @@ let rec offer_one t (lexbuf : Lexer.t) = * one LineTerminator, then a semicolon is automatically inserted before the * restricted token. *) match State.Cursor.last_token h, tok with - | ( Some - (((T_RETURN | T_CONTINUE | T_BREAK | T_THROW | T_YIELD | T_ASYNC), _, _), _) - , (((T_SEMICOLON | T_VIRTUAL_SEMICOLON), _, _) as tok) ) -> tok - | ( Some - (((T_RETURN | T_CONTINUE | T_BREAK | T_THROW | T_YIELD | T_ASYNC), _, _), _) - , _ ) - when nl_separated h tok && acceptable t T_VIRTUAL_SEMICOLON -> + | ( Some ((T_RETURN | T_CONTINUE | T_BREAK | T_THROW | T_YIELD | T_ASYNC), _, _) + , ((T_SEMICOLON | T_VIRTUAL_SEMICOLON) as tok) ) -> tok, loc + | Some ((T_RETURN | T_CONTINUE | T_BREAK | T_THROW | T_YIELD | T_ASYNC), _, _), _ + when nl_separated h loc && acceptable t T_VIRTUAL_SEMICOLON -> (* restricted token can also appear as regular identifier such as in [x.return]. 
In such case, feeding a virtual semicolon could trigger a parser error. Here, we first checkpoint that a virtual semicolon is acceptable. *) Lexer.rollback lexbuf; - semicolon + semicolon, dummy_loc (* The practical effect of these restricted productions is as follows: * When a ++ or -- token is encountered where the parser would treat it * as a postfix operator, and at least one LineTerminator occurred between * the preceding token and the ++ or -- token, then a semicolon is automatically * inserted before the ++ or -- token. *) - | _, ((T_DECR, p1, p2) as tok) when not (nl_separated h tok) -> - Js_token.T_DECR_NB, p1, p2 - | _, ((T_INCR, p1, p2) as tok) when not (nl_separated h tok) -> - Js_token.T_INCR_NB, p1, p2 - | _, ((((T_DIV | T_DIV_ASSIGN) as tok), _, _) as tok_and_pos) -> - if acceptable t tok then tok_and_pos else Lexer.lex_as_regexp lexbuf - | _ -> tok + | _, T_DECR when not (nl_separated h loc) -> Js_token.T_DECR_NB, loc + | _, T_INCR when not (nl_separated h loc) -> Js_token.T_INCR_NB, loc + | _, ((T_DIV | T_DIV_ASSIGN) as tok) -> + if acceptable t tok + then tok, loc + else + let t, loc = Lexer.lex_as_regexp lexbuf in + t, loc + | _ -> tok, loc in - State.offer t tok + State.offer t tok loc let dummy_ident = let dummy = "" in Js_token.T_IDENTIFIER (Stdlib.Utf8_string.of_string_exn dummy, dummy) -let token_to_ident (t, p1, p2) = +let token_to_ident t = let name = Js_token.to_string t in - Js_token.T_IDENTIFIER (Stdlib.Utf8_string.of_string_exn name, name), p1, p2 + Js_token.T_IDENTIFIER (Stdlib.Utf8_string.of_string_exn name, name) let end_of_do_whle prev = match State.Cursor.rewind_block prev with | None -> false - | Some ((T_LPAREN, _, _), prev) -> ( + | Some (T_LPAREN, _, prev) -> ( match State.Cursor.last_token prev with | None -> false - | Some ((T_WHILE, _, _), prev) -> ( + | Some (T_WHILE, _, prev) -> ( match State.Cursor.last_token prev with | None -> false - | Some ((T_SEMICOLON, _, _), prev) -> ( + | Some (T_SEMICOLON, _, prev) -> ( match State.Cursor.last_token prev with | None -> false - | Some ((T_DO, _, _), _) -> true - | Some (_, _) -> false) - | Some ((T_RCURLY, _, _), _) -> ( + | Some (T_DO, _, _) -> true + | Some (_, _, _) -> false) + | Some (T_RCURLY, _, _) -> ( match State.Cursor.rewind_block prev with | None -> false - | Some ((T_LCURLY, _, _), prev) -> ( + | Some (T_LCURLY, _, prev) -> ( match State.Cursor.last_token prev with | None -> false - | Some ((T_DO, _, _), _) -> true - | Some (_, _) -> false) + | Some (T_DO, _, _) -> true + | Some (_, _, _) -> false) | Some _ -> assert false) - | Some (_, _) -> false) - | Some (_, _) -> false) + | Some (_, _, _) -> false) + | Some (_, _, _) -> false) | Some _ -> assert false let recover error_checkpoint previous_checkpoint = @@ -432,44 +429,46 @@ let recover error_checkpoint previous_checkpoint = (* complete ECMAScript Program, then a semicolon is automatically inserted at the end *) match State.Cursor.last_token (State.cursor error_checkpoint) with | None -> error_checkpoint - | Some (offending_token, rest) -> ( + | Some (offending_token, offending_loc, rest) -> ( match State.Cursor.last_token rest with | None -> error_checkpoint - | Some ((last_token, _, _), _) -> ( + | Some (last_token, _, _) -> ( match offending_token with - | T_VIRTUAL_SEMICOLON, _, _ -> error_checkpoint + | T_VIRTUAL_SEMICOLON -> error_checkpoint (* contextually allowed as identifiers, namely await and yield; *) - | (T_YIELD | T_AWAIT), _, _ when acceptable previous_checkpoint dummy_ident -> - State.Cursor.replace_token rest 
(token_to_ident offending_token) + | (T_YIELD | T_AWAIT) when acceptable previous_checkpoint dummy_ident -> + State.Cursor.replace_token + rest + (token_to_ident offending_token) + offending_loc |> State.try_recover - | T_RCURLY, _, _ - when acceptable previous_checkpoint Js_token.T_VIRTUAL_SEMICOLON -> - State.Cursor.insert_token rest semicolon |> State.try_recover - | T_EOF, _, _ when acceptable previous_checkpoint Js_token.T_VIRTUAL_SEMICOLON - -> State.Cursor.insert_token rest semicolon |> State.try_recover - | (T_ARROW, _, _) as tok when not (nl_separated rest tok) -> ( + | T_RCURLY when acceptable previous_checkpoint Js_token.T_VIRTUAL_SEMICOLON -> + State.Cursor.insert_token rest semicolon dummy_loc |> State.try_recover + | T_EOF when acceptable previous_checkpoint Js_token.T_VIRTUAL_SEMICOLON -> + State.Cursor.insert_token rest semicolon dummy_loc |> State.try_recover + | T_ARROW when not (nl_separated rest offending_loc) -> ( (* Restart parsing from the openning parens, patching the token to be T_LPAREN_ARROW to help the parser *) match last_token with | T_RPAREN -> ( match State.Cursor.rewind_block rest with - | Some ((T_LPAREN, p1, p2), prev) -> - State.Cursor.replace_token prev (T_LPAREN_ARROW, p1, p2) + | Some (T_LPAREN, loc, prev) -> + State.Cursor.replace_token prev T_LPAREN_ARROW loc |> State.try_recover | Some _ -> assert false | None -> error_checkpoint) | _ -> error_checkpoint) - | last -> ( + | _ -> ( match last_token with | T_VIRTUAL_SEMICOLON -> error_checkpoint | _ - when nl_separated rest last + when nl_separated rest offending_loc && acceptable previous_checkpoint Js_token.T_VIRTUAL_SEMICOLON -> - State.Cursor.insert_token rest semicolon |> State.try_recover + State.Cursor.insert_token rest semicolon dummy_loc |> State.try_recover | T_RPAREN when end_of_do_whle rest && acceptable previous_checkpoint Js_token.T_VIRTUAL_SEMICOLON -> - State.Cursor.insert_token rest semicolon |> State.try_recover + State.Cursor.insert_token rest semicolon dummy_loc |> State.try_recover | _ -> error_checkpoint))) let parse_aux the_parser (lexbuf : Lexer.t) = @@ -501,14 +500,15 @@ let parse_aux the_parser (lexbuf : Lexer.t) = | _ -> loop new_checkpoint new_checkpoint) in let checkpoint = State.create init in - match loop checkpoint checkpoint with - | `Ok x -> x + let res = loop checkpoint checkpoint in + match res with + | `Ok all -> all | `Error t -> let rec last cursor = match State.Cursor.last_token cursor with | None -> assert false - | Some ((T_VIRTUAL_SEMICOLON, _, _), cursor) -> last cursor - | Some ((_, p, _), _) -> p + | Some (T_VIRTUAL_SEMICOLON, _, cursor) -> last cursor + | Some (_, loc, _) -> Loc.p1 loc in let p = last (State.cursor t) in raise (Parsing_error (Parse_info.t_of_pos p)) @@ -528,13 +528,14 @@ let parse' lex = let toks = State.all_tokens toks in let take_annot_before = let toks_r = ref toks in - let rec loop start_pos acc (toks : (Js_token.t * _ * _) list) = + let rec loop start_pos acc (toks : (Js_token.t * _) list) = match toks with | [] -> assert false - | (TAnnot a, p1, _) :: xs -> loop start_pos ((a, Parse_info.t_of_pos p1) :: acc) xs - | ((TComment _ | TCommentLineDirective _), _, _) :: xs -> loop start_pos acc xs - | (_, p1, _p2) :: xs -> - if p1.Lexing.pos_cnum = start_pos.Lexing.pos_cnum + | (TAnnot a, loc) :: xs -> + loop start_pos ((a, Parse_info.t_of_pos (Loc.p1 loc)) :: acc) xs + | ((TComment _ | TCommentLineDirective _), _) :: xs -> loop start_pos acc xs + | (_, loc) :: xs -> + if Loc.cnum loc = start_pos.Lexing.pos_cnum then ( toks_r := toks; 
List.rev acc) diff --git a/compiler/lib/parse_js.mli b/compiler/lib/parse_js.mli index ff47ffb967..14952d9ab7 100644 --- a/compiler/lib/parse_js.mli +++ b/compiler/lib/parse_js.mli @@ -34,6 +34,6 @@ val parse : Lexer.t -> Javascript.program val parse' : Lexer.t -> ((Js_token.Annot.t * Parse_info.t) list * Javascript.program) list - * (Js_token.t * Lexing.position * Lexing.position) list + * (Js_token.t * Loc.t) list val parse_expr : Lexer.t -> Javascript.expression diff --git a/compiler/tests-compiler/js_parser_printer.ml b/compiler/tests-compiler/js_parser_printer.ml index a90a260995..5e6e914663 100644 --- a/compiler/tests-compiler/js_parser_printer.ml +++ b/compiler/tests-compiler/js_parser_printer.ml @@ -706,8 +706,9 @@ let check_vs_string s toks = in let rec loop offset pos = function | [] -> space pos (String.length s) - | (Js_token.T_VIRTUAL_SEMICOLON, _, _) :: rest -> loop offset pos rest - | ((Js_token.T_STRING (_, codepoint_len) as x), p1, _p2) :: rest -> + | (Js_token.T_VIRTUAL_SEMICOLON, _) :: rest -> loop offset pos rest + | ((Js_token.T_STRING (_, codepoint_len) as x), loc) :: rest -> + let p1 = Loc.p1 loc in let { Parse_info.idx = codepoint_idx; _ } = Parse_info.t_of_pos p1 in let bytes_idx = codepoint_idx - offset in let bytes_len = @@ -733,7 +734,8 @@ let check_vs_string s toks = a b); loop offset (bytes_idx + bytes_len + 1) rest - | (x, p1, _p2) :: rest -> + | (x, loc) :: rest -> + let p1 = Loc.p1 loc in let { Parse_info.idx; _ } = Parse_info.t_of_pos p1 in let idx = idx - offset in let str = Js_token.to_string x in @@ -760,8 +762,9 @@ let parse_print_token ?(invalid = false) ?(extra = false) s = let prev = ref 0 in let rec loop tokens = match tokens with - | [ (Js_token.T_EOF, _, _) ] | [] -> Printf.printf "\n" - | (tok, p1, _p2) :: xs -> + | [ (Js_token.T_EOF, _) ] | [] -> Printf.printf "\n" + | (tok, loc) :: xs -> + let p1 = Loc.p1 loc in let pos = Parse_info.t_of_pos p1 in let s = if extra then Js_token.to_string_extra tok else Js_token.to_string tok in (match !prev <> pos.Parse_info.line && pos.Parse_info.line <> 0 with
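
Note (not part of the patch): the memory reduction in the parser comes from replacing the per-token pair of `Lexing.position` records with the compact `Loc.t` introduced in compiler/lib/loc.ml, and from threading `lex_last_loc` through the lexer so that consecutive tokens on the same source line physically share a single `line` record. Below is a minimal sketch of that sharing, assuming it is compiled against the `Loc` module added by this patch; the helper `pos` and the file name "f.js" are made up for illustration.

  let () =
    let pos lnum bol cnum =
      (* fabricate a Lexing.position; the "f.js" literal occurs once in the
         source, so every position built here shares the same pos_fname value,
         which is what Loc.create's physical-equality check relies on *)
      { Lexing.pos_fname = "f.js"; pos_lnum = lnum; pos_bol = bol; pos_cnum = cnum }
    in
    (* two tokens on the same source line *)
    let tok1 = Loc.create (pos 1 0 0) (pos 1 0 3) in
    let tok2 = Loc.create ~last_line:(Loc.line_end' tok1) (pos 1 0 4) (pos 1 0 7) in
    (* both locations use the SameLine constructor, and [create ~last_line]
       reuses the previous token's line record, so per token only the start
       offset and the length remain to be stored *)
    assert (Loc.line' tok2 == Loc.line_end' tok1);
    (* full Lexing.positions can still be reconstructed on demand *)
    assert ((Loc.p1 tok2).Lexing.pos_cnum = 4 && (Loc.p2 tok2).Lexing.pos_cnum = 7)

The same intent shows up in the other hunks: the cmdliner terms are wrapped in `Lazy.t` so they are only built when a command actually needs them, and the menhir list rules are rewritten in reverse-accumulator style to avoid the quadratic `@` appends.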