Skip to content

Commit 76c5000

Browse files
authored
Compiler: rewrite the js parser using menhir incremental api (#1012)
1 parent 8263a4c commit 76c5000

File tree

16 files changed

+13245
-25919
lines changed

16 files changed

+13245
-25919
lines changed

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
## Features/Changes
33
* Runtime: allow one to override xmlHttpRequest.create (#1002)
44
* Compiler: initial support for OCaml 4.11
5+
* Compiler: improve the javascript parser by relying on menhir incremental api.
56

67
## Bug fixes
78
* Compiler: fix code generation for recursive function under for-loops (#1009)

compiler/lib/dune

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
(name js_of_ocaml_compiler)
33
(public_name js_of_ocaml-compiler)
44
(synopsis "Js_of_ocaml compiler library")
5-
(libraries compiler-libs.common compiler-libs.bytecomp bytes
5+
(libraries compiler-libs.common compiler-libs.bytecomp bytes menhirLib
66
(select source_map_io.ml from
77
(yojson -> source_map_io.yojson.ml)
88
( -> source_map_io.unsupported.ml))
@@ -30,7 +30,7 @@
3030
(targets js_parser.mli js_parser.ml)
3131
(deps standard.mly)
3232
(mode promote)
33-
(action (ignore-stderr (run menhir --stdlib . --external-tokens Js_token --explain %{dep:js_parser.mly}))))
33+
(action (ignore-stderr (run menhir --stdlib . --table --external-tokens Js_token --explain %{dep:js_parser.mly}))))
3434

3535
(rule
3636
(targets annot_parser.mli annot_parser.ml)

compiler/lib/js_lexer.ml

Lines changed: 373 additions & 382 deletions
Large diffs are not rendered by default.

compiler/lib/js_lexer.mli

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,4 +17,6 @@
1717
* license.txt for more details.
1818
*)
1919

20-
val main : Js_token.t option -> Lexing.lexbuf -> Js_token.t
20+
val main : Lexing.lexbuf -> Js_token.t
21+
22+
val main_regexp : Lexing.lexbuf -> Js_token.t

compiler/lib/js_lexer.mll

Lines changed: 88 additions & 128 deletions
Original file line number | Diff line number | Diff line change
@@ -74,17 +74,15 @@ let update_loc lexbuf ?file ~line ~absolute chars =
7474
pos_bol = pos.pos_cnum - chars;
7575
}
7676

77-
let tokinfo prev lexbuf =
78-
let pi = Parse_info.t_of_lexbuf lexbuf in
79-
match prev with
80-
| None -> { pi with Parse_info.fol = Yes }
81-
| Some prev ->
82-
let prev_pi = Js_token.info prev in
83-
if prev_pi.Parse_info.line <> pi.Parse_info.line
84-
&& Option.equal String.equal prev_pi.Parse_info.name pi.Parse_info.name
85-
then { pi with Parse_info.fol = Yes }
86-
else { pi with Parse_info.fol = No }
87-
77+
let tokinfo lexbuf = Parse_info.t_of_lexbuf lexbuf
78+
79+
let with_pos lexbuf f =
80+
let p = lexbuf.Lexing.lex_start_p in
81+
let pos = lexbuf.Lexing.lex_start_pos in
82+
let r = f () in
83+
lexbuf.Lexing.lex_start_p <- p;
84+
lexbuf.Lexing.lex_start_pos <- pos;
85+
r
8886
}
8987

9088
(*****************************************************************************)
@@ -94,111 +92,102 @@ let hexa = ['0'-'9''a'-'f''A'-'F']
9492
let inputCharacter = [^ '\r' '\n' ]
9593
(*****************************************************************************)
9694

97-
rule main prev = parse
95+
rule main = parse
9896

9997
(* ----------------------------------------------------------------------- *)
10098
(* spacing/comments *)
10199
(* ----------------------------------------------------------------------- *)
102100
| "/*" {
103-
let info = tokinfo prev lexbuf in
101+
with_pos lexbuf (fun () ->
102+
let info = tokinfo lexbuf in
104103
let buf = Buffer.create 127 in
105104
Buffer.add_string buf (tok lexbuf);
106105
st_comment buf lexbuf;
107106
let content = Buffer.contents buf in
108-
TComment(content, info)
107+
TComment(content, info))
109108
}
110109
| ("//#" [' ' '\t' ]*
111110
(['0'-'9']+ as line) [' ' '\t' ]*
112111
'"' ([^ '"' '\n']* as file) '"' [' ' '\t' ]*
113112
) as raw NEWLINE {
114-
let info = tokinfo prev lexbuf in
113+
let info = tokinfo lexbuf in
115114
let line = int_of_string line in
116115
update_loc lexbuf ~file ~line ~absolute:true 0;
117116
TCommentLineDirective (raw, info)
118117
}
119118
(* don't keep the trailing \n; it will be handled later *)
120-
| ("//" inputCharacter*) as cmt { TComment(cmt, tokinfo prev lexbuf) }
119+
| ("//" inputCharacter*) as cmt { TComment(cmt, tokinfo lexbuf) }
121120

122121
| [' ' '\t' ]+ {
123-
main prev lexbuf
122+
main lexbuf
124123
}
125124
| NEWLINE {
126125
update_loc lexbuf ~line:1 ~absolute:false 0;
127-
main prev lexbuf
126+
main lexbuf
128127
}
129128

130129
(* ----------------------------------------------------------------------- *)
131130
(* symbols *)
132131
(* ----------------------------------------------------------------------- *)
133132

134-
| "{" { T_LCURLY (tokinfo prev lexbuf); }
135-
| "}" { T_RCURLY (tokinfo prev lexbuf); }
136-
137-
| "(" { T_LPAREN (tokinfo prev lexbuf); }
138-
| ")" { T_RPAREN (tokinfo prev lexbuf); }
139-
140-
| "[" { T_LBRACKET (tokinfo prev lexbuf); }
141-
| "]" { T_RBRACKET (tokinfo prev lexbuf); }
142-
| "." { T_PERIOD (tokinfo prev lexbuf); }
143-
| ";" { T_SEMICOLON (tokinfo prev lexbuf); }
144-
| "," { T_COMMA (tokinfo prev lexbuf); }
145-
| ":" { T_COLON (tokinfo prev lexbuf); }
146-
| "?" { T_PLING (tokinfo prev lexbuf); }
147-
| "&&" { T_AND (tokinfo prev lexbuf); }
148-
| "||" { T_OR (tokinfo prev lexbuf); }
149-
| "===" { T_STRICT_EQUAL (tokinfo prev lexbuf); }
150-
| "!==" { T_STRICT_NOT_EQUAL (tokinfo prev lexbuf); }
151-
| "<=" { T_LESS_THAN_EQUAL (tokinfo prev lexbuf); }
152-
| ">=" { T_GREATER_THAN_EQUAL (tokinfo prev lexbuf); }
153-
| "==" { T_EQUAL (tokinfo prev lexbuf); }
154-
| "!=" { T_NOT_EQUAL (tokinfo prev lexbuf); }
155-
| "++" {
156-
let cpi = tokinfo prev lexbuf in
157-
match prev with
158-
| Some p when (Js_token.info p).Parse_info.line = cpi.Parse_info.line ->
159-
T_INCR_NB(cpi)
160-
| _ -> T_INCR(cpi) }
161-
| "--" {
162-
let cpi = tokinfo prev lexbuf in
163-
match prev with
164-
| Some p when (Js_token.info p).Parse_info.line = cpi.Parse_info.line ->
165-
T_DECR_NB(cpi)
166-
| _ -> T_DECR(cpi) }
167-
| "<<=" { T_LSHIFT_ASSIGN (tokinfo prev lexbuf); }
168-
| "<<" { T_LSHIFT (tokinfo prev lexbuf); }
169-
| ">>=" { T_RSHIFT_ASSIGN (tokinfo prev lexbuf); }
170-
| ">>>=" { T_RSHIFT3_ASSIGN (tokinfo prev lexbuf); }
171-
| "..." { T_SPREAD (tokinfo prev lexbuf); }
172-
| ">>>" { T_RSHIFT3 (tokinfo prev lexbuf); }
173-
| ">>" { T_RSHIFT (tokinfo prev lexbuf); }
174-
| "+=" { T_PLUS_ASSIGN (tokinfo prev lexbuf); }
175-
| "-=" { T_MINUS_ASSIGN (tokinfo prev lexbuf); }
176-
177-
| "*=" { T_MULT_ASSIGN (tokinfo prev lexbuf); }
178-
| "%=" { T_MOD_ASSIGN (tokinfo prev lexbuf); }
179-
| "&=" { T_BIT_AND_ASSIGN (tokinfo prev lexbuf); }
180-
| "|=" { T_BIT_OR_ASSIGN (tokinfo prev lexbuf); }
181-
| "^=" { T_BIT_XOR_ASSIGN (tokinfo prev lexbuf); }
182-
| "<" { T_LESS_THAN (tokinfo prev lexbuf); }
183-
| ">" { T_GREATER_THAN (tokinfo prev lexbuf); }
184-
| "+" { T_PLUS (tokinfo prev lexbuf); }
185-
| "-" { T_MINUS (tokinfo prev lexbuf); }
186-
| "*" { T_MULT (tokinfo prev lexbuf); }
133+
| "{" { T_LCURLY (tokinfo lexbuf); }
134+
| "}" { T_RCURLY (tokinfo lexbuf); }
135+
136+
| "(" { T_LPAREN (tokinfo lexbuf); }
137+
| ")" { T_RPAREN (tokinfo lexbuf); }
138+
139+
| "[" { T_LBRACKET (tokinfo lexbuf); }
140+
| "]" { T_RBRACKET (tokinfo lexbuf); }
141+
| "." { T_PERIOD (tokinfo lexbuf); }
142+
| ";" { T_SEMICOLON (tokinfo lexbuf); }
143+
| "," { T_COMMA (tokinfo lexbuf); }
144+
| ":" { T_COLON (tokinfo lexbuf); }
145+
| "?" { T_PLING (tokinfo lexbuf); }
146+
| "&&" { T_AND (tokinfo lexbuf); }
147+
| "||" { T_OR (tokinfo lexbuf); }
148+
| "===" { T_STRICT_EQUAL (tokinfo lexbuf); }
149+
| "!==" { T_STRICT_NOT_EQUAL (tokinfo lexbuf); }
150+
| "<=" { T_LESS_THAN_EQUAL (tokinfo lexbuf); }
151+
| ">=" { T_GREATER_THAN_EQUAL (tokinfo lexbuf); }
152+
| "==" { T_EQUAL (tokinfo lexbuf); }
153+
| "!=" { T_NOT_EQUAL (tokinfo lexbuf); }
154+
| "++" { T_INCR (tokinfo lexbuf); }
155+
| "--" { T_DECR (tokinfo lexbuf); }
156+
| "<<=" { T_LSHIFT_ASSIGN (tokinfo lexbuf); }
157+
| "<<" { T_LSHIFT (tokinfo lexbuf); }
158+
| ">>=" { T_RSHIFT_ASSIGN (tokinfo lexbuf); }
159+
| ">>>=" { T_RSHIFT3_ASSIGN (tokinfo lexbuf); }
160+
| "..." { T_SPREAD (tokinfo lexbuf); }
161+
| ">>>" { T_RSHIFT3 (tokinfo lexbuf); }
162+
| ">>" { T_RSHIFT (tokinfo lexbuf); }
163+
| "+=" { T_PLUS_ASSIGN (tokinfo lexbuf); }
164+
| "-=" { T_MINUS_ASSIGN (tokinfo lexbuf); }
165+
166+
| "*=" { T_MULT_ASSIGN (tokinfo lexbuf); }
167+
| "%=" { T_MOD_ASSIGN (tokinfo lexbuf); }
168+
| "&=" { T_BIT_AND_ASSIGN (tokinfo lexbuf); }
169+
| "|=" { T_BIT_OR_ASSIGN (tokinfo lexbuf); }
170+
| "^=" { T_BIT_XOR_ASSIGN (tokinfo lexbuf); }
171+
| "<" { T_LESS_THAN (tokinfo lexbuf); }
172+
| ">" { T_GREATER_THAN (tokinfo lexbuf); }
173+
| "+" { T_PLUS (tokinfo lexbuf); }
174+
| "-" { T_MINUS (tokinfo lexbuf); }
175+
| "*" { T_MULT (tokinfo lexbuf); }
187176
(* for '/' see below the regexp handling *)
188-
| "%" { T_MOD (tokinfo prev lexbuf); }
189-
| "|" { T_BIT_OR (tokinfo prev lexbuf); }
190-
| "&" { T_BIT_AND (tokinfo prev lexbuf); }
191-
| "^" { T_BIT_XOR (tokinfo prev lexbuf); }
192-
| "!" { T_NOT (tokinfo prev lexbuf); }
193-
| "~" { T_BIT_NOT (tokinfo prev lexbuf); }
194-
| "=" { T_ASSIGN (tokinfo prev lexbuf); }
177+
| "%" { T_MOD (tokinfo lexbuf); }
178+
| "|" { T_BIT_OR (tokinfo lexbuf); }
179+
| "&" { T_BIT_AND (tokinfo lexbuf); }
180+
| "^" { T_BIT_XOR (tokinfo lexbuf); }
181+
| "!" { T_NOT (tokinfo lexbuf); }
182+
| "~" { T_BIT_NOT (tokinfo lexbuf); }
183+
| "=" { T_ASSIGN (tokinfo lexbuf); }
195184

196185
(* ----------------------------------------------------------------------- *)
197186
(* Keywords and ident *)
198187
(* ----------------------------------------------------------------------- *)
199188
| ['a'-'z''A'-'Z''$''_']['a'-'z''A'-'Z''$''_''0'-'9']* {
200189
let s = tok lexbuf in
201-
let info = tokinfo prev lexbuf in
190+
let info = tokinfo lexbuf in
202191
try
203192
let f = Hashtbl.find keyword_table s in
204193
f info
@@ -212,89 +201,51 @@ rule main prev = parse
212201

213202
| "0" ['X''x'] hexa+ {
214203
let s = tok lexbuf in
215-
let info = tokinfo prev lexbuf in
204+
let info = tokinfo lexbuf in
216205
T_NUMBER (s, info)
217206
}
218207
| '0'['0'-'7']+ {
219208
let s = tok lexbuf in
220-
let info = tokinfo prev lexbuf in
209+
let info = tokinfo lexbuf in
221210
T_NUMBER (s, info)
222211
}
223212

224213
| ['0'-'9']*'.'?['0'-'9']+['e''E']['-''+']?['0'-'9']+ (* {1,3} *) {
225214
let s = tok lexbuf in
226-
let info = tokinfo prev lexbuf in
215+
let info = tokinfo lexbuf in
227216
T_NUMBER (s, info)
228217
}
229218
| ['0'-'9']+'.'? |
230219
['0'-'9']*'.'['0'-'9']+ {
231220
let s = tok lexbuf in
232-
let info = tokinfo prev lexbuf in
221+
let info = tokinfo lexbuf in
233222
T_NUMBER (s, info)
234223
}
235224

236225
(* ----------------------------------------------------------------------- *)
237226
(* Strings *)
238227
(* ----------------------------------------------------------------------- *)
239228
| ("'"|'"') as quote {
229+
with_pos lexbuf (fun () ->
240230
let from = lexbuf.Lexing.lex_start_p.pos_cnum in
241-
let info = tokinfo prev lexbuf in
231+
let info = tokinfo lexbuf in
242232
let buf = Buffer.create 127 in
243233
string_quote quote buf lexbuf;
244234
let s = Buffer.contents buf in
245235
(* s does not contain the enclosing "'" but the info does *)
246236
let to_ = lexbuf.Lexing.lex_curr_p.pos_cnum in
247-
T_STRING (s, info, to_ - 1 - from)
237+
T_STRING (s, info, to_ - 1 - from))
248238
}
249-
250-
(* ----------------------------------------------------------------------- *)
251-
(* Regexp *)
252-
(* ----------------------------------------------------------------------- *)
253-
(* take care of ambiguity with start of comment //, and with
254-
* '/' as a divisor operator
255-
*
256-
* it can not be '/' [^ '/']* '/' because then
257-
* comments will not be recognized as lex tries
258-
* to find the longest match.
259-
*
260-
* It can not be
261-
* '/' [^'*''/'] ([^'/''\n'])* '/' ['A'-'Z''a'-'z']*
262-
* because a / (b/c) will be recognized as a regexp.
263-
*
264-
*)
265-
266-
| "/" | "/=" {
267-
let s = tok lexbuf in
268-
let info = tokinfo prev lexbuf in
269-
270-
match prev with
271-
| Some (
272-
T_IDENTIFIER _
273-
| T_NUMBER _ | T_STRING _ | T_REGEX _
274-
| T_FALSE _ | T_TRUE _ | T_NULL _
275-
| T_THIS _
276-
| T_INCR _ | T_DECR _
277-
| T_RBRACKET _ | T_RPAREN _
278-
) -> begin match s with
279-
| "/" -> T_DIV (info);
280-
| "/=" -> T_DIV_ASSIGN info
281-
| _ -> assert false
282-
end
283-
| _ ->
284-
let buf = Buffer.create 127 in
285-
Buffer.add_string buf s;
286-
regexp buf lexbuf;
287-
T_REGEX (Buffer.contents buf, info)
288-
}
289-
239+
| "/" { T_DIV (tokinfo lexbuf) }
240+
| "/=" { T_DIV_ASSIGN (tokinfo lexbuf) }
290241
(* ----------------------------------------------------------------------- *)
291242
(* eof *)
292243
(* ----------------------------------------------------------------------- *)
293244

294-
| eof { EOF (tokinfo prev lexbuf) }
245+
| eof { EOF (tokinfo lexbuf) }
295246

296247
| _ {
297-
TUnknown (tok lexbuf, tokinfo prev lexbuf)
248+
TUnknown (tok lexbuf, tokinfo lexbuf)
298249
}
299250
(*****************************************************************************)
300251

@@ -330,6 +281,15 @@ and string_quote q buf = parse
330281
| eof { Format.eprintf "LEXER: WEIRD end of file in quoted string@."; ()}
331282

332283
(*****************************************************************************)
284+
and main_regexp = parse
285+
| '/' {
286+
with_pos lexbuf (fun () ->
287+
let info = tokinfo lexbuf in
288+
let buf = Buffer.create 127 in
289+
Buffer.add_string buf (Lexing.lexeme lexbuf);
290+
regexp buf lexbuf;
291+
T_REGEX (Buffer.contents buf, info)) }
292+
333293
and regexp buf = parse
334294
| '\\' (_ as x) { Buffer.add_char buf '\\';
335295
Buffer.add_char buf x;

0 commit comments

Comments (0)