Skip to content

Commit 6642796

Browse files
committed
Compiler: rewrite the JS parser using the Menhir incremental API
1 parent fa494b6 commit 6642796

File tree

14 files changed

+13113
-25845
lines changed

14 files changed

+13113
-25845
lines changed

compiler/lib/dune

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
(name js_of_ocaml_compiler)
33
(public_name js_of_ocaml-compiler)
44
(synopsis "Js_of_ocaml compiler library")
5-
(libraries compiler-libs.common compiler-libs.bytecomp bytes
5+
(libraries compiler-libs.common compiler-libs.bytecomp bytes menhirLib
66
(select source_map_io.ml from
77
(yojson -> source_map_io.yojson.ml)
88
( -> source_map_io.unsupported.ml))
@@ -30,7 +30,7 @@
3030
(targets js_parser.mli js_parser.ml)
3131
(deps standard.mly)
3232
(mode promote)
33-
(action (ignore-stderr (run menhir --stdlib . --external-tokens Js_token --explain %{dep:js_parser.mly}))))
33+
(action (ignore-stderr (run menhir --stdlib . --table --external-tokens Js_token --explain %{dep:js_parser.mly}))))
3434

3535
(rule
3636
(targets annot_parser.mli annot_parser.ml)

compiler/lib/js_lexer.ml

Lines changed: 361 additions & 380 deletions
Large diffs are not rendered by default.

compiler/lib/js_lexer.mli

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,6 @@
1717
* license.txt for more details.
1818
*)
1919

20-
val main : Js_token.t option -> Lexing.lexbuf -> Js_token.t
20+
val main : Lexing.lexbuf -> Js_token.t
21+
22+
val main_regexp : Lexing.lexbuf -> Js_token.t

compiler/lib/js_lexer.mll

Lines changed: 75 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -74,16 +74,7 @@ let update_loc lexbuf ?file ~line ~absolute chars =
7474
pos_bol = pos.pos_cnum - chars;
7575
}
7676

77-
let tokinfo prev lexbuf =
78-
let pi = Parse_info.t_of_lexbuf lexbuf in
79-
match prev with
80-
| None -> { pi with Parse_info.fol = Yes }
81-
| Some prev ->
82-
let prev_pi = Js_token.info prev in
83-
if prev_pi.Parse_info.line <> pi.Parse_info.line
84-
&& Option.equal String.equal prev_pi.Parse_info.name pi.Parse_info.name
85-
then { pi with Parse_info.fol = Yes }
86-
else { pi with Parse_info.fol = No }
77+
let tokinfo lexbuf = Parse_info.t_of_lexbuf lexbuf
8778

8879
}
8980

@@ -94,13 +85,13 @@ let hexa = ['0'-'9''a'-'f''A'-'F']
9485
let inputCharacter = [^ '\r' '\n' ]
9586
(*****************************************************************************)
9687

97-
rule main prev = parse
88+
rule main = parse
9889

9990
(* ----------------------------------------------------------------------- *)
10091
(* spacing/comments *)
10192
(* ----------------------------------------------------------------------- *)
10293
| "/*" {
103-
let info = tokinfo prev lexbuf in
94+
let info = tokinfo lexbuf in
10495
let buf = Buffer.create 127 in
10596
Buffer.add_string buf (tok lexbuf);
10697
st_comment buf lexbuf;
@@ -111,94 +102,84 @@ rule main prev = parse
111102
(['0'-'9']+ as line) [' ' '\t' ]*
112103
'"' ([^ '"' '\n']* as file) '"' [' ' '\t' ]*
113104
) as raw NEWLINE {
114-
let info = tokinfo prev lexbuf in
105+
let info = tokinfo lexbuf in
115106
let line = int_of_string line in
116107
update_loc lexbuf ~file ~line ~absolute:true 0;
117108
TCommentLineDirective (raw, info)
118109
}
119110
(* don't keep the trailing \n; it will be handled later *)
120-
| ("//" inputCharacter*) as cmt { TComment(cmt, tokinfo prev lexbuf) }
111+
| ("//" inputCharacter*) as cmt { TComment(cmt, tokinfo lexbuf) }
121112

122113
| [' ' '\t' ]+ {
123-
main prev lexbuf
114+
main lexbuf
124115
}
125116
| NEWLINE {
126117
update_loc lexbuf ~line:1 ~absolute:false 0;
127-
main prev lexbuf
118+
main lexbuf
128119
}
129120

130121
(* ----------------------------------------------------------------------- *)
131122
(* symbols *)
132123
(* ----------------------------------------------------------------------- *)
133124

134-
| "{" { T_LCURLY (tokinfo prev lexbuf); }
135-
| "}" { T_RCURLY (tokinfo prev lexbuf); }
136-
137-
| "(" { T_LPAREN (tokinfo prev lexbuf); }
138-
| ")" { T_RPAREN (tokinfo prev lexbuf); }
139-
140-
| "[" { T_LBRACKET (tokinfo prev lexbuf); }
141-
| "]" { T_RBRACKET (tokinfo prev lexbuf); }
142-
| "." { T_PERIOD (tokinfo prev lexbuf); }
143-
| ";" { T_SEMICOLON (tokinfo prev lexbuf); }
144-
| "," { T_COMMA (tokinfo prev lexbuf); }
145-
| ":" { T_COLON (tokinfo prev lexbuf); }
146-
| "?" { T_PLING (tokinfo prev lexbuf); }
147-
| "&&" { T_AND (tokinfo prev lexbuf); }
148-
| "||" { T_OR (tokinfo prev lexbuf); }
149-
| "===" { T_STRICT_EQUAL (tokinfo prev lexbuf); }
150-
| "!==" { T_STRICT_NOT_EQUAL (tokinfo prev lexbuf); }
151-
| "<=" { T_LESS_THAN_EQUAL (tokinfo prev lexbuf); }
152-
| ">=" { T_GREATER_THAN_EQUAL (tokinfo prev lexbuf); }
153-
| "==" { T_EQUAL (tokinfo prev lexbuf); }
154-
| "!=" { T_NOT_EQUAL (tokinfo prev lexbuf); }
155-
| "++" {
156-
let cpi = tokinfo prev lexbuf in
157-
match prev with
158-
| Some p when (Js_token.info p).Parse_info.line = cpi.Parse_info.line ->
159-
T_INCR_NB(cpi)
160-
| _ -> T_INCR(cpi) }
161-
| "--" {
162-
let cpi = tokinfo prev lexbuf in
163-
match prev with
164-
| Some p when (Js_token.info p).Parse_info.line = cpi.Parse_info.line ->
165-
T_DECR_NB(cpi)
166-
| _ -> T_DECR(cpi) }
167-
| "<<=" { T_LSHIFT_ASSIGN (tokinfo prev lexbuf); }
168-
| "<<" { T_LSHIFT (tokinfo prev lexbuf); }
169-
| ">>=" { T_RSHIFT_ASSIGN (tokinfo prev lexbuf); }
170-
| ">>>=" { T_RSHIFT3_ASSIGN (tokinfo prev lexbuf); }
171-
| "..." { T_SPREAD (tokinfo prev lexbuf); }
172-
| ">>>" { T_RSHIFT3 (tokinfo prev lexbuf); }
173-
| ">>" { T_RSHIFT (tokinfo prev lexbuf); }
174-
| "+=" { T_PLUS_ASSIGN (tokinfo prev lexbuf); }
175-
| "-=" { T_MINUS_ASSIGN (tokinfo prev lexbuf); }
176-
177-
| "*=" { T_MULT_ASSIGN (tokinfo prev lexbuf); }
178-
| "%=" { T_MOD_ASSIGN (tokinfo prev lexbuf); }
179-
| "&=" { T_BIT_AND_ASSIGN (tokinfo prev lexbuf); }
180-
| "|=" { T_BIT_OR_ASSIGN (tokinfo prev lexbuf); }
181-
| "^=" { T_BIT_XOR_ASSIGN (tokinfo prev lexbuf); }
182-
| "<" { T_LESS_THAN (tokinfo prev lexbuf); }
183-
| ">" { T_GREATER_THAN (tokinfo prev lexbuf); }
184-
| "+" { T_PLUS (tokinfo prev lexbuf); }
185-
| "-" { T_MINUS (tokinfo prev lexbuf); }
186-
| "*" { T_MULT (tokinfo prev lexbuf); }
125+
| "{" { T_LCURLY (tokinfo lexbuf); }
126+
| "}" { T_RCURLY (tokinfo lexbuf); }
127+
128+
| "(" { T_LPAREN (tokinfo lexbuf); }
129+
| ")" { T_RPAREN (tokinfo lexbuf); }
130+
131+
| "[" { T_LBRACKET (tokinfo lexbuf); }
132+
| "]" { T_RBRACKET (tokinfo lexbuf); }
133+
| "." { T_PERIOD (tokinfo lexbuf); }
134+
| ";" { T_SEMICOLON (tokinfo lexbuf); }
135+
| "," { T_COMMA (tokinfo lexbuf); }
136+
| ":" { T_COLON (tokinfo lexbuf); }
137+
| "?" { T_PLING (tokinfo lexbuf); }
138+
| "&&" { T_AND (tokinfo lexbuf); }
139+
| "||" { T_OR (tokinfo lexbuf); }
140+
| "===" { T_STRICT_EQUAL (tokinfo lexbuf); }
141+
| "!==" { T_STRICT_NOT_EQUAL (tokinfo lexbuf); }
142+
| "<=" { T_LESS_THAN_EQUAL (tokinfo lexbuf); }
143+
| ">=" { T_GREATER_THAN_EQUAL (tokinfo lexbuf); }
144+
| "==" { T_EQUAL (tokinfo lexbuf); }
145+
| "!=" { T_NOT_EQUAL (tokinfo lexbuf); }
146+
| "++" { T_INCR (tokinfo lexbuf); }
147+
| "--" { T_DECR (tokinfo lexbuf); }
148+
| "<<=" { T_LSHIFT_ASSIGN (tokinfo lexbuf); }
149+
| "<<" { T_LSHIFT (tokinfo lexbuf); }
150+
| ">>=" { T_RSHIFT_ASSIGN (tokinfo lexbuf); }
151+
| ">>>=" { T_RSHIFT3_ASSIGN (tokinfo lexbuf); }
152+
| "..." { T_SPREAD (tokinfo lexbuf); }
153+
| ">>>" { T_RSHIFT3 (tokinfo lexbuf); }
154+
| ">>" { T_RSHIFT (tokinfo lexbuf); }
155+
| "+=" { T_PLUS_ASSIGN (tokinfo lexbuf); }
156+
| "-=" { T_MINUS_ASSIGN (tokinfo lexbuf); }
157+
158+
| "*=" { T_MULT_ASSIGN (tokinfo lexbuf); }
159+
| "%=" { T_MOD_ASSIGN (tokinfo lexbuf); }
160+
| "&=" { T_BIT_AND_ASSIGN (tokinfo lexbuf); }
161+
| "|=" { T_BIT_OR_ASSIGN (tokinfo lexbuf); }
162+
| "^=" { T_BIT_XOR_ASSIGN (tokinfo lexbuf); }
163+
| "<" { T_LESS_THAN (tokinfo lexbuf); }
164+
| ">" { T_GREATER_THAN (tokinfo lexbuf); }
165+
| "+" { T_PLUS (tokinfo lexbuf); }
166+
| "-" { T_MINUS (tokinfo lexbuf); }
167+
| "*" { T_MULT (tokinfo lexbuf); }
187168
(* for '/' see below the regexp handling *)
188-
| "%" { T_MOD (tokinfo prev lexbuf); }
189-
| "|" { T_BIT_OR (tokinfo prev lexbuf); }
190-
| "&" { T_BIT_AND (tokinfo prev lexbuf); }
191-
| "^" { T_BIT_XOR (tokinfo prev lexbuf); }
192-
| "!" { T_NOT (tokinfo prev lexbuf); }
193-
| "~" { T_BIT_NOT (tokinfo prev lexbuf); }
194-
| "=" { T_ASSIGN (tokinfo prev lexbuf); }
169+
| "%" { T_MOD (tokinfo lexbuf); }
170+
| "|" { T_BIT_OR (tokinfo lexbuf); }
171+
| "&" { T_BIT_AND (tokinfo lexbuf); }
172+
| "^" { T_BIT_XOR (tokinfo lexbuf); }
173+
| "!" { T_NOT (tokinfo lexbuf); }
174+
| "~" { T_BIT_NOT (tokinfo lexbuf); }
175+
| "=" { T_ASSIGN (tokinfo lexbuf); }
195176

196177
(* ----------------------------------------------------------------------- *)
197178
(* Keywords and ident *)
198179
(* ----------------------------------------------------------------------- *)
199180
| ['a'-'z''A'-'Z''$''_']['a'-'z''A'-'Z''$''_''0'-'9']* {
200181
let s = tok lexbuf in
201-
let info = tokinfo prev lexbuf in
182+
let info = tokinfo lexbuf in
202183
try
203184
let f = Hashtbl.find keyword_table s in
204185
f info
@@ -212,24 +193,24 @@ rule main prev = parse
212193

213194
| "0" ['X''x'] hexa+ {
214195
let s = tok lexbuf in
215-
let info = tokinfo prev lexbuf in
196+
let info = tokinfo lexbuf in
216197
T_NUMBER (s, info)
217198
}
218199
| '0'['0'-'7']+ {
219200
let s = tok lexbuf in
220-
let info = tokinfo prev lexbuf in
201+
let info = tokinfo lexbuf in
221202
T_NUMBER (s, info)
222203
}
223204

224205
| ['0'-'9']*'.'?['0'-'9']+['e''E']['-''+']?['0'-'9']+ (* {1,3} *) {
225206
let s = tok lexbuf in
226-
let info = tokinfo prev lexbuf in
207+
let info = tokinfo lexbuf in
227208
T_NUMBER (s, info)
228209
}
229210
| ['0'-'9']+'.'? |
230211
['0'-'9']*'.'['0'-'9']+ {
231212
let s = tok lexbuf in
232-
let info = tokinfo prev lexbuf in
213+
let info = tokinfo lexbuf in
233214
T_NUMBER (s, info)
234215
}
235216

@@ -238,7 +219,7 @@ rule main prev = parse
238219
(* ----------------------------------------------------------------------- *)
239220
| ("'"|'"') as quote {
240221
let from = lexbuf.Lexing.lex_start_p.pos_cnum in
241-
let info = tokinfo prev lexbuf in
222+
let info = tokinfo lexbuf in
242223
let buf = Buffer.create 127 in
243224
string_quote quote buf lexbuf;
244225
let s = Buffer.contents buf in
@@ -263,38 +244,16 @@ rule main prev = parse
263244
*
264245
*)
265246

266-
| "/" | "/=" {
267-
let s = tok lexbuf in
268-
let info = tokinfo prev lexbuf in
269-
270-
match prev with
271-
| Some (
272-
T_IDENTIFIER _
273-
| T_NUMBER _ | T_STRING _ | T_REGEX _
274-
| T_FALSE _ | T_TRUE _ | T_NULL _
275-
| T_THIS _
276-
| T_INCR _ | T_DECR _
277-
| T_RBRACKET _ | T_RPAREN _
278-
) -> begin match s with
279-
| "/" -> T_DIV (info);
280-
| "/=" -> T_DIV_ASSIGN info
281-
| _ -> assert false
282-
end
283-
| _ ->
284-
let buf = Buffer.create 127 in
285-
Buffer.add_string buf s;
286-
regexp buf lexbuf;
287-
T_REGEX (Buffer.contents buf, info)
288-
}
289-
247+
| "/" { T_DIV (tokinfo lexbuf) }
248+
| "/=" { T_DIV_ASSIGN (tokinfo lexbuf) }
290249
(* ----------------------------------------------------------------------- *)
291250
(* eof *)
292251
(* ----------------------------------------------------------------------- *)
293252

294-
| eof { EOF (tokinfo prev lexbuf) }
253+
| eof { EOF (tokinfo lexbuf) }
295254

296255
| _ {
297-
TUnknown (tok lexbuf, tokinfo prev lexbuf)
256+
TUnknown (tok lexbuf, tokinfo lexbuf)
298257
}
299258
(*****************************************************************************)
300259

@@ -330,6 +289,14 @@ and string_quote q buf = parse
330289
| eof { Format.eprintf "LEXER: WEIRD end of file in quoted string@."; ()}
331290

332291
(*****************************************************************************)
292+
and main_regexp = parse
293+
| '/' {
294+
let info = tokinfo lexbuf in
295+
let buf = Buffer.create 127 in
296+
Buffer.add_string buf (Lexing.lexeme lexbuf);
297+
regexp buf lexbuf;
298+
T_REGEX (Buffer.contents buf, info) }
299+
333300
and regexp buf = parse
334301
| '\\' (_ as x) { Buffer.add_char buf '\\';
335302
Buffer.add_char buf x;

0 commit comments

Comments (0)