@@ -74,17 +74,15 @@ let update_loc lexbuf ?file ~line ~absolute chars =
7474 pos_bol = pos.pos_cnum - chars;
7575 }
7676
77- let tokinfo prev lexbuf =
78- let pi = Parse_info. t_of_lexbuf lexbuf in
79- match prev with
80- | None -> { pi with Parse_info. fol = Yes }
81- | Some prev ->
82- let prev_pi = Js_token. info prev in
83- if prev_pi.Parse_info. line <> pi.Parse_info. line
84- && Option. equal String. equal prev_pi.Parse_info. name pi.Parse_info. name
85- then { pi with Parse_info. fol = Yes }
86- else { pi with Parse_info. fol = No }
87-
77+ let tokinfo lexbuf = Parse_info. t_of_lexbuf lexbuf
78+
79+ let with_pos lexbuf f =
80+ let p = lexbuf.Lexing. lex_start_p in
81+ let pos = lexbuf.Lexing. lex_start_pos in
82+ let r = f () in
83+ lexbuf.Lexing. lex_start_p <- p;
84+ lexbuf.Lexing. lex_start_pos <- pos;
85+ r
8886}
8987
9088(* ****************************************************************************)
@@ -94,111 +92,102 @@ let hexa = ['0'-'9''a'-'f''A'-'F']
9492let inputCharacter = [^ '\r' '\n' ]
9593(* ****************************************************************************)
9694
97- rule main prev = parse
95+ rule main = parse
9896
9997 (* ----------------------------------------------------------------------- *)
10098 (* spacing/comments *)
10199 (* ----------------------------------------------------------------------- *)
102100 | "/*" {
103- let info = tokinfo prev lexbuf in
101+ with_pos lexbuf (fun () ->
102+ let info = tokinfo lexbuf in
104103 let buf = Buffer. create 127 in
105104 Buffer. add_string buf (tok lexbuf);
106105 st_comment buf lexbuf;
107106 let content = Buffer. contents buf in
108- TComment (content, info)
107+ TComment (content, info))
109108 }
110109 | ("//#" [' ' '\t' ]*
111110 (['0' - '9' ]+ as line) [' ' '\t' ]*
112111 '"' ([^ '"' '\n' ]* as file) '"' [' ' '\t' ]*
113112 ) as raw NEWLINE {
114- let info = tokinfo prev lexbuf in
113+ let info = tokinfo lexbuf in
115114 let line = int_of_string line in
116115 update_loc lexbuf ~file ~line ~absolute: true 0 ;
117116 TCommentLineDirective (raw, info)
118117 }
119118 (* don't keep the trailing \n; it will be handled later *)
120- | ("//" inputCharacter* ) as cmt { TComment (cmt, tokinfo prev lexbuf) }
119+ | ("//" inputCharacter* ) as cmt { TComment (cmt, tokinfo lexbuf) }
121120
122121 | [' ' '\t' ]+ {
123- main prev lexbuf
122+ main lexbuf
124123 }
125124 | NEWLINE {
126125 update_loc lexbuf ~line: 1 ~absolute: false 0 ;
127- main prev lexbuf
126+ main lexbuf
128127 }
129128
130129 (* ----------------------------------------------------------------------- *)
131130 (* symbols *)
132131 (* ----------------------------------------------------------------------- *)
133132
134- | " {" { T_LCURLY (tokinfo prev lexbuf); }
135- | " }" { T_RCURLY (tokinfo prev lexbuf); }
136-
137- | " (" { T_LPAREN (tokinfo prev lexbuf); }
138- | " )" { T_RPAREN (tokinfo prev lexbuf); }
139-
140- | " [" { T_LBRACKET (tokinfo prev lexbuf); }
141- | " ]" { T_RBRACKET (tokinfo prev lexbuf); }
142- | " ." { T_PERIOD (tokinfo prev lexbuf); }
143- | " ;" { T_SEMICOLON (tokinfo prev lexbuf); }
144- | " ," { T_COMMA (tokinfo prev lexbuf); }
145- | " :" { T_COLON (tokinfo prev lexbuf); }
146- | " ?" { T_PLING (tokinfo prev lexbuf); }
147- | " &&" { T_AND (tokinfo prev lexbuf); }
148- | " ||" { T_OR (tokinfo prev lexbuf); }
149- | " ===" { T_STRICT_EQUAL (tokinfo prev lexbuf); }
150- | " !==" { T_STRICT_NOT_EQUAL (tokinfo prev lexbuf); }
151- | " <=" { T_LESS_THAN_EQUAL (tokinfo prev lexbuf); }
152- | " >=" { T_GREATER_THAN_EQUAL (tokinfo prev lexbuf); }
153- | " ==" { T_EQUAL (tokinfo prev lexbuf); }
154- | " !=" { T_NOT_EQUAL (tokinfo prev lexbuf); }
155- | " ++" {
156- let cpi = tokinfo prev lexbuf in
157- match prev with
158- | Some p when (Js_token. info p).Parse_info. line = cpi.Parse_info. line ->
159- T_INCR_NB (cpi)
160- | _ -> T_INCR (cpi) }
161- | " --" {
162- let cpi = tokinfo prev lexbuf in
163- match prev with
164- | Some p when (Js_token. info p).Parse_info. line = cpi.Parse_info. line ->
165- T_DECR_NB (cpi)
166- | _ -> T_DECR (cpi) }
167- | " <<=" { T_LSHIFT_ASSIGN (tokinfo prev lexbuf); }
168- | " <<" { T_LSHIFT (tokinfo prev lexbuf); }
169- | " >>=" { T_RSHIFT_ASSIGN (tokinfo prev lexbuf); }
170- | " >>>=" { T_RSHIFT3_ASSIGN (tokinfo prev lexbuf); }
171- | " ..." { T_SPREAD (tokinfo prev lexbuf); }
172- | " >>>" { T_RSHIFT3 (tokinfo prev lexbuf); }
173- | " >>" { T_RSHIFT (tokinfo prev lexbuf); }
174- | " +=" { T_PLUS_ASSIGN (tokinfo prev lexbuf); }
175- | " -=" { T_MINUS_ASSIGN (tokinfo prev lexbuf); }
176-
177- | " *=" { T_MULT_ASSIGN (tokinfo prev lexbuf); }
178- | " %=" { T_MOD_ASSIGN (tokinfo prev lexbuf); }
179- | " &=" { T_BIT_AND_ASSIGN (tokinfo prev lexbuf); }
180- | " |=" { T_BIT_OR_ASSIGN (tokinfo prev lexbuf); }
181- | " ^=" { T_BIT_XOR_ASSIGN (tokinfo prev lexbuf); }
182- | " <" { T_LESS_THAN (tokinfo prev lexbuf); }
183- | " >" { T_GREATER_THAN (tokinfo prev lexbuf); }
184- | " +" { T_PLUS (tokinfo prev lexbuf); }
185- | " -" { T_MINUS (tokinfo prev lexbuf); }
186- | " *" { T_MULT (tokinfo prev lexbuf); }
133+ | "{" { T_LCURLY (tokinfo lexbuf); }
134+ | "}" { T_RCURLY (tokinfo lexbuf); }
135+
136+ | "(" { T_LPAREN (tokinfo lexbuf); }
137+ | ")" { T_RPAREN (tokinfo lexbuf); }
138+
139+ | "[" { T_LBRACKET (tokinfo lexbuf); }
140+ | "]" { T_RBRACKET (tokinfo lexbuf); }
141+ | "." { T_PERIOD (tokinfo lexbuf); }
142+ | ";" { T_SEMICOLON (tokinfo lexbuf); }
143+ | "," { T_COMMA (tokinfo lexbuf); }
144+ | ":" { T_COLON (tokinfo lexbuf); }
145+ | "?" { T_PLING (tokinfo lexbuf); }
146+ | "&&" { T_AND (tokinfo lexbuf); }
147+ | "||" { T_OR (tokinfo lexbuf); }
148+ | "===" { T_STRICT_EQUAL (tokinfo lexbuf); }
149+ | "!==" { T_STRICT_NOT_EQUAL (tokinfo lexbuf); }
150+ | "<=" { T_LESS_THAN_EQUAL (tokinfo lexbuf); }
151+ | ">=" { T_GREATER_THAN_EQUAL (tokinfo lexbuf); }
152+ | "==" { T_EQUAL (tokinfo lexbuf); }
153+ | "!=" { T_NOT_EQUAL (tokinfo lexbuf); }
154+ | "++" { T_INCR (tokinfo lexbuf); }
155+ | "--" { T_DECR (tokinfo lexbuf); }
156+ | "<<=" { T_LSHIFT_ASSIGN (tokinfo lexbuf); }
157+ | "<<" { T_LSHIFT (tokinfo lexbuf); }
158+ | ">>=" { T_RSHIFT_ASSIGN (tokinfo lexbuf); }
159+ | ">>>=" { T_RSHIFT3_ASSIGN (tokinfo lexbuf); }
160+ | "..." { T_SPREAD (tokinfo lexbuf); }
161+ | ">>>" { T_RSHIFT3 (tokinfo lexbuf); }
162+ | ">>" { T_RSHIFT (tokinfo lexbuf); }
163+ | "+=" { T_PLUS_ASSIGN (tokinfo lexbuf); }
164+ | "-=" { T_MINUS_ASSIGN (tokinfo lexbuf); }
165+
166+ | "*=" { T_MULT_ASSIGN (tokinfo lexbuf); }
167+ | "%=" { T_MOD_ASSIGN (tokinfo lexbuf); }
168+ | "&=" { T_BIT_AND_ASSIGN (tokinfo lexbuf); }
169+ | "|=" { T_BIT_OR_ASSIGN (tokinfo lexbuf); }
170+ | "^=" { T_BIT_XOR_ASSIGN (tokinfo lexbuf); }
171+ | "<" { T_LESS_THAN (tokinfo lexbuf); }
172+ | ">" { T_GREATER_THAN (tokinfo lexbuf); }
173+ | "+" { T_PLUS (tokinfo lexbuf); }
174+ | "-" { T_MINUS (tokinfo lexbuf); }
175+ | "*" { T_MULT (tokinfo lexbuf); }
187176 (* for '/' see below the regexp handling *)
188- | " %" { T_MOD (tokinfo prev lexbuf); }
189- | " |" { T_BIT_OR (tokinfo prev lexbuf); }
190- | " &" { T_BIT_AND (tokinfo prev lexbuf); }
191- | " ^" { T_BIT_XOR (tokinfo prev lexbuf); }
192- | " !" { T_NOT (tokinfo prev lexbuf); }
193- | " ~" { T_BIT_NOT (tokinfo prev lexbuf); }
194- | " =" { T_ASSIGN (tokinfo prev lexbuf); }
177+ | "%" { T_MOD (tokinfo lexbuf); }
178+ | "|" { T_BIT_OR (tokinfo lexbuf); }
179+ | "&" { T_BIT_AND (tokinfo lexbuf); }
180+ | "^" { T_BIT_XOR (tokinfo lexbuf); }
181+ | "!" { T_NOT (tokinfo lexbuf); }
182+ | "~" { T_BIT_NOT (tokinfo lexbuf); }
183+ | "=" { T_ASSIGN (tokinfo lexbuf); }
195184
196185 (* ----------------------------------------------------------------------- *)
197186 (* Keywords and ident *)
198187 (* ----------------------------------------------------------------------- *)
199188 | ['a' - 'z''A' - 'Z''$''_' ]['a' - 'z''A' - 'Z''$''_''0' - '9' ]* {
200189 let s = tok lexbuf in
201- let info = tokinfo prev lexbuf in
190+ let info = tokinfo lexbuf in
202191 try
203192 let f = Hashtbl. find keyword_table s in
204193 f info
@@ -212,89 +201,51 @@ rule main prev = parse
212201
213202 | "0" ['X''x' ] hexa+ {
214203 let s = tok lexbuf in
215- let info = tokinfo prev lexbuf in
204+ let info = tokinfo lexbuf in
216205 T_NUMBER (s, info)
217206 }
218207 | '0' ['0' - '7' ]+ {
219208 let s = tok lexbuf in
220- let info = tokinfo prev lexbuf in
209+ let info = tokinfo lexbuf in
221210 T_NUMBER (s, info)
222211 }
223212
224213 | ['0' - '9' ]* '.' ?['0' - '9' ]+ ['e''E' ]['-''+' ]?['0' - '9' ]+ (* {1,3} *) {
225214 let s = tok lexbuf in
226- let info = tokinfo prev lexbuf in
215+ let info = tokinfo lexbuf in
227216 T_NUMBER (s, info)
228217 }
229218 | ['0' - '9' ]+ '.' ? |
230219 ['0' - '9' ]* '.' ['0' - '9' ]+ {
231220 let s = tok lexbuf in
232- let info = tokinfo prev lexbuf in
221+ let info = tokinfo lexbuf in
233222 T_NUMBER (s, info)
234223 }
235224
236225 (* ----------------------------------------------------------------------- *)
237226 (* Strings *)
238227 (* ----------------------------------------------------------------------- *)
239228 | ("'" | '"' ) as quote {
229+ with_pos lexbuf (fun () ->
240230 let from = lexbuf.Lexing. lex_start_p.pos_cnum in
241- let info = tokinfo prev lexbuf in
231+ let info = tokinfo lexbuf in
242232 let buf = Buffer. create 127 in
243233 string_quote quote buf lexbuf;
244234 let s = Buffer. contents buf in
245235 (* s does not contain the enclosing "'" but the info does *)
246236 let to_ = lexbuf.Lexing. lex_curr_p.pos_cnum in
247- T_STRING (s, info, to_ - 1 - from)
237+ T_STRING (s, info, to_ - 1 - from))
248238 }
249-
250- (* ----------------------------------------------------------------------- *)
251- (* Regexp *)
252- (* ----------------------------------------------------------------------- *)
253- (* take care of ambiguity with start of comment //, and with
254- * '/' as a divisor operator
255- *
256- * it can not be '/' [^ '/']* '/' because then
257- * comments will not be recognized as lex tries
258- * to find the longest match.
259- *
260- * It can not be
261- * '/' [^'*''/'] ([^'/''\n'])* '/' ['A'-'Z''a'-'z']*
262- * because a / (b/c) will be recognized as a regexp.
263- *
264- *)
265-
266- | " /" | " /=" {
267- let s = tok lexbuf in
268- let info = tokinfo prev lexbuf in
269-
270- match prev with
271- | Some (
272- T_IDENTIFIER _
273- | T_NUMBER _ | T_STRING _ | T_REGEX _
274- | T_FALSE _ | T_TRUE _ | T_NULL _
275- | T_THIS _
276- | T_INCR _ | T_DECR _
277- | T_RBRACKET _ | T_RPAREN _
278- ) -> begin match s with
279- | "/" -> T_DIV (info);
280- | "/=" -> T_DIV_ASSIGN info
281- | _ -> assert false
282- end
283- | _ ->
284- let buf = Buffer. create 127 in
285- Buffer. add_string buf s;
286- regexp buf lexbuf;
287- T_REGEX (Buffer. contents buf, info)
288- }
289-
239+ | "/" { T_DIV (tokinfo lexbuf) }
240+ | "/=" { T_DIV_ASSIGN (tokinfo lexbuf) }
290241 (* ----------------------------------------------------------------------- *)
291242 (* eof *)
292243 (* ----------------------------------------------------------------------- *)
293244
294- | eof { EOF (tokinfo prev lexbuf) }
245+ | eof { EOF (tokinfo lexbuf) }
295246
296247 | _ {
297- TUnknown (tok lexbuf, tokinfo prev lexbuf)
248+ TUnknown (tok lexbuf, tokinfo lexbuf)
298249 }
299250(* ****************************************************************************)
300251
@@ -330,6 +281,15 @@ and string_quote q buf = parse
330281 | eof { Format. eprintf " LEXER: WEIRD end of file in quoted string@." ; () }
331282
332283(* ****************************************************************************)
284+ and main_regexp = parse
285+ | '/' {
286+ with_pos lexbuf (fun () ->
287+ let info = tokinfo lexbuf in
288+ let buf = Buffer. create 127 in
289+ Buffer. add_string buf (Lexing. lexeme lexbuf);
290+ regexp buf lexbuf;
291+ T_REGEX (Buffer. contents buf, info)) }
292+
333293and regexp buf = parse
334294 | '\\' (_ as x) { Buffer. add_char buf '\\' ;
335295 Buffer. add_char buf x;
0 commit comments