/*
 [The "BSD licence"]
 Copyright (c) 2013 Terence Parr, Sam Harwell
 Copyright (c) 2017 Ivan Kochurkin (upgrade to Java 8)
 Copyright (c) 2021 Michał Lorek (upgrade to Java 11)
 Copyright (c) 2022 Michał Lorek (upgrade to Java 17)
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
31+
// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true
35+
lexer grammar JavaLexer;

// Keywords
//
// One token type per reserved word. Each literal must contain exactly the
// keyword text: any extra character inside the quotes (including a space)
// becomes part of what the lexer has to match.
//
// These rules are declared before IDENTIFIER on purpose: when two lexer rules
// match the same longest text, ANTLR picks the rule that appears first, so
// "class" becomes CLASS rather than IDENTIFIER, while "classes" is still an
// IDENTIFIER because the longer match wins.

ABSTRACT     : 'abstract';
ASSERT       : 'assert';
BOOLEAN      : 'boolean';
BREAK        : 'break';
BYTE         : 'byte';
CASE         : 'case';
CATCH        : 'catch';
CHAR         : 'char';
CLASS        : 'class';
CONST        : 'const';
CONTINUE     : 'continue';
DEFAULT      : 'default';
DO           : 'do';
DOUBLE       : 'double';
ELSE         : 'else';
ENUM         : 'enum';
EXTENDS      : 'extends';
FINAL        : 'final';
FINALLY      : 'finally';
FLOAT        : 'float';
FOR          : 'for';
IF           : 'if';
GOTO         : 'goto';
IMPLEMENTS   : 'implements';
IMPORT       : 'import';
INSTANCEOF   : 'instanceof';
INT          : 'int';
INTERFACE    : 'interface';
LONG         : 'long';
NATIVE       : 'native';
NEW          : 'new';
PACKAGE      : 'package';
PRIVATE      : 'private';
PROTECTED    : 'protected';
PUBLIC       : 'public';
RETURN       : 'return';
SHORT        : 'short';
STATIC       : 'static';
STRICTFP     : 'strictfp';
SUPER        : 'super';
SWITCH       : 'switch';
SYNCHRONIZED : 'synchronized';
THIS         : 'this';
THROW        : 'throw';
THROWS       : 'throws';
TRANSIENT    : 'transient';
TRY          : 'try';
VOID         : 'void';
VOLATILE     : 'volatile';
WHILE        : 'while';
90+
// Words introduced by later Java versions. They get dedicated token types
// here, so the lexer never reports them as IDENTIFIER; allowing them where an
// ordinary identifier is expected is left to the parser (not visible in this
// file - confirm against the parser grammar).

// Module related keywords
MODULE     : 'module';
OPEN       : 'open';
REQUIRES   : 'requires';
EXPORTS    : 'exports';
OPENS      : 'opens';
TO         : 'to';
USES       : 'uses';
PROVIDES   : 'provides';
WITH       : 'with';
TRANSITIVE : 'transitive';

// Local Variable Type Inference
VAR : 'var'; // reserved type name

// Switch Expressions
YIELD : 'yield'; // reserved type name from Java 14

// Records
RECORD : 'record';

// Sealed Classes
SEALED  : 'sealed';
PERMITS : 'permits';
// Contains a hyphen, which is not a Letter, so this text can never be an
// IDENTIFIER; the longest-match rule makes it one token instead of
// IDENTIFIER SUB SEALED.
NON_SEALED : 'non-sealed';
116+
// Literals
//
// Inside a quoted literal or a [...] set every character, including a blank,
// is matched literally. None of the sets below may contain stray spaces
// (only TEXT_BLOCK's [ \t] intentionally includes one).

// Integer literals. Underscores may separate digits but never start or end
// the digit run; an optional l/L suffix marks a long.
// DECIMAL: a lone 0, or a non-zero digit followed by more digits, optionally
// with underscores directly after the first digit.
DECIMAL_LITERAL : ('0' | [1-9] (Digits? | '_'+ Digits)) [lL]?;
HEX_LITERAL     : '0' [xX] [0-9a-fA-F] ([0-9a-fA-F_]* [0-9a-fA-F])? [lL]?;
// OCT: leading 0, optional underscores, then at least one octal digit.
OCT_LITERAL     : '0' '_'* [0-7] ([0-7_]* [0-7])? [lL]?;
BINARY_LITERAL  : '0' [bB] [01] ([01_]* [01])? [lL]?;

// Decimal floating point:
//   alt 1 - has a decimal point (digits on at least one side), with optional
//           exponent and optional f/F/d/D suffix;
//   alt 2 - no decimal point, so an exponent or a suffix is mandatory to
//           distinguish it from DECIMAL_LITERAL.
FLOAT_LITERAL:
    (Digits '.' Digits? | '.' Digits) ExponentPart? [fFdD]?
    | Digits (ExponentPart [fFdD]? | [fFdD])
;

// Hexadecimal floating point: the binary exponent (p/P) is required.
HEX_FLOAT_LITERAL: '0' [xX] (HexDigits '.'? | HexDigits? '.' HexDigits) [pP] [+-]? Digits [fFdD]?;

BOOL_LITERAL: 'true' | 'false';

// Exactly one character or one escape sequence between single quotes.
// Raw quote, backslash, CR and LF are excluded from the plain-character set.
CHAR_LITERAL: '\'' (~['\\\r\n] | EscapeSequence) '\'';

// Zero or more characters/escapes between double quotes, on a single line.
STRING_LITERAL: '"' (~["\\\r\n] | EscapeSequence)* '"';

// Opening """ must be followed only by spaces/tabs and then a line terminator.
// The body is non-greedy (*?) so the token ends at the first closing """.
TEXT_BLOCK: '"""' [ \t]* [\r\n] (. | EscapeSequence)*? '"""';

NULL_LITERAL: 'null';
140+
// Separators
//
// Single-character punctuation. Each literal holds exactly one character;
// a blank inside the quotes would become part of the required input.

LPAREN : '(';
RPAREN : ')';
LBRACE : '{';
RBRACE : '}';
LBRACK : '[';
RBRACK : ']';
SEMI   : ';';
COMMA  : ',';
DOT    : '.';
152+
// Operators
//
// ANTLR lexers take the longest match, so multi-character operators such as
// '==' or '>>>=' win over their single-character prefixes regardless of the
// order of the rules below.
// Note: no '<<', '>>' or '>>>' token is defined in this grammar; only the
// compound-assignment forms are (presumably shifts are assembled from LT/GT
// in the parser - confirm against the parser grammar).

ASSIGN   : '=';
GT       : '>';
LT       : '<';
BANG     : '!';
TILDE    : '~';
QUESTION : '?';
COLON    : ':';
EQUAL    : '==';
LE       : '<=';
GE       : '>=';
NOTEQUAL : '!=';
AND      : '&&';
OR       : '||';
INC      : '++';
DEC      : '--';
ADD      : '+';
SUB      : '-';
MUL      : '*';
DIV      : '/';
BITAND   : '&';
BITOR    : '|';
CARET    : '^';
MOD      : '%';

// Compound assignment operators
ADD_ASSIGN     : '+=';
SUB_ASSIGN     : '-=';
MUL_ASSIGN     : '*=';
DIV_ASSIGN     : '/=';
AND_ASSIGN     : '&=';
OR_ASSIGN      : '|=';
XOR_ASSIGN     : '^=';
MOD_ASSIGN     : '%=';
LSHIFT_ASSIGN  : '<<=';
RSHIFT_ASSIGN  : '>>=';
URSHIFT_ASSIGN : '>>>=';

// Java 8 tokens

ARROW      : '->';
COLONCOLON : '::';

// Additional symbols not defined in the lexical specification

AT       : '@';
ELLIPSIS : '...';
200+
// Whitespace and comments
//
// All three rules send their text to the HIDDEN channel, so the tokens are
// still produced (and keep their positions) without being routed to the
// default channel alongside the other tokens.

// Runs of space, tab, CR, LF and form feed (U+000C).
WS           : [ \t\r\n\u000C]+ -> channel(HIDDEN);
// Block comment; '.*?' is non-greedy so the token ends at the first '*/'.
COMMENT      : '/*' .*? '*/'    -> channel(HIDDEN);
// Line comment; runs up to, but not including, the line terminator.
LINE_COMMENT : '//' ~[\r\n]*    -> channel(HIDDEN);
206+
// Identifiers
//
// A Letter followed by any number of letters or digits. This rule is declared
// after every keyword rule, so text that is exactly a keyword resolves to the
// keyword token (the first-declared rule wins when match lengths are equal).

IDENTIFIER: Letter LetterOrDigit*;
210+
// Fragment rules
//
// Fragments never produce tokens on their own; they are building blocks
// referenced by the token rules of this grammar. Character sets below must
// not contain stray blanks: inside [...] a space is a set member, and a blank
// placed before '-' changes the start of a range.

// Decimal exponent: e/E, optional sign, digits.
fragment ExponentPart: [eE] [+-]? Digits;

// Escape sequences accepted in char, string and text-block literals:
//   alt 1 - single-character escapes: \b \t \n \f \r \" \' \\
//   alt 2 - octal escapes, one to three digits (first digit 0-3 when three)
//   alt 3 - Unicode escapes: backslash, one or more 'u', four hex digits
// In alts 1 and 2 the optional 'u005c' lets the escape's backslash itself be
// spelled as the Unicode escape \u005c.
fragment EscapeSequence:
    '\\' 'u005c'? [btnfr"'\\]
    | '\\' 'u005c'? ([0-3]? [0-7])? [0-7]
    | '\\' 'u'+ HexDigit HexDigit HexDigit HexDigit
;

// Hex digit run; underscores allowed only between digits.
fragment HexDigits: HexDigit ((HexDigit | '_')* HexDigit)?;

fragment HexDigit: [0-9a-fA-F];

// Decimal digit run; underscores allowed only between digits.
fragment Digits: [0-9] ([0-9_]* [0-9])?;

fragment LetterOrDigit: Letter | [0-9];

fragment Letter:
    [a-zA-Z$_]                        // these are the "java letters" below 0x7F
    | ~[\u0000-\u007F\uD800-\uDBFF]   // any character above 0x7F that is not a high surrogate
    | [\uD800-\uDBFF] [\uDC00-\uDFFF] // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
;
0 commit comments