/*
 [The "BSD licence"]
 Copyright (c) 2013 Terence Parr, Sam Harwell
 Copyright (c) 2017 Ivan Kochurkin (upgrade to Java 8)
 Copyright (c) 2021 Michał Lorek (upgrade to Java 11)
 Copyright (c) 2022 Michał Lorek (upgrade to Java 17)
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// Directives for the antlr-format tool; they are plain comments to ANTLR itself.
// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true

// Stand-alone lexer grammar: it declares only token and fragment rules, no parser rules.
lexer grammar JavaLexer;

// Keywords
//
// One token type per reserved word; each literal is the exact keyword text.
// ANTLR picks the longest match and, on a tie, the rule that is declared first, so these
// rules have to stay above IDENTIFIER for the words to come out as keyword tokens.

ABSTRACT     : 'abstract';
ASSERT       : 'assert';
BOOLEAN      : 'boolean';
BREAK        : 'break';
BYTE         : 'byte';
CASE         : 'case';
CATCH        : 'catch';
CHAR         : 'char';
CLASS        : 'class';
CONST        : 'const';
CONTINUE     : 'continue';
DEFAULT      : 'default';
DO           : 'do';
DOUBLE       : 'double';
ELSE         : 'else';
ENUM         : 'enum';
EXTENDS      : 'extends';
FINAL        : 'final';
FINALLY      : 'finally';
FLOAT        : 'float';
FOR          : 'for';
IF           : 'if';
GOTO         : 'goto';
IMPLEMENTS   : 'implements';
IMPORT       : 'import';
INSTANCEOF   : 'instanceof';
INT          : 'int';
INTERFACE    : 'interface';
LONG         : 'long';
NATIVE       : 'native';
NEW          : 'new';
PACKAGE      : 'package';
PRIVATE      : 'private';
PROTECTED    : 'protected';
PUBLIC       : 'public';
RETURN       : 'return';
SHORT        : 'short';
STATIC       : 'static';
STRICTFP     : 'strictfp';
SUPER        : 'super';
SWITCH       : 'switch';
SYNCHRONIZED : 'synchronized';
THIS         : 'this';
THROW        : 'throw';
THROWS       : 'throws';
TRANSIENT    : 'transient';
TRY          : 'try';
VOID         : 'void';
VOLATILE     : 'volatile';
WHILE        : 'while';

// Module related keywords
//
// The lexer always emits these dedicated token types for the words below.
// NOTE(review): presumably the parser grammar re-admits them as identifiers outside module
// declarations -- confirm against the parser that consumes these tokens.
MODULE     : 'module';
OPEN       : 'open';
REQUIRES   : 'requires';
EXPORTS    : 'exports';
OPENS      : 'opens';
TO         : 'to';
USES       : 'uses';
PROVIDES   : 'provides';
WITH       : 'with';
TRANSITIVE : 'transitive';

// Local Variable Type Inference
VAR : 'var'; // reserved type name

// Switch Expressions
YIELD : 'yield'; // reserved type name from Java 14

// Records
RECORD : 'record';

// Sealed Classes
SEALED     : 'sealed';
PERMITS    : 'permits';
NON_SEALED : 'non-sealed'; // a single token: the hyphen is part of the keyword text

// Literals

// Integer literals. Every form accepts an optional trailing 'l' / 'L' suffix, and underscores
// are only accepted between digits, never as the last character of the digit run.

// A lone '0', or a non-zero digit followed by more digits or by underscores and then digits.
DECIMAL_LITERAL : ('0' | [1-9] (Digits? | '_'+ Digits)) [lL]?;
// '0x' / '0X' followed by at least one hex digit.
HEX_LITERAL     : '0' [xX] [0-9a-fA-F] ([0-9a-fA-F_]* [0-9a-fA-F])? [lL]?;
// Leading '0', optional underscores, then at least one octal digit.
OCT_LITERAL     : '0' '_'* [0-7] ([0-7_]* [0-7])? [lL]?;
// '0b' / '0B' followed by at least one binary digit.
BINARY_LITERAL  : '0' [bB] [01] ([01_]* [01])? [lL]?;

// Decimal floating-point literal. Two shapes are accepted:
//   1. digits with a decimal point ("1.", "1.5", ".5"), optional exponent, optional f/F/d/D suffix;
//   2. digits without a decimal point, which then need an exponent and/or a suffix ("1e3", "1f").
FLOAT_LITERAL:
    (Digits '.' Digits? | '.' Digits) ExponentPart? [fFdD]?
    | Digits (ExponentPart [fFdD]? | [fFdD])
;

// Hexadecimal floating-point literal: '0x' / '0X', a hex significand with an optional point,
// a mandatory binary exponent introduced by 'p' / 'P', and an optional f/F/d/D suffix.
HEX_FLOAT_LITERAL: '0' [xX] (HexDigits '.'? | HexDigits? '.' HexDigits) [pP] [+-]? Digits [fFdD]?;

BOOL_LITERAL: 'true' | 'false';

// Exactly one character between single quotes: either an escape sequence or any character
// other than a single quote, a backslash, CR or LF.
CHAR_LITERAL: '\'' (~['\\\r\n] | EscapeSequence) '\'';

// Zero or more characters between double quotes: escape sequences, or any character other
// than a double quote, a backslash, CR or LF. The literal therefore cannot span lines.
STRING_LITERAL: '"' (~["\\\r\n] | EscapeSequence)* '"';

// Triple-quoted literal whose content is escape sequences or non-backslash characters.
// The loop is non-greedy, so the literal ends at the earliest closing '"""'.
MULTI_STRING_LIT: '"""' (~[\\] | EscapeSequence)*? '"""';

// Text block: the opening '"""' may be followed only by spaces/tabs and then a CR or LF
// before the content starts; the content runs (non-greedily) up to the closing '"""'.
// NOTE(review): '.' already matches any single character, backslash included, so the
// EscapeSequence alternative does not widen what this rule accepts -- confirm intent.
TEXT_BLOCK: '"""' [ \t]* [\r\n] (. | EscapeSequence)*? '"""';

NULL_LITERAL: 'null';

// Separators
//
// Single-character punctuation, one token type each.

LPAREN : '(';
RPAREN : ')';
LBRACE : '{';
RBRACE : '}';
LBRACK : '[';
RBRACK : ']';
SEMI   : ';';
COMMA  : ',';
DOT    : '.';

// Operators
//
// Where one operator is a prefix of another ('=' and '==', '>' and '>>='), ANTLR's
// longest-match rule selects the longer token, so declaration order here is not significant.
// NOTE(review): there are no tokens for the plain shift operators '<<', '>>' and '>>>';
// presumably the parser assembles them from consecutive LT / GT tokens -- confirm there.

ASSIGN   : '=';
GT       : '>';
LT       : '<';
BANG     : '!';
TILDE    : '~';
QUESTION : '?';
COLON    : ':';
EQUAL    : '==';
LE       : '<=';
GE       : '>=';
NOTEQUAL : '!=';
AND      : '&&';
OR       : '||';
INC      : '++';
DEC      : '--';
ADD      : '+';
SUB      : '-';
MUL      : '*';
DIV      : '/';
BITAND   : '&';
BITOR    : '|';
CARET    : '^';
MOD      : '%';

// Compound assignment operators.
ADD_ASSIGN     : '+=';
SUB_ASSIGN     : '-=';
MUL_ASSIGN     : '*=';
DIV_ASSIGN     : '/=';
AND_ASSIGN     : '&=';
OR_ASSIGN      : '|=';
XOR_ASSIGN     : '^=';
MOD_ASSIGN     : '%=';
LSHIFT_ASSIGN  : '<<=';
RSHIFT_ASSIGN  : '>>=';
URSHIFT_ASSIGN : '>>>=';

// Java 8 tokens

ARROW      : '->'; // lambda arrow
COLONCOLON : '::'; // method reference

// Additional symbols not defined in the lexical specification

AT       : '@';
ELLIPSIS : '...';

// Whitespace and comments
//
// All three rules route their tokens to the HIDDEN channel instead of discarding them, so
// the text stays in the token stream while staying out of the default channel.

// Runs of space, tab, CR, LF and form feed (U+000C).
WS           : [ \t\r\n\u000C]+ -> channel(HIDDEN);
// Block comment; the non-greedy loop stops at the first '*/', so block comments do not nest.
COMMENT      : '/*' .*? '*/'    -> channel(HIDDEN);
// Line comment: everything up to, but not including, the next CR or LF.
LINE_COMMENT : '//' ~[\r\n]*    -> channel(HIDDEN);

// Identifiers
//
// A Letter followed by any number of letters or ASCII digits. This rule is declared after
// all keyword and literal-word rules, so on an equal-length match those rules take priority.

IDENTIFIER: Letter LetterOrDigit*;

// Fragment rules
//
// Fragments never produce tokens on their own; they are building blocks for the token rules.

// Exponent of a decimal floating-point literal: 'e' / 'E', optional sign, digits.
fragment ExponentPart: [eE] [+-]? Digits;

// Escape sequences usable inside character, string and text-block literals.
// In the first two alternatives the optional 'u005c' lets a second backslash, itself written
// as the unicode escape \u005c, follow the leading backslash.
//   1. single-character escapes: \b \t \n \f \r \s \" \' \\  (\s is the space escape, Java 15+)
//   2. octal escapes: one to three octal digits, the first limited to 0-3 when there are three
//   3. unicode escapes: a backslash, one or more 'u', then exactly four hex digits
fragment EscapeSequence:
    '\\' 'u005c'? [btnfrs"'\\]
    | '\\' 'u005c'? ([0-3]? [0-7])? [0-7]
    | '\\' 'u'+ HexDigit HexDigit HexDigit HexDigit
;

// One or more hex digits; underscores may appear between digits but not at either end.
fragment HexDigits: HexDigit ((HexDigit | '_')* HexDigit)?;

fragment HexDigit: [0-9a-fA-F];

// One or more decimal digits; underscores may appear between digits but not at either end.
fragment Digits: [0-9] ([0-9_]* [0-9])?;

fragment LetterOrDigit: Letter | [0-9];

fragment Letter:
    [a-zA-Z$_]                        // these are the "java letters" below 0x7F
    | ~[\u0000-\u007F\uD800-\uDBFF]   // covers all characters above 0x7F which are not a high surrogate
    | [\uD800-\uDBFF] [\uDC00-\uDFFF] // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
;