@@ -86,6 +86,17 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
8686 }
8787 }
8888
89+ private static class DedentedLevel extends InterpolatedStringLevel {
90+ public final int delimiterLength;
91+ public DedentedLevel (CharSequence interpolator , int delimiterLength ) {
92+ super (interpolator);
93+ this . delimiterLength = delimiterLength;
94+ }
95+ public int getState () {
96+ return INSIDE_DEDENTED_INTERPOLATED_STRING ;
97+ }
98+ }
99+
89100 private boolean isScala3;
90101
91102 //
@@ -94,6 +105,7 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
94105 // to get id after $ in interpolated String
95106 private boolean haveIdInString = false ;
96107 private boolean haveIdInMultilineString = false ;
108+ private boolean haveIdInDedentedString = false ;
97109 // Currently opened interpolated Strings. Each int represents the number of the opened left structural braces in the String
98110 private Stack<InterpolatedStringLevel > nestedString = new Stack<> ();
99111 private CharSequence lastSeenInterpolator = null ;
@@ -105,16 +117,49 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
105117 public void resetCustom() {
106118 haveIdInString = false ;
107119 haveIdInMultilineString = false ;
120+ haveIdInDedentedString = false ;
108121 nestedString. clear();
109122 lastSeenInterpolator = null ;
110123 }
111124
125+ private int countLeadingQuotes(CharSequence text) {
126+ int count = 0 ;
127+ for (int i = 0 ; i < text. length() && text. charAt(i) == ' \' ' ; i++ ) {
128+ count++ ;
129+ }
130+ return count;
131+ }
132+
133+ private boolean endsWithQuotes(CharSequence text, int expectedCount) {
134+ if (text. length() < expectedCount) return false ;
135+ int count = 0 ;
136+ for (int i = text. length() - 1 ; i >= 0 && text. charAt(i) == ' \' ' ; i-- ) {
137+ count++ ;
138+ }
139+ return count >= expectedCount;
140+ }
141+
142+ private boolean isValidDedentedString(CharSequence text) {
143+ int leadingQuotes = countLeadingQuotes(text);
144+ if (leadingQuotes < 3 ) return false ; // Must have at least 3 quotes
145+
146+ // Find the ending quotes
147+ int trailingQuotes = 0 ;
148+ for (int i = text. length() - 1 ; i >= 0 && text. charAt(i) == ' \' ' ; i-- ) {
149+ trailingQuotes++ ;
150+ }
151+
152+ return leadingQuotes == trailingQuotes;
153+ }
154+
112155 public boolean isInterpolatedStringState() {
113156 return isInsideInterpolatedString() ||
114157 haveIdInString ||
115158 haveIdInMultilineString ||
159+ haveIdInDedentedString ||
116160 yystate() == INSIDE_INTERPOLATED_STRING ||
117- yystate() == INSIDE_MULTI_LINE_INTERPOLATED_STRING ;
161+ yystate() == INSIDE_MULTI_LINE_INTERPOLATED_STRING ||
162+ yystate() == INSIDE_DEDENTED_INTERPOLATED_STRING ;
118163 }
119164
120165 private boolean shouldProcessBracesForInterpolated() {
@@ -145,6 +190,9 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
145190 } else if (haveIdInMultilineString) {
146191 haveIdInMultilineString = false ;
147192 yybegin(INSIDE_MULTI_LINE_INTERPOLATED_STRING );
193+ } else if (haveIdInDedentedString) {
194+ haveIdInDedentedString = false ;
195+ yybegin(INSIDE_DEDENTED_INTERPOLATED_STRING );
148196 }
149197 }
150198
@@ -158,6 +206,8 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
158206 typeAdjusted = tINTERPOLATED_RAW_STRING;
159207 else if (type == tINTERPOLATED_MULTILINE_STRING && isInsideRawInterpolator())
160208 typeAdjusted = tINTERPOLATED_MULTILINE_RAW_STRING;
209+ else if (type == tINTERPOLATED_DEDENTED_STRING && isInsideRawInterpolator())
210+ typeAdjusted = tINTERPOLATED_DEDENTED_RAW_STRING;
161211 else
162212 typeAdjusted = type;
163213
@@ -166,6 +216,11 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
166216
167217 @NotNull
168218 private IElementType processDollarInsideString(boolean isInsideMultiline) {
219+ return processDollarInsideString(isInsideMultiline, false );
220+ }
221+
222+ @NotNull
223+ private IElementType processDollarInsideString(boolean isInsideMultiline, boolean isInsideDedented) {
169224 final IElementType token;
170225
171226 // TODO: remove this chech, this should always be false, cause $$ is handled by INTERPOLATED_STRING_ESCAPE pattern earlier
@@ -175,7 +230,9 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
175230 token = tINTERPOLATED_STRING_ESCAPE;
176231 }
177232 else {
178- if (isInsideMultiline) {
233+ if (isInsideDedented) {
234+ haveIdInDedentedString = true ;
235+ } else if (isInsideMultiline) {
179236 haveIdInMultilineString = true ;
180237 } else {
181238 haveIdInString = true ;
@@ -271,12 +328,19 @@ hexDigit = [0-9A-Fa-f]
271328CHAR_ESCAPE_SEQUENCE = \\ [^\r\n]
272329UNICODE_ESCAPE = \\ u+ {hexDigit}{hexDigit}{hexDigit}{hexDigit} // Scala supports 1. multiple `u` chars after `\` 2. even \u000A ('\n') and \u000D (unlike Java)
273330ESCAPE_SEQUENCE = {UNICODE_ESCAPE} | {CHAR_ESCAPE_SEQUENCE}
274- CHARACTER_LITERAL = "'" ( [^ \\\' \r\n] | {ESCAPE_SEQUENCE} | {OCTAL_ESCAPE_LITERAL} )( "'" | \\ ) | \'\\ u000A\' | "'''" // TODO: \'\\u000A\' is redundunt, remove
331+ CHARACTER_LITERAL = "'" ( [^ \\\' \r\n] | {ESCAPE_SEQUENCE} | {OCTAL_ESCAPE_LITERAL} )( "'" | \\ ) | \'\\ u000A\' // TODO: \'\\u000A\' is redundunt, remove
275332
276333STRING_BEGIN = \" ( [^ \\\" \r\n] | {CHAR_ESCAPE_SEQUENCE} )*
277334STRING_LITERAL = {STRING_BEGIN} \"
278335MULTI_LINE_STRING = \"\"\" ( ( \" ( \" )?)? [^ \" ] )* \"\"\" ( \" )* // Multi-line string
279336
// Dedented string literals (Scala 3) - modeled after MULTI_LINE_STRING pattern.
// For an N-quote delimiter the body is a sequence of "k quotes followed by a non-quote"
// with k = 0 .. N-1. Every k in that range must be spelled out by the nested optionals,
// otherwise a body containing such a run cannot be matched by that alternative.
DEDENTED_STRING_3 = \'\'\' ( ( \' ( \' )? )? [^\'] )* \'\'\' ( \' )*
DEDENTED_STRING_4 = \'\'\'\' ( ( \' ( \' ( \' )? )? )? [^\'] )* \'\'\'\' ( \' )*
DEDENTED_STRING_5 = \'\'\'\'\' ( ( \' ( \' ( \' ( \' )? )? )? )? [^\'] )* \'\'\'\'\' ( \' )*
DEDENTED_STRING_6 = \'\'\'\'\'\' ( ( \' ( \' ( \' ( \' ( \' )? )? )? )? )? [^\'] )* \'\'\'\'\'\' ( \' )*
DEDENTED_STRING = {DEDENTED_STRING_6} | {DEDENTED_STRING_5} | {DEDENTED_STRING_4} | {DEDENTED_STRING_3}
343+
280344// //////String Interpolation////////
281345INTERPOLATED_STRING_ID = {varid}
282346
@@ -287,6 +351,9 @@ INTERPOLATED_STRING_PART_NOT_ESCAPED = [^\\\"\r\n\$]
287351INTERPOLATED_MULTI_LINE_STRING_BEGIN = \"\"\" {INTERPOLATED_MULTI_LINE_STRING_PART} *
288352INTERPOLATED_MULTI_LINE_STRING_PART = (( \" ( \" )?)? [^ \"\$ ] )
289353
// Plain content of an interpolated dedented string, modeled after
// INTERPOLATED_MULTI_LINE_STRING_PART: at most two quotes followed by one character
// that is neither a quote nor `$`. The part must never consume `$`, otherwise
// longest-match makes the content rule win over the `$$`, `$id` and `${` rules.
INTERPOLATED_DEDENTED_STRING_BEGIN = \'\'\'+ {INTERPOLATED_DEDENTED_STRING_PART}*
INTERPOLATED_DEDENTED_STRING_PART = ((\'(\')?)? [^\'\$])
356+
// TODO: rename, it's misleading
291358INTERPOLATED_STRING_ESCAPE = "$$"
292359// INTERPOLATED_STRING_VARIABLE = "$"({identifier})
@@ -324,6 +391,7 @@ XML_BEGIN = "<" ("_" | [:jletter:]) | "<!--" | "<?" ("_" | [:jletter:]) | "<![CD
324391%xstate WAIT_FOR_INTERPOLATED_STRING
325392%xstate INSIDE_INTERPOLATED_STRING
326393%xstate INSIDE_MULTI_LINE_INTERPOLATED_STRING
394+ %xstate INSIDE_DEDENTED_INTERPOLATED_STRING
327395%xstate INJ_COMMON_STATE
328396
329397%%
@@ -344,7 +412,7 @@ XML_BEGIN = "<" ("_" | [:jletter:]) | "<!--" | "<?" ("_" | [:jletter:]) | "<![CD
344412{END_OF_LINE_COMMENT} { return process(tLINE_COMMENT); }
345413
346414
347- {INTERPOLATED_STRING_ID} / ( {INTERPOLATED_STRING_BEGIN} | {INTERPOLATED_MULTI_LINE_STRING_BEGIN} ) {
415+ {INTERPOLATED_STRING_ID} / ( {INTERPOLATED_STRING_BEGIN} | {INTERPOLATED_MULTI_LINE_STRING_BEGIN} | {INTERPOLATED_DEDENTED_STRING_BEGIN} ) {
348416 yybegin(WAIT_FOR_INTERPOLATED_STRING );
349417 // TODO: remove this check: looks like it's a dead code,
350418 // yytext() should only return text that is matched by INTERPOLATED_STRING_ID, which can't end with \"\"
@@ -367,6 +435,13 @@ XML_BEGIN = "<" ("_" | [:jletter:]) | "<!--" | "<?" ("_" | [:jletter:]) | "<![CD
367435 nestedString. push(new MultilineLevel (lastSeenInterpolator));
368436 return process(tINTERPOLATED_MULTILINE_STRING);
369437 }
438+
// Opening of an interpolated dedented string: record a new nesting level that
// remembers how many quotes the matched text starts with, then switch to the
// dedented-string state and emit the matched text as string content.
{INTERPOLATED_DEDENTED_STRING_BEGIN} {
    final DedentedLevel openedLevel =
        new DedentedLevel(lastSeenInterpolator, countLeadingQuotes(yytext()));
    nestedString.push(openedLevel);
    yybegin(INSIDE_DEDENTED_INTERPOLATED_STRING);
    return process(tINTERPOLATED_DEDENTED_STRING);
}
370445}
371446
372447<INJ_COMMON_STATE> {identifier} {
@@ -470,6 +545,61 @@ XML_BEGIN = "<" ("_" | [:jletter:]) | "<!--" | "<?" ("_" | [:jletter:]) | "<![CD
470545 }
471546}
472547
// Rules active while the lexer is inside an interpolated dedented string.
<INSIDE_DEDENTED_INTERPOLATED_STRING> {
  // `$$`
  {INTERPOLATED_STRING_ESCAPE} {
    return process(tINTERPOLATED_STRING_ESCAPE);
  }

  // Two quotes directly in front of a `$` are string content.
  (\'\') / "$" {
    return process(tINTERPOLATED_DEDENTED_STRING);
  }

  {INTERPOLATED_DEDENTED_STRING_PART}+ {
    return process(tINTERPOLATED_DEDENTED_STRING);
  }

  "$" {identifier} {
    return processDollarInsideString(false, true);
  }

  // A run of three or more quotes. This is a single rule on purpose: an additional
  // rule for "four or more quotes" placed above it would match every longer run with
  // the same length and higher priority, so this action would only ever see three
  // quotes and a string opened with a longer delimiter could never be closed.
  \'\'\'+ {
    // Delimiter length of the string being lexed; three when no dedented level is open.
    int delimiterLength = 3;
    if (!nestedString.isEmpty() && nestedString.peek() instanceof DedentedLevel) {
      delimiterLength = ((DedentedLevel) nestedString.peek()).delimiterLength;
    }

    int quoteCount = yylength();
    if (quoteCount > delimiterLength) {
      // More quotes than the delimiter: emit the first quote as content and rescan
      // the rest, so that the last `delimiterLength` quotes of the run close the string.
      yypushback(quoteCount - 1);
      return process(tINTERPOLATED_DEDENTED_STRING);
    }
    if (quoteCount < delimiterLength) {
      // Not enough quotes to close, treat as content
      return process(tINTERPOLATED_DEDENTED_STRING);
    }
    return processOutsideString();
  }

  // `${`: emit the `$` as an injection token and lex the block in COMMON_STATE.
  "$" / "{" {
    yybegin(COMMON_STATE);
    return process(tINTERPOLATED_STRING_INJECTION);
  }

  // A single quote that is not followed by another quote is string content.
  \' / [^\'] {
    return process(tINTERPOLATED_DEDENTED_STRING);
  }

  // Anything not matched above.
  [^] {
    return process(tWRONG_STRING);
  }
}
602+
473603
474604"/**" ( "*" ? [^ \/ ] )* "*/" { // for comments in interpolated strings
475605 return process(ScalaDocElementTypes . SCALA_DOC_COMMENT );
@@ -486,6 +616,8 @@ XML_BEGIN = "<" ("_" | [:jletter:]) | "<!--" | "<?" ("_" | [:jletter:]) | "<![CD
// TODO: incomplete strings should be handled the same way as interpolated strings:
//  what can be parsed should be parsed as tSTRING, and a
//  tWRONG_LINE_BREAK_IN_STRING error token should be added at the unexpected new line
// A complete (non-interpolated) dedented string literal. It is reported as
// tDEDENTED_STRING only when lexing Scala 3; otherwise the whole match is
// reported as tIDENTIFIER.
{DEDENTED_STRING} {
    return process(isScala3 ? tDEDENTED_STRING : tIDENTIFIER);
}
620+
489621{WRONG_STRING} { return process(tWRONG_STRING); }
490622
491623
0 commit comments