@@ -1855,28 +1855,33 @@ impl<'a> Tokenizer<'a> {
18551855 ) -> Result < Option < Token > , TokenizerError > {
18561856 let mut s = String :: new ( ) ;
18571857 let mut nested = 1 ;
1858- let mut last_ch = ' ' ;
1858+ let supports_nested_comments = self . dialect . supports_nested_comments ( ) ;
18591859
18601860 loop {
18611861 match chars. next ( ) {
1862- Some ( ch) => {
1863- if last_ch == '/' && ch == '*' {
1864- nested += 1 ;
1865- } else if last_ch == '*' && ch == '/' {
1866- nested -= 1 ;
1867- if nested == 0 {
1868- s. pop ( ) ;
1869- break Ok ( Some ( Token :: Whitespace ( Whitespace :: MultiLineComment ( s) ) ) ) ;
1870- }
1862+ Some ( '/' ) if matches ! ( chars. peek( ) , Some ( '*' ) ) && supports_nested_comments => {
1863+ chars. next ( ) ; // consume the '*'
1864+ s. push ( '/' ) ;
1865+ s. push ( '*' ) ;
1866+ nested += 1 ;
1867+ }
1868+ Some ( '*' ) if matches ! ( chars. peek( ) , Some ( '/' ) ) => {
1869+ chars. next ( ) ; // consume the '/'
1870+ nested -= 1 ;
1871+ if nested == 0 {
1872+ break Ok ( Some ( Token :: Whitespace ( Whitespace :: MultiLineComment ( s) ) ) ) ;
18711873 }
1874+ s. push ( '*' ) ;
1875+ s. push ( '/' ) ;
1876+ }
1877+ Some ( ch) => {
18721878 s. push ( ch) ;
1873- last_ch = ch;
18741879 }
18751880 None => {
18761881 break self . tokenizer_error (
18771882 chars. location ( ) ,
18781883 "Unexpected EOF while in a multi-line comment" ,
1879- )
1884+ ) ;
18801885 }
18811886 }
18821887 }
@@ -2718,18 +2723,90 @@ mod tests {
27182723
// Table-driven test of nested block-comment tokenization with GenericDialect:
// every case pairs an SQL input with the exact token sequence expected for it.
27192724 #[ test]
27202725 fn tokenize_nested_multiline_comment ( ) {
// The line marked '-' is the pre-change single input; the '+' lines replace it with the case table.
2721- let sql = String :: from ( "0/*multi-line\n * \n /* comment \n /*comment*/*/ */ /comment*/1" ) ;
2726+ let dialect = GenericDialect { } ;
2727+ let test_cases = vec ! [
// Case 1: the comment is balanced after three opener/closer pairs, so the trailing
// " /comment", the stray closer and "1" are expected as ordinary tokens
// (Space, Div, Word, Mul, Div, Number).
2728+ (
2729+ "0/*multi-line\n * \n /* comment \n /*comment*/*/ */ /comment*/1" ,
2730+ vec![
2731+ Token :: Number ( "0" . to_string( ) , false ) ,
// The comment text keeps the inner openers and closers but not the outermost pair.
2732+ Token :: Whitespace ( Whitespace :: MultiLineComment (
2733+ "multi-line\n * \n /* comment \n /*comment*/*/ " . into( ) ,
2734+ ) ) ,
2735+ Token :: Whitespace ( Whitespace :: Space ) ,
2736+ Token :: Div ,
2737+ Token :: Word ( Word {
2738+ value: "comment" . to_string( ) ,
2739+ quote_style: None ,
2740+ keyword: Keyword :: COMMENT ,
2741+ } ) ,
2742+ Token :: Mul ,
2743+ Token :: Div ,
2744+ Token :: Number ( "1" . to_string( ) , false ) ,
2745+ ] ,
2746+ ) ,
// Case 2: nesting goes four levels deep, including an empty innermost comment;
// everything up to the final closer is one comment token and "1" follows immediately.
2747+ (
2748+ "0/*multi-line\n * \n /* comment \n /*comment/**/ */ /comment*/*/1" ,
2749+ vec![
2750+ Token :: Number ( "0" . to_string( ) , false ) ,
2751+ Token :: Whitespace ( Whitespace :: MultiLineComment (
2752+ "multi-line\n * \n /* comment \n /*comment/**/ */ /comment*/" . into( ) ,
2753+ ) ) ,
2754+ Token :: Number ( "1" . to_string( ) , false ) ,
2755+ ] ,
2756+ ) ,
// Case 3: one nested level, with the comment sitting directly between two numbers.
2757+ (
2758+ "SELECT 1/* a /* b */ c */0" ,
2759+ vec![
2760+ Token :: make_keyword( "SELECT" ) ,
2761+ Token :: Whitespace ( Whitespace :: Space ) ,
2762+ Token :: Number ( "1" . to_string( ) , false ) ,
2763+ Token :: Whitespace ( Whitespace :: MultiLineComment ( " a /* b */ c " . to_string( ) ) ) ,
2764+ Token :: Number ( "0" . to_string( ) , false ) ,
2765+ ] ,
2766+ ) ,
2767+ ] ;
2768+
// Tokenize each input and compare with its expected tokens; unwrap() fails the
// test on any tokenizer error.
2769+ for ( sql, expected) in test_cases {
2770+ let tokens = Tokenizer :: new ( & dialect, sql) . tokenize ( ) . unwrap ( ) ;
2771+ compare ( expected, tokens) ;
2772+ }
2773+ }
2774+
// Checks that an empty comment nested directly inside another comment is kept
// as the outer comment's text when tokenizing with GenericDialect.
2775+ #[ test]
2776+ fn tokenize_nested_multiline_comment_empty ( ) {
// Input: an outer comment whose entire content is an empty inner comment,
// placed between the numbers 1 and 0 with no whitespace around it.
2777+ let sql = "select 1/*/**/*/0" ;
27222778
27232779 let dialect = GenericDialect { } ;
// The '-' line is the pre-change call that passed a reference to an owned String;
// the '+' line passes the string slice directly.
2724- let tokens = Tokenizer :: new ( & dialect, & sql) . tokenize ( ) . unwrap ( ) ;
2780+ let tokens = Tokenizer :: new ( & dialect, sql) . tokenize ( ) . unwrap ( ) ;
27252781 let expected = vec ! [
2782+ Token :: make_keyword( "select" ) ,
2783+ Token :: Whitespace ( Whitespace :: Space ) ,
2784+ Token :: Number ( "1" . to_string( ) , false ) ,
// Only the outermost opener and closer are stripped from the comment text.
2785+ Token :: Whitespace ( Whitespace :: MultiLineComment ( "/**/" . to_string( ) ) ) ,
27262786 Token :: Number ( "0" . to_string( ) , false ) ,
2787+ ] ;
2788+
2789+ compare ( expected, tokens) ;
2790+ }
2791+
// Checks tokenization of a nested-looking comment under SQLiteDialect: the
// expected tokens correspond to the comment ending at the first closer.
// NOTE(review): presumably SQLiteDialect reports no nested-comment support; its
// supports_nested_comments implementation is not visible here, so confirm.
2792+ #[ test]
2793+ fn tokenize_nested_comments_if_not_supported ( ) {
2794+ let dialect = SQLiteDialect { } ;
2795+ let sql = "SELECT 1/*/* nested comment */*/0" ;
// The Result is kept here and only unwrapped at the final comparison.
2796+ let tokens = Tokenizer :: new ( & dialect, sql) . tokenize ( ) ;
2797+ let expected = vec ! [
2798+ Token :: make_keyword( "SELECT" ) ,
2799+ Token :: Whitespace ( Whitespace :: Space ) ,
2800+ Token :: Number ( "1" . to_string( ) , false ) ,
// The second opener is plain comment text; the '-' line below is the old
// expectation from the pre-change test.
27272801 Token :: Whitespace ( Whitespace :: MultiLineComment (
2728- "multi-line \n * \n /* comment \n /* comment*/*/ */ /comment " . to_string( ) ,
2802+ "/* nested comment " . to_string( ) ,
27292803 ) ) ,
// The leftover closer after the comment is expected as two operator tokens.
2730- Token :: Number ( "1" . to_string( ) , false ) ,
2804+ Token :: Mul ,
2805+ Token :: Div ,
2806+ Token :: Number ( "0" . to_string( ) , false ) ,
27312807 ] ;
2732- compare ( expected, tokens) ;
2808+
2809+ compare ( expected, tokens. unwrap ( ) ) ;
27332810 }
27342811
27352812 #[ test]
0 commit comments