@@ -55,6 +55,8 @@ pub enum Token {
     EscapedStringLiteral(String),
     /// Hexadecimal string literal: i.e.: X'deadbeef'
     HexStringLiteral(String),
+    /// Unicode escaped string: U&'d\0061t\+000061' (data)
+    UnicodeEscapedStringLiteral(String),
     /// Comma
     Comma,
     /// Whitespace (space, tab, etc)
@@ -164,6 +166,7 @@ impl fmt::Display for Token {
             Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s),
             Token::EscapedStringLiteral(ref s) => write!(f, "E'{}'", s),
             Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s),
+            Token::UnicodeEscapedStringLiteral(ref s) => write!(f, "U&'{}'", s),
             Token::Comma => f.write_str(","),
             Token::Whitespace(ws) => write!(f, "{}", ws),
             Token::DoubleEq => f.write_str("=="),
@@ -427,6 +430,28 @@ impl<'a> Tokenizer<'a> {
                     }
                 }
             }
+            x @ 'u' | x @ 'U' => {
+                chars.next(); // consume, to check the next char
+                let mut look_ahead_chars = chars.clone();
+                if look_ahead_chars.next_if_eq(&'&').is_some() {
+                    match look_ahead_chars.peek() {
+                        Some('\'') => {
+                            // advance chars to the position of look_ahead_chars
+                            chars.next();
+                            // U&'...' - a <Unicode character string literal>
+                            let s = self.tokenize_single_quoted_string(chars)?;
+                            Ok(Some(Token::UnicodeEscapedStringLiteral(s)))
+                        }
+                        _ => {
+                            let s = self.tokenize_word(x, chars);
+                            Ok(Some(Token::make_word(&s, None)))
+                        }
+                    }
+                } else {
+                    let s = self.tokenize_word(x, chars);
+                    Ok(Some(Token::make_word(&s, None)))
+                }
+            }
             // identifier or keyword
             ch if self.dialect.is_identifier_start(ch) => {
                 chars.next(); // consume the first char
@@ -1454,4 +1479,36 @@ mod tests {
         //println!("------------------------------");
         assert_eq!(expected, actual);
     }
+    #[test]
+    fn tokenize_unicode_escaped_literal() {
+        let sql = r#"U&'aaa'"#;
+        let dialect = GenericDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, sql);
+        let tokens = tokenizer.tokenize().unwrap();
+        let expected = vec![Token::UnicodeEscapedStringLiteral("aaa".to_string())];
+        compare(expected, tokens);
+
+        let sql = r#"U&a"#;
+        let dialect = GenericDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, sql);
+        let tokens = tokenizer.tokenize().unwrap();
+        let expected = vec![
+            Token::make_word("U", None),
+            Token::Ampersand,
+            Token::make_word("a", None),
+        ];
+        compare(expected, tokens);
+        let sql = r#"U & 'aaa'"#;
+        let dialect = GenericDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, sql);
+        let tokens = tokenizer.tokenize().unwrap();
+        let expected = vec![
+            Token::make_word("U", None),
+            Token::Whitespace(Whitespace::Space),
+            Token::Ampersand,
+            Token::Whitespace(Whitespace::Space),
+            Token::SingleQuotedString("aaa".to_string()),
+        ];
+        compare(expected, tokens);
+    }
 }
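
Usage sketch (not part of the diff): assuming the crate's public paths sqlparser::dialect::GenericDialect and sqlparser::tokenizer::{Token, Tokenizer}, a Unicode-escaped literal with real escapes should come out as a single UnicodeEscapedStringLiteral token. The tokenizer keeps the escape sequences as written; decoding \0061 etc. into characters is left to a later stage. The main function and the assumption about backslash pass-through for GenericDialect are illustrative, not confirmed by the patch.

use sqlparser::dialect::GenericDialect;
use sqlparser::tokenizer::{Token, Tokenizer};

fn main() {
    // U&'d\0061t\+000061' is the SQL-standard spelling of 'data' using Unicode escapes.
    let sql = r#"U&'d\0061t\+000061'"#;
    let dialect = GenericDialect {};
    let mut tokenizer = Tokenizer::new(&dialect, sql);
    let tokens = tokenizer.tokenize().unwrap();
    println!("{:?}", tokens);
    // Expect exactly one token of the new variant; the string body (with escapes
    // still intact, assuming GenericDialect passes backslashes through) is what
    // the Display impl wraps back into U&'...'.
    assert!(matches!(
        tokens.as_slice(),
        [Token::UnicodeEscapedStringLiteral(_)]
    ));
}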