@@ -231,6 +231,10 @@ pub enum Token {
231231 /// jsonb ?| text[] -> boolean: Check whether any member of the text array exists as top-level
232232 /// keys within the jsonb object
233233 QuestionPipe ,
234+ /// Custom binary operator
235+ /// This is used to represent any custom binary operator that is not part of the SQL standard.
236+ /// PostgreSQL allows defining custom binary operators using CREATE OPERATOR.
237+ CustomBinaryOperator ( String ) ,
234238}
235239
236240impl fmt:: Display for Token {
@@ -320,6 +324,7 @@ impl fmt::Display for Token {
320324 Token :: Question => write ! ( f, "?" ) ,
321325 Token :: QuestionAnd => write ! ( f, "?&" ) ,
322326 Token :: QuestionPipe => write ! ( f, "?|" ) ,
327+ Token :: CustomBinaryOperator ( s) => f. write_str ( s) ,
323328 }
324329 }
325330}
@@ -961,15 +966,12 @@ impl<'a> Tokenizer<'a> {
961966 Some ( '>' ) => {
962967 chars. next ( ) ;
963968 match chars. peek ( ) {
964- Some ( '>' ) => {
965- chars. next ( ) ;
966- Ok ( Some ( Token :: LongArrow ) )
967- }
968- _ => Ok ( Some ( Token :: Arrow ) ) ,
969+ Some ( '>' ) => self . consume_for_binop ( chars, "->>" , Token :: LongArrow ) ,
970+ _ => self . start_binop ( chars, "->" , Token :: Arrow ) ,
969971 }
970972 }
971973 // a regular '-' operator
972- _ => Ok ( Some ( Token :: Minus ) ) ,
974+ _ => self . start_binop ( chars , "-" , Token :: Minus ) ,
973975 }
974976 }
975977 '/' => {
@@ -999,26 +1001,28 @@ impl<'a> Tokenizer<'a> {
9991001 '%' => {
10001002 chars. next ( ) ; // advance past '%'
10011003 match chars. peek ( ) {
1002- Some ( ' ' ) => Ok ( Some ( Token :: Mod ) ) ,
1004+ Some ( s ) if s . is_whitespace ( ) => Ok ( Some ( Token :: Mod ) ) ,
10031005 Some ( sch) if self . dialect . is_identifier_start ( '%' ) => {
10041006 self . tokenize_identifier_or_keyword ( [ ch, * sch] , chars)
10051007 }
1006- _ => Ok ( Some ( Token :: Mod ) ) ,
1008+ _ => self . start_binop ( chars , "%" , Token :: Mod ) ,
10071009 }
10081010 }
10091011 '|' => {
10101012 chars. next ( ) ; // consume the '|'
10111013 match chars. peek ( ) {
1012- Some ( '/' ) => self . consume_and_return ( chars, Token :: PGSquareRoot ) ,
1014+ Some ( '/' ) => self . consume_for_binop ( chars, "|/" , Token :: PGSquareRoot ) ,
10131015 Some ( '|' ) => {
10141016 chars. next ( ) ; // consume the second '|'
10151017 match chars. peek ( ) {
1016- Some ( '/' ) => self . consume_and_return ( chars, Token :: PGCubeRoot ) ,
1017- _ => Ok ( Some ( Token :: StringConcat ) ) ,
1018+ Some ( '/' ) => {
1019+ self . consume_for_binop ( chars, "||/" , Token :: PGCubeRoot )
1020+ }
1021+ _ => self . start_binop ( chars, "||" , Token :: StringConcat ) ,
10181022 }
10191023 }
10201024 // Bitwise OR '|' operator
1021- _ => Ok ( Some ( Token :: Pipe ) ) ,
1025+ _ => self . start_binop ( chars , "|" , Token :: Pipe ) ,
10221026 }
10231027 }
10241028 '=' => {
@@ -1061,22 +1065,22 @@ impl<'a> Tokenizer<'a> {
10611065 Some ( '=' ) => {
10621066 chars. next ( ) ;
10631067 match chars. peek ( ) {
1064- Some ( '>' ) => self . consume_and_return ( chars, Token :: Spaceship ) ,
1065- _ => Ok ( Some ( Token :: LtEq ) ) ,
1068+ Some ( '>' ) => self . consume_for_binop ( chars, "<=>" , Token :: Spaceship ) ,
1069+ _ => self . start_binop ( chars , "<=" , Token :: LtEq ) ,
10661070 }
10671071 }
1068- Some ( '>' ) => self . consume_and_return ( chars, Token :: Neq ) ,
1069- Some ( '<' ) => self . consume_and_return ( chars, Token :: ShiftLeft ) ,
1070- Some ( '@' ) => self . consume_and_return ( chars, Token :: ArrowAt ) ,
1071- _ => Ok ( Some ( Token :: Lt ) ) ,
1072+ Some ( '>' ) => self . consume_for_binop ( chars, "<>" , Token :: Neq ) ,
1073+ Some ( '<' ) => self . consume_for_binop ( chars, "<<" , Token :: ShiftLeft ) ,
1074+ Some ( '@' ) => self . consume_for_binop ( chars, "<@" , Token :: ArrowAt ) ,
1075+ _ => self . start_binop ( chars , "<" , Token :: Lt ) ,
10721076 }
10731077 }
10741078 '>' => {
10751079 chars. next ( ) ; // consume
10761080 match chars. peek ( ) {
1077- Some ( '=' ) => self . consume_and_return ( chars, Token :: GtEq ) ,
1078- Some ( '>' ) => self . consume_and_return ( chars, Token :: ShiftRight ) ,
1079- _ => Ok ( Some ( Token :: Gt ) ) ,
1081+ Some ( '=' ) => self . consume_for_binop ( chars, ">=" , Token :: GtEq ) ,
1082+ Some ( '>' ) => self . consume_for_binop ( chars, ">>" , Token :: ShiftRight ) ,
1083+ _ => self . start_binop ( chars , ">" , Token :: Gt ) ,
10801084 }
10811085 }
10821086 ':' => {
@@ -1094,9 +1098,12 @@ impl<'a> Tokenizer<'a> {
10941098 '&' => {
10951099 chars. next ( ) ; // consume the '&'
10961100 match chars. peek ( ) {
1097- Some ( '&' ) => self . consume_and_return ( chars, Token :: Overlap ) ,
1101+ Some ( '&' ) => {
1102+ chars. next ( ) ; // consume the second '&'
1103+ self . start_binop ( chars, "&&" , Token :: Overlap )
1104+ }
10981105 // Bitwise AND '&' operator
1099- _ => Ok ( Some ( Token :: Ampersand ) ) ,
1106+ _ => self . start_binop ( chars , "&" , Token :: Ampersand ) ,
11001107 }
11011108 }
11021109 '^' => {
@@ -1119,38 +1126,37 @@ impl<'a> Tokenizer<'a> {
11191126 '~' => {
11201127 chars. next ( ) ; // consume
11211128 match chars. peek ( ) {
1122- Some ( '*' ) => self . consume_and_return ( chars, Token :: TildeAsterisk ) ,
1129+ Some ( '*' ) => self . consume_for_binop ( chars, "~*" , Token :: TildeAsterisk ) ,
11231130 Some ( '~' ) => {
11241131 chars. next ( ) ;
11251132 match chars. peek ( ) {
11261133 Some ( '*' ) => {
1127- self . consume_and_return ( chars, Token :: DoubleTildeAsterisk )
1134+ self . consume_for_binop ( chars, "~~*" , Token :: DoubleTildeAsterisk )
11281135 }
1129- _ => Ok ( Some ( Token :: DoubleTilde ) ) ,
1136+ _ => self . start_binop ( chars , "~~" , Token :: DoubleTilde ) ,
11301137 }
11311138 }
1132- _ => Ok ( Some ( Token :: Tilde ) ) ,
1139+ _ => self . start_binop ( chars , "~" , Token :: Tilde ) ,
11331140 }
11341141 }
11351142 '#' => {
11361143 chars. next ( ) ;
11371144 match chars. peek ( ) {
1138- Some ( '-' ) => self . consume_and_return ( chars, Token :: HashMinus ) ,
1145+ Some ( '-' ) => self . consume_for_binop ( chars, "#-" , Token :: HashMinus ) ,
11391146 Some ( '>' ) => {
11401147 chars. next ( ) ;
11411148 match chars. peek ( ) {
11421149 Some ( '>' ) => {
1143- chars. next ( ) ;
1144- Ok ( Some ( Token :: HashLongArrow ) )
1150+ self . consume_for_binop ( chars, "#>>" , Token :: HashLongArrow )
11451151 }
1146- _ => Ok ( Some ( Token :: HashArrow ) ) ,
1152+ _ => self . start_binop ( chars , "#>" , Token :: HashArrow ) ,
11471153 }
11481154 }
11491155 Some ( ' ' ) => Ok ( Some ( Token :: Sharp ) ) ,
11501156 Some ( sch) if self . dialect . is_identifier_start ( '#' ) => {
11511157 self . tokenize_identifier_or_keyword ( [ ch, * sch] , chars)
11521158 }
1153- _ => Ok ( Some ( Token :: Sharp ) ) ,
1159+ _ => self . start_binop ( chars , "#" , Token :: Sharp ) ,
11541160 }
11551161 }
11561162 '@' => {
@@ -1206,6 +1212,39 @@ impl<'a> Tokenizer<'a> {
12061212 }
12071213 }
12081214
1215+ /// Consume the next character, then parse a custom binary operator. The next character should be included in the prefix
1216+ fn consume_for_binop (
1217+ & self ,
1218+ chars : & mut State ,
1219+ prefix : & str ,
1220+ default : Token ,
1221+ ) -> Result < Option < Token > , TokenizerError > {
1222+ chars. next ( ) ; // consume the first char
1223+ self . start_binop ( chars, prefix, default)
1224+ }
1225+
1226+ /// parse a custom binary operator
1227+ fn start_binop (
1228+ & self ,
1229+ chars : & mut State ,
1230+ prefix : & str ,
1231+ default : Token ,
1232+ ) -> Result < Option < Token > , TokenizerError > {
1233+ let mut custom = None ;
1234+ while let Some ( & ch) = chars. peek ( ) {
1235+ if !self . dialect . is_custom_operator_part ( ch) {
1236+ break ;
1237+ }
1238+
1239+ custom. get_or_insert_with ( || prefix. to_string ( ) ) . push ( ch) ;
1240+ chars. next ( ) ;
1241+ }
1242+
1243+ Ok ( Some (
1244+ custom. map ( Token :: CustomBinaryOperator ) . unwrap_or ( default) ,
1245+ ) )
1246+ }
1247+
12091248 /// Tokenize dollar preceded value (i.e: a string/placeholder)
12101249 fn tokenize_dollar_preceded_value ( & self , chars : & mut State ) -> Result < Token , TokenizerError > {
12111250 let mut s = String :: new ( ) ;
0 commit comments