1
- from typing import List , Union
1
+ import re
2
+ from typing import Any , List , Union
2
3
from ..abcs .database_types import (
4
+ ColType ,
5
+ Array ,
6
+ JSON ,
7
+ Struct ,
3
8
Timestamp ,
4
9
Datetime ,
5
10
Integer ,
10
15
FractionalType ,
11
16
TemporalType ,
12
17
Boolean ,
18
+ UnknownColType ,
13
19
)
14
20
from ..abcs .mixins import (
15
21
AbstractMixin_MD5 ,
@@ -36,6 +42,7 @@ def md5_as_int(self, s: str) -> str:
36
42
37
43
38
44
class Mixin_NormalizeValue (AbstractMixin_NormalizeValue ):
45
+
39
46
def normalize_timestamp (self , value : str , coltype : TemporalType ) -> str :
40
47
if coltype .rounds :
41
48
timestamp = f"timestamp_micros(cast(round(unix_micros(cast({ value } as timestamp))/1000000, { coltype .precision } )*1000000 as int))"
@@ -57,6 +64,27 @@ def normalize_number(self, value: str, coltype: FractionalType) -> str:
57
64
def normalize_boolean(self, value: str, _coltype: Boolean) -> str:
    """Build the normalized SQL expression for a boolean column.

    The expression is cast to an integer and the result is handed to
    ``self.to_string`` so that the final expression is a string.

    :param value: SQL expression that yields the boolean value.
    :param _coltype: Column type descriptor; not used here.
    :return: The SQL expression produced by ``self.to_string``.
    """
    as_integer = f"cast({ value } as int)"
    return self.to_string(as_integer)
59
66
67
def normalize_json(self, value: str, _coltype: JSON) -> str:
    """Build the normalized SQL expression for a JSON column.

    BigQuery is unable to compare arrays & structs with ==/!=/distinct from, e.g.:
        Got error: 400 Grouping is not defined for arguments of type ARRAY<INT64> at …
    As a best effort the value is compared as a string instead, hoping that
    the JSON forms match on both sides: i.e. have properly ordered keys,
    same spacing, same quotes, etc.

    :param value: SQL expression that yields the JSON value.
    :param _coltype: Column type descriptor; not used here.
    :return: A ``to_json_string(...)`` expression wrapping ``value``.
    """
    json_text_expr = f"to_json_string({ value } )"
    return json_text_expr
73
+
74
def normalize_array(self, value: str, _coltype: Array) -> str:
    """Build the normalized SQL expression for an array column.

    BigQuery is unable to compare arrays & structs with ==/!=/distinct from, e.g.:
        Got error: 400 Grouping is not defined for arguments of type ARRAY<INT64> at …
    As a best effort the value is compared as a string instead, hoping that
    the JSON forms match on both sides: i.e. have properly ordered keys,
    same spacing, same quotes, etc.

    :param value: SQL expression that yields the array value.
    :param _coltype: Column type descriptor; not used here.
    :return: A ``to_json_string(...)`` expression wrapping ``value``.
    """
    serialized_array = f"to_json_string({ value } )"
    return serialized_array
80
+
81
def normalize_struct(self, value: str, _coltype: Struct) -> str:
    """Build the normalized SQL expression for a struct column.

    BigQuery is unable to compare arrays & structs with ==/!=/distinct from, e.g.:
        Got error: 400 Grouping is not defined for arguments of type ARRAY<INT64> at …
    As a best effort the value is compared as a string instead, hoping that
    the JSON forms match on both sides: i.e. have properly ordered keys,
    same spacing, same quotes, etc.

    :param value: SQL expression that yields the struct value.
    :param _coltype: Column type descriptor; not used here.
    :return: A ``to_json_string(...)`` expression wrapping ``value``.
    """
    serialized_struct = f"to_json_string({ value } )"
    return serialized_struct
87
+
60
88
61
89
class Mixin_Schema (AbstractMixin_Schema ):
62
90
def list_tables (self , table_schema : str , like : Compilable = None ) -> Compilable :
@@ -112,11 +140,12 @@ class Dialect(BaseDialect, Mixin_Schema):
112
140
"BIGNUMERIC" : Decimal ,
113
141
"FLOAT64" : Float ,
114
142
"FLOAT32" : Float ,
115
- # Text
116
143
"STRING" : Text ,
117
- # Boolean
118
144
"BOOL" : Boolean ,
145
+ "JSON" : JSON ,
119
146
}
147
+ TYPE_ARRAY_RE = re .compile (r'ARRAY<(.+)>' )
148
+ TYPE_STRUCT_RE = re .compile (r'STRUCT<(.+)>' )
120
149
MIXINS = {Mixin_Schema , Mixin_MD5 , Mixin_NormalizeValue , Mixin_TimeTravel , Mixin_RandomSample }
121
150
122
151
def random (self ) -> str :
@@ -134,6 +163,40 @@ def type_repr(self, t) -> str:
134
163
except KeyError :
135
164
return super ().type_repr (t )
136
165
166
def parse_type(
    self,
    table_path: DbPath,
    col_name: str,
    type_repr: str,
    *args: Any,  # pass-through args
    **kwargs: Any,  # pass-through args
) -> ColType:
    """Parse a column type, adding support for ``ARRAY<...>`` and ``STRUCT<...>``.

    The parent implementation is consulted first. Only when it reports an
    ``UnknownColType`` is ``type_repr`` matched (in full) against the array
    and struct patterns of this class.

    :param table_path: Path of the table that owns the column.
    :param col_name: Name of the column being parsed.
    :param type_repr: Textual type as reported by the database.
    :param args: Extra positional arguments forwarded to the parent parser.
    :param kwargs: Extra keyword arguments forwarded to the parent parser.
    :return: The parent's result, or an ``Array`` / ``Struct`` when the parent
        could not recognise the type and one of the patterns matched.
    """
    parsed = super().parse_type(table_path, col_name, type_repr, *args, **kwargs)
    if not isinstance(parsed, UnknownColType):
        return parsed

    array_match = self.TYPE_ARRAY_RE.fullmatch(type_repr)
    if array_match:
        # The element type is whatever sits between the angle brackets; parse it recursively.
        element_type = self.parse_type(table_path, col_name, array_match.group(1), *args, **kwargs)
        parsed = Array(item_type=element_type)

    # We currently ignore structs' structure, but later can parse it too. Examples:
    # - STRUCT<INT64, STRING(10)> (unnamed)
    # - STRUCT<foo INT64, bar STRING(10)> (named)
    # - STRUCT<foo INT64, bar ARRAY<INT64>> (with complex fields)
    # - STRUCT<foo INT64, bar STRUCT<a INT64, b INT64>> (nested)
    struct_match = self.TYPE_STRUCT_RE.fullmatch(type_repr)
    if struct_match:
        parsed = Struct()

    return parsed
192
+
193
def to_comparable(self, value: str, coltype: ColType) -> str:
    """Ensure that the expression is comparable in ``IS DISTINCT FROM``.

    JSON, array and struct columns are routed through
    ``normalize_value_by_type``; every other type is left to the parent
    implementation.

    :param value: SQL expression to make comparable.
    :param coltype: Type descriptor of the column behind ``value``.
    :return: A SQL expression to use in the comparison.
    """
    needs_normalizing = isinstance(coltype, (JSON, Array, Struct))
    if not needs_normalizing:
        return super().to_comparable(value, coltype)
    return self.normalize_value_by_type(value, coltype)
199
+
137
200
def set_timezone_to_utc(self) -> str:
    """Not implemented here; always raises.

    :raises NotImplementedError: on every call.
    """
    raise NotImplementedError()
139
202
0 commit comments