@@ -146,21 +146,21 @@ def test_expected_groups(self):
146146 assert pattern_found ["signal" ] == "signal_signal"
147147
148148
149- class TestCheckBadGeoId :
149+ class TestCheckBadGeoIdFormat :
150150 params = {"data_source" : "" , "span_length" : 0 ,
151151 "end_date" : "2020-09-02" , "expected_lag" : {}}
152152
153153 def test_empty_df (self ):
154154 validator = Validator (self .params )
155155 empty_df = pd .DataFrame (columns = ["geo_id" ], dtype = str )
156- validator .check_bad_geo_id (empty_df , "name" , "county" )
156+ validator .check_bad_geo_id_format (empty_df , "name" , "county" )
157157
158158 assert len (validator .raised_errors ) == 0
159159
160160 def test_invalid_geo_type (self ):
161161 validator = Validator (self .params )
162162 empty_df = pd .DataFrame (columns = ["geo_id" ], dtype = str )
163- validator .check_bad_geo_id (empty_df , "name" , "hello" )
163+ validator .check_bad_geo_id_format (empty_df , "name" , "hello" )
164164
165165 assert len (validator .raised_errors ) == 1
166166 assert "check_geo_type" in [
@@ -173,7 +173,7 @@ def test_invalid_geo_id_county(self):
173173 validator = Validator (self .params )
174174 df = pd .DataFrame (["0" , "54321" , "123" , ".0000" ,
175175 "abc12" ], columns = ["geo_id" ])
176- validator .check_bad_geo_id (df , "name" , "county" )
176+ validator .check_bad_geo_id_format (df , "name" , "county" )
177177
178178 assert len (validator .raised_errors ) == 1
179179 assert "check_geo_id_format" in validator .raised_errors [0 ].check_data_id
@@ -184,7 +184,7 @@ def test_invalid_geo_id_msa(self):
184184 validator = Validator (self .params )
185185 df = pd .DataFrame (["0" , "54321" , "123" , ".0000" ,
186186 "abc12" ], columns = ["geo_id" ])
187- validator .check_bad_geo_id (df , "name" , "msa" )
187+ validator .check_bad_geo_id_format (df , "name" , "msa" )
188188
189189 assert len (validator .raised_errors ) == 1
190190 assert "check_geo_id_format" in validator .raised_errors [0 ].check_data_id
@@ -195,7 +195,7 @@ def test_invalid_geo_id_hrr(self):
195195 validator = Validator (self .params )
196196 df = pd .DataFrame (["1" , "12" , "123" , "1234" , "12345" ,
197197 "a" , "." , "ab1" ], columns = ["geo_id" ])
198- validator .check_bad_geo_id (df , "name" , "hrr" )
198+ validator .check_bad_geo_id_format (df , "name" , "hrr" )
199199
200200 assert len (validator .raised_errors ) == 1
201201 assert "check_geo_id_format" in validator .raised_errors [0 ].check_data_id
@@ -208,7 +208,7 @@ def test_invalid_geo_id_state(self):
208208 validator = Validator (self .params )
209209 df = pd .DataFrame (["aa" , "hi" , "HI" , "hawaii" ,
210210 "Hawaii" , "a" , "H.I." ], columns = ["geo_id" ])
211- validator .check_bad_geo_id (df , "name" , "state" )
211+ validator .check_bad_geo_id_format (df , "name" , "state" )
212212
213213 assert len (validator .raised_errors ) == 1
214214 assert "check_geo_id_format" in validator .raised_errors [0 ].check_data_id
@@ -221,7 +221,7 @@ def test_invalid_geo_id_national(self):
221221 validator = Validator (self .params )
222222 df = pd .DataFrame (["usa" , "SP" , " us" , "us" ,
223223 "usausa" , "US" ], columns = ["geo_id" ])
224- validator .check_bad_geo_id (df , "name" , "national" )
224+ validator .check_bad_geo_id_format (df , "name" , "national" )
225225
226226 assert len (validator .raised_errors ) == 1
227227 assert "check_geo_id_format" in validator .raised_errors [0 ].check_data_id
@@ -230,6 +230,87 @@ def test_invalid_geo_id_national(self):
230230 assert "US" not in validator .raised_errors [0 ].expression
231231 assert "SP" not in validator .raised_errors [0 ].expression
232232
class TestCheckBadGeoIdValue:
    """Tests for `Validator.check_bad_geo_id_value`, one per geo type.

    Every test feeds a single-column ``geo_id`` frame to the check and then
    inspects what ended up in ``raised_errors`` / ``raised_warnings``.

    NOTE(review): the county and msa tests look for the check id
    "check_bad_geo_id_value", whereas the hrr, state and national tests look
    for "check_geo_id_value" -- confirm which id the validator really emits.
    """

    params = {"data_source": "", "span_length": 0,
              "end_date": "2020-09-02", "expected_lag": {}}

    def _run_check(self, frame, geo_type):
        """Apply the value check to `frame` and hand back the validator used.

        Not collected by pytest (no ``test`` prefix); it only removes the
        construct-and-call boilerplate shared by the tests below.
        """
        validator = Validator(self.params)
        validator.check_bad_geo_id_value(frame, "name", geo_type)
        return validator

    def test_empty_df(self):
        """A frame without rows must not produce any error."""
        no_rows = pd.DataFrame(columns=["geo_id"], dtype=str)
        validator = self._run_check(no_rows, "county")
        assert len(validator.raised_errors) == 0

    def test_invalid_geo_id_county(self):
        """"88888" and "99999" are reported for county; "01001" is not."""
        frame = pd.DataFrame(["01001", "88888", "99999"], columns=["geo_id"])
        errors = self._run_check(frame, "county").raised_errors

        assert len(errors) == 1
        error = errors[0]
        assert "check_bad_geo_id_value" in error.check_data_id
        flagged = error.expression
        assert len(flagged) == 2
        assert "01001" not in flagged
        for unknown in ("88888", "99999"):
            assert unknown in flagged

    def test_invalid_geo_id_msa(self):
        """"88888" and "99999" are reported for msa; "10180" is not."""
        frame = pd.DataFrame(["10180", "88888", "99999"], columns=["geo_id"])
        errors = self._run_check(frame, "msa").raised_errors

        assert len(errors) == 1
        error = errors[0]
        assert "check_bad_geo_id_value" in error.check_data_id
        flagged = error.expression
        assert len(flagged) == 2
        assert "10180" not in flagged
        for unknown in ("88888", "99999"):
            assert unknown in flagged

    def test_invalid_geo_id_hrr(self):
        """"8", "88" and "888" are reported for hrr; "1", "11", "111" are not."""
        frame = pd.DataFrame(["1", "11", "111", "8", "88", "888"],
                             columns=["geo_id"])
        errors = self._run_check(frame, "hrr").raised_errors

        assert len(errors) == 1
        error = errors[0]
        assert "check_geo_id_value" in error.check_data_id
        flagged = error.expression
        assert len(flagged) == 3
        for known in ("1", "11", "111"):
            assert known not in flagged
        for unknown in ("8", "88", "888"):
            assert unknown in flagged

    def test_invalid_geo_id_state(self):
        """"aa" is reported for state; "ak" is not."""
        frame = pd.DataFrame(["aa", "ak"], columns=["geo_id"])
        errors = self._run_check(frame, "state").raised_errors

        assert len(errors) == 1
        error = errors[0]
        assert "check_geo_id_value" in error.check_data_id
        flagged = error.expression
        assert len(flagged) == 1
        assert "ak" not in flagged
        assert "aa" in flagged

    def test_uppercase_geo_id(self):
        """An uppercase state id yields a single warning and no error."""
        frame = pd.DataFrame(["ak", "AK"], columns=["geo_id"])
        validator = self._run_check(frame, "state")

        assert len(validator.raised_errors) == 0
        raised = validator.raised_warnings
        assert len(raised) == 1
        warning = raised[0]
        assert "check_geo_id_lowercase" in warning.check_data_id
        assert "AK" in warning.expression

    def test_invalid_geo_id_national(self):
        """"zz" is reported for national; "us" is not."""
        frame = pd.DataFrame(["us", "zz"], columns=["geo_id"])
        errors = self._run_check(frame, "national").raised_errors

        assert len(errors) == 1
        error = errors[0]
        assert "check_geo_id_value" in error.check_data_id
        flagged = error.expression
        assert len(flagged) == 1
        assert "us" not in flagged
        assert "zz" in flagged
313+
233314
234315class TestCheckBadVal :
235316 params = {"data_source" : "" , "span_length" : 1 ,
0 commit comments