44import covidcast
55import pandas as pd
66from datetime import date , datetime , timedelta
7- from .errors import *
7+ from .errors import APIDataFetchError
88import re
99from typing import List
1010import json
1111
# Pattern for data file names of the form <date>_<geo_type>_<signal>.csv,
# e.g. 20200624_county_smoothed_nohh_cmnty_cli.csv. The date is exactly eight
# digits; geo_type is matched lazily, so it stops at the first underscore and
# everything after it up to ".csv" becomes the signal.
filename_regex = re.compile(r'^(?P<date>\d{8})_(?P<geo_type>\w+?)_(?P<signal>\w+)\.csv$')
1213
13- def get_filenames_with_geo_signal (path , date_slist : List [str ]):
14-
15- if pipeline_version == 'new' :
16- meta = covidcast .metadata ()
17- fb_meta = meta [meta ['data_source' ]== DATA_SOURCE ]
18- unique_signals = fb_meta ['signal' ].unique ().tolist ()
19- unique_geotypes = fb_meta ['geo_type' ].unique ().tolist ()
20-
21-
22- ##### Currently metadata returns --*community*-- signals that don't get generated
23- ##### in the new fb-pipeline. Seiving them out for now.
24- # Todo - Include weighted whh_cmnty_cli and wnohh_cmnty_cli
25- for sig in unique_signals :
26- if "community" in sig :
27- unique_signals .remove (sig )
28-
29-
30- geo_sig_cmbo = list (product (unique_geotypes , unique_signals ))
31- print (geo_sig_cmbo )
32- print ("Number of mixed types:" , len (geo_sig_cmbo ))
33-
34- for cmb in geo_sig_cmbo :
35- print (cmb )
36-
37-
38- filenames = read_relevant_date_filenames (data_folder , date_slist [0 ])
39-
40- else :
41- sdate = date_slist [0 ]
42- filenames = [f for f in listdir (path ) if isfile (join (path , f ))]
43-
44- sdate_filenames = [fname for fname in filenames if fname .find (sdate ) != - 1 ]
45-
46- # example: 20200624_county_smoothed_nohh_cmnty_cli
47- filename_regex = re .compile (r'^(\d{8})_([a-z]+)_(raw\S*|smoothed\S*)[_?](w?)([ci]li).csv$' )
48- geo_sig_cmbo = list ()
49- for f in sdate_filenames :
50-
51- m = filename_regex .match (f )
52- if (not m .group (0 )):
53- print ('=nameformat= not recognized as a daily format' )
54-
55- geo_type = m .group (2 )
56-
57-
58- if m .group (4 ): # weighted data 'w'
59- signal = "" .join ([m .group (4 ), m .group (5 )])
60- signal = "_" .join ([m .group (3 ), signal ])
61- # max_weighted_date = survey_date
62- else :
63- signal = "_" .join ([m .group (3 ), m .group (5 )])
64- # max_date = survey_date
65-
66- geo_sig_cmbo .append ((geo_type , signal ))
6714
def get_filenames_with_geo_signal(path, data_source, date_slist: List[str]):
    """Collect the data files for the first date and the geo/signal pairs to check.

    Args:
        path: Data directory, handed to ``read_relevant_date_filenames``.
        data_source: Value matched against the ``data_source`` column of the
            COVIDcast metadata to select the relevant rows.
        date_slist: Date strings; only the first entry is used here.

    Returns:
        A ``(filenames, geo_sig_cmbo)`` tuple, where ``geo_sig_cmbo`` is the
        list of every ``(geo_type, signal)`` pair built from the metadata.
    """
    meta = covidcast.metadata()
    source_meta = meta[meta['data_source'] == data_source]
    unique_signals = source_meta['signal'].unique().tolist()
    unique_geotypes = source_meta['geo_type'].unique().tolist()

    # Currently metadata returns *community* signals that don't get generated
    # in the new fb-pipeline, so sieve them out for now.
    # TODO: include weighted whh_cmnty_cli and wnohh_cmnty_cli.
    # A new list is built instead of calling remove() on the list being
    # iterated: removing in place skips the element after every removal, so
    # adjacent "community" signals would slip through.
    unique_signals = [sig for sig in unique_signals if "community" not in sig]

    geo_sig_cmbo = list(product(unique_geotypes, unique_signals))
    print(geo_sig_cmbo)
    print("Number of mixed types:", len(geo_sig_cmbo))

    for cmb in geo_sig_cmbo:
        print(cmb)

    # Use the ``path`` argument as the data directory (the previous name,
    # ``data_folder``, is not defined in this function).
    # NOTE(review): a single date string is passed here, while
    # read_relevant_date_filenames loops over its second argument -- confirm
    # whether the whole ``date_slist`` should be passed instead.
    filenames = read_relevant_date_filenames(path, date_slist[0])
    return filenames, geo_sig_cmbo
6937
7038
def read_filenames(path):
    """Return ``(filename, match)`` pairs for every regular file in *path*.

    ``match`` is the result of ``filename_regex.match`` applied to the file
    name, so it is ``None`` for names that do not fit the expected pattern.
    Directory entries that are not regular files are left out.
    """
    daily_filenames = [
        (f, filename_regex.match(f))
        for f in listdir(path)
        if isfile(join(path, f))
    ]
    return daily_filenames
7442
7543def read_relevant_date_filenames (data_path , date_slist ):
@@ -80,7 +48,7 @@ def read_relevant_date_filenames(data_path, date_slist):
8048 for dt in date_slist :
8149 if fl .find (dt ) != - 1 :
8250 filenames .append (fl )
83- return filenames
51+ return filenames
8452
8553def read_geo_sig_cmbo_files (geo_sig_cmbo , data_folder , filenames , date_slist ):
8654 for geo_sig in geo_sig_cmbo :
@@ -105,6 +73,16 @@ def read_geo_sig_cmbo_files(geo_sig_cmbo, data_folder, filenames, date_slist):
10573 df_list .append (df )
10674 yield pd .concat (df_list ), geo_sig [0 ], geo_sig [1 ]
10775
def load_csv(path):
    """Read the CSV file at *path* into a DataFrame with fixed column types.

    ``geo_id`` is read as a string (so values such as ``01001`` keep their
    leading zeros), while ``val``, ``se`` and ``sample_size`` are read as
    floats.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.
    """
    return pd.read_csv(
        path,
        dtype={
            'geo_id': str,
            'val': float,
            'se': float,
            'sample_size': float,
        })
85+
10886def fetch_daily_data (data_source , survey_date , geo_type , signal ):
10987 data_to_validate = covidcast .signal (data_source , signal , survey_date , survey_date , geo_type )
11088 if not isinstance (data_to_validate , pd .DataFrame ):
@@ -114,4 +92,4 @@ def fetch_daily_data(data_source, survey_date, geo_type, signal):
11492 ", geography-type:" + geo_type
11593 raise APIDataFetchError (custom_msg )
11694 return data_to_validate
117-
95+
0 commit comments