-import dask.dataframe as dd
+"""Module providing functions for processing and wrangling data."""
+
 from datetime import datetime
+from pathlib import Path
+
 import numpy as np
 import pandas as pd
-from pathlib import Path
 
+import dask.dataframe as dd
 from .config import Config
 
 def format_outname(prefix: str, se: bool, weekday: bool):
-    '''
+    """
+    Write out results.
 
     Parameters
     ----------
-    prefix
-    se
-    weekday
+    prefix:
+    se: boolean to write out standard errors, if true, use an obfuscated name
+    weekday: boolean for weekday adjustments.
+        signals will be generated with weekday adjustments (True) or without
+        adjustments (False)
 
     Returns
     -------
-
-    '''
-    # write out results
+    outname str
+    """
     out_name = "smoothed_adj_cli" if weekday else "smoothed_cli"
     if se:
         assert prefix is not None, "template has no obfuscated prefix"
         out_name = prefix + "_" + out_name
     return out_name
 
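A minimal sketch of the naming behavior described by the new docstring (the prefix value below is hypothetical, not part of the diff):

    # se=True requires an obfuscated prefix and prepends it to the adjusted name
    format_outname("obfuscated", se=True, weekday=True)     # -> "obfuscated_smoothed_adj_cli"
    format_outname(None, se=False, weekday=False)           # -> "smoothed_cli"
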
 def format_df(df: pd.DataFrame, geo_id: str, se: bool, logger):
-    '''
-    format dataframe and checks for anomalies to write results
+    """
+    Format dataframe and checks for anomalies to write results.
+
     Parameters
     ----------
     df: dataframe from output from update_sensor
@@ -39,7 +45,7 @@ def format_df(df: pd.DataFrame, geo_id: str, se: bool, logger):
     Returns
     -------
     filtered and formatted dataframe
-    '''
+    """
     # report in percentage
     df['val'] = df['val'] * 100
     df["se"] = df["se"] * 100
@@ -66,7 +72,7 @@ def format_df(df: pd.DataFrame, geo_id: str, se: bool, logger):
         valid_cond = (df['se'] > 0) & (df['val'] > 0)
         invalid_df = df[~valid_cond]
         if len(invalid_df) > 0:
-            logger.info(f"p=0, std_err=0 invalid")
+            logger.info("p=0, std_err=0 invalid")
         df = df[valid_cond]
     else:
         df["se"] = np.NAN
@@ -76,7 +82,8 @@ def format_df(df: pd.DataFrame, geo_id: str, se: bool, logger):
     return df
 
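A toy illustration of the percentage scaling and validity filter shown in the hunk above, assuming the same column names (this snippet is not part of the diff):

    # toy frame: the second row has se == 0, so it is logged and dropped when se=True
    toy = pd.DataFrame({"val": [0.012, 0.034], "se": [0.001, 0.0]})
    toy[["val", "se"]] = toy[["val", "se"]] * 100                  # report in percentage
    kept = toy[(toy["se"] > 0) & (toy["val"] > 0)]
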
 def write_to_csv(output_df: pd.DataFrame, prefix: str, geo_id: str, weekday: bool, se: bool, logger, output_path="."):
-    """Write sensor values to csv.
+    """
+    Write sensor values to csv.
 
     Args:
         output_dict: dictionary containing sensor rates, se, unique dates, and unique geo_id
@@ -106,9 +113,9 @@ def write_to_csv(output_df: pd.DataFrame, prefix: str, geo_id: str, weekday: boo
 
 
 def csv_to_df(filepath: str, startdate: datetime, enddate: datetime, dropdate: datetime, logger) -> pd.DataFrame:
-    '''
-    Reads csv using Dask and filters out based on date range and currently unused column,
-    then converts back into pandas dataframe.
+    """
+    Read csv using Dask, filters unneeded data, then converts back into pandas dataframe.
+
     Parameters
     ----------
     filepath: path to the aggregated doctor-visits data
@@ -117,7 +124,7 @@ def csv_to_df(filepath: str, startdate: datetime, enddate: datetime, dropdate: d
     dropdate: data drop date (YYYY-mm-dd)
 
     -------
-    '''
+    """
     filepath = Path(filepath)
     logger.info(f"Processing {filepath}")
 
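A rough sketch of the Dask read-filter-compute pattern the new docstring refers to; the date column name and the exact filter conditions are assumptions, not the indicator's actual ones:

    # hypothetical continuation: read lazily, filter by date, then materialize as pandas
    ddf = dd.read_csv(filepath, parse_dates=["servicedate"])        # "servicedate" is assumed
    ddf = ddf[(ddf["servicedate"] >= startdate) & (ddf["servicedate"] < dropdate)]
    df = ddf.compute()                                              # back to a pandas DataFrame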