From 4c6e72d61c18a6ae223363c558f558313ec827bf Mon Sep 17 00:00:00 2001 From: Samuel Gratzl Date: Fri, 6 Aug 2021 13:44:34 -0400 Subject: [PATCH 01/13] feat: update meta doc --- .../endpoints/covidcast_utils/db_signals.csv | 15 ++++++++++++++- .../endpoints/covidcast_utils/db_sources.csv | 3 ++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/server/endpoints/covidcast_utils/db_signals.csv b/src/server/endpoints/covidcast_utils/db_signals.csv index 29b18f1da..a1be459a1 100644 --- a/src/server/endpoints/covidcast_utils/db_signals.csv +++ b/src/server/endpoints/covidcast_utils/db_signals.csv @@ -374,4 +374,17 @@ usa-facts,deaths_cumulative_num,TRUE,deaths_7dav_incidence_num,TRUE,"Confirmed C usa-facts,deaths_cumulative_num,TRUE,deaths_7dav_incidence_prop,FALSE,"Confirmed COVID Deaths (Daily new, 7-day average, per 100k people)",TRUE,"Daily new confirmed COVID deaths, 7-day average, per 100k people",,day,Date,Value,per100k,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, usa-facts,deaths_cumulative_num,TRUE,deaths_cumulative_prop,FALSE,"Confirmed COVID Deaths (Cumulative, per 100k people)",TRUE,"Cumulative confirmed COVID deaths, per 100k people",,day,Date,Value,per100k,late,bad,FALSE,FALSE,TRUE,FALSE,FALSE, usa-facts,deaths_cumulative_num,TRUE,deaths_incidence_num,TRUE,Confirmed COVID Deaths (Daily new),TRUE,Daily new confirmed COVID deaths,,day,Date,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, -usa-facts,deaths_cumulative_num,TRUE,deaths_incidence_prop,FALSE,"Confirmed COVID Deaths (Daily new, per 100k people)",TRUE,"Daily new confirmed COVID deaths, per 100k people",,day,Date,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, \ No newline at end of file +usa-facts,deaths_cumulative_num,TRUE,deaths_incidence_prop,FALSE,"Confirmed COVID Deaths (Daily new, per 100k people)",TRUE,"Daily new confirmed COVID deaths, per 100k people",,day,Date,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_covid_incidence_num,FALSE,deaths_covid_incidence_num,FALSE,Confirmed or Presumed COVID Deaths (Weekly new),TRUE,Number of weekly new deaths with confirmed or presumed COVID-19 ,National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_covid_incidence_num,TRUE,deaths_covid_incidence_prop,FALSE,"Confirmed or Presumed COVID Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths with confirmed or presumed COVID-19, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_allcause_incidence_num,FALSE,deaths_allcause_incidence_num,FALSE,All Causes Deaths (Weekly new),TRUE,Number of weekly new deaths from all causes,National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_allcause_incidence_num,TRUE,deaths_allcause_incidence_prop,FALSE,"All Causes Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths from all causes, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_flu_incidence_num,FALSE,deaths_flu_incidence_num,FALSE,Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving 
Influenza and at least one of (Pneumonia, COVID-19)",National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_flu_incidence_num,TRUE,deaths_flu_incidence_prop,FALSE,"Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Influenza and at least one of (Pneumonia, COVID-19), per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_pneumonia_notflu_incidence_num,FALSE,deaths_pneumonia_notflu_incidence_num,FALSE,Pneumonia excl. Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving Pneumonia, excluding Influenza deaths ",National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_pneumonia_notflu_incidence_num,TRUE,deaths_pneumonia_notflu_incidence_prop,FALSE,"Pneumonia excl. Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Pneumonia, excluding Influenza deaths, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_covid_and_pneumonia_notflu_incidence_num,FALSE,deaths_covid_and_pneumonia_notflu_incidence_num,FALSE,COVID and Pneumonia excl. Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving COVID-19 and Pneumonia, excluding Influenza ",National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_covid_and_pneumonia_notflu_incidence_num,TRUE,deaths_covid_and_pneumonia_notflu_incidence_prop,FALSE,"COVID and Pneumonia excl. 
Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving COVID-19 and Pneumonia, excluding Influenza, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_pneumonia_or_flu_or_covid_incidence_num,FALSE,deaths_pneumonia_or_flu_or_covid_incidence_num,FALSE,"COVID, Pneumonia or Influenza Deaths (Weekly new)",TRUE,"Number of weekly new deaths involving Pneumonia, Influenza, or COVID-19 ",National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_pneumonia_or_flu_or_covid_incidence_num,TRUE,deaths_pneumonia_or_flu_or_covid_incidence_prop,FALSE,"COVID, Pneumonia or Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Pneumonia, Influenza, or COVID-19, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_percent_of_expected,FALSE,deaths_percent_of_expected,FALSE,"Percentage of Expected Deaths (Weekly new, per 100k people)",TRUE,Number of weekly new deaths for all causes in 2020 compared to the average number across the same week in 2017–2019 ,,week,Week,Value,percent,late,neutral,FALSE,FALSE,FALSE,FALSE,FALSE, \ No newline at end of file diff --git a/src/server/endpoints/covidcast_utils/db_sources.csv b/src/server/endpoints/covidcast_utils/db_sources.csv index b66838f08..4a99059ef 100644 --- a/src/server/endpoints/covidcast_utils/db_sources.csv +++ b/src/server/endpoints/covidcast_utils/db_sources.csv @@ -18,4 +18,5 @@ ght,ght,Google Health Trends,"Google Health Trends tracks Google searches on hea google-survey,google-survey,Google Symptom Surveys,"Delphi ran symptom surveys using a Google tool which collects responses through publisher websites, Google's Opinions Reward app, and similar applications. No longer updated after May 15, 2020.",smoothed_cli,CC BY,,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/google-survey.html) indicator-combination,indicator-combination-nmf,Statistical Combination (NMF),"This source provides signals which are statistical combinations of other sources, calculated by Delphi. It is not a primary data source. No longer updated after Marcy 17, 2021.",nmf_day_doc_fbs_ght,CC BY,,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/indicator-combination-inactive.html) quidel,quidel-flu,Quidel Inc. (Flu),"Quidel, Inc. manufactures diagnostic equipment for healthcare applications, and provides Delphi with anonymized data on tests and test results. This source includes flu tests. No longer updated after May 19, 2020.",smoothed_pct_negative,CC BY,https://cmu.box.com/s/sax48yxnahllrnbqlq6wqxblg6lsyq24,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/quidel.html#flu-tests) -safegraph,safegraph-daily,SafeGraph (Daily),"[SafeGraph](https://docs.safegraph.com/docs/social-distancing-metrics) compiles daily mobility information using anonymized location data from mobile phones. This source includes a range of isolation/lockdown behaviors and home dwell time. 
No longer updated after April 19, 2021.",completely_home_prop,CC BY,https://cmu.box.com/s/m0p1wpet4vuvey7od83n70h0e97ky2kg,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/safegraph.html) \ No newline at end of file +safegraph,safegraph-daily,SafeGraph (Daily),"[SafeGraph](https://docs.safegraph.com/docs/social-distancing-metrics) compiles daily mobility information using anonymized location data from mobile phones. This source includes a range of isolation/lockdown behaviors and home dwell time. No longer updated after April 19, 2021.",completely_home_prop,CC BY,https://cmu.box.com/s/m0p1wpet4vuvey7od83n70h0e97ky2kg,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/safegraph.html) +nchs-mortality,nchs-mortality,NCHS Mortality Data,"This data source of national provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)). This data is different from the death data available from USAFacts and JHU CSSE: deaths are reported by the date they occur, not the date they are reported by local health departments, and data is frequently reissued as additional death certificates from recent weeks are received and tabulated.",deaths_allcause_incidence_num,[NCHS Data Use Agreement](https://www.cdc.gov/nchs/data_access/restrictions.htm),,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/nchs-mortality.html) \ No newline at end of file From 136792d206402773fbbe4b27621f3723a767574e Mon Sep 17 00:00:00 2001 From: Samuel Gratzl Date: Fri, 6 Aug 2021 14:52:10 -0400 Subject: [PATCH 02/13] feat: work on week support in covidcast --- src/server/_params.py | 49 +++++++++++++++++ src/server/endpoints/covidcast.py | 55 ++++++++++++------- .../endpoints/covidcast_utils/correlation.py | 10 ++-- src/server/endpoints/covidcast_utils/trend.py | 1 - src/server/utils/__init__.py | 2 +- src/server/utils/dates.py | 6 ++ 6 files changed, 97 insertions(+), 26 deletions(-) diff --git a/src/server/_params.py b/src/server/_params.py index 5505b2714..b91f3ddd2 100644 --- a/src/server/_params.py +++ b/src/server/_params.py @@ -109,6 +109,15 @@ class TimePair: time_type: str time_values: Union[bool, Sequence[Union[int, Tuple[int, int]]]] + @property + def is_week(self) -> bool: + return self.time_type == 'week' + + @property + def is_day(self) -> bool: + return self.time_type != 'week' + + def count(self) -> float: """ returns the count of items in this pair @@ -225,3 +234,43 @@ def parse_day_arg(key: str) -> int: if not isinstance(r, int): raise ValidationFailedException(f"{key} must match YYYYMMDD or YYYY-MM-DD") return r + +def parse_week_arg(key: str) -> int: + v = request.values.get(key) + if not v: + raise ValidationFailedException(f"{key} param is required") + r = parse_week_value(v) + if not isinstance(r, int): + raise ValidationFailedException(f"{key} must match YYYYWW") + return r + + +def parse_week_range_arg(key: str) -> Tuple[int, int]: + v = request.values.get(key) + if not v: + raise ValidationFailedException(f"{key} param is required") + r = parse_week_value(v) + if not isinstance(r, tuple): + raise ValidationFailedException(f"{key} must match YYYYWW-YYYYWW") + return r + +def parse_day_or_week_arg(key: str) -> Tuple[int, bool]: + v = request.values.get(key) + if not v: + raise ValidationFailedException(f"{key} param is required") + # format is either YYYY-MM-DD or YYYYMMDD or YYYYMM + 
is_week = len(v) == 6 + if is_week: + return parse_week_arg(key), False + return parse_day_arg(key), True + +def parse_day_or_week_range_arg(key: str) -> Tuple[Tuple[int, int], bool]: + v = request.values.get(key) + if not v: + raise ValidationFailedException(f"{key} param is required") + # format is either YYYY-MM-DD--YYYY-MM-DD or YYYYMMDD-YYYYMMDD or YYYYMM-YYYYMM + # so if the first before the - has length 6, it must be a week + is_week = len(v.split('-', 2)[0]) == 6 + if is_week: + return parse_week_range_arg(key), False + return parse_day_range_arg(key), True diff --git a/src/server/endpoints/covidcast.py b/src/server/endpoints/covidcast.py index 11552a987..a381707c0 100644 --- a/src/server/endpoints/covidcast.py +++ b/src/server/endpoints/covidcast.py @@ -5,7 +5,7 @@ from flask.json import loads, jsonify from bisect import bisect_right from sqlalchemy import text -from pandas import read_csv +from pandas import read_csv, to_datetime from .._common import is_compatibility_mode, db from .._exceptions import ValidationFailedException, DatabaseErrorException @@ -16,8 +16,9 @@ parse_geo_arg, parse_source_signal_arg, parse_time_arg, - parse_day_arg, + parse_day_or_week_arg, parse_day_range_arg, + parse_day_or_week_range_arg, parse_single_source_signal_arg, parse_single_time_arg, parse_single_geo_arg, @@ -34,7 +35,7 @@ ) from .._pandas import as_pandas, print_pandas from .covidcast_utils import compute_trend, compute_trends, compute_correlations, compute_trend_value, CovidcastMetaEntry -from ..utils import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date +from ..utils import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date, shift_week_value, week_value_to_week from .covidcast_utils.model import TimeType, data_sources, create_source_signal_alias_mapper # first argument is the endpoint name @@ -172,15 +173,18 @@ def transform_row(row, proxy): @bp.route("/trend", methods=("GET", "POST")) def handle_trend(): - require_all("date", "window") + require_all("window", "date") source_signal_pairs = parse_source_signal_pairs() source_signal_pairs, alias_mapper = create_source_signal_alias_mapper(source_signal_pairs) - # TODO alias geo_pairs = parse_geo_pairs() - time_value = parse_day_arg("date") - time_window = parse_day_range_arg("window") - basis_time_value = extract_date("basis") or shift_time_value(time_value, -7) + time_window, is_day = parse_day_or_week_range_arg("window") + time_value, is_also_day = parse_day_or_week_arg("date") + if is_day != is_also_day: + raise ValidationFailedException('mixing weeks with day arguments') + basis_time_value = extract_date("basis") + if basis_time_value is None: + basis_time_value = (shift_time_value(time_value, -7) if is_day else shift_week_value(time_value, -7)) # build query q = QueryBuilder("covidcast", "t") @@ -193,7 +197,7 @@ def handle_trend(): q.where_source_signal_pairs("source", "signal", source_signal_pairs) q.where_geo_pairs("geo_type", "geo_value", geo_pairs) - q.where_time_pairs("time_type", "time_value", [TimePair("day", [time_window])]) + q.where_time_pairs("time_type", "time_value", [TimePair("day" if is_day else "week", [time_window])]) # fetch most recent issue fast _handle_lag_issues_as_of(q, None, None, None) @@ -225,7 +229,7 @@ def handle_trendseries(): source_signal_pairs, alias_mapper = create_source_signal_alias_mapper(source_signal_pairs) geo_pairs = parse_geo_pairs() - time_window = parse_day_range_arg("window") + time_window, is_day = parse_day_or_week_range_arg("window") 
basis_shift = extract_integer("basis") if basis_shift is None: basis_shift = 7 @@ -241,7 +245,7 @@ def handle_trendseries(): q.where_source_signal_pairs("source", "signal", source_signal_pairs) q.where_geo_pairs("geo_type", "geo_value", geo_pairs) - q.where_time_pairs("time_type", "time_value", [TimePair("day", [time_window])]) + q.where_time_pairs("time_type", "time_value", [TimePair("day" if is_day else 'week', [time_window])]) # fetch most recent issue fast _handle_lag_issues_as_of(q, None, None, None) @@ -249,6 +253,8 @@ def handle_trendseries(): p = create_printer() shifter = lambda x: shift_time_value(x, -basis_shift) + if not is_day: + shifter = lambda x: shift_week_value(x, -basis_shift) def gen(rows): for key, group in groupby((parse_row(row, fields_string, fields_int, fields_float) for row in rows), lambda row: (row["geo_type"], row["geo_value"], row["source"], row["signal"])): @@ -276,7 +282,8 @@ def handle_correlation(): other_pairs = parse_source_signal_arg("others") source_signal_pairs, alias_mapper = create_source_signal_alias_mapper(other_pairs + [reference]) geo_pairs = parse_geo_arg() - time_window = parse_day_range_arg("window") + time_window, is_day = parse_day_or_week_range_arg("window") + lag = extract_integer("lag") if lag is None: lag = 28 @@ -296,12 +303,17 @@ def handle_correlation(): source_signal_pairs, ) q.where_geo_pairs("geo_type", "geo_value", geo_pairs) - q.where_time_pairs("time_type", "time_value", [TimePair("day", [time_window])]) + q.where_time_pairs("time_type", "time_value", [TimePair("day" if is_day else "week", [time_window])]) # fetch most recent issue fast q.conditions.append(f"({q.alias}.is_latest_issue IS TRUE)") - df = as_pandas(str(q), q.params, parse_dates={"time_value": "%Y%m%d"}) + df = as_pandas(str(q), q.params) + if is_day: + df['time_value'] = to_datetime(df['time_value'], format="%Y%m%d") + else: + # week but convert to date for simpler shifting + df['time_value'] = to_datetime(df['time_value'].apply(lambda v: week_value_to_week(v).startdate())) p = create_printer() @@ -329,7 +341,7 @@ def gen(): for (source, signal), other_group in other_groups: if alias_mapper: source = alias_mapper(source, signal) - for cor in compute_correlations(geo_type, geo_value, source, signal, lag, reference_group, other_group): + for cor in compute_correlations(geo_type, geo_value, source, signal, lag, reference_group, other_group, is_day): yield cor.asdict() # now use a generator for sending the rows and execute all the other queries @@ -345,6 +357,8 @@ def handle_export(): geo_type = request.args.get("geo_type", "county") geo_values = request.args.get("geo_values", "*") + # TODO weekly signals + if geo_values != "*": geo_values = geo_values.split(",") @@ -424,8 +438,9 @@ def handle_backfill(): # don't need the alias mapper since we don't return the source time_pair = parse_single_time_arg("time") + is_day = time_pair.is_day geo_pair = parse_single_geo_arg("geo") - reference_anchor_lag = extract_integer("anchor_lag") # in days + reference_anchor_lag = extract_integer("anchor_lag") # in days or weeks if reference_anchor_lag is None: reference_anchor_lag = 60 @@ -461,7 +476,8 @@ def gen(rows): for time_value, group in groupby((parse_row(row, fields_string, fields_int, fields_float) for row in rows), lambda row: row["time_value"]): # compute data per time value issues: List[Dict[str, Any]] = [r for r in group] - anchor_row = find_anchor_row(issues, shift_time_value(time_value, reference_anchor_lag)) + shifted_time_value = shift_time_value(time_value, 
reference_anchor_lag) if is_day else shift_week_value(time_value, reference_anchor_lag) + anchor_row = find_anchor_row(issues, shifted_time_value) for i, row in enumerate(issues): if i > 0: @@ -576,8 +592,9 @@ def handle_coverage(): source_signal_pairs, alias_mapper = create_source_signal_alias_mapper(source_signal_pairs) geo_type = request.args.get("geo_type", "county") if "window" in request.values: - time_window = parse_day_range_arg("window") + time_window, is_day = parse_day_or_week_range_arg("window") else: + is_day = False # TODO now_time = extract_date("latest") now = date.today() if now_time is None else time_value_to_date(now_time) last = extract_integer("days") @@ -601,7 +618,7 @@ def handle_coverage(): else: q.where(geo_type=geo_type) q.where_source_signal_pairs("source", "signal", source_signal_pairs) - q.where_time_pairs("time_type", "time_value", [TimePair("day", [time_window])]) + q.where_time_pairs("time_type", "time_value", [TimePair("day" if is_day else 'week', [time_window])]) q.group_by = "c.source, c.signal, c.time_value" q.set_order("source", "signal", "time_value") diff --git a/src/server/endpoints/covidcast_utils/correlation.py b/src/server/endpoints/covidcast_utils/correlation.py index b0b2b623c..3f8268b25 100644 --- a/src/server/endpoints/covidcast_utils/correlation.py +++ b/src/server/endpoints/covidcast_utils/correlation.py @@ -49,7 +49,7 @@ class Correlation: """ -def lag_join(lag: int, x: pd.DataFrame, y: pd.DataFrame) -> pd.DataFrame: +def lag_join(lag: int, x: pd.DataFrame, y: pd.DataFrame, is_day = True) -> pd.DataFrame: # x_t_i ~ y_t_(i-lag) # aka x_t_(i+lag) ~ y_t_i @@ -60,24 +60,24 @@ def lag_join(lag: int, x: pd.DataFrame, y: pd.DataFrame) -> pd.DataFrame: # x_t_i ~ y_shifted_t_i # shift y such that y_t(i - lag) -> y_shifted_t_i x_shifted = x - y_shifted = y.shift(lag, freq="D") + y_shifted = y.shift(lag, freq="D" if is_day else 'W') else: # lag < 0 # x_shifted_t_i ~ y_t_i # shift x such that x_t(i+lag) -> x_shifted_t_i # lag < 0 -> - - lag = + lag - x_shifted = x.shift(-lag, freq="D") + x_shifted = x.shift(-lag, freq="D" if is_day else 'W') y_shifted = y # inner join to remove invalid pairs r = x_shifted.join(y_shifted, how="inner", lsuffix="_x", rsuffix="_y") return r.rename(columns=dict(value_x="x", value_y="y")) -def compute_correlations(geo_type: str, geo_value: str, signal_source: str, signal_signal: str, lag: int, x: pd.DataFrame, y: pd.DataFrame) -> Iterable[CorrelationResult]: +def compute_correlations(geo_type: str, geo_value: str, signal_source: str, signal_signal: str, lag: int, x: pd.DataFrame, y: pd.DataFrame, is_day = True) -> Iterable[CorrelationResult]: """ x,y ... 
DataFrame with "time_value" (Date) index and "value" (float) column """ for current_lag in range(-lag, lag + 1): - xy = lag_join(current_lag, x, y) + xy = lag_join(current_lag, x, y, is_day) c = compute_correlation(xy) yield CorrelationResult(geo_type, geo_value, signal_source, signal_signal, current_lag, r2=c.r2, intercept=c.intercept, slope=c.slope, samples=c.samples) diff --git a/src/server/endpoints/covidcast_utils/trend.py b/src/server/endpoints/covidcast_utils/trend.py index c7df3ba88..43c4ac21b 100644 --- a/src/server/endpoints/covidcast_utils/trend.py +++ b/src/server/endpoints/covidcast_utils/trend.py @@ -2,7 +2,6 @@ from typing import Optional, Iterable, Tuple, Dict, List, Callable from enum import Enum from collections import OrderedDict -from ...utils import shift_time_value class TrendEnum(str, Enum): diff --git a/src/server/utils/__init__.py b/src/server/utils/__init__.py index 2cf5e85d9..43e615094 100644 --- a/src/server/utils/__init__.py +++ b/src/server/utils/__init__.py @@ -1 +1 @@ -from .dates import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date, days_in_range, weeks_in_range +from .dates import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date, days_in_range, weeks_in_range, shift_week_value, week_to_time_value, week_value_to_week diff --git a/src/server/utils/dates.py b/src/server/utils/dates.py index a55c3c2c4..ead7f53f3 100644 --- a/src/server/utils/dates.py +++ b/src/server/utils/dates.py @@ -37,6 +37,12 @@ def shift_time_value(time_value: int, days: int) -> int: shifted = d + timedelta(days=days) return date_to_time_value(shifted) +def shift_week_value(week_value: int, weeks: int) -> int: + if weeks == 0: + return week_value + week = week_value_to_week(week_value) + shifted = week + weeks + return week_to_time_value(shifted) def days_in_range(range: Tuple[int, int]) -> int: """ From dac83989f83831803dcf24af3715f665aab1dd71 Mon Sep 17 00:00:00 2001 From: Samuel Gratzl Date: Fri, 6 Aug 2021 15:21:35 -0400 Subject: [PATCH 03/13] feat: add helper to count daily/weekly signal types --- src/server/endpoints/covidcast_utils/model.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/server/endpoints/covidcast_utils/model.py b/src/server/endpoints/covidcast_utils/model.py index dcb32484e..2274d8f69 100644 --- a/src/server/endpoints/covidcast_utils/model.py +++ b/src/server/endpoints/covidcast_utils/model.py @@ -228,12 +228,39 @@ def _load_data_signals(sources: List[DataSource]): data_signals, data_signals_df = _load_data_signals(data_sources) data_signals_by_key = {d.key: d for d in data_signals} +# also add the resolved signal version to the signal lookup +for d in data_signals: + source = data_source_by_id.get(d.source) + if source and source.uses_db_alias: + data_signals_by_key[(source.db_source, d.signal)] = d + def get_related_signals(signal: DataSignal) -> List[DataSignal]: return [s for s in data_signals if s != signal and s.signal_basename == signal.signal_basename] +def count_signal_time_types(source_signals: List[SourceSignalPair]) -> Tuple[int, int]: + """ + count the number of weekly signals in this queries + @returns daily counts, weekly counts + """ + weekly = 0 + daily = 0 + for pair in source_signals: + if pair.signal == True: + continue + for s in pair.signal: + signal = data_signals_by_key.get((pair.source, s)) + if not signal: + continue + if signal.time_type == TimeType.week: + weekly += 1 + else: + daily += 1 + return daily, weekly + + def 
create_source_signal_alias_mapper(source_signals: List[SourceSignalPair]) -> Tuple[List[SourceSignalPair], Optional[Callable[[str, str], str]]]: alias_to_data_sources: Dict[str, List[DataSource]] = {} transformed_pairs: List[SourceSignalPair] = [] From fb4abfd566a477d3a07fbd41af0e5b16ddbf0b4f Mon Sep 17 00:00:00 2001 From: Samuel Gratzl Date: Fri, 6 Aug 2021 15:21:50 -0400 Subject: [PATCH 04/13] feat: verify matching argument types --- src/server/endpoints/covidcast.py | 70 ++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 19 deletions(-) diff --git a/src/server/endpoints/covidcast.py b/src/server/endpoints/covidcast.py index a381707c0..5ad50e3b3 100644 --- a/src/server/endpoints/covidcast.py +++ b/src/server/endpoints/covidcast.py @@ -1,6 +1,7 @@ from typing import List, Optional, Union, Tuple, Dict, Any from itertools import groupby from datetime import date, datetime, timedelta +from epiweeks import Week from flask import Blueprint, request from flask.json import loads, jsonify from bisect import bisect_right @@ -17,7 +18,6 @@ parse_source_signal_arg, parse_time_arg, parse_day_or_week_arg, - parse_day_range_arg, parse_day_or_week_range_arg, parse_single_source_signal_arg, parse_single_time_arg, @@ -35,8 +35,8 @@ ) from .._pandas import as_pandas, print_pandas from .covidcast_utils import compute_trend, compute_trends, compute_correlations, compute_trend_value, CovidcastMetaEntry -from ..utils import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date, shift_week_value, week_value_to_week -from .covidcast_utils.model import TimeType, data_sources, create_source_signal_alias_mapper +from ..utils import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date, shift_week_value, week_value_to_week, guess_time_value_is_day, week_to_time_value +from .covidcast_utils.model import TimeType, count_signal_time_types, data_sources, create_source_signal_alias_mapper # first argument is the endpoint name bp = Blueprint("covidcast", __name__) @@ -162,7 +162,7 @@ def handle(): _handle_lag_issues_as_of(q, issues, lag, as_of) def transform_row(row, proxy): - if is_compatibility or not alias_mapper or 'source' not in row: + if is_compatibility or not alias_mapper or "source" not in row: return row row["source"] = alias_mapper(row["source"], proxy["signal"]) return row @@ -171,20 +171,29 @@ def transform_row(row, proxy): return execute_query(str(q), q.params, fields_string, fields_int, fields_float, transform=transform_row) +def _verify_argument_time_type_matches(is_day_argument: bool, count_daily_signal: int, count_weekly_signal: int) -> None: + if is_day_argument and count_weekly_signal > 0: + raise ValidationFailedException("day arguments for weekly signals") + if not is_day_argument and count_daily_signal > 0: + raise ValidationFailedException("week arguments for daily signals") + + @bp.route("/trend", methods=("GET", "POST")) def handle_trend(): require_all("window", "date") source_signal_pairs = parse_source_signal_pairs() + daily_signals, weekly_signals = count_signal_time_types(source_signal_pairs) source_signal_pairs, alias_mapper = create_source_signal_alias_mapper(source_signal_pairs) geo_pairs = parse_geo_pairs() time_window, is_day = parse_day_or_week_range_arg("window") time_value, is_also_day = parse_day_or_week_arg("date") if is_day != is_also_day: - raise ValidationFailedException('mixing weeks with day arguments') + raise ValidationFailedException("mixing weeks with day arguments") + _verify_argument_time_type_matches(is_day, 
daily_signals, weekly_signals) basis_time_value = extract_date("basis") if basis_time_value is None: - basis_time_value = (shift_time_value(time_value, -7) if is_day else shift_week_value(time_value, -7)) + basis_time_value = shift_time_value(time_value, -7) if is_day else shift_week_value(time_value, -7) # build query q = QueryBuilder("covidcast", "t") @@ -226,14 +235,17 @@ def gen(rows): def handle_trendseries(): require_all("window") source_signal_pairs = parse_source_signal_pairs() + daily_signals, weekly_signals = count_signal_time_types(source_signal_pairs) source_signal_pairs, alias_mapper = create_source_signal_alias_mapper(source_signal_pairs) geo_pairs = parse_geo_pairs() time_window, is_day = parse_day_or_week_range_arg("window") + _verify_argument_time_type_matches(is_day, daily_signals, weekly_signals) basis_shift = extract_integer("basis") if basis_shift is None: basis_shift = 7 + # build query q = QueryBuilder("covidcast", "t") @@ -245,7 +257,7 @@ def handle_trendseries(): q.where_source_signal_pairs("source", "signal", source_signal_pairs) q.where_geo_pairs("geo_type", "geo_value", geo_pairs) - q.where_time_pairs("time_type", "time_value", [TimePair("day" if is_day else 'week', [time_window])]) + q.where_time_pairs("time_type", "time_value", [TimePair("day" if is_day else "week", [time_window])]) # fetch most recent issue fast _handle_lag_issues_as_of(q, None, None, None) @@ -280,9 +292,11 @@ def handle_correlation(): require_all("reference", "window", "others", "geo") reference = parse_single_source_signal_arg("reference") other_pairs = parse_source_signal_arg("others") + daily_signals, weekly_signals = count_signal_time_types(other_pairs + [reference]) source_signal_pairs, alias_mapper = create_source_signal_alias_mapper(other_pairs + [reference]) geo_pairs = parse_geo_arg() time_window, is_day = parse_day_or_week_range_arg("window") + _verify_argument_time_type_matches(is_day, daily_signals, weekly_signals) lag = extract_integer("lag") if lag is None: @@ -310,10 +324,10 @@ def handle_correlation(): df = as_pandas(str(q), q.params) if is_day: - df['time_value'] = to_datetime(df['time_value'], format="%Y%m%d") + df["time_value"] = to_datetime(df["time_value"], format="%Y%m%d") else: # week but convert to date for simpler shifting - df['time_value'] = to_datetime(df['time_value'].apply(lambda v: week_value_to_week(v).startdate())) + df["time_value"] = to_datetime(df["time_value"].apply(lambda v: week_value_to_week(v).startdate())) p = create_printer() @@ -351,14 +365,16 @@ def gen(): @bp.route("/csv", methods=("GET", "POST")) def handle_export(): source, signal = request.args.get("signal", "jhu-csse:confirmed_incidence_num").split(":") - source_signal_pairs, alias_mapper = create_source_signal_alias_mapper([SourceSignalPair(source, [signal])]) + source_signal_pairs = [SourceSignalPair(source, [signal])] + _, weekly_signals = count_signal_time_types(source_signal_pairs) + if weekly_signals > 0: + raise ValidationFailedException('weekly signals are not supported') + source_signal_pairs, alias_mapper = create_source_signal_alias_mapper(source_signal_pairs) start_day = request.args.get("start_day", "2020-04-01") end_day = request.args.get("end_day", "2020-09-01") geo_type = request.args.get("geo_type", "county") geo_values = request.args.get("geo_values", "*") - # TODO weekly signals - if geo_values != "*": geo_values = geo_values.split(",") @@ -434,11 +450,14 @@ def handle_backfill(): """ require_all("geo", "time", "signal") signal_pair = 
parse_single_source_signal_arg("signal") + daily_signals, weekly_signals = count_signal_time_types([signal_pair]) source_signal_pairs, _ = create_source_signal_alias_mapper([signal_pair]) # don't need the alias mapper since we don't return the source time_pair = parse_single_time_arg("time") is_day = time_pair.is_day + _verify_argument_time_type_matches(is_day, daily_signals, weekly_signals) + geo_pair = parse_single_geo_arg("geo") reference_anchor_lag = extract_integer("anchor_lag") # in days or weeks if reference_anchor_lag is None: @@ -589,18 +608,31 @@ def handle_coverage(): """ source_signal_pairs = parse_source_signal_pairs() + daily_signals, weekly_signals = count_signal_time_types(source_signal_pairs) source_signal_pairs, alias_mapper = create_source_signal_alias_mapper(source_signal_pairs) + geo_type = request.args.get("geo_type", "county") if "window" in request.values: time_window, is_day = parse_day_or_week_range_arg("window") else: - is_day = False # TODO now_time = extract_date("latest") - now = date.today() if now_time is None else time_value_to_date(now_time) last = extract_integer("days") - if last is None: - last = 30 - time_window = (date_to_time_value(now - timedelta(days=last)), date_to_time_value(now)) + last_weeks = extract_integer("weeks") + # week if latest is week like or weeks are given otherwise we don't know and guess days + if (now_time is not None and not guess_time_value_is_day(now_time)) or last_weeks is not None or weekly_signals > 0: + # week + if last_weeks is None: + last_weeks = last or 30 + is_day = False + now_week = Week.thisweek() if now_time is None else week_value_to_week(now_time) + time_window = (week_to_time_value(now_week - last_weeks), week_to_time_value(now_week)) + else: + is_day = True + if last is None: + last = 30 + now = date.today() if now_time is None else time_value_to_date(now_time) + time_window = (date_to_time_value(now - timedelta(days=last)), date_to_time_value(now)) + _verify_argument_time_type_matches(is_day, daily_signals, weekly_signals) q = QueryBuilder("covidcast", "c") fields_string = ["source", "signal"] @@ -618,14 +650,14 @@ def handle_coverage(): else: q.where(geo_type=geo_type) q.where_source_signal_pairs("source", "signal", source_signal_pairs) - q.where_time_pairs("time_type", "time_value", [TimePair("day" if is_day else 'week', [time_window])]) + q.where_time_pairs("time_type", "time_value", [TimePair("day" if is_day else "week", [time_window])]) q.group_by = "c.source, c.signal, c.time_value" q.set_order("source", "signal", "time_value") _handle_lag_issues_as_of(q, None, None, None) def transform_row(row, proxy): - if not alias_mapper or 'source' not in row: + if not alias_mapper or "source" not in row: return row row["source"] = alias_mapper(row["source"], proxy["signal"]) return row From c86131b3b533ef89a1969bf2744e0fc41c0866a7 Mon Sep 17 00:00:00 2001 From: Samuel Gratzl Date: Fri, 6 Aug 2021 15:22:01 -0400 Subject: [PATCH 05/13] feat: add helper for guessing time type --- src/server/utils/__init__.py | 2 +- src/server/utils/dates.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/server/utils/__init__.py b/src/server/utils/__init__.py index 43e615094..c8c323b9b 100644 --- a/src/server/utils/__init__.py +++ b/src/server/utils/__init__.py @@ -1 +1 @@ -from .dates import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date, days_in_range, weeks_in_range, shift_week_value, week_to_time_value, week_value_to_week +from .dates import shift_time_value, 
date_to_time_value, time_value_to_iso, time_value_to_date, days_in_range, weeks_in_range, shift_week_value, week_to_time_value, week_value_to_week, guess_time_value_is_day diff --git a/src/server/utils/dates.py b/src/server/utils/dates.py index ead7f53f3..4053ad2ad 100644 --- a/src/server/utils/dates.py +++ b/src/server/utils/dates.py @@ -19,6 +19,10 @@ def week_value_to_week(value: int) -> Week: return Week(date.max.year - 1, 1) # minus 1 since internally it does some checks with a year + 1 return Week(year=year, week=week) +def guess_time_value_is_day(value: int) -> bool: + # YYYYMMDD type and not YYYYMM + return len(str(value)) > 6 + def date_to_time_value(d: date) -> int: return int(d.strftime("%Y%m%d")) From aad54e09540690bc9cc5955e76801ecd27b8de26 Mon Sep 17 00:00:00 2001 From: Samuel Gratzl Date: Fri, 6 Aug 2021 15:35:33 -0400 Subject: [PATCH 06/13] feat: support week in /csv --- src/server/_params.py | 7 ++++-- src/server/endpoints/covidcast.py | 42 ++++++++++++++----------------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/src/server/_params.py b/src/server/_params.py index b91f3ddd2..fa4f63483 100644 --- a/src/server/_params.py +++ b/src/server/_params.py @@ -5,8 +5,9 @@ from flask import request + from ._exceptions import ValidationFailedException -from .utils import days_in_range, weeks_in_range +from .utils import days_in_range, weeks_in_range, guess_time_value_is_day def _parse_common_multi_arg(key: str) -> List[Tuple[str, Union[bool, Sequence[str]]]]: @@ -254,9 +255,11 @@ def parse_week_range_arg(key: str) -> Tuple[int, int]: raise ValidationFailedException(f"{key} must match YYYYWW-YYYYWW") return r -def parse_day_or_week_arg(key: str) -> Tuple[int, bool]: +def parse_day_or_week_arg(key: str, default_value: Optional[int] = None) -> Tuple[int, bool]: v = request.values.get(key) if not v: + if default_value is not None: + return default_value, guess_time_value_is_day(default_value) raise ValidationFailedException(f"{key} param is required") # format is either YYYY-MM-DD or YYYYMMDD or YYYYMM is_week = len(v) == 6 diff --git a/src/server/endpoints/covidcast.py b/src/server/endpoints/covidcast.py index 5ad50e3b3..e5ac6e96d 100644 --- a/src/server/endpoints/covidcast.py +++ b/src/server/endpoints/covidcast.py @@ -1,6 +1,6 @@ from typing import List, Optional, Union, Tuple, Dict, Any from itertools import groupby -from datetime import date, datetime, timedelta +from datetime import date, timedelta from epiweeks import Week from flask import Blueprint, request from flask.json import loads, jsonify @@ -366,43 +366,39 @@ def gen(): def handle_export(): source, signal = request.args.get("signal", "jhu-csse:confirmed_incidence_num").split(":") source_signal_pairs = [SourceSignalPair(source, [signal])] - _, weekly_signals = count_signal_time_types(source_signal_pairs) - if weekly_signals > 0: - raise ValidationFailedException('weekly signals are not supported') + daily_signals, weekly_signals = count_signal_time_types(source_signal_pairs) source_signal_pairs, alias_mapper = create_source_signal_alias_mapper(source_signal_pairs) - start_day = request.args.get("start_day", "2020-04-01") - end_day = request.args.get("end_day", "2020-09-01") + start_day, is_day = parse_day_or_week_arg("start_day", 202001 if weekly_signals > 0 else 20200401) + end_day , is_end_day = parse_day_or_week_arg("end_day", 202020 if weekly_signals > 0 else 20200901) + if is_day != is_end_day: + raise ValidationFailedException("mixing weeks with day arguments") + 
_verify_argument_time_type_matches(is_day, daily_signals, weekly_signals) + geo_type = request.args.get("geo_type", "county") geo_values = request.args.get("geo_values", "*") if geo_values != "*": geo_values = geo_values.split(",") - as_of = request.args.get("as_of", None) - - start_day = datetime.strptime(start_day, "%Y-%m-%d").date() - end_day = datetime.strptime(end_day, "%Y-%m-%d").date() - - if as_of is not None: - as_of = datetime.strptime(as_of, "%Y-%m-%d").date() + as_of, is_as_of_day = parse_day_or_week_arg('as_of') if 'as_of' in request.args else None, is_day + if is_day != is_as_of_day: + raise ValidationFailedException("mixing weeks with day arguments") # build query q = QueryBuilder("covidcast", "t") q.set_fields(["geo_value", "signal", "time_value", "issue", "lag", "value", "stderr", "sample_size", "geo_type", "source"], [], []) q.set_order("time_value", "geo_value") - q.where(time_type="day") q.where_source_signal_pairs("source", "signal", source_signal_pairs) - q.conditions.append("time_value BETWEEN :start_day AND :end_day") - q.params["start_day"] = date_to_time_value(start_day) - q.params["end_day"] = date_to_time_value(end_day) + q.where_time_pairs("time_type", "time_value", [TimePair('day' if is_day else 'week', [(start_day, end_day)])]) q.where_geo_pairs("geo_type", "geo_value", [GeoPair(geo_type, True if geo_values == "*" else geo_values)]) - _handle_lag_issues_as_of(q, None, None, date_to_time_value(as_of) if as_of is not None else None) + _handle_lag_issues_as_of(q, None, None, as_of) + format_date = time_value_to_iso if is_day else lambda x: week_value_to_week(x).cdcformat() # tag as_of in filename, if it was specified - as_of_str = "-asof-{as_of}".format(as_of=as_of.isoformat()) if as_of is not None else "" - filename = "covidcast-{source}-{signal}-{start_day}-to-{end_day}{as_of}".format(source=source, signal=signal, start_day=start_day.isoformat(), end_day=end_day.isoformat(), as_of=as_of_str) + as_of_str = "-asof-{as_of}".format(as_of=format_date(as_of)) if as_of is not None else "" + filename = "covidcast-{source}-{signal}-{start_day}-to-{end_day}{as_of}".format(source=source, signal=signal, start_day=format_date(start_day), end_day=format_date(end_day), as_of=as_of_str) p = CSVPrinter(filename) def parse_row(i, row): @@ -411,8 +407,8 @@ def parse_row(i, row): "": i, "geo_value": row["geo_value"], "signal": row["signal"], - "time_value": time_value_to_iso(row["time_value"]), - "issue": time_value_to_iso(row["issue"]), + "time_value": time_value_to_iso(row["time_value"]) if is_day else row["time_value"], + "issue": time_value_to_iso(row["issue"]) if is_day else row["issue"], "lag": row["lag"], "value": row["value"], "stderr": row["stderr"], @@ -436,7 +432,7 @@ def gen(first_row, rows): first_row = next(r, None) if not first_row: return "No matching data found for signal {source}:{signal} " "at {geo} level from {start_day} to {end_day}, as of {as_of}.".format( - source=source, signal=signal, geo=geo_type, start_day=start_day.isoformat(), end_day=end_day.isoformat(), as_of=(date.today().isoformat() if as_of is None else as_of.isoformat()) + source=source, signal=signal, geo=geo_type, start_day=format_date(start_day), end_day=format_date(end_day), as_of=(date.today().isoformat() if as_of is None else format_date(as_of)) ) # now use a generator for sending the rows and execute all the other queries From 69d5652682cc4d415f28852e114435ebf5299caf Mon Sep 17 00:00:00 2001 From: Samuel Gratzl Date: Thu, 12 Aug 2021 10:07:52 -0400 Subject: [PATCH 07/13] feat: add missing 
confirmed_admissions_covid_1d_7dav meta signal --- src/server/endpoints/covidcast_utils/db_signals.csv | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/server/endpoints/covidcast_utils/db_signals.csv b/src/server/endpoints/covidcast_utils/db_signals.csv index 29b18f1da..ab96195b1 100644 --- a/src/server/endpoints/covidcast_utils/db_signals.csv +++ b/src/server/endpoints/covidcast_utils/db_signals.csv @@ -290,7 +290,9 @@ google-symptoms,anosmia_raw_search,TRUE,anosmia_smoothed_search,TRUE,{base_name} google-symptoms,sum_anosmia_ageusia_raw_search,FALSE,sum_anosmia_ageusia_raw_search,FALSE,Sum Anosmia Ageusia Searches,TRUE,"The sum of Google search volume for anosmia and ageusia related searches, in arbitrary units that are normalized for overall search users",,day,Date,Value,raw,public,bad,FALSE,FALSE,FALSE,FALSE,FALSE, google-symptoms,sum_anosmia_ageusia_raw_search,TRUE,sum_anosmia_ageusia_smoothed_search,TRUE,{base_name} (7-day average),TRUE,,,day,Date,Value,raw,public,bad,TRUE,FALSE,FALSE,FALSE,FALSE, hhs,confirmed_admissions_covid_1d,FALSE,confirmed_admissions_covid_1d,FALSE,Confirmed COVID-19 Admissions per day,TRUE,Sum of adult and pediatric confirmed COVID-19 hospital admissions occurring each day. ,,day,Date,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +hhs,confirmed_admissions_covid_1d,,confirmed_admissions_covid_1d_7dav,TRUE,{base_name} (7-day average),TRUE,,,day,Date,Value,count,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, hhs,sum_confirmed_suspected_admissions_covid_1d,FALSE,sum_confirmed_suspected_admissions_covid_1d,FALSE,Confirmed and Suspected COVID-19 Admissions per day,TRUE,Sum of adult and pediatric confirmed and suspected COVID-19 hospital admissions occurring each day. ,,day,Date,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +hhs,sum_confirmed_suspected_admissions_covid_1d,,sum_confirmed_suspected_admissions_covid_1d_7dav,TRUE,{base_name} (7-day average),TRUE,,,day,Date,Value,count,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, hospital-admissions,smoothed_covid19,FALSE,smoothed_covid19,FALSE,COVID-19 Admissions (EMR and Claims),FALSE,Estimated percentage of new hospital admissions with COVID-associated diagnoses,"{short_description}, based on counts of electronic medical records and claims from health system partners, smoothed in time using a Gaussian linear smoother. Discontinued October 1, 2020.",day,Date,Value,percent,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, From 6cb46b749e8dda35fe353540b6369b1d67da4663 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 13 Aug 2021 01:46:46 +0000 Subject: [PATCH 08/13] build(deps): bump path-parse in /src/client/packaging/npm Bumps [path-parse](https://github.com/jbgutierrez/path-parse) from 1.0.6 to 1.0.7. - [Release notes](https://github.com/jbgutierrez/path-parse/releases) - [Commits](https://github.com/jbgutierrez/path-parse/commits/v1.0.7) --- updated-dependencies: - dependency-name: path-parse dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- src/client/packaging/npm/package-lock.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/client/packaging/npm/package-lock.json b/src/client/packaging/npm/package-lock.json index 099890570..07b5e0e65 100644 --- a/src/client/packaging/npm/package-lock.json +++ b/src/client/packaging/npm/package-lock.json @@ -1,6 +1,6 @@ { "name": "delphi_epidata", - "version": "0.1.0", + "version": "0.2.7", "lockfileVersion": 1, "requires": true, "dependencies": { @@ -3430,9 +3430,9 @@ "dev": true }, "path-parse": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.6.tgz", - "integrity": "sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw==", + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", + "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==", "dev": true }, "performance-now": { From eaf016d073cb5114cacb3e498d9bfe3a2954e485 Mon Sep 17 00:00:00 2001 From: Samuel Gratzl Date: Tue, 17 Aug 2021 13:20:36 -0400 Subject: [PATCH 09/13] Apply suggestions from code review Co-authored-by: Katie Mazaitis --- src/server/endpoints/covidcast_utils/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server/endpoints/covidcast_utils/model.py b/src/server/endpoints/covidcast_utils/model.py index 2274d8f69..7ea50e496 100644 --- a/src/server/endpoints/covidcast_utils/model.py +++ b/src/server/endpoints/covidcast_utils/model.py @@ -242,7 +242,7 @@ def get_related_signals(signal: DataSignal) -> List[DataSignal]: def count_signal_time_types(source_signals: List[SourceSignalPair]) -> Tuple[int, int]: """ - count the number of weekly signals in this queries + count the number of signals in this query for each time type @returns daily counts, weekly counts """ weekly = 0 From 25fba3f762adf8222a8ac5526f5b1a2d5c3424a9 Mon Sep 17 00:00:00 2001 From: Samuel Gratzl Date: Tue, 17 Aug 2021 13:26:15 -0400 Subject: [PATCH 10/13] Apply suggestions from code review Co-authored-by: Katie Mazaitis --- src/server/endpoints/covidcast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server/endpoints/covidcast.py b/src/server/endpoints/covidcast.py index e5ac6e96d..a7e40bd83 100644 --- a/src/server/endpoints/covidcast.py +++ b/src/server/endpoints/covidcast.py @@ -380,7 +380,7 @@ def handle_export(): if geo_values != "*": geo_values = geo_values.split(",") - as_of, is_as_of_day = parse_day_or_week_arg('as_of') if 'as_of' in request.args else None, is_day + as_of, is_as_of_day = parse_day_or_week_arg('as_of') if 'as_of' in request.args else (None, is_day) if is_day != is_as_of_day: raise ValidationFailedException("mixing weeks with day arguments") From 493f788cd134aff065ec0dd6efa4034ce5bba52e Mon Sep 17 00:00:00 2001 From: Kathryn M Mazaitis Date: Wed, 18 Aug 2021 14:33:46 -0400 Subject: [PATCH 11/13] Add pairwise logging for meta --- src/acquisition/covidcast/database.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/acquisition/covidcast/database.py b/src/acquisition/covidcast/database.py index b8bf4f525..eeaeb3108 100644 --- a/src/acquisition/covidcast/database.py +++ b/src/acquisition/covidcast/database.py @@ -257,6 +257,7 @@ def compute_covidcast_meta(self, table_name='covidcast', use_index=True): n_threads = max(1, cpu_count()*9//10) # aka number of concurrent db connections, which [sh|c]ould be ~<= 90% 
of the #cores available to SQL server # NOTE: this may present a small problem if this job runs on different hardware than the db, # but we should not run into that issue in prod. + logger.info(f"using {n_threads} workers") srcsigs = Queue() # multi-consumer threadsafe! @@ -305,7 +306,8 @@ def compute_covidcast_meta(self, table_name='covidcast', use_index=True): meta_lock = threading.Lock() def worker(): - logger.info("starting thread: " + threading.current_thread().name) + name = threading.current_thread().name + logger.info("starting thread", thread=name) # set up new db connection for thread worker_dbc = Database() worker_dbc.connect(connector_impl=self._connector_impl) @@ -319,8 +321,9 @@ def worker(): dict(zip(w_cursor.column_names, x)) for x in w_cursor )) srcsigs.task_done() + logger.info("completed pair", thread=name, pair=f"({source}, {signal})") except Empty: - logger.info("no jobs left, thread terminating: " + threading.current_thread().name) + logger.info("no jobs left, thread terminating", thread=name) finally: worker_dbc.disconnect(False) # cleanup @@ -334,7 +337,7 @@ def worker(): logger.info("jobs complete") for t in threads: t.join() - logger.error("threads terminated") + logger.info("all threads terminated") # sort the metadata because threaded workers dgaf sorting_fields = "data_source signal time_type geo_type".split() From 447f0c31abb61e31e399fe399f0ea6ea7e72ce6f Mon Sep 17 00:00:00 2001 From: Kathryn M Mazaitis Date: Wed, 18 Aug 2021 14:56:38 -0400 Subject: [PATCH 12/13] Moved pair log before execution --- src/acquisition/covidcast/database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/acquisition/covidcast/database.py b/src/acquisition/covidcast/database.py index eeaeb3108..35d443cdc 100644 --- a/src/acquisition/covidcast/database.py +++ b/src/acquisition/covidcast/database.py @@ -315,13 +315,13 @@ def worker(): try: while True: (source, signal) = srcsigs.get_nowait() # this will throw the Empty caught below + logger.info("starting pair", thread=name, pair=f"({source}, {signal})") w_cursor.execute(inner_sql, (source, signal)) with meta_lock: meta.extend(list( dict(zip(w_cursor.column_names, x)) for x in w_cursor )) srcsigs.task_done() - logger.info("completed pair", thread=name, pair=f"({source}, {signal})") except Empty: logger.info("no jobs left, thread terminating", thread=name) finally: From 63da871ebb9e077c1a7f57af7dc6823643d19412 Mon Sep 17 00:00:00 2001 From: krivard Date: Wed, 18 Aug 2021 19:06:17 +0000 Subject: [PATCH 13/13] chore: release delphi-epidata 0.2.8 --- .bumpversion.cfg | 2 +- src/client/delphi_epidata.R | 2 +- src/client/delphi_epidata.js | 2 +- src/client/packaging/npm/package.json | 2 +- src/client/packaging/pypi/delphi_epidata/__init__.py | 2 +- src/client/packaging/pypi/setup.py | 2 +- src/server/_config.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 265bec840..e54f44993 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.7 +current_version = 0.2.8 commit = False tag = False diff --git a/src/client/delphi_epidata.R b/src/client/delphi_epidata.R index 9ce58814e..09fa04981 100644 --- a/src/client/delphi_epidata.R +++ b/src/client/delphi_epidata.R @@ -15,7 +15,7 @@ Epidata <- (function() { # API base url BASE_URL <- 'https://delphi.cmu.edu/epidata/api.php' - client_version <- '0.2.7' + client_version <- '0.2.8' # Helper function to cast values and/or ranges to strings .listitem <- function(value) { 
diff --git a/src/client/delphi_epidata.js b/src/client/delphi_epidata.js index c3b66efdf..5a89dae48 100644 --- a/src/client/delphi_epidata.js +++ b/src/client/delphi_epidata.js @@ -22,7 +22,7 @@ } })(this, function (exports, fetchImpl, jQuery) { const BASE_URL = "https://delphi.cmu.edu/epidata/"; - const client_version = "0.2.7"; + const client_version = "0.2.8"; // Helper function to cast values and/or ranges to strings function _listitem(value) { diff --git a/src/client/packaging/npm/package.json b/src/client/packaging/npm/package.json index 5da895a88..99ecca29f 100644 --- a/src/client/packaging/npm/package.json +++ b/src/client/packaging/npm/package.json @@ -2,7 +2,7 @@ "name": "delphi_epidata", "description": "Delphi Epidata API Client", "authors": "Delphi Group", - "version": "0.2.7", + "version": "0.2.8", "license": "MIT", "homepage": "https://github.com/cmu-delphi/delphi-epidata", "bugs": { diff --git a/src/client/packaging/pypi/delphi_epidata/__init__.py b/src/client/packaging/pypi/delphi_epidata/__init__.py index b8461ef8c..ab9069231 100644 --- a/src/client/packaging/pypi/delphi_epidata/__init__.py +++ b/src/client/packaging/pypi/delphi_epidata/__init__.py @@ -1,4 +1,4 @@ from .delphi_epidata import Epidata name = 'delphi_epidata' -__version__ = '0.2.7' +__version__ = '0.2.8' diff --git a/src/client/packaging/pypi/setup.py b/src/client/packaging/pypi/setup.py index 5473967a5..eb413c8a4 100644 --- a/src/client/packaging/pypi/setup.py +++ b/src/client/packaging/pypi/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="delphi_epidata", - version="0.2.7", + version="0.2.8", author="David Farrow", author_email="dfarrow0@gmail.com", description="A programmatic interface to Delphi's Epidata API.", diff --git a/src/server/_config.py b/src/server/_config.py index 56d2af64d..f5144f090 100644 --- a/src/server/_config.py +++ b/src/server/_config.py @@ -5,7 +5,7 @@ load_dotenv() -VERSION = "0.2.7" +VERSION = "0.2.8" MAX_RESULTS = int(10e6) MAX_COMPATIBILITY_RESULTS = int(3650)
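Illustrative sketch (not part of the patches above): the week support added in this series rests on a simple encoding convention — day time values are YYYYMMDD integers and epiweek time values are YYYYWW integers — so the parsers can guess the time type from the digit count and shift weeks through the epiweeks package. The minimal standalone example below mirrors the guess_time_value_is_day and shift_week_value helpers introduced in PATCH 02/05; converting back to an integer via cdcformat() is an assumption for illustration, not the exact week_to_time_value implementation.

from epiweeks import Week

def guess_time_value_is_day(value: int) -> bool:
    # YYYYMMDD has 8 digits, an epiweek YYYYWW has 6
    return len(str(value)) > 6

def shift_week_value(week_value: int, weeks: int) -> int:
    # mirror of the dates.py helper: route the shift through epiweeks.Week
    week = Week(week_value // 100, week_value % 100)
    shifted = week + weeks
    # cdcformat() yields the zero-padded YYYYWW string, e.g. "202053"
    return int(shifted.cdcformat())

print(guess_time_value_is_day(20210806))  # True  -> treated as a day argument
print(guess_time_value_is_day(202131))    # False -> treated as a week argument
print(shift_week_value(202101, -1))       # 202053, since 2020 had 53 epiweeks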