diff --git a/src/server/_params.py b/src/server/_params.py
index 5505b2714..fa4f63483 100644
--- a/src/server/_params.py
+++ b/src/server/_params.py
@@ -5,8 +5,9 @@
 from flask import request
+
 from ._exceptions import ValidationFailedException
-from .utils import days_in_range, weeks_in_range
+from .utils import days_in_range, weeks_in_range, guess_time_value_is_day
 
 
 def _parse_common_multi_arg(key: str) -> List[Tuple[str, Union[bool, Sequence[str]]]]:
@@ -109,6 +110,15 @@ class TimePair:
     time_type: str
     time_values: Union[bool, Sequence[Union[int, Tuple[int, int]]]]
 
+    @property
+    def is_week(self) -> bool:
+        return self.time_type == 'week'
+
+    @property
+    def is_day(self) -> bool:
+        return self.time_type != 'week'
+
+
     def count(self) -> float:
         """
         returns the count of items in this pair
@@ -225,3 +235,45 @@ def parse_day_arg(key: str) -> int:
     if not isinstance(r, int):
         raise ValidationFailedException(f"{key} must match YYYYMMDD or YYYY-MM-DD")
     return r
+
+def parse_week_arg(key: str) -> int:
+    v = request.values.get(key)
+    if not v:
+        raise ValidationFailedException(f"{key} param is required")
+    r = parse_week_value(v)
+    if not isinstance(r, int):
+        raise ValidationFailedException(f"{key} must match YYYYWW")
+    return r
+
+
+def parse_week_range_arg(key: str) -> Tuple[int, int]:
+    v = request.values.get(key)
+    if not v:
+        raise ValidationFailedException(f"{key} param is required")
+    r = parse_week_value(v)
+    if not isinstance(r, tuple):
+        raise ValidationFailedException(f"{key} must match YYYYWW-YYYYWW")
+    return r
+
+def parse_day_or_week_arg(key: str, default_value: Optional[int] = None) -> Tuple[int, bool]:
+    v = request.values.get(key)
+    if not v:
+        if default_value is not None:
+            return default_value, guess_time_value_is_day(default_value)
+        raise ValidationFailedException(f"{key} param is required")
+    # format is either YYYY-MM-DD or YYYYMMDD or YYYYMM
+    is_week = len(v) == 6
+    if is_week:
+        return parse_week_arg(key), False
+    return parse_day_arg(key), True
+
+def parse_day_or_week_range_arg(key: str) -> Tuple[Tuple[int, int], bool]:
+    v = request.values.get(key)
+    if not v:
+        raise ValidationFailedException(f"{key} param is required")
+    # format is either YYYY-MM-DD--YYYY-MM-DD or YYYYMMDD-YYYYMMDD or YYYYMM-YYYYMM
+    # so if the first value before the - has length 6, it must be a week
+    is_week = len(v.split('-', 2)[0]) == 6
+    if is_week:
+        return parse_week_range_arg(key), False
+    return parse_day_range_arg(key), True
diff --git a/src/server/endpoints/covidcast.py b/src/server/endpoints/covidcast.py
index 11552a987..a7e40bd83 100644
--- a/src/server/endpoints/covidcast.py
+++ b/src/server/endpoints/covidcast.py
@@ -1,11 +1,12 @@
 from typing import List, Optional, Union, Tuple, Dict, Any
 from itertools import groupby
-from datetime import date, datetime, timedelta
+from datetime import date, timedelta
+from epiweeks import Week
 from flask import Blueprint, request
 from flask.json import loads, jsonify
 from bisect import bisect_right
 from sqlalchemy import text
-from pandas import read_csv
+from pandas import read_csv, to_datetime
 
 from .._common import is_compatibility_mode, db
 from .._exceptions import ValidationFailedException, DatabaseErrorException
@@ -16,8 +17,8 @@
     parse_geo_arg,
     parse_source_signal_arg,
     parse_time_arg,
-    parse_day_arg,
-    parse_day_range_arg,
+    parse_day_or_week_arg,
+    parse_day_or_week_range_arg,
     parse_single_source_signal_arg,
     parse_single_time_arg,
     parse_single_geo_arg,
@@ -34,8 +35,8 @@
 )
 from .._pandas import as_pandas, print_pandas
 from .covidcast_utils import compute_trend, compute_trends, compute_correlations, compute_trend_value, CovidcastMetaEntry
-from ..utils import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date
-from .covidcast_utils.model import TimeType, data_sources, create_source_signal_alias_mapper
+from ..utils import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date, shift_week_value, week_value_to_week, guess_time_value_is_day, week_to_time_value
+from .covidcast_utils.model import TimeType, count_signal_time_types, data_sources, create_source_signal_alias_mapper
 
 # first argument is the endpoint name
 bp = Blueprint("covidcast", __name__)
@@ -161,7 +162,7 @@ def handle():
     _handle_lag_issues_as_of(q, issues, lag, as_of)
 
     def transform_row(row, proxy):
-        if is_compatibility or not alias_mapper or 'source' not in row:
+        if is_compatibility or not alias_mapper or "source" not in row:
             return row
         row["source"] = alias_mapper(row["source"], proxy["signal"])
         return row
@@ -170,17 +171,29 @@ def transform_row(row, proxy):
     return execute_query(str(q), q.params, fields_string, fields_int, fields_float, transform=transform_row)
 
 
+def _verify_argument_time_type_matches(is_day_argument: bool, count_daily_signal: int, count_weekly_signal: int) -> None:
+    if is_day_argument and count_weekly_signal > 0:
+        raise ValidationFailedException("day arguments for weekly signals")
+    if not is_day_argument and count_daily_signal > 0:
+        raise ValidationFailedException("week arguments for daily signals")
+
+
 @bp.route("/trend", methods=("GET", "POST"))
 def handle_trend():
-    require_all("date", "window")
+    require_all("window", "date")
     source_signal_pairs = parse_source_signal_pairs()
+    daily_signals, weekly_signals = count_signal_time_types(source_signal_pairs)
     source_signal_pairs, alias_mapper = create_source_signal_alias_mapper(source_signal_pairs)
-    # TODO alias
     geo_pairs = parse_geo_pairs()
 
-    time_value = parse_day_arg("date")
-    time_window = parse_day_range_arg("window")
-    basis_time_value = extract_date("basis") or shift_time_value(time_value, -7)
+    time_window, is_day = parse_day_or_week_range_arg("window")
+    time_value, is_also_day = parse_day_or_week_arg("date")
+    if is_day != is_also_day:
+        raise ValidationFailedException("mixing weeks with day arguments")
+    _verify_argument_time_type_matches(is_day, daily_signals, weekly_signals)
+    basis_time_value = extract_date("basis")
+    if basis_time_value is None:
+        basis_time_value = shift_time_value(time_value, -7) if is_day else shift_week_value(time_value, -7)
 
     # build query
     q = QueryBuilder("covidcast", "t")
@@ -193,7 +206,7 @@ def handle_trend():
     q.where_source_signal_pairs("source", "signal", source_signal_pairs)
     q.where_geo_pairs("geo_type", "geo_value", geo_pairs)
-    q.where_time_pairs("time_type", "time_value", [TimePair("day", [time_window])])
+    q.where_time_pairs("time_type", "time_value", [TimePair("day" if is_day else "week", [time_window])])
 
     # fetch most recent issue fast
     _handle_lag_issues_as_of(q, None, None, None)
@@ -222,14 +235,17 @@ def gen(rows):
 def handle_trendseries():
     require_all("window")
     source_signal_pairs = parse_source_signal_pairs()
+    daily_signals, weekly_signals = count_signal_time_types(source_signal_pairs)
     source_signal_pairs, alias_mapper = create_source_signal_alias_mapper(source_signal_pairs)
     geo_pairs = parse_geo_pairs()
 
-    time_window = parse_day_range_arg("window")
+    time_window, is_day = parse_day_or_week_range_arg("window")
+    _verify_argument_time_type_matches(is_day, daily_signals, weekly_signals)
     basis_shift = extract_integer("basis")
     if basis_shift is None:
         basis_shift = 7
+
     # build query
     q = QueryBuilder("covidcast", "t")
@@ -241,7 +257,7 @@ def handle_trendseries():
     q.where_source_signal_pairs("source", "signal", source_signal_pairs)
     q.where_geo_pairs("geo_type", "geo_value", geo_pairs)
-    q.where_time_pairs("time_type", "time_value", [TimePair("day", [time_window])])
+    q.where_time_pairs("time_type", "time_value", [TimePair("day" if is_day else "week", [time_window])])
 
     # fetch most recent issue fast
     _handle_lag_issues_as_of(q, None, None, None)
@@ -249,6 +265,8 @@
     p = create_printer()
 
     shifter = lambda x: shift_time_value(x, -basis_shift)
+    if not is_day:
+        shifter = lambda x: shift_week_value(x, -basis_shift)
 
     def gen(rows):
         for key, group in groupby((parse_row(row, fields_string, fields_int, fields_float) for row in rows), lambda row: (row["geo_type"], row["geo_value"], row["source"], row["signal"])):
@@ -274,9 +292,12 @@ def handle_correlation():
     require_all("reference", "window", "others", "geo")
     reference = parse_single_source_signal_arg("reference")
     other_pairs = parse_source_signal_arg("others")
+    daily_signals, weekly_signals = count_signal_time_types(other_pairs + [reference])
     source_signal_pairs, alias_mapper = create_source_signal_alias_mapper(other_pairs + [reference])
     geo_pairs = parse_geo_arg()
-    time_window = parse_day_range_arg("window")
+    time_window, is_day = parse_day_or_week_range_arg("window")
+    _verify_argument_time_type_matches(is_day, daily_signals, weekly_signals)
+
     lag = extract_integer("lag")
     if lag is None:
         lag = 28
@@ -296,12 +317,17 @@
         source_signal_pairs,
     )
     q.where_geo_pairs("geo_type", "geo_value", geo_pairs)
-    q.where_time_pairs("time_type", "time_value", [TimePair("day", [time_window])])
+    q.where_time_pairs("time_type", "time_value", [TimePair("day" if is_day else "week", [time_window])])
 
     # fetch most recent issue fast
     q.conditions.append(f"({q.alias}.is_latest_issue IS TRUE)")
 
-    df = as_pandas(str(q), q.params, parse_dates={"time_value": "%Y%m%d"})
+    df = as_pandas(str(q), q.params)
+    if is_day:
+        df["time_value"] = to_datetime(df["time_value"], format="%Y%m%d")
+    else:
+        # week but convert to date for simpler shifting
+        df["time_value"] = to_datetime(df["time_value"].apply(lambda v: week_value_to_week(v).startdate()))
 
     p = create_printer()
@@ -329,7 +355,7 @@ def gen():
             for (source, signal), other_group in other_groups:
                 if alias_mapper:
                     source = alias_mapper(source, signal)
-                for cor in compute_correlations(geo_type, geo_value, source, signal, lag, reference_group, other_group):
+                for cor in compute_correlations(geo_type, geo_value, source, signal, lag, reference_group, other_group, is_day):
                     yield cor.asdict()
 
     # now use a generator for sending the rows and execute all the other queries
@@ -339,40 +365,40 @@ def gen():
 @bp.route("/csv", methods=("GET", "POST"))
 def handle_export():
     source, signal = request.args.get("signal", "jhu-csse:confirmed_incidence_num").split(":")
-    source_signal_pairs, alias_mapper = create_source_signal_alias_mapper([SourceSignalPair(source, [signal])])
-    start_day = request.args.get("start_day", "2020-04-01")
-    end_day = request.args.get("end_day", "2020-09-01")
+    source_signal_pairs = [SourceSignalPair(source, [signal])]
+    daily_signals, weekly_signals = count_signal_time_types(source_signal_pairs)
+    source_signal_pairs, alias_mapper = create_source_signal_alias_mapper(source_signal_pairs)
+    start_day, is_day = parse_day_or_week_arg("start_day", 202001 if weekly_signals > 0 else 20200401)
+    end_day, is_end_day = parse_day_or_week_arg("end_day", 202020 if weekly_signals > 0 else 20200901)
+    if is_day != is_end_day:
+        raise ValidationFailedException("mixing weeks with day arguments")
+    _verify_argument_time_type_matches(is_day, daily_signals, weekly_signals)
+
     geo_type = request.args.get("geo_type", "county")
     geo_values = request.args.get("geo_values", "*")
 
     if geo_values != "*":
         geo_values = geo_values.split(",")
 
-    as_of = request.args.get("as_of", None)
-
-    start_day = datetime.strptime(start_day, "%Y-%m-%d").date()
-    end_day = datetime.strptime(end_day, "%Y-%m-%d").date()
-
-    if as_of is not None:
-        as_of = datetime.strptime(as_of, "%Y-%m-%d").date()
+    as_of, is_as_of_day = parse_day_or_week_arg('as_of') if 'as_of' in request.args else (None, is_day)
+    if is_day != is_as_of_day:
+        raise ValidationFailedException("mixing weeks with day arguments")
 
     # build query
     q = QueryBuilder("covidcast", "t")
     q.set_fields(["geo_value", "signal", "time_value", "issue", "lag", "value", "stderr", "sample_size", "geo_type", "source"], [], [])
     q.set_order("time_value", "geo_value")
-    q.where(time_type="day")
     q.where_source_signal_pairs("source", "signal", source_signal_pairs)
-    q.conditions.append("time_value BETWEEN :start_day AND :end_day")
-    q.params["start_day"] = date_to_time_value(start_day)
-    q.params["end_day"] = date_to_time_value(end_day)
+    q.where_time_pairs("time_type", "time_value", [TimePair('day' if is_day else 'week', [(start_day, end_day)])])
     q.where_geo_pairs("geo_type", "geo_value", [GeoPair(geo_type, True if geo_values == "*" else geo_values)])
 
-    _handle_lag_issues_as_of(q, None, None, date_to_time_value(as_of) if as_of is not None else None)
+    _handle_lag_issues_as_of(q, None, None, as_of)
 
+    format_date = time_value_to_iso if is_day else lambda x: week_value_to_week(x).cdcformat()
     # tag as_of in filename, if it was specified
-    as_of_str = "-asof-{as_of}".format(as_of=as_of.isoformat()) if as_of is not None else ""
-    filename = "covidcast-{source}-{signal}-{start_day}-to-{end_day}{as_of}".format(source=source, signal=signal, start_day=start_day.isoformat(), end_day=end_day.isoformat(), as_of=as_of_str)
+    as_of_str = "-asof-{as_of}".format(as_of=format_date(as_of)) if as_of is not None else ""
+    filename = "covidcast-{source}-{signal}-{start_day}-to-{end_day}{as_of}".format(source=source, signal=signal, start_day=format_date(start_day), end_day=format_date(end_day), as_of=as_of_str)
     p = CSVPrinter(filename)
 
     def parse_row(i, row):
@@ -381,8 +407,8 @@ def parse_row(i, row):
             "": i,
             "geo_value": row["geo_value"],
             "signal": row["signal"],
-            "time_value": time_value_to_iso(row["time_value"]),
-            "issue": time_value_to_iso(row["issue"]),
+            "time_value": time_value_to_iso(row["time_value"]) if is_day else row["time_value"],
+            "issue": time_value_to_iso(row["issue"]) if is_day else row["issue"],
             "lag": row["lag"],
             "value": row["value"],
             "stderr": row["stderr"],
@@ -406,7 +432,7 @@ def gen(first_row, rows):
     first_row = next(r, None)
     if not first_row:
         return "No matching data found for signal {source}:{signal} " "at {geo} level from {start_day} to {end_day}, as of {as_of}.".format(
-            source=source, signal=signal, geo=geo_type, start_day=start_day.isoformat(), end_day=end_day.isoformat(), as_of=(date.today().isoformat() if as_of is None else as_of.isoformat())
+            source=source, signal=signal, geo=geo_type, start_day=format_date(start_day), end_day=format_date(end_day), as_of=(date.today().isoformat() if as_of is None else format_date(as_of))
         )
 
     # now use a generator for sending the rows and execute all the other queries
@@ -420,12 +446,16 @@ def handle_backfill():
     """
     require_all("geo", "time", "signal")
     signal_pair = parse_single_source_signal_arg("signal")
+    daily_signals, weekly_signals = count_signal_time_types([signal_pair])
    source_signal_pairs, _ = create_source_signal_alias_mapper([signal_pair])  # don't need the alias mapper since we don't return the source
     time_pair = parse_single_time_arg("time")
+    is_day = time_pair.is_day
+    _verify_argument_time_type_matches(is_day, daily_signals, weekly_signals)
+
     geo_pair = parse_single_geo_arg("geo")
-    reference_anchor_lag = extract_integer("anchor_lag")  # in days
+    reference_anchor_lag = extract_integer("anchor_lag")  # in days or weeks
     if reference_anchor_lag is None:
         reference_anchor_lag = 60
@@ -461,7 +491,8 @@ def gen(rows):
         for time_value, group in groupby((parse_row(row, fields_string, fields_int, fields_float) for row in rows), lambda row: row["time_value"]):
             # compute data per time value
             issues: List[Dict[str, Any]] = [r for r in group]
-            anchor_row = find_anchor_row(issues, shift_time_value(time_value, reference_anchor_lag))
+            shifted_time_value = shift_time_value(time_value, reference_anchor_lag) if is_day else shift_week_value(time_value, reference_anchor_lag)
+            anchor_row = find_anchor_row(issues, shifted_time_value)
 
             for i, row in enumerate(issues):
                 if i > 0:
@@ -573,17 +604,31 @@ def handle_coverage():
     """
     source_signal_pairs = parse_source_signal_pairs()
+    daily_signals, weekly_signals = count_signal_time_types(source_signal_pairs)
     source_signal_pairs, alias_mapper = create_source_signal_alias_mapper(source_signal_pairs)
+
     geo_type = request.args.get("geo_type", "county")
     if "window" in request.values:
-        time_window = parse_day_range_arg("window")
+        time_window, is_day = parse_day_or_week_range_arg("window")
     else:
         now_time = extract_date("latest")
-        now = date.today() if now_time is None else time_value_to_date(now_time)
         last = extract_integer("days")
-        if last is None:
-            last = 30
-        time_window = (date_to_time_value(now - timedelta(days=last)), date_to_time_value(now))
+        last_weeks = extract_integer("weeks")
+        # week if latest is week like or weeks are given otherwise we don't know and guess days
+        if (now_time is not None and not guess_time_value_is_day(now_time)) or last_weeks is not None or weekly_signals > 0:
+            # week
+            if last_weeks is None:
+                last_weeks = last or 30
+            is_day = False
+            now_week = Week.thisweek() if now_time is None else week_value_to_week(now_time)
+            time_window = (week_to_time_value(now_week - last_weeks), week_to_time_value(now_week))
+        else:
+            is_day = True
+            if last is None:
+                last = 30
+            now = date.today() if now_time is None else time_value_to_date(now_time)
+            time_window = (date_to_time_value(now - timedelta(days=last)), date_to_time_value(now))
+    _verify_argument_time_type_matches(is_day, daily_signals, weekly_signals)
 
     q = QueryBuilder("covidcast", "c")
     fields_string = ["source", "signal"]
@@ -601,14 +646,14 @@ def handle_coverage():
     else:
         q.where(geo_type=geo_type)
     q.where_source_signal_pairs("source", "signal", source_signal_pairs)
-    q.where_time_pairs("time_type", "time_value", [TimePair("day", [time_window])])
+    q.where_time_pairs("time_type", "time_value", [TimePair("day" if is_day else "week", [time_window])])
     q.group_by = "c.source, c.signal, c.time_value"
     q.set_order("source", "signal", "time_value")
     _handle_lag_issues_as_of(q, None, None, None)
 
     def transform_row(row, proxy):
-        if not alias_mapper or 'source' not in row:
+        if not alias_mapper or "source" not in row:
             return row
         row["source"] = alias_mapper(row["source"], proxy["signal"])
         return row
diff --git a/src/server/endpoints/covidcast_utils/correlation.py b/src/server/endpoints/covidcast_utils/correlation.py
index b0b2b623c..3f8268b25 100644
--- a/src/server/endpoints/covidcast_utils/correlation.py
+++ b/src/server/endpoints/covidcast_utils/correlation.py
@@ -49,7 +49,7 @@ class Correlation:
     """
 
 
-def lag_join(lag: int, x: pd.DataFrame, y: pd.DataFrame) -> pd.DataFrame:
+def lag_join(lag: int, x: pd.DataFrame, y: pd.DataFrame, is_day = True) -> pd.DataFrame:
     # x_t_i ~ y_t_(i-lag)
     # aka x_t_(i+lag) ~ y_t_i
@@ -60,24 +60,24 @@ def lag_join(lag: int, x: pd.DataFrame, y: pd.DataFrame) -> pd.DataFrame:
         # x_t_i ~ y_shifted_t_i
         # shift y such that y_t(i - lag) -> y_shifted_t_i
         x_shifted = x
-        y_shifted = y.shift(lag, freq="D")
+        y_shifted = y.shift(lag, freq="D" if is_day else 'W')
     else:  # lag < 0
         # x_shifted_t_i ~ y_t_i
         # shift x such that x_t(i+lag) -> x_shifted_t_i
         # lag < 0 -> - - lag = + lag
-        x_shifted = x.shift(-lag, freq="D")
+        x_shifted = x.shift(-lag, freq="D" if is_day else 'W')
         y_shifted = y
     # inner join to remove invalid pairs
     r = x_shifted.join(y_shifted, how="inner", lsuffix="_x", rsuffix="_y")
     return r.rename(columns=dict(value_x="x", value_y="y"))
 
 
-def compute_correlations(geo_type: str, geo_value: str, signal_source: str, signal_signal: str, lag: int, x: pd.DataFrame, y: pd.DataFrame) -> Iterable[CorrelationResult]:
+def compute_correlations(geo_type: str, geo_value: str, signal_source: str, signal_signal: str, lag: int, x: pd.DataFrame, y: pd.DataFrame, is_day = True) -> Iterable[CorrelationResult]:
     """
     x,y ... DataFrame with "time_value" (Date) index and "value" (float) column
     """
     for current_lag in range(-lag, lag + 1):
-        xy = lag_join(current_lag, x, y)
+        xy = lag_join(current_lag, x, y, is_day)
         c = compute_correlation(xy)
         yield CorrelationResult(geo_type, geo_value, signal_source, signal_signal, current_lag, r2=c.r2, intercept=c.intercept, slope=c.slope, samples=c.samples)
diff --git a/src/server/endpoints/covidcast_utils/db_signals.csv b/src/server/endpoints/covidcast_utils/db_signals.csv
index 29b18f1da..a1be459a1 100644
--- a/src/server/endpoints/covidcast_utils/db_signals.csv
+++ b/src/server/endpoints/covidcast_utils/db_signals.csv
@@ -374,4 +374,17 @@ usa-facts,deaths_cumulative_num,TRUE,deaths_7dav_incidence_num,TRUE,"Confirmed C
 usa-facts,deaths_cumulative_num,TRUE,deaths_7dav_incidence_prop,FALSE,"Confirmed COVID Deaths (Daily new, 7-day average, per 100k people)",TRUE,"Daily new confirmed COVID deaths, 7-day average, per 100k people",,day,Date,Value,per100k,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE,
 usa-facts,deaths_cumulative_num,TRUE,deaths_cumulative_prop,FALSE,"Confirmed COVID Deaths (Cumulative, per 100k people)",TRUE,"Cumulative confirmed COVID deaths, per 100k people",,day,Date,Value,per100k,late,bad,FALSE,FALSE,TRUE,FALSE,FALSE,
 usa-facts,deaths_cumulative_num,TRUE,deaths_incidence_num,TRUE,Confirmed COVID Deaths (Daily new),TRUE,Daily new confirmed COVID deaths,,day,Date,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
-usa-facts,deaths_cumulative_num,TRUE,deaths_incidence_prop,FALSE,"Confirmed COVID Deaths (Daily new, per 100k people)",TRUE,"Daily new confirmed COVID deaths, per 100k people",,day,Date,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
\ No newline at end of file
+usa-facts,deaths_cumulative_num,TRUE,deaths_incidence_prop,FALSE,"Confirmed COVID Deaths (Daily new, per 100k people)",TRUE,"Daily new confirmed COVID deaths, per 100k people",,day,Date,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
+nchs-mortality,deaths_covid_incidence_num,FALSE,deaths_covid_incidence_num,FALSE,Confirmed or Presumed COVID Deaths (Weekly new),TRUE,Number of weekly new deaths with confirmed or presumed COVID-19 ,National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
+nchs-mortality,deaths_covid_incidence_num,TRUE,deaths_covid_incidence_prop,FALSE,"Confirmed or Presumed COVID Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths with confirmed or presumed COVID-19, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
+nchs-mortality,deaths_allcause_incidence_num,FALSE,deaths_allcause_incidence_num,FALSE,All Causes Deaths (Weekly new),TRUE,Number of weekly new deaths from all causes,National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
+nchs-mortality,deaths_allcause_incidence_num,TRUE,deaths_allcause_incidence_prop,FALSE,"All Causes Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths from all causes, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
+nchs-mortality,deaths_flu_incidence_num,FALSE,deaths_flu_incidence_num,FALSE,Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving Influenza and at least one of (Pneumonia, COVID-19)",National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
+nchs-mortality,deaths_flu_incidence_num,TRUE,deaths_flu_incidence_prop,FALSE,"Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Influenza and at least one of (Pneumonia, COVID-19), per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
+nchs-mortality,deaths_pneumonia_notflu_incidence_num,FALSE,deaths_pneumonia_notflu_incidence_num,FALSE,Pneumonia excl. Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving Pneumonia, excluding Influenza deaths ",National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
+nchs-mortality,deaths_pneumonia_notflu_incidence_num,TRUE,deaths_pneumonia_notflu_incidence_prop,FALSE,"Pneumonia excl. Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Pneumonia, excluding Influenza deaths, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
+nchs-mortality,deaths_covid_and_pneumonia_notflu_incidence_num,FALSE,deaths_covid_and_pneumonia_notflu_incidence_num,FALSE,COVID and Pneumonia excl. Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving COVID-19 and Pneumonia, excluding Influenza ",National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
+nchs-mortality,deaths_covid_and_pneumonia_notflu_incidence_num,TRUE,deaths_covid_and_pneumonia_notflu_incidence_prop,FALSE,"COVID and Pneumonia excl. Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving COVID-19 and Pneumonia, excluding Influenza, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
+nchs-mortality,deaths_pneumonia_or_flu_or_covid_incidence_num,FALSE,deaths_pneumonia_or_flu_or_covid_incidence_num,FALSE,"COVID, Pneumonia or Influenza Deaths (Weekly new)",TRUE,"Number of weekly new deaths involving Pneumonia, Influenza, or COVID-19 ",National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
+nchs-mortality,deaths_pneumonia_or_flu_or_covid_incidence_num,TRUE,deaths_pneumonia_or_flu_or_covid_incidence_prop,FALSE,"COVID, Pneumonia or Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Pneumonia, Influenza, or COVID-19, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
+nchs-mortality,deaths_percent_of_expected,FALSE,deaths_percent_of_expected,FALSE,"Percentage of Expected Deaths (Weekly new, per 100k people)",TRUE,Number of weekly new deaths for all causes in 2020 compared to the average number across the same week in 2017–2019 ,,week,Week,Value,percent,late,neutral,FALSE,FALSE,FALSE,FALSE,FALSE,
\ No newline at end of file
diff --git a/src/server/endpoints/covidcast_utils/db_sources.csv b/src/server/endpoints/covidcast_utils/db_sources.csv
index b66838f08..4a99059ef 100644
--- a/src/server/endpoints/covidcast_utils/db_sources.csv
+++ b/src/server/endpoints/covidcast_utils/db_sources.csv
@@ -18,4 +18,5 @@ ght,ght,Google Health Trends,"Google Health Trends tracks Google searches on hea
 google-survey,google-survey,Google Symptom Surveys,"Delphi ran symptom surveys using a Google tool which collects responses through publisher websites, Google's Opinions Reward app, and similar applications. No longer updated after May 15, 2020.",smoothed_cli,CC BY,,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/google-survey.html)
 indicator-combination,indicator-combination-nmf,Statistical Combination (NMF),"This source provides signals which are statistical combinations of other sources, calculated by Delphi. It is not a primary data source. No longer updated after Marcy 17, 2021.",nmf_day_doc_fbs_ght,CC BY,,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/indicator-combination-inactive.html)
 quidel,quidel-flu,Quidel Inc. (Flu),"Quidel, Inc. manufactures diagnostic equipment for healthcare applications, and provides Delphi with anonymized data on tests and test results. This source includes flu tests. No longer updated after May 19, 2020.",smoothed_pct_negative,CC BY,https://cmu.box.com/s/sax48yxnahllrnbqlq6wqxblg6lsyq24,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/quidel.html#flu-tests)
-safegraph,safegraph-daily,SafeGraph (Daily),"[SafeGraph](https://docs.safegraph.com/docs/social-distancing-metrics) compiles daily mobility information using anonymized location data from mobile phones. This source includes a range of isolation/lockdown behaviors and home dwell time. No longer updated after April 19, 2021.",completely_home_prop,CC BY,https://cmu.box.com/s/m0p1wpet4vuvey7od83n70h0e97ky2kg,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/safegraph.html)
\ No newline at end of file
+safegraph,safegraph-daily,SafeGraph (Daily),"[SafeGraph](https://docs.safegraph.com/docs/social-distancing-metrics) compiles daily mobility information using anonymized location data from mobile phones. This source includes a range of isolation/lockdown behaviors and home dwell time. No longer updated after April 19, 2021.",completely_home_prop,CC BY,https://cmu.box.com/s/m0p1wpet4vuvey7od83n70h0e97ky2kg,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/safegraph.html)
+nchs-mortality,nchs-mortality,NCHS Mortality Data,"This data source of national provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)). This data is different from the death data available from USAFacts and JHU CSSE: deaths are reported by the date they occur, not the date they are reported by local health departments, and data is frequently reissued as additional death certificates from recent weeks are received and tabulated.",deaths_allcause_incidence_num,[NCHS Data Use Agreement](https://www.cdc.gov/nchs/data_access/restrictions.htm),,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/nchs-mortality.html)
\ No newline at end of file
diff --git a/src/server/endpoints/covidcast_utils/model.py b/src/server/endpoints/covidcast_utils/model.py
index dcb32484e..7ea50e496 100644
--- a/src/server/endpoints/covidcast_utils/model.py
+++ b/src/server/endpoints/covidcast_utils/model.py
@@ -228,12 +228,39 @@ def _load_data_signals(sources: List[DataSource]):
 data_signals, data_signals_df = _load_data_signals(data_sources)
 data_signals_by_key = {d.key: d for d in data_signals}
 
+# also add the resolved signal version to the signal lookup
+for d in data_signals:
+    source = data_source_by_id.get(d.source)
+    if source and source.uses_db_alias:
+        data_signals_by_key[(source.db_source, d.signal)] = d
+
 
 def get_related_signals(signal: DataSignal) -> List[DataSignal]:
     return [s for s in data_signals if s != signal and s.signal_basename == signal.signal_basename]
 
 
+def count_signal_time_types(source_signals: List[SourceSignalPair]) -> Tuple[int, int]:
+    """
+    count the number of signals in this query for each time type
+    @returns daily counts, weekly counts
+    """
+    weekly = 0
+    daily = 0
+    for pair in source_signals:
+        if pair.signal == True:
+            continue
+        for s in pair.signal:
+            signal = data_signals_by_key.get((pair.source, s))
+            if not signal:
+                continue
+            if signal.time_type == TimeType.week:
+                weekly += 1
+            else:
+                daily += 1
+    return daily, weekly
+
+
 def create_source_signal_alias_mapper(source_signals: List[SourceSignalPair]) -> Tuple[List[SourceSignalPair], Optional[Callable[[str, str], str]]]:
     alias_to_data_sources: Dict[str, List[DataSource]] = {}
     transformed_pairs: List[SourceSignalPair] = []
diff --git a/src/server/endpoints/covidcast_utils/trend.py b/src/server/endpoints/covidcast_utils/trend.py
index c7df3ba88..43c4ac21b 100644
--- a/src/server/endpoints/covidcast_utils/trend.py
+++ b/src/server/endpoints/covidcast_utils/trend.py
@@ -2,7 +2,6 @@
 from typing import Optional, Iterable, Tuple, Dict, List, Callable
 from enum import Enum
 from collections import OrderedDict
-from ...utils import shift_time_value
 
 
 class TrendEnum(str, Enum):
diff --git a/src/server/utils/__init__.py b/src/server/utils/__init__.py
index 2cf5e85d9..c8c323b9b 100644
--- a/src/server/utils/__init__.py
+++ b/src/server/utils/__init__.py
@@ -1 +1 @@
-from .dates import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date, days_in_range, weeks_in_range
+from .dates import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date, days_in_range, weeks_in_range, shift_week_value, week_to_time_value, week_value_to_week, guess_time_value_is_day
diff --git a/src/server/utils/dates.py b/src/server/utils/dates.py
index a55c3c2c4..4053ad2ad 100644
--- a/src/server/utils/dates.py
+++ b/src/server/utils/dates.py
@@ -19,6 +19,10 @@ def week_value_to_week(value: int) -> Week:
         return Week(date.max.year - 1, 1)  # minus 1 since internally it does some checks with a year + 1
     return Week(year=year, week=week)
 
+def guess_time_value_is_day(value: int) -> bool:
+    # YYYYMMDD type and not YYYYMM
+    return len(str(value)) > 6
+
 def date_to_time_value(d: date) -> int:
     return int(d.strftime("%Y%m%d"))
 
@@ -37,6 +41,12 @@ def shift_time_value(time_value: int, days: int) -> int:
     shifted = d + timedelta(days=days)
     return date_to_time_value(shifted)
 
+def shift_week_value(week_value: int, weeks: int) -> int:
+    if weeks == 0:
+        return week_value
+    week = week_value_to_week(week_value)
+    shifted = week + weeks
+    return week_to_time_value(shifted)
 
 def days_in_range(range: Tuple[int, int]) -> int:
     """
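For orientation, the sketch below illustrates the week-value conventions this patch builds on. It is not part of the diff: guess_time_value_is_day and shift_week_value are re-stated from the hunks above, while week_value_to_week and week_to_time_value are simplified assumptions (their full pre-existing bodies in src/server/utils/dates.py are not shown here), and the example values are arbitrary.

# Illustrative only -- simplified restatements of the helpers this patch relies on.
from epiweeks import Week


def guess_time_value_is_day(value: int) -> bool:
    # day values look like YYYYMMDD (8 digits), epiweek values like YYYYWW (6 digits)
    return len(str(value)) > 6


def week_value_to_week(value: int) -> Week:
    # assumed to mirror the existing helper in dates.py
    year, week = divmod(value, 100)
    return Week(year=year, week=week)


def week_to_time_value(w: Week) -> int:
    # assumed to mirror the existing helper in dates.py
    return w.year * 100 + w.week


def shift_week_value(week_value: int, weeks: int) -> int:
    # same logic as the version added in this patch
    if weeks == 0:
        return week_value
    return week_to_time_value(week_value_to_week(week_value) + weeks)


assert guess_time_value_is_day(20200401)       # YYYYMMDD -> treated as a day argument
assert not guess_time_value_is_day(202014)     # YYYYWW -> treated as a week argument
assert shift_week_value(202014, -7) == 202007  # the -7 basis shift /trend uses for weekly signals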