54 changes: 53 additions & 1 deletion src/server/_params.py
@@ -5,8 +5,9 @@

from flask import request


from ._exceptions import ValidationFailedException
from .utils import days_in_range, weeks_in_range
from .utils import days_in_range, weeks_in_range, guess_time_value_is_day


def _parse_common_multi_arg(key: str) -> List[Tuple[str, Union[bool, Sequence[str]]]]:
@@ -109,6 +110,15 @@ class TimePair:
time_type: str
time_values: Union[bool, Sequence[Union[int, Tuple[int, int]]]]

@property
def is_week(self) -> bool:
return self.time_type == 'week'

@property
def is_day(self) -> bool:
return self.time_type != 'week'


def count(self) -> float:
"""
returns the count of items in this pair
@@ -225,3 +235,45 @@ def parse_day_arg(key: str) -> int:
if not isinstance(r, int):
raise ValidationFailedException(f"{key} must match YYYYMMDD or YYYY-MM-DD")
return r

def parse_week_arg(key: str) -> int:
v = request.values.get(key)
if not v:
raise ValidationFailedException(f"{key} param is required")
r = parse_week_value(v)
if not isinstance(r, int):
raise ValidationFailedException(f"{key} must match YYYYWW")
return r


def parse_week_range_arg(key: str) -> Tuple[int, int]:
v = request.values.get(key)
if not v:
raise ValidationFailedException(f"{key} param is required")
r = parse_week_value(v)
if not isinstance(r, tuple):
raise ValidationFailedException(f"{key} must match YYYYWW-YYYYWW")
return r

def parse_day_or_week_arg(key: str, default_value: Optional[int] = None) -> Tuple[int, bool]:
v = request.values.get(key)
if not v:
if default_value is not None:
return default_value, guess_time_value_is_day(default_value)
raise ValidationFailedException(f"{key} param is required")
# format is either YYYY-MM-DD or YYYYMMDD (day) or YYYYWW (week)
is_week = len(v) == 6
if is_week:
return parse_week_arg(key), False
return parse_day_arg(key), True

def parse_day_or_week_range_arg(key: str) -> Tuple[Tuple[int, int], bool]:
v = request.values.get(key)
if not v:
raise ValidationFailedException(f"{key} param is required")
# format is either YYYY-MM-DD--YYYY-MM-DD or YYYYMMDD-YYYYMMDD or YYYYWW-YYYYWW
# so if the part before the first '-' has length 6, it must be a week range
is_week = len(v.split('-', 2)[0]) == 6
if is_week:
return parse_week_range_arg(key), False
return parse_day_range_arg(key), True
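
For orientation, a minimal standalone sketch (assuming the same length heuristic the parsers above use; not the Flask-bound helpers themselves) of how a raw time parameter is classified as a day or a week:

```python
def classify_time_param(v: str) -> str:
    # day formats: YYYY-MM-DD or YYYYMMDD; week format: YYYYWW (6 characters)
    return "week" if len(v) == 6 else "day"

for raw in ("2021-03-15", "20210315", "202111"):
    print(raw, "->", classify_time_param(raw))
# 2021-03-15 -> day
# 20210315 -> day
# 202111 -> week
```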
141 changes: 93 additions & 48 deletions src/server/endpoints/covidcast.py

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions src/server/endpoints/covidcast_utils/correlation.py
@@ -49,7 +49,7 @@ class Correlation:
"""


def lag_join(lag: int, x: pd.DataFrame, y: pd.DataFrame) -> pd.DataFrame:
def lag_join(lag: int, x: pd.DataFrame, y: pd.DataFrame, is_day = True) -> pd.DataFrame:
# x_t_i ~ y_t_(i-lag)
# aka x_t_(i+lag) ~ y_t_i

@@ -60,24 +60,24 @@ def lag_join(lag: int, x: pd.DataFrame, y: pd.DataFrame) -> pd.DataFrame:
# x_t_i ~ y_shifted_t_i
# shift y such that y_t(i - lag) -> y_shifted_t_i
x_shifted = x
y_shifted = y.shift(lag, freq="D")
y_shifted = y.shift(lag, freq="D" if is_day else 'W')
else: # lag < 0
# x_shifted_t_i ~ y_t_i
# shift x such that x_t(i+lag) -> x_shifted_t_i
# lag < 0 -> - - lag = + lag
x_shifted = x.shift(-lag, freq="D")
x_shifted = x.shift(-lag, freq="D" if is_day else 'W')
y_shifted = y
# inner join to remove invalid pairs
r = x_shifted.join(y_shifted, how="inner", lsuffix="_x", rsuffix="_y")
return r.rename(columns=dict(value_x="x", value_y="y"))


def compute_correlations(geo_type: str, geo_value: str, signal_source: str, signal_signal: str, lag: int, x: pd.DataFrame, y: pd.DataFrame) -> Iterable[CorrelationResult]:
def compute_correlations(geo_type: str, geo_value: str, signal_source: str, signal_signal: str, lag: int, x: pd.DataFrame, y: pd.DataFrame, is_day = True) -> Iterable[CorrelationResult]:
"""
x,y ... DataFrame with "time_value" (Date) index and "value" (float) column
"""
for current_lag in range(-lag, lag + 1):
xy = lag_join(current_lag, x, y)
xy = lag_join(current_lag, x, y, is_day)
c = compute_correlation(xy)

yield CorrelationResult(geo_type, geo_value, signal_source, signal_signal, current_lag, r2=c.r2, intercept=c.intercept, slope=c.slope, samples=c.samples)
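
As a rough illustration of the lag join above (a standalone pandas sketch, not the endpoint code): x at time t is paired with y at time t - lag by shifting y's index forward before the inner join; weekly series would pass freq="W" instead of "D".

```python
import pandas as pd

dates = pd.date_range("2021-01-01", periods=5, freq="D")
x = pd.DataFrame({"value": [1.0, 2.0, 3.0, 4.0, 5.0]}, index=dates)
y = pd.DataFrame({"value": [10.0, 20.0, 30.0, 40.0, 50.0]}, index=dates)

lag = 2
y_shifted = y.shift(lag, freq="D")  # y_{t-lag} now lines up with x_t
xy = x.join(y_shifted, how="inner", lsuffix="_x", rsuffix="_y")
print(xy.rename(columns=dict(value_x="x", value_y="y")))
# only 2021-01-03..05 survive the inner join: each x_t is paired with the y value from two days earlier
```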
15 changes: 14 additions & 1 deletion src/server/endpoints/covidcast_utils/db_signals.csv
@@ -374,4 +374,17 @@ usa-facts,deaths_cumulative_num,TRUE,deaths_7dav_incidence_num,TRUE,"Confirmed C
usa-facts,deaths_cumulative_num,TRUE,deaths_7dav_incidence_prop,FALSE,"Confirmed COVID Deaths (Daily new, 7-day average, per 100k people)",TRUE,"Daily new confirmed COVID deaths, 7-day average, per 100k people",,day,Date,Value,per100k,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE,
usa-facts,deaths_cumulative_num,TRUE,deaths_cumulative_prop,FALSE,"Confirmed COVID Deaths (Cumulative, per 100k people)",TRUE,"Cumulative confirmed COVID deaths, per 100k people",,day,Date,Value,per100k,late,bad,FALSE,FALSE,TRUE,FALSE,FALSE,
usa-facts,deaths_cumulative_num,TRUE,deaths_incidence_num,TRUE,Confirmed COVID Deaths (Daily new),TRUE,Daily new confirmed COVID deaths,,day,Date,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
usa-facts,deaths_cumulative_num,TRUE,deaths_incidence_prop,FALSE,"Confirmed COVID Deaths (Daily new, per 100k people)",TRUE,"Daily new confirmed COVID deaths, per 100k people",,day,Date,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
nchs-mortality,deaths_covid_incidence_num,FALSE,deaths_covid_incidence_num,FALSE,Confirmed or Presumed COVID Deaths (Weekly new),TRUE,Number of weekly new deaths with confirmed or presumed COVID-19 ,National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
nchs-mortality,deaths_covid_incidence_num,TRUE,deaths_covid_incidence_prop,FALSE,"Confirmed or Presumed COVID Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths with confirmed or presumed COVID-19, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
nchs-mortality,deaths_allcause_incidence_num,FALSE,deaths_allcause_incidence_num,FALSE,All Causes Deaths (Weekly new),TRUE,Number of weekly new deaths from all causes,National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
nchs-mortality,deaths_allcause_incidence_num,TRUE,deaths_allcause_incidence_prop,FALSE,"All Causes Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths from all causes, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
nchs-mortality,deaths_flu_incidence_num,FALSE,deaths_flu_incidence_num,FALSE,Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving Influenza and at least one of (Pneumonia, COVID-19)",National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
nchs-mortality,deaths_flu_incidence_num,TRUE,deaths_flu_incidence_prop,FALSE,"Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Influenza and at least one of (Pneumonia, COVID-19), per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
nchs-mortality,deaths_pneumonia_notflu_incidence_num,FALSE,deaths_pneumonia_notflu_incidence_num,FALSE,Pneumonia excl. Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving Pneumonia, excluding Influenza deaths ",National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
nchs-mortality,deaths_pneumonia_notflu_incidence_num,TRUE,deaths_pneumonia_notflu_incidence_prop,FALSE,"Pneumonia excl. Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Pneumonia, excluding Influenza deaths, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
nchs-mortality,deaths_covid_and_pneumonia_notflu_incidence_num,FALSE,deaths_covid_and_pneumonia_notflu_incidence_num,FALSE,COVID and Pneumonia excl. Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving COVID-19 and Pneumonia, excluding Influenza ",National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
nchs-mortality,deaths_covid_and_pneumonia_notflu_incidence_num,TRUE,deaths_covid_and_pneumonia_notflu_incidence_prop,FALSE,"COVID and Pneumonia excl. Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving COVID-19 and Pneumonia, excluding Influenza, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
nchs-mortality,deaths_pneumonia_or_flu_or_covid_incidence_num,FALSE,deaths_pneumonia_or_flu_or_covid_incidence_num,FALSE,"COVID, Pneumonia or Influenza Deaths (Weekly new)",TRUE,"Number of weekly new deaths involving Pneumonia, Influenza, or COVID-19 ",National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
nchs-mortality,deaths_pneumonia_or_flu_or_covid_incidence_num,TRUE,deaths_pneumonia_or_flu_or_covid_incidence_prop,FALSE,"COVID, Pneumonia or Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Pneumonia, Influenza, or COVID-19, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
nchs-mortality,deaths_percent_of_expected,FALSE,deaths_percent_of_expected,FALSE,"Percentage of Expected Deaths (Weekly new, per 100k people)",TRUE,Number of weekly new deaths for all causes in 2020 compared to the average number across the same week in 2017–2019 ,,week,Week,Value,percent,late,neutral,FALSE,FALSE,FALSE,FALSE,FALSE,
3 changes: 2 additions & 1 deletion src/server/endpoints/covidcast_utils/db_sources.csv
@@ -18,4 +18,5 @@ ght,ght,Google Health Trends,"Google Health Trends tracks Google searches on hea
google-survey,google-survey,Google Symptom Surveys,"Delphi ran symptom surveys using a Google tool which collects responses through publisher websites, Google's Opinions Reward app, and similar applications. No longer updated after May 15, 2020.",smoothed_cli,CC BY,,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/google-survey.html)
indicator-combination,indicator-combination-nmf,Statistical Combination (NMF),"This source provides signals which are statistical combinations of other sources, calculated by Delphi. It is not a primary data source. No longer updated after March 17, 2021.",nmf_day_doc_fbs_ght,CC BY,,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/indicator-combination-inactive.html)
quidel,quidel-flu,Quidel Inc. (Flu),"Quidel, Inc. manufactures diagnostic equipment for healthcare applications, and provides Delphi with anonymized data on tests and test results. This source includes flu tests. No longer updated after May 19, 2020.",smoothed_pct_negative,CC BY,https://cmu.box.com/s/sax48yxnahllrnbqlq6wqxblg6lsyq24,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/quidel.html#flu-tests)
safegraph,safegraph-daily,SafeGraph (Daily),"[SafeGraph](https://docs.safegraph.com/docs/social-distancing-metrics) compiles daily mobility information using anonymized location data from mobile phones. This source includes a range of isolation/lockdown behaviors and home dwell time. No longer updated after April 19, 2021.",completely_home_prop,CC BY,https://cmu.box.com/s/m0p1wpet4vuvey7od83n70h0e97ky2kg,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/safegraph.html)
nchs-mortality,nchs-mortality,NCHS Mortality Data,"This data source of national provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)). This data is different from the death data available from USAFacts and JHU CSSE: deaths are reported by the date they occur, not the date they are reported by local health departments, and data is frequently reissued as additional death certificates from recent weeks are received and tabulated.",deaths_allcause_incidence_num,[NCHS Data Use Agreement](https://www.cdc.gov/nchs/data_access/restrictions.htm),,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/nchs-mortality.html)
27 changes: 27 additions & 0 deletions src/server/endpoints/covidcast_utils/model.py
@@ -228,12 +228,39 @@ def _load_data_signals(sources: List[DataSource]):

data_signals, data_signals_df = _load_data_signals(data_sources)
data_signals_by_key = {d.key: d for d in data_signals}
# also add the resolved signal version to the signal lookup
for d in data_signals:
source = data_source_by_id.get(d.source)
if source and source.uses_db_alias:
data_signals_by_key[(source.db_source, d.signal)] = d



def get_related_signals(signal: DataSignal) -> List[DataSignal]:
return [s for s in data_signals if s != signal and s.signal_basename == signal.signal_basename]


def count_signal_time_types(source_signals: List[SourceSignalPair]) -> Tuple[int, int]:
"""
count the number of signals in this query for each time type
@returns (number of daily signals, number of weekly signals)
"""
weekly = 0
daily = 0
for pair in source_signals:
if pair.signal == True:
continue
for s in pair.signal:
signal = data_signals_by_key.get((pair.source, s))
if not signal:
continue
if signal.time_type == TimeType.week:
weekly += 1
else:
daily += 1
return daily, weekly


def create_source_signal_alias_mapper(source_signals: List[SourceSignalPair]) -> Tuple[List[SourceSignalPair], Optional[Callable[[str, str], str]]]:
alias_to_data_sources: Dict[str, List[DataSource]] = {}
transformed_pairs: List[SourceSignalPair] = []
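
A self-contained sketch of the counting idea in count_signal_time_types, using a hypothetical signal registry in place of the real data_signals_by_key; a mixed query here reports one daily and one weekly signal.

```python
from typing import Dict, List, Sequence, Tuple, Union

# hypothetical stand-in for data_signals_by_key: (source, signal) -> time_type
SIGNAL_TIME_TYPES: Dict[Tuple[str, str], str] = {
    ("usa-facts", "deaths_incidence_num"): "day",
    ("nchs-mortality", "deaths_covid_incidence_num"): "week",
}

def count_time_types(pairs: Sequence[Tuple[str, Union[bool, List[str]]]]) -> Tuple[int, int]:
    daily = weekly = 0
    for source, signals in pairs:
        if signals is True:  # wildcard pair: no individual signals to look up
            continue
        for s in signals:
            time_type = SIGNAL_TIME_TYPES.get((source, s))
            if time_type == "week":
                weekly += 1
            elif time_type == "day":
                daily += 1
    return daily, weekly

print(count_time_types([
    ("usa-facts", ["deaths_incidence_num"]),
    ("nchs-mortality", ["deaths_covid_incidence_num"]),
]))  # (1, 1)
```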
1 change: 0 additions & 1 deletion src/server/endpoints/covidcast_utils/trend.py
@@ -2,7 +2,6 @@
from typing import Optional, Iterable, Tuple, Dict, List, Callable
from enum import Enum
from collections import OrderedDict
from ...utils import shift_time_value


class TrendEnum(str, Enum):
2 changes: 1 addition & 1 deletion src/server/utils/__init__.py
@@ -1 +1 @@
from .dates import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date, days_in_range, weeks_in_range
from .dates import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date, days_in_range, weeks_in_range, shift_week_value, week_to_time_value, week_value_to_week, guess_time_value_is_day
10 changes: 10 additions & 0 deletions src/server/utils/dates.py
@@ -19,6 +19,10 @@ def week_value_to_week(value: int) -> Week:
return Week(date.max.year - 1, 1) # minus 1 since internally it does some checks with a year + 1
return Week(year=year, week=week)

def guess_time_value_is_day(value: int) -> bool:
# a YYYYMMDD day value has 8 digits; a YYYYWW week value has 6
return len(str(value)) > 6

def date_to_time_value(d: date) -> int:
return int(d.strftime("%Y%m%d"))

@@ -37,6 +41,12 @@ def shift_time_value(time_value: int, days: int) -> int:
shifted = d + timedelta(days=days)
return date_to_time_value(shifted)

def shift_week_value(week_value: int, weeks: int) -> int:
if weeks == 0:
return week_value
week = week_value_to_week(week_value)
shifted = week + weeks
return week_to_time_value(shifted)

def days_in_range(range: Tuple[int, int]) -> int:
"""
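
Finally, a hedged sketch of the new week helpers in dates.py, using the epiweeks Week class the module already imports; week_to_time_value is assumed to encode a week as YYYYWW.

```python
from epiweeks import Week

def to_time_value(w: Week) -> int:
    return w.year * 100 + w.week  # assumed YYYYWW encoding, matching week_to_time_value

def guess_is_day(value: int) -> bool:
    return len(str(value)) > 6  # 8-digit YYYYMMDD is a day, 6-digit YYYYWW is a week

w = Week(2020, 52)
shifted = w + 3  # epiweeks handles the year rollover
print(to_time_value(w), "->", to_time_value(shifted))  # e.g. 202052 -> 202102 (MMWR 2020 has 53 weeks)
print(guess_is_day(20210315), guess_is_day(202111))    # True False
```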