Skip to content

Commit ff51a0a

Browse files
authored
Merge pull request #676 from cmu-delphi/sgratzl/nchs-mortality-meta
add nchs mortality to meta data and adapt covidcast endpoints
2 parents 22b3778 + 25fba3f commit ff51a0a

File tree

9 files changed

+205
-58
lines changed

9 files changed

+205
-58
lines changed

src/server/_params.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55

66
from flask import request
77

8+
89
from ._exceptions import ValidationFailedException
9-
from .utils import days_in_range, weeks_in_range
10+
from .utils import days_in_range, weeks_in_range, guess_time_value_is_day
1011

1112

1213
def _parse_common_multi_arg(key: str) -> List[Tuple[str, Union[bool, Sequence[str]]]]:
@@ -109,6 +110,15 @@ class TimePair:
109110
time_type: str
110111
time_values: Union[bool, Sequence[Union[int, Tuple[int, int]]]]
111112

113+
@property
def is_week(self) -> bool:
    """
    whether the time type of this pair is 'week'
    """
    weekly = self.time_type == 'week'
    return weekly
116+
117+
@property
def is_day(self) -> bool:
    """
    whether this pair is treated as daily, i.e. its time type is anything other than 'week'
    """
    daily = self.time_type != 'week'
    return daily
120+
121+
112122
def count(self) -> float:
113123
"""
114124
returns the count of items in this pair
@@ -225,3 +235,45 @@ def parse_day_arg(key: str) -> int:
225235
if not isinstance(r, int):
226236
raise ValidationFailedException(f"{key} must match YYYYMMDD or YYYY-MM-DD")
227237
return r
238+
239+
def parse_week_arg(key: str) -> int:
    """
    read the request parameter `key` and parse it as a single week

    raises ValidationFailedException when the parameter is missing or empty,
    or when parse_week_value does not yield a single int for it
    """
    raw = request.values.get(key)
    if raw:
        week = parse_week_value(raw)
        if isinstance(week, int):
            return week
        raise ValidationFailedException(f"{key} must match YYYYWW")
    raise ValidationFailedException(f"{key} param is required")
247+
248+
249+
def parse_week_range_arg(key: str) -> Tuple[int, int]:
    """
    read the request parameter `key` and parse it as a week range

    raises ValidationFailedException when the parameter is missing or empty,
    or when parse_week_value does not yield a tuple for it
    """
    raw = request.values.get(key)
    if raw:
        week_range = parse_week_value(raw)
        if isinstance(week_range, tuple):
            return week_range
        raise ValidationFailedException(f"{key} must match YYYYWW-YYYYWW")
    raise ValidationFailedException(f"{key} param is required")
257+
258+
def parse_day_or_week_arg(key: str, default_value: Optional[int] = None) -> Tuple[int, bool]:
    """
    read the request parameter `key` as either a single day or a single week

    returns a tuple of (time value, is_day). When the parameter is missing or empty
    and `default_value` is given, the default is returned together with
    guess_time_value_is_day(default_value).

    raises ValidationFailedException when the parameter is missing or empty and
    no default value is given
    """
    raw = request.values.get(key)
    if raw:
        # days are given as YYYY-MM-DD or YYYYMMDD, weeks as YYYYWW,
        # so a value of exactly six characters is treated as a week
        if len(raw) == 6:
            return parse_week_arg(key), False
        return parse_day_arg(key), True
    if default_value is None:
        raise ValidationFailedException(f"{key} param is required")
    return default_value, guess_time_value_is_day(default_value)
269+
270+
def parse_day_or_week_range_arg(key: str) -> Tuple[Tuple[int, int], bool]:
    """
    read the request parameter `key` as either a day range or a week range

    returns a tuple of (range, is_day)

    raises ValidationFailedException when the parameter is missing or empty
    """
    raw = request.values.get(key)
    if not raw:
        raise ValidationFailedException(f"{key} param is required")
    # day ranges look like YYYY-MM-DD--YYYY-MM-DD or YYYYMMDD-YYYYMMDD, week ranges like YYYYWW-YYYYWW,
    # so only a week range has exactly six characters in front of the first '-'
    first_part = raw.partition('-')[0]
    if len(first_part) == 6:
        return parse_week_range_arg(key), False
    return parse_day_range_arg(key), True

src/server/endpoints/covidcast.py

Lines changed: 93 additions & 48 deletions
Large diffs are not rendered by default.

src/server/endpoints/covidcast_utils/correlation.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class Correlation:
4949
"""
5050

5151

52-
def lag_join(lag: int, x: pd.DataFrame, y: pd.DataFrame) -> pd.DataFrame:
52+
def lag_join(lag: int, x: pd.DataFrame, y: pd.DataFrame, is_day = True) -> pd.DataFrame:
5353
# x_t_i ~ y_t_(i-lag)
5454
# aka x_t_(i+lag) ~ y_t_i
5555

@@ -60,24 +60,24 @@ def lag_join(lag: int, x: pd.DataFrame, y: pd.DataFrame) -> pd.DataFrame:
6060
# x_t_i ~ y_shifted_t_i
6161
# shift y such that y_t(i - lag) -> y_shifted_t_i
6262
x_shifted = x
63-
y_shifted = y.shift(lag, freq="D")
63+
y_shifted = y.shift(lag, freq="D" if is_day else 'W')
6464
else: # lag < 0
6565
# x_shifted_t_i ~ y_t_i
6666
# shift x such that x_t(i+lag) -> x_shifted_t_i
6767
# lag < 0 -> - - lag = + lag
68-
x_shifted = x.shift(-lag, freq="D")
68+
x_shifted = x.shift(-lag, freq="D" if is_day else 'W')
6969
y_shifted = y
7070
# inner join to remove invalid pairs
7171
r = x_shifted.join(y_shifted, how="inner", lsuffix="_x", rsuffix="_y")
7272
return r.rename(columns=dict(value_x="x", value_y="y"))
7373

7474

75-
def compute_correlations(geo_type: str, geo_value: str, signal_source: str, signal_signal: str, lag: int, x: pd.DataFrame, y: pd.DataFrame) -> Iterable[CorrelationResult]:
75+
def compute_correlations(geo_type: str, geo_value: str, signal_source: str, signal_signal: str, lag: int, x: pd.DataFrame, y: pd.DataFrame, is_day = True) -> Iterable[CorrelationResult]:
7676
"""
7777
x,y ... DataFrame with "time_value" (Date) index and "value" (float) column
7878
"""
7979
for current_lag in range(-lag, lag + 1):
80-
xy = lag_join(current_lag, x, y)
80+
xy = lag_join(current_lag, x, y, is_day)
8181
c = compute_correlation(xy)
8282

8383
yield CorrelationResult(geo_type, geo_value, signal_source, signal_signal, current_lag, r2=c.r2, intercept=c.intercept, slope=c.slope, samples=c.samples)

src/server/endpoints/covidcast_utils/db_signals.csv

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,4 +376,17 @@ usa-facts,deaths_cumulative_num,TRUE,deaths_7dav_incidence_num,TRUE,"Confirmed C
376376
usa-facts,deaths_cumulative_num,TRUE,deaths_7dav_incidence_prop,FALSE,"Confirmed COVID Deaths (Daily new, 7-day average, per 100k people)",TRUE,"Daily new confirmed COVID deaths, 7-day average, per 100k people",,day,Date,Value,per100k,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE,
377377
usa-facts,deaths_cumulative_num,TRUE,deaths_cumulative_prop,FALSE,"Confirmed COVID Deaths (Cumulative, per 100k people)",TRUE,"Cumulative confirmed COVID deaths, per 100k people",,day,Date,Value,per100k,late,bad,FALSE,FALSE,TRUE,FALSE,FALSE,
378378
usa-facts,deaths_cumulative_num,TRUE,deaths_incidence_num,TRUE,Confirmed COVID Deaths (Daily new),TRUE,Daily new confirmed COVID deaths,,day,Date,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
379-
usa-facts,deaths_cumulative_num,TRUE,deaths_incidence_prop,FALSE,"Confirmed COVID Deaths (Daily new, per 100k people)",TRUE,"Daily new confirmed COVID deaths, per 100k people",,day,Date,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
379+
usa-facts,deaths_cumulative_num,TRUE,deaths_incidence_prop,FALSE,"Confirmed COVID Deaths (Daily new, per 100k people)",TRUE,"Daily new confirmed COVID deaths, per 100k people",,day,Date,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
380+
nchs-mortality,deaths_covid_incidence_num,FALSE,deaths_covid_incidence_num,FALSE,Confirmed or Presumed COVID Deaths (Weekly new),TRUE,Number of weekly new deaths with confirmed or presumed COVID-19 ,National provisional death counts are based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
381+
nchs-mortality,deaths_covid_incidence_num,TRUE,deaths_covid_incidence_prop,FALSE,"Confirmed or Presumed COVID Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths with confirmed or presumed COVID-19, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
382+
nchs-mortality,deaths_allcause_incidence_num,FALSE,deaths_allcause_incidence_num,FALSE,All Causes Deaths (Weekly new),TRUE,Number of weekly new deaths from all causes,National provisional death counts are based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
383+
nchs-mortality,deaths_allcause_incidence_num,TRUE,deaths_allcause_incidence_prop,FALSE,"All Causes Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths from all causes, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
384+
nchs-mortality,deaths_flu_incidence_num,FALSE,deaths_flu_incidence_num,FALSE,Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving Influenza and at least one of (Pneumonia, COVID-19)",National provisional death counts are based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
385+
nchs-mortality,deaths_flu_incidence_num,TRUE,deaths_flu_incidence_prop,FALSE,"Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Influenza and at least one of (Pneumonia, COVID-19), per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
386+
nchs-mortality,deaths_pneumonia_notflu_incidence_num,FALSE,deaths_pneumonia_notflu_incidence_num,FALSE,Pneumonia excl. Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving Pneumonia, excluding Influenza deaths ",National provisional death counts are based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
387+
nchs-mortality,deaths_pneumonia_notflu_incidence_num,TRUE,deaths_pneumonia_notflu_incidence_prop,FALSE,"Pneumonia excl. Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Pneumonia, excluding Influenza deaths, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
388+
nchs-mortality,deaths_covid_and_pneumonia_notflu_incidence_num,FALSE,deaths_covid_and_pneumonia_notflu_incidence_num,FALSE,COVID and Pneumonia excl. Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving COVID-19 and Pneumonia, excluding Influenza ",National provisional death counts are based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
389+
nchs-mortality,deaths_covid_and_pneumonia_notflu_incidence_num,TRUE,deaths_covid_and_pneumonia_notflu_incidence_prop,FALSE,"COVID and Pneumonia excl. Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving COVID-19 and Pneumonia, excluding Influenza, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
390+
nchs-mortality,deaths_pneumonia_or_flu_or_covid_incidence_num,FALSE,deaths_pneumonia_or_flu_or_covid_incidence_num,FALSE,"COVID, Pneumonia or Influenza Deaths (Weekly new)",TRUE,"Number of weekly new deaths involving Pneumonia, Influenza, or COVID-19 ",National provisional death counts are based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)),week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
391+
nchs-mortality,deaths_pneumonia_or_flu_or_covid_incidence_num,TRUE,deaths_pneumonia_or_flu_or_covid_incidence_prop,FALSE,"COVID, Pneumonia or Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Pneumonia, Influenza, or COVID-19, per 100k people",,week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
392+
nchs-mortality,deaths_percent_of_expected,FALSE,deaths_percent_of_expected,FALSE,"Percentage of Expected Deaths (Weekly new, per 100k people)",TRUE,Number of weekly new deaths for all causes in 2020 compared to the average number across the same week in 2017–2019 ,,week,Week,Value,percent,late,neutral,FALSE,FALSE,FALSE,FALSE,FALSE,

src/server/endpoints/covidcast_utils/db_sources.csv

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,5 @@ ght,ght,Google Health Trends,"Google Health Trends tracks Google searches on hea
1818
google-survey,google-survey,Google Symptom Surveys,"Delphi ran symptom surveys using a Google tool which collects responses through publisher websites, Google's Opinions Reward app, and similar applications. No longer updated after May 15, 2020.",smoothed_cli,CC BY,,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/google-survey.html)
1919
indicator-combination,indicator-combination-nmf,Statistical Combination (NMF),"This source provides signals which are statistical combinations of other sources, calculated by Delphi. It is not a primary data source. No longer updated after March 17, 2021.",nmf_day_doc_fbs_ght,CC BY,,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/indicator-combination-inactive.html)
2020
quidel,quidel-flu,Quidel Inc. (Flu),"Quidel, Inc. manufactures diagnostic equipment for healthcare applications, and provides Delphi with anonymized data on tests and test results. This source includes flu tests. No longer updated after May 19, 2020.",smoothed_pct_negative,CC BY,https://cmu.box.com/s/sax48yxnahllrnbqlq6wqxblg6lsyq24,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/quidel.html#flu-tests)
21-
safegraph,safegraph-daily,SafeGraph (Daily),"[SafeGraph](https://docs.safegraph.com/docs/social-distancing-metrics) compiles daily mobility information using anonymized location data from mobile phones. This source includes a range of isolation/lockdown behaviors and home dwell time. No longer updated after April 19, 2021.",completely_home_prop,CC BY,https://cmu.box.com/s/m0p1wpet4vuvey7od83n70h0e97ky2kg,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/safegraph.html)
21+
safegraph,safegraph-daily,SafeGraph (Daily),"[SafeGraph](https://docs.safegraph.com/docs/social-distancing-metrics) compiles daily mobility information using anonymized location data from mobile phones. This source includes a range of isolation/lockdown behaviors and home dwell time. No longer updated after April 19, 2021.",completely_home_prop,CC BY,https://cmu.box.com/s/m0p1wpet4vuvey7od83n70h0e97ky2kg,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/safegraph.html)
22+
nchs-mortality,nchs-mortality,NCHS Mortality Data,"This data source of national provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm)). This data is different from the death data available from USAFacts and JHU CSSE: deaths are reported by the date they occur, not the date they are reported by local health departments, and data is frequently reissued as additional death certificates from recent weeks are received and tabulated.",deaths_allcause_incidence_num,[NCHS Data Use Agreement](https://www.cdc.gov/nchs/data_access/restrictions.htm),,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/nchs-mortality.html)

src/server/endpoints/covidcast_utils/model.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,12 +228,39 @@ def _load_data_signals(sources: List[DataSource]):
228228

229229
data_signals, data_signals_df = _load_data_signals(data_sources)
230230
data_signals_by_key = {d.key: d for d in data_signals}
231+
# register each signal a second time under (db_source, signal) when its data source
# uses a db alias, so lookups by the resolved source find the same signal
for d in data_signals:
    source = data_source_by_id.get(d.source)
    if not source or not source.uses_db_alias:
        continue
    data_signals_by_key[(source.db_source, d.signal)] = d
236+
231237

232238

233239
def get_related_signals(signal: DataSignal) -> List[DataSignal]:
234240
return [s for s in data_signals if s != signal and s.signal_basename == signal.signal_basename]
235241

236242

243+
def count_signal_time_types(source_signals: List[SourceSignalPair]) -> Tuple[int, int]:
    """
    count the number of signals in this query for each time type

    Pairs whose signal is the literal True (i.e. not an explicit list of signal names)
    are skipped, and so are signals that cannot be found in data_signals_by_key.
    Every found signal whose time type is not TimeType.week counts as daily.

    @returns daily counts, weekly counts
    """
    weekly = 0
    daily = 0
    for pair in source_signals:
        # identity check instead of `== True`: only the boolean singleton marks
        # a pair without explicit signal names, there is nothing to iterate then
        if pair.signal is True:
            continue
        for s in pair.signal:
            signal = data_signals_by_key.get((pair.source, s))
            if not signal:
                # unknown signal, its time type cannot be determined
                continue
            if signal.time_type == TimeType.week:
                weekly += 1
            else:
                daily += 1
    return daily, weekly
262+
263+
237264
def create_source_signal_alias_mapper(source_signals: List[SourceSignalPair]) -> Tuple[List[SourceSignalPair], Optional[Callable[[str, str], str]]]:
238265
alias_to_data_sources: Dict[str, List[DataSource]] = {}
239266
transformed_pairs: List[SourceSignalPair] = []

src/server/endpoints/covidcast_utils/trend.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from typing import Optional, Iterable, Tuple, Dict, List, Callable
33
from enum import Enum
44
from collections import OrderedDict
5-
from ...utils import shift_time_value
65

76

87
class TrendEnum(str, Enum):

src/server/utils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
from .dates import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date, days_in_range, weeks_in_range
1+
from .dates import shift_time_value, date_to_time_value, time_value_to_iso, time_value_to_date, days_in_range, weeks_in_range, shift_week_value, week_to_time_value, week_value_to_week, guess_time_value_is_day

src/server/utils/dates.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ def week_value_to_week(value: int) -> Week:
1919
return Week(date.max.year - 1, 1) # minus 1 since internally it does some checks with a year + 1
2020
return Week(year=year, week=week)
2121

22+
def guess_time_value_is_day(value: int) -> bool:
    """
    guess whether the given time value is a day (YYYYMMDD) and not a week (YYYYWW)

    the guess is based only on the length of the value's string form:
    more than six characters means day
    """
    digits = str(value)
    return len(digits) > 6
25+
2226
def date_to_time_value(d: date) -> int:
2327
return int(d.strftime("%Y%m%d"))
2428

@@ -37,6 +41,12 @@ def shift_time_value(time_value: int, days: int) -> int:
3741
shifted = d + timedelta(days=days)
3842
return date_to_time_value(shifted)
3943

44+
def shift_week_value(week_value: int, weeks: int) -> int:
    """
    shift the given week value by `weeks`

    the value is converted via week_value_to_week, `weeks` is added to the result
    and the sum is converted back via week_to_time_value.
    A shift of zero returns the input unchanged without any conversion.
    """
    if weeks != 0:
        return week_to_time_value(week_value_to_week(week_value) + weeks)
    return week_value
4050

4151
def days_in_range(range: Tuple[int, int]) -> int:
4252
"""

0 commit comments

Comments
 (0)