|
| 1 | +from typing import Dict, List, Tuple, Union |
| 2 | +from requests import get |
| 3 | +import sys |
| 4 | +import pandas as pd |
| 5 | +from pathlib import Path |
| 6 | + |
# Directory two levels above this file; the CSV input and output paths used
# further down are resolved relative to it.
base_dir = Path(__file__).parent.parent
# Common URL prefix of the epidata endpoints queried by this script.
base_url = 'https://delphi.cmu.edu/epidata'
| 9 | + |
def is_known_missing(source: str, signal: str) -> bool:
    """Tell whether a missing definition for this signal should be ignored.

    A signal is ignored when its name contains ``7dav_cumulative`` or when it
    belongs to one of the sources listed below.

    Args:
        source: data source name of the signal.
        signal: signal name.

    Returns:
        True if the signal must not be reported as missing, False otherwise.
    """
    ignored_sources = ('youtube-survey', 'indicator-combination')
    return '7dav_cumulative' in signal or source in ignored_sources
| 16 | + |
def compute_missing_signals() -> List[Tuple[Tuple[str, str], Dict]]:
    """Find signals that have computed metadata but no signal definition.

    Fetches the defined signals from ``{base_url}/covidcast/meta`` and the
    computed metadata from ``{base_url}/covidcast_meta/?format=json``, then
    collects every ``(data_source, signal)`` pair of the computed metadata
    that is neither defined nor excluded by ``is_known_missing``.

    Returns:
        A list of ``((source, signal), entry)`` tuples, where ``entry`` is the
        first computed metadata entry encountered for that pair.

    Raises:
        requests.HTTPError: if an endpoint answers with an error status.
        requests.Timeout: if an endpoint does not answer within the timeout.
    """
    # Without a timeout a stalled connection would block this script forever.
    timeout_seconds = 120

    def fetch_json(url: str):
        """Download ``url`` and return its decoded JSON payload."""
        response = get(url, timeout=timeout_seconds)
        # Fail loudly on HTTP errors instead of parsing an error payload as metadata.
        response.raise_for_status()
        return response.json()

    defined_meta = fetch_json(f"{base_url}/covidcast/meta")
    defined_signals: Dict[Tuple[str, str], Dict] = {}
    for source in defined_meta:
        for signal in source['signals']:
            # Register the signal under both its own source name and the
            # source's db_source so a lookup by either one succeeds.
            defined_signals[(signal['source'], signal['signal'])] = signal
            defined_signals[(source['db_source'], signal['signal'])] = signal

    computed_meta = fetch_json(f"{base_url}/covidcast_meta/?format=json")
    # Group the computed entries by (data_source, signal); a pair may have several.
    computed_signals: Dict[Tuple[str, str], List[Dict]] = {}
    for entry in computed_meta:
        computed_signals.setdefault((entry['data_source'], entry['signal']), []).append(entry)

    missing_signals: List[Tuple[Tuple[str, str], Dict]] = []
    for key, infos in computed_signals.items():
        if defined_signals.get(key):
            continue
        if is_known_missing(key[0], key[1]):
            continue
        # One representative entry is enough to guess a definition row later.
        missing_signals.append((key, infos[0]))
    return missing_signals
| 38 | + |
| 39 | + |
def gen_row(source: str, signal: str, info: Dict) -> Dict:
    """Guess a signal definition row for a signal that has none yet.

    Args:
        source: data source name; ``'fb-survey'`` selects percentage-style
            defaults, every other source gets raw-value defaults.
        signal: signal name; its prefix and substrings drive the smoothed,
            weighted and cumulative flags.
        info: computed metadata entry of the signal; only ``'time_type'`` is read.

    Returns:
        A dict mapping column names to guessed values. Fields that need manual
        completion are set to ``'TODO'``.

    Raises:
        KeyError: if ``info`` has no ``'time_type'`` key.
    """
    weighted_prefix = 'smoothed_w'
    # NOTE(review): presumably these prefixes belong to unweighted signals whose
    # name merely begins with 'w' after 'smoothed_' - confirm against the signal list.
    not_weighted_prefixes = (
        'smoothed_wa',
        'smoothed_we',
        'smoothed_wi',
        'smoothed_wo',
        'smoothed_wu',
    )
    is_weighted = signal.startswith(weighted_prefix) and not signal.startswith(not_weighted_prefixes)
    # Only the leading prefix marks the weighting, so replace just the first occurrence.
    base_name = signal.replace(weighted_prefix, 'smoothed_', 1) if is_weighted else signal
    is_fb_survey = source == 'fb-survey'
    time_type = info['time_type']

    def bool_str(flag) -> str:
        """Render a truth value as 'TRUE' or 'FALSE'."""
        return 'TRUE' if flag else 'FALSE'

    # A weighted signal has a base signal, so its descriptions are left empty.
    description = 'TODO' if base_name == signal else ''

    return {
        'Source Subdivision': source,
        'Signal BaseName': base_name,
        'base_is_other': bool_str(False),
        'Signal': signal,
        # NOTE(review): a real boolean here, unlike the 'TRUE'/'FALSE' strings
        # used for the other flags - kept as is; confirm whether intended.
        'Compute From Base': False,
        # The f-prefix is required so the base name is actually interpolated.
        'Name': f"{base_name} (Weighted)" if is_weighted else signal,
        'Active': bool_str(True),
        'Short Description': description,
        'Description': description,
        'Time Type': time_type,
        'Time Label': 'Week' if time_type == 'week' else 'Day',
        'Value Label': 'Percentage' if is_fb_survey else 'Value',
        'Format': 'percent' if is_fb_survey else 'raw',
        'Category': 'public' if is_fb_survey else 'other',
        'High Values Are': 'neutral',
        'Is Smoothed': bool_str(signal.startswith('smoothed') or '7dav' in signal),
        'Is Weighted': bool_str(is_weighted),
        'Is Cumulative': bool_str('cumulative' in signal),
        'Has StdErr': 'TRUE' if is_fb_survey else '',
        'Has Sample Size': 'TRUE' if is_fb_survey else '',
        'Link': 'TODO',
    }
| 68 | + |
def generate_missing_info_hint(missing_signals: List[Tuple[Tuple[str, str], Dict]]) -> None:
    """Write guessed definition rows for the missing signals to a CSV file.

    One row per missing signal is produced with ``gen_row`` and saved as
    ``missing_db_signals.csv`` in ``base_dir``.

    Args:
        missing_signals: ``((source, signal), info)`` tuples as returned by
            ``compute_missing_signals``.
    """
    rows = []
    for (source, signal), info in missing_signals:
        rows.append(gen_row(source, signal, info))
    guessed_rows = pd.DataFrame.from_records(rows)

    # Keep only the header of the existing definitions file: concatenating onto
    # it makes the output follow the same column order.
    template_path = base_dir / 'src/server/endpoints/covidcast_utils/db_signals.csv'
    header_only = pd.read_csv(template_path).head(0)

    output_path = base_dir / 'missing_db_signals.csv'
    guessed: pd.DataFrame = pd.concat([header_only, guessed_rows])
    guessed.to_csv(output_path, index=False)
| 78 | + |
# Script entry: exit with status 0 when every computed signal has a definition;
# otherwise write the hint file and exit with status 1.
missing = compute_missing_signals()
if not missing:
    print('all signals found')
    sys.exit(0)

print(f'found {len(missing)} missing signals')
generate_missing_info_hint(missing)
sys.exit(1)
| 87 | + |
0 commit comments