You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# How much missingness is there, and for what reasons? Is it possible to
736
795
# distinguish a missing value from a true zero? This is an unstructured text
@@ -743,27 +802,88 @@ col <- "Missingness"
743
802
# not sure what to do. Maybe just summarize the current state, e.g. "85%
744
803
# of counties available in mid 2020, then gradually declining to 8% of counties
745
804
# by April 2024", and leave it at that. We could occasionally update it.
805
+
806
+
# Reusable coverage sentence: all counties plus some territorial county
# equivalents. Built with paste0() only to keep source lines short; the
# resulting string is a single sentence.
all_counties_terr <- paste0(
  "Data is available for all counties and some territorial county ",
  "equivalents."
)
807
+
# Reusable coverage sentence: every state is covered (no mention of
# territories).
all_states <- "Data is available for all states."
808
+
# Reusable coverage sentence: every state plus some territories.
all_states_terr <- "Data is available for all states and some territories."
746
809
missingness<- c(
747
-
"chng"=NA_character_,
748
-
"covid-act-now"="A few counties, most notably in California, are not covered by this data source",
749
-
"doctor-visits"=NA_character_,
750
-
"dsew-cpr"=NA_character_,
751
-
"fb-survey"="A missing value indicates no valid data OR, for test positivity, that the value was censored due to small sample size (<= 5)",
752
-
"ght"=NA_character_,
753
-
"google-survey"=NA_character_,
754
-
"google-symptoms"=NA_character_,
755
-
"hhs"=NA_character_,
756
-
"hospital-admissions"=NA_character_,
757
-
"indicator-combination"=NA_character_,
758
-
"jhu-csse"=NA_character_,
759
-
"nchs-mortality"=NA_character_,
760
-
"quidel"=NA_character_,
761
-
"safegraph"=NA_character_,
762
-
"usa-facts"=NA_character_,
763
-
"youtube-survey"=NA_character_
810
+
"chng"=paste("Data is available for nearly all (99%) of counties.", all_states_terr),
811
+
"covid-act-now"=paste("Data is available for nearly all (99%) of counties. A few counties, most notably in California, are not covered by this data source", all_states),
812
+
"doctor-visits"=paste("Data is available for about 80% of counties", all_states_terr),
"fb-survey"="Geographic coverage varies widely by signal, with anywhere from 0.4% to 25% of counties available and 15% to 100% of states. A handful of signals are available for 40-50% of counties, and all states and some territories. Signals based on questions that were asked to a subset of survey respondents are available for fewer locations. Availability declines over time as survey response rate decreases. A missing value indicates no valid data OR, for test positivity, that the value was censored due to small sample size (<= 5)",
815
+
"ght"=all_states,
816
+
"google-survey"=paste("Data is available for about 20% of counties", all_states),
817
+
"google-symptoms"=NA_character_,# below
818
+
"hhs"=all_states_terr,
819
+
"hospital-admissions"=paste("Data is available for about 35% of counties", all_states),
"quidel"="Geographic coverage for some age groups (e.g. age 0-4 and age 65+) are extremely limited at HRR and MSA level, and can even be limited at state level on weekends.", # TODO
# Shared caveat pasted onto the end of the google-symptoms coverage notes.
# Built with paste0() only to keep source lines short; the pieces join into
# one unbroken string (note: it deliberately has no trailing period).
google_symptoms_note <- paste0(
  "Signals associated with rarer symptoms (e.g. ageusia) will tend to have ",
  "fewer locations available, due to upstream privacy censoring. Locations ",
  "with lower populations will tend to be less available for the same reason"
)
831
+
signal_specific_missingness<-tibble::tribble(
832
+
~data_source, ~signal, ~note,
833
+
"indicator-combination", "nmf_day_doc_fbc_fbs_ght", paste("Data is available for about 80% of counties", all_states_terr),
834
+
"indicator-combination", "nmf_day_doc_fbs_ght", paste("Data is available for about 70% of counties", all_states_terr),
835
+
836
+
"safegraph", "bars_visit_num", "Data is available for about 10% of counties. Data is available for about 90% of states",
837
+
"safegraph", "bars_visit_prop", "Data is available for about 10% of counties. Data is available for about 90% of states",
838
+
"safegraph", "restaurants_visit_num", paste("Data is available for about 80% of counties", all_states_terr),
839
+
"safegraph", "restaurants_visit_prop", paste("Data is available for about 80% of counties", all_states_terr),
840
+
841
+
"fb-survey", "smoothed_cli", paste("Data is available for about 50% of counties.", all_states_terr, "Availability declines over time as survey response rate decreases"),
842
+
"fb-survey", "smoothed_ili", paste("Data is available for about 50% of counties.", all_states_terr, "Availability declines over time as survey response rate decreases"),
843
+
"fb-survey", "smoothed_wcli", paste("Data is available for about 50% of counties.", all_states_terr, "Availability declines over time as survey response rate decreases"),
844
+
"fb-survey", "smoothed_wili", paste("Data is available for about 50% of counties.", all_states_terr, "Availability declines over time as survey response rate decreases"),
845
+
"fb-survey", "smoothed_travel_outside_state_5d", paste("Data is available for about 45% of counties.", all_states_terr, "Availability declines over time as survey response rate decreases"),
846
+
"fb-survey", "smoothed_wtravel_outside_state_5d", paste("Data is available for about 45% of counties.", all_states_terr, "Availability declines over time as survey response rate decreases"),
847
+
"fb-survey", "smoothed_nohh_cmnty_cli", paste("Data is available for about 40% of counties.", all_states_terr, "Availability declines over time as survey response rate decreases"),
848
+
"fb-survey", "smoothed_hh_cmnty_cli", paste("Data is available for about 40% of counties.", all_states_terr, "Availability declines over time as survey response rate decreases"),
849
+
"fb-survey", "smoothed_whh_cmnty_cli", paste("Data is available for about 35% of counties.", all_states_terr, "Availability declines over time as survey response rate decreases"),
850
+
"fb-survey", "smoothed_wnohh_cmnty_cli", paste("Data is available for about 35% of counties.", all_states_terr, "Availability declines over time as survey response rate decreases"),
851
+
852
+
"youtube-survey", "raw_cli", "Data is available for about 40% of states",
853
+
"youtube-survey", "raw_ili", "Data is available for about 40% of states",
854
+
"youtube-survey", "smoothed_cli", "Data is available for about 80% of states",
855
+
"youtube-survey", "smoothed_ili", "Data is available for about 80% of states",
856
+
857
+
"google-symptoms", "ageusia_raw_search", paste("Data is available for about 3-4% of counties. Data is available for about 85% of states.", google_symptoms_note),
858
+
"google-symptoms", "ageusia_smoothed_search", paste("Data is available for about 3-4% of counties. Data is available for about 85% of states.", google_symptoms_note),
859
+
"google-symptoms", "anosmia_raw_search", paste("Data is available for about 3-4% of counties. Data is available for about 85% of states.", google_symptoms_note),
860
+
"google-symptoms", "anosmia_smoothed_search", paste("Data is available for about 3-4% of counties. Data is available for about 85% of states.", google_symptoms_note),
861
+
"google-symptoms", "s01_raw_search", paste("Data is available for about 50% of counties.", all_states, google_symptoms_note),
862
+
"google-symptoms", "s01_smoothed_search", paste("Data is available for about 50% of counties.", all_states, google_symptoms_note),
863
+
"google-symptoms", "s02_raw_search", paste("Data is available for about 65% of counties.", all_states, google_symptoms_note),
864
+
"google-symptoms", "s02_smoothed_search", paste("Data is available for about 65% of counties.", all_states, google_symptoms_note),
865
+
"google-symptoms", "s03_raw_search", paste("Data is available for about 50% of counties.", all_states, google_symptoms_note),
866
+
"google-symptoms", "s03_smoothed_search", paste("Data is available for about 50% of counties.", all_states, google_symptoms_note),
867
+
"google-symptoms", "s04_raw_search", paste("Data is available for about 30% of counties.", all_states, google_symptoms_note),
868
+
"google-symptoms", "s04_smoothed_search", paste("Data is available for about 30% of counties.", all_states, google_symptoms_note),
869
+
"google-symptoms", "s05_raw_search", paste("Data is available for about 3-4% of counties. Data is available for about 90% of states.", google_symptoms_note),
870
+
"google-symptoms", "s05_smoothed_search", paste("Data is available for about 3-4% of counties. Data is available for about 90% of states.", google_symptoms_note),
871
+
"google-symptoms", "s06_raw_search", paste("Data is available for about 30% of counties.", all_states, google_symptoms_note),
872
+
"google-symptoms", "s06_smoothed_search", paste("Data is available for about 30% of counties.", all_states, google_symptoms_note),
873
+
"google-symptoms", "scontrol_raw_search", paste("Data is available for about 45% of counties.", all_states, google_symptoms_note),
874
+
"google-symptoms", "scontrol_smoothed_search", paste("Data is available for about 45% of counties.", all_states, google_symptoms_note),
875
+
"google-symptoms", "sum_anosmia_ageusia_raw_search", paste("Data is available for about 3-4% of counties. Data is available for about 85% of states.", google_symptoms_note),
876
+
"google-symptoms", "sum_anosmia_ageusia_smoothed_search", paste("Data is available for about 3-4% of counties. Data is available for about 85% of states.", google_symptoms_note),
0 commit comments