From 22e88e2b193df340724055216fe747600bd6942e Mon Sep 17 00:00:00 2001 From: JedGrabman Date: Mon, 9 Nov 2020 13:35:57 -0500 Subject: [PATCH] Make geo_id checks robust Automatically determine signals and data sources to use for retrieving geo_values. This adds robustness at the cost of efficiency. --- validator/scripts/unique_geoids.R | 23 ++++---- validator/static/county_geo.csv | 89 +++++++++++++++++++++++++++++++ validator/static/msa_geo.csv | 11 ++++ validator/static/state_geo.csv | 4 ++ 4 files changed, 117 insertions(+), 10 deletions(-) diff --git a/validator/scripts/unique_geoids.R b/validator/scripts/unique_geoids.R index 52edec904..676223be3 100644 --- a/validator/scripts/unique_geoids.R +++ b/validator/scripts/unique_geoids.R @@ -1,15 +1,18 @@ library(covidcast) +library(dplyr) +meta_info = covidcast_meta() +locations_by_type = meta_info %>% group_by(geo_type) %>% summarize(Value = max(num_locations)) -geo_types = c("county", "state", "hrr", "msa") -for(type in geo_types){ - dtf = covidcast_signal("indicator-combination", "confirmed_7dav_incidence_num", start_day = "2020-10-01", end_day = "2020-10-01", geo_type = type) +results = list() +for (i in 1:nrow(locations_by_type)){ + type = locations_by_type$geo_type[i] + max_locations = locations_by_type$Value[i] + max_row = with(meta_info, meta_info[geo_type == type & num_locations == max_locations,][1,]) + data_source = max_row$data_source + signal = max_row$signal + results[[i]] = covidcast_signal(data_source, signal, geo_type = type) + geo_values = sort(unique(results[[i]]$geo_value)) file_name = paste0("../static/", type, "_geo.csv") - write.table(unique(dtf$geo_value), file = file_name, row.names = F, col.names = "geo_id") + write.table(geo_values, file = file_name, row.names = F, col.names = "geo_id") } -dtf = covidcast_signal("ght", "raw_search", start_day = "2020-10-01", end_day = "2020-10-01", geo_type = "dma") -file_name = "../static/dma_geo.csv" -write.table(unique(dtf$geo_value), file = file_name, row.names = F, col.names = "geo_id") - -national_file = "../static/national_geo.csv" -write.table("us", file = national_file, row.names = F, col.names = "geo_id") diff --git a/validator/static/county_geo.csv b/validator/static/county_geo.csv index a6127cf9b..3812b9693 100644 --- a/validator/static/county_geo.csv +++ b/validator/static/county_geo.csv @@ -94,6 +94,7 @@ "02230" "02240" "02261" +"02270" "02275" "02282" "02290" @@ -326,6 +327,7 @@ "10001" "10003" "10005" +"11000" "11001" "12000" "12001" @@ -2458,6 +2460,7 @@ "46107" "46109" "46111" +"46113" "46115" "46117" "46119" @@ -3191,4 +3194,90 @@ "56041" "56043" "56045" +"60000" +"66000" +"69000" +"70002" +"70003" "72000" +"72001" +"72003" +"72005" +"72007" +"72009" +"72011" +"72013" +"72015" +"72017" +"72019" +"72021" +"72023" +"72025" +"72027" +"72029" +"72031" +"72033" +"72035" +"72037" +"72039" +"72041" +"72043" +"72045" +"72047" +"72049" +"72051" +"72053" +"72054" +"72055" +"72057" +"72059" +"72061" +"72063" +"72065" +"72067" +"72069" +"72071" +"72073" +"72075" +"72077" +"72079" +"72081" +"72083" +"72085" +"72087" +"72089" +"72091" +"72093" +"72095" +"72097" +"72099" +"72101" +"72103" +"72105" +"72107" +"72109" +"72111" +"72113" +"72115" +"72117" +"72119" +"72121" +"72123" +"72125" +"72127" +"72129" +"72131" +"72133" +"72135" +"72137" +"72139" +"72141" +"72143" +"72145" +"72147" +"72149" +"72151" +"72153" +"72888" +"72999" +"78000" diff --git a/validator/static/msa_geo.csv b/validator/static/msa_geo.csv index 9025de71a..a8d1043d6 100644 --- a/validator/static/msa_geo.csv +++ b/validator/static/msa_geo.csv @@ -1,5 +1,6 @@ "geo_id" "10180" +"10380" "10420" "10500" "10540" @@ -14,6 +15,7 @@ "11460" "11500" "11540" +"11640" "11700" "12020" "12060" @@ -144,11 +146,13 @@ "24660" "24780" "24860" +"25020" "25060" "25180" "25220" "25260" "25420" +"25500" "25540" "25620" "25860" @@ -175,6 +179,7 @@ "27780" "27860" "27900" +"27980" "28020" "28100" "28140" @@ -218,6 +223,7 @@ "31740" "31860" "31900" +"32420" "32580" "32780" "32820" @@ -272,6 +278,7 @@ "38300" "38340" "38540" +"38660" "38860" "38900" "38940" @@ -309,7 +316,9 @@ "41700" "41740" "41860" +"41900" "41940" +"41980" "42020" "42100" "42140" @@ -334,6 +343,7 @@ "44180" "44220" "44300" +"44420" "44700" "44940" "45060" @@ -376,6 +386,7 @@ "49180" "49340" "49420" +"49500" "49620" "49660" "49700" diff --git a/validator/static/state_geo.csv b/validator/static/state_geo.csv index e4d129ad6..8bba20eac 100644 --- a/validator/static/state_geo.csv +++ b/validator/static/state_geo.csv @@ -2,6 +2,7 @@ "ak" "al" "ar" +"as" "az" "ca" "co" @@ -10,6 +11,7 @@ "de" "fl" "ga" +"gu" "hi" "ia" "id" @@ -24,6 +26,7 @@ "mi" "mn" "mo" +"mp" "ms" "mt" "nc" @@ -46,6 +49,7 @@ "tx" "ut" "va" +"vi" "vt" "wa" "wi"