Commit a1b98a5

Merge pull request #954 from cmu-delphi/v4-schema-revisions-release-prep

Epidata v4.1

2 parents fe8b77b + aeba869

58 files changed: +1711 -2461 lines

.github/workflows/ci.yaml

Lines changed: 4 additions & 2 deletions

@@ -62,7 +62,7 @@ jobs:
       - name: Start services
         run: |
           docker network create --driver bridge delphi-net
-          docker run --rm -d -p 13306:3306 --network delphi-net --name delphi_database_epidata delphi_database_epidata
+          docker run --rm -d -p 13306:3306 --network delphi-net --name delphi_database_epidata --cap-add=sys_nice delphi_database_epidata
           docker run --rm -d -p 10080:80 --env "SQLALCHEMY_DATABASE_URI=mysql+mysqldb://user:pass@delphi_database_epidata:3306/epidata" --env "FLASK_SECRET=abc" --env "FLASK_PREFIX=/epidata" --network delphi-net --name delphi_web_epidata delphi_web_epidata
           docker ps

@@ -108,7 +108,9 @@ jobs:
   image:
     needs: build
     # only on main and dev branch
-    if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/dev'
+    #if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/dev' # TODO: Revert after v4 migration is complete
+    if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/v4-schema-revisions-release-prep'
+
     runs-on: ubuntu-latest
     steps:
       - name: Check out code
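
The new `--cap-add=sys_nice` flag (it reappears in `dev/local/Makefile` below) appears to be tied to the move to Percona Server 8 later in this commit: mysqld 8 tries to adjust thread scheduling priority, and that `setpriority` call wants the `CAP_SYS_NICE` capability inside the container. A rough sketch of reproducing this CI step locally, assuming a running Docker daemon and an already-built `delphi_database_epidata` image:

# reproduce the "Start services" step locally; assumes Docker is installed
# and the delphi_database_epidata image has already been built
import subprocess

subprocess.run(["docker", "network", "create", "--driver", "bridge", "delphi-net"],
               check=False)  # tolerate "network already exists"
subprocess.run([
    "docker", "run", "--rm", "-d", "-p", "13306:3306",
    "--network", "delphi-net", "--name", "delphi_database_epidata",
    "--cap-add=sys_nice",  # lets Percona's mysqld raise thread priority in-container
    "delphi_database_epidata",
], check=True)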

.gitignore

Lines changed: 3 additions & 0 deletions

@@ -1,4 +1,7 @@
 __pycache__/
+*.pyc
+*~
+\#*#
 .DS_Store
 /.vscode
 /delphi-epidata
dev/docker/database/epidata/Dockerfile

Lines changed: 20 additions & 2 deletions

@@ -1,5 +1,14 @@
-# start with the `delphi_database` image
-FROM delphi_database
+# start with a standard percona mysql image
+FROM percona:ps-8
+
+# percona exits with the mysql user but we need root for additional setup
+USER root
+
+# use delphi's timezome
+RUN ln -s -f /usr/share/zoneinfo/America/New_York /etc/localtime
+
+# specify a development-only password for the database user "root"
+ENV MYSQL_ROOT_PASSWORD pass
 
 # create the `epidata` database
 ENV MYSQL_DATABASE epidata

@@ -8,8 +17,17 @@ ENV MYSQL_DATABASE epidata
 ENV MYSQL_USER user
 ENV MYSQL_PASSWORD pass
 
+# provide DDL which will configure dev environment at container startup
+COPY repos/delphi/delphi-epidata/dev/docker/database/epidata/_init.sql /docker-entrypoint-initdb.d/
+
 # provide DDL which will create empty tables at container startup
 COPY repos/delphi/delphi-epidata/src/ddl/*.sql /docker-entrypoint-initdb.d/
 
+# provide additional configuration needed for percona
+COPY repos/delphi/delphi-epidata/dev/docker/database/mysql.d/*.cnf /etc/my.cnf.d/
+
 # grant access to SQL scripts
 RUN chmod o+r /docker-entrypoint-initdb.d/*.sql
+
+# restore mysql user for percona
+USER mysql
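
Note that the mysql/percona entrypoint executes the files in /docker-entrypoint-initdb.d/ in lexicographic order, which is presumably why the new setup script is named with a leading underscore: `_` sorts before the lowercase DDL filenames copied from `src/ddl`, so the `covid` database exists before any table DDL runs. A quick check of that ordering (the two DDL names are hypothetical stand-ins):

# '_' (0x5F) sorts before lowercase letters (0x61+), so _init.sql runs first;
# the other two filenames are made-up stand-ins for the src/ddl scripts
print(sorted(["covidcast_meta_cache.sql", "_init.sql", "epimetric.sql"]))
# -> ['_init.sql', 'covidcast_meta_cache.sql', 'epimetric.sql']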
dev/docker/database/epidata/_init.sql

Lines changed: 2 additions & 0 deletions

@@ -0,0 +1,2 @@
+CREATE DATABASE covid;
+GRANT ALL ON covid.* TO 'user';

dev/docker/database/mysql.d/my.cnf

Lines changed: 2 additions & 0 deletions

@@ -0,0 +1,2 @@
+[mysqld]
+default_authentication_plugin=mysql_native_password
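
These two small files do the dev-environment setup: `_init.sql` creates the new `covid` schema and grants the test user access, and `my.cnf` pins the server's default auth plugin. MySQL 8 and Percona Server 8 default to `caching_sha2_password`, which older client libraries (such as the `mysql+mysqldb` driver in the CI workflow's `SQLALCHEMY_DATABASE_URI`) cannot always negotiate; forcing `mysql_native_password` keeps them working. A minimal connectivity sketch, assuming the `user`/`pass` credentials and the 13306 port mapping used by CI and the Makefile:

# minimal smoke test for the dev database; mysql-connector-python is the
# same driver the integration tests in this commit use
import mysql.connector

cnx = mysql.connector.connect(
    user="user", password="pass",
    host="127.0.0.1", port=13306,
    database="covid",  # created at container startup by _init.sql
    auth_plugin="mysql_native_password",  # matches my.cnf above
)
cur = cnx.cursor()
cur.execute("SHOW TABLES")
print([name for (name,) in cur])
cur.close()
cnx.close()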

dev/local/Makefile

Lines changed: 8 additions & 7 deletions

(Several hunks in this file only strip trailing whitespace: each deleted line ends in a trailing space, and the added line is otherwise identical.)

@@ -10,19 +10,19 @@
 # Creates all prereq images (delphi_database, delphi_python) only if they don't
 # exist. If you need to rebuild a prereq, you're probably doing something
 # complicated, and can figure out the rebuild command on your own.
-# 
-# 
+#
+#
 # Commands:
-# 
+#
 # web: Stops currently-running delphi_web_epidata instances, if any.
 # Rebuilds delphi_web_epidata image.
 # Runs image in the background and pipes stdout to a log file.
-# 
+#
 # db: Stops currently-running delphi_database_epidata instances, if any.
 # Rebuilds delphi_database_epidata image.
 # Runs image in the background and pipes stdout to a log file.
 # Blocks until database is ready to receive connections.
-# 
+#
 # python: Rebuilds delphi_web_python image. You shouldn't need to do this
 # often; only if you are installing a new environment, or have
 # made changes to delphi-epidata/dev/docker/python/Dockerfile.

@@ -35,7 +35,7 @@
 #
 # clean: Cleans up dangling Docker images.
 #
-# 
+#
 # Optional arguments:
 # pdb=1 Drops you into debug mode upon test failure, if running tests.
 # test= Only runs tests in the directories provided here, e.g.

@@ -108,6 +108,7 @@ db:
 	@# Run the database
 	@docker run --rm -p 127.0.0.1:13306:3306 \
 		--network delphi-net --name delphi_database_epidata \
+		--cap-add=sys_nice \
 		delphi_database_epidata >$(LOG_DB) 2>&1 &
 
 	@# Block until DB is ready

@@ -130,7 +131,7 @@ py:
 all: db web py
 
 .PHONY=test
-test: 
+test:
 	@docker run -i --rm --network delphi-net \
 		--mount type=bind,source=$(CWD)repos/delphi/delphi-epidata,target=/usr/src/app/repos/delphi/delphi-epidata,readonly \
 		--mount type=bind,source=$(CWD)repos/delphi/delphi-epidata/src,target=/usr/src/app/delphi/epidata,readonly \

devops/Dockerfile

Lines changed: 6 additions & 3 deletions

@@ -3,13 +3,16 @@ WORKDIR /src
 COPY . /src
 RUN npm ci && npm run build
 
-FROM tiangolo/meinheld-gunicorn:python3.7
+FROM tiangolo/meinheld-gunicorn:python3.8
 LABEL org.opencontainers.image.source=https://github.com/cmu-delphi/delphi-epidata
 # use delphi's timezome
 RUN ln -s -f /usr/share/zoneinfo/America/New_York /etc/localtime
 
-COPY requirements.txt /app
-RUN pip install --no-cache-dir -r requirements.txt
+COPY requirements.txt /app/requirements_also.txt
+RUN pip install --no-cache-dir -r /tmp/requirements.txt -r requirements_also.txt
+# the file /tmp/requirements.txt is created in the parent docker definition. (see:
+# https://github.com/tiangolo/meinheld-gunicorn-docker/blob/master/docker-images/python3.8.dockerfile#L5 )
+# this combined requirements installation ensures all version constrants are accounted for.
 
 # disable python stdout buffering
 ENV PYTHONUNBUFFERED 1
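
The comment in the diff explains the intent: installing the parent image's /tmp/requirements.txt and this repo's requirements in a single `pip install` lets the resolver see every version constraint at once, instead of a second install silently upgrading packages pinned by the first. A small self-contained demonstration of the idea, using a hypothetical pair of constraint files:

# demonstrate joint resolution: pip must satisfy both files' constraints
# in one invocation (here, 7.0 <= click < 8.0); the pins are hypothetical
import os
import subprocess
import sys
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    base = os.path.join(tmp, "base.txt")    # stands in for /tmp/requirements.txt
    extra = os.path.join(tmp, "extra.txt")  # stands in for requirements_also.txt
    with open(base, "w") as f:
        f.write("click>=7.0\n")
    with open(extra, "w") as f:
        f.write("click<8.0\n")
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "--no-cache-dir",
         "-r", base, "-r", extra],
        check=True)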
Lines changed: 2 additions & 1 deletion

@@ -1,3 +1,4 @@
 geo_id,value,stderr,sample_size,issue,time_value,geo_type,signal,source
 d_nonlatest,0,0,0,1,0,geo,sig,src
-d_latest, 0,0,0,3,0,geo,sig,src
+d_latest, 0,0,0,3,0,geo,sig,src
+d_justone, 0,0,0,1,0,geo,sig,src

(The "d_latest" row is textually unchanged; it apparently only gains a trailing newline so the new "d_justone" row can be appended.)

integrations/acquisition/covidcast/test_covidcast_meta_caching.py

Lines changed: 25 additions & 22 deletions

@@ -36,13 +36,17 @@ def setUp(self):
         user='user',
         password='pass',
         host='delphi_database_epidata',
-        database='epidata')
+        database='covid')
     cur = cnx.cursor()
 
-    # clear the `covidcast` table
-    cur.execute('truncate table covidcast')
+    # clear all tables
+    cur.execute("truncate table epimetric_load")
+    cur.execute("truncate table epimetric_full")
+    cur.execute("truncate table epimetric_latest")
+    cur.execute("truncate table geo_dim")
+    cur.execute("truncate table signal_dim")
     # reset the `covidcast_meta_cache` table (it should always have one row)
-    cur.execute('update covidcast_meta_cache set timestamp = 0, epidata = ""')
+    cur.execute('update covidcast_meta_cache set timestamp = 0, epidata = "[]"')
     cnx.commit()
     cur.close()
 

@@ -67,30 +71,29 @@ def test_caching(self):
 
     # insert dummy data
     self.cur.execute(f'''
-      INSERT INTO
-        `covidcast` (`id`, `source`, `signal`, `time_type`, `geo_type`,
-          `time_value`, `geo_value`, `value_updated_timestamp`,
-          `value`, `stderr`, `sample_size`, `direction_updated_timestamp`,
-          `direction`, `issue`, `lag`, `is_latest_issue`, `is_wip`,`missing_value`,
-          `missing_stderr`,`missing_sample_size`)
+      INSERT INTO `signal_dim` (`signal_key_id`, `source`, `signal`)
       VALUES
-        (0, 'src', 'sig', 'day', 'state', 20200422, 'pa',
-          123, 1, 2, 3, 456, 1, 20200422, 0, 1, False, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
-        (0, 'src', 'sig', 'day', 'state', 20200422, 'wa',
-          789, 1, 2, 3, 456, 1, 20200423, 1, 1, False, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING})
+        (42, 'src', 'sig');
+    ''')
+    self.cur.execute(f'''
+      INSERT INTO `geo_dim` (`geo_key_id`, `geo_type`, `geo_value`)
+      VALUES
+        (96, 'state', 'pa'),
+        (97, 'state', 'wa');
     ''')
     self.cur.execute(f'''
       INSERT INTO
-        `covidcast` (`id`, `source`, `signal`, `time_type`, `geo_type`,
-          `time_value`, `geo_value`, `value_updated_timestamp`,
-          `value`, `stderr`, `sample_size`, `direction_updated_timestamp`,
-          `direction`, `issue`, `lag`, `is_latest_issue`, `is_wip`,`missing_value`,
-          `missing_stderr`,`missing_sample_size`)
+        `epimetric_latest` (`epimetric_id`, `signal_key_id`, `geo_key_id`, `time_type`,
+          `time_value`, `value_updated_timestamp`,
+          `value`, `stderr`, `sample_size`,
+          `issue`, `lag`, `missing_value`,
+          `missing_stderr`,`missing_sample_size`)
       VALUES
-        (100, 'src', 'wip_sig', 'day', 'state', 20200422, 'pa',
-          456, 4, 5, 6, 789, -1, 20200422, 0, 1, True, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING})
+        (15, 42, 96, 'day', 20200422,
+          123, 1, 2, 3, 20200422, 0, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}),
+        (16, 42, 97, 'day', 20200422,
+          789, 1, 2, 3, 20200423, 1, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING})
     ''')
-
     self.cnx.commit()
 
     # make sure the live utility is serving something sensible
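
The rewritten fixture shows the v4 storage layout in miniature: `signal_dim` and `geo_dim` hold each distinct (`source`, `signal`) and (`geo_type`, `geo_value`) pair exactly once, and the `epimetric_*` fact tables refer to them by surrogate keys (42, 96, 97 above). A sketch of the lookup-or-create pattern this implies for writers; the helper is hypothetical, not part of this commit, and assumes `signal_key_id` is auto-generated when omitted:

# hypothetical helper mirroring the dimension-key pattern the fixture above
# hand-codes with fixed ids; `cur` is a mysql-connector cursor on the covid schema
def get_or_create_signal_key(cur, source, signal):
    cur.execute(
        "SELECT `signal_key_id` FROM `signal_dim` "
        "WHERE `source` = %s AND `signal` = %s",
        (source, signal))
    row = cur.fetchone()
    if row is not None:
        return row[0]
    cur.execute(
        "INSERT INTO `signal_dim` (`source`, `signal`) VALUES (%s, %s)",
        (source, signal))
    return cur.lastrowid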

integrations/acquisition/covidcast/test_csv_uploading.py

Lines changed: 18 additions & 45 deletions

@@ -32,9 +32,18 @@ def setUp(self):
         user='user',
         password='pass',
         host='delphi_database_epidata',
-        database='epidata')
+        database='covid')
     cur = cnx.cursor()
-    cur.execute('truncate table covidcast')
+
+    # clear all tables
+    cur.execute("truncate table epimetric_load")
+    cur.execute("truncate table epimetric_full")
+    cur.execute("truncate table epimetric_latest")
+    cur.execute("truncate table geo_dim")
+    cur.execute("truncate table signal_dim")
+    # reset the `covidcast_meta_cache` table (it should always have one row)
+    cur.execute('update covidcast_meta_cache set timestamp = 0, epidata = "[]"')
+
     cnx.commit()
     cur.close()
 

@@ -68,11 +77,12 @@ def apply_lag(expected_epidata):
     return expected_epidata
 
   def verify_timestamps_and_defaults(self):
-    self.cur.execute('select value_updated_timestamp, direction_updated_timestamp, direction from covidcast')
-    for value_updated_timestamp, direction_updated_timestamp, direction in self.cur:
+    self.cur.execute('''
+        select value_updated_timestamp from epimetric_full
+        UNION ALL
+        select value_updated_timestamp from epimetric_latest''')
+    for (value_updated_timestamp,) in self.cur:
       self.assertGreater(value_updated_timestamp, 0)
-      self.assertEqual(direction_updated_timestamp, 0)
-      self.assertIsNone(direction)
 
   def test_uploading(self):
     """Scan, parse, upload, archive, serve, and fetch a covidcast signal."""

@@ -91,8 +101,6 @@ def test_uploading(self):
         log_file=log_file_directory +
                  "output.log",
         data_dir=data_dir,
-        is_wip_override=False,
-        not_wip_override=False,
         specific_issue_date=False)
     uploader_column_rename = {"geo_id": "geo_value", "val": "value", "se": "stderr", "missing_val": "missing_value", "missing_se": "missing_stderr"}
 

@@ -217,41 +225,6 @@ def test_uploading(self):
       self.setUp()
 
 
-    with self.subTest("Valid wip"):
-      values = pd.DataFrame({
-        "geo_id": ["me", "nd", "wa"],
-        "val": [10.0, 20.0, 30.0],
-        "se": [0.01, 0.02, 0.03],
-        "sample_size": [100.0, 200.0, 300.0],
-        "missing_val": [Nans.NOT_MISSING] * 3,
-        "missing_se": [Nans.NOT_MISSING] * 3,
-        "missing_sample_size": [Nans.NOT_MISSING] * 3
-      })
-      signal_name = "wip_prototype"
-      values.to_csv(source_receiving_dir + f'/20200419_state_{signal_name}.csv', index=False)
-
-      # upload CSVs
-      main(args)
-      response = Epidata.covidcast('src-name', signal_name, 'day', 'state', 20200419, '*')
-
-      expected_values = pd.concat([values, pd.DataFrame({
-        "time_value": [20200419] * 3,
-        "signal": [signal_name] * 3,
-        "direction": [None] * 3
-      })], axis=1).rename(columns=uploader_column_rename).to_dict(orient="records")
-      expected_response = {'result': 1, 'epidata': self.apply_lag(expected_values), 'message': 'success'}
-
-      self.assertEqual(response, expected_response)
-      self.verify_timestamps_and_defaults()
-
-      # Verify that files were archived
-      path = data_dir + f'/archive/successful/src-name/20200419_state_wip_prototype.csv.gz'
-      self.assertIsNotNone(os.stat(path))
-
-      self.tearDown()
-      self.setUp()
-
-
     with self.subTest("Valid signal with name length 32<x<64"):
       values = pd.DataFrame({
         "geo_id": ["pa"],

@@ -262,7 +235,7 @@ def test_uploading(self):
         "missing_se": [Nans.NOT_MISSING],
         "missing_sample_size": [Nans.NOT_MISSING]
       })
-      signal_name = "wip_really_long_name_that_will_be_accepted"
+      signal_name = "really_long_name_that_will_be_accepted"
       values.to_csv(source_receiving_dir + f'/20200419_state_{signal_name}.csv', index=False)
 
       # upload CSVs

@@ -293,7 +266,7 @@ def test_uploading(self):
         "missing_se": [Nans.NOT_MISSING],
         "missing_sample_size": [Nans.NOT_MISSING]
       })
-      signal_name = "wip_really_long_name_that_will_get_truncated_lorem_ipsum_dolor_sit_amet"
+      signal_name = "really_long_name_that_will_get_truncated_lorem_ipsum_dolor_sit_amet"
      values.to_csv(source_receiving_dir + f'/20200419_state_{signal_name}.csv', index=False)
 
       # upload CSVs

0 commit comments

Comments
 (0)