Skip to content

Commit cd24e13

Browse files
zhuoran-Cheng16 authored and krivard committed
add back the path check, add valid/unvalid path check
1 parent 623539e commit cd24e13

File tree

2 files changed

+22
-12
lines changed

2 files changed

+22
-12
lines changed

src/acquisition/covidcast/csv_importer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def find_issue_specific_csv_files(scan_dir, glob=glob):
8888
logger = get_structured_logger('find_issue_specific_csv_files')
8989
for path in sorted(glob.glob(os.path.join(scan_dir, '*'))):
9090
issuedir_match = CsvImporter.PATTERN_ISSUE_DIR.match(path.lower())
91-
if issuedir_match:
91+
if issuedir_match and os.path.isdir(path):
9292
issue_date_value = int(issuedir_match.group(2))
9393
issue_date = CsvImporter.is_sane_day(issue_date_value)
9494
if issue_date:

tests/acquisition/covidcast/test_csv_importer.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from datetime import date
77
import math
88
import numpy as np
9-
9+
import os
1010
# third party
1111
import pandas
1212
import epiweeks as epi
@@ -44,23 +44,33 @@ def test_is_sane_week(self):
4444

4545
def test_find_issue_specific_csv_files(self):
4646
"""Recursively explore and find issue specific CSV files."""
47-
path_prefix='prefix/to/the/data/'
48-
#valid day path
49-
issue_path='issue_20200408'
50-
glob_issue_path = path_prefix + issue_path
51-
glob_file_path = path_prefix + issue_path + '/ght/20200408_state_rawsearch.csv'
47+
# valid path
48+
path_prefix='prefix/to/the/data/issue_20200408'
49+
os.makedirs(path_prefix, exist_ok=True)
50+
self.assertTrue(os.path.isdir(path_prefix))
51+
52+
issue_path=path_prefix+'ght/20200408_state_rawsearch.csv'
5253
mock_glob = MagicMock()
53-
mock_glob.glob.side_effect = ([glob_issue_path], [glob_file_path])
54+
mock_glob.glob.side_effect = ([path_prefix], [issue_path])
5455

55-
#check if the day is a valid issue day.
56-
issuedir_match = CsvImporter.PATTERN_ISSUE_DIR.match(glob_issue_path.lower())
56+
#check if the day is a valid day.
57+
issuedir_match= CsvImporter.PATTERN_ISSUE_DIR.match(path_prefix.lower())
5758
issue_date_value = int(issuedir_match.group(2))
58-
5959
self.assertTrue(CsvImporter.is_sane_day(issue_date_value))
60-
60+
6161
found = set(CsvImporter.find_issue_specific_csv_files(path_prefix, glob=mock_glob))
6262
self.assertTrue(len(found)>0)
6363

64+
# invalid path:
65+
path_prefix_invalid='invalid/prefix/to/the/data/issue_20200408'
66+
self.assertFalse(os.path.isdir(path_prefix_invalid))
67+
issue_path_invalid=path_prefix_invalid+'ght/20200408_state_rawsearch.csv'
68+
mock_glob_invalid = MagicMock()
69+
mock_glob_invalid.glob.side_effect = ([path_prefix_invalid], [issue_path_invalid])
70+
found = set(CsvImporter.find_issue_specific_csv_files(path_prefix_invalid, glob=mock_glob_invalid))
71+
self.assertFalse(len(found)>0)
72+
73+
6474
def test_find_csv_files(self):
6575
"""Recursively explore and find CSV files."""
6676

0 commit comments

Comments
 (0)