Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit e9d7593

Browse files
datumbox authored and facebook-github-bot committed
Import torchtext #1314 99557ef
Reviewed By: parmeet
Differential Revision: D28683381
fbshipit-source-id: 7bfbf445dd512f0ce21c34096cf3f08332d90138
1 parent 0c55dd9 commit e9d7593

File tree

3 files changed

+11
-4
lines changed

3 files changed

+11
-4
lines changed

.circleci/config.yml

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,9 @@ commands:
4444
steps:
4545
- run:
4646
name: Generate CCI cache key
47-
command: echo "$(date "+%D")" > .cachekey
47+
command:
48+
echo "$(date "+%D")" > .cachekey
49+
cat cached_datasets_list.txt >> .cachekey
4850
- persist_to_workspace:
4951
root: .
5052
paths:

.circleci/config.yml.in

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,9 @@ commands:
4444
steps:
4545
- run:
4646
name: Generate CCI cache key
47-
command: echo "$(date "+%D")" > .cachekey
47+
command:
48+
echo "$(date "+%D")" > .cachekey
49+
cat cached_datasets_list.txt >> .cachekey
4850
- persist_to_workspace:
4951
root: .
5052
paths:

test/common/cache_utils.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,11 +9,14 @@
99
def check_cache_status():
1010
assert os.path.exists(CACHE_STATUS_FILE), "Cache status file does not exists"
1111
with open(CACHE_STATUS_FILE, 'r') as f:
12+
missing_datasets = []
1213
cache_status = json.load(f)
1314
for dataset_name in cache_status:
1415
for split in cache_status[dataset_name]:
1516
if cache_status[dataset_name][split]['status'] == "fail":
16-
raise FileNotFoundError("Failing all raw dataset unit tests as cache is missing atleast one raw dataset")
17+
missing_datasets.append(dataset_name + '_' + split)
18+
if missing_datasets:
19+
raise FileNotFoundError("Failing all raw dataset unit tests as cache is missing {} datasets".format(missing_datasets))
1720

1821

1922
def generate_data_cache():
@@ -30,7 +33,7 @@ def generate_data_cache():
3033
if dataset_name not in cache_status:
3134
cache_status[dataset_name] = {}
3235
try:
33-
if dataset_name == "Multi30k" or dataset_name == 'WMT14':
36+
if dataset_name == 'WMT14':
3437
_ = torchtext.experimental.datasets.raw.DATASETS[dataset_name](split=split)
3538
else:
3639
_ = torchtext.datasets.DATASETS[dataset_name](split=split)

0 commit comments

Comments
 (0)