diff --git a/.circleci/config.yml b/.circleci/config.yml index b2824bbd21..3e7e385e75 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -41,7 +41,7 @@ commands: steps: - run: name: Generate CCI cache key - command: + command: | echo "$(date "+%D")" > .cachekey cat .circleci/cached_datasets_list.txt >> .cachekey - persist_to_workspace: @@ -380,24 +380,24 @@ jobs: name: Generate cache no_output_timeout: 30m command: | - if [ ! -f .data/cache_status_file.json ] ; then + if [ ! -f /root/.torchtext/cache/cache_status_file.json ] ; then .circleci/unittest/linux/scripts/setup_env.sh .circleci/unittest/linux/scripts/install.sh .circleci/unittest/linux/scripts/generate_cache.sh fi - cat .data/cache_status_file.json + cat /root/.torchtext/cache/cache_status_file.json - save_cache: key: v1-linux-dataset-{{ checksum ".cachekey" }} paths: - - .data + - /root/.torchtext/cache - save_cache: key: v1-linux-cache-index-{{ checksum ".cachekey" }} paths: - - .data/cache_status_file.json + - /root/.torchtext/cache/cache_status_file.json unittest_linux: <<: *binary_common @@ -432,7 +432,7 @@ jobs: paths: - .vector_cache - - .data + - /root/.torchtext/cache - run: name: Post process command: .circleci/unittest/linux/scripts/post_process.sh @@ -457,24 +457,24 @@ jobs: name: Generate daily data Cache no_output_timeout: 30m command: | - if [ ! -f .data/cache_status_file.json ] ; then + if [ ! -f C:/Users/circleci/.torchtext/cache/cache_status_file.json ] ; then .circleci/unittest/windows/scripts/setup_env.sh .circleci/unittest/windows/scripts/install.sh .circleci/unittest/windows/scripts/generate_cache.sh fi - cat .data/cache_status_file.json + cat C:/Users/circleci/.torchtext/cache/cache_status_file.json - save_cache: key: v1-windows-dataset-{{ checksum ".cachekey" }} paths: - - .data + - C:/Users/circleci/.torchtext/cache - save_cache: key: v1-windows-cache-index-{{ checksum ".cachekey" }} paths: - - .data/cache_status_file.json + - C:/Users/circleci/.torchtext/cache/cache_status_file.json unittest_windows: <<: *binary_common @@ -509,7 +509,7 @@ jobs: paths: - .vector_cache - - .data + - C:/Users/circleci/.torchtext/cache - run: name: Post process command: .circleci/unittest/windows/scripts/post_process.sh diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index bb516c6eb0..cb4ee309c6 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -41,7 +41,7 @@ commands: steps: - run: name: Generate CCI cache key - command: + command: | echo "$(date "+%D")" > .cachekey cat .circleci/cached_datasets_list.txt >> .cachekey - persist_to_workspace: @@ -380,24 +380,24 @@ jobs: name: Generate cache no_output_timeout: 30m command: | - if [ ! -f .data/cache_status_file.json ] ; then + if [ ! -f /root/.torchtext/cache/cache_status_file.json ] ; then .circleci/unittest/linux/scripts/setup_env.sh .circleci/unittest/linux/scripts/install.sh .circleci/unittest/linux/scripts/generate_cache.sh fi - cat .data/cache_status_file.json + cat /root/.torchtext/cache/cache_status_file.json - save_cache: {% raw %} key: v1-linux-dataset-{{ checksum ".cachekey" }} {% endraw %} paths: - - .data + - /root/.torchtext/cache - save_cache: {% raw %} key: v1-linux-cache-index-{{ checksum ".cachekey" }} {% endraw %} paths: - - .data/cache_status_file.json + - /root/.torchtext/cache/cache_status_file.json unittest_linux: <<: *binary_common @@ -432,7 +432,7 @@ jobs: {% endraw %} paths: - .vector_cache - - .data + - /root/.torchtext/cache - run: name: Post process command: .circleci/unittest/linux/scripts/post_process.sh @@ -457,24 +457,24 @@ jobs: name: Generate daily data Cache no_output_timeout: 30m command: | - if [ ! -f .data/cache_status_file.json ] ; then + if [ ! -f C:/Users/circleci/.torchtext/cache/cache_status_file.json ] ; then .circleci/unittest/windows/scripts/setup_env.sh .circleci/unittest/windows/scripts/install.sh .circleci/unittest/windows/scripts/generate_cache.sh fi - cat .data/cache_status_file.json + cat C:/Users/circleci/.torchtext/cache/cache_status_file.json - save_cache: {% raw %} key: v1-windows-dataset-{{ checksum ".cachekey" }} {% endraw %} paths: - - .data + - C:/Users/circleci/.torchtext/cache - save_cache: {% raw %} key: v1-windows-cache-index-{{ checksum ".cachekey" }} {% endraw %} paths: - - .data/cache_status_file.json + - C:/Users/circleci/.torchtext/cache/cache_status_file.json unittest_windows: <<: *binary_common @@ -509,7 +509,7 @@ jobs: {% endraw %} paths: - .vector_cache - - .data + - C:/Users/circleci/.torchtext/cache - run: name: Post process command: .circleci/unittest/windows/scripts/post_process.sh diff --git a/test/common/cache_utils.py b/test/common/cache_utils.py index 16c62e7e35..c0a3421db3 100644 --- a/test/common/cache_utils.py +++ b/test/common/cache_utils.py @@ -3,11 +3,11 @@ import torchtext from .parameterized_utils import load_params -CACHE_STATUS_FILE = '.data/cache_status_file.json' +CACHE_STATUS_FILE = os.path.join(os.path.expanduser('~/.torchtext/cache'), 'cache_status_file.json') def check_cache_status(): - assert os.path.exists(CACHE_STATUS_FILE), "Cache status file does not exists" + assert os.path.exists(CACHE_STATUS_FILE), "Cache status file [{}] does not exists".format(CACHE_STATUS_FILE) with open(CACHE_STATUS_FILE, 'r') as f: missing_datasets = [] cache_status = json.load(f) diff --git a/torchtext/data/datasets_utils.py b/torchtext/data/datasets_utils.py index b17df76354..571b43c479 100644 --- a/torchtext/data/datasets_utils.py +++ b/torchtext/data/datasets_utils.py @@ -213,7 +213,7 @@ def _wrap_split_argument_with_fn(fn, splits): raise ValueError("Internal Error: Given function {} did not adhere to standard signature.".format(fn)) @functools.wraps(fn) - def new_fn(root='.data', split=splits, **kwargs): + def new_fn(root=os.path.expanduser('~/.torchtext/cache'), split=splits, **kwargs): result = [] for item in _check_default_set(split, splits, fn.__name__): result.append(fn(root, item, **kwargs)) @@ -250,7 +250,7 @@ def decorator(func): raise ValueError("Internal Error: Given function {} did not adhere to standard signature.".format(fn)) @functools.wraps(func) - def wrapper(root='.data', *args, **kwargs): + def wrapper(root=os.path.expanduser('~/.torchtext/cache'), *args, **kwargs): new_root = os.path.join(root, dataset_name) if not os.path.exists(new_root): os.makedirs(new_root)