From 3766d9b430fe21cc673bf736960fdf3073c04735 Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Wed, 21 Jul 2021 10:53:21 -0400 Subject: [PATCH 01/10] changing default root for datasets --- test/common/cache_utils.py | 2 +- torchtext/data/datasets_utils.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/common/cache_utils.py b/test/common/cache_utils.py index 16c62e7e35..2ded15640e 100644 --- a/test/common/cache_utils.py +++ b/test/common/cache_utils.py @@ -3,7 +3,7 @@ import torchtext from .parameterized_utils import load_params -CACHE_STATUS_FILE = '.data/cache_status_file.json' +CACHE_STATUS_FILE = os.path.join(os.path.expanduser('~/.torchtext/cache'),'cache_status_file.json') def check_cache_status(): diff --git a/torchtext/data/datasets_utils.py b/torchtext/data/datasets_utils.py index b17df76354..571b43c479 100644 --- a/torchtext/data/datasets_utils.py +++ b/torchtext/data/datasets_utils.py @@ -213,7 +213,7 @@ def _wrap_split_argument_with_fn(fn, splits): raise ValueError("Internal Error: Given function {} did not adhere to standard signature.".format(fn)) @functools.wraps(fn) - def new_fn(root='.data', split=splits, **kwargs): + def new_fn(root=os.path.expanduser('~/.torchtext/cache'), split=splits, **kwargs): result = [] for item in _check_default_set(split, splits, fn.__name__): result.append(fn(root, item, **kwargs)) @@ -250,7 +250,7 @@ def decorator(func): raise ValueError("Internal Error: Given function {} did not adhere to standard signature.".format(fn)) @functools.wraps(func) - def wrapper(root='.data', *args, **kwargs): + def wrapper(root=os.path.expanduser('~/.torchtext/cache'), *args, **kwargs): new_root = os.path.join(root, dataset_name) if not os.path.exists(new_root): os.makedirs(new_root) From 77a942434257e5649b49c0870c3b9434d7a589c4 Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Wed, 21 Jul 2021 11:35:18 -0400 Subject: [PATCH 02/10] temp change to enable recache + fixing flake test --- 
.circleci/cached_datasets_list.txt | 1 + test/common/cache_utils.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.circleci/cached_datasets_list.txt b/.circleci/cached_datasets_list.txt index 9989d5f278..060ab820e9 100644 --- a/.circleci/cached_datasets_list.txt +++ b/.circleci/cached_datasets_list.txt @@ -19,3 +19,4 @@ PennTreebank SQuAD1 SQuAD2 EnWik9 +temp_change \ No newline at end of file diff --git a/test/common/cache_utils.py b/test/common/cache_utils.py index 2ded15640e..8b76631593 100644 --- a/test/common/cache_utils.py +++ b/test/common/cache_utils.py @@ -3,7 +3,7 @@ import torchtext from .parameterized_utils import load_params -CACHE_STATUS_FILE = os.path.join(os.path.expanduser('~/.torchtext/cache'),'cache_status_file.json') +CACHE_STATUS_FILE = os.path.join(os.path.expanduser('~/.torchtext/cache'), 'cache_status_file.json') def check_cache_status(): From 7c04eb467e42ffcc1b454b85ba537754689f4f29 Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Wed, 21 Jul 2021 11:44:51 -0400 Subject: [PATCH 03/10] update error message --- test/common/cache_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/common/cache_utils.py b/test/common/cache_utils.py index 8b76631593..c0a3421db3 100644 --- a/test/common/cache_utils.py +++ b/test/common/cache_utils.py @@ -7,7 +7,7 @@ def check_cache_status(): - assert os.path.exists(CACHE_STATUS_FILE), "Cache status file does not exists" + assert os.path.exists(CACHE_STATUS_FILE), "Cache status file [{}] does not exists".format(CACHE_STATUS_FILE) with open(CACHE_STATUS_FILE, 'r') as f: missing_datasets = [] cache_status = json.load(f) From 2620cfa63a0ea9fa70b9f2142f063b29c12ae375 Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Wed, 21 Jul 2021 12:44:29 -0400 Subject: [PATCH 04/10] updating circleci config --- .circleci/config.yml | 12 ++++++------ .circleci/config.yml.in | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git 
a/.circleci/config.yml b/.circleci/config.yml index b2824bbd21..21893be688 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -380,12 +380,12 @@ jobs: name: Generate cache no_output_timeout: 30m command: | - if [ ! -f .data/cache_status_file.json ] ; then + if [ ! -f /root/.torchtext/cache/cache_status_file.json ] ; then .circleci/unittest/linux/scripts/setup_env.sh .circleci/unittest/linux/scripts/install.sh .circleci/unittest/linux/scripts/generate_cache.sh fi - cat .data/cache_status_file.json + cat /root/.torchtext/cache/cache_status_file.json - save_cache: key: v1-linux-dataset-{{ checksum ".cachekey" }} @@ -397,7 +397,7 @@ jobs: key: v1-linux-cache-index-{{ checksum ".cachekey" }} paths: - - .data/cache_status_file.json + - /root/.torchtext/cache/cache_status_file.json unittest_linux: <<: *binary_common @@ -457,12 +457,12 @@ jobs: name: Generate daily data Cache no_output_timeout: 30m command: | - if [ ! -f .data/cache_status_file.json ] ; then + if [ ! -f C:/Users/.torchtext/cache/cache_status_file.json ] ; then .circleci/unittest/windows/scripts/setup_env.sh .circleci/unittest/windows/scripts/install.sh .circleci/unittest/windows/scripts/generate_cache.sh fi - cat .data/cache_status_file.json + cat C:/Users/.torchtext/cache/cache_status_file.json - save_cache: key: v1-windows-dataset-{{ checksum ".cachekey" }} @@ -474,7 +474,7 @@ jobs: key: v1-windows-cache-index-{{ checksum ".cachekey" }} paths: - - .data/cache_status_file.json + - C:/Users/.torchtext/cache/cache_status_file.json unittest_windows: <<: *binary_common diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index bb516c6eb0..c9a4456748 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -380,12 +380,12 @@ jobs: name: Generate cache no_output_timeout: 30m command: | - if [ ! -f .data/cache_status_file.json ] ; then + if [ ! 
-f /root/.torchtext/cache/cache_status_file.json ] ; then .circleci/unittest/linux/scripts/setup_env.sh .circleci/unittest/linux/scripts/install.sh .circleci/unittest/linux/scripts/generate_cache.sh fi - cat .data/cache_status_file.json + cat /root/.torchtext/cache/cache_status_file.json - save_cache: {% raw %} key: v1-linux-dataset-{{ checksum ".cachekey" }} @@ -397,7 +397,7 @@ jobs: key: v1-linux-cache-index-{{ checksum ".cachekey" }} {% endraw %} paths: - - .data/cache_status_file.json + - /root/.torchtext/cache/cache_status_file.json unittest_linux: <<: *binary_common @@ -457,12 +457,12 @@ jobs: name: Generate daily data Cache no_output_timeout: 30m command: | - if [ ! -f .data/cache_status_file.json ] ; then + if [ ! -f C:/Users/.torchtext/cache/cache_status_file.json ] ; then .circleci/unittest/windows/scripts/setup_env.sh .circleci/unittest/windows/scripts/install.sh .circleci/unittest/windows/scripts/generate_cache.sh fi - cat .data/cache_status_file.json + cat C:/Users/.torchtext/cache/cache_status_file.json - save_cache: {% raw %} key: v1-windows-dataset-{{ checksum ".cachekey" }} @@ -474,7 +474,7 @@ jobs: key: v1-windows-cache-index-{{ checksum ".cachekey" }} {% endraw %} paths: - - .data/cache_status_file.json + - C:/Users/.torchtext/cache/cache_status_file.json unittest_windows: <<: *binary_common From 29f66d7ead69f074ec9a4a1bbc2bf1adf81b1227 Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Wed, 21 Jul 2021 12:53:28 -0400 Subject: [PATCH 05/10] update config --- .circleci/config.yml | 8 ++++---- .circleci/config.yml.in | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 21893be688..a59158226b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -391,7 +391,7 @@ jobs: key: v1-linux-dataset-{{ checksum ".cachekey" }} paths: - - .data + - /root/.torchtext/cache - save_cache: key: v1-linux-cache-index-{{ checksum ".cachekey" }} @@ -432,7 +432,7 @@ jobs: paths: - 
.vector_cache - - .data + - /root/.torchtext/cache - run: name: Post process command: .circleci/unittest/linux/scripts/post_process.sh @@ -468,7 +468,7 @@ jobs: key: v1-windows-dataset-{{ checksum ".cachekey" }} paths: - - .data + - C:/Users/.torchtext/cache - save_cache: key: v1-windows-cache-index-{{ checksum ".cachekey" }} @@ -509,7 +509,7 @@ jobs: paths: - .vector_cache - - .data + - C:/Users/.torchtext/cache - run: name: Post process command: .circleci/unittest/windows/scripts/post_process.sh diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index c9a4456748..9924ed9e09 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -391,7 +391,7 @@ jobs: key: v1-linux-dataset-{{ checksum ".cachekey" }} {% endraw %} paths: - - .data + - /root/.torchtext/cache - save_cache: {% raw %} key: v1-linux-cache-index-{{ checksum ".cachekey" }} @@ -432,7 +432,7 @@ jobs: {% endraw %} paths: - .vector_cache - - .data + - /root/.torchtext/cache - run: name: Post process command: .circleci/unittest/linux/scripts/post_process.sh @@ -468,7 +468,7 @@ jobs: key: v1-windows-dataset-{{ checksum ".cachekey" }} {% endraw %} paths: - - .data + - C:/Users/.torchtext/cache - save_cache: {% raw %} key: v1-windows-cache-index-{{ checksum ".cachekey" }} @@ -509,7 +509,7 @@ jobs: {% endraw %} paths: - .vector_cache - - .data + - C:/Users/.torchtext/cache - run: name: Post process command: .circleci/unittest/windows/scripts/post_process.sh From b906f191a139c7cea6e200bc0b218f18fb6df56c Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Wed, 21 Jul 2021 13:19:05 -0400 Subject: [PATCH 06/10] config updates --- .circleci/cached_datasets_list.txt | 2 +- .circleci/config.yml | 7 ++++--- .circleci/config.yml.in | 7 ++++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.circleci/cached_datasets_list.txt b/.circleci/cached_datasets_list.txt index 060ab820e9..c53bfb7171 100644 --- a/.circleci/cached_datasets_list.txt +++ b/.circleci/cached_datasets_list.txt 
@@ -19,4 +19,4 @@ PennTreebank SQuAD1 SQuAD2 EnWik9 -temp_change \ No newline at end of file +temp_change_1 \ No newline at end of file diff --git a/.circleci/config.yml b/.circleci/config.yml index a59158226b..9303a8fc19 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -44,6 +44,7 @@ commands: command: echo "$(date "+%D")" > .cachekey cat .circleci/cached_datasets_list.txt >> .cachekey + cat .cachekey - persist_to_workspace: root: . paths: @@ -457,12 +458,12 @@ jobs: name: Generate daily data Cache no_output_timeout: 30m command: | - if [ ! -f C:/Users/.torchtext/cache/cache_status_file.json ] ; then + if [ ! -f C:/Users/circleci/.torchtext/cache/cache_status_file.json ] ; then .circleci/unittest/windows/scripts/setup_env.sh .circleci/unittest/windows/scripts/install.sh .circleci/unittest/windows/scripts/generate_cache.sh fi - cat C:/Users/.torchtext/cache/cache_status_file.json + cat C:/Users/circleci/.torchtext/cache/cache_status_file.json - save_cache: key: v1-windows-dataset-{{ checksum ".cachekey" }} @@ -474,7 +475,7 @@ jobs: key: v1-windows-cache-index-{{ checksum ".cachekey" }} paths: - - C:/Users/.torchtext/cache/cache_status_file.json + - C:/Users/circleci/.torchtext/cache/cache_status_file.json unittest_windows: <<: *binary_common diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index 9924ed9e09..a608e479e1 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -44,6 +44,7 @@ commands: command: echo "$(date "+%D")" > .cachekey cat .circleci/cached_datasets_list.txt >> .cachekey + cat .cachekey - persist_to_workspace: root: . paths: @@ -457,12 +458,12 @@ jobs: name: Generate daily data Cache no_output_timeout: 30m command: | - if [ ! -f C:/Users/.torchtext/cache/cache_status_file.json ] ; then + if [ ! 
-f C:/Users/circleci/.torchtext/cache/cache_status_file.json ] ; then .circleci/unittest/windows/scripts/setup_env.sh .circleci/unittest/windows/scripts/install.sh .circleci/unittest/windows/scripts/generate_cache.sh fi - cat C:/Users/.torchtext/cache/cache_status_file.json + cat C:/Users/circleci/.torchtext/cache/cache_status_file.json - save_cache: {% raw %} key: v1-windows-dataset-{{ checksum ".cachekey" }} @@ -474,7 +475,7 @@ jobs: key: v1-windows-cache-index-{{ checksum ".cachekey" }} {% endraw %} paths: - - C:/Users/.torchtext/cache/cache_status_file.json + - C:/Users/circleci/.torchtext/cache/cache_status_file.json unittest_windows: <<: *binary_common From ae68a7cde945802c38719da1ebe66a89a5de3c86 Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Wed, 21 Jul 2021 13:24:18 -0400 Subject: [PATCH 07/10] testing config --- .circleci/config.yml | 3 ++- .circleci/config.yml.in | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 9303a8fc19..b044592e78 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -41,8 +41,9 @@ commands: steps: - run: name: Generate CCI cache key - command: + command: | echo "$(date "+%D")" > .cachekey + cat .cachekey cat .circleci/cached_datasets_list.txt >> .cachekey cat .cachekey - persist_to_workspace: diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index a608e479e1..3d76731292 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -41,8 +41,9 @@ commands: steps: - run: name: Generate CCI cache key - command: + command: | echo "$(date "+%D")" > .cachekey + cat .cachekey cat .circleci/cached_datasets_list.txt >> .cachekey cat .cachekey - persist_to_workspace: From 0bbf3c1f8941d2df241316fcfda06054b6b1e33b Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Wed, 21 Jul 2021 15:13:27 -0400 Subject: [PATCH 08/10] fixing config for win --- .circleci/cached_datasets_list.txt | 3 +-- .circleci/config.yml | 4 +--- 
.circleci/config.yml.in | 4 +--- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/.circleci/cached_datasets_list.txt b/.circleci/cached_datasets_list.txt index c53bfb7171..c345588868 100644 --- a/.circleci/cached_datasets_list.txt +++ b/.circleci/cached_datasets_list.txt @@ -18,5 +18,4 @@ WikiText103 PennTreebank SQuAD1 SQuAD2 -EnWik9 -temp_change_1 \ No newline at end of file +EnWik9 \ No newline at end of file diff --git a/.circleci/config.yml b/.circleci/config.yml index b044592e78..16ce199f04 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -43,9 +43,7 @@ commands: name: Generate CCI cache key command: | echo "$(date "+%D")" > .cachekey - cat .cachekey cat .circleci/cached_datasets_list.txt >> .cachekey - cat .cachekey - persist_to_workspace: root: . paths: @@ -511,7 +509,7 @@ jobs: paths: - .vector_cache - - C:/Users/.torchtext/cache + - C:/Users/circleci/.torchtext/cache - run: name: Post process command: .circleci/unittest/windows/scripts/post_process.sh diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index 3d76731292..1a4604b58c 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -43,9 +43,7 @@ commands: name: Generate CCI cache key command: | echo "$(date "+%D")" > .cachekey - cat .cachekey cat .circleci/cached_datasets_list.txt >> .cachekey - cat .cachekey - persist_to_workspace: root: . 
paths: @@ -511,7 +509,7 @@ jobs: {% endraw %} paths: - .vector_cache - - C:/Users/.torchtext/cache + - C:/Users/circleci/.torchtext/cache - run: name: Post process command: .circleci/unittest/windows/scripts/post_process.sh From fe35ad49f834669622817e4fe1f33bcfa88dab10 Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Wed, 21 Jul 2021 15:15:57 -0400 Subject: [PATCH 09/10] minor change --- .circleci/cached_datasets_list.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/cached_datasets_list.txt b/.circleci/cached_datasets_list.txt index c345588868..9989d5f278 100644 --- a/.circleci/cached_datasets_list.txt +++ b/.circleci/cached_datasets_list.txt @@ -18,4 +18,4 @@ WikiText103 PennTreebank SQuAD1 SQuAD2 -EnWik9 \ No newline at end of file +EnWik9 From 93a63f1d618d1ba031640b2dd010fe8a3899f284 Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Wed, 21 Jul 2021 16:12:34 -0400 Subject: [PATCH 10/10] fix win caching --- .circleci/config.yml | 2 +- .circleci/config.yml.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 16ce199f04..3e7e385e75 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -468,7 +468,7 @@ jobs: key: v1-windows-dataset-{{ checksum ".cachekey" }} paths: - - C:/Users/.torchtext/cache + - C:/Users/circleci/.torchtext/cache - save_cache: key: v1-windows-cache-index-{{ checksum ".cachekey" }} diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index 1a4604b58c..cb4ee309c6 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -468,7 +468,7 @@ jobs: key: v1-windows-dataset-{{ checksum ".cachekey" }} {% endraw %} paths: - - C:/Users/.torchtext/cache + - C:/Users/circleci/.torchtext/cache - save_cache: {% raw %} key: v1-windows-cache-index-{{ checksum ".cachekey" }}