Skip to content

Commit af8575d

Browse files
committed
Add support for cached output files
When constructing a step, if ``cached=True``, the outputs for this step will be downloaded to the appropriate local database and symlinked instead of being computed. Inputs (other than the cached outputs) and the run method will be skipped. Each MPAS core can optionally have a database (a python dictionary in a json file called cached_files.json) that keeps track of which files are available in the cache and what date stamp is in the filename. When setting up test cases, a user can supply test-case numbers with a "c" suffix to indicate that they should be cached. When setting up test cases individually with a path, a user can supply a list of steps in the test case that should use cached outputs. A test suite can supply a line with "cached" or "cached: <step> <step>" to indicate either that all steps in the test case or the listed steps should use cached outputs.
1 parent 1cf790d commit af8575d

File tree

5 files changed

+151
-23
lines changed

5 files changed

+151
-23
lines changed

compass/mpas_core.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
from importlib import resources
2+
import json
3+
4+
15
class MpasCore:
26
"""
37
The base class for housing all the tests for a given MPAS core, such as
@@ -10,6 +14,11 @@ class MpasCore:
1014
1115
test_groups : dict
1216
A dictionary of test groups for the MPAS core with their names as keys
17+
18+
cached_files : dict
19+
A dictionary that maps from output file names in test cases to cached
20+
files in the ``compass_cache`` database for the MPAS core. These
21+
file mappings are read in from ``cached_files.json`` in the MPAS core.
1322
"""
1423

1524
def __init__(self, name):
@@ -26,6 +35,9 @@ def __init__(self, name):
2635
# test groups are added with add_test_groups()
2736
self.test_groups = dict()
2837

38+
self.cached_files = dict()
39+
self._read_cached_files()
40+
2941
def add_test_group(self, test_group):
3042
"""
3143
Add a test group to the MPAS core
@@ -36,3 +48,16 @@ def add_test_group(self, test_group):
3648
the test group to add
3749
"""
3850
self.test_groups[test_group.name] = test_group
51+
52+
def _read_cached_files(self):
    """
    Read in the dictionary of cached files from ``cached_files.json``

    The file is looked up as a resource of the ``compass.<name>`` package,
    where ``<name>`` is ``self.name``.  If it is found, its contents replace
    ``self.cached_files``.  If it does not exist, ``self.cached_files`` is
    left as it was.
    """
    package = f'compass.{self.name}'
    filename = 'cached_files.json'
    try:
        with resources.path(package, filename) as path:
            # JSON text is UTF-8, so don't depend on the locale's default
            # encoding when reading it
            with open(path, encoding='utf-8') as data_file:
                self.cached_files = json.load(data_file)
    except FileNotFoundError:
        # no cached files for this core
        pass

compass/setup.py

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import configparser
44
import os
55
import pickle
6+
import warnings
67

78
from compass.mpas_cores import get_mpas_cores
89
from compass.config import add_config, ensure_absolute_paths
@@ -11,7 +12,8 @@
1112

1213

1314
def setup_cases(tests=None, numbers=None, config_file=None, machine=None,
14-
work_dir=None, baseline_dir=None, mpas_model_path=None):
15+
work_dir=None, baseline_dir=None, mpas_model_path=None,
16+
cached=None):
1517
"""
1618
Set up one or more test cases
1719
@@ -20,8 +22,10 @@ def setup_cases(tests=None, numbers=None, config_file=None, machine=None,
2022
tests : list of str, optional
2123
Relative paths for the test cases to set up
2224
23-
numbers : list of int, optional
24-
Case numbers to setup, as listed from ``compass list``
25+
numbers : list of str, optional
26+
Case numbers to setup, as listed from ``compass list``, optionally with
27+
a suffix ``c`` to indicate that all steps in that test case should be
28+
cached
2529
2630
config_file : str, optional
2731
Configuration file with custom options for setting up and running test
@@ -41,6 +45,10 @@ def setup_cases(tests=None, numbers=None, config_file=None, machine=None,
4145
The relative or absolute path to the root of a branch where the MPAS
4246
model has been built
4347
48+
cached : list of list of str, optional
49+
For each test in ``tests``, which steps (if any) should be cached,
50+
or "_all" if all steps should be cached
51+
4452
Returns
4553
-------
4654
test_cases : dict of compass.TestCase
@@ -56,6 +64,14 @@ def setup_cases(tests=None, numbers=None, config_file=None, machine=None,
5664
if tests is None and numbers is None:
5765
raise ValueError('At least one of tests or numbers is needed.')
5866

67+
if cached is not None:
68+
if tests is None:
69+
warnings.warn('Ignoring "cached" argument becasue "tests" was '
70+
'not provided')
71+
elif len(cached) != len(tests):
72+
raise ValueError('A list of cached steps must be provided for '
73+
'each test in "tests"')
74+
5975
if work_dir is None:
6076
work_dir = os.getcwd()
6177

@@ -71,18 +87,36 @@ def setup_cases(tests=None, numbers=None, config_file=None, machine=None,
7187
if numbers is not None:
7288
keys = list(all_test_cases)
7389
for number in numbers:
90+
cache_all = False
91+
if number.endswith('c'):
92+
cache_all = True
93+
number = int(number[:-1])
94+
else:
95+
number = int(number)
96+
7497
if number >= len(keys):
7598
raise ValueError('test number {} is out of range. There are '
7699
'only {} tests.'.format(number, len(keys)))
77100
path = keys[number]
78-
test_cases[path] = all_test_cases[path]
101+
test_case = all_test_cases[path]
102+
if cache_all:
103+
for step in test_case.steps.values():
104+
step.cached = True
105+
test_cases[path] = test_case
79106

80107
if tests is not None:
81-
for path in tests:
108+
for index, path in enumerate(tests):
82109
if path not in all_test_cases:
83110
raise ValueError('Test case with path {} is not in '
84111
'test_cases'.format(path))
85-
test_cases[path] = all_test_cases[path]
112+
test_case = all_test_cases[path]
113+
if cached is not None:
114+
step_names = cached[index]
115+
if len(step_names) > 0 and step_names[0] == '_all':
116+
step_names = list(test_case.steps.keys())
117+
for step_name in step_names:
118+
test_case.steps[step_name].cached = True
119+
test_cases[path] = test_case
86120

87121
# get the MPAS core of the first test case. We'll assume all tests are
88122
# for this core
@@ -133,6 +167,10 @@ def setup_case(path, test_case, config_file, machine, work_dir, baseline_dir,
133167
"""
134168

135169
print(' {}'.format(path))
170+
cached_steps = [step.name for step in test_case.steps.values() if step.cached]
171+
if len(cached_steps) > 0:
172+
cached_steps = ' '.join(cached_steps)
173+
print(f' steps with cached outputs: {cached_steps}')
136174

137175
config = configparser.ConfigParser(
138176
interpolation=configparser.ExtendedInterpolation())
@@ -253,10 +291,12 @@ def main():
253291
help="Relative path for a test case to set up",
254292
metavar="PATH")
255293
parser.add_argument("-n", "--case_number", nargs='+', dest="case_num",
256-
type=int,
294+
type=str,
257295
help="Case number(s) to setup, as listed from "
258296
"'compass list'. Can be a space-separated"
259-
"list of case numbers.", metavar="NUM")
297+
"list of case numbers. A suffix 'c' indicates"
298+
"that all steps in the test should use cached"
299+
"outputs.", metavar="NUM")
260300
parser.add_argument("-f", "--config_file", dest="config_file",
261301
help="Configuration file for test case setup",
262302
metavar="FILE")
@@ -274,13 +314,21 @@ def main():
274314
help="The path to the build of the MPAS model for the "
275315
"core.",
276316
metavar="PATH")
317+
parser.add_argument("--cached", dest="cached", nargs='+',
318+
help="A list of steps in the test case supplied with"
319+
"--test that should use cached outputs, or "
320+
"'_all' if all steps should be cached",
321+
metavar="STEP")
277322

278323
args = parser.parse_args(sys.argv[2:])
324+
cached = None
279325
if args.test is None:
280326
tests = None
281327
else:
282328
tests = [args.test]
329+
if args.cached is not None:
330+
cached = [args.cached]
283331
setup_cases(tests=tests, numbers=args.case_num,
284332
config_file=args.config_file, machine=args.machine,
285333
work_dir=args.work_dir, baseline_dir=args.baseline_dir,
286-
mpas_model_path=args.mpas_model)
334+
mpas_model_path=args.mpas_model, cached=cached)

compass/step.py

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,9 @@ class Step:
7676
time or the step will raise an exception
7777
7878
outputs : list of str
79-
a list of absolute paths of output files produced by this step and
80-
available as inputs to other test cases and steps. These files must
81-
exist after the test has run or an exception will be raised
79+
a list of absolute paths of output files produced by this step (or
80+
cached) and available as inputs to other test cases and steps. These
81+
files must exist after the test has run or an exception will be raised
8282
8383
namelist_data : dict
8484
a dictionary used internally to keep track of updates to the default
@@ -111,10 +111,14 @@ class Step:
111111
log_filename : str
112112
At run time, the name of a log file where output/errors from the step
113113
are being logged, or ``None`` if output is to stdout/stderr
114+
115+
cached : bool
116+
Whether to get all of the outputs for the step from the database of
117+
cached outputs for this MPAS core
114118
"""
115119

116120
def __init__(self, test_case, name, subdir=None, cores=1, min_cores=1,
117-
threads=1, max_memory=1000, max_disk=1000):
121+
threads=1, max_memory=1000, max_disk=1000, cached=False):
118122
"""
119123
Create a new test case
120124
@@ -150,6 +154,10 @@ def __init__(self, test_case, name, subdir=None, cores=1, min_cores=1,
150154
the amount of disk space that the step is allowed to use in MB.
151155
This is currently just a placeholder for later use with task
152156
parallelism
157+
158+
cached : bool, optional
159+
Whether to get all of the outputs for the step from the database of
160+
cached outputs for this MPAS core
153161
"""
154162
self.name = name
155163
self.test_case = test_case
@@ -186,6 +194,9 @@ def __init__(self, test_case, name, subdir=None, cores=1, min_cores=1,
186194
self.logger = None
187195
self.log_filename = None
188196

197+
# output caching
198+
self.cached = cached
199+
189200
def setup(self):
190201
"""
191202
Set up the test case in the work directory, including downloading any
@@ -454,6 +465,22 @@ def process_inputs_and_outputs(self):
454465
step_dir = self.work_dir
455466
config = self.config
456467

468+
# process the outputs first because cached outputs will add more inputs
469+
if self.cached:
470+
# forget about the inputs -- we won't use them, but we will add
471+
# the cached outputs as inputs
472+
self.input_data = list()
473+
for output in self.outputs:
474+
filename = os.path.join(self.path, output)
475+
if filename not in self.mpas_core.cached_files:
476+
raise ValueError(f'The file {filename} has not been added '
477+
f'to the cache database')
478+
target = self.mpas_core.cached_files[filename]
479+
self.add_input_file(
480+
filename=output,
481+
target=target,
482+
database='compass_cache')
483+
457484
inputs = []
458485
for entry in self.input_data:
459486
filename = entry['filename']
@@ -534,6 +561,10 @@ def _generate_namelists(self):
534561
by parsing the files and dictionaries in the step's ``namelist_data``.
535562
"""
536563

564+
if self.cached:
565+
# no need for namelists
566+
return
567+
537568
step_work_dir = self.work_dir
538569
config = self.config
539570

@@ -570,6 +601,9 @@ def _generate_streams(self):
570601
Writes out a streams file in the work directory with new values given
571602
by parsing the files and dictionaries in the step's ``streams_data``.
572603
"""
604+
if self.cached:
605+
# no need for streams
606+
return
573607

574608
step_work_dir = self.work_dir
575609
config = self.config

compass/suite.py

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def setup_suite(mpas_core, suite_name, config_file=None, machine=None,
3737
directories
3838
3939
baseline_dir : str, optional
40-
Location of baseslines that can be compared to
40+
Location of baselines that can be compared to
4141
4242
mpas_model_path : str, optional
4343
The relative or absolute path to the root of a branch where the MPAS
@@ -51,20 +51,16 @@ def setup_suite(mpas_core, suite_name, config_file=None, machine=None,
5151

5252
text = resources.read_text('compass.{}.suites'.format(mpas_core),
5353
'{}.txt'.format(suite_name))
54-
tests = list()
55-
for test in text.split('\n'):
56-
test = test.strip()
57-
if (len(test) > 0 and test not in tests
58-
and not test.startswith('#')):
59-
tests.append(test)
54+
55+
tests, cached = _parse_suite(text)
6056

6157
if work_dir is None:
6258
work_dir = os.getcwd()
6359
work_dir = os.path.abspath(work_dir)
6460

6561
test_cases = setup_cases(tests, config_file=config_file, machine=machine,
6662
work_dir=work_dir, baseline_dir=baseline_dir,
67-
mpas_model_path=mpas_model_path)
63+
mpas_model_path=mpas_model_path, cached=cached)
6864

6965
test_suite = {'name': suite_name,
7066
'test_cases': test_cases,
@@ -108,8 +104,8 @@ def clean_suite(mpas_core, suite_name, work_dir=None):
108104

109105
text = resources.read_text('compass.{}.suites'.format(mpas_core),
110106
'{}.txt'.format(suite_name))
111-
tests = [test.strip() for test in text.split('\n') if
112-
len(test.strip()) > 0 and not test.startswith('#')]
107+
108+
tests, _ = _parse_suite(text)
113109

114110
if work_dir is None:
115111
work_dir = os.getcwd()
@@ -186,3 +182,26 @@ def _get_required_cores(test_cases):
186182
max_of_min_cores = max(max_of_min_cores, step.min_cores)
187183

188184
return max_cores, max_of_min_cores
185+
186+
187+
def _parse_suite(text):
    """
    Parse the text of a file defining a test suite

    Each non-blank line that is not a comment (starting with ``#``) is either
    the path of a test case or a caching directive that applies to the test
    case on the closest preceding test-case line:

    * ``cached`` -- all steps of the test case use cached outputs
    * ``cached: <step> <step> ...`` -- only the listed steps use cached
      outputs

    Parameters
    ----------
    text : str
        The contents of the test suite file

    Returns
    -------
    tests : list of str
        The test-case paths in the order they appear in the file

    cached : list of list of str
        For each entry in ``tests``, the names of the steps to cache,
        ``['_all']`` if all steps should be cached, or an empty list if no
        steps should be cached

    Raises
    ------
    ValueError
        If a ``cached`` directive appears before any test case
    """
    directive = 'cached:'

    tests = []
    cached = []
    for line in text.split('\n'):
        line = line.strip()
        if not line or line.startswith('#'):
            # a blank line or comment
            continue

        if line == 'cached':
            steps = ['_all']
        elif line.startswith(directive):
            # split on any run of whitespace so repeated spaces or tabs
            # don't produce empty step names
            steps = line[len(directive):].split()
        else:
            tests.append(line)
            cached.append([])
            continue

        if not cached:
            # a directive with no test case to attach to
            raise ValueError(f'The line "{line}" in the test suite does not '
                             f'follow a test case')
        cached[-1] = steps

    return tests, cached

compass/testcase.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,8 @@ def run(self):
145145
cwd = os.getcwd()
146146
for step_name in self.steps_to_run:
147147
step = self.steps[step_name]
148+
if step.cached:
149+
continue
148150
step.config = self.config
149151
new_log_file = self.new_step_log_file
150152
if self.log_filename is not None:

0 commit comments

Comments
 (0)