Skip to content

Commit 3a3535c

Browse files
committed
ft: now docker environment manager handles custom volumes, with the purpose of giving different input folders for different containers, so there is no file conflict for parallelization. allowed the possibility of storing input data in the results folder for transparency, or simply in /tmp/
refac: td models now does not pass "args" as argument to run command. that means that model src code should automatically read the input/{args} file, and specify {args} in its configuration if necessary. modified pymock from case_g for the new requirements, as well as fixed its Dockerfile to be compatible with new floatcsep versions.
1 parent 349b5b5 commit 3a3535c

File tree

8 files changed

+111
-96
lines changed

8 files changed

+111
-96
lines changed

floatcsep/commands/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def reproduce(config: str, **kwargs) -> None:
158158
reproduced_exp.run()
159159

160160
original_config = reproduced_exp.original_config
161-
original_exp = Experiment.from_yml(original_config, rundir=reproduced_exp.original_run_dir)
161+
original_exp = Experiment.from_yml(original_config, run_dir=reproduced_exp.original_run_dir)
162162
original_exp.stage_models()
163163
original_exp.set_tasks()
164164

floatcsep/experiment.py

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,9 @@ def __init__(
101101
tests: str = None,
102102
postprocess: str = None,
103103
default_test_kwargs: dict = None,
104-
rundir: str = "results",
104+
run_dir: str = "results",
105105
run_mode: str = "serial",
106+
stage_dir: ... = "results",
106107
report_hook: dict = None,
107108
**kwargs,
108109
) -> None:
@@ -116,20 +117,21 @@ def __init__(
116117

117118
workdir = Path(kwargs.get("path", os.getcwd())).resolve()
118119
if kwargs.get("timestamp", False):
119-
rundir = Path(rundir, f"run_{datetime.datetime.utcnow().date().isoformat()}")
120-
os.makedirs(Path(workdir, rundir), exist_ok=True)
120+
run_dir = Path(run_dir, f"run_{datetime.datetime.utcnow().date().isoformat()}")
121+
os.makedirs(Path(workdir, run_dir), exist_ok=True)
121122

122123
self.name = name if name else "floatingExp"
123-
self.registry = ExperimentRegistry.factory(workdir=workdir, run_dir=rundir)
124+
self.registry = ExperimentRegistry.factory(workdir=workdir, run_dir=run_dir)
124125
self.results_repo = ResultsRepository(self.registry)
125126
self.catalog_repo = CatalogRepository(self.registry)
126127
self.run_id = "run"
127128

128129
self.config_file = kwargs.get("config_file", None)
129130
self.original_config = kwargs.get("original_config", None)
130-
self.original_run_dir = kwargs.get("original_rundir", None)
131-
self.run_dir = rundir
131+
self.original_run_dir = kwargs.get("original_run_dir", None)
132+
self.run_dir = run_dir
132133
self.run_mode = run_mode
134+
self.stage_dir = stage_dir
133135
self.seed = kwargs.get("seed", None)
134136
self.time_config = read_time_cfg(time_config, **kwargs)
135137
self.region_config = read_region_cfg(region_config, **kwargs)
@@ -139,7 +141,7 @@ def __init__(
139141
logger = kwargs.get("logging", False)
140142
if logger:
141143
filename = "experiment.log" if logger is True else logger
142-
self.registry.logger = os.path.join(workdir, rundir, filename)
144+
self.registry.logger = os.path.join(workdir, run_dir, filename)
143145
log.info(f"Logging at {self.registry.logger}")
144146
add_fhandler(self.registry.logger)
145147

@@ -304,7 +306,7 @@ def stage_models(self) -> None:
304306
i.stage(
305307
self.time_windows,
306308
run_mode=self.run_mode,
307-
stage_dir=self.registry.run_dir,
309+
stage_dir=self.stage_dir,
308310
run_id=self.run_id,
309311
)
310312
self.registry.add_model_registry(i)
@@ -587,8 +589,6 @@ def make_repr(self) -> None:
587589
if not exists(target_cat):
588590
shutil.copy2(self.registry.abs(self.catalog_repo.cat_path), target_cat)
589591

590-
# relative_path = self.registry.rel(self.registry.run_dir)
591-
# print(self.registry.workdir.__class__, self.registry.run_dir.__class__)
592592
relative_path = Path(
593593
os.path.relpath(self.registry.workdir.as_posix(), self.registry.run_dir.as_posix())
594594
)
@@ -687,14 +687,13 @@ def from_yml(cls, config_yml: str, repr_dir=None, **kwargs):
687687
# Only ABSOLUTE PATH
688688
_dict["path"] = abspath(join(_dir_yml, _dict.get("path", "")))
689689

690-
# replaces rundir case reproduce option is used
690+
# replaces run_dir case reproduce option is used
691691
if repr_dir:
692-
_dict["original_rundir"] = _dict.get("rundir", "results")
693-
_dict["rundir"] = relpath(join(_dir_yml, repr_dir), _dict["path"])
692+
_dict["original_run_dir"] = _dict.get("run_dir", "results")
693+
_dict["run_dir"] = relpath(join(_dir_yml, repr_dir), _dict["path"])
694694
_dict["original_config"] = abspath(join(_dict["path"], _dict["config_file"]))
695695
else:
696-
697-
_dict["rundir"] = _dict.get("rundir", kwargs.pop("rundir", "results"))
696+
_dict["run_dir"] = _dict.get("run_dir", kwargs.pop("run_dir", "results"))
698697
_dict["config_file"] = relpath(config_yml, _dir_yml)
699698
if "logging" in _dict:
700699
kwargs.pop("logging")

floatcsep/infrastructure/environments.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import sys
88
import venv
99
from abc import ABC, abstractmethod
10+
from pathlib import Path
1011
from typing import Union
1112

1213
import docker
@@ -58,7 +59,7 @@ def env_exists(self):
5859
pass
5960

6061
@abstractmethod
61-
def run_command(self, command):
62+
def run_command(self, command, **kwargs):
6263
"""
6364
Executes a command within the context of the environment.
6465
@@ -267,7 +268,7 @@ def install_dependencies(self) -> None:
267268
]
268269
subprocess.run(cmd, check=True)
269270

270-
def run_command(self, command) -> None:
271+
def run_command(self, command, **kwargs) -> None:
271272
"""
272273
Runs a specified command within the conda environment.
273274
@@ -350,7 +351,7 @@ def install_dependencies(self) -> None:
350351
cmd = f"{pip_executable} install -e {os.path.abspath(self.model_directory)}"
351352
self.run_command(cmd)
352353

353-
def run_command(self, command) -> None:
354+
def run_command(self, command, **kwargs) -> None:
354355
"""
355356
Executes a specified command in the virtual environment and logs the output.
356357
@@ -459,15 +460,17 @@ def env_exists(self) -> bool:
459460
except ImageNotFound:
460461
return False
461462

462-
def run_command(self, command=None) -> None:
463+
def run_command(self, command=None, input_dir=None, forecast_dir=None) -> None:
463464
"""
464465
Runs the model’s Docker container with input/ and forecasts/ mounted.
465466
Streams logs and checks for non-zero exit codes.
466467
"""
467-
model_root = os.path.abspath(self.model_directory)
468+
model_root = Path(self.model_directory).resolve()
469+
host_volume_input = input_dir or model_root / "input"
470+
host_volume_forecasts = forecast_dir or model_root / "forecasts"
468471
mounts = {
469-
os.path.join(model_root, "input"): {"bind": "/app/input", "mode": "rw"},
470-
os.path.join(model_root, "forecasts"): {"bind": "/app/forecasts", "mode": "rw"},
472+
host_volume_input: {"bind": "/app/input", "mode": "rw"},
473+
host_volume_forecasts: {"bind": "/app/forecasts", "mode": "rw"},
471474
}
472475

473476
uid, gid = os.getuid(), os.getgid()

floatcsep/model.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
import shutil
21
import json
32
import logging
43
import os
4+
import shutil
55
from abc import ABC, abstractmethod
66
from datetime import datetime
77
from pathlib import Path
@@ -10,10 +10,10 @@
1010
import yaml
1111
from csep.core.forecasts import GriddedForecast, CatalogForecast
1212

13-
from floatcsep.utils.accessors import from_zenodo, from_git
1413
from floatcsep.infrastructure.environments import EnvironmentFactory
1514
from floatcsep.infrastructure.registries import ModelRegistry
1615
from floatcsep.infrastructure.repositories import ForecastRepository
16+
from floatcsep.utils.accessors import from_zenodo, from_git
1717
from floatcsep.utils.helpers import timewindow2str, str2timewindow, parse_nested_dicts
1818

1919
log = logging.getLogger("floatLogger")
@@ -393,7 +393,8 @@ def create_forecast(self, tstring: str, **kwargs) -> None:
393393
f"Running {self.name} using {self.environment.__class__.__name__}:"
394394
f" {timewindow2str([start_date, end_date])}"
395395
)
396-
self.environment.run_command(f"{self.func} {self.registry.get_args_key(tstring)}")
396+
input_dir = self.registry.get_input_dir(tstring)
397+
self.environment.run_command(command=f"{self.func}", input_dir=input_dir)
397398

398399
def prepare_args(self, start: datetime, end: datetime, **kwargs) -> None:
399400
"""

tutorials/case_g/custom_plot_script.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def main(experiment):
1313
"""
1414

1515
# Get all the timewindows
16-
timewindows = experiment.timewindows
16+
timewindows = experiment.time_windows
1717

1818
# Get the pymock model
1919
model = experiment.get_model("pymock")

tutorials/case_g/pymock/Dockerfile

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,29 +5,37 @@ FROM python:3.8.13
55
ARG USERNAME=modeler
66
ARG USER_UID=1100
77
ARG USER_GID=$USER_UID
8-
RUN groupadd --non-unique -g $USER_GID $USERNAME \
9-
&& useradd -u $USER_UID -g $USER_GID -s /bin/sh -m $USERNAME
108

11-
# Set up work directory in the Docker container.
12-
## *Change {pymock} to {model_name} when used as template*
13-
WORKDIR /usr/src/pymock/
9+
# User setup
10+
RUN groupadd --gid $USER_GID $USERNAME \
11+
&& useradd --uid $USER_UID --gid $USER_GID --shell /bin/bash --create-home $USERNAME
12+
13+
# Create floatcsep IO interface
14+
RUN mkdir -p /app/input /app/forecasts && chown -R $USERNAME:$USERNAME /app
15+
16+
# Create user and venv
17+
USER $USERNAME
18+
WORKDIR /app
19+
20+
# Set up Python venv
21+
ENV VIRTUAL_ENV=/home/$USERNAME/venv
22+
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
23+
ENV PYTHONUNBUFFERED=1
24+
25+
# Create virtual environment and install python basic packages
26+
RUN python3 -m venv $VIRTUAL_ENV && pip install --upgrade pip setuptools wheel
1427

1528
# Copy the repository from the local machine to the Docker container.
1629
## *Only the needed folders/files for the model build*
17-
COPY --chown=$USER_UID:$USER_GID pymock ./pymock/
18-
COPY --chown=$USER_UID:$USER_GID tests ./tests/
30+
COPY --chown=$USER_UID:$USER_GID pymock/ ./pymock/
1931
COPY --chown=$USER_UID:$USER_GID setup.cfg run.py setup.py ./
2032

21-
# Set up and create python virtual environment
22-
ENV VIRTUAL_ENV=/opt/venv
23-
RUN python3 -m venv $VIRTUAL_ENV
24-
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
25-
2633
# Install the pymock package.
2734
## *Uses pip to install setup.cfg and requirements/instructions therein*
28-
RUN pip install --no-cache-dir --upgrade pip
29-
RUN pip install .
35+
RUN pip3 install --no-cache-dir -e .
3036

3137
# Docker can now be initialized as user
3238
USER $USERNAME
3339

40+
ENTRYPOINT ["pymock"]
41+

tutorials/case_g/pymock/pymock/libs.py

Lines changed: 34 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,7 @@ def syncat_path(start, end, folder):
1111
start = start.date()
1212
end = end.date()
1313

14-
return os.path.join(folder,
15-
f"pymock_{start.isoformat()}_{end.isoformat()}.csv"
16-
)
14+
return os.path.join(folder, f"pymock_{start.isoformat()}_{end.isoformat()}.csv")
1715

1816

1917
def load_cat(path):
@@ -30,10 +28,16 @@ def load_cat(path):
3028
catalog = []
3129
with open(path) as f_:
3230
for line in f_.readlines()[1:]:
33-
line = line.split(',')
34-
event = [float(line[0]), float(line[1]), float(line[2]),
35-
datetime.fromisoformat(line[3]),
36-
float(line[4]), int(line[5]), int(line[6])]
31+
line = line.split(",")
32+
event = [
33+
float(line[0]),
34+
float(line[1]),
35+
float(line[2]),
36+
datetime.fromisoformat(line[3]),
37+
float(line[4]),
38+
int(line[5]),
39+
int(line[6]),
40+
]
3741
catalog.append(event)
3842

3943
return catalog
@@ -46,13 +50,15 @@ def write_forecast(start, end, forecast, folder=None):
4650
"""
4751

4852
if folder is None:
49-
folder = 'forecasts'
53+
folder = "forecasts"
5054
os.makedirs(folder, exist_ok=True)
51-
with open(syncat_path(start, end, folder), 'w') as file_:
52-
file_.write('lon, lat, M, time_string, depth, catalog_id, event_id\n')
55+
with open(syncat_path(start, end, folder), "w") as file_:
56+
file_.write("lon, lat, M, time_string, depth, catalog_id, event_id\n")
5357
for event in forecast:
54-
line = f'{event[0]},{event[1]},{event[2]:.2f},' \
55-
f'{event[3].isoformat()},{event[4]},{event[5]},{event[6]}\n'
58+
line = (
59+
f"{event[0]},{event[1]},{event[2]:.2f},"
60+
f"{event[3].isoformat()},{event[4]},{event[5]},{event[6]}\n"
61+
)
5662
file_.write(line)
5763

5864

@@ -62,23 +68,23 @@ def read_args(path):
6268
{argument} = {argument_value}
6369
"""
6470
import os, sys
65-
params = {'start_date': None, 'end_date': None}
6671

67-
with open(path, 'r') as f_:
72+
params = {"start_date": None, "end_date": None}
73+
74+
with open(path, "r") as f_:
6875
for line in f_.readlines():
69-
line_ = [i.strip() for i in line.split('=')]
70-
if line_[0] == 'start_date':
71-
params['start_date'] = datetime.fromisoformat(line_[1])
72-
elif line_[0] == 'end_date':
73-
params['end_date'] = datetime.fromisoformat(line_[1])
74-
elif line_[0] == 'catalog':
75-
params['catalog'] = os.path.join(os.path.dirname(path),
76-
line_[1])
77-
elif line_[0] == 'mag_min':
78-
params['mag_min'] = float(line_[1])
79-
elif line_[0] == 'n_sims':
80-
params['n_sims'] = int(line_[1])
81-
elif line_[0] == 'seed':
82-
params['seed'] = int(line_[1])
76+
line_ = [i.strip() for i in line.split("=")]
77+
if line_[0] == "start_date":
78+
params["start_date"] = datetime.fromisoformat(line_[1])
79+
elif line_[0] == "end_date":
80+
params["end_date"] = datetime.fromisoformat(line_[1])
81+
elif line_[0] == "catalog":
82+
params["catalog"] = os.path.join(os.path.dirname(path), line_[1])
83+
elif line_[0] == "mag_min":
84+
params["mag_min"] = float(line_[1])
85+
elif line_[0] == "n_sims":
86+
params["n_sims"] = int(line_[1])
87+
elif line_[0] == "seed":
88+
params["seed"] = int(line_[1])
8389

8490
return params

0 commit comments

Comments
 (0)