diff --git a/docs/conf.py b/docs/conf.py index 9812034..79f8e6c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -6,54 +6,62 @@ import sys from sphinx_gallery.sorting import FileNameSortKey -sys.path.insert(0, os.path.abspath('..')) +sys.path.insert(0, os.path.abspath("..")) + +# NOTE: To expose sphinx warnings, comment `setup_logger()` in floatcsep.commands.main + # -- Project information ----------------------------------------------------- -project = 'floatCSEP' -copyright = '2022, Pablo Iturrieta' -author = 'Pablo Iturrieta' -release = 'v0.1.0' +project = "floatCSEP" +copyright = "2022, Pablo Iturrieta" +author = "Pablo Iturrieta" +release = "v0.1.0" # -- General configuration --------------------------------------------------- extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.todo', - 'sphinx.ext.autosummary', - 'sphinx.ext.coverage', - 'sphinx.ext.mathjax', - 'sphinx.ext.viewcode', - 'sphinx.ext.napoleon', - 'sphinx.ext.intersphinx', - # 'sphinx_gallery.gen_gallery', - # 'sphinx.ext.githubpages' + "sphinx.ext.autodoc", + "sphinx.ext.todo", + "sphinx.ext.autosummary", + "sphinx.ext.coverage", + "sphinx.ext.mathjax", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "sphinx.ext.intersphinx", ] -templates_path = ['_templates'] -source_suffix = '.rst' -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] -pygments_style = 'default' # todo -autodoc_typehints = 'none' +# language = 'en' +autosummary_generate = False +autoclass_content = "both" +suppress_warnings = [ + "autosummary", + "autosummary.missing", +] +templates_path = ["_templates"] +source_suffix = ".rst" +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +pygments_style = "default" +autodoc_typehints = "description" intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), - "numpy": ("https://docs.scipy.org/doc/numpy/", None), - "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None), - "scipy": ('http://docs.scipy.org/doc/scipy/reference', None), - "matplotlib": 
('http://matplotlib.sourceforge.net/', None), - 'pycsep': ('https://docs.cseptesting.org/', None) + "numpy": ("https://numpy.org/doc/stable/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), + "scipy": ("https://docs.scipy.org/doc/scipy/", None), + "matplotlib": ("https://matplotlib.org/stable", None), + "pycsep": ("https://docs.cseptesting.org/", None), } # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -html_theme = 'sphinx_rtd_theme' -html_static_path = ['_static'] +html_theme = "sphinx_rtd_theme" +html_static_path = ["_static"] html_theme_options = { - 'display_version': True, - 'prev_next_buttons_location': 'both', - 'collapse_navigation': False, - 'style_nav_header_background': '#343131ff', - 'logo_only': True, + "display_version": True, + "prev_next_buttons_location": "both", + "collapse_navigation": False, + "style_nav_header_background": "#343131ff", + "logo_only": True, } -html_logo = '_static/floatcsep_logo.svg' +html_logo = "_static/floatcsep_logo.svg" todo_include_todos = False diff --git a/docs/examples/case_f.rst b/docs/examples/case_g.rst similarity index 91% rename from docs/examples/case_f.rst rename to docs/examples/case_g.rst index 8d4c77b..203f7b8 100644 --- a/docs/examples/case_f.rst +++ b/docs/examples/case_g.rst @@ -1,5 +1,5 @@ -A - Simple(st) Time-Dependent, Catalog-based Model -================================================== +G - Time-Dependent, Catalog-Based Model (from Source Code) +========================================================== .. currentmodule:: floatcsep @@ -8,7 +8,7 @@ A - Simple(st) Time-Dependent, Catalog-based Model .. admonition:: **TL; DR** - In a terminal, navigate to ``floatcsep/examples/case_f`` and type: + In a terminal, navigate to ``floatcsep/examples/case_g`` and type: .. 
code-block:: console @@ -23,7 +23,7 @@ Artifacts This example shows how a time-dependent model should be set up for a time-dependent experiment :: - case_f + case_g └── pymock ├── input ├── args.txt (model arguments) @@ -49,7 +49,7 @@ The experiment's complexity increases from time-independent to dependent, since 1. The input data is, at the least, a catalog filtered until the forecast beginning, which is automatically allocated by ``fecsep`` in the `{model}/input` prior to each model's run. It is stored inside the model in ``csep.ascii`` format for simplicity's sake (see :doc:`pycsep:concepts/catalogs`). - .. literalinclude:: ../../examples/case_f/catalog.csv + .. literalinclude:: ../../examples/case_g/catalog.csv :lines: 1-2 2. The input arguments controls how the source code works. The minimum arguments to run a model (which should be modified dynamically during an experiment) are the forecast ``start_date`` and ``end_date``. The experiment will read `{model}/input/args.txt` and change the values of ``start_date = {datetime}`` and ``end_date = {datetime}`' before the model is run. Additional arguments can be set by convenience, such as ``catalog`` (the input catalog name), ``n_sims`` (number of synthetic catalogs) and random ``seed`` for reproducibility. @@ -80,7 +80,7 @@ Time The configuration is identical to time-independent models, with the exception that now a ``horizon`` can be defined instead of ``intervals``, which is the forecast time-window length. The experiment's class should now be explicited as ``exp_class: td`` - .. literalinclude:: ../../examples/case_f/config.yml + .. literalinclude:: ../../examples/case_g/config.yml :language: yaml :lines: 3-7 @@ -94,7 +94,7 @@ Models Additional arguments should be passed to time-independent models. - .. literalinclude:: ../../examples/case_f/models.yml + .. 
literalinclude:: ../../examples/case_g/models.yml :language: yaml :lines: 3-7 @@ -110,7 +110,7 @@ Tests With time-dependent models, now catalog evaluations found in :obj:`csep.core.catalog_evaluations` can be used. - .. literalinclude:: ../../examples/case_f/tests.yml + .. literalinclude:: ../../examples/case_g/tests.yml :language: yaml .. note:: @@ -120,7 +120,7 @@ Tests Running the experiment ---------------------- - The experiment can be run by simply navigating to the ``examples/case_f`` folder in the terminal and typing. + The experiment can be run by simply navigating to the ``examples/case_g`` folder in the terminal and typing. .. code-block:: console diff --git a/docs/index.rst b/docs/index.rst index 583bff7..d4db04a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -26,7 +26,7 @@ Preliminary documentation. examples/case_c.rst examples/case_d.rst examples/case_e.rst - examples/case_f.rst + examples/case_g.rst .. toctree:: @@ -46,7 +46,7 @@ Preliminary documentation. deployment/intro.rst .. toctree:: - :maxdepth: 2 + :maxdepth: 0 :caption: Help & Reference reference/api_reference diff --git a/docs/intro/concepts.rst b/docs/intro/concepts.rst index 71e375f..dde7229 100644 --- a/docs/intro/concepts.rst +++ b/docs/intro/concepts.rst @@ -13,7 +13,7 @@ A Floating Testing Experiment encapsulates each experiment into its own runnable evaluations. ``floatCSEP`` goals ----------------- +------------------- This is an application to deploy reproducible and prospective experiments of earthquake forecasting, namely a Floating Eperiment, that can operate independent of a particular testing server. 
With this application, researchers, institutions and users can diff --git a/docs/reference/api_reference.rst b/docs/reference/api_reference.rst index a3d7871..298d97d 100644 --- a/docs/reference/api_reference.rst +++ b/docs/reference/api_reference.rst @@ -1,115 +1,111 @@ -API Reference -============= +API Documentation +================= -This contains a reference document to the floatCSEP API. +.. Here are listed and linked the rst pages of the API docs. Hidden means it won't show on +.. this api reference landing page. -Commands --------- +.. toctree:: + :maxdepth: 1 + :hidden: -The commands and entry-points with which to call `floatcsep` from the terminal -are: + commands + experiment + model + evaluation + postprocess + utilities + infrastructure -.. :currentmodule:: floatcsep.commands.main -.. automodule:: floatcsep.commands.main +.. Here we create fake autosummaries, which are excluded in the conf.py, so they are not shown +.. in the documentation, but still we are displaying the neat summary tables for each class + +**Commands** + +.. currentmodule:: floatcsep.commands.main + +The main entrypoint functions from the Command Line Interface are: .. autosummary:: - :toctree: generated + :nosignatures: + floatcsep run + stage plot reproduce -Experiment ----------- - -.. :currentmodule:: floatcsep.experiment - -.. automodule:: floatcsep.experiment +**Experiment** +.. currentmodule:: floatcsep.experiment -The experiment is defined using the :class:`Experiment` class. +The :class:`~floatcsep.experiment.Experiment` class is the main handler of floatCSEP, which +orchestrates the :class:`~floatcsep.model.Model` and :class:`~floatcsep.evaluation.Evaluation` +instances onto an experimental workflow. The class and its main methods are: .. 
autosummary:: - :toctree: generated + :nosignatures: Experiment Experiment.set_models - Experiment.get_model - Experiment.stage_models Experiment.set_tests - Experiment.catalog + Experiment.stage_models + Experiment.set_input_cat Experiment.set_test_cat Experiment.set_tasks Experiment.run Experiment.read_results - Experiment.plot_results - Experiment.plot_catalog - Experiment.plot_forecasts - Experiment.generate_report - Experiment.to_dict - Experiment.to_yml - Experiment.from_yml - + Experiment.make_repr -Models ------- +**Model** -.. :currentmodule:: floatcsep.model +.. currentmodule:: floatcsep.model -.. automodule:: floatcsep.model - -A model is defined using the :class:`Model` class. +The :class:`~floatcsep.model.Model` class is the handler of forecasts creation, storage and +reading. The abstract and concrete classes, and their main methods are: .. autosummary:: - :toctree: generated + :nosignatures: + + Model + Model.get_source + Model.factory - Model - Model.get_source - Model.stage - Model.init_db - Model.rm_db - Model.get_forecast - Model.create_forecast - Model.forecast_from_func - Model.forecast_from_file - Model.to_dict - Model.from_dict + TimeIndependentModel + TimeIndependentModel.init_db + TimeIndependentModel.get_forecast + TimeDependentModel.stage + TimeDependentModel.prepare_args + TimeDependentModel.create_forecast + TimeDependentModel.get_forecast -Evaluations ------------ -.. :currentmodule:: floatcsep.evaluation +**Evaluations** -.. automodule:: floatcsep.evaluation +.. currentmodule:: floatcsep.evaluation -A test is defined using the :class:`Evaluation` class. +The :class:`~floatcsep.evaluation.Evaluation` class is a wrapper for `pycsep` functions, +encapsulating the multiple function, arguments, forecast and catalogs of the entire experiment. +The class and main methods are: .. 
autosummary:: - :toctree: generated + :nosignatures: Evaluation - Evaluation.type - Evaluation.get_catalog Evaluation.prepare_args Evaluation.compute - Evaluation.write_result - Evaluation.to_dict - Evaluation.from_dict +**Accessors** -Accessors ---------- +These are functions that access a model source from a web repository. -.. :currentmodule:: floatcsep.utils.accessors - -.. automodule:: floatcsep.utils.accessors +.. currentmodule:: floatcsep.utils.accessors .. autosummary:: - :toctree: generated + :nosignatures: from_zenodo from_git @@ -117,23 +113,30 @@ Accessors check_hash -Helper Functions ----------------- +**Helper Functions** -.. :currentmodule:: floatcsep.utils.helpers +These are the helper functions of ``floatCSEP`` -.. automodule:: floatcsep.utils.helpers +.. currentmodule:: floatcsep.utils.helpers .. autosummary:: - :toctree: generated + :nosignatures: parse_csep_func + timewindow2str + str2timewindow parse_timedelta_string - read_time_config - read_region_config + read_time_cfg + read_region_cfg timewindows_ti timewindows_td - timewindow2str + + +Some additional plotting functions to pyCSEP are: + +.. autosummary:: + :nosignatures: + plot_sequential_likelihood magnitude_vs_time sequential_likelihood @@ -141,15 +144,14 @@ Helper Functions vector_poisson_t_w_test -Readers -------- +**Readers** -.. :currentmodule:: floatcsep.utils.readers +A small wrapper for ``pyCSEP`` readers -.. automodule:: floatcsep.utils.readers +.. currentmodule:: floatcsep.utils.readers .. autosummary:: - :toctree: generated + :nosignatures: ForecastParsers.dat ForecastParsers.xml @@ -160,15 +162,14 @@ Readers serialize -Environments ------------- +**Environments** -.. :currentmodule:: floatcsep.infrastructure.environments +The computational environment managers for ``floatcsep``. -.. automodule:: floatcsep.infrastructure.environments +.. currentmodule:: floatcsep.infrastructure.environments .. 
autosummary:: - :toctree: generated + :nosignatures: CondaManager CondaManager.create_environment @@ -183,99 +184,77 @@ Environments CondaManager.run_command -Registries ----------- +**Registries** -.. :currentmodule:: floatcsep.infrastructure.registries +The registries hold references to the access points (e.g., filepaths) of the experiment +components (e.g., forecasts, catalogs, results, etc.), and allows to be aware of their status. -.. automodule:: floatcsep.infrastructure.registries +.. currentmodule:: floatcsep.infrastructure.registries .. autosummary:: - :toctree: generated - - FileRegistry - FileRegistry.abs - FileRegistry.absdir - FileRegistry.rel - FileRegistry.rel_dir - FileRegistry.file_exists + :nosignatures: ForecastRegistry - ForecastRegistry.get ForecastRegistry.get_forecast - ForecastRegistry.dir ForecastRegistry.fmt - ForecastRegistry.as_dict - ForecastRegistry.forecast_exist + ForecastRegistry.forecast_exists ForecastRegistry.build_tree - ForecastRegistry.log_tree ExperimentRegistry ExperimentRegistry.add_forecast_registry ExperimentRegistry.get_forecast_registry - ExperimentRegistry.log_forecast_trees - ExperimentRegistry.get ExperimentRegistry.get_result ExperimentRegistry.get_test_catalog ExperimentRegistry.get_figure ExperimentRegistry.result_exist - ExperimentRegistry.as_dict ExperimentRegistry.build_tree - ExperimentRegistry.log_results_tree -Repositories ------------- +**Repositories** -.. :currentmodule:: floatcsep.infrastructure.repositories +The repositories here are designed to store and access the experiment artifacts (results, +catalogs, forecasts), abstracting the experiment logic from the pyCSEP io functionality. -.. automodule:: floatcsep.infrastructure.repositories + +.. currentmodule:: floatcsep.infrastructure.repositories .. 
autosummary:: - :toctree: generated + :nosignatures: + + CatalogRepository + CatalogRepository.set_main_catalog + CatalogRepository.catalog + CatalogRepository.get_test_cat + CatalogRepository.set_test_cat + CatalogRepository.set_input_cat - ForecastRepository - ForecastRepository.factory + GriddedForecastRepository + GriddedForecastRepository.load_forecast CatalogForecastRepository CatalogForecastRepository.load_forecast - CatalogForecastRepository._load_single_forecast - - GriddedForecastRepository.load_forecast - GriddedForecastRepository._get_or_load_forecast - GriddedForecastRepository._load_single_forecast ResultsRepository - ResultsRepository._load_result ResultsRepository.load_results ResultsRepository.write_result - CatalogRepository - CatalogRepository.set_main_catalog - CatalogRepository.catalog - CatalogRepository.get_test_cat - CatalogRepository.set_test_cat - CatalogRepository.set_input_cat -Engine ------- +**Engine** -.. :currentmodule:: floatcsep.infrastructure.engine +The engine routines are designed for the execution of an experiment. -.. automodule:: floatcsep.infrastructure.engine +.. currentmodule:: floatcsep.infrastructure.engine .. autosummary:: - :toctree: generated + :nosignatures: Task - Task.sign_match Task.run - Task.check_exist + Task.sign_match TaskGraph TaskGraph.ntasks TaskGraph.add TaskGraph.add_dependency TaskGraph.run - TaskGraph.check_exist \ No newline at end of file diff --git a/docs/reference/commands.rst b/docs/reference/commands.rst new file mode 100644 index 0000000..89a6df3 --- /dev/null +++ b/docs/reference/commands.rst @@ -0,0 +1,7 @@ +Commands +======== + +.. automodule:: floatcsep.commands.main + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/reference/evaluation.rst b/docs/reference/evaluation.rst new file mode 100644 index 0000000..96358be --- /dev/null +++ b/docs/reference/evaluation.rst @@ -0,0 +1,7 @@ +Evaluation Class +================ + +.. 
autoclass:: floatcsep.evaluation.Evaluation + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/reference/experiment.rst b/docs/reference/experiment.rst new file mode 100644 index 0000000..b14ec89 --- /dev/null +++ b/docs/reference/experiment.rst @@ -0,0 +1,7 @@ +Experiment Handler +================== + +.. autoclass:: floatcsep.experiment.Experiment + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/reference/infrastructure.rst b/docs/reference/infrastructure.rst new file mode 100644 index 0000000..d31b090 --- /dev/null +++ b/docs/reference/infrastructure.rst @@ -0,0 +1,82 @@ +Infrastructure Module +===================== + +Here are shown the modules that manage the relations between the core classes of ``floatCSEP`` +and the required workflow to run an Experiment. + +Registries +---------- + +.. autoclass:: floatcsep.infrastructure.registries.ForecastRegistry + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: floatcsep.infrastructure.registries.ExperimentRegistry + :members: + :undoc-members: + :show-inheritance: + + +Repositories +------------ + +.. autoclass:: floatcsep.infrastructure.repositories.CatalogForecastRepository + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: floatcsep.infrastructure.repositories.GriddedForecastRepository + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: floatcsep.infrastructure.repositories.ResultsRepository + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: floatcsep.infrastructure.repositories.CatalogRepository + :members: + :undoc-members: + :show-inheritance: + +Environments +------------ + +.. autoclass:: floatcsep.infrastructure.environments.CondaManager + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: floatcsep.infrastructure.environments.VenvManager + :members: + :undoc-members: + :show-inheritance: + +.. 
autoclass:: floatcsep.infrastructure.environments.DockerManager + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: floatcsep.infrastructure.environments.EnvironmentFactory + :members: + :undoc-members: + :show-inheritance: + + +Engine +------ + +The components here are in charge of managing and executing the ``floatCSEP`` workflow. + +.. autoclass:: floatcsep.infrastructure.engine.Task + :members: + :undoc-members: + :show-inheritance: + :inherited-members: + +.. autoclass:: floatcsep.infrastructure.engine.TaskGraph + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/reference/model.rst b/docs/reference/model.rst new file mode 100644 index 0000000..1e76097 --- /dev/null +++ b/docs/reference/model.rst @@ -0,0 +1,19 @@ +Model Classes +============= + +.. autoclass:: floatcsep.model.Model + :members: + :undoc-members: + :show-inheritance: + + +.. autoclass:: floatcsep.model.TimeIndependentModel + :members: + :undoc-members: + :show-inheritance: + + +.. autoclass:: floatcsep.model.TimeDependentModel + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/reference/postprocess.rst b/docs/reference/postprocess.rst new file mode 100644 index 0000000..79764a3 --- /dev/null +++ b/docs/reference/postprocess.rst @@ -0,0 +1,25 @@ +Postprocess Module +================== + +This module includes utilities for handling plots and reporting. + +Plot Handler +------------ +This section documents the `plot_handler` module. + +.. automodule:: floatcsep.postprocess.plot_handler + :members: + :undoc-members: + :show-inheritance: + :inherited-members: + + +Reporting +--------- +This section documents the `reporting` module. + +.. 
automodule:: floatcsep.postprocess.reporting + :members: + :undoc-members: + :show-inheritance: + :inherited-members: diff --git a/docs/reference/utilities.rst b/docs/reference/utilities.rst new file mode 100644 index 0000000..c36c747 --- /dev/null +++ b/docs/reference/utilities.rst @@ -0,0 +1,38 @@ +Utilities Module +================ + +This module includes commonly used utilities throughout the ``floatCSEP`` packages. + + +Helper Functions +---------------- +This section documents the `helpers` module. + +.. automodule:: floatcsep.utils.helpers + :members: + :undoc-members: + :show-inheritance: + :inherited-members: + :exclude-members: NoAliasLoader + + +Accessors +--------- +This section documents the `accessors` module. + +.. automodule:: floatcsep.utils.accessors + :members: + :undoc-members: + :show-inheritance: + :inherited-members: + + +Readers +------- +This section documents the `readers` module. + +.. automodule:: floatcsep.utils.readers + :members: + :undoc-members: + :show-inheritance: + :inherited-members: diff --git a/environment.yml b/environment.yml index 9c671c3..c214d8d 100644 --- a/environment.yml +++ b/environment.yml @@ -8,7 +8,6 @@ dependencies: - pycsep - dateparser - docker-py - - flake8 - gitpython - h5py - matplotlib @@ -18,9 +17,5 @@ dependencies: - requests - scipy - seaborn - - sphinx - - sphinx-autoapi - - sphinx-gallery - - sphinx-rtd-theme - pytables - xmltodict diff --git a/floatcsep/commands/main.py b/floatcsep/commands/main.py index 7035a65..461dccf 100644 --- a/floatcsep/commands/main.py +++ b/floatcsep/commands/main.py @@ -16,8 +16,26 @@ log = logging.getLogger("floatLogger") -def stage(config, **_): - +def stage(config: str, **_) -> None: + """ + This function is a preliminary step that stages the models before the experiment is run. It + is helpful to deal with models that generate forecasts from a source code. 
Staging means to + get a source code from a web repository (e.g., zenodo, github) or locate the model in the + filesystem. It will build the computational environment, install all model dependencies and + build the source codes. + + Example usage from a terminal: + :: + + floatcsep stage + + Args: + config (str): Path to the experiment configuration file (YAML format). + **_: Additional keyword arguments are not used. + + Returns: + None + """ log.info(f"floatCSEP v{__version__} | Stage") exp = Experiment.from_yml(config_yml=config) exp.stage_models() @@ -26,8 +44,37 @@ def stage(config, **_): log.debug("") -def run(config, **kwargs): - +def run(config: str, **kwargs) -> None: + """ + Core routine of the floatCSEP workflow. It runs the experiment using the specified YAML + configuration file. The main steps are: + + 1) An Experiment is initialized from the configuration parameters, setting + the time window, region, testing catalogs, models and evaluations. + 2) Stages the models by accessing the model's forecast files or source code, or by + detecting them in the filesystem. If necessary, the computational environment is built + for each model. + 3) According to the experiment and model characteristics (e.g., time-dependent, + time-windows, evaluations) a set of tasks is created to create/load the forecasts, filter + testing catalogs, and evaluate each forecast with its corresponding test catalog. + 4) The tasks are executed according to the experiment logic. Soon to be parallelized. + 5) Postprocessing, such as plotting the catalogs, forecasts, results and user-based + functions is carried out, as well as creating a human-readable report. + 6) Makes the experiment reproducible, by creating a new configuration file that can be run + in the future and then compared to old results. + + Example usage from a terminal: + :: + + floatcsep run + + Args: + config (str): Path to the experiment configuration file (YAML format). 
+ **kwargs: Additional configuration parameters to pass to the experiment. + + Returns: + None + """ log.info(f"floatCSEP v{__version__} | Run") exp = Experiment.from_yml(config_yml=config, **kwargs) exp.stage_models() @@ -46,8 +93,27 @@ def run(config, **kwargs): log.debug(f"-------- END OF RUN --------") -def plot(config, **kwargs): +def plot(config: str, **kwargs) -> None: + """ + Generates plots for an already executed experiment. It will not create any forecasts nor run + any evaluation. + + This function loads the experiment configuration, stages the models to identify the required + time-windows and results to be plotted. + + Example usage from a terminal: + :: + floatcsep plot + + + Args: + config (str): Path to the experiment configuration file (YAML format). + **kwargs: Additional configuration parameters to pass to the experiment. + + Returns: + None + """ log.info(f"floatCSEP v{__version__} | Plot") exp = Experiment.from_yml(config_yml=config, **kwargs) @@ -64,8 +130,26 @@ def plot(config, **kwargs): log.debug("") -def reproduce(config, **kwargs): +def reproduce(config: str, **kwargs) -> None: + """ + Reproduces the results of a previously run experiment. + + This function re-runs an experiment based on its original configuration and compares the new + results with the original run. It generates a reproducibility report by comparing the two + sets of results. + Example usage from a terminal: + :: + + floatcsep reproduce + + Args: + config (str): Path to the experiment configuration file (YAML format). + **kwargs: Additional configuration parameters to pass to the experiment. + + Returns: + None + """ log.info(f"floatCSEP v{__version__} | Reproduce") reproduced_exp = Experiment.from_yml(config, repr_dir="reproduced", **kwargs) @@ -86,7 +170,25 @@ def reproduce(config, **kwargs): log.debug("") -def floatcsep(): +def floatcsep() -> None: + """ + Entry point for the floatCSEP command-line interface (CLI). 
+ + This function parses command-line arguments and executes the appropriate function + (`run`, `stage`, `plot`, or `reproduce`) based on the user's input. It also supports + logging and debugging options. + + Example usage from a terminal: + :: + + floatcsep run + + Args: + None (arguments are parsed via the command-line interface). + + Returns: + None + """ parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS) parser.add_argument( "func", diff --git a/floatcsep/evaluation.py b/floatcsep/evaluation.py index cbf5bd9..e63954e 100644 --- a/floatcsep/evaluation.py +++ b/floatcsep/evaluation.py @@ -1,6 +1,6 @@ import datetime import os -from typing import Dict, Callable, Union, Sequence, List +from typing import Dict, Callable, Union, Sequence, List, Optional, Any from csep.core.catalogs import CSEPCatalog from csep.core.forecasts import GriddedForecast @@ -17,13 +17,14 @@ class Evaluation: parameters and hyperparameters. Args: - name (str): Name of the Test - func (str, ~typing.Callable): Test function/callable - func_kwargs (dict): Keyword arguments of the test function - ref_model (str): String of the reference model, if any - plot_func (str, ~typing.Callable): Test's plotting function - plot_args (list,dict): Positional arguments of the plotting function - plot_kwargs (list,dict): Keyword arguments of the plotting function + name (str): Name of the Test. + func (str, ~typing.Callable): Test function/callable. + func_kwargs (dict): Keyword arguments of the test function. + ref_model (str): String of the reference model, if any. + plot_func (str, ~typing.Callable): Test's plotting function. + plot_args (list,dict): Positional arguments of the plotting function. + plot_kwargs (list,dict): Keyword arguments of the plotting function. + markdown (str): The caption to be placed beneath the result figure. 
""" _TYPES = { @@ -93,8 +94,26 @@ def type(self, type_list: Union[str, Sequence[str]]): self._type = type_list - def parse_plots(self, plot_func, plot_args, plot_kwargs): + def parse_plots( + self, + plot_func: Any, + plot_args: Any, + plot_kwargs: Any, + ) -> None: + """ + It parses the plot function(s) and its(their) arguments from the test configuration + file. The plot function can belong to :mod:`csep.utils.plots` or a custom function. + Each plotting function is parsed by using the function + :func:`~floatcsep.utils.helpers.parse_csep_func`, and assigned to its respective + `args` and `kwargs`. + Args: + plot_func: The name of the plotting function + plot_args: The arguments of the plotting function + plot_kwargs: The keyword arguments of the plotting function + + + """ if isinstance(plot_func, str): self.plot_func = [parse_csep_func(plot_func)] @@ -133,7 +152,6 @@ def prepare_args( Args: timewindow (str, list): Time window string (or list of str) formatted from :meth:`floatcsep.utils.timewindow2str` - catpath (str,list): Path(s) pointing to the filtered catalog(s) model (:class:`floatcsep:model.Model`): Model to be evaluated ref_model (:class:`floatcsep:model.Model`, list): Reference model (or models) reference for the evaluation. diff --git a/floatcsep/experiment.py b/floatcsep/experiment.py index a105810..2cdadaf 100644 --- a/floatcsep/experiment.py +++ b/floatcsep/experiment.py @@ -354,10 +354,8 @@ def set_test_cat(self, tstring: str) -> None: def set_input_cat(self, tstring: str, model: Model) -> None: """ - Filters the complete experiment catalog to a input sub-catalog filtered. - - to the beginning of the test time-window. Writes it to filepath defined - in :attr:`Model.tree.catalog` + Filters the complete experiment catalog to an input sub-catalog filtered to the + beginning of the test time-window. 
Args: tstring (str): Time window string @@ -367,7 +365,7 @@ def set_input_cat(self, tstring: str, model: Model) -> None: self.catalog_repo.set_input_cat(tstring, model) - def set_tasks(self): + def set_tasks(self) -> None: """ Lazy definition of the experiment core tasks by wrapping instances, methods and arguments. Creates a graph with task nodes, while assigning @@ -382,7 +380,6 @@ def set_tasks(self): * A sequential test requires the forecasts exist for all windows * A batch test requires all forecast exist for a given window. - Returns: """ # Set the file path structure @@ -429,9 +426,11 @@ def set_tasks(self): # A catalog needs to have been filtered if isinstance(model_j, TimeDependentModel): task_graph.add_dependency( - task_ij, dinst=self, dmeth="set_input_cat", dkw=(time_i, model_j) + task_ij, dep_inst=self, dep_meth="set_input_cat", dkw=(time_i, model_j) ) - task_graph.add_dependency(task_ij, dinst=self, dmeth="set_test_cat", dkw=time_i) + task_graph.add_dependency( + task_ij, dep_inst=self, dep_meth="set_test_cat", dkw=time_i + ) # Set up the Consistency Tests for test_k in self.tests: @@ -448,7 +447,7 @@ def set_tasks(self): task_graph.add(task_ijk) # the forecast needs to have been created task_graph.add_dependency( - task_ijk, dinst=model_j, dmeth="create_forecast", dkw=time_i + task_ijk, dep_inst=model_j, dep_meth="create_forecast", dkw=time_i ) # Set up the Comparative Tests elif test_k.type == "comparative": @@ -464,12 +463,12 @@ def set_tasks(self): ) task_graph.add(task_ik) task_graph.add_dependency( - task_ik, dinst=model_j, dmeth="create_forecast", dkw=time_i + task_ik, dep_inst=model_j, dep_meth="create_forecast", dkw=time_i ) task_graph.add_dependency( task_ik, - dinst=self.get_model(test_k.ref_model), - dmeth="create_forecast", + dep_inst=self.get_model(test_k.ref_model), + dep_meth="create_forecast", dkw=time_i, ) # Set up the Sequential Scores @@ -485,7 +484,7 @@ def set_tasks(self): task_graph.add(task_k) for tw_i in tw_strings: 
task_graph.add_dependency( - task_k, dinst=model_j, dmeth="create_forecast", dkw=tw_i + task_k, dep_inst=model_j, dep_meth="create_forecast", dkw=tw_i ) # Set up the Sequential_Comparative Scores elif test_k.type == "sequential_comparative": @@ -502,12 +501,12 @@ def set_tasks(self): task_graph.add(task_k) for tw_i in tw_strings: task_graph.add_dependency( - task_k, dinst=model_j, dmeth="create_forecast", dkw=tw_i + task_k, dep_inst=model_j, dep_meth="create_forecast", dkw=tw_i ) task_graph.add_dependency( task_k, - dinst=self.get_model(test_k.ref_model), - dmeth="create_forecast", + dep_inst=self.get_model(test_k.ref_model), + dep_meth="create_forecast", dkw=tw_i, ) # Set up the Batch comparative Scores @@ -525,7 +524,7 @@ def set_tasks(self): task_graph.add(task_k) for m_j in self.models: task_graph.add_dependency( - task_k, dinst=m_j, dmeth="create_forecast", dkw=time_str + task_k, dep_inst=m_j, dep_meth="create_forecast", dkw=time_str ) self.task_graph = task_graph @@ -560,8 +559,12 @@ def read_results(self, test: Evaluation, window: str) -> List: return test.read_results(window, self.models) - def make_repr(self): + def make_repr(self) -> None: + """ + Creates a reproducibility configuration file, re-directing the forecasts/catalog paths, + in order to reproduce the existing results and compare them with previous runs. + """ log.info("Creating reproducibility config file") repr_config = self.registry.get("repr_config") diff --git a/floatcsep/infrastructure/engine.py b/floatcsep/infrastructure/engine.py index 6a99afb..998116d 100644 --- a/floatcsep/infrastructure/engine.py +++ b/floatcsep/infrastructure/engine.py @@ -1,4 +1,5 @@ from collections import OrderedDict +from typing import Union, Any class Task: @@ -12,17 +13,15 @@ class Task: For instance, can wrap a floatcsep.model.Model, its method 'create_forecast' and the argument 'time_window', which can be executed later with Task.call() when, for example, task dependencies (parent nodes) have been completed. 
- """ - def __init__(self, instance, method, **kwargs): - """ + Args: + instance (object): The instance whose method will be executed later. + method (str): The method of the instance that will be called. + **kwargs: Arguments to pass to the method when it is invoked. - Args: - instance: The object instance whose method will be executed later. - method (str): The method of the instance that will be called. - **kwargs: Arguments to pass to the method when it is invoked. + """ - """ + def __init__(self, instance: object, method: str, **kwargs): self.obj = instance self.method = method @@ -30,7 +29,7 @@ def __init__(self, instance, method, **kwargs): self.store = None # Bool for nested tasks. - def sign_match(self, obj=None, met=None, kw_arg=None): + def sign_match(self, obj: Union[object, str] = None, meth: str = None, kw_arg: Any = None): """ Checks whether the task matches a given function signature. @@ -40,7 +39,7 @@ def sign_match(self, obj=None, met=None, kw_arg=None): Args: obj: The object instance or its name (str) to match against. - met: The method name to match against. + meth: The method name to match against. kw_arg: A specific keyword argument value to match against in the task's arguments. Returns: @@ -48,7 +47,7 @@ def sign_match(self, obj=None, met=None, kw_arg=None): """ if self.obj == obj or obj == getattr(self.obj, "name", None): - if met == self.method: + if meth == self.method: if kw_arg in self.kwargs.values(): return True return False @@ -101,26 +100,20 @@ def __call__(self, *args, **kwargs): """ return self.run() - def check_exist(self): - pass - class TaskGraph: """ - Context manager of floatcsep workload distribution. A TaskGraph is responsible for adding - tasks, managing dependencies between tasks, and executing tasks in the correct order. - Tasks in the graph can depend on one another, and the graph ensures that each task is run - after all of its dependencies have been satisfied. 
Contains a 'tasks' dictionary whose - dict_keys are the Task to be executed with dict_values as the Task's dependencies. - - Attributes: - tasks (OrderedDict): A dictionary where the keys are Task objects and the values are - lists of dependent Task objects. - _ntasks (int): The current number of tasks in the graph. - name (str): A name identifier for the task graph. + Context manager of floatcsep workload distribution. + + A TaskGraph is responsible for adding tasks, managing dependencies between tasks, and + executing tasks in the correct order. Tasks in the graph can depend on one another, and + the graph ensures that each task is run after all of its dependencies have been satisfied. + Contains a `Task` dictionary whose dict_keys are the Task to be executed with dict_values + as the Task's dependencies. + """ - def __init__(self): + def __init__(self) -> None: """ Initializes the TaskGraph with an empty task dictionary and task count. """ @@ -129,7 +122,7 @@ def __init__(self): self.name = "floatcsep.infrastructure.engine.TaskGraph" @property - def ntasks(self): + def ntasks(self) -> int: """ Returns the number of tasks currently in the graph. @@ -142,7 +135,7 @@ def ntasks(self): def ntasks(self, n): self._ntasks = n - def add(self, task): + def add(self, task: Task): """ Adds a new task to the task graph. @@ -154,9 +147,10 @@ def add(self, task): self.tasks[task] = [] self.ntasks += 1 - def add_dependency(self, task, dinst=None, dmeth=None, dkw=None): + def add_dependency(self, task, dep_inst: Union[object, str] = None, dep_meth: str = None, + dkw: Any = None): """ - Adds a dependency to a task already in the graph. + Adds a dependency to a task already within the graph. Searches for other tasks within the graph whose signature matches the provided object instance, method name, or keyword argument. 
Any matches are added as @@ -164,8 +158,8 @@ def add_dependency(self, task, dinst=None, dmeth=None, dkw=None): Args: task (Task): The task to which dependencies will be added. - dinst: The object instance or name of the dependency. - dmeth: The method name of the dependency. + dep_inst: The object instance or name of the dependency. + dep_meth: The method name of the dependency. dkw: A specific keyword argument value of the dependency. Returns: @@ -173,7 +167,7 @@ def add_dependency(self, task, dinst=None, dmeth=None, dkw=None): """ deps = [] for i, other_tasks in enumerate(self.tasks.keys()): - if other_tasks.sign_match(dinst, dmeth, dkw): + if other_tasks.sign_match(dep_inst, dep_meth, dkw): deps.append(other_tasks) self.tasks[task].extend(deps) @@ -199,6 +193,3 @@ def __call__(self, *args, **kwargs): None """ return self.run() - - def check_exist(self): - pass diff --git a/floatcsep/infrastructure/environments.py b/floatcsep/infrastructure/environments.py index 1f9cb46..843f6b9 100644 --- a/floatcsep/infrastructure/environments.py +++ b/floatcsep/infrastructure/environments.py @@ -7,6 +7,7 @@ import sys import venv from abc import ABC, abstractmethod +from typing import Union from packaging.specifiers import SpecifierSet @@ -39,7 +40,8 @@ def create_environment(self, force=False): with the same name before creating a new one. Args: - force (bool): Whether to forcefully remove an existing environment. + force (bool): Whether to forcefully remove an existing environment and create it + again """ pass @@ -245,7 +247,7 @@ def is_version_compatible(requirement, current_version): return current_python_version - def install_dependencies(self): + def install_dependencies(self) -> None: """ Installs dependencies in the conda environment using pip, based on the model setup file. 
@@ -263,7 +265,7 @@ def install_dependencies(self): ] subprocess.run(cmd, check=True) - def run_command(self, command): + def run_command(self, command) -> None: """ Runs a specified command within the conda environment. @@ -294,7 +296,7 @@ class VenvManager(EnvironmentManager): create, check, and manipulate virtual environments. """ - def __init__(self, base_name: str, model_directory: str): + def __init__(self, base_name: str, model_directory: str) -> None: """ Initializes the virtual environment manager with the specified base name and model directory. @@ -336,7 +338,7 @@ def env_exists(self) -> bool: """ return os.path.isdir(self.env_path) - def install_dependencies(self): + def install_dependencies(self) -> None: """ Installs dependencies in the virtual environment using pip, based on the model directory's configuration. @@ -346,7 +348,7 @@ def install_dependencies(self): cmd = f"{pip_executable} install -e {os.path.abspath(self.model_directory)}" self.run_command(cmd) - def run_command(self, command): + def run_command(self, command) -> None: """ Executes a specified command in the virtual environment and logs the output. @@ -384,20 +386,20 @@ class DockerManager(EnvironmentManager): containers for the environment. 
""" - def __init__(self, base_name: str, model_directory: str): + def __init__(self, base_name: str, model_directory: str) -> None: self.base_name = base_name self.model_directory = model_directory - def create_environment(self, force=False): + def create_environment(self, force=False) -> None: pass - def env_exists(self): + def env_exists(self) -> None: pass - def run_command(self, command): + def run_command(self, command) -> None: pass - def install_dependencies(self): + def install_dependencies(self) -> None: pass @@ -455,7 +457,7 @@ def get_env( ) @staticmethod - def check_environment_type(): + def check_environment_type() -> Union[str, None]: if "VIRTUAL_ENV" in os.environ: log.info("Detected virtual environment.") return "venv" @@ -493,10 +495,3 @@ def check_environment_type(): return None - -if __name__ == "__main__": - - env = EnvironmentFactory.get_env( - "conda", model_path="../../examples/case_h/models/pymock_poisson" - ) - env.create_environment(force=True) diff --git a/floatcsep/infrastructure/logger.py b/floatcsep/infrastructure/logger.py index b5e07db..bb6980e 100644 --- a/floatcsep/infrastructure/logger.py +++ b/floatcsep/infrastructure/logger.py @@ -1,3 +1,4 @@ +import sys, os import logging.config import warnings @@ -31,16 +32,24 @@ def add_fhandler(filename): fhandler = logging.FileHandler(filename) fhandler.setFormatter(formatter) fhandler.setLevel(logging.DEBUG) - logging.getLogger("floatLogger").addHandler(fhandler) +def is_sphinx_build(): + # Check if Sphinx is running + return 'sphinx' in sys.argv[0] or os.getenv('SPHINX_BUILD') is not None + + def setup_logger(): - logging.config.dictConfig(LOGGING_CONFIG) - logging.getLogger("numexpr").setLevel(logging.WARNING) - logging.getLogger("matplotlib").setLevel(logging.CRITICAL) - # numpy.seterr(all="ignore") - warnings.filterwarnings("ignore") + if is_sphinx_build(): + # Reduce logging or disable it during Sphinx builds + logging.basicConfig(level=logging.WARNING) + else: + 
logging.config.dictConfig(LOGGING_CONFIG) + logging.getLogger("numexpr").setLevel(logging.WARNING) + logging.getLogger("matplotlib").setLevel(logging.CRITICAL) + # numpy.seterr(all="ignore") + warnings.filterwarnings("ignore") def set_console_log_level(log_level): diff --git a/floatcsep/infrastructure/registries.py b/floatcsep/infrastructure/registries.py index cfcc1d9..12a5565 100644 --- a/floatcsep/infrastructure/registries.py +++ b/floatcsep/infrastructure/registries.py @@ -80,6 +80,12 @@ def file_exists(self, *args: Sequence[str]): class ForecastRegistry(FileRegistry): + """ + The class has the responsibility of managing the keys (based on timewindow strings) and path + structure of the forecast pertaining to a model (i.e., forecasts from different + time-windows), keeping track of the forecast existence and path in the filesystem. + """ + def __init__( self, workdir: str, @@ -88,6 +94,15 @@ def __init__( args_file: str = None, input_cat: str = None, ) -> None: + """ + + Args: + workdir (str): The current working directory of the experiment. + path (str): The path of the model working directory (or model filepath). + database (str): The path of the database, in case forecasts are stored therein. + args_file (str): The path of the arguments file (only for TimeDependentModel). + input_cat (str): The path of the input catalog (only for TimeDependentModel). + """ super().__init__(workdir) self.path = path @@ -96,7 +111,16 @@ def __init__( self.input_cat = input_cat self.forecasts = {} - def get(self, *args: Sequence[str]): + def get(self, *args: Sequence[str]) -> str: + """ + Args: + *args: A sequence of keys (usually time-window strings) + + Returns: + The registry element (forecast, catalogs, etc.)
from a sequence of key value + (usually time-window strings) + """ + val = self.__dict__ for i in args: parsed_arg = self._parse_arg(i) @@ -104,14 +128,22 @@ def get(self, *args: Sequence[str]): return self.abs(val) def get_forecast(self, *args: Sequence[str]) -> str: + """ + Gets the filepath of a forecast for a given sequence of keys (usually a timewindow + string). + + Args: + *args: A sequence of keys (usually time-window strings) + Returns: + The forecast registry from a sequence of key values + """ return self.get("forecasts", *args) @property def dir(self) -> str: """ Returns: - The directory containing the model source. """ if os.path.isdir(self.get("path")): @@ -121,12 +153,22 @@ def dir(self) -> str: @property def fmt(self) -> str: + """ + + Returns: + The extension or format of the forecast + """ if self.database: return os.path.splitext(self.database)[1][1:] else: return os.path.splitext(self.path)[1][1:] def as_dict(self) -> dict: + """ + + Returns: + Simple dictionary serialization of the instance with the core attributes + """ return { "workdir": self.workdir, "path": self.path, @@ -137,7 +179,15 @@ def as_dict(self) -> dict: } def forecast_exists(self, timewindow: Union[str, list]) -> Union[bool, Sequence[bool]]: + """ + Checks if forecasts exist for a sequence of timewindows + + Args: + timewindow (str, list): A single or sequence of strings representing a time window + Returns: + A list of bool representing the existence of such forecasts. + """ if isinstance(timewindow, str): return self.file_exists("forecasts", timewindow) else: @@ -161,12 +211,6 @@ def build_tree( args_file (str, bool): input arguments path of the model if TD input_cat (str, bool): input catalog path of the model if TD - Returns: - run_folder: Path to the run. 
- exists: flag if forecasts, catalogs and test_results if they - exist already - target_paths: flag to each element of the gefe (catalog and - evaluation results) """ windows = timewindow2str(timewindows) @@ -215,7 +259,20 @@ def log_tree(self) -> None: class ExperimentRegistry(FileRegistry): + """ + The class has the responsibility of managing the keys (based on models, timewindow and + evaluation name strings) to the structure of the experiment inputs (catalogs, models etc) + and results from the competing evaluations. It keeps track of the forecast registries, as + well as the existence of results and their path in the filesystem. + """ + def __init__(self, workdir: str, run_dir: str = "results") -> None: + """ + + Args: + workdir: The working directory for the experiment run-time. + run_dir: The directory in which the results will be stored. + """ super().__init__(workdir) self.run_dir = run_dir self.results = {} @@ -258,7 +315,15 @@ def log_forecast_trees(self, timewindows: list) -> None: registry.log_tree() log.debug("===================") - def get(self, *args: Sequence[any]) -> str: + def get(self, *args: Any) -> str: + """ + Args: + *args: A sequence of keys (usually models, tests and/or time-window strings) + + Returns: + The filepath from a sequence of key values (usually models first, then time-window + strings) + """ val = self.__dict__ for i in args: parsed_arg = self._parse_arg(i) @@ -266,6 +331,15 @@ def get(self, *args: Sequence[any]) -> str: return self.abs(self.run_dir, val) def get_result(self, *args: Sequence[any]) -> str: + """ + Gets the file path of an evaluation result. 
+ + Args: + args: A sequence of keys (usually models, tests and/or time-window strings) + + Returns: + The filepath of a serialized result + """ val = self.results for i in args: parsed_arg = self._parse_arg(i) @@ -273,6 +347,15 @@ def get_result(self, *args: Sequence[any]) -> str: return self.abs(self.run_dir, val) def get_test_catalog(self, *args: Sequence[any]) -> str: + """ + Gets the file path of a testing catalog. + + Args: + *args: A sequence of keys (time-window strings) + + Returns: + The filepath of the testing catalog for a given time-window + """ val = self.test_catalogs for i in args: parsed_arg = self._parse_arg(i) @@ -280,6 +363,15 @@ def get_test_catalog(self, *args: Sequence[any]) -> str: return self.abs(self.run_dir, val) def get_figure(self, *args: Sequence[any]) -> str: + """ + Gets the file path of a result figure. + + Args: + *args: A sequence of keys (usually tests and/or time-window strings) + + Returns: + The filepath of the figure for a given result + """ val = self.figures for i in args: parsed_arg = self._parse_arg(i) @@ -287,7 +379,15 @@ def get_figure(self, *args: Sequence[any]) -> str: return self.abs(self.run_dir, val) def result_exist(self, timewindow_str: str, test_name: str, model_name: str) -> bool: + """ + Checks if a given test results exist + + Args: + timewindow_str (str): String representing the time window + test_name (str): Name of the evaluation + model_name (str): Name of the model + """ return self.file_exists("results", timewindow_str, test_name, model_name) def as_dict(self) -> str: @@ -301,19 +401,13 @@ def build_tree( tests: Sequence["Evaluation"], ) -> None: """ - Creates the run directory, and reads the file structure inside. + Creates the run directory and reads the file structure inside. Args: timewindows: List of time windows, or representing string. models: List of models or model names tests: List of tests or test names - Returns: - run_folder: Path to the run. 
- exists: flag if forecasts, catalogs and test_results if they - exist already - target_paths: flag to each element of the experiment (catalog and - evaluation results) """ windows = timewindow2str(timewindows) diff --git a/floatcsep/infrastructure/repositories.py b/floatcsep/infrastructure/repositories.py index fd34855..c485d26 100644 --- a/floatcsep/infrastructure/repositories.py +++ b/floatcsep/infrastructure/repositories.py @@ -8,7 +8,7 @@ import csep import numpy from csep.core.catalogs import CSEPCatalog -from csep.core.forecasts import GriddedForecast +from csep.core.forecasts import GriddedForecast, CatalogForecast from csep.models import EvaluationResult from csep.utils.time_utils import decimal_year @@ -82,14 +82,38 @@ def factory( class CatalogForecastRepository(ForecastRepository): + """ + The class is responsible to access (or store in memory) the catalog-based forecasts of a + model. The flag `lazy_load` can be set to False so the catalogs are stored in memory and + reduce the time required to parse files. + + """ def __init__(self, registry: ForecastRegistry, **kwargs): + """ + + Args: + registry (ForecastRegistry): The registry containing the keys/path to the forecasts + given their time-windows. + **kwargs: + """ self.registry = registry self.lazy_load = kwargs.get("lazy_load", True) self.forecasts = {} - def load_forecast(self, tstring: Union[str, list], region=None): + def load_forecast( + self, tstring: Union[str, list], region=None + ) -> Union[CatalogForecast, list[CatalogForecast]]: + """ + Returns a forecast object or a sequence of them for a set of time window strings. + Args: + tstring (str, list): String representing the time-window + region (optional): A region, in case the forecast requires to be filtered lazily. + + Returns: + The CSEP CatalogForecast object or a list of them. 
+ """ if isinstance(tstring, str): return self._load_single_forecast(tstring, region) else: @@ -106,8 +130,20 @@ def remove(self, tstring: Union[str, Sequence[str]]): class GriddedForecastRepository(ForecastRepository): + """ + The class is responsible to access (or store in memory) the gridded-based forecasts of a + model. A keyword `lazy_load` can be set to False so the forecasts are stored in memory and + avoid parsing files repeatedly (Skip for large files). + """ def __init__(self, registry: ForecastRegistry, **kwargs): + """ + + Args: + registry (ForecastRegistry): The registry containing the keys/path to the forecasts + given their time-windows. + **kwargs: + """ self.registry = registry self.lazy_load = kwargs.get("lazy_load", False) self.forecasts = {} @@ -115,14 +151,25 @@ def __init__(self, registry: ForecastRegistry, **kwargs): def load_forecast( self, tstring: Union[str, list] = None, name="", region=None, forecast_unit=1 ) -> Union[GriddedForecast, Sequence[GriddedForecast]]: - """Returns a forecast when requested.""" + """ + Returns a forecast object or a sequence of them for a set of time window strings. + + Args: + tstring (str, list): String representing the time-window + name (str): Forecast name + region (optional): A region, in case the forecast requires to be filtered lazily. + forecast_unit (float): The time unit (in decimal years) that the forecast represents + + Returns: + The CSEP GriddedForecast object or a list of them.
+ """ if isinstance(tstring, str): return self._get_or_load_forecast(tstring, name, forecast_unit) else: return [self._get_or_load_forecast(tw, name, forecast_unit) for tw in tstring] def _get_or_load_forecast( - self, tstring: str, name: str, forecast_unit: int + self, tstring: str, name: str, forecast_unit: float ) -> GriddedForecast: """Helper method to get or load a single forecast.""" if tstring in self.forecasts: @@ -135,7 +182,7 @@ def _get_or_load_forecast( self.forecasts[tstring] = forecast return forecast - def _load_single_forecast(self, tstring: str, fc_unit=1, name_=""): + def _load_single_forecast(self, tstring: str, fc_unit: float = 1, name_=""): start_date, end_date = str2timewindow(tstring) @@ -172,8 +219,16 @@ def remove(self, tstring: Union[str, Sequence[str]]): class ResultsRepository: - + """ + The class is responsible to access, read and write the results of a given evaluation + """ def __init__(self, registry: ExperimentRegistry): + """ + + Args: + registry (ExperimentRegistry): The registry of an experiment, which keeps track + of the filepaths of each result. + """ self.registry = registry def _load_result( @@ -197,13 +252,18 @@ def _load_result( def load_results( self, - test, + test: "Evaluation", window: Union[str, Sequence[datetime.datetime]], models: Union[list["Model"], "Model"], ) -> Union[List, EvaluationResult]: """ Reads an Evaluation result for a given time window and returns a list of the results for all tested models. + + Args: + test (Evaluation): The tests for which the results are to be loaded + window (str, list): The time-windows for which the results are to be loaded + models (Model, list): The models for which the results are to be loaded """ if isinstance(models, list): @@ -216,7 +276,17 @@ def load_results( return self._load_result(test, window, models) def write_result(self, result: EvaluationResult, test, model, window) -> None: + """ + Writes the evaluation results using their method .to_dict() as json file. 
+ + Args: + result: CSEP evaluation result + test: Name of the test + model: Name of the model + window: Name of the time-window + + """ path = self.registry.get_result(window, test, model) class NumpyEncoder(json.JSONEncoder): @@ -234,8 +304,20 @@ def default(self, obj): class CatalogRepository: + """ + The class handles the main and sub-catalogs from the experiment. It is responsible of + accessing, downloading, storing the main catalog, as well as filtering and storing the + corresponding input-catalogs (e.g., input for a model to be run) and test-catalogs (catalogs + for the model's forecasts to be evaluated against). + """ def __init__(self, registry: ExperimentRegistry): + """ + + Args: + registry (ExperimentRegistry): The registry of the experiment + + """ self.cat_path = None self._catalog = None self.registry = registry @@ -412,10 +494,8 @@ def set_test_cat(self, tstring: str) -> None: def set_input_cat(self, tstring: str, model: "Model") -> None: """ - Filters the complete experiment catalog to input sub-catalog filtered. - - to the beginning of thetest time-window. Writes it to filepath defined - in :attr:`Model.tree.catalog` + Filters the complete experiment catalog to input sub-catalog filtered to the beginning + of the test time-window. Args: tstring (str): Time window string diff --git a/floatcsep/model.py b/floatcsep/model.py index 53a4f67..1f4c585 100644 --- a/floatcsep/model.py +++ b/floatcsep/model.py @@ -189,16 +189,21 @@ def factory(cls, model_cfg: dict) -> "Model": class TimeIndependentModel(Model): """ - A Model that does not change in time, commonly represented by static data. + A Model whose forecast is invariant in time. A TimeIndependentModel is commonly represented + by a single forecast as static data. - Args - name (str): The name of the model. - model_path (str): The path to the model data. - forecast_unit (float): The unit of time for the forecast. - store_db (bool): flag to indicate whether to store the model in a database. 
""" def __init__(self, name: str, model_path: str, forecast_unit=1, store_db=False, **kwargs): + """ + + Args: + name (str): The name of the model. + model_path (str): The path to the model data. + forecast_unit (float): The unit of time for the forecast. + store_db (bool): flag to indicate whether to store the model in a database. + + """ super().__init__(name, **kwargs) self.forecast_unit = forecast_unit @@ -271,15 +276,9 @@ def create_forecast(self, tstring: str, **kwargs) -> None: Creates a forecast from the model source and a given time window. Note: - The argument `tstring` is formatted according to how the Experiment - handles timewindows, specified in the functions - :func:'floatcsep.utils.timewindow2str` and - :func:'floatcsep.utils.str2timewindow` + Dummy function for this class, although eventually could also be a source + code (e.g., a Smoothed-Seismicity-Model built from the input-catalog). - Args: - tstring: String representing the start and end of the forecast, - formatted as 'YY1-MM1-DD1_YY2-MM2-DD2'. - **kwargs: """ return @@ -287,7 +286,7 @@ def create_forecast(self, tstring: str, **kwargs) -> None: class TimeDependentModel(Model): """ Model that creates varying forecasts depending on a time window. Requires either a - collection of Forecasts or a function that returns a Forecast. + collection of Forecasts or a function/source code that returns a Forecast. """ def __init__( @@ -298,7 +297,21 @@ def __init__( func_kwargs: dict = None, **kwargs, ) -> None: + """ + Args: + name: The name of the model + model_path: The path to either the source code, or the folder containing static + forecasts. + func: A function/command that runs the model. + func_kwargs: The keyword arguments to run the model. 
They are usually (over)written + into the file `{model_path}/input/{args_file}` + **kwargs: Additional keyword parameters, such as a ``prefix`` (str) for the + resulting forecast file paths, ``args_file`` (str) as the path for the model + arguments file or ``input_cat`` that indicates where the input catalog will be + placed for the model. + + """ super().__init__(name, **kwargs) self.func = func @@ -317,12 +330,13 @@ def __init__( def stage(self, timewindows=None) -> None: """ - Pre-steps to make the model runnable before integrating. + Core method to interface a model with the experiment. + + 1) Get the model from filesystem, Zenodo or Git. Prepares the directory + 2) If source code, creates the computational environment (conda, venv or Docker) + 3) Prepares the registry tree: filepaths/keys corresponding to existing forecasts + and those to be generated, as well as input catalog and arguments file. - - Get from filesystem, Zenodo or Git - - Pre-check model fileformat - - Initialize database - - Run model quality assurance (unit tests, runnable from floatcsep) """ if self.force_stage or not self.registry.file_exists("path"): os.makedirs(self.registry.dir, exist_ok=True) @@ -342,7 +356,22 @@ def stage(self, timewindows=None) -> None: def get_forecast( self, tstring: Union[str, list] = None, region=None ) -> Union[GriddedForecast, CatalogForecast, List[GriddedForecast], List[CatalogForecast]]: - """Wrapper that just returns a forecast, hiding the access method under the hood.""" + """ + Wrapper that returns a forecast, by accessing the model's forecast repository. + + Note: + The argument ``tstring`` is formatted according to how the Experiment + handles timewindows, specified in the functions + :func:`~floatcsep.utils.helpers.timewindow2str` and + :func:`~floatcsep.utils.helpers.str2timewindow` + + Args: + tstring: String representing the start and end of the forecast, + formatted as 'YY1-MM1-DD1_YY2-MM2-DD2'. 
+ region: String representing the region for which to return a forecast. + If None, will return a forecast for all regions. + + """ return self.repository.load_forecast(tstring, region=region) def create_forecast(self, tstring: str, **kwargs) -> None: @@ -350,10 +379,10 @@ def create_forecast(self, tstring: str, **kwargs) -> None: Creates a forecast from the model source and a given time window. Note: - The argument `tstring` is formatted according to how the Experiment + The argument ``tstring`` is formatted according to how the Experiment handles timewindows, specified in the functions - :func:'floatcsep.utils.timewindow2str` and - :func:'floatcsep.utils.str2timewindow` + :func:`~floatcsep.utils.helpers.timewindow2str` and + :func:`~floatcsep.utils.helpers.str2timewindow` Args: tstring: String representing the start and end of the forecast, @@ -374,8 +403,19 @@ def create_forecast(self, tstring: str, **kwargs) -> None: ) self.environment.run_command(f"{self.func} {self.registry.get('args_file')}") - def prepare_args(self, start, end, **kwargs): + def prepare_args(self, start: datetime, end: datetime, **kwargs) -> None: + """ + When the model is a source code, the args file is a plain text file with the required + input arguments. At minimum, it consists of the start and end of the forecast + timewindow, but it can also contain other arguments (e.g., minimum magnitude, number of + simulations, cutoff learning magnitude, etc.) 
+ Args: + start: start date of the forecast timewindow + end: end date of the forecast timewindow + **kwargs: represents additional model arguments (name/value pair) + + """ filepath = self.registry.get("args_file") fmt = os.path.splitext(filepath)[1] diff --git a/floatcsep/postprocess/plot_handler.py b/floatcsep/postprocess/plot_handler.py index 9e3e23b..c8d0671 100644 --- a/floatcsep/postprocess/plot_handler.py +++ b/floatcsep/postprocess/plot_handler.py @@ -19,7 +19,14 @@ def plot_results(experiment: "Experiment") -> None: - """Plots all evaluation results.""" + """ + Plots all evaluation results, according to the plotting function given in the tests + configuration file. + + Args: + experiment: The experiment instance, whose results were already calculated. + + """ log.info("Plotting evaluation results") timewindows = timewindow2str(experiment.timewindows) @@ -28,7 +35,33 @@ def plot_results(experiment: "Experiment") -> None: def plot_forecasts(experiment: "Experiment") -> None: - """Plots and saves all the generated forecasts.""" + """ + Plots and saves all the generated forecasts. + + It can be specified in the experiment ``config.yml`` as: + :: + + postprocess: + plot_forecasts: True + + + + or by specifying arguments as: + :: + + postprocess: + plot_forecasts: + projection: Mercator + basemap: google-satellite + cmap: magma + + The default is ``plot_forecasts: True`` + + Args: + experiment: The experiment instance, whose models were already run and their forecast + are located in the filesystem/database + + """ # Parsing plot configuration file plot_forecast_config: dict = parse_plot_config( @@ -43,7 +76,7 @@ def plot_forecasts(experiment: "Experiment") -> None: log.info("Plotting forecasts") # Get the time windows to be plotted. Defaults to only the last time window.
- time_windows: list[list[datetime]] = ( + time_windows = ( timewindow2str(experiment.timewindows) if plot_forecast_config.get("all_time_windows") else [timewindow2str(experiment.timewindows[-1])] @@ -78,7 +111,33 @@ def plot_forecasts(experiment: "Experiment") -> None: def plot_catalogs(experiment: "Experiment") -> None: + """ + Plots and saves the testing catalogs. + + It can be specified in the experiment ``config.yml`` as: + :: + + postprocess: + plot_catalog: True + + + or by specifying arguments as: + :: + + postprocess: + plot_catalog: + projection: Mercator + basemap: google-satellite + markersize: 2 + + The default is ``plot_catalog: True`` + + + Args: + experiment: The experiment instance, whose catalogs were already accessed and filtered. + + """ # Parsing plot configuration file plot_catalog_config: dict = parse_plot_config( experiment.postprocess.get("plot_catalog", {}) @@ -135,7 +194,19 @@ def plot_catalogs(experiment: "Experiment") -> None: def plot_custom(experiment: "Experiment"): + """ + Hook for user-based plotting functions. It corresponds to a function within a python file, + specified in the experiment ``config.yml`` as: + :: + + postprocess: + plot_custom: {module}.py:{function} + + Args: + experiment: The experiment instance, whose models were already run and their forecast + are located in the filesystem/database + """ plot_config = parse_plot_config(experiment.postprocess.get("plot_custom", False)) if plot_config is None: return @@ -184,7 +255,22 @@ def plot_custom(experiment: "Experiment"): def parse_plot_config(plot_config: Union[dict, str, bool]): + """ + Parses the configuration of a given plot directive, usually gotten from the experiment + ``config.yml`` as: + :: + + postprocess: + {plot_config} + + Args: + plot_config: The plotting directive, which can be a dictionary, a boolean, or a string. + If it is a dictionary, then it is directly returned. If it is a boolean, then + the default plotting configuration is used.
If it is a string, then it is + expected to be of the form ``{script_path}.py:{func_name}``. + + """ if plot_config is True: return {} @@ -217,11 +303,15 @@ def parse_plot_config(plot_config: Union[dict, str, bool]): def parse_projection(proj_config: Union[dict, str, bool]): - """Retrieve projection configuration. + """ + Retrieve projection configuration. e.g., as defined in the config file: + :: + projection: Mercator: central_longitude: 0.0 + """ if proj_config is None: return ccrs.PlateCarree(central_longitude=0.0) diff --git a/floatcsep/utils/helpers.py b/floatcsep/utils/helpers.py index 29c5409..75b4821 100644 --- a/floatcsep/utils/helpers.py +++ b/floatcsep/utils/helpers.py @@ -7,21 +7,16 @@ from datetime import datetime, date from typing import Union, Mapping, Sequence +# pyCSEP libraries +import csep.core +import csep.utils + # third-party libraries import numpy import pandas import scipy.stats import seaborn import yaml -from matplotlib import pyplot -from matplotlib.lines import Line2D - -# pyCSEP libraries -import csep.core -import csep.utils -from csep.core.regions import CartesianGrid2D -from csep.utils.calc import cleaner_range -from csep.utils.plots import plot_spatial_dataset from csep.core.catalogs import CSEPCatalog from csep.core.exceptions import CSEPCatalogException from csep.core.forecasts import GriddedForecast @@ -30,13 +25,17 @@ w_test, _poisson_likelihood_test, ) +from csep.core.regions import CartesianGrid2D from csep.models import EvaluationResult +from csep.utils.calc import cleaner_range +from csep.utils.plots import plot_spatial_dataset +from matplotlib import pyplot +from matplotlib.lines import Line2D # floatCSEP libraries import floatcsep.utils.accessors import floatcsep.utils.readers - _UNITS = ["years", "months", "weeks", "days"] _PD_FORMAT = ["YS", "MS", "W", "D"] @@ -46,9 +45,7 @@ def parse_csep_func(func): """ - Searchs in pyCSEP and floatCSEP a function or method whose name matches the. - - provided string. 
+ Search in pyCSEP and floatCSEP a function or method whose name matches the provided string. Args: func (str, obj) : representing the name of the pycsep/floatcsep function @@ -236,15 +233,18 @@ def read_region_cfg(region_config, **kwargs): return region_config -def timewindow2str(datetimes: Union[Sequence[datetime], Sequence[Sequence[datetime]]]): +def timewindow2str( + datetimes: Union[Sequence[datetime], Sequence[Sequence[datetime]]] +) -> Union[str, list[str]]: """ - Converts a time window (list/tuple of datetimes) to a string that. + Converts a time window (list/tuple of datetimes) to a string that represents it. Can be a + single timewindow or a list of time windows. - represents it. Can be a single timewindow or a list of time windows. Args: - datetimes: + datetimes: A sequence (of sequences) of datetimes, representing a list of timewindows Returns: + A sequence of strings for each time window """ if isinstance(datetimes[0], datetime): return "_".join([j.date().isoformat() for j in datetimes]) @@ -253,15 +253,18 @@ def timewindow2str(datetimes: Union[Sequence[datetime], Sequence[Sequence[dateti return ["_".join([j.date().isoformat() for j in i]) for i in datetimes] -def str2timewindow(tw_string: Union[str, Sequence[str]]): +def str2timewindow( + tw_string: Union[str, Sequence[str]] +) -> Union[Sequence[datetime], Sequence[Sequence[datetime]]]: """ - Converts a string representation of a time window into a list of datetimes. + Converts a string representation of a time window into a list of datetimes representing the + time window edges. - representing the time window edges. 
Args: - tw_string: + tw_string: A string representing the time window ('{datetime}_{datetime}') Returns: + A list (of list) containing the pair of datetimes objects """ if isinstance(tw_string, str): start_date, end_date = [datetime.fromisoformat(i) for i in tw_string.split("_")] @@ -520,6 +523,8 @@ def sequential_information_gain( random_numbers: numpy.ndarray = None, ): """ + Evaluates the Information Gain for multiple time-windows. + Args: gridded_forecasts: list csep.core.forecasts.GriddedForecast @@ -625,6 +630,17 @@ def vector_poisson_t_w_test( def plot_sequential_likelihood(evaluation_results, plot_args=None): + """ + Plot of likelihood against time. + + Args: + evaluation_results (list): An evaluation result containing the likelihoods + plot_args (dict): A configuration dictionary for the plotting. + + Returns: + Ax object + + """ if plot_args is None: plot_args = {} title = plot_args.get("title", None) @@ -691,6 +707,16 @@ def plot_sequential_likelihood(evaluation_results, plot_args=None): def magnitude_vs_time(catalog): + """ + Simple magnitude vs. time plot (TBI in pyCSEP) + + Args: + catalog: Catalog to be plotted + + Returns: + Ax object + + """ mag = catalog.data["magnitude"] time = [datetime.fromtimestamp(i / 1000.0) for i in catalog.data["origin_time"]] fig, ax = pyplot.subplots(figsize=(12, 4)) @@ -702,7 +728,7 @@ def magnitude_vs_time(catalog): def plot_matrix_comparative_test(evaluation_results, p=0.05, order=True, plot_args={}): - """Produces matrix plot for comparative tests for all models. 
+ """Produces matrix plot for comparative tests for all models (TBI in pyCSEP) Args: evaluation_results (list of result objects): paired t-test results diff --git a/requirements.txt b/requirements.txt index eefaee8..5c7b108 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,9 @@ numpy dateparser docker -flake8 gitpython h5py matplotlib -packaging pandas pycsep pyshp diff --git a/setup.cfg b/setup.cfg index bb655ac..3528ec5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -26,11 +26,9 @@ install_requires = numpy dateparser docker - flake8 gitpython h5py matplotlib - packaging pandas pycsep pyshp diff --git a/tests/unit/test_engine.py b/tests/unit/test_engine.py index d8b03c5..2928b92 100644 --- a/tests/unit/test_engine.py +++ b/tests/unit/test_engine.py @@ -23,8 +23,10 @@ def test_init(self): self.assertEqual(self.task.kwargs["value"], 10) def test_sign_match(self): - self.assertTrue(self.task.sign_match(obj=self.obj, met="dummy_method", kw_arg=10)) - self.assertFalse(self.task.sign_match(obj="NonMatching", met="dummy_method", kw_arg=10)) + self.assertTrue(self.task.sign_match(obj=self.obj, meth="dummy_method", kw_arg=10)) + self.assertFalse( + self.task.sign_match(obj="NonMatching", meth="dummy_method", kw_arg=10) + ) def test___str__(self): task_str = str(self.task) @@ -41,17 +43,14 @@ def test___call__(self): result = self.task() self.assertEqual(result, 20) - def test_check_exist(self): - self.assertIsNone(self.task.check_exist()) - class TestTaskGraph(unittest.TestCase): def setUp(self): self.graph = TaskGraph() self.obj = DummyClass("TestObj") - self.task_a = Task(instance=self.obj, method='dummy_method', value=10) - self.task_b = Task(instance=self.obj, method='dummy_method', value=20) + self.task_a = Task(instance=self.obj, method="dummy_method", value=10) + self.task_b = Task(instance=self.obj, method="dummy_method", value=20) def test_init(self): self.assertEqual(self.graph.ntasks, 0) @@ -65,7 +64,9 @@ def test_add(self): def 
test_add_dependency(self): self.graph.add(self.task_a) self.graph.add(self.task_b) - self.graph.add_dependency(self.task_b, dinst=self.obj, dmeth='dummy_method', dkw=10) + self.graph.add_dependency( + self.task_b, dep_inst=self.obj, dep_meth="dummy_method", dkw=10 + ) self.assertIn(self.task_a, self.graph.tasks[self.task_b]) def test_run(self): @@ -78,9 +79,6 @@ def test___call__(self): self.graph() self.assertEqual(self.task_a.store, 20) - def test_check_exist(self): - self.assertIsNone(self.graph.check_exist()) - -if __name__ == '__main__': +if __name__ == "__main__": unittest.main()