From 036f39ead5e21506be9f1f0f4387ef378b469835 Mon Sep 17 00:00:00 2001 From: gnadathur Date: Thu, 29 Feb 2024 17:05:58 -0800 Subject: [PATCH] Add job description field in toml Summary: Adding a description field, useful for integration tests to describe the test. Test Plan: Reviewers: Subscribers: Tasks: Tags: --- torchtrain/config_manager.py | 7 ++++++- train.py | 2 +- train_configs/debug_model.toml | 1 + train_configs/llama_7b.toml | 1 + 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/torchtrain/config_manager.py b/torchtrain/config_manager.py index 439f6595c9..5bcc9c1669 100644 --- a/torchtrain/config_manager.py +++ b/torchtrain/config_manager.py @@ -74,7 +74,12 @@ def init_args_from_command_line( default="./torchtrain/outputs", help="folder to dump job outputs", ) - + parser.add_argument( + "--job.description", + type=str, + default="default job", + help="description of the job", + ) # profiling configs parser.add_argument( "--profiling.run_profiler", diff --git a/train.py b/train.py index f64e82e421..5145462b2e 100644 --- a/train.py +++ b/train.py @@ -93,7 +93,7 @@ def main(job_config: JobConfig): world_size=world_size, ) world_mesh = parallel_dims.build_mesh(device_type="cuda") - + rank0_log(f"Starting job: {job_config.job.description}") model_name = job_config.model.name rank0_log(f"Building {model_name}") # build tokenizer diff --git a/train_configs/debug_model.toml b/train_configs/debug_model.toml index 4fe33b2371..d0f24431c9 100644 --- a/train_configs/debug_model.toml +++ b/train_configs/debug_model.toml @@ -1,6 +1,7 @@ # TorchTrain Config.toml [job] dump_folder = "./outputs" +description = "debug training" [profiling] run_profiler = true diff --git a/train_configs/llama_7b.toml b/train_configs/llama_7b.toml index 3a2d6806e3..2b8b5015f5 100644 --- a/train_configs/llama_7b.toml +++ b/train_configs/llama_7b.toml @@ -1,6 +1,7 @@ # TorchTrain Config.toml [job] dump_folder = "./outputs" +description = "llama 7b training" [profiling] run_profiler = true