File tree Expand file tree Collapse file tree 2 files changed +41
-0
lines changed
benchmarks/float8/training Expand file tree Collapse file tree 2 files changed +41
-0
lines changed File renamed without changes.
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD 3-Clause license found in the
# LICENSE file in the root directory of this source tree.

# Launches a torchtitan float8 training run using the llama4 debug model
# config, captures the combined stdout/stderr of the run in a temporary log
# file, and then hands that log to parse_torchtitan_logs.py to calculate
# top line metrics.
#
# Environment variables:
#   TORCHTITAN_ROOT   (required) path to the torchtitan checkout that
#                     contains run_train.sh.
#   EXTRA_ARGS        (optional) whitespace-separated additional arguments
#                     passed to the torchtitan training script.
#   LOCAL_BATCH_SIZE  (optional, default 1)   see review note below.
#   STEPS             (optional, default 100) see review note below.
#
# Exit status: 1 if TORCHTITAN_ROOT is unset/empty, the exit status of the
# training pipeline if it fails, otherwise the exit status of the log parser.

set -euo pipefail

# script arguments
# NOTE(review): LOCAL_BATCH_SIZE and STEPS receive defaults here but are not
# forwarded to run_train.sh anywhere in this script. Until they are wired in,
# training overrides have to be passed through EXTRA_ARGS.
LOCAL_BATCH_SIZE=${LOCAL_BATCH_SIZE:-1}
STEPS=${STEPS:-100}

# validate user has specified torchtitan root directory
if [[ -z "${TORCHTITAN_ROOT:-}" ]]; then
  {
    echo "Error: TORCHTITAN_ROOT environment variable is not set. Please set it before running this script."
    echo "Usage: TORCHTITAN_ROOT=<directory> $0"
    echo "  * EXTRA_ARGS: additional arguments to pass to the torchtitan training script."
  } >&2
  exit 1
fi

# Resolve the directory holding this script so the log parser is found no
# matter where the script is invoked from.
# NOTE(review): assumes parse_torchtitan_logs.py lives next to this script.
script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)

# Temporary log file; it is deleted on every exit path once the performance
# data has been parsed out (or the run has failed).
LOG_FILE=$(mktemp "${TMPDIR:-/tmp}/float8_training_log.XXXXXX")
trap 'rm -f -- "${LOG_FILE}"' EXIT

# Split EXTRA_ARGS on whitespace into an array so each argument is passed as
# its own word without being subject to glob expansion.
extra_args=()
if [[ -n "${EXTRA_ARGS:-}" ]]; then
  # read returns non-zero when it hits end-of-input with -d ''; that is expected.
  read -r -d '' -a extra_args <<< "${EXTRA_ARGS}" || true
fi

# Run the training command from the torchtitan root. The subshell keeps the
# directory change local, so the caller's working directory is never touched,
# and run_train.sh is invoked relative to the new directory so a relative
# TORCHTITAN_ROOT keeps working after the cd.
train_status=0
(
  cd -- "${TORCHTITAN_ROOT}" || {
    echo "Error: cannot change directory to ${TORCHTITAN_ROOT}" >&2
    exit 1
  }
  # The ${arr[@]+...} form expands to nothing for an empty array, which
  # avoids an "unbound variable" error under `set -u` on older bash versions.
  CONFIG_FILE="./torchtitan/experiments/llama4/train_configs/debug_model.toml" \
    ./run_train.sh ${extra_args[@]+"${extra_args[@]}"}
) 2>&1 | tee -- "${LOG_FILE}" || train_status=$?

# With pipefail, a failed training run is no longer masked by tee; there is
# nothing meaningful to parse in that case.
if (( train_status != 0 )); then
  echo "Error: training run failed with exit status ${train_status}; skipping log parsing." >&2
  exit "${train_status}"
fi

# parse logs to calculate top line metrics
python "${script_dir}/parse_torchtitan_logs.py" --log-file "${LOG_FILE}"
You can’t perform that action at this time.
0 commit comments