From 6280c303dcf75974656349391f263de85261405e Mon Sep 17 00:00:00 2001
From: Soma Nakamura
Date: Thu, 10 Jul 2025 22:49:30 +0900
Subject: [PATCH] =?UTF-8?q?=E3=81=93=E3=82=93=E3=81=AB=E3=81=A1=E3=81=AF?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 scripts/train_multi_gpu.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/scripts/train_multi_gpu.py b/scripts/train_multi_gpu.py
index 93b606d..e13ee68 100755
--- a/scripts/train_multi_gpu.py
+++ b/scripts/train_multi_gpu.py
@@ -50,13 +50,11 @@ def launch_ddp_training(config_path, num_gpus):
     setup_environment_for_strategy("ddp")
 
     # Use torchrun for DDP
-    python_cmd = ["python", "scripts/train_progressive.py"]
-
     cmd = [
         "torchrun",
         "--nproc_per_node", str(num_gpus),
         "--master_port", "12355",
-    ] + python_cmd + [
+        "scripts/train_progressive.py",
         "--config", config_path,
         "--distributed"
     ]
@@ -104,12 +102,10 @@ use_cpu: false
     with open(config_file, "w") as f:
         f.write(accelerate_config)
 
-    python_cmd = ["python", "scripts/train_progressive.py"]
-
     cmd = [
         "accelerate", "launch",
         "--config_file", str(config_file),
-    ] + python_cmd + [
+        "scripts/train_progressive.py",
         "--config", config_path
     ]
 
@@ -128,13 +124,11 @@ def launch_deepspeed_training(config_path, num_gpus):
 
     setup_environment_for_strategy("deepspeed")
 
-    python_cmd = ["python", "scripts/train_progressive.py"]
-
     # Use --num_gpus without hostfile for single node
     cmd = [
         "deepspeed",
         "--num_gpus", str(num_gpus),
-    ] + python_cmd + [
+        "scripts/train_progressive.py",
         "--config", config_path,
         "--deepspeed"
     ]