こんにちは

This commit is contained in:
Soma Nakamura 2025-07-10 22:49:30 +09:00
parent 4799392e24
commit 6280c303dc

View file

@ -50,13 +50,11 @@ def launch_ddp_training(config_path, num_gpus):
setup_environment_for_strategy("ddp") setup_environment_for_strategy("ddp")
# Use torchrun for DDP # Use torchrun for DDP
python_cmd = ["python", "scripts/train_progressive.py"]
cmd = [ cmd = [
"torchrun", "torchrun",
"--nproc_per_node", str(num_gpus), "--nproc_per_node", str(num_gpus),
"--master_port", "12355", "--master_port", "12355",
] + python_cmd + [ "scripts/train_progressive.py",
"--config", config_path, "--config", config_path,
"--distributed" "--distributed"
] ]
@ -104,12 +102,10 @@ use_cpu: false
with open(config_file, "w") as f: with open(config_file, "w") as f:
f.write(accelerate_config) f.write(accelerate_config)
python_cmd = ["python", "scripts/train_progressive.py"]
cmd = [ cmd = [
"accelerate", "launch", "accelerate", "launch",
"--config_file", str(config_file), "--config_file", str(config_file),
] + python_cmd + [ "scripts/train_progressive.py",
"--config", config_path "--config", config_path
] ]
@ -128,13 +124,11 @@ def launch_deepspeed_training(config_path, num_gpus):
setup_environment_for_strategy("deepspeed") setup_environment_for_strategy("deepspeed")
python_cmd = ["python", "scripts/train_progressive.py"]
# Use --num_gpus without hostfile for single node # Use --num_gpus without hostfile for single node
cmd = [ cmd = [
"deepspeed", "deepspeed",
"--num_gpus", str(num_gpus), "--num_gpus", str(num_gpus),
] + python_cmd + [ "scripts/train_progressive.py",
"--config", config_path, "--config", config_path,
"--deepspeed" "--deepspeed"
] ]