こんにちは

This commit is contained in:
Soma Nakamura 2025-07-10 22:49:30 +09:00
parent 4799392e24
commit 6280c303dc

View file

@@ -50,13 +50,11 @@ def launch_ddp_training(config_path, num_gpus):
setup_environment_for_strategy("ddp")
# Use torchrun for DDP
python_cmd = ["python", "scripts/train_progressive.py"]
cmd = [
"torchrun",
"--nproc_per_node", str(num_gpus),
"--master_port", "12355",
] + python_cmd + [
"scripts/train_progressive.py",
"--config", config_path,
"--distributed"
]
@@ -104,12 +102,10 @@ use_cpu: false
with open(config_file, "w") as f:
f.write(accelerate_config)
python_cmd = ["python", "scripts/train_progressive.py"]
cmd = [
"accelerate", "launch",
"--config_file", str(config_file),
] + python_cmd + [
"scripts/train_progressive.py",
"--config", config_path
]
@@ -128,13 +124,11 @@ def launch_deepspeed_training(config_path, num_gpus):
setup_environment_for_strategy("deepspeed")
python_cmd = ["python", "scripts/train_progressive.py"]
# Use --num_gpus without hostfile for single node
cmd = [
"deepspeed",
"--num_gpus", str(num_gpus),
] + python_cmd + [
"scripts/train_progressive.py",
"--config", config_path,
"--deepspeed"
]