こんにちは
This commit is contained in:
parent
4799392e24
commit
6280c303dc
1 changed file with 3 additions and 9 deletions
|
|
@@ -50,13 +50,11 @@ def launch_ddp_training(config_path, num_gpus):
|
|||
setup_environment_for_strategy("ddp")
|
||||
|
||||
# Use torchrun for DDP
|
||||
python_cmd = ["python", "scripts/train_progressive.py"]
|
||||
|
||||
cmd = [
|
||||
"torchrun",
|
||||
"--nproc_per_node", str(num_gpus),
|
||||
"--master_port", "12355",
|
||||
] + python_cmd + [
|
||||
"scripts/train_progressive.py",
|
||||
"--config", config_path,
|
||||
"--distributed"
|
||||
]
|
||||
|
|
@@ -104,12 +102,10 @@ use_cpu: false
|
|||
with open(config_file, "w") as f:
|
||||
f.write(accelerate_config)
|
||||
|
||||
python_cmd = ["python", "scripts/train_progressive.py"]
|
||||
|
||||
cmd = [
|
||||
"accelerate", "launch",
|
||||
"--config_file", str(config_file),
|
||||
] + python_cmd + [
|
||||
"scripts/train_progressive.py",
|
||||
"--config", config_path
|
||||
]
|
||||
|
||||
|
|
@@ -128,13 +124,11 @@ def launch_deepspeed_training(config_path, num_gpus):
|
|||
|
||||
setup_environment_for_strategy("deepspeed")
|
||||
|
||||
python_cmd = ["python", "scripts/train_progressive.py"]
|
||||
|
||||
# Use --num_gpus without hostfile for single node
|
||||
cmd = [
|
||||
"deepspeed",
|
||||
"--num_gpus", str(num_gpus),
|
||||
] + python_cmd + [
|
||||
"scripts/train_progressive.py",
|
||||
"--config", config_path,
|
||||
"--deepspeed"
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue