こんにちは
This commit is contained in:
parent
6906a09c8f
commit
4430e0b363
2 changed files with 12 additions and 7 deletions
|
|
@@ -27,17 +27,17 @@ deepspeed:
|
|||
optimizer:
|
||||
type: "AdamW"
|
||||
params:
|
||||
lr: 5e-4
|
||||
betas: [0.9, 0.999]
|
||||
eps: 1e-8
|
||||
weight_decay: 0.001
|
||||
lr: "auto"
|
||||
betas: "auto"
|
||||
eps: "auto"
|
||||
weight_decay: "auto"
|
||||
|
||||
scheduler:
|
||||
type: "WarmupLR"
|
||||
params:
|
||||
warmup_min_lr: 0
|
||||
warmup_max_lr: 5e-4
|
||||
warmup_num_steps: 100
|
||||
warmup_min_lr: "auto"
|
||||
warmup_max_lr: "auto"
|
||||
warmup_num_steps: "auto"
|
||||
|
||||
fp16:
|
||||
enabled: false
|
||||
|
|
|
|||
|
|
@@ -381,6 +381,8 @@ class ProgressiveTrainer:
|
|||
"bf16": torch.cuda.is_available() and torch.cuda.is_bf16_supported(),
|
||||
"gradient_checkpointing": self.config["model"].get("gradient_checkpointing", False),
|
||||
"max_grad_norm": stage_config["training"].get("max_grad_norm", 1.0),
|
||||
"weight_decay": stage_config["training"].get("weight_decay", 0.001), # Set weight_decay
|
||||
"adam_epsilon": 1e-8, # Set adam_epsilon
|
||||
"report_to": "wandb" if self.config["experiment"]["use_wandb"] else "none",
|
||||
"run_name": f"{self.config['experiment']['name']}_{stage_name}",
|
||||
"dataloader_pin_memory": stage_config["training"].get("dataloader_pin_memory", False),
|
||||
|
|
@@ -402,6 +404,9 @@ class ProgressiveTrainer:
|
|||
deepspeed_config = self.config.get("training_args", {}).get("deepspeed_config")
|
||||
if deepspeed_config:
|
||||
training_args_dict["deepspeed"] = deepspeed_config
|
||||
elif "deepspeed" in self.config:
|
||||
# Use deepspeed config directly from main config
|
||||
training_args_dict["deepspeed"] = self.config["deepspeed"]
|
||||
|
||||
# Add FSDP configuration
|
||||
if "fsdp" in self.config:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue