こんにちは
This commit is contained in:
parent
6906a09c8f
commit
4430e0b363
2 changed files with 12 additions and 7 deletions
|
|
@@ -27,17 +27,17 @@ deepspeed:
|
||||||
optimizer:
|
optimizer:
|
||||||
type: "AdamW"
|
type: "AdamW"
|
||||||
params:
|
params:
|
||||||
lr: 5e-4
|
lr: "auto"
|
||||||
betas: [0.9, 0.999]
|
betas: "auto"
|
||||||
eps: 1e-8
|
eps: "auto"
|
||||||
weight_decay: 0.001
|
weight_decay: "auto"
|
||||||
|
|
||||||
scheduler:
|
scheduler:
|
||||||
type: "WarmupLR"
|
type: "WarmupLR"
|
||||||
params:
|
params:
|
||||||
warmup_min_lr: 0
|
warmup_min_lr: "auto"
|
||||||
warmup_max_lr: 5e-4
|
warmup_max_lr: "auto"
|
||||||
warmup_num_steps: 100
|
warmup_num_steps: "auto"
|
||||||
|
|
||||||
fp16:
|
fp16:
|
||||||
enabled: false
|
enabled: false
|
||||||
|
|
|
||||||
|
|
@@ -381,6 +381,8 @@ class ProgressiveTrainer:
|
||||||
"bf16": torch.cuda.is_available() and torch.cuda.is_bf16_supported(),
|
"bf16": torch.cuda.is_available() and torch.cuda.is_bf16_supported(),
|
||||||
"gradient_checkpointing": self.config["model"].get("gradient_checkpointing", False),
|
"gradient_checkpointing": self.config["model"].get("gradient_checkpointing", False),
|
||||||
"max_grad_norm": stage_config["training"].get("max_grad_norm", 1.0),
|
"max_grad_norm": stage_config["training"].get("max_grad_norm", 1.0),
|
||||||
|
"weight_decay": stage_config["training"].get("weight_decay", 0.001), # Set weight_decay
|
||||||
|
"adam_epsilon": 1e-8, # Set adam_epsilon
|
||||||
"report_to": "wandb" if self.config["experiment"]["use_wandb"] else "none",
|
"report_to": "wandb" if self.config["experiment"]["use_wandb"] else "none",
|
||||||
"run_name": f"{self.config['experiment']['name']}_{stage_name}",
|
"run_name": f"{self.config['experiment']['name']}_{stage_name}",
|
||||||
"dataloader_pin_memory": stage_config["training"].get("dataloader_pin_memory", False),
|
"dataloader_pin_memory": stage_config["training"].get("dataloader_pin_memory", False),
|
||||||
|
|
@@ -402,6 +404,9 @@ class ProgressiveTrainer:
|
||||||
deepspeed_config = self.config.get("training_args", {}).get("deepspeed_config")
|
deepspeed_config = self.config.get("training_args", {}).get("deepspeed_config")
|
||||||
if deepspeed_config:
|
if deepspeed_config:
|
||||||
training_args_dict["deepspeed"] = deepspeed_config
|
training_args_dict["deepspeed"] = deepspeed_config
|
||||||
|
elif "deepspeed" in self.config:
|
||||||
|
# Use deepspeed config directly from main config
|
||||||
|
training_args_dict["deepspeed"] = self.config["deepspeed"]
|
||||||
|
|
||||||
# Add FSDP configuration
|
# Add FSDP configuration
|
||||||
if "fsdp" in self.config:
|
if "fsdp" in self.config:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue