Hello

Soma Nakamura 2025-07-10 22:54:28 +09:00
parent 6906a09c8f
commit 4430e0b363
2 changed files with 12 additions and 7 deletions


@@ -27,17 +27,17 @@ deepspeed:
   optimizer:
     type: "AdamW"
     params:
-      lr: 5e-4
-      betas: [0.9, 0.999]
-      eps: 1e-8
-      weight_decay: 0.001
+      lr: "auto"
+      betas: "auto"
+      eps: "auto"
+      weight_decay: "auto"
   scheduler:
     type: "WarmupLR"
     params:
-      warmup_min_lr: 0
-      warmup_max_lr: 5e-4
-      warmup_num_steps: 100
+      warmup_min_lr: "auto"
+      warmup_max_lr: "auto"
+      warmup_num_steps: "auto"
   fp16:
     enabled: false
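Switching these values to "auto" hands them over to the Hugging Face Trainer: per the Transformers DeepSpeed integration, each "auto" entry is replaced with the matching TrainingArguments field when DeepSpeed initializes, so the YAML can no longer drift out of sync with the trainer's own hyperparameters. A minimal sketch of the mapping (the output_dir and values here are illustrative, not taken from this repo):

```python
from transformers import TrainingArguments

# Each "auto" in the DeepSpeed config resolves to the TrainingArguments
# field noted in the comment (per the Transformers DeepSpeed integration).
ds_config = {
    "optimizer": {
        "type": "AdamW",
        "params": {
            "lr": "auto",            # <- args.learning_rate
            "betas": "auto",         # <- [args.adam_beta1, args.adam_beta2]
            "eps": "auto",           # <- args.adam_epsilon
            "weight_decay": "auto",  # <- args.weight_decay
        },
    },
    "scheduler": {
        "type": "WarmupLR",
        "params": {
            "warmup_min_lr": "auto",     # <- 0
            "warmup_max_lr": "auto",     # <- args.learning_rate
            "warmup_num_steps": "auto",  # <- args.warmup_steps
        },
    },
}

args = TrainingArguments(
    output_dir="out",      # hypothetical path
    learning_rate=5e-4,    # the value previously hard-coded as lr / warmup_max_lr
    weight_decay=0.001,
    adam_epsilon=1e-8,
    warmup_steps=100,
    deepspeed=ds_config,   # a dict is accepted here alongside a JSON path
)
```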


@@ -381,6 +381,8 @@ class ProgressiveTrainer:
             "bf16": torch.cuda.is_available() and torch.cuda.is_bf16_supported(),
             "gradient_checkpointing": self.config["model"].get("gradient_checkpointing", False),
             "max_grad_norm": stage_config["training"].get("max_grad_norm", 1.0),
+            "weight_decay": stage_config["training"].get("weight_decay", 0.001),  # Set weight_decay
+            "adam_epsilon": 1e-8,  # Set adam_epsilon
             "report_to": "wandb" if self.config["experiment"]["use_wandb"] else "none",
             "run_name": f"{self.config['experiment']['name']}_{stage_name}",
             "dataloader_pin_memory": stage_config["training"].get("dataloader_pin_memory", False),
@@ -402,6 +404,9 @@ class ProgressiveTrainer:
         deepspeed_config = self.config.get("training_args", {}).get("deepspeed_config")
         if deepspeed_config:
             training_args_dict["deepspeed"] = deepspeed_config
+        elif "deepspeed" in self.config:
+            # Use deepspeed config directly from main config
+            training_args_dict["deepspeed"] = self.config["deepspeed"]
         # Add FSDP configuration
         if "fsdp" in self.config: