From 4430e0b3638dbd6b925ea3d133ce67da5a7c49fd Mon Sep 17 00:00:00 2001
From: Soma Nakamura
Date: Thu, 10 Jul 2025 22:54:28 +0900
Subject: [PATCH] Resolve DeepSpeed optimizer/scheduler params from
 TrainingArguments via "auto"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../training_config_gemma3_1b_8gpu_deepspeed.yaml | 14 +++++++-------
 src/training.py                                   |  5 +++++
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/config/training_config_gemma3_1b_8gpu_deepspeed.yaml b/config/training_config_gemma3_1b_8gpu_deepspeed.yaml
index 521fdc5..5bff584 100644
--- a/config/training_config_gemma3_1b_8gpu_deepspeed.yaml
+++ b/config/training_config_gemma3_1b_8gpu_deepspeed.yaml
@@ -27,17 +27,17 @@ deepspeed:
   optimizer:
     type: "AdamW"
     params:
-      lr: 5e-4
-      betas: [0.9, 0.999]
-      eps: 1e-8
-      weight_decay: 0.001
+      lr: "auto"
+      betas: "auto"
+      eps: "auto"
+      weight_decay: "auto"
 
   scheduler:
     type: "WarmupLR"
     params:
-      warmup_min_lr: 0
-      warmup_max_lr: 5e-4
-      warmup_num_steps: 100
+      warmup_min_lr: "auto"
+      warmup_max_lr: "auto"
+      warmup_num_steps: "auto"
 
   fp16:
     enabled: false
diff --git a/src/training.py b/src/training.py
index 8f05f2e..9d98d82 100644
--- a/src/training.py
+++ b/src/training.py
@@ -381,6 +381,8 @@ class ProgressiveTrainer:
             "bf16": torch.cuda.is_available() and torch.cuda.is_bf16_supported(),
             "gradient_checkpointing": self.config["model"].get("gradient_checkpointing", False),
             "max_grad_norm": stage_config["training"].get("max_grad_norm", 1.0),
+            "weight_decay": stage_config["training"].get("weight_decay", 0.001),  # resolves the "auto" weight_decay in the DeepSpeed config
+            "adam_epsilon": 1e-8,  # resolves the "auto" eps in the DeepSpeed config
             "report_to": "wandb" if self.config["experiment"]["use_wandb"] else "none",
             "run_name": f"{self.config['experiment']['name']}_{stage_name}",
             "dataloader_pin_memory": stage_config["training"].get("dataloader_pin_memory", False),
@@ -402,6 +404,9 @@ class ProgressiveTrainer:
         deepspeed_config = self.config.get("training_args", {}).get("deepspeed_config")
         if deepspeed_config:
             training_args_dict["deepspeed"] = deepspeed_config
+        elif "deepspeed" in self.config:
+            # Fall back to the DeepSpeed config embedded in the main config
+            training_args_dict["deepspeed"] = self.config["deepspeed"]
 
         # Add FSDP configuration
         if "fsdp" in self.config:
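
For context: with these "auto" values, the optimizer and scheduler hyperparameters
come from a single place, the TrainingArguments. Below is a minimal sketch of how
that resolution works, assuming the Hugging Face Trainer's DeepSpeed integration
(transformers, accelerate, and deepspeed installed); the config dict and numbers
are illustrative, not taken from this repo.

    # Sketch: the Trainer fills each "auto" field from the corresponding
    # TrainingArguments attribute when training starts.
    from transformers import TrainingArguments

    ds_config = {
        "optimizer": {
            "type": "AdamW",
            "params": {
                "lr": "auto",            # filled from args.learning_rate
                "betas": "auto",         # filled from args.adam_beta1 / args.adam_beta2
                "eps": "auto",           # filled from args.adam_epsilon
                "weight_decay": "auto",  # filled from args.weight_decay
            },
        },
        "scheduler": {
            "type": "WarmupLR",
            "params": {
                "warmup_min_lr": "auto",
                "warmup_max_lr": "auto",     # follows args.learning_rate
                "warmup_num_steps": "auto",  # computed from args.warmup_steps / warmup_ratio
            },
        },
    }

    args = TrainingArguments(
        output_dir="out",
        learning_rate=5e-4,
        weight_decay=0.001,   # the single source of truth once the YAML says "auto"
        adam_epsilon=1e-8,
        deepspeed=ds_config,  # a dict is accepted here, as is a path to a JSON file
    )

This is also why the second hunk can pass self.config["deepspeed"] (a plain dict)
straight into training_args_dict: the Trainer accepts either a dict or a file path,
and any remaining "auto" fields are resolved from the TrainingArguments at
initialization.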