Merge remote changes with local modifications

- Updated training config for Gemma3 1B with CPU offload support - Enhanced progressive_model.py with better error handling - Added support for Mixture-of-Thoughts dataset - Improved compatibility across different server environments 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-07-10 21:07:48 +09:00 · 2025-07-10 21:07:48 +09:00 · 3c513fee17
commit 3c513fee17
parent 01ef446cc9 37f1ad9408
3 changed files with 8 additions and 3 deletions
--- a/config/training_config_gemma3_1b.yaml
+++ b/config/training_config_gemma3_1b.yaml
@ -91,7 +91,7 @@ progressive_stages:
    dataset_config:
      # Mixture-of-Thoughts specific settings
      streaming: true  # Use streaming for large dataset
-      max_samples: 30000  # Limit samples for faster training
+      max_samples: 300000  # Limit samples for faster training
      split: "train"

 evaluation:
--- a/flake.nix
+++ b/flake.nix
@ -61,6 +61,12 @@
                    pytestCheckPhase = "echo 'Skipping tests'";
                    pythonImportsCheck = [];  # Disable import checks
                  });
+                  curl-cffi = python-super.curl-cffi.overrideAttrs (oldAttrs: {
+                    doCheck = false;
+                    doInstallCheck = false;
+                    pytestCheckPhase = "echo 'Skipping tests'";
+                    pythonImportsCheck = [];  # Disable import checks
+                  });
                };
              };
            })
@ -192,4 +198,4 @@
          LC_ALL = "en_US.UTF-8";
        };
      });
-}
+}
--- a/src/progressive_model.py
+++ b/src/progressive_model.py
@ -381,7 +381,6 @@ class ProgressiveReasoningModel:
            self.model.save_pretrained(self.adapters[stage_name])
            # Also save tokenizer for convenience
            self.tokenizer.save_pretrained(self.adapters[stage_name])
-            
    def load_for_inference(self, adapter_names: List[str], weights: Optional[Dict[str, float]] = None):
        """Load model with specific adapters for inference"""
        if len(adapter_names) == 1: