こんにちは
This commit is contained in:
parent
5ca971b0a4
commit
6d823eb371
9 changed files with 17 additions and 66 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -31,5 +31,3 @@ hostfile
|
||||||
.DS_Store
|
.DS_Store
|
||||||
*~
|
*~
|
||||||
|
|
||||||
# Keep lock files
|
|
||||||
!uv.lock
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
3.11
|
|
||||||
15
README.md
15
README.md
|
|
@ -5,16 +5,15 @@ Progressive training for LLMs with 8-GPU support for 松尾研LLMコンペ2025.
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Install uv
|
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
|
|
||||||
# Setup project
|
# Setup project
|
||||||
git clone <repository-url>
|
git clone <repository-url>
|
||||||
cd progressive-llm-training
|
cd progressive-llm-training
|
||||||
uv sync
|
|
||||||
|
# Install dependencies
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
# Start training
|
# Start training
|
||||||
uv run scripts/train_progressive.py --config config/training_config_gemma3_1b.yaml
|
python scripts/train_progressive.py --config config/training_config_gemma3_1b.yaml
|
||||||
./scripts/train_gemma3_1b_8gpu.sh --strategy deepspeed
|
./scripts/train_gemma3_1b_8gpu.sh --strategy deepspeed
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -27,10 +26,10 @@ uv run scripts/train_progressive.py --config config/training_config_gemma3_1b.ya
|
||||||
## Commands
|
## Commands
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv sync # Install dependencies
|
pip install -r requirements.txt # Install dependencies
|
||||||
uv run scripts/train_progressive.py --config config/training_config_gemma3_1b.yaml # Single GPU
|
python scripts/train_progressive.py --config config/training_config_gemma3_1b.yaml # Single GPU
|
||||||
./scripts/train_gemma3_1b_8gpu.sh --strategy deepspeed # 8 GPUs
|
./scripts/train_gemma3_1b_8gpu.sh --strategy deepspeed # 8 GPUs
|
||||||
uv run pytest # Run tests
|
pytest # Run tests
|
||||||
```
|
```
|
||||||
|
|
||||||
## Key Files
|
## Key Files
|
||||||
|
|
|
||||||
|
|
@ -3,15 +3,14 @@
|
||||||
## Setup
|
## Setup
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
pip install -r requirements.txt
|
||||||
uv sync
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Training
|
## Training
|
||||||
|
|
||||||
### Single GPU
|
### Single GPU
|
||||||
```bash
|
```bash
|
||||||
uv run scripts/train_progressive.py --config config/training_config_gemma3_1b.yaml
|
python scripts/train_progressive.py --config config/training_config_gemma3_1b.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
### 8 GPUs
|
### 8 GPUs
|
||||||
|
|
|
||||||
|
|
@ -1,26 +0,0 @@
|
||||||
[project]
|
|
||||||
name = "progressive-llm-training"
|
|
||||||
version = "0.1.0"
|
|
||||||
description = "Progressive LLM Training for 松尾研LLMコンペ2025"
|
|
||||||
requires-python = ">=3.9"
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
"torch>=2.0.0",
|
|
||||||
"transformers>=4.40.0",
|
|
||||||
"accelerate>=0.27.0",
|
|
||||||
"peft>=0.11.0",
|
|
||||||
"trl>=0.9.0",
|
|
||||||
"datasets>=2.18.0",
|
|
||||||
"bitsandbytes>=0.43.0",
|
|
||||||
"wandb>=0.16.0",
|
|
||||||
"pyyaml>=6.0",
|
|
||||||
"jsonlines>=4.0.0",
|
|
||||||
"deepspeed>=0.12.0",
|
|
||||||
]
|
|
||||||
|
|
||||||
[project.optional-dependencies]
|
|
||||||
dev = ["pytest", "black", "isort"]
|
|
||||||
|
|
||||||
[build-system]
|
|
||||||
requires = ["hatchling"]
|
|
||||||
build-backend = "hatchling.build"
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
# Use uv instead: uv sync
|
|
||||||
torch>=2.0.0
|
torch>=2.0.0
|
||||||
transformers>=4.40.0
|
transformers>=4.40.0
|
||||||
accelerate>=0.27.0
|
accelerate>=0.27.0
|
||||||
|
|
|
||||||
|
|
@ -7,17 +7,11 @@ GREEN='\033[0;32m'
|
||||||
YELLOW='\033[1;33m'
|
YELLOW='\033[1;33m'
|
||||||
NC='\033[0m' # No Color
|
NC='\033[0m' # No Color
|
||||||
|
|
||||||
echo -e "${GREEN}Progressive LLM Training - Gemma3 1B 8GPU Launcher (uv)${NC}"
|
echo -e "${GREEN}Progressive LLM Training - Gemma3 1B 8GPU Launcher${NC}"
|
||||||
echo "======================================================="
|
echo "=================================================="
|
||||||
|
|
||||||
# Check if uv is available
|
# Use standard python
|
||||||
if command -v uv &> /dev/null; then
|
|
||||||
echo -e "${GREEN}Using uv for Python environment management${NC}"
|
|
||||||
UV_PREFIX="uv run"
|
|
||||||
else
|
|
||||||
echo -e "${YELLOW}uv not found, using standard python${NC}"
|
|
||||||
UV_PREFIX="python"
|
UV_PREFIX="python"
|
||||||
fi
|
|
||||||
|
|
||||||
# Default values
|
# Default values
|
||||||
STRATEGY="deepspeed"
|
STRATEGY="deepspeed"
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,6 @@ import os
|
||||||
import sys
|
import sys
|
||||||
import argparse
|
import argparse
|
||||||
import subprocess
|
import subprocess
|
||||||
import shutil
|
|
||||||
import torch
|
import torch
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
@ -51,8 +50,7 @@ def launch_ddp_training(config_path, num_gpus):
|
||||||
setup_environment_for_strategy("ddp")
|
setup_environment_for_strategy("ddp")
|
||||||
|
|
||||||
# Use torchrun for DDP
|
# Use torchrun for DDP
|
||||||
# Check if uv is available
|
python_cmd = ["python", "scripts/train_progressive.py"]
|
||||||
python_cmd = ["uv", "run", "scripts/train_progressive.py"] if shutil.which("uv") else ["python", "scripts/train_progressive.py"]
|
|
||||||
|
|
||||||
cmd = [
|
cmd = [
|
||||||
"torchrun",
|
"torchrun",
|
||||||
|
|
@ -106,8 +104,7 @@ use_cpu: false
|
||||||
with open(config_file, "w") as f:
|
with open(config_file, "w") as f:
|
||||||
f.write(accelerate_config)
|
f.write(accelerate_config)
|
||||||
|
|
||||||
# Check if uv is available
|
python_cmd = ["python", "scripts/train_progressive.py"]
|
||||||
python_cmd = ["uv", "run", "scripts/train_progressive.py"] if shutil.which("uv") else ["python", "scripts/train_progressive.py"]
|
|
||||||
|
|
||||||
cmd = [
|
cmd = [
|
||||||
"accelerate", "launch",
|
"accelerate", "launch",
|
||||||
|
|
@ -136,8 +133,7 @@ def launch_deepspeed_training(config_path, num_gpus):
|
||||||
with open(hostfile, "w") as f:
|
with open(hostfile, "w") as f:
|
||||||
f.write(f"localhost slots={num_gpus}\n")
|
f.write(f"localhost slots={num_gpus}\n")
|
||||||
|
|
||||||
# Check if uv is available
|
python_cmd = ["python", "scripts/train_progressive.py"]
|
||||||
python_cmd = ["uv", "run", "scripts/train_progressive.py"] if shutil.which("uv") else ["python", "scripts/train_progressive.py"]
|
|
||||||
|
|
||||||
cmd = [
|
cmd = [
|
||||||
"deepspeed",
|
"deepspeed",
|
||||||
|
|
|
||||||
7
uv.lock
generated
7
uv.lock
generated
|
|
@ -1,7 +0,0 @@
|
||||||
# This file is automatically @generated by uv.
|
|
||||||
# It is not intended for manual editing.
|
|
||||||
version = 1
|
|
||||||
requires-python = ">=3.9"
|
|
||||||
|
|
||||||
# Note: This is a placeholder lock file.
|
|
||||||
# Run `uv lock` to generate the actual lock file with resolved dependencies.
|
|
||||||
Loading…
Add table
Reference in a new issue