#!/usr/bin/env python3
"""
Analyze the size and structure of LoRA adapters
"""
import sys
from pathlib import Path

import torch
import yaml

# Add the repository root to sys.path so `src` imports resolve when this
# file is run as a standalone script
sys.path.append(str(Path(__file__).parent.parent))

from src.progressive_model import ProgressiveReasoningModel
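
# Usage (run from the repository root so that "config/training_config.yaml"
# resolves):
#   python scripts/analyze_adapter_size.py
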
def analyze_adapter_sizes():
    # Load configuration
    with open("config/training_config.yaml") as f:
        config = yaml.safe_load(f)

    print("=" * 60)
    print("LoRA Adapter Size Analysis")
    print("=" * 60)
    # Get adapter configuration from config
    basic_cot_config = config["progressive_stages"][0]
    adapter_config = basic_cot_config["adapter_config"]

    print("\nConfiguration for 'basic_cot' adapter:")
    print(f" - r (rank): {adapter_config['r']}")
    print(f" - lora_alpha: {adapter_config['lora_alpha']}")
    print(f" - lora_dropout: {adapter_config['lora_dropout']}")
    print(f" - target_modules: {adapter_config['target_modules']}")
    # Load the base model to get dimensions
    print("\nLoading base model to analyze dimensions...")
    model_wrapper = ProgressiveReasoningModel(config)
    model_wrapper.setup_base_model()

    # Analyze model architecture
    print(f"\nBase model: {config['experiment']['base_model']}")

    # Count parameters in base model
    total_params = sum(p.numel() for p in model_wrapper.model.parameters())
    print(f"Total base model parameters: {total_params:,}")
    # Load saved adapter if it exists
    adapter_path = Path(config["experiment"]["output_dir"]) / "adapters" / "basic_cot"
    if adapter_path.exists():
        print(f"\nLoading saved adapter from: {adapter_path}")

        # Load the adapter state dict. Recent PEFT versions save
        # adapter_model.safetensors by default; fall back to the legacy
        # adapter_model.bin otherwise.
        adapter_model_path = adapter_path / "adapter_model.safetensors"
        if not adapter_model_path.exists():
            adapter_model_path = adapter_path / "adapter_model.bin"

        if adapter_model_path.exists():
            if adapter_model_path.suffix == ".safetensors":
                from safetensors.torch import load_file

                adapter_weights = load_file(adapter_model_path)
            else:
                adapter_weights = torch.load(adapter_model_path, map_location="cpu")
print("\nLoRA Adapter Layer Details:")
print("-" * 60)
total_lora_params = 0
layer_info = {}
for name, tensor in adapter_weights.items():
size = tensor.numel()
total_lora_params += size
# Parse layer name
parts = name.split('.')
if 'lora_A' in name or 'lora_B' in name:
# Extract module info
module_name = '.'.join(parts[:-2])
lora_type = parts[-2] # lora_A or lora_B
if module_name not in layer_info:
layer_info[module_name] = {}
layer_info[module_name][lora_type] = {
'shape': list(tensor.shape),
'params': size
}
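
            # A LoRA pair factorizes a layer's weight update: lora_A has shape
            # (r, in_features) and lora_B has shape (out_features, r), so the
            # pair stores r * (in_features + out_features) parameters in place
            # of a dense in_features * out_features delta.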
            # Display layer information
            for module, info in sorted(layer_info.items()):
                print(f"\nModule: {module}")
                if 'lora_A' in info and 'lora_B' in info:
                    shape_a = info['lora_A']['shape']
                    shape_b = info['lora_B']['shape']
                    params_a = info['lora_A']['params']
                    params_b = info['lora_B']['params']

                    print(f" LoRA A: {shape_a} = {params_a:,} parameters")
                    print(f" LoRA B: {shape_b} = {params_b:,} parameters")
                    print(f" Total: {params_a + params_b:,} parameters")

                    # Approximate size of the dense layer the pair adapts:
                    # in_features (shape_a[1]) * out_features (shape_b[0])
                    original_size = shape_a[1] * shape_b[0]
                    compression_ratio = original_size / (params_a + params_b)
                    print(f" Original layer size (approx): {original_size:,} parameters")
                    print(f" Compression ratio: {compression_ratio:.1f}x")
print("\n" + "=" * 60)
print(f"Total LoRA parameters: {total_lora_params:,}")
print(f"Percentage of base model: {(total_lora_params / total_params) * 100:.2f}%")
# Calculate theoretical size
r = adapter_config['r']
num_modules = len(adapter_config['target_modules'])
# For GPT models, typical dimensions
if "DialoGPT" in config['experiment']['base_model']:
hidden_size = 768 # DialoGPT-small uses 768
print(f"\nTheoretical calculation (hidden_size={hidden_size}, r={r}):")
print(f" Per module: 2 * {hidden_size} * {r} = {2 * hidden_size * r:,} parameters")
print(f" Total ({num_modules} modules): {2 * hidden_size * r * num_modules:,} parameters")
    else:
        print(f"\nNo saved adapter found at: {adapter_path}")
        print("Run training first to generate the adapter.")

        # Show theoretical sizes based on config
        r = adapter_config['r']
        print(f"\nTheoretical LoRA sizes with r={r}:")
        print(f" For hidden_size=768 (DialoGPT-small): {2 * 768 * r:,} params per module")
        print(f" For hidden_size=1024 (medium models): {2 * 1024 * r:,} params per module")
        print(f" For hidden_size=1280 (GPT-2 large): {2 * 1280 * r:,} params per module")

if __name__ == "__main__":
analyze_adapter_sizes()