#!/usr/bin/env python3
"""
Analyze the size and structure of LoRA adapters
"""

import sys
from pathlib import Path
import torch
import yaml
from peft import PeftModel, LoraConfig

# Add src to path
sys.path.append(str(Path(__file__).parent.parent))

from src.progressive_model import ProgressiveReasoningModel


def analyze_adapter_sizes():
    # Load configuration
    with open("config/training_config.yaml") as f:
        config = yaml.safe_load(f)

    print("=" * 60)
    print("LoRA Adapter Size Analysis")
    print("=" * 60)

    # Get adapter configuration from config
    basic_cot_config = config["progressive_stages"][0]
    adapter_config = basic_cot_config["adapter_config"]

    print("\nConfiguration for 'basic_cot' adapter:")
    print(f" - r (rank): {adapter_config['r']}")
    print(f" - lora_alpha: {adapter_config['lora_alpha']}")
    print(f" - lora_dropout: {adapter_config['lora_dropout']}")
    print(f" - target_modules: {adapter_config['target_modules']}")

    # Load the base model to get dimensions
    print("\nLoading base model to analyze dimensions...")
    model_wrapper = ProgressiveReasoningModel(config)
    model_wrapper.setup_base_model()

    # Analyze model architecture
    print(f"\nBase model: {config['experiment']['base_model']}")

    # Count parameters in base model
    total_params = sum(p.numel() for p in model_wrapper.model.parameters())
    print(f"Total base model parameters: {total_params:,}")

    # Load saved adapter if it exists
    adapter_path = Path(config["experiment"]["output_dir"]) / "adapters" / "basic_cot"
    if adapter_path.exists():
        print(f"\nLoading saved adapter from: {adapter_path}")

        # Load adapter state dict
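        # (recent PEFT versions typically save adapter_model.safetensors, older
        # ones adapter_model.bin, hence the fallback below)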
        adapter_model_path = adapter_path / "adapter_model.safetensors"
        if not adapter_model_path.exists():
            adapter_model_path = adapter_path / "adapter_model.bin"

        if adapter_model_path.exists():
            if adapter_model_path.suffix == ".safetensors":
                from safetensors.torch import load_file
                adapter_weights = load_file(adapter_model_path)
            else:
                adapter_weights = torch.load(adapter_model_path, map_location="cpu")

            print("\nLoRA Adapter Layer Details:")
            print("-" * 60)

            total_lora_params = 0
            layer_info = {}
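
            # PEFT state-dict keys normally end in ".lora_A.weight" / ".lora_B.weight";
            # the parsing below assumes that layout (parts[:-2] = module, parts[-2] = lora_A/B)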
            for name, tensor in adapter_weights.items():
                size = tensor.numel()
                total_lora_params += size

                # Parse layer name
                parts = name.split('.')
                if 'lora_A' in name or 'lora_B' in name:
                    # Extract module info
                    module_name = '.'.join(parts[:-2])
                    lora_type = parts[-2]  # lora_A or lora_B

                    if module_name not in layer_info:
                        layer_info[module_name] = {}

                    layer_info[module_name][lora_type] = {
                        'shape': list(tensor.shape),
                        'params': size
                    }

            # Display layer information
            for module, info in sorted(layer_info.items()):
                print(f"\nModule: {module}")
                if 'lora_A' in info and 'lora_B' in info:
                    shape_a = info['lora_A']['shape']
                    shape_b = info['lora_B']['shape']
                    params_a = info['lora_A']['params']
                    params_b = info['lora_B']['params']

                    print(f" LoRA A: {shape_a} = {params_a:,} parameters")
                    print(f" LoRA B: {shape_b} = {params_b:,} parameters")
                    print(f" Total: {params_a + params_b:,} parameters")

                    # Calculate original layer size (approximation)
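                    # lora_A is (r, in_features) and lora_B is (out_features, r), so the
                    # dense weight being adapted holds in_features * out_features values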
                    original_size = shape_a[1] * shape_b[0]
                    compression_ratio = original_size / (params_a + params_b)
                    print(f" Original layer size (approx): {original_size:,} parameters")
                    print(f" Compression ratio: {compression_ratio:.1f}x")

            print("\n" + "=" * 60)
            print(f"Total LoRA parameters: {total_lora_params:,}")
            print(f"Percentage of base model: {(total_lora_params / total_params) * 100:.2f}%")

            # Calculate theoretical size
            r = adapter_config['r']
            num_modules = len(adapter_config['target_modules'])

            # For GPT models, typical dimensions
            if "DialoGPT" in config['experiment']['base_model']:
                hidden_size = 768  # DialoGPT-small uses 768
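                # Assuming each target module is a square hidden_size x hidden_size projection,
                # it adds r*hidden_size (lora_A) + hidden_size*r (lora_B) = 2*hidden_size*r params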
                print(f"\nTheoretical calculation (hidden_size={hidden_size}, r={r}):")
                print(f" Per module: 2 * {hidden_size} * {r} = {2 * hidden_size * r:,} parameters")
                print(f" Total ({num_modules} modules): {2 * hidden_size * r * num_modules:,} parameters")
    else:
        print(f"\nNo saved adapter found at: {adapter_path}")
        print("Run training first to generate the adapter.")

        # Show theoretical sizes based on config
        r = adapter_config['r']
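        # Same 2 * hidden_size * r per-module estimate, for common GPT-2 family widths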
        print(f"\nTheoretical LoRA sizes with r={r}:")
        print(f" For hidden_size=768 (DialoGPT-small): {2 * 768 * r:,} params per module")
        print(f" For hidden_size=1024 (medium models): {2 * 1024 * r:,} params per module")
        print(f" For hidden_size=1280 (GPT-2 large): {2 * 1280 * r:,} params per module")


if __name__ == "__main__":
    analyze_adapter_sizes()