#!/usr/bin/env python3
"""
Analyze the size and structure of LoRA adapters
"""
import sys
from pathlib import Path

import torch
import yaml
from peft import PeftModel

# Make the project root importable when running this script directly
sys.path.append(str(Path(__file__).parent.parent))

from src.progressive_model import ProgressiveReasoningModel


def analyze_adapter_sizes():
    # Load configuration
    with open("config/training_config.yaml") as f:
        config = yaml.safe_load(f)

    print("=" * 60)
    print("LoRA Adapter Size Analysis")
    print("=" * 60)

    # Get the adapter configuration for the first progressive stage
    basic_cot_config = config["progressive_stages"][0]
    adapter_config = basic_cot_config["adapter_config"]

    print("\nConfiguration for 'basic_cot' adapter:")
    print(f"  - r (rank): {adapter_config['r']}")
    print(f"  - lora_alpha: {adapter_config['lora_alpha']}")
    print(f"  - lora_dropout: {adapter_config['lora_dropout']}")
    print(f"  - target_modules: {adapter_config['target_modules']}")

    # Load the base model to get dimensions
    print("\nLoading base model to analyze dimensions...")
    model_wrapper = ProgressiveReasoningModel(config)
    model_wrapper.setup_base_model()

    # Analyze model architecture
    print(f"\nBase model: {config['experiment']['base_model']}")

    # Count parameters in the base model
    total_params = sum(p.numel() for p in model_wrapper.model.parameters())
    print(f"Total base model parameters: {total_params:,}")

    # Load the saved adapter if it exists
    adapter_path = Path(config["experiment"]["output_dir"]) / "adapters" / "basic_cot"

    if adapter_path.exists():
        print(f"\nLoading saved adapter from: {adapter_path}")

        # Prefer the safetensors checkpoint, fall back to the pickle format
        adapter_model_path = adapter_path / "adapter_model.safetensors"
        if not adapter_model_path.exists():
            adapter_model_path = adapter_path / "adapter_model.bin"

        if adapter_model_path.exists():
            if adapter_model_path.suffix == ".safetensors":
                from safetensors.torch import load_file
                adapter_weights = load_file(adapter_model_path)
            else:
                adapter_weights = torch.load(adapter_model_path, map_location="cpu")

            print("\nLoRA Adapter Layer Details:")
            print("-" * 60)

            total_lora_params = 0
            layer_info = {}

            for name, tensor in adapter_weights.items():
                size = tensor.numel()
                total_lora_params += size

                # Keys look like "...<module>.lora_A.weight"; group the
                # A/B matrices by module name
                parts = name.split('.')
                if 'lora_A' in name or 'lora_B' in name:
                    module_name = '.'.join(parts[:-2])
                    lora_type = parts[-2]  # lora_A or lora_B

                    if module_name not in layer_info:
                        layer_info[module_name] = {}

                    layer_info[module_name][lora_type] = {
                        'shape': list(tensor.shape),
                        'params': size,
                    }

            # Display per-module information
            for module, info in sorted(layer_info.items()):
                print(f"\nModule: {module}")
                if 'lora_A' in info and 'lora_B' in info:
                    shape_a = info['lora_A']['shape']  # (r, in_features)
                    shape_b = info['lora_B']['shape']  # (out_features, r)
                    params_a = info['lora_A']['params']
                    params_b = info['lora_B']['params']
                    print(f"  LoRA A: {shape_a} = {params_a:,} parameters")
                    print(f"  LoRA B: {shape_b} = {params_b:,} parameters")
                    print(f"  Total: {params_a + params_b:,} parameters")

                    # Size of the full weight matrix this LoRA pair adapts,
                    # i.e. in_features * out_features
                    original_size = shape_a[1] * shape_b[0]
                    compression_ratio = original_size / (params_a + params_b)
                    print(f"  Original layer size (approx): {original_size:,} parameters")
                    print(f"  Compression ratio: {compression_ratio:.1f}x")

            print("\n" + "=" * 60)
            print(f"Total LoRA parameters: {total_lora_params:,}")
            print(f"Percentage of base model: {(total_lora_params / total_params) * 100:.2f}%")

            # Theoretical size; this assumes square hidden_size x hidden_size
            # projections, so it is only a rough estimate for fused or
            # rectangular modules
            r = adapter_config['r']
            num_modules = len(adapter_config['target_modules'])

            if "DialoGPT" in config['experiment']['base_model']:
                hidden_size = 768  # DialoGPT-small uses 768
                print(f"\nTheoretical calculation (hidden_size={hidden_size}, r={r}):")
                print(f"  Per module: 2 * {hidden_size} * {r} = {2 * hidden_size * r:,} parameters")
                print(f"  Total ({num_modules} modules): {2 * hidden_size * r * num_modules:,} parameters")
    else:
        print(f"\nNo saved adapter found at: {adapter_path}")
        print("Run training first to generate the adapter.")

        # Show theoretical sizes based on config alone
        r = adapter_config['r']
        print(f"\nTheoretical LoRA sizes with r={r}:")
        print(f"  For hidden_size=768 (DialoGPT-small): {2 * 768 * r:,} params per module")
        print(f"  For hidden_size=1024 (DialoGPT/GPT-2 medium): {2 * 1024 * r:,} params per module")
        print(f"  For hidden_size=1280 (GPT-2 large): {2 * 1280 * r:,} params per module")
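
# --- Optional helpers (a sketch, not part of the original analysis) ---
# Two small utilities that make the arithmetic above explicit. The names
# lora_params_for_layer and verify_with_peft are illustrative additions,
# not part of the training pipeline.


def lora_params_for_layer(in_features: int, out_features: int, r: int) -> int:
    """Exact LoRA parameter count for one adapted layer.

    LoRA factorizes the weight update as B @ A, with A of shape
    (r, in_features) and B of shape (out_features, r), giving
    r * (in_features + out_features) parameters. The 2 * hidden_size * r
    estimate printed above is the special case where
    in_features == out_features == hidden_size.
    """
    return r * (in_features + out_features)


def verify_with_peft(base_model, adapter_path: Path) -> None:
    """Cross-check the manual count against PEFT's own bookkeeping.

    Assumes adapter_path is a standard PEFT checkpoint directory;
    is_trainable=True keeps the LoRA weights unfrozen so they appear
    in the trainable-parameter count.
    """
    peft_model = PeftModel.from_pretrained(base_model, str(adapter_path), is_trainable=True)
    peft_model.print_trainable_parameters()
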
if __name__ == "__main__":
    analyze_adapter_sizes()
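
# Worked example (hypothetical numbers): for DialoGPT-small (hidden_size=768)
# with r=8 and 2 target modules, the square-projection estimate gives
# 2 * 768 * 8 * 2 = 24,576 LoRA parameters, a tiny fraction of the roughly
# 124M parameters in the base model.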