From 2c30e06f206299aee3b1603393571d6e4ddd61de Mon Sep 17 00:00:00 2001 From: Soma Nakamura Date: Thu, 10 Jul 2025 18:09:14 +0900 Subject: [PATCH] initial --- .devenv.flake.nix | 163 +++++++ .devenv/bash | 1 + .devenv/devenv.json | 1 + .devenv/flake.json | 1 + .devenv/gc/shell | 1 + .devenv/gc/shell-1-link | 1 + .devenv/imports.txt | 0 .devenv/input-paths.txt | 11 + .devenv/load-exports | 3 + .devenv/nix-eval-cache.db | Bin 0 -> 4096 bytes .devenv/nix-eval-cache.db-shm | Bin 0 -> 32768 bytes .devenv/nix-eval-cache.db-wal | Bin 0 -> 358472 bytes .devenv/profile | 1 + .devenv/run | 1 + .devenv/state/git-hooks/config.json | 1 + .devenv/tasks.db | Bin 0 -> 4096 bytes .devenv/tasks.db-shm | Bin 0 -> 32768 bytes .devenv/tasks.db-wal | Bin 0 -> 61832 bytes .gitignore | 32 ++ =2.5.0 | 33 ++ LORA_TARGET_MODULES.md | 124 +++++ config/README.md | 85 ++++ config/training_config.yaml | 36 ++ config/training_config_13b.yaml | 83 ++++ config/training_config_70b.yaml | 101 ++++ config/training_config_gemma2_small.yaml | 91 ++++ config/training_config_gemma3_1b.yaml | 102 ++++ ...training_config_gemma3_1b_cpu_offload.yaml | 133 ++++++ config/training_config_large.yaml | 98 ++++ config/training_config_llama_auth.yaml | 85 ++++ config/training_config_public.yaml | 82 ++++ devenv.lock | 139 ++++++ flake-minimal.nix | 95 ++++ flake.lock | 61 +++ flake.nix | 195 ++++++++ requirements-cpu.txt | 15 + requirements-torch.txt | 3 + requirements.txt | 13 + scripts/analyze_adapter_size.py | 137 ++++++ scripts/check_vram.py | 199 ++++++++ scripts/compare_models_tui.py | 183 +++++++ scripts/evaluate.py | 59 +++ scripts/simple_compare.py | 189 ++++++++ scripts/train_progressive.py | 131 +++++ src/__init__.py | 0 src/data_utils.py | 88 ++++ src/progressive_model.py | 366 ++++++++++++++ src/training.py | 450 ++++++++++++++++++ test_data_load.py | 35 ++ 49 files changed, 3628 insertions(+) create mode 100644 .devenv.flake.nix create mode 120000 .devenv/bash create mode 100644 .devenv/devenv.json create mode 100644 .devenv/flake.json create mode 120000 .devenv/gc/shell create mode 120000 .devenv/gc/shell-1-link create mode 100644 .devenv/imports.txt create mode 100644 .devenv/input-paths.txt create mode 100755 .devenv/load-exports create mode 100644 .devenv/nix-eval-cache.db create mode 100644 .devenv/nix-eval-cache.db-shm create mode 100644 .devenv/nix-eval-cache.db-wal create mode 120000 .devenv/profile create mode 120000 .devenv/run create mode 100644 .devenv/state/git-hooks/config.json create mode 100644 .devenv/tasks.db create mode 100644 .devenv/tasks.db-shm create mode 100644 .devenv/tasks.db-wal create mode 100644 .gitignore create mode 100644 =2.5.0 create mode 100644 LORA_TARGET_MODULES.md create mode 100644 config/README.md create mode 100644 config/training_config.yaml create mode 100644 config/training_config_13b.yaml create mode 100644 config/training_config_70b.yaml create mode 100644 config/training_config_gemma2_small.yaml create mode 100644 config/training_config_gemma3_1b.yaml create mode 100644 config/training_config_gemma3_1b_cpu_offload.yaml create mode 100644 config/training_config_large.yaml create mode 100644 config/training_config_llama_auth.yaml create mode 100644 config/training_config_public.yaml create mode 100644 devenv.lock create mode 100644 flake-minimal.nix create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 requirements-cpu.txt create mode 100644 requirements-torch.txt create mode 100644 requirements.txt create mode 100755 scripts/analyze_adapter_size.py create 
mode 100644 scripts/check_vram.py create mode 100755 scripts/compare_models_tui.py create mode 100755 scripts/evaluate.py create mode 100755 scripts/simple_compare.py create mode 100755 scripts/train_progressive.py create mode 100644 src/__init__.py create mode 100644 src/data_utils.py create mode 100644 src/progressive_model.py create mode 100644 src/training.py create mode 100644 test_data_load.py diff --git a/.devenv.flake.nix b/.devenv.flake.nix new file mode 100644 index 0000000..b7b7adc --- /dev/null +++ b/.devenv.flake.nix @@ -0,0 +1,163 @@ +{ + inputs = + let + version = "1.6.1"; +system = "x86_64-linux"; +devenv_root = "/home/centra/dev/pnn/progressive-llm-training"; +devenv_dotfile = ./.devenv; +devenv_dotfile_string = ".devenv"; +container_name = null; +devenv_tmpdir = "/run/user/1000"; +devenv_runtime = "/run/user/1000/devenv-adeda32"; +devenv_istesting = false; +devenv_direnvrc_latest_version = 1; + + in { + git-hooks.url = "github:cachix/git-hooks.nix"; + git-hooks.inputs.nixpkgs.follows = "nixpkgs"; + pre-commit-hooks.follows = "git-hooks"; + nixpkgs.url = "github:cachix/devenv-nixpkgs/rolling"; + devenv.url = "github:cachix/devenv?dir=src/modules"; + } // (if builtins.pathExists (devenv_dotfile + "/flake.json") + then builtins.fromJSON (builtins.readFile (devenv_dotfile + "/flake.json")) + else { }); + + outputs = { nixpkgs, ... }@inputs: + let + version = "1.6.1"; +system = "x86_64-linux"; +devenv_root = "/home/centra/dev/pnn/progressive-llm-training"; +devenv_dotfile = ./.devenv; +devenv_dotfile_string = ".devenv"; +container_name = null; +devenv_tmpdir = "/run/user/1000"; +devenv_runtime = "/run/user/1000/devenv-adeda32"; +devenv_istesting = false; +devenv_direnvrc_latest_version = 1; + + devenv = + if builtins.pathExists (devenv_dotfile + "/devenv.json") + then builtins.fromJSON (builtins.readFile (devenv_dotfile + "/devenv.json")) + else { }; + getOverlays = inputName: inputAttrs: + map + (overlay: + let + input = inputs.${inputName} or (throw "No such input `${inputName}` while trying to configure overlays."); + in + input.overlays.${overlay} or (throw "Input `${inputName}` has no overlay called `${overlay}`. Supported overlays: ${nixpkgs.lib.concatStringsSep ", " (builtins.attrNames input.overlays)}")) + inputAttrs.overlays or [ ]; + overlays = nixpkgs.lib.flatten (nixpkgs.lib.mapAttrsToList getOverlays (devenv.inputs or { })); + pkgs = import nixpkgs { + inherit system; + config = { + allowUnfree = devenv.allowUnfree or false; + allowBroken = devenv.allowBroken or false; + permittedInsecurePackages = devenv.permittedInsecurePackages or [ ]; + }; + inherit overlays; + }; + lib = pkgs.lib; + importModule = path: + if lib.hasPrefix "./" path + then if lib.hasSuffix ".nix" path + then ./. + (builtins.substring 1 255 path) + else ./. 
+ (builtins.substring 1 255 path) + "/devenv.nix" + else if lib.hasPrefix "../" path + then throw "devenv: ../ is not supported for imports" + else + let + paths = lib.splitString "/" path; + name = builtins.head paths; + input = inputs.${name} or (throw "Unknown input ${name}"); + subpath = "/${lib.concatStringsSep "/" (builtins.tail paths)}"; + devenvpath = "${input}" + subpath; + devenvdefaultpath = devenvpath + "/devenv.nix"; + in + if lib.hasSuffix ".nix" devenvpath + then devenvpath + else if builtins.pathExists devenvdefaultpath + then devenvdefaultpath + else throw (devenvdefaultpath + " file does not exist for input ${name}."); + project = pkgs.lib.evalModules { + specialArgs = inputs // { inherit inputs; }; + modules = [ + ({ config, ... }: { + _module.args.pkgs = pkgs.appendOverlays (config.overlays or [ ]); + }) + (inputs.devenv.modules + /top-level.nix) + { + devenv.cliVersion = version; + devenv.root = devenv_root; + devenv.dotfile = devenv_root + "/" + devenv_dotfile_string; + } + (pkgs.lib.optionalAttrs (inputs.devenv.isTmpDir or false) { + devenv.tmpdir = devenv_tmpdir; + devenv.runtime = devenv_runtime; + }) + (pkgs.lib.optionalAttrs (inputs.devenv.hasIsTesting or false) { + devenv.isTesting = devenv_istesting; + }) + (pkgs.lib.optionalAttrs (container_name != null) { + container.isBuilding = pkgs.lib.mkForce true; + containers.${container_name}.isBuilding = true; + }) + ({ options, ... }: { + config.devenv = pkgs.lib.optionalAttrs (builtins.hasAttr "direnvrcLatestVersion" options.devenv) { + direnvrcLatestVersion = devenv_direnvrc_latest_version; + }; + }) + ] ++ (map importModule (devenv.imports or [ ])) ++ [ + (if builtins.pathExists ./devenv.nix then ./devenv.nix else { }) + (devenv.devenv or { }) + (if builtins.pathExists ./devenv.local.nix then ./devenv.local.nix else { }) + (if builtins.pathExists (devenv_dotfile + "/cli-options.nix") then import (devenv_dotfile + "/cli-options.nix") else { }) + ]; + }; + config = project.config; + + options = pkgs.nixosOptionsDoc { + options = builtins.removeAttrs project.options [ "_module" ]; + warningsAreErrors = false; + # Unpack Nix types, e.g. literalExpression, mDoc. + transformOptions = + let isDocType = v: builtins.elem v [ "literalDocBook" "literalExpression" "literalMD" "mdDoc" ]; + in lib.attrsets.mapAttrs (_: v: + if v ? _type && isDocType v._type then + v.text + else if v ? 
_type && v._type == "derivation" then + v.name + else + v + ); + }; + + build = options: config: + lib.concatMapAttrs + (name: option: + if builtins.hasAttr "type" option then + if option.type.name == "output" || option.type.name == "outputOf" then { + ${name} = config.${name}; + } else { } + else + let v = build option config.${name}; + in if v != { } then { + ${name} = v; + } else { } + ) + options; + + systems = [ "x86_64-linux" "aarch64-linux" "x86_64-darwin" "aarch64-darwin" ]; + in + { + devShell = lib.genAttrs systems (system: config.shell); + packages = lib.genAttrs systems (system: { + optionsJSON = options.optionsJSON; + # deprecated + inherit (config) info procfileScript procfileEnv procfile; + ci = config.ciDerivation; + }); + devenv = config; + build = build project.options project.config; + }; + } diff --git a/.devenv/bash b/.devenv/bash new file mode 120000 index 0000000..3eab571 --- /dev/null +++ b/.devenv/bash @@ -0,0 +1 @@ +/nix/store/94lg0shvsfc845zy8gnflvpqxxiyijbz-bash-interactive-5.2p37 \ No newline at end of file diff --git a/.devenv/devenv.json b/.devenv/devenv.json new file mode 100644 index 0000000..bfa79af --- /dev/null +++ b/.devenv/devenv.json @@ -0,0 +1 @@ +{"inputs":{"nixpkgs":{"url":"github:NixOS/nixpkgs/nixos-unstable"},"nixpkgs-python":{"url":"github:cachix/nixpkgs-python","inputs":{"nixpkgs":{"follows":"nixpkgs"}}}},"allowUnfree":true} \ No newline at end of file diff --git a/.devenv/flake.json b/.devenv/flake.json new file mode 100644 index 0000000..c487dcb --- /dev/null +++ b/.devenv/flake.json @@ -0,0 +1 @@ +{"nixpkgs":{"url":"github:NixOS/nixpkgs/nixos-unstable"},"nixpkgs-python":{"url":"github:cachix/nixpkgs-python","inputs":{"nixpkgs":{"follows":"nixpkgs"}}}} \ No newline at end of file diff --git a/.devenv/gc/shell b/.devenv/gc/shell new file mode 120000 index 0000000..2b5306e --- /dev/null +++ b/.devenv/gc/shell @@ -0,0 +1 @@ +shell-1-link \ No newline at end of file diff --git a/.devenv/gc/shell-1-link b/.devenv/gc/shell-1-link new file mode 120000 index 0000000..eacdc2d --- /dev/null +++ b/.devenv/gc/shell-1-link @@ -0,0 +1 @@ +/nix/store/7fimdw1in7f1g0wxw5cr9pg26rs4rp5g-devenv-shell-env \ No newline at end of file diff --git a/.devenv/imports.txt b/.devenv/imports.txt new file mode 100644 index 0000000..e69de29 diff --git a/.devenv/input-paths.txt b/.devenv/input-paths.txt new file mode 100644 index 0000000..6d1c4e8 --- /dev/null +++ b/.devenv/input-paths.txt @@ -0,0 +1,11 @@ +/home/centra/.config/nixpkgs/config.nix +/home/centra/.config/nixpkgs/overlays +/home/centra/.config/nixpkgs/overlays.nix +/home/centra/.nixpkgs/config.nix +/home/centra/dev/pnn/progressive-llm-training/.devenv/flake.json +/home/centra/dev/pnn/progressive-llm-training/.devenv.flake.nix +/home/centra/dev/pnn/progressive-llm-training/.env +/home/centra/dev/pnn/progressive-llm-training/devenv.local.nix +/home/centra/dev/pnn/progressive-llm-training/devenv.lock +/home/centra/dev/pnn/progressive-llm-training/devenv.nix +/home/centra/dev/pnn/progressive-llm-training/devenv.yaml \ No newline at end of file diff --git a/.devenv/load-exports b/.devenv/load-exports new file mode 100755 index 0000000..c0b1498 --- /dev/null +++ b/.devenv/load-exports @@ -0,0 +1,3 @@ +export 
PATH='/home/centra/dev/pnn/progressive-llm-training/.devenv/state/venv/bin:/nix/store/bdqwd2frn9m7n3hj2436s0vlnv7mawpc-python3-3.11.13-env/bin:/nix/store/9w80x8njl1hcp8vlk1f3x17q4hcd2cqp-evaluate/bin:/nix/store/8df6wqahd2fqzl04kcs3xs32yqqimcxb-install-packages/bin:/nix/store/v5rz1h6ci23icfp6y228r2m0fqrdf408-install-packages-cpu/bin:/nix/store/69142b4sjmb4jffmyjb8nar6qzlgxnpg-prepare-data/bin:/nix/store/bhb6l6yfqknnwc7y5j5xc9k866hajv7b-train/bin:/nix/store/pbqah1qk4b5y14fqinr1h8zvhqy71v81-gcc-wrapper-14.3.0/bin:/nix/store/sa7j7cddyblhcb3ch3ds10w7nw75yjj1-gcc-14.3.0/bin:/nix/store/mdmsnfcvxyk5ynz7nx8nhss1wig0gljx-glibc-2.40-66-bin/bin:/nix/store/psy9v2asypgl9ylg8cnzkixc7fv0snj0-coreutils-9.7/bin:/nix/store/cadx5p7c0i06gf6h84iw9mrhx56imbv0-binutils-wrapper-2.44/bin:/nix/store/z3za8hfc24wb117s50p8b10agjkgm039-binutils-2.44/bin:/nix/store/dx4bdrs7mq3jfviqhszrc7l35ps9kg64-cmake-3.31.7/bin:/nix/store/1492q00cm64n0hs5966s8cqj6j0x5nxg-ninja-1.12.1/bin:/nix/store/h5khrpnjj3fb182sc32fx1z75w0lhksy-pkg-config-wrapper-0.29.2/bin:/nix/store/rzqvhv48m3nh8g3j4k6jmz6yqy8apr95-git-2.49.0/bin:/nix/store/nygfbkv0j6fvwwa82mdwxm4qfiq3p4q2-git-lfs-3.6.1/bin:/nix/store/fir4g1m8dvg46mh8silh3wnmm9mc0jix-htop-3.4.1/bin:/nix/store/9mc2m4sacbk4l7sc4w7m08m1x9bf5dgn-tmux-3.5a/bin:/nix/store/cxy72qdk41k3zjs5fw1nw1whv6wf7hv2-vim-9.1.1401/bin:/nix/store/74k8qwbfa6lm8psm2vjh2vj04fpr6c5g-openssl-3.4.1-bin/bin:/nix/store/m9k83ip1yx29xs94sa5x8j70s2vfgj6i-glib-2.84.2-dev/bin:/nix/store/zs5crhr67zp8cxn7dh4mwq08zw3sb31m-gettext-0.22.5/bin:/nix/store/rklrz4rwi03hxvz0kwh75vz55wb9b1qz-glib-2.84.2-bin/bin:/nix/store/xbpwk3xzanxj12157byj6wjagm2wfb3c-cuda-merged-12.8/bin:/nix/store/v0zrnzl3anb71ma5c2kx71dl8kyh0rf6-cuda_cuobjdump-12.8.90-bin/bin:/nix/store/v4mm21f67qki6ss6mqp3anlmaiw0r1zd-pre-commit-bin/bin:/nix/store/mq2i9br9h890bnahlds9jnff1jf6xjpb-python3.13-black-25.1.0/bin:/nix/store/sd81bvmch7njdpwx3lkjslixcbj5mivz-python3-3.13.4/bin:/nix/store/mdzm1l0rnpwp8ha0mbxll0db4r2p0xj3-python3.13-flake8-7.2.0/bin:/nix/store/xs72vlx7i6snrrrqx2zn529fbbqrwlwq-python3.13-pycodestyle-2.13.0/bin:/nix/store/5a8m3p0svp6myq1cz4ww431fsbh3xrg5-python3.13-pyflakes-3.3.2/bin:/nix/store/p6bch581drrxv3dm7vwxqazpbssjz4nv-python3.13-mypy-1.15.0/bin:/nix/store/1c8sm86wj45vwkb3ww2b870h9i9wna6r-patchelf-0.15.0/bin:/nix/store/psy9v2asypgl9ylg8cnzkixc7fv0snj0-coreutils-9.7/bin:/nix/store/c14zwgl8hf1wm0izij2i16xvk8ak70cy-findutils-4.10.0/bin:/nix/store/ibx4jfwlhjg4g0s6rrxrpaxa3ka8ns4m-diffutils-3.12/bin:/nix/store/pr318zsl44jdwpk9wk0sdrn19b6in7ah-gnused-4.9/bin:/nix/store/bc6zxzjnkjp4r9nhz5imy3cypvdh6r4n-gnugrep-3.12/bin:/nix/store/nv3y7zb1cwz1h9qy7nwz0s54j8dl1kqj-gawk-5.3.2/bin:/nix/store/lp82dcnrzljyix6yigwzrlpr1smvpmb0-gnutar-1.35/bin:/nix/store/6ag5dhk7sma61p6vl0kazfmpbrq08nqh-gzip-1.14/bin:/nix/store/ykdv4id6893gmkqwdmbimq237c1xqvq7-bzip2-1.0.8-bin/bin:/nix/store/6bwp1y45zlyvpr4ja2sk1yi9v5mrs94x-gnumake-4.4.1/bin:/nix/store/00zrahbb32nzawrmv9sjxn36h7qk9vrs-bash-5.2p37/bin:/nix/store/c9xmgszbf6i4dfq9r953khk9d7fdqigw-patch-2.8/bin:/nix/store/ikfwx7kbwz9zr7fziiac7f57jgbh3bnv-xz-5.8.1-bin/bin:/nix/store/3pdmbqy86wsbjdazxv1n3vrmj60vn0ri-file-5.45/bin:/run/wrappers/bin:/home/centra/.local/share/flatpak/exports/bin:/var/lib/flatpak/exports/bin:/home/centra/.nix-profile/bin:/nix/profile/bin:/home/centra/.local/state/nix/profile/bin:/etc/profiles/per-user/centra/bin:/nix/var/nix/profiles/default/bin:/run/current-system/sw/bin' +export VIRTUAL_ENV=/home/centra/dev/pnn/progressive-llm-training/.devenv/state/venv + diff --git a/.devenv/nix-eval-cache.db 
b/.devenv/nix-eval-cache.db new file mode 100644
Binary files /dev/null and b/.devenv/nix-eval-cache.db differ
diff --git a/.devenv/nix-eval-cache.db-shm b/.devenv/nix-eval-cache.db-shm
new file mode 100644
Binary files /dev/null and b/.devenv/nix-eval-cache.db-shm differ
diff --git a/.devenv/nix-eval-cache.db-wal b/.devenv/nix-eval-cache.db-wal
new file mode 100644
Binary files /dev/null and b/.devenv/nix-eval-cache.db-wal differ
diff --git a/.devenv/tasks.db b/.devenv/tasks.db
new file mode 100644
Binary files /dev/null and b/.devenv/tasks.db differ
diff --git a/.devenv/tasks.db-shm b/.devenv/tasks.db-shm
new file mode 100644
Binary files /dev/null and b/.devenv/tasks.db-shm differ
diff --git a/.devenv/tasks.db-wal b/.devenv/tasks.db-wal
new file mode 100644
Binary files /dev/null and b/.devenv/tasks.db-wal differ
diff --git a/=2.5.0 b/=2.5.0
new file mode 100644
flash-attn) (3.13.1)
+Requirement already satisfied: typing-extensions>=4.10.0 in /nix/store/x74hdbjsz4ck98w8lyxv8kkwxs1wm2il-python3.13-typing-extensions-4.13.2/lib/python3.13/site-packages (from torch->flash-attn) (4.13.2)
+Requirement already satisfied: sympy>=1.13.3 in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (1.13.3)
+Requirement already satisfied: networkx in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (3.3)
+Requirement already satisfied: jinja2 in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (3.1.4)
+Requirement already satisfied: fsspec in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (2024.6.1)
+Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.8.61 in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (12.8.61)
+Requirement already satisfied: nvidia-cuda-runtime-cu12==12.8.57 in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (12.8.57)
+Requirement already satisfied: nvidia-cuda-cupti-cu12==12.8.57 in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (12.8.57)
+Requirement already satisfied: nvidia-cudnn-cu12==9.7.1.26 in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (9.7.1.26)
+Requirement already satisfied: nvidia-cublas-cu12==12.8.3.14 in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (12.8.3.14)
+Requirement already satisfied: nvidia-cufft-cu12==11.3.3.41 in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (11.3.3.41)
+Requirement already satisfied: nvidia-curand-cu12==10.3.9.55 in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (10.3.9.55)
+Requirement already satisfied: nvidia-cusolver-cu12==11.7.2.55 in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (11.7.2.55)
+Requirement already satisfied: nvidia-cusparse-cu12==12.5.7.53 in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (12.5.7.53)
+Requirement already satisfied: nvidia-cusparselt-cu12==0.6.3 in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (0.6.3)
+Requirement already satisfied: nvidia-nccl-cu12==2.26.2 in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (2.26.2)
+Requirement already satisfied: nvidia-nvtx-cu12==12.8.55 in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (12.8.55)
+Requirement already satisfied: nvidia-nvjitlink-cu12==12.8.61 in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (12.8.61)
+Requirement already satisfied: nvidia-cufile-cu12==1.13.0.11 in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (1.13.0.11)
+Requirement already satisfied: triton==3.3.1 in ./.devenv/state/venv/lib/python3.11/site-packages (from torch->flash-attn) (3.3.1)
+Requirement already satisfied:
setuptools>=40.8.0 in ./.devenv/state/venv/lib/python3.11/site-packages (from triton==3.3.1->torch->flash-attn) (80.9.0) +Requirement already satisfied: mpmath<1.4,>=1.1.0 in ./.devenv/state/venv/lib/python3.11/site-packages (from sympy>=1.13.3->torch->flash-attn) (1.3.0) +Requirement already satisfied: MarkupSafe>=2.0 in ./.devenv/state/venv/lib/python3.11/site-packages (from jinja2->torch->flash-attn) (2.1.5) +Using cached einops-0.8.1-py3-none-any.whl (64 kB) +Installing collected packages: einops, flash-attn + +Successfully installed einops-0.8.1 flash-attn-2.8.0.post2 diff --git a/LORA_TARGET_MODULES.md b/LORA_TARGET_MODULES.md new file mode 100644 index 0000000..39aa5c8 --- /dev/null +++ b/LORA_TARGET_MODULES.md @@ -0,0 +1,124 @@ +# LoRA Target Modules Reference + +This document provides the correct target module names for different model architectures when using LoRA (Low-Rank Adaptation). + +## Model Architecture Detection + +Use the inspection script to find correct target modules: + +```bash +# In the nix development environment +python /home/centra/dev/pnn/inspect_conv1d_model.py [model_name] +``` + +## Common Model Architectures + +### GPT-2 / DialoGPT Models +- **Model Type**: GPT2LMHeadModel +- **Layer Type**: Conv1D (not Linear!) +- **Base Model**: microsoft/DialoGPT-small, gpt2, gpt2-medium, gpt2-large, gpt2-xl + +#### Attention Modules +- `c_attn` - Combined query, key, value projection (nf=3*hidden_size) +- `c_proj` - Output projection + +#### MLP Modules +- `mlp.c_fc` - Feed-forward up projection +- `mlp.c_proj` - Feed-forward down projection + +#### Recommended Configurations +```yaml +# Basic stage (attention only) +target_modules: ["c_attn", "c_proj"] + +# Advanced stage (attention + MLP) +target_modules: ["c_attn", "c_proj", "mlp.c_fc", "mlp.c_proj"] +``` + +### LLaMA Models +- **Model Type**: LlamaForCausalLM +- **Layer Type**: Linear +- **Base Model**: meta-llama/Llama-2-7b-hf, meta-llama/Llama-3.2-8B + +#### Attention Modules +- `q_proj` - Query projection +- `k_proj` - Key projection +- `v_proj` - Value projection +- `o_proj` - Output projection + +#### MLP Modules +- `gate_proj` - Gate projection +- `up_proj` - Up projection +- `down_proj` - Down projection + +#### Recommended Configurations +```yaml +# Basic stage (attention only) +target_modules: ["q_proj", "v_proj"] + +# Advanced stage (attention + MLP) +target_modules: ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] +``` + +### Mistral Models +- **Model Type**: MistralForCausalLM +- **Layer Type**: Linear +- **Base Model**: mistralai/Mistral-7B-v0.1 + +#### Target Modules (same as LLaMA) +```yaml +target_modules: ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] +``` + +### Qwen Models +- **Model Type**: QWenLMHeadModel +- **Layer Type**: Linear +- **Base Model**: Qwen/Qwen-7B + +#### Target Modules +```yaml +target_modules: ["c_attn", "c_proj", "w1", "w2"] +``` + +## Important Notes + +1. **Conv1D vs Linear**: GPT-2 based models use `Conv1D` layers, not `Linear` layers +2. **Module Patterns**: Use simple patterns like `"c_attn"` rather than full paths like `"transformer.h.0.attn.c_attn"` +3. **Testing**: Always test your configuration before training by creating a PEFT model +4. 
**Architecture Variations**: Different model families use different naming conventions + +## Troubleshooting + +### Error: "Target module not found" +- Run the inspection script to find actual module names +- Check if the model uses Conv1D or Linear layers +- Verify the module naming pattern for your specific model + +### Error: "No trainable parameters" +- Ensure target modules exist in the model +- Check that the module names match exactly +- Verify the model architecture is supported by PEFT + +## Testing Your Configuration + +```python +from peft import get_peft_model, LoraConfig, TaskType + +# Test configuration +lora_config = LoraConfig( + task_type=TaskType.CAUSAL_LM, + r=8, + lora_alpha=16, + lora_dropout=0.1, + target_modules=["c_attn", "c_proj"], # Your target modules + bias="none" +) + +# Try to create PEFT model +try: + peft_model = get_peft_model(model, lora_config) + peft_model.print_trainable_parameters() + print("✓ Configuration works!") +except Exception as e: + print(f"✗ Configuration failed: {e}") +``` \ No newline at end of file diff --git a/config/README.md b/config/README.md new file mode 100644 index 0000000..797a4a0 --- /dev/null +++ b/config/README.md @@ -0,0 +1,85 @@ +# Training Configuration Files + +This directory contains configuration files for different model sizes and use cases. + +## Available Configurations + +### Small Models (Testing) +- `training_config.yaml` - Default configuration for small models (DialoGPT-small) + - Memory: ~1GB VRAM + - Batch size: 8 + - No quantization + +### Medium Models (8B) +- `training_config_large.yaml` - Configuration for 8B models (Llama-3.2-8B) + - Memory: ~12GB VRAM with 4-bit quantization + - Batch size: 1, gradient accumulation: 16-64 + - 4-bit quantization enabled + +### Large Models (13B) +- `training_config_13b.yaml` - Configuration for 13B models + - Memory: ~16GB VRAM with 4-bit quantization + - Batch size: 1, gradient accumulation: 32-128 + - Higher LoRA ranks (32-128) + +### Extra Large Models (70B) +- `training_config_70b.yaml` - Configuration for 70B models + - Memory: ~40GB+ VRAM with 4-bit quantization + - Batch size: 1, gradient accumulation: 64-256 + - Maximum LoRA ranks (64-256) + - Multi-GPU support with FSDP + +## Configuration Parameters + +### Model Settings +- `load_in_4bit`: Enable 4-bit quantization (recommended for large models) +- `gradient_checkpointing`: Trade compute for memory +- `use_flash_attention_2`: Faster attention computation if available + +### Adapter Settings +- `r`: LoRA rank (higher = more parameters but better capacity) +- `lora_alpha`: LoRA scaling factor (typically 2x the rank) +- `init_lora_weights`: Set to `true` for identity initialization + +### Training Settings +- `per_device_batch_size`: Usually 1 for large models +- `gradient_accumulation_steps`: Effective batch size multiplier +- `learning_rate`: Lower for larger models +- `bf16`: Use bfloat16 for better numerical stability + +## Usage + +```bash +# For 8B models +python scripts/train_progressive.py --config config/training_config_large.yaml + +# For 13B models +python scripts/train_progressive.py --config config/training_config_13b.yaml + +# For 70B models (requires multiple GPUs) +python scripts/train_progressive.py --config config/training_config_70b.yaml +``` + +## Memory Requirements + +| Model Size | VRAM (4-bit) | VRAM (16-bit) | GPUs Recommended | +|------------|--------------|---------------|------------------| +| 8B | 12-16GB | 32GB | 1x RTX 4090 | +| 13B | 16-20GB | 52GB | 1x A100 | +| 70B | 40-60GB | 140GB 
| 2x A100 | + +## Tips for Large Models + +1. **Start with smaller models** to validate your approach +2. **Use gradient checkpointing** to reduce memory usage +3. **Monitor GPU memory** during training +4. **Use lower learning rates** for stability +5. **Consider multi-GPU setup** for 70B+ models +6. **Enable flash attention** if available for speed + +## Troubleshooting + +- **OOM errors**: Reduce batch size or enable gradient checkpointing +- **Slow training**: Enable flash attention, use bf16 +- **Poor convergence**: Adjust learning rate or warmup steps +- **Multi-GPU issues**: Check FSDP configuration \ No newline at end of file diff --git a/config/training_config.yaml b/config/training_config.yaml new file mode 100644 index 0000000..ba68d02 --- /dev/null +++ b/config/training_config.yaml @@ -0,0 +1,36 @@ +experiment: + name: "progressive_reasoning_experiment" + base_model: "microsoft/DialoGPT-small" # Lightweight model for testing + output_dir: "./outputs" + use_wandb: false + wandb_project: "matsuo-llm-comp-2025" + +model: + load_in_4bit: false # Disable quantization for small model + bnb_4bit_compute_dtype: "bfloat16" + bnb_4bit_use_double_quant: true + device_map: "auto" + +progressive_stages: + - name: "basic_cot" + description: "Basic Chain-of-Thought reasoning" + dataset_path: "./data/basic_cot/" + adapter_config: + r: 8 + lora_alpha: 16 + lora_dropout: 0.1 + target_modules: ["c_attn", "c_proj"] + training: + num_epochs: 2 + per_device_batch_size: 8 # Increase batch size for small model + gradient_accumulation_steps: 2 # Reduce accumulation steps + learning_rate: 5e-4 # Higher learning rate for faster training + warmup_steps: 50 + max_length: 1024 # Shorter sequences + +evaluation: + benchmarks: + - "HLE" # Humanity's Last Exam + - "Do-Not-Answer" + save_results: true + results_dir: "./outputs/evaluation_results" \ No newline at end of file diff --git a/config/training_config_13b.yaml b/config/training_config_13b.yaml new file mode 100644 index 0000000..59fd626 --- /dev/null +++ b/config/training_config_13b.yaml @@ -0,0 +1,83 @@ +experiment: + name: "progressive_reasoning_13b" + base_model: "meta-llama/Llama-3.2-13B" # 13B model + output_dir: "./outputs" + use_wandb: true + wandb_project: "matsuo-llm-comp-2025" + +model: + load_in_4bit: true + bnb_4bit_compute_dtype: "bfloat16" + bnb_4bit_use_double_quant: true + bnb_4bit_quant_type: "nf4" + device_map: "auto" + gradient_checkpointing: true + use_flash_attention_2: true + +progressive_stages: + - name: "basic_cot" + description: "Basic Chain-of-Thought reasoning" + dataset_path: "./data/basic_cot/" + adapter_config: + r: 32 # Higher rank for 13B models + lora_alpha: 64 + lora_dropout: 0.05 + target_modules: ["q_proj", "v_proj", "k_proj", "o_proj"] + init_lora_weights: true + training: + num_epochs: 1 + per_device_batch_size: 1 + gradient_accumulation_steps: 32 + learning_rate: 1e-4 + warmup_steps: 100 + max_length: 2048 + bf16: true + max_grad_norm: 0.3 + weight_decay: 0.001 + + - name: "math_reasoning" + description: "Mathematical reasoning with think tags" + dataset_path: "./data/math_reasoning/" + inherit_from: "basic_cot" + adapter_config: + r: 64 + lora_alpha: 128 + lora_dropout: 0.05 + target_modules: ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] + init_lora_weights: true + training: + num_epochs: 2 + per_device_batch_size: 1 + gradient_accumulation_steps: 64 + learning_rate: 8e-5 + warmup_steps: 200 + max_length: 4096 + bf16: true + max_grad_norm: 0.3 + + - name: "complex_reasoning" + 
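+    # Illustrative sanity check (an assumption about workflow, not something the pipeline runs):
+    # before committing to r=128, confirm the target_modules below actually exist in the checkpoint.
+    #   from transformers import AutoModelForCausalLM
+    #   m = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-13B")
+    #   print(sorted({n.split(".")[-1] for n, _ in m.named_modules() if n.endswith("proj")}))
+    # The printed names should include q_proj/k_proj/v_proj/o_proj plus the MLP projections.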
description: "Complex multi-step reasoning" + dataset_path: "./data/complex_reasoning/" + inherit_from: "math_reasoning" + adapter_config: + r: 128 # Maximum rank for 13B models + lora_alpha: 256 + lora_dropout: 0.05 + target_modules: ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] + init_lora_weights: true + training: + num_epochs: 1 + per_device_batch_size: 1 + gradient_accumulation_steps: 128 + learning_rate: 5e-5 + warmup_steps: 300 + max_length: 8192 + bf16: true + max_grad_norm: 0.3 + +evaluation: + benchmarks: + - "HLE" + - "Do-Not-Answer" + save_results: true + results_dir: "./outputs/evaluation_results" \ No newline at end of file diff --git a/config/training_config_70b.yaml b/config/training_config_70b.yaml new file mode 100644 index 0000000..ed44f42 --- /dev/null +++ b/config/training_config_70b.yaml @@ -0,0 +1,101 @@ +experiment: + name: "progressive_reasoning_70b" + base_model: "meta-llama/Llama-3.2-70B" # 70B model - requires significant resources + output_dir: "./outputs" + use_wandb: true + wandb_project: "matsuo-llm-comp-2025" + +model: + load_in_4bit: true + bnb_4bit_compute_dtype: "bfloat16" + bnb_4bit_use_double_quant: true + bnb_4bit_quant_type: "nf4" + device_map: "auto" + gradient_checkpointing: true + use_flash_attention_2: true + +progressive_stages: + - name: "basic_cot" + description: "Basic Chain-of-Thought reasoning" + dataset_path: "./data/basic_cot/" + adapter_config: + r: 64 # Even higher rank for 70B models + lora_alpha: 128 + lora_dropout: 0.05 + target_modules: ["q_proj", "v_proj", "k_proj", "o_proj"] + init_lora_weights: true + training: + num_epochs: 1 + per_device_batch_size: 1 + gradient_accumulation_steps: 64 + learning_rate: 5e-5 # Lower learning rate for stability + warmup_steps: 200 + max_length: 2048 + bf16: true + max_grad_norm: 0.3 + weight_decay: 0.001 + dataloader_num_workers: 2 + + - name: "math_reasoning" + description: "Mathematical reasoning with think tags" + dataset_path: "./data/math_reasoning/" + inherit_from: "basic_cot" + adapter_config: + r: 128 + lora_alpha: 256 + lora_dropout: 0.05 + target_modules: ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] + init_lora_weights: true + training: + num_epochs: 1 + per_device_batch_size: 1 + gradient_accumulation_steps: 128 + learning_rate: 3e-5 + warmup_steps: 300 + max_length: 4096 + bf16: true + max_grad_norm: 0.3 + dataloader_num_workers: 2 + + - name: "complex_reasoning" + description: "Complex multi-step reasoning" + dataset_path: "./data/complex_reasoning/" + inherit_from: "math_reasoning" + adapter_config: + r: 256 # Maximum rank for 70B models + lora_alpha: 512 + lora_dropout: 0.05 + target_modules: ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] + init_lora_weights: true + training: + num_epochs: 1 + per_device_batch_size: 1 + gradient_accumulation_steps: 256 + learning_rate: 2e-5 + warmup_steps: 500 + max_length: 8192 + bf16: true + max_grad_norm: 0.3 + dataloader_num_workers: 2 + +evaluation: + benchmarks: + - "HLE" + - "Do-Not-Answer" + save_results: true + results_dir: "./outputs/evaluation_results" + +# Additional settings for 70B models +optimization: + gradient_checkpointing: true + gradient_checkpointing_kwargs: + use_reentrant: false + ddp_find_unused_parameters: false + # Multi-GPU settings + fsdp: "full_shard auto_wrap" + fsdp_transformer_layer_cls_to_wrap: "LlamaDecoderLayer" + fsdp_min_num_params: 1000000 + fsdp_config: + min_num_params: 1000000 + sharding_strategy: "FULL_SHARD" + 
cpu_offload: false \ No newline at end of file diff --git a/config/training_config_gemma2_small.yaml b/config/training_config_gemma2_small.yaml new file mode 100644 index 0000000..fa035d6 --- /dev/null +++ b/config/training_config_gemma2_small.yaml @@ -0,0 +1,91 @@ +experiment: + name: "progressive_reasoning_gemma2_small" + base_model: "google/gemma-2-2b-it" # Instruction-tuned version + output_dir: "./outputs" + use_wandb: true + wandb_project: "matsuo-llm-comp-2025" + +model: + load_in_4bit: false # 2B model is manageable without quantization + bnb_4bit_compute_dtype: "bfloat16" + bnb_4bit_use_double_quant: true + device_map: "auto" + gradient_checkpointing: false + use_flash_attention_2: false + use_eager_attention: true # Required for Gemma 3 models + +progressive_stages: + - name: "basic_cot" + description: "Basic Chain-of-Thought reasoning" + dataset_path: "./data/basic_cot/" + adapter_config: + r: 8 # Start with smaller rank for small model + lora_alpha: 16 + lora_dropout: 0.1 + target_modules: ["q_proj", "k_proj", "v_proj", "o_proj"] + init_lora_weights: true + training: + num_epochs: 3 + per_device_batch_size: 8 # Larger batch size for small model + gradient_accumulation_steps: 2 + learning_rate: 5e-4 # Higher learning rate for small model + warmup_steps: 50 + max_length: 1024 + bf16: true + max_grad_norm: 1.0 + weight_decay: 0.001 + save_steps: 50 + logging_steps: 10 + + - name: "math_reasoning" + description: "Mathematical reasoning with think tags" + dataset_path: "./data/math_reasoning/" + inherit_from: "basic_cot" + adapter_config: + r: 16 + lora_alpha: 32 + lora_dropout: 0.1 + target_modules: ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] + init_lora_weights: true + training: + num_epochs: 3 + per_device_batch_size: 4 + gradient_accumulation_steps: 4 + learning_rate: 3e-4 + warmup_steps: 100 + max_length: 2048 + bf16: true + max_grad_norm: 1.0 + + - name: "complex_reasoning" + description: "Complex multi-step reasoning with Mixture-of-Thoughts" + dataset_path: "open-r1/Mixture-of-Thoughts" # HuggingFace dataset + inherit_from: "math_reasoning" + adapter_config: + r: 32 + lora_alpha: 64 + lora_dropout: 0.1 + target_modules: ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] + init_lora_weights: true + training: + num_epochs: 1 # Large dataset, fewer epochs + per_device_batch_size: 2 + gradient_accumulation_steps: 8 + learning_rate: 2e-4 + warmup_steps: 200 + max_length: 4096 + bf16: true + max_grad_norm: 1.0 + save_steps: 500 + logging_steps: 50 + dataset_config: + streaming: true + max_samples: 30000 + split: "train" + +evaluation: + benchmarks: + - "HLE" + - "Do-Not-Answer" + save_results: true + results_dir: "./outputs/evaluation_results" \ No newline at end of file diff --git a/config/training_config_gemma3_1b.yaml b/config/training_config_gemma3_1b.yaml new file mode 100644 index 0000000..2433612 --- /dev/null +++ b/config/training_config_gemma3_1b.yaml @@ -0,0 +1,102 @@ +experiment: + name: "progressive_reasoning_gemma3_1b" + base_model: "google/gemma-3-1b-pt" # Using Gemma 2 2B (1B might not be available) + output_dir: "./outputs" + use_wandb: true + wandb_project: "matsuo-llm-comp-2025" + +model: + load_in_4bit: false + bnb_4bit_compute_dtype: "bfloat16" + bnb_4bit_use_double_quant: true + device_map: "auto" + gradient_checkpointing: false # Not needed for small models + use_flash_attention_2: false + use_eager_attention: true + +progressive_stages: + - name: "basic_cot" + description: "Basic Chain-of-Thought 
reasoning" + dataset_path: "./data/basic_cot/" + adapter_config: + r: 8 + lora_alpha: 16 + lora_dropout: 0.1 + target_modules: ["q_proj", "k_proj", "v_proj", "o_proj"] # Gemma attention modules + init_lora_weights: true + training: + num_epochs: 2 + per_device_batch_size: 8 + gradient_accumulation_steps: 2 + learning_rate: 5e-4 + warmup_steps: 50 + max_length: 1024 + fp16: false + bf16: true + max_grad_norm: 1.0 + weight_decay: 0.001 + save_steps: 100 + logging_steps: 10 + + - name: "math_reasoning" + description: "Mathematical reasoning with OpenR1-Math-220k dataset" + dataset_path: "open-r1/OpenR1-Math-220k" # HuggingFace dataset + inherit_from: "basic_cot" + adapter_config: + r: 16 + lora_alpha: 32 + lora_dropout: 0.1 + target_modules: ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] + init_lora_weights: true + training: + num_epochs: 1 # Large dataset, fewer epochs + per_device_batch_size: 4 + gradient_accumulation_steps: 4 + learning_rate: 3e-4 + warmup_steps: 100 + max_length: 2048 + bf16: true + max_grad_norm: 1.0 + weight_decay: 0.001 + save_steps: 1000 + logging_steps: 100 + dataset_config: + # OpenR1-Math-220k specific settings + streaming: true # Use streaming for large dataset + max_samples: 200000 # Limit samples for faster training + split: "train" + + - name: "complex_reasoning" + description: "Complex multi-step reasoning with Mixture-of-Thoughts" + dataset_path: "open-r1/Mixture-of-Thoughts" # HuggingFace dataset + inherit_from: "math_reasoning" + adapter_config: + r: 32 + lora_alpha: 64 + lora_dropout: 0.1 + target_modules: ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] + init_lora_weights: true + training: + num_epochs: 1 # Large dataset, fewer epochs + per_device_batch_size: 2 + gradient_accumulation_steps: 8 + learning_rate: 2e-4 + warmup_steps: 200 + max_length: 4096 + bf16: true + max_grad_norm: 1.0 + weight_decay: 0.001 + save_steps: 500 + logging_steps: 50 + dataset_config: + # Mixture-of-Thoughts specific settings + streaming: true # Use streaming for large dataset + max_samples: 30000 # Limit samples for faster training + split: "train" + +evaluation: + benchmarks: + - "HLE" + - "Do-Not-Answer" + save_results: true + results_dir: "./outputs/evaluation_results" diff --git a/config/training_config_gemma3_1b_cpu_offload.yaml b/config/training_config_gemma3_1b_cpu_offload.yaml new file mode 100644 index 0000000..2b4158c --- /dev/null +++ b/config/training_config_gemma3_1b_cpu_offload.yaml @@ -0,0 +1,133 @@ +experiment: + name: "progressive_reasoning_gemma3_1b_cpu_offload" + base_model: "google/gemma-3-1b-pt" # Using Gemma 3 1B + output_dir: "./outputs" + use_wandb: true + wandb_project: "matsuo-llm-comp-2025" + +model: + load_in_4bit: true # Enable 4-bit quantization for QLoRA + bnb_4bit_compute_dtype: "bfloat16" + bnb_4bit_use_double_quant: true + bnb_4bit_quant_type: "nf4" + device_map: "auto" # Let accelerate handle device placement + max_memory: + 0: "5GB" # Limit GPU memory to 3GB (leave room for CUDA kernels) + "cpu": "32GB" # Allow up to 32GB CPU RAM + offload_folder: "./offload" # Directory for disk offloading if needed + gradient_checkpointing: true # Trade compute for memory + use_flash_attention_2: false + use_eager_attention: true + +progressive_stages: + - name: "basic_cot" + description: "Basic Chain-of-Thought reasoning" + dataset_path: "./data/basic_cot/" + adapter_config: + r: 8 # Lower rank for memory efficiency + lora_alpha: 16 + lora_dropout: 0.1 + target_modules: ["q_proj", "k_proj", 
"v_proj", "o_proj"] + init_lora_weights: true + training: + num_epochs: 2 + per_device_batch_size: 2 # Smaller batch size + gradient_accumulation_steps: 8 # Compensate with gradient accumulation + learning_rate: 5e-4 + warmup_steps: 50 + max_length: 512 # Shorter sequences for memory + bf16: true + max_grad_norm: 1.0 + weight_decay: 0.001 + save_steps: 100 + logging_steps: 10 + dataloader_num_workers: 0 # Disable multiprocessing to save memory + optim: "paged_adamw_8bit" # Use 8-bit optimizer + + - name: "math_reasoning" + description: "Mathematical reasoning with OpenR1-Math-220k dataset" + dataset_path: "open-r1/OpenR1-Math-220k" + inherit_from: "basic_cot" + adapter_config: + r: 16 + lora_alpha: 32 + lora_dropout: 0.1 + target_modules: ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] + init_lora_weights: true + training: + num_epochs: 1 + per_device_batch_size: 1 # Minimal batch size + gradient_accumulation_steps: 16 + learning_rate: 3e-4 + warmup_steps: 100 + max_length: 1024 + bf16: true + max_grad_norm: 1.0 + weight_decay: 0.001 + save_steps: 1000 + logging_steps: 100 + optim: "paged_adamw_8bit" + dataset_config: + streaming: true + max_samples: 200000 # Reduced for testing + split: "train" + + - name: "complex_reasoning" + description: "Complex multi-step reasoning with Mixture-of-Thoughts" + dataset_path: "open-r1/Mixture-of-Thoughts" # HuggingFace dataset + inherit_from: "math_reasoning" + adapter_config: + r: 32 + lora_alpha: 64 + lora_dropout: 0.1 + target_modules: ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] + init_lora_weights: true + training: + num_epochs: 1 + per_device_batch_size: 1 + gradient_accumulation_steps: 32 + learning_rate: 2e-4 + warmup_steps: 200 + max_length: 2048 + bf16: true + max_grad_norm: 1.0 + weight_decay: 0.001 + optim: "paged_adamw_8bit" + save_steps: 500 + logging_steps: 50 + dataset_config: + streaming: true + max_samples: 300000 # Limited for CPU offload config + split: "train" + +evaluation: + benchmarks: + - "HLE" + - "Do-Not-Answer" + save_results: true + results_dir: "./outputs/evaluation_results" + +# DeepSpeed configuration for advanced CPU offloading (optional) +# Uncomment to use DeepSpeed ZeRO-2 with CPU offload +# deepspeed: +# zero_optimization: +# stage: 2 +# offload_optimizer: +# device: "cpu" +# pin_memory: true +# offload_param: +# device: "cpu" +# pin_memory: true +# overlap_comm: true +# contiguous_gradients: true +# sub_group_size: 1e9 +# reduce_bucket_size: 1e6 + +# FSDP configuration for distributed training (optional) +# Uncomment to use FSDP with CPU offload +# fsdp: +# sharding_strategy: "FULL_SHARD" +# cpu_offload: true +# auto_wrap_policy: "TRANSFORMER_BASED_WRAP" +# transformer_layer_cls_to_wrap: "GemmaDecoderLayer" +# min_num_params: 1e6 diff --git a/config/training_config_large.yaml b/config/training_config_large.yaml new file mode 100644 index 0000000..22cbd3b --- /dev/null +++ b/config/training_config_large.yaml @@ -0,0 +1,98 @@ +experiment: + name: "progressive_reasoning_large_model" + base_model: "meta-llama/Llama-3.2-8B" # Or other whitelisted models + output_dir: "./outputs" + use_wandb: true + wandb_project: "matsuo-llm-comp-2025" + +model: + load_in_4bit: true # Enable 4-bit quantization for memory efficiency + bnb_4bit_compute_dtype: "bfloat16" + bnb_4bit_use_double_quant: true + bnb_4bit_quant_type: "nf4" + device_map: "auto" + # Additional memory optimizations + gradient_checkpointing: true + use_flash_attention_2: true # If available + 
+progressive_stages: + - name: "basic_cot" + description: "Basic Chain-of-Thought reasoning" + dataset_path: "./data/basic_cot/" + adapter_config: + r: 16 # Larger rank for bigger models + lora_alpha: 32 + lora_dropout: 0.05 + target_modules: ["q_proj", "v_proj", "k_proj", "o_proj"] + init_lora_weights: true # Identity initialization + training: + num_epochs: 1 + per_device_batch_size: 1 # Small batch size for large models + gradient_accumulation_steps: 16 # Effective batch size = 16 + learning_rate: 2e-4 + warmup_steps: 100 + max_length: 2048 + fp16: false + bf16: true + max_grad_norm: 0.3 + weight_decay: 0.001 + save_steps: 50 + logging_steps: 10 + + - name: "math_reasoning" + description: "Mathematical reasoning with think tags" + dataset_path: "./data/math_reasoning/" + inherit_from: "basic_cot" + adapter_config: + r: 32 # Increase rank for more complex tasks + lora_alpha: 64 + lora_dropout: 0.05 + target_modules: ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] + init_lora_weights: true + training: + num_epochs: 2 + per_device_batch_size: 1 + gradient_accumulation_steps: 32 # Effective batch size = 32 + learning_rate: 1e-4 + warmup_steps: 200 + max_length: 4096 + bf16: true + max_grad_norm: 0.3 + weight_decay: 0.001 + + - name: "complex_reasoning" + description: "Complex multi-step reasoning" + dataset_path: "./data/complex_reasoning/" + inherit_from: "math_reasoning" + adapter_config: + r: 64 # Maximum rank for most complex tasks + lora_alpha: 128 + lora_dropout: 0.05 + target_modules: ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] + init_lora_weights: true + training: + num_epochs: 2 + per_device_batch_size: 1 + gradient_accumulation_steps: 64 # Effective batch size = 64 + learning_rate: 5e-5 + warmup_steps: 300 + max_length: 8192 + bf16: true + max_grad_norm: 0.3 + weight_decay: 0.001 + +evaluation: + benchmarks: + - "HLE" + - "Do-Not-Answer" + save_results: true + results_dir: "./outputs/evaluation_results" + +# Memory optimization settings +optimization: + gradient_checkpointing: true + gradient_checkpointing_kwargs: + use_reentrant: false + ddp_find_unused_parameters: false + fsdp: "full_shard auto_wrap" # For multi-GPU setups + fsdp_transformer_layer_cls_to_wrap: "LlamaDecoderLayer" \ No newline at end of file diff --git a/config/training_config_llama_auth.yaml b/config/training_config_llama_auth.yaml new file mode 100644 index 0000000..090df6f --- /dev/null +++ b/config/training_config_llama_auth.yaml @@ -0,0 +1,85 @@ +experiment: + name: "progressive_reasoning_llama_auth" + base_model: "meta-llama/Llama-3.2-8B" + output_dir: "./outputs" + use_wandb: true + wandb_project: "matsuo-llm-comp-2025" + +model: + load_in_4bit: true + bnb_4bit_compute_dtype: "bfloat16" + bnb_4bit_use_double_quant: true + bnb_4bit_quant_type: "nf4" + device_map: "auto" + gradient_checkpointing: true + use_flash_attention_2: true + # Add your HuggingFace token here, or set HF_TOKEN environment variable + # hf_token: "your_token_here" + +progressive_stages: + - name: "basic_cot" + description: "Basic Chain-of-Thought reasoning" + dataset_path: "./data/basic_cot/" + adapter_config: + r: 16 + lora_alpha: 32 + lora_dropout: 0.05 + target_modules: ["q_proj", "v_proj", "k_proj", "o_proj"] + init_lora_weights: true + training: + num_epochs: 1 + per_device_batch_size: 1 + gradient_accumulation_steps: 16 + learning_rate: 2e-4 + warmup_steps: 100 + max_length: 2048 + bf16: true + max_grad_norm: 0.3 + weight_decay: 0.001 + + - name: "math_reasoning" + 
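+    # The hf_token note in the model section above can also be satisfied from the shell
+    # (illustrative; huggingface_hub reads HF_TOKEN automatically):
+    #   export HF_TOKEN=your_token_here
+    #   # or, interactively:
+    #   huggingface-cli login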
description: "Mathematical reasoning with think tags" + dataset_path: "./data/math_reasoning/" + inherit_from: "basic_cot" + adapter_config: + r: 32 + lora_alpha: 64 + lora_dropout: 0.05 + target_modules: ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] + init_lora_weights: true + training: + num_epochs: 2 + per_device_batch_size: 1 + gradient_accumulation_steps: 32 + learning_rate: 1e-4 + warmup_steps: 200 + max_length: 4096 + bf16: true + max_grad_norm: 0.3 + + - name: "complex_reasoning" + description: "Complex multi-step reasoning" + dataset_path: "./data/complex_reasoning/" + inherit_from: "math_reasoning" + adapter_config: + r: 64 + lora_alpha: 128 + lora_dropout: 0.05 + target_modules: ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] + init_lora_weights: true + training: + num_epochs: 2 + per_device_batch_size: 1 + gradient_accumulation_steps: 64 + learning_rate: 5e-5 + warmup_steps: 300 + max_length: 8192 + bf16: true + max_grad_norm: 0.3 + +evaluation: + benchmarks: + - "HLE" + - "Do-Not-Answer" + save_results: true + results_dir: "./outputs/evaluation_results" \ No newline at end of file diff --git a/config/training_config_public.yaml b/config/training_config_public.yaml new file mode 100644 index 0000000..a1abea4 --- /dev/null +++ b/config/training_config_public.yaml @@ -0,0 +1,82 @@ +experiment: + name: "progressive_reasoning_public_model" + base_model: "microsoft/DialoGPT-medium" # Public model, no authentication needed + output_dir: "./outputs" + use_wandb: false + wandb_project: "matsuo-llm-comp-2025" + +model: + load_in_4bit: false # DialoGPT is smaller, quantization not needed + bnb_4bit_compute_dtype: "bfloat16" + bnb_4bit_use_double_quant: true + device_map: "auto" + gradient_checkpointing: false + +progressive_stages: + - name: "basic_cot" + description: "Basic Chain-of-Thought reasoning" + dataset_path: "./data/basic_cot/" + adapter_config: + r: 16 + lora_alpha: 32 + lora_dropout: 0.1 + target_modules: ["c_attn", "c_proj"] # GPT-2 style attention modules + init_lora_weights: true + training: + num_epochs: 2 + per_device_batch_size: 4 + gradient_accumulation_steps: 4 + learning_rate: 2e-4 + warmup_steps: 100 + max_length: 1024 + fp16: false + bf16: false # Use fp32 for smaller models + max_grad_norm: 1.0 + weight_decay: 0.001 + + - name: "math_reasoning" + description: "Mathematical reasoning with think tags" + dataset_path: "./data/math_reasoning/" + inherit_from: "basic_cot" + adapter_config: + r: 32 + lora_alpha: 64 + lora_dropout: 0.1 + target_modules: ["c_attn", "c_proj"] + init_lora_weights: true + training: + num_epochs: 3 + per_device_batch_size: 2 + gradient_accumulation_steps: 8 + learning_rate: 1e-4 + warmup_steps: 200 + max_length: 2048 + bf16: false + max_grad_norm: 1.0 + + - name: "complex_reasoning" + description: "Complex multi-step reasoning" + dataset_path: "./data/complex_reasoning/" + inherit_from: "math_reasoning" + adapter_config: + r: 64 + lora_alpha: 128 + lora_dropout: 0.1 + target_modules: ["c_attn", "c_proj"] + init_lora_weights: true + training: + num_epochs: 2 + per_device_batch_size: 1 + gradient_accumulation_steps: 16 + learning_rate: 5e-5 + warmup_steps: 300 + max_length: 4096 + bf16: false + max_grad_norm: 1.0 + +evaluation: + benchmarks: + - "HLE" + - "Do-Not-Answer" + save_results: true + results_dir: "./outputs/evaluation_results" \ No newline at end of file diff --git a/devenv.lock b/devenv.lock new file mode 100644 index 0000000..d06c441 --- /dev/null +++ b/devenv.lock @@ 
-0,0 +1,139 @@ +{ + "nodes": { + "devenv": { + "locked": { + "dir": "src/modules", + "lastModified": 1751909516, + "owner": "cachix", + "repo": "devenv", + "rev": "36e4cf7d6cb89862e69efce4e5c147ac2e4d38f9", + "type": "github" + }, + "original": { + "dir": "src/modules", + "owner": "cachix", + "repo": "devenv", + "type": "github" + } + }, + "flake-compat": { + "flake": false, + "locked": { + "lastModified": 1747046372, + "owner": "edolstra", + "repo": "flake-compat", + "rev": "9100a0f413b0c601e0533d1d94ffd501ce2e7885", + "type": "github" + }, + "original": { + "owner": "edolstra", + "repo": "flake-compat", + "type": "github" + } + }, + "flake-compat_2": { + "flake": false, + "locked": { + "lastModified": 1747046372, + "owner": "edolstra", + "repo": "flake-compat", + "rev": "9100a0f413b0c601e0533d1d94ffd501ce2e7885", + "type": "github" + }, + "original": { + "owner": "edolstra", + "repo": "flake-compat", + "type": "github" + } + }, + "git-hooks": { + "inputs": { + "flake-compat": "flake-compat", + "gitignore": "gitignore", + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1750779888, + "owner": "cachix", + "repo": "git-hooks.nix", + "rev": "16ec914f6fb6f599ce988427d9d94efddf25fe6d", + "type": "github" + }, + "original": { + "owner": "cachix", + "repo": "git-hooks.nix", + "type": "github" + } + }, + "gitignore": { + "inputs": { + "nixpkgs": [ + "git-hooks", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1709087332, + "owner": "hercules-ci", + "repo": "gitignore.nix", + "rev": "637db329424fd7e46cf4185293b9cc8c88c95394", + "type": "github" + }, + "original": { + "owner": "hercules-ci", + "repo": "gitignore.nix", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1751792365, + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "1fd8bada0b6117e6c7eb54aad5813023eed37ccb", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs-python": { + "inputs": { + "flake-compat": "flake-compat_2", + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1749760516, + "owner": "cachix", + "repo": "nixpkgs-python", + "rev": "908dbb466af5955ea479ac95953333fd64387216", + "type": "github" + }, + "original": { + "owner": "cachix", + "repo": "nixpkgs-python", + "type": "github" + } + }, + "root": { + "inputs": { + "devenv": "devenv", + "git-hooks": "git-hooks", + "nixpkgs": "nixpkgs", + "nixpkgs-python": "nixpkgs-python", + "pre-commit-hooks": [ + "git-hooks" + ] + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake-minimal.nix b/flake-minimal.nix new file mode 100644 index 0000000..d5fde54 --- /dev/null +++ b/flake-minimal.nix @@ -0,0 +1,95 @@ +{ + description = "Progressive LLM Training for 松尾研LLMコンペ2025 (Minimal)"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { self, nixpkgs, flake-utils }: + flake-utils.lib.eachDefaultSystem (system: + let + pkgs = import nixpkgs { + inherit system; + config = { + allowUnfree = true; + cudaSupport = true; + }; + }; + + # Python 3.11 for better compatibility + python = pkgs.python311; + + # Minimal Python packages + pythonWithPackages = python.withPackages (ps: with ps; [ + # Core essentials only + torch + transformers + numpy + + # Essential dependencies + pyyaml + + # Build tools + pip + setuptools + wheel + ]); + + in + { + devShells.default = pkgs.mkShell { + buildInputs = with pkgs; [ + # Python with packages + 
pythonWithPackages + + # Build tools + gcc + cmake + ninja + pkg-config + + # Git + git + git-lfs + + # Libraries needed for Python packages + openssl + zlib + glib + stdenv.cc.cc.lib + + # CUDA support + cudaPackages.cudatoolkit + cudaPackages.cudnn + ]; + + shellHook = '' + echo "🚀 Progressive LLM Training Environment (Minimal)" + echo "Python version: $(python --version)" + echo "PyTorch version: $(python -c 'import torch; print(torch.__version__)')" + echo "CUDA available: $(python -c 'import torch; print(torch.cuda.is_available())')" + + # Set up CUDA environment + export CUDA_HOME=${pkgs.cudaPackages.cudatoolkit} + export CUDA_PATH=${pkgs.cudaPackages.cudatoolkit} + export LD_LIBRARY_PATH=${pkgs.cudaPackages.cudatoolkit}/lib:${pkgs.cudaPackages.cudnn}/lib:${pkgs.stdenv.cc.cc.lib}/lib:$LD_LIBRARY_PATH + + # Set Python path + export PYTHONPATH=$PWD/src:$PYTHONPATH + + echo "" + echo "Note: This is a minimal configuration. Install additional packages with pip as needed:" + echo " pip install accelerate peft trl datasets bitsandbytes wandb jsonlines scikit-learn sentencepiece protobuf" + echo " pip install flash-attn --no-build-isolation" + ''; + + # Environment variables + CUDA_HOME = "${pkgs.cudaPackages.cudatoolkit}"; + CUDA_PATH = "${pkgs.cudaPackages.cudatoolkit}"; + NIX_SHELL_PRESERVE_PROMPT = 1; + LOCALE_ARCHIVE = "${pkgs.glibcLocales}/lib/locale/locale-archive"; + LC_ALL = "en_US.UTF-8"; + }; + }); +} \ No newline at end of file diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..bd80c39 --- /dev/null +++ b/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1751792365, + "narHash": "sha256-J1kI6oAj25IG4EdVlg2hQz8NZTBNYvIS0l4wpr9KcUo=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "1fd8bada0b6117e6c7eb54aad5813023eed37ccb", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..d0d9d05 --- /dev/null +++ b/flake.nix @@ -0,0 +1,195 @@ +{ + description = "Progressive LLM Training for 松尾研LLMコンペ2025"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { self, nixpkgs, flake-utils }: + flake-utils.lib.eachDefaultSystem (system: + let + pkgs = import nixpkgs { + inherit system; + config = { + allowUnfree = true; + cudaSupport = true; + }; + overlays = [ + (final: prev: { + python311 = prev.python311.override { + packageOverrides = python-self: python-super: { + # Disable tests for problematic packages + pytest-doctestplus = 
python-super.pytest-doctestplus.overrideAttrs (oldAttrs: { + doCheck = false; + doInstallCheck = false; + pytestCheckPhase = "echo 'Skipping tests'"; + }); + # Also disable tests for jupyter-related packages if they cause issues + jupyter = python-super.jupyter.overrideAttrs (oldAttrs: { + doCheck = false; + doInstallCheck = false; + }); + notebook = python-super.notebook.overrideAttrs (oldAttrs: { + doCheck = false; + doInstallCheck = false; + }); + # Disable tests for psycopg and psycopg2 + psycopg = python-super.psycopg.overrideAttrs (oldAttrs: { + doCheck = false; + doInstallCheck = false; + pytestCheckPhase = "echo 'Skipping tests'"; + pythonImportsCheck = []; # Disable import checks + }); + psycopg2 = python-super.psycopg2.overrideAttrs (oldAttrs: { + doCheck = false; + doInstallCheck = false; + pytestCheckPhase = "echo 'Skipping tests'"; + pythonImportsCheck = []; # Disable import checks + }); + # Disable tests for sqlframe + sqlframe = python-super.sqlframe.overrideAttrs (oldAttrs: { + doCheck = false; + doInstallCheck = false; + pytestCheckPhase = "echo 'Skipping tests'"; + pythonImportsCheck = []; # Disable import checks + }); + # Disable tests for accelerate + accelerate = python-super.accelerate.overrideAttrs (oldAttrs: { + doCheck = false; + doInstallCheck = false; + pytestCheckPhase = "echo 'Skipping tests'"; + pythonImportsCheck = []; # Disable import checks + }); + }; + }; + }) + ]; + }; + + # Python 3.11 for better compatibility + python = pkgs.python311; + + # Python packages + pythonWithPackages = python.withPackages (ps: with ps; [ + # Core ML packages + torch + torchvision + torchaudio + transformers + accelerate + datasets + tokenizers + scikit-learn + + # Required dependencies from requirements.txt + pyyaml + jsonlines + sentencepiece + protobuf + + # Additional useful packages + numpy + scipy + matplotlib + jupyter + notebook + ipython + pandas + rich # For TUI + + # Development tools + black + flake8 + pytest + mypy + + # Build tools + pip + setuptools + wheel + + # LLM specific packages + peft + trl + bitsandbytes + wandb + ]); + + in + { + devShells.default = pkgs.mkShell { + buildInputs = with pkgs; [ + # Python with packages + pythonWithPackages + + # Build tools + gcc + cmake + ninja + pkg-config + + # Git + git + git-lfs + + # Development tools + htop + tmux + vim + + # Libraries needed for Python packages + openssl + zlib + glib + stdenv.cc.cc.lib + + # CUDA support + cudaPackages.cudatoolkit + cudaPackages.cudnn + ]; + + shellHook = '' + echo "🚀 Progressive LLM Training Environment" + echo "Python version: $(python --version)" + echo "PyTorch version: $(python -c 'import torch; print(torch.__version__)')" + echo "CUDA available: $(python -c 'import torch; print(torch.cuda.is_available())')" + + # Set up CUDA environment + export CUDA_HOME=${pkgs.cudaPackages.cudatoolkit} + export CUDA_PATH=${pkgs.cudaPackages.cudatoolkit} + export LD_LIBRARY_PATH=${pkgs.cudaPackages.cudatoolkit}/lib:${pkgs.cudaPackages.cudnn}/lib:${pkgs.stdenv.cc.cc.lib}/lib:$LD_LIBRARY_PATH + + # Set Python path + export PYTHONPATH=$PWD/src:$PYTHONPATH + + echo "" + echo "Available commands:" + echo " python scripts/train_progressive.py # Start training" + echo " python scripts/evaluate.py # Evaluate model" + echo " jupyter notebook # Start Jupyter" + echo "" + + # Create data directory if not exists + mkdir -p data + + # Prepare sample data if not exists + if [ ! -f "data/basic_cot/train.jsonl" ]; then + echo "Preparing sample datasets..." 
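+          # Runs only when data/basic_cot/train.jsonl is missing; delete that file to have the
+          # sample datasets regenerated the next time the shell is entered.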
+ python -c "from src.data_utils import prepare_sample_datasets; prepare_sample_datasets()" || echo "Sample data preparation skipped" + fi + + # Note about flash-attn + echo "Note: flash-attn is not included in nixpkgs. If needed, install manually with:" + echo " pip install flash-attn --no-build-isolation" + ''; + + # Environment variables + CUDA_HOME = "${pkgs.cudaPackages.cudatoolkit}"; + CUDA_PATH = "${pkgs.cudaPackages.cudatoolkit}"; + NIX_SHELL_PRESERVE_PROMPT = 1; + LOCALE_ARCHIVE = "${pkgs.glibcLocales}/lib/locale/locale-archive"; + LC_ALL = "en_US.UTF-8"; + }; + }); +} \ No newline at end of file diff --git a/requirements-cpu.txt b/requirements-cpu.txt new file mode 100644 index 0000000..6e3167e --- /dev/null +++ b/requirements-cpu.txt @@ -0,0 +1,15 @@ +# CPU version of PyTorch +torch>=2.0.0 --index-url https://download.pytorch.org/whl/cpu +transformers>=4.40.0 +accelerate>=0.27.0 +peft>=0.11.0 +trl>=0.9.0 +datasets>=2.18.0 +bitsandbytes>=0.43.0 +wandb>=0.16.0 +pyyaml>=6.0 +jsonlines>=4.0.0 +scikit-learn>=1.3.0 +# flash-attn is not needed for CPU version +sentencepiece>=0.2.0 +protobuf>=4.25.0 \ No newline at end of file diff --git a/requirements-torch.txt b/requirements-torch.txt new file mode 100644 index 0000000..7a1beba --- /dev/null +++ b/requirements-torch.txt @@ -0,0 +1,3 @@ +--index-url https://download.pytorch.org/whl/cu128 +torch>=2.0.0 +torchaudio>=2.0.0 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..534ab7a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,13 @@ +transformers>=4.40.0 +accelerate>=0.27.0 +peft>=0.11.0 +trl>=0.9.0 +datasets>=2.18.0 +bitsandbytes>=0.43.0 +wandb>=0.16.0 +pyyaml>=6.0 +jsonlines>=4.0.0 +scikit-learn>=1.3.0 +# flash-attn>=2.5.0 # Install separately with --no-build-isolation +sentencepiece>=0.2.0 +protobuf>=4.25.0 diff --git a/scripts/analyze_adapter_size.py b/scripts/analyze_adapter_size.py new file mode 100755 index 0000000..b05b0d3 --- /dev/null +++ b/scripts/analyze_adapter_size.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +""" +Analyze the size and structure of LoRA adapters +""" + +import sys +from pathlib import Path +import torch +import yaml +from peft import PeftModel, LoraConfig + +# Add src to path +sys.path.append(str(Path(__file__).parent.parent)) + +from src.progressive_model import ProgressiveReasoningModel + + +def analyze_adapter_sizes(): + # Load configuration + with open("config/training_config.yaml") as f: + config = yaml.safe_load(f) + + print("=" * 60) + print("LoRA Adapter Size Analysis") + print("=" * 60) + + # Get adapter configuration from config + basic_cot_config = config["progressive_stages"][0] + adapter_config = basic_cot_config["adapter_config"] + + print(f"\nConfiguration for 'basic_cot' adapter:") + print(f" - r (rank): {adapter_config['r']}") + print(f" - lora_alpha: {adapter_config['lora_alpha']}") + print(f" - lora_dropout: {adapter_config['lora_dropout']}") + print(f" - target_modules: {adapter_config['target_modules']}") + + # Load the base model to get dimensions + print("\nLoading base model to analyze dimensions...") + model_wrapper = ProgressiveReasoningModel(config) + model_wrapper.setup_base_model() + + # Analyze model architecture + print(f"\nBase model: {config['experiment']['base_model']}") + + # Count parameters in base model + total_params = sum(p.numel() for p in model_wrapper.model.parameters()) + print(f"Total base model parameters: {total_params:,}") + + # Load saved adapter if it exists + adapter_path = Path(config["experiment"]["output_dir"]) / 
"adapters" / "basic_cot" + if adapter_path.exists(): + print(f"\nLoading saved adapter from: {adapter_path}") + + # Load adapter state dict + adapter_model_path = adapter_path / "adapter_model.safetensors" + if not adapter_model_path.exists(): + adapter_model_path = adapter_path / "adapter_model.bin" + + if adapter_model_path.exists(): + if adapter_model_path.suffix == ".safetensors": + from safetensors.torch import load_file + adapter_weights = load_file(adapter_model_path) + else: + adapter_weights = torch.load(adapter_model_path, map_location="cpu") + + print("\nLoRA Adapter Layer Details:") + print("-" * 60) + + total_lora_params = 0 + layer_info = {} + + for name, tensor in adapter_weights.items(): + size = tensor.numel() + total_lora_params += size + + # Parse layer name + parts = name.split('.') + if 'lora_A' in name or 'lora_B' in name: + # Extract module info + module_name = '.'.join(parts[:-2]) + lora_type = parts[-2] # lora_A or lora_B + + if module_name not in layer_info: + layer_info[module_name] = {} + + layer_info[module_name][lora_type] = { + 'shape': list(tensor.shape), + 'params': size + } + + # Display layer information + for module, info in sorted(layer_info.items()): + print(f"\nModule: {module}") + if 'lora_A' in info and 'lora_B' in info: + shape_a = info['lora_A']['shape'] + shape_b = info['lora_B']['shape'] + params_a = info['lora_A']['params'] + params_b = info['lora_B']['params'] + + print(f" LoRA A: {shape_a} = {params_a:,} parameters") + print(f" LoRA B: {shape_b} = {params_b:,} parameters") + print(f" Total: {params_a + params_b:,} parameters") + + # Calculate original layer size (approximation) + original_size = shape_a[1] * shape_b[0] + compression_ratio = original_size / (params_a + params_b) + print(f" Original layer size (approx): {original_size:,} parameters") + print(f" Compression ratio: {compression_ratio:.1f}x") + + print("\n" + "=" * 60) + print(f"Total LoRA parameters: {total_lora_params:,}") + print(f"Percentage of base model: {(total_lora_params / total_params) * 100:.2f}%") + + # Calculate theoretical size + r = adapter_config['r'] + num_modules = len(adapter_config['target_modules']) + + # For GPT models, typical dimensions + if "DialoGPT" in config['experiment']['base_model']: + hidden_size = 768 # DialoGPT-small uses 768 + print(f"\nTheoretical calculation (hidden_size={hidden_size}, r={r}):") + print(f" Per module: 2 * {hidden_size} * {r} = {2 * hidden_size * r:,} parameters") + print(f" Total ({num_modules} modules): {2 * hidden_size * r * num_modules:,} parameters") + else: + print(f"\nNo saved adapter found at: {adapter_path}") + print("Run training first to generate the adapter.") + + # Show theoretical sizes based on config + r = adapter_config['r'] + print(f"\nTheoretical LoRA sizes with r={r}:") + print(f" For hidden_size=768 (DialoGPT-small): {2 * 768 * r:,} params per module") + print(f" For hidden_size=1024 (medium models): {2 * 1024 * r:,} params per module") + print(f" For hidden_size=1280 (GPT-2 large): {2 * 1280 * r:,} params per module") + + +if __name__ == "__main__": + analyze_adapter_sizes() \ No newline at end of file diff --git a/scripts/check_vram.py b/scripts/check_vram.py new file mode 100644 index 0000000..869dc02 --- /dev/null +++ b/scripts/check_vram.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 +""" +Check VRAM usage and model memory requirements +""" + +import torch +import psutil +import sys +from pathlib import Path +import yaml + +# Add src to path +sys.path.append(str(Path(__file__).parent.parent)) + +from 
transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig + + +def get_memory_info(): + """Get current memory usage""" + if torch.cuda.is_available(): + print("=== CUDA Information ===") + print(f"CUDA available: {torch.cuda.is_available()}") + print(f"CUDA device: {torch.cuda.get_device_name(0)}") + print(f"CUDA device count: {torch.cuda.device_count()}") + + # Get VRAM info + vram_total = torch.cuda.get_device_properties(0).total_memory / 1024**3 + vram_reserved = torch.cuda.memory_reserved(0) / 1024**3 + vram_allocated = torch.cuda.memory_allocated(0) / 1024**3 + vram_free = vram_total - vram_allocated + + print(f"\n=== VRAM Usage ===") + print(f"Total VRAM: {vram_total:.2f} GB") + print(f"Allocated VRAM: {vram_allocated:.2f} GB") + print(f"Reserved VRAM: {vram_reserved:.2f} GB") + print(f"Free VRAM: {vram_free:.2f} GB") + else: + print("CUDA not available!") + + # Get system RAM info + ram = psutil.virtual_memory() + print(f"\n=== System RAM ===") + print(f"Total RAM: {ram.total / 1024**3:.2f} GB") + print(f"Available RAM: {ram.available / 1024**3:.2f} GB") + print(f"Used RAM: {ram.used / 1024**3:.2f} GB ({ram.percent}%)") + + +def estimate_model_size(model_name: str, quantization: str = None): + """Estimate model memory requirements""" + print(f"\n=== Model Memory Estimation ===") + print(f"Model: {model_name}") + + # Common model sizes (in billions of parameters) + model_sizes = { + "gemma-2-2b": 2.5, + "gemma-3-1b": 1.2, + "llama-3.2-8b": 8, + "llama-3.2-13b": 13, + "llama-3.2-70b": 70, + } + + # Find model size + model_key = None + for key in model_sizes: + if key in model_name.lower(): + model_key = key + break + + if model_key: + params_billions = model_sizes[model_key] + + # Memory estimates (rough) + fp32_gb = params_billions * 4 # 4 bytes per parameter + fp16_gb = params_billions * 2 # 2 bytes per parameter + int8_gb = params_billions * 1 # 1 byte per parameter + int4_gb = params_billions * 0.5 # 0.5 bytes per parameter + + print(f"Estimated parameters: {params_billions}B") + print(f"Memory requirements:") + print(f" FP32: ~{fp32_gb:.1f} GB") + print(f" FP16/BF16: ~{fp16_gb:.1f} GB") + print(f" INT8: ~{int8_gb:.1f} GB") + print(f" INT4 (QLoRA): ~{int4_gb:.1f} GB") + + # Add overhead for activations and gradients + print(f"\nWith training overhead:") + print(f" FP16 + LoRA: ~{fp16_gb * 1.5:.1f} GB") + print(f" INT4 + QLoRA: ~{int4_gb * 1.5:.1f} GB") + else: + print("Model size not recognized, unable to estimate memory requirements") + + +def suggest_offloading_strategies(): + """Suggest CPU offloading strategies""" + print("\n=== CPU Offloading Strategies ===") + print("\n1. **Device Map Auto with CPU Offload**") + print(" ```python") + print(" device_map = {") + print(" 'model.embed_tokens': 'cpu',") + print(" 'model.layers.0': 0, # GPU") + print(" 'model.layers.1': 0, # GPU") + print(" 'model.layers.2': 'cpu', # CPU") + print(" # ... distribute layers between GPU and CPU") + print(" }") + print(" ```") + + print("\n2. **Accelerate's CPU Offload**") + print(" ```yaml") + print(" model:") + print(" device_map: 'auto'") + print(" max_memory:") + print(" 0: '4GB' # Limit GPU memory") + print(" 'cpu': '20GB' # Allow CPU memory") + print(" ```") + + print("\n3. 
**DeepSpeed ZeRO-Offload**") + print(" - ZeRO-2: Offload optimizer states to CPU") + print(" - ZeRO-3: Offload optimizer states and parameters to CPU") + print(" ```yaml") + print(" deepspeed:") + print(" zero_optimization:") + print(" stage: 2") + print(" offload_optimizer:") + print(" device: 'cpu'") + print(" ```") + + print("\n4. **Gradient Checkpointing + CPU Offload**") + print(" - Trade compute for memory") + print(" - Combine with layer-wise CPU offloading") + + print("\n5. **QLoRA with CPU Offload**") + print(" - 4-bit quantization reduces base model size") + print(" - Only LoRA parameters on GPU") + print(" - Base model layers can be on CPU") + + +def check_config_compatibility(config_path: str): + """Check if config is compatible with CPU offloading""" + if Path(config_path).exists(): + with open(config_path) as f: + config = yaml.safe_load(f) + + print(f"\n=== Config Analysis: {config_path} ===") + model_config = config.get("model", {}) + + print(f"Current settings:") + print(f" 4-bit quantization: {model_config.get('load_in_4bit', False)}") + print(f" Gradient checkpointing: {model_config.get('gradient_checkpointing', False)}") + print(f" Device map: {model_config.get('device_map', 'None')}") + + if model_config.get('load_in_4bit', False): + print("✓ Already using 4-bit quantization (good for memory)") + else: + print("✗ Consider enabling 4-bit quantization") + + if not model_config.get('gradient_checkpointing', False): + print("✗ Consider enabling gradient checkpointing") + + +def main(): + """Main function""" + print("VRAM and Memory Analysis for Progressive LLM Training") + print("=" * 60) + + # Get memory info + get_memory_info() + + # Estimate model sizes + models = [ + "google/gemma-2-2b-it", + "google/gemma-3-1b-pt", + "meta-llama/Llama-3.2-8B", + ] + + for model in models: + estimate_model_size(model) + + # Suggest strategies + suggest_offloading_strategies() + + # Check configs + configs = [ + "config/training_config_gemma3_1b.yaml", + "config/training_config_gemma2_small.yaml", + ] + + for config in configs: + check_config_compatibility(config) + + print("\n=== Recommendations ===") + print("1. Start with QLoRA (4-bit) if not already enabled") + print("2. Use device_map with max_memory limits") + print("3. Enable gradient checkpointing") + print("4. Consider DeepSpeed for advanced offloading") + print("5. 
Monitor actual usage during training") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/compare_models_tui.py b/scripts/compare_models_tui.py new file mode 100755 index 0000000..b2913b1 --- /dev/null +++ b/scripts/compare_models_tui.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python3 +""" +TUI for comparing original and trained models +""" + +import sys +from pathlib import Path +import yaml +import torch +from rich.console import Console +from rich.panel import Panel +from rich.columns import Columns +from rich.prompt import Prompt +from rich.text import Text +from rich.layout import Layout +from rich.live import Live +from rich.table import Table +import time + +# Add src to path +sys.path.append(str(Path(__file__).parent.parent)) + +from src.progressive_model import ProgressiveReasoningModel + + +class ModelCompareTUI: + def __init__(self, config_path: str = "config/training_config.yaml"): + self.console = Console() + + # Load configuration + with open(config_path) as f: + self.config = yaml.safe_load(f) + + # Initialize models + self.console.print("[yellow]Loading models...[/yellow]") + + # Original model + self.original_model = ProgressiveReasoningModel(self.config) + self.original_model.setup_base_model() + + # Trained model + self.trained_model = ProgressiveReasoningModel(self.config) + self.trained_model.setup_base_model() + + # Load the trained adapter if it exists + adapter_path = Path(self.config["experiment"]["output_dir"]) / "adapters" / "basic_cot" + if adapter_path.exists(): + self.console.print(f"[green]Loading trained adapter from: {adapter_path}[/green]") + self.trained_model.load_for_inference(["basic_cot"]) + else: + self.console.print("[red]No trained adapter found. Please run training first.[/red]") + self.console.print("[yellow]Both models will show original behavior.[/yellow]") + + self.console.print("[green]Models loaded successfully![/green]\n") + + def generate_response(self, model, prompt: str, with_think_tags: bool = True) -> str: + """Generate response from a model""" + # For trained model, encourage think tags + if with_think_tags and model == self.trained_model: + formatted_prompt = f"{prompt}\n\nPlease think step by step." 
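+            # Only the trained model is nudged toward chain-of-thought output; the original
+            # model gets the raw prompt, so the comparison isolates the effect of the adapter.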
+ else: + formatted_prompt = prompt + + inputs = model.tokenizer(formatted_prompt, return_tensors="pt").to(model.model.device) + + with torch.no_grad(): + outputs = model.model.generate( + **inputs, + max_length=512, + temperature=0.7, + do_sample=True, + top_p=0.95, + pad_token_id=model.tokenizer.pad_token_id, + eos_token_id=model.tokenizer.eos_token_id + ) + + response = model.tokenizer.decode(outputs[0], skip_special_tokens=True) + + # Extract response after prompt + response = response[len(formatted_prompt):].strip() + + return response + + def create_comparison_panel(self, prompt: str, original_response: str, trained_response: str) -> Panel: + """Create a panel showing the comparison""" + # Create table + table = Table(show_header=True, header_style="bold magenta", expand=True) + table.add_column("Original Model", style="cyan", width=50) + table.add_column("Trained Model (with CoT)", style="green", width=50) + + table.add_row(original_response, trained_response) + + return Panel( + table, + title=f"[bold yellow]Prompt: {prompt}[/bold yellow]", + border_style="blue" + ) + + def run_interactive_mode(self): + """Run interactive comparison mode""" + self.console.print("\n[bold cyan]Model Comparison TUI[/bold cyan]") + self.console.print("Compare responses from original and trained models\n") + self.console.print("[dim]Type 'quit' or 'exit' to leave[/dim]\n") + + while True: + # Get user prompt + prompt = Prompt.ask("\n[bold yellow]Enter your prompt[/bold yellow]") + + if prompt.lower() in ['quit', 'exit']: + self.console.print("\n[yellow]Goodbye![/yellow]") + break + + # Generate responses + self.console.print("\n[dim]Generating responses...[/dim]") + + start_time = time.time() + original_response = self.generate_response(self.original_model, prompt, with_think_tags=False) + original_time = time.time() - start_time + + start_time = time.time() + trained_response = self.generate_response(self.trained_model, prompt, with_think_tags=True) + trained_time = time.time() - start_time + + # Display comparison + panel = self.create_comparison_panel(prompt, original_response, trained_response) + self.console.print(panel) + + # Show generation times + self.console.print(f"\n[dim]Generation times - Original: {original_time:.2f}s, Trained: {trained_time:.2f}s[/dim]") + + def run_benchmark_mode(self): + """Run benchmark with predefined prompts""" + test_prompts = [ + "What is 156 + 389?", + "If I have 23 apples and buy 17 more, how many do I have?", + "A store has 145 items. 
If 38 are sold, how many remain?", + "What is 45 * 12?", + "Explain why 2 + 2 = 4", + "If a train travels 80 km/h for 2.5 hours, how far does it go?", + "What is the sum of all numbers from 1 to 10?", + "How many minutes are in 3.5 hours?", + ] + + self.console.print("\n[bold cyan]Running Benchmark Comparison[/bold cyan]\n") + + for i, prompt in enumerate(test_prompts, 1): + self.console.print(f"[bold]Test {i}/{len(test_prompts)}[/bold]") + + # Generate responses + original_response = self.generate_response(self.original_model, prompt, with_think_tags=False) + trained_response = self.generate_response(self.trained_model, prompt, with_think_tags=True) + + # Display comparison + panel = self.create_comparison_panel(prompt, original_response, trained_response) + self.console.print(panel) + self.console.print("") + + self.console.print("[green]Benchmark completed![/green]") + + +def main(): + import argparse + + parser = argparse.ArgumentParser(description="Compare original and trained models") + parser.add_argument("--mode", choices=["interactive", "benchmark"], default="interactive", + help="Mode to run the comparison") + parser.add_argument("--config", default="config/training_config.yaml", + help="Path to configuration file") + + args = parser.parse_args() + + # Create TUI + tui = ModelCompareTUI(args.config) + + # Run in selected mode + if args.mode == "interactive": + tui.run_interactive_mode() + else: + tui.run_benchmark_mode() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/evaluate.py b/scripts/evaluate.py new file mode 100755 index 0000000..485ca76 --- /dev/null +++ b/scripts/evaluate.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +""" +Evaluation script for progressive model +""" + +import sys +from pathlib import Path + +sys.path.append(str(Path(__file__).parent.parent)) + +from src.progressive_model import ProgressiveReasoningModel +import yaml + + +def evaluate_reasoning(model_wrapper, test_prompts): + """Evaluate model on test prompts""" + results = [] + + for prompt in test_prompts: + print(f"\nPrompt: {prompt}") + response = model_wrapper.generate_with_reasoning(prompt) + print(f"Response: {response}") + results.append({ + "prompt": prompt, + "response": response + }) + + return results + + +def main(): + # Load config + with open("config/training_config.yaml") as f: + config = yaml.safe_load(f) + + # Initialize model + model_wrapper = ProgressiveReasoningModel(config) + model_wrapper.setup_base_model() + + # Test different adapters + test_prompts = [ + "What is 156 + 389?", + "If a train travels 80 km/h for 2.5 hours, how far does it go?", + "Explain why the sky is blue.", + ] + + # Test each adapter + for adapter_name in ["basic_cot", "math_reasoning", "complex_reasoning"]: + if adapter_name in model_wrapper.adapters: + print(f"\n{'='*50}") + print(f"Testing adapter: {adapter_name}") + print(f"{'='*50}") + + model_wrapper.load_for_inference([adapter_name]) + results = evaluate_reasoning(model_wrapper, test_prompts) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/simple_compare.py b/scripts/simple_compare.py new file mode 100755 index 0000000..dcea3ff --- /dev/null +++ b/scripts/simple_compare.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 +""" +Simple comparison script without rich TUI +""" + +import sys +from pathlib import Path +import yaml +import torch +import argparse + +# Add src to path +sys.path.append(str(Path(__file__).parent.parent)) + +from src.progressive_model import 
ProgressiveReasoningModel
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description="Compare original and trained models")
+    parser.add_argument(
+        "--config", "-c",
+        type=str,
+        default="config/training_config_gemma2_small.yaml",
+        help="Path to configuration file"
+    )
+    parser.add_argument(
+        "--adapter", "-a",
+        type=str,
+        default="basic_cot",
+        help="Adapter name to load for comparison"
+    )
+    parser.add_argument(
+        "--max-length",
+        type=int,
+        default=512,
+        help="Maximum generation length"
+    )
+    return parser.parse_args()
+
+
+def load_config(config_path):
+    """Load configuration from file"""
+    config_path = Path(config_path)
+    if not config_path.exists():
+        raise FileNotFoundError(f"Configuration file not found: {config_path}")
+
+    with open(config_path) as f:
+        config = yaml.safe_load(f)
+    return config
+
+
+def generate_response(model, tokenizer, prompt, max_length=512):
+    """Generate response using the model"""
+    # Format prompt for Gemma
+    formatted_prompt = f"<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n"
+
+    # Tokenize
+    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
+
+    # Generate
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_length=len(inputs["input_ids"][0]) + max_length,
+            temperature=0.7,
+            do_sample=True,
+            top_p=0.9,
+            pad_token_id=tokenizer.pad_token_id,
+            eos_token_id=tokenizer.eos_token_id,
+            repetition_penalty=1.1,
+        )
+
+    # Decode
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # Extract only the model's response
+    if "model" in response:
+        response = response.split("model")[-1].strip()
+
+    return response
+
+
+def main():
+    args = parse_args()
+
+    try:
+        config = load_config(args.config)
+    except FileNotFoundError as e:
+        print(f"Error: {e}")
+        return
+
+    print(f"Progressive Model Comparison")
+    print(f"Config: {args.config}")
+    print(f"Base model: {config['experiment']['base_model']}")
+    print(f"Adapter: {args.adapter}")
+    print("="*60)
+
+    print("Loading models...")
+
+    # Original model (no adapter)
+    print("Loading original model...")
+    original_model = ProgressiveReasoningModel(config)
+    original_model.setup_base_model()
+
+    # Trained model (with adapter)
+    print("Loading trained model...")
+    trained_model = ProgressiveReasoningModel(config)
+    trained_model.setup_base_model()
+
+    # Load the trained adapter if it exists
+    adapter_path = Path(config["experiment"]["output_dir"]) / "adapters" / args.adapter
+    if adapter_path.exists():
+        print(f"Loading trained adapter from: {adapter_path}")
+        try:
+            trained_model.load_for_inference([args.adapter])
+            print("Adapter loaded successfully!")
+        except Exception as e:
+            print(f"Error loading adapter: {e}")
+            print("Will compare with base model instead.")
+    else:
+        print(f"No trained adapter found at: {adapter_path}")
+        print("Available adapters:")
+        adapters_dir = Path(config["experiment"]["output_dir"]) / "adapters"
+        if adapters_dir.exists():
+            for adapter_dir in adapters_dir.iterdir():
+                if adapter_dir.is_dir():
+                    print(f"  - {adapter_dir.name}")
+        else:
+            print("  No adapters directory found.")
+        print("Both models will show original behavior.")
+
+    print("\nModels loaded! 
Enter prompts to compare (type 'quit' to exit)") + print("Examples:") + print(" - What is 25 + 17?") + print(" - Explain why the sky is blue") + print(" - Solve this step by step: If I have 10 apples and give away 3, how many do I have left?") + print() + + while True: + try: + prompt = input("\nPrompt: ").strip() + if prompt.lower() in ['quit', 'exit', 'q']: + break + + if not prompt: + continue + + print(f"\n{'='*60}") + print("ORIGINAL MODEL (No fine-tuning)") + print("="*60) + + try: + original_response = generate_response( + original_model.model, + original_model.tokenizer, + prompt, + args.max_length + ) + print(original_response) + except Exception as e: + print(f"Error generating original response: {e}") + + print(f"\n{'='*60}") + print(f"TRAINED MODEL (With {args.adapter} adapter)") + print("="*60) + + try: + # Add CoT prompt for trained model + cot_prompt = f"{prompt}\n\nPlease think step by step using tags." + trained_response = generate_response( + trained_model.model, + trained_model.tokenizer, + cot_prompt, + args.max_length + ) + print(trained_response) + except Exception as e: + print(f"Error generating trained response: {e}") + + except KeyboardInterrupt: + print("\nExiting...") + break + except Exception as e: + print(f"Error: {e}") + continue + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/train_progressive.py b/scripts/train_progressive.py new file mode 100755 index 0000000..d3cd938 --- /dev/null +++ b/scripts/train_progressive.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +""" +Main training script for progressive reasoning model +""" + +import sys +import yaml +import argparse +from pathlib import Path + +# Add src to path +sys.path.append(str(Path(__file__).parent.parent)) + +from src.progressive_model import ProgressiveReasoningModel +from src.training import ProgressiveTrainer +from src.data_utils import prepare_sample_datasets + + +def parse_args(): + """Parse command line arguments""" + parser = argparse.ArgumentParser( + description="Progressive LLM Training for 松尾研LLMコンペ2025", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Use default config + python scripts/train_progressive.py + + # Use specific config file + python scripts/train_progressive.py --config config/training_config_large.yaml + + # Use config with custom path + python scripts/train_progressive.py --config /path/to/my_config.yaml + + # Prepare sample datasets + python scripts/train_progressive.py --prepare-data + """ + ) + + parser.add_argument( + "--config", "-c", + type=str, + default="config/training_config.yaml", + help="Path to the training configuration file (default: config/training_config.yaml)" + ) + + parser.add_argument( + "--prepare-data", + action="store_true", + help="Prepare sample datasets before training" + ) + + parser.add_argument( + "--dry-run", + action="store_true", + help="Load config and model but skip training (for testing)" + ) + + return parser.parse_args() + + +def load_config(config_path: str) -> dict: + """Load configuration from file""" + config_path = Path(config_path) + + if not config_path.exists(): + raise FileNotFoundError(f"Configuration file not found: {config_path}") + + print(f"Loading configuration from: {config_path}") + + with open(config_path) as f: + config = yaml.safe_load(f) + + return config + + +def main(): + args = parse_args() + + print("Progressive LLM Training for 松尾研LLMコンペ2025") + print("=" * 50) + + # Load configuration + try: + config = load_config(args.config) + 
except FileNotFoundError as e:
+        print(f"Error: {e}")
+        print("Available config files:")
+        config_dir = Path("config")
+        if config_dir.exists():
+            for config_file in config_dir.glob("*.yaml"):
+                print(f"  {config_file}")
+        sys.exit(1)
+    except Exception as e:
+        print(f"Error loading config: {e}")
+        sys.exit(1)
+
+    # Print configuration info
+    print(f"Experiment: {config['experiment']['name']}")
+    print(f"Base model: {config['experiment']['base_model']}")
+    print(f"Output directory: {config['experiment']['output_dir']}")
+    print(f"Stages: {len(config['progressive_stages'])}")
+
+    # Prepare sample datasets if requested
+    if args.prepare_data:
+        print("\nPreparing sample datasets...")
+        prepare_sample_datasets()
+        print("Sample datasets prepared.")
+
+    # Initialize model wrapper
+    print("\nInitializing model...")
+    model_wrapper = ProgressiveReasoningModel(config)
+    model_wrapper.setup_base_model()
+
+    if args.dry_run:
+        print("\nDry run completed. Model loaded successfully.")
+        return
+
+    # Initialize trainer
+    print("\nInitializing trainer...")
+    trainer = ProgressiveTrainer(model_wrapper, config)
+
+    # Run progressive training
+    print("\nStarting progressive training...")
+    trainer.run_progressive_training()
+
+    print("\nTraining completed successfully!")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/data_utils.py b/src/data_utils.py
new file mode 100644
index 0000000..0c7370a
--- /dev/null
+++ b/src/data_utils.py
@@ -0,0 +1,88 @@
+import json
+import jsonlines
+from typing import List, Dict
+from pathlib import Path
+import random
+
+
+def create_think_tag_example(question: str, reasoning: str, answer: str) -> Dict:
+    """Create training example with think tags"""
+    output = f"<think>\n{reasoning}\n</think>\n\n{answer}"
+
+    return {
+        "input": question,
+        "output": output
+    }
+
+
+def prepare_basic_cot_data(output_dir: str, num_examples: int = 1000):
+    """Create basic Chain-of-Thought examples"""
+    output_path = Path(output_dir) / "basic_cot"
+    output_path.mkdir(parents=True, exist_ok=True)
+
+    examples = []
+
+    # Simple arithmetic examples
+    for i in range(num_examples // 2):
+        a = random.randint(10, 100)
+        b = random.randint(10, 100)
+        question = f"What is {a} + {b}?"
+        reasoning = f"To find {a} + {b}, I need to add these two numbers together.\n{a} + {b} = {a + b}"
+        answer = f"The answer is {a + b}."
+
+        examples.append(create_think_tag_example(question, reasoning, answer))
+
+    # Simple word problems
+    templates = [
+        {
+            "question": "If I have {a} apples and buy {b} more, how many apples do I have?",
+            "reasoning": "Starting with {a} apples, then adding {b} more apples.\nTotal: {a} + {b} = {result}",
+            "answer": "I have {result} apples."
+        },
+        {
+            "question": "A store has {a} items. If {b} are sold, how many remain?",
+            "reasoning": "Starting amount: {a} items\nSold: {b} items\nRemaining: {a} - {b} = {result}",
+            "answer": "There are {result} items remaining."
+ } + ] + + for i in range(num_examples // 2): + template = random.choice(templates) + a = random.randint(20, 200) + b = random.randint(10, min(50, a)) + + if "+" in template["reasoning"]: + result = a + b + else: + result = a - b + + question = template["question"].format(a=a, b=b) + reasoning = template["reasoning"].format(a=a, b=b, result=result) + answer = template["answer"].format(result=result) + + examples.append(create_think_tag_example(question, reasoning, answer)) + + # Save to jsonl + output_file = output_path / "train.jsonl" + with jsonlines.open(output_file, "w") as writer: + writer.write_all(examples) + + print(f"Created {len(examples)} basic CoT examples at: {output_file}") + + +def prepare_sample_datasets(base_dir: str = "./data"): + """Prepare sample datasets for all stages""" + base_path = Path(base_dir) + + # Basic CoT + prepare_basic_cot_data(base_path) + + # Math reasoning (placeholder) + math_path = base_path / "math_reasoning" + math_path.mkdir(parents=True, exist_ok=True) + + # Complex reasoning (placeholder) + complex_path = base_path / "complex_reasoning" + complex_path.mkdir(parents=True, exist_ok=True) + + print(f"Sample datasets prepared in: {base_path}") \ No newline at end of file diff --git a/src/progressive_model.py b/src/progressive_model.py new file mode 100644 index 0000000..6c5e669 --- /dev/null +++ b/src/progressive_model.py @@ -0,0 +1,366 @@ +import torch +from transformers import ( + AutoModelForCausalLM, + AutoTokenizer, + BitsAndBytesConfig, + TrainingArguments +) +from peft import ( + LoraConfig, + PeftModel, + TaskType, + get_peft_model, + prepare_model_for_kbit_training +) +from typing import Dict, List, Optional, Tuple +import json +from pathlib import Path + + +class ProgressiveReasoningModel: + """Progressive training approach for reasoning models""" + + def __init__(self, config: dict): + self.config = config + self.base_model_name = config["experiment"]["base_model"] + self.output_dir = Path(config["experiment"]["output_dir"]) + self.output_dir.mkdir(parents=True, exist_ok=True) + + self.model = None + self.tokenizer = None + self.adapters = {} + self.training_history = [] + + def setup_base_model(self): + """Initialize base model with quantization""" + print(f"Loading base model: {self.base_model_name}") + + # Check if quantization is enabled + if self.config["model"].get("load_in_4bit", False): + # BitsAndBytes config for 4-bit quantization + bnb_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_compute_dtype=getattr(torch, self.config["model"]["bnb_4bit_compute_dtype"]), + bnb_4bit_use_double_quant=self.config["model"]["bnb_4bit_use_double_quant"], + bnb_4bit_quant_type=self.config["model"].get("bnb_4bit_quant_type", "nf4") + ) + quantization_config = bnb_config + else: + quantization_config = None + + # Model loading arguments + model_kwargs = { + "device_map": self.config["model"]["device_map"], + "trust_remote_code": True, + "torch_dtype": torch.bfloat16 if torch.cuda.is_available() else torch.float32, + } + + # Add authentication token if provided + if "hf_token" in self.config["model"] and self.config["model"]["hf_token"]: + model_kwargs["token"] = self.config["model"]["hf_token"] + + # Add max_memory configuration for CPU offloading + if "max_memory" in self.config["model"]: + model_kwargs["max_memory"] = self.config["model"]["max_memory"] + print(f"Using max_memory configuration: {model_kwargs['max_memory']}") + + # Add offload folder if specified + if "offload_folder" in self.config["model"]: + 
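+            # offload_folder/offload_state_dict are forwarded to transformers/accelerate:
+            # weights that do not fit under the max_memory budget are spilled to this directory,
+            # and the temporary state dict is kept on disk to reduce peak CPU RAM during loading.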
model_kwargs["offload_folder"] = self.config["model"]["offload_folder"] + model_kwargs["offload_state_dict"] = True + print(f"Using offload folder: {model_kwargs['offload_folder']}") + + # Note: llm_int8_enable_fp32_cpu_offload is not supported for all models + # Only add it if we're not using Gemma models + if (quantization_config and + self.config["model"].get("llm_int8_enable_fp32_cpu_offload", False) and + "gemma" not in self.base_model_name.lower()): + model_kwargs["llm_int8_enable_fp32_cpu_offload"] = True + print("Enabled FP32 CPU offload for quantized model") + + # Add quantization config if enabled + if quantization_config: + model_kwargs["quantization_config"] = quantization_config + + # Add attention implementation + if self.config["model"].get("use_flash_attention_2", False): + model_kwargs["attn_implementation"] = "flash_attention_2" + elif self.config["model"].get("use_eager_attention", False): + model_kwargs["attn_implementation"] = "eager" + + # Load model + print("Loading model with the following kwargs:") + for k, v in model_kwargs.items(): + if k != "quantization_config": + print(f" {k}: {v}") + else: + print(f" {k}: ") + + try: + self.model = AutoModelForCausalLM.from_pretrained( + self.base_model_name, + **model_kwargs + ) + except Exception as e: + print(f"Error loading model: {e}") + # Try without some problematic kwargs + if "offload_folder" in model_kwargs: + print("Retrying without offload_folder...") + del model_kwargs["offload_folder"] + del model_kwargs["offload_state_dict"] + self.model = AutoModelForCausalLM.from_pretrained( + self.base_model_name, + **model_kwargs + ) + + # Prepare for k-bit training if using quantization + if quantization_config: + self.model = prepare_model_for_kbit_training(self.model) + + # Disable gradient checkpointing for now to avoid conflicts + # Enable gradient checkpointing if requested (but disable use_cache) + # if self.config["model"].get("gradient_checkpointing", False): + # self.model.gradient_checkpointing_enable() + # self.model.config.use_cache = False + # print("Gradient checkpointing enabled, use_cache disabled") + + # Explicitly disable use_cache to avoid conflicts + if hasattr(self.model, 'config'): + self.model.config.use_cache = False + + # Load tokenizer + tokenizer_kwargs = {"trust_remote_code": True} + if "hf_token" in self.config["model"] and self.config["model"]["hf_token"]: + tokenizer_kwargs["token"] = self.config["model"]["hf_token"] + + self.tokenizer = AutoTokenizer.from_pretrained( + self.base_model_name, + **tokenizer_kwargs + ) + + # Set padding token and other special tokens for Gemma + if self.tokenizer.pad_token is None: + self.tokenizer.pad_token = self.tokenizer.eos_token + + # For Gemma models, ensure special tokens are set + if "gemma" in self.base_model_name.lower(): + print("Configuring Gemma-specific tokenizer settings") + # Add special tokens if they don't exist + special_tokens = { + "bos_token": "", + "eos_token": "", + "pad_token": "", + } + + # Only add tokens that don't already exist + tokens_to_add = {} + for token_name, token_value in special_tokens.items(): + if getattr(self.tokenizer, token_name, None) is None: + tokens_to_add[token_name] = token_value + + if tokens_to_add: + num_added = self.tokenizer.add_special_tokens(tokens_to_add) + print(f"Added special tokens: {tokens_to_add}") + if num_added > 0: + # Resize model embeddings to accommodate new tokens + self.model.resize_token_embeddings(len(self.tokenizer)) + print(f"Resized model embeddings to {len(self.tokenizer)} tokens") + 
+ # Set appropriate model_max_length for Gemma + if hasattr(self.tokenizer, 'model_max_length') and self.tokenizer.model_max_length > 8192: + self.tokenizer.model_max_length = 8192 + print(f"Set tokenizer model_max_length to {self.tokenizer.model_max_length}") + + # Debug: print model structure for target module identification + print("Model structure:") + for name, module in self.model.named_modules(): + if any(target in name for target in ['attn', 'proj', 'mlp', 'gate', 'up', 'down']): + print(f" {name}: {type(module).__name__}") + + print("Base model loaded successfully") + + def get_target_modules(self, suggested_modules): + """Auto-detect valid target modules for the model""" + valid_modules = [] + all_modules = [name for name, _ in self.model.named_modules()] + + # Check each suggested module + for module_name in suggested_modules: + # Find modules that contain this name + matching_modules = [name for name in all_modules if module_name in name] + if matching_modules: + valid_modules.append(module_name) + print(f" Found target module: {module_name} (matches: {len(matching_modules)} modules)") + else: + print(f" Warning: target module '{module_name}' not found in model") + + # If no valid modules found, try common alternatives + if not valid_modules: + print(" No suggested modules found, trying common alternatives...") + common_alternatives = [ + "q_proj", "k_proj", "v_proj", "o_proj", # Common attention + "gate_proj", "up_proj", "down_proj", # Common MLP + "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj", "self_attn.o_proj", # Full path + "mlp.gate_proj", "mlp.up_proj", "mlp.down_proj", # Full MLP path + ] + + for module_name in common_alternatives: + matching_modules = [name for name in all_modules if module_name in name] + if matching_modules: + valid_modules.append(module_name) + print(f" Found alternative target module: {module_name}") + if len(valid_modules) >= 2: # At least 2 modules + break + + if not valid_modules: + print(" ERROR: No valid target modules found!") + print(" Available modules containing 'proj' or 'attn':") + for name in all_modules: + if any(keyword in name.lower() for keyword in ['proj', 'attn', 'mlp']): + print(f" {name}") + # Fallback to a basic module that should exist + valid_modules = ["embed_tokens"] + + return valid_modules + + def create_adapter(self, stage_config: dict) -> LoraConfig: + """Create LoRA adapter configuration""" + adapter_config = stage_config["adapter_config"] + + # Get initialization method from config, default to True for identity init + init_method = adapter_config.get("init_lora_weights", True) + + # Auto-detect valid target modules + suggested_modules = adapter_config["target_modules"] + valid_modules = self.get_target_modules(suggested_modules) + + print(f"Using target modules: {valid_modules}") + + return LoraConfig( + task_type=TaskType.CAUSAL_LM, + r=adapter_config["r"], + lora_alpha=adapter_config["lora_alpha"], + lora_dropout=adapter_config["lora_dropout"], + target_modules=valid_modules, + bias="none", + init_lora_weights=init_method # Initialize LoRA weights (True = identity, "gaussian" = random) + ) + + def add_progressive_adapter(self, stage_name: str, stage_config: dict): + """Add a new adapter for progressive training""" + print(f"\nAdding adapter for stage: {stage_name}") + + # Check if we should inherit from previous adapter + if "inherit_from" in stage_config and stage_config["inherit_from"] in self.adapters: + print(f"Inheriting from: {stage_config['inherit_from']}") + # Load previous adapter as base + 
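+            # The earlier stage's LoRA is re-attached and then merged into the base weights
+            # (merge_and_unload below), so this stage's new adapter trains on top of what the
+            # previous stage already learned rather than alongside it.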
prev_adapter_path = self.adapters[stage_config["inherit_from"]] + self.model = PeftModel.from_pretrained( + self.model, + prev_adapter_path, + is_trainable=True + ) + # Merge and unload to incorporate previous learning + self.model = self.model.merge_and_unload() + + # Create new adapter config + lora_config = self.create_adapter(stage_config) + + # Add adapter to model + self.model = get_peft_model(self.model, lora_config) + + # Ensure model is in training mode + self.model.train() + + # Print trainable parameters + self.model.print_trainable_parameters() + + # Debug: check if any parameters require gradients + trainable_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad) + total_params = sum(p.numel() for p in self.model.parameters()) + print(f"Trainable parameters: {trainable_params:,} / {total_params:,} ({100 * trainable_params / total_params:.2f}%)") + + # List parameters that require gradients + grad_params = [name for name, param in self.model.named_parameters() if param.requires_grad] + print(f"Parameters requiring gradients: {len(grad_params)} parameters") + if len(grad_params) > 0: + print(f"First few: {grad_params[:5]}") + else: + print("WARNING: No parameters require gradients!") + + # Save adapter path + adapter_path = self.output_dir / "adapters" / stage_name + adapter_path.mkdir(parents=True, exist_ok=True) + self.adapters[stage_name] = str(adapter_path) + + def save_adapter(self, stage_name: str): + """Save current adapter""" + if stage_name in self.adapters: + print(f"Saving adapter: {stage_name}") + self.model.save_pretrained(self.adapters[stage_name]) + # Also save tokenizer for convenience + self.tokenizer.save_pretrained(self.adapters[stage_name]) + + def load_for_inference(self, adapter_names: List[str], weights: Optional[Dict[str, float]] = None): + """Load model with specific adapters for inference""" + if len(adapter_names) == 1: + # Single adapter + adapter_name = adapter_names[0] + + # Check if adapter path is in memory + if adapter_name in self.adapters: + adapter_path = self.adapters[adapter_name] + else: + # Try to find adapter in output directory + adapter_path = self.output_dir / "adapters" / adapter_name + if not adapter_path.exists(): + raise ValueError(f"Adapter {adapter_name} not found at {adapter_path}") + adapter_path = str(adapter_path) + + print(f"Loading adapter from: {adapter_path}") + self.model = PeftModel.from_pretrained( + self.model, + adapter_path + ) + else: + # Multiple adapters - load and combine + # This is a simplified version - real implementation would need adapter composition + print("Multi-adapter inference not fully implemented in this bootstrap") + # For now, just load the last adapter + adapter_name = adapter_names[-1] + if adapter_name in self.adapters: + adapter_path = self.adapters[adapter_name] + else: + adapter_path = str(self.output_dir / "adapters" / adapter_name) + self.model = PeftModel.from_pretrained( + self.model, + adapter_path + ) + + def generate_with_reasoning(self, prompt: str, max_length: int = 2048) -> str: + """Generate response with reasoning""" + # Format prompt with think tags expectation + formatted_prompt = f"{prompt}\n\nPlease think step by step using tags before providing your answer." 
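+        # The adapters are trained on completions of the form "<think>\n...\n</think>\n\n<answer>"
+        # (see src/data_utils.py), so the reply is expected to open with a <think> block.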
+ + # Tokenize + inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.model.device) + + # Generate + with torch.no_grad(): + outputs = self.model.generate( + **inputs, + max_length=max_length, + temperature=0.7, + do_sample=True, + top_p=0.95, + pad_token_id=self.tokenizer.pad_token_id, + eos_token_id=self.tokenizer.eos_token_id + ) + + # Decode + response = self.tokenizer.decode(outputs[0], skip_special_tokens=True) + + # Extract response after prompt + response = response[len(formatted_prompt):].strip() + + return response \ No newline at end of file diff --git a/src/training.py b/src/training.py new file mode 100644 index 0000000..af6f63a --- /dev/null +++ b/src/training.py @@ -0,0 +1,450 @@ +from transformers import TrainingArguments +from trl import SFTTrainer +from datasets import load_dataset, Dataset +import torch +from typing import Dict, List +import json +import jsonlines +from pathlib import Path + + +class ProgressiveTrainer: + """Handle progressive training stages""" + + def __init__(self, model_wrapper, config: dict): + self.model_wrapper = model_wrapper + self.config = config + self.training_history = [] + + def load_dataset(self, dataset_path: str, stage_config: dict = None) -> Dataset: + """Load dataset from jsonl files or HuggingFace datasets""" + print(f"Loading dataset from path: {dataset_path}") + + # Check if it's a HuggingFace dataset (contains '/') + if '/' in dataset_path and not Path(dataset_path).exists(): + print(f"Loading HuggingFace dataset: {dataset_path}") + return self.load_huggingface_dataset(dataset_path, stage_config) + + # Load local dataset + data = [] + print(f"Current working directory: {Path.cwd()}") + + # Support both single file and directory + path = Path(dataset_path) + print(f"Path exists: {path.exists()}") + print(f"Is file: {path.is_file()}") + print(f"Is directory: {path.is_dir()}") + + if path.is_file(): + files = [path] + else: + files = list(path.glob("*.jsonl")) + + print(f"Found {len(files)} files to load") + for f in files: + print(f" - {f}") + + for file_path in files: + print(f"Loading file: {file_path}") + try: + with jsonlines.open(file_path) as reader: + count = 0 + for item in reader: + # Format for chat template + formatted = { + "messages": [ + {"role": "user", "content": item["input"]}, + {"role": "assistant", "content": item["output"]} + ] + } + data.append(formatted) + count += 1 + print(f" Loaded {count} examples from {file_path}") + except Exception as e: + print(f" Error loading file {file_path}: {e}") + + print(f"Total examples loaded: {len(data)}") + return Dataset.from_list(data) + + def load_huggingface_dataset(self, dataset_name: str, stage_config: dict) -> Dataset: + """Load dataset from HuggingFace""" + try: + dataset_config = stage_config.get("dataset_config", {}) if stage_config else {} + + # Default settings + split = dataset_config.get("split", "train") + max_samples = dataset_config.get("max_samples", None) + streaming = dataset_config.get("streaming", False) + + print(f"Loading HuggingFace dataset: {dataset_name}") + print(f" Split: {split}") + print(f" Max samples: {max_samples}") + print(f" Streaming: {streaming}") + + # Load dataset + if streaming: + dataset = load_dataset(dataset_name, split=split, streaming=True) + if max_samples: + dataset = dataset.take(max_samples) + # Convert streaming dataset to regular dataset + data = [] + count = 0 + for item in dataset: + data.append(item) + count += 1 + if count % 1000 == 0: + print(f" Loaded {count} examples...") + if max_samples and count 
>= max_samples:
+                        break
+                dataset = Dataset.from_list(data)
+            else:
+                dataset = load_dataset(dataset_name, split=split)
+                if max_samples:
+                    dataset = dataset.select(range(min(max_samples, len(dataset))))
+
+            print(f"  Loaded dataset with {len(dataset)} examples")
+            print(f"  Dataset columns: {dataset.column_names}")
+            if len(dataset) > 0:
+                print(f"  First example: {dataset[0]}")
+
+            # Convert to our expected format based on dataset name
+            if "math" in dataset_name.lower():
+                return self.convert_math_dataset(dataset)
+            elif "mixture-of-thoughts" in dataset_name.lower():
+                return self.convert_mixture_of_thoughts_dataset(dataset)
+            else:
+                return self.convert_generic_dataset(dataset)
+
+        except Exception as e:
+            print(f"Error loading HuggingFace dataset {dataset_name}: {e}")
+            print("Falling back to empty dataset")
+            return Dataset.from_list([])
+
+    def convert_math_dataset(self, dataset: Dataset) -> Dataset:
+        """Convert OpenR1-Math-220k format to our training format"""
+        def format_math_example(example):
+            # OpenR1-Math-220k format has different column names
+            # Try to find the right columns
+            input_text = None
+            output_text = None
+
+            # Common column names in math datasets
+            if "question" in example:
+                input_text = example["question"]
+            elif "problem" in example:
+                input_text = example["problem"]
+            elif "input" in example:
+                input_text = example["input"]
+            elif "query" in example:
+                input_text = example["query"]
+
+            if "answer" in example:
+                output_text = example["answer"]
+            elif "solution" in example:
+                output_text = example["solution"]
+            elif "output" in example:
+                output_text = example["output"]
+            elif "response" in example:
+                output_text = example["response"]
+
+            # If we can't find the right columns, use the raw example
+            if input_text is None or output_text is None:
+                print(f"Warning: Could not parse example columns: {list(example.keys())}")
+                # Try to use the first two string fields
+                string_fields = [k for k, v in example.items() if isinstance(v, str) and len(v) > 10]
+                if len(string_fields) >= 2:
+                    input_text = example[string_fields[0]]
+                    output_text = example[string_fields[1]]
+                else:
+                    # Skip this example
+                    return None
+
+            # Format with think tags for math reasoning
+            formatted_output = f"<think>\nLet me solve this step by step.\n\n{output_text}\n</think>\n\n{output_text}"
+
+            return {
+                "messages": [
+                    {"role": "user", "content": input_text},
+                    {"role": "assistant", "content": formatted_output}
+                ]
+            }
+
+        # Convert and filter out None results
+        converted = dataset.map(format_math_example, desc="Converting math dataset")
+        converted = converted.filter(lambda x: x is not None, desc="Filtering valid examples")
+
+        print(f"Converted {len(converted)} math examples")
+        if len(converted) > 0:
+            print(f"First converted example: {converted[0]}")
+
+        return converted
+
+    def convert_mixture_of_thoughts_dataset(self, dataset: Dataset) -> Dataset:
+        """Convert Mixture-of-Thoughts format to our training format"""
+        def format_mot_example(example):
+            # Mixture-of-Thoughts typically has complex reasoning patterns
+            # Check for common column names in the dataset
+            input_text = None
+            output_text = None
+
+            # Try to identify input/output columns
+            if "prompt" in example:
+                input_text = example["prompt"]
+            elif "question" in example:
+                input_text = example["question"]
+            elif "input" in example:
+                input_text = example["input"]
+            elif "instruction" in example:
+                input_text = example["instruction"]
+
+            if "response" in example:
+                output_text = example["response"]
+            elif "output" in example:
+                output_text = example["output"]
example["output"] + elif "completion" in example: + output_text = example["completion"] + elif "answer" in example: + output_text = example["answer"] + + # If columns not found, look for thinking patterns + if input_text is None or output_text is None: + # Try to find columns with substantial text + for key, value in example.items(): + if isinstance(value, str) and len(value) > 20: + if input_text is None and any(q in key.lower() for q in ["prompt", "question", "input"]): + input_text = value + elif output_text is None and any(a in key.lower() for a in ["response", "answer", "output"]): + output_text = value + + if input_text is None or output_text is None: + print(f"Warning: Could not parse MoT example columns: {list(example.keys())}") + return None + + # Check if output already contains thinking tags + if "" in output_text or "思考" in output_text: + # Already formatted with thinking + formatted_output = output_text + else: + # Add thinking structure for complex reasoning + formatted_output = f"\nLet me break this down step by step.\n\n{output_text}\n\n\nBased on my analysis, {output_text}" + + return { + "messages": [ + {"role": "user", "content": input_text}, + {"role": "assistant", "content": formatted_output} + ] + } + + # Convert and filter + converted = dataset.map(format_mot_example, desc="Converting Mixture-of-Thoughts dataset") + converted = converted.filter(lambda x: x is not None, desc="Filtering valid examples") + + print(f"Converted {len(converted)} Mixture-of-Thoughts examples") + if len(converted) > 0: + print(f"First converted example: {converted[0]}") + + return converted + + def convert_generic_dataset(self, dataset: Dataset) -> Dataset: + """Convert generic dataset format to our training format""" + def format_generic_example(example): + # Generic conversion for unknown dataset formats + input_text = None + output_text = None + + # Look for any text columns + text_columns = [(k, v) for k, v in example.items() if isinstance(v, str) and len(v) > 10] + + if len(text_columns) >= 2: + # Use first two substantial text columns + input_text = text_columns[0][1] + output_text = text_columns[1][1] + elif len(text_columns) == 1: + # Only one text column - skip this example + return None + else: + return None + + return { + "messages": [ + {"role": "user", "content": input_text}, + {"role": "assistant", "content": output_text} + ] + } + + converted = dataset.map(format_generic_example, desc="Converting generic dataset") + converted = converted.filter(lambda x: x is not None, desc="Filtering valid examples") + + print(f"Converted {len(converted)} generic examples") + return converted + + def format_dataset(self, dataset: Dataset) -> Dataset: + """Format dataset for training""" + print(f"Dataset before formatting: {len(dataset)} examples") + print(f"First example: {dataset[0] if len(dataset) > 0 else 'No data'}") + + # Check if tokenizer has chat template + has_chat_template = ( + hasattr(self.model_wrapper.tokenizer, 'chat_template') and + self.model_wrapper.tokenizer.chat_template is not None + ) + + if not has_chat_template: + print("No chat template found, setting default Gemma chat template") + # Set a simple chat template for Gemma + self.model_wrapper.tokenizer.chat_template = "{% for message in messages %}{{ message['role'] }}\n{{ message['content'] }}\n{% endfor %}" + + def format_chat(example): + # Try to use chat template if available + if has_chat_template or self.model_wrapper.tokenizer.chat_template: + try: + text = self.model_wrapper.tokenizer.apply_chat_template( + 
example["messages"], + tokenize=False, + add_generation_prompt=False + ) + return {"text": text} + except Exception as e: + print(f"Chat template failed: {e}, using fallback") + + # Fallback: create simple formatted text + if "messages" in example: + user_msg = example["messages"][0]["content"] + assistant_msg = example["messages"][1]["content"] + return {"text": f"user\n{user_msg}\nmodel\n{assistant_msg}\n"} + elif "input" in example and "output" in example: + return {"text": f"user\n{example['input']}\nmodel\n{example['output']}\n"} + else: + return {"text": str(example)} + + # Format dataset + formatted = dataset.map(format_chat, batched=False, desc="Formatting dataset") + print(f"Dataset after formatting: {len(formatted)} examples") + if len(formatted) > 0: + print(f"Columns: {formatted.column_names}") + print(f"First formatted example: {formatted[0]}") + + # Keep only the 'text' column for SFTTrainer + if 'text' in formatted.column_names: + columns_to_remove = [col for col in formatted.column_names if col != 'text'] + if columns_to_remove: + formatted = formatted.remove_columns(columns_to_remove) + + return formatted + + def filter_by_length(self, dataset: Dataset, max_length: int) -> Dataset: + """Filter dataset by sequence length""" + def is_valid_length(example): + # Tokenize and check length + tokens = self.model_wrapper.tokenizer( + example["text"], + truncation=False, + return_length=True + ) + return len(tokens["input_ids"]) <= max_length + + filtered = dataset.filter(is_valid_length, desc="Filtering by length") + print(f"Filtered dataset: {len(filtered)} examples (max_length={max_length})") + return filtered + + def train_stage(self, stage_name: str, stage_config: dict): + """Train a single stage""" + print(f"\n{'='*50}") + print(f"Training stage: {stage_name}") + print(f"Description: {stage_config['description']}") + print(f"{'='*50}\n") + + # Add adapter + self.model_wrapper.add_progressive_adapter(stage_name, stage_config) + + # Load and format dataset + dataset = self.load_dataset(stage_config["dataset_path"], stage_config) + dataset = self.format_dataset(dataset) + + # Filter by sequence length if specified + if "max_length" in stage_config["training"]: + dataset = self.filter_by_length(dataset, stage_config["training"]["max_length"]) + + print(f"Final dataset size: {len(dataset)} examples") + + # Training arguments - with CPU offload optimizations + training_args = TrainingArguments( + output_dir=f"./outputs/checkpoints/{stage_name}", + num_train_epochs=stage_config["training"]["num_epochs"], + per_device_train_batch_size=stage_config["training"]["per_device_batch_size"], + gradient_accumulation_steps=stage_config["training"]["gradient_accumulation_steps"], + learning_rate=float(stage_config["training"]["learning_rate"]), # Ensure it's a float + warmup_steps=stage_config["training"]["warmup_steps"], + logging_steps=stage_config["training"].get("logging_steps", 10), + save_strategy="epoch", + eval_strategy="no", + bf16=torch.cuda.is_available() and torch.cuda.is_bf16_supported(), + gradient_checkpointing=self.config["model"].get("gradient_checkpointing", False), + max_grad_norm=stage_config["training"].get("max_grad_norm", 1.0), + report_to="wandb" if self.config["experiment"]["use_wandb"] else "none", + run_name=f"{self.config['experiment']['name']}_{stage_name}", + dataloader_pin_memory=False, # Reduce memory usage + remove_unused_columns=False, # Keep all columns + optim=stage_config["training"].get("optim", "adamw_torch"), # Support 8-bit optimizers + 
dataloader_num_workers=stage_config["training"].get("dataloader_num_workers", 2), + ) + + # Print dataset info for debugging + print(f"Dataset columns: {dataset.column_names}") + print(f"Dataset first example: {dataset[0]}") + + # Ensure model is in training mode before creating trainer + self.model_wrapper.model.train() + + # Final check of trainable parameters + trainable_params = sum(p.numel() for p in self.model_wrapper.model.parameters() if p.requires_grad) + print(f"Final check - Trainable parameters: {trainable_params:,}") + + # Create trainer with minimal configuration + try: + trainer = SFTTrainer( + model=self.model_wrapper.model, + processing_class=self.model_wrapper.tokenizer, + train_dataset=dataset, + args=training_args, + packing=False, # Disable packing for better gradient flow + ) + except Exception as e: + print(f"Error creating SFTTrainer: {e}") + print("Trying with basic configuration...") + trainer = SFTTrainer( + model=self.model_wrapper.model, + processing_class=self.model_wrapper.tokenizer, + train_dataset=dataset, + args=training_args, + ) + + # Train + trainer.train() + + # Save adapter + self.model_wrapper.save_adapter(stage_name) + + # Record history + self.training_history.append({ + "stage": stage_name, + "config": stage_config, + "metrics": trainer.state.log_history + }) + + print(f"\nCompleted training stage: {stage_name}") + + def run_progressive_training(self): + """Run all training stages progressively""" + stages = self.config["progressive_stages"] + + for stage_config in stages: + stage_name = stage_config["name"] + self.train_stage(stage_name, stage_config) + + # Save training history + history_path = Path(self.config["experiment"]["output_dir"]) / "training_history.json" + with open(history_path, "w") as f: + json.dump(self.training_history, f, indent=2) + + print(f"\nAll stages completed! Training history saved to: {history_path}") \ No newline at end of file diff --git a/test_data_load.py b/test_data_load.py new file mode 100644 index 0000000..e16e530 --- /dev/null +++ b/test_data_load.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 +"""Test data loading""" + +import sys +from pathlib import Path +sys.path.append(str(Path(__file__).parent)) + +from src.training import ProgressiveTrainer +from src.progressive_model import ProgressiveReasoningModel +import yaml + +# Load config +with open("config/training_config.yaml") as f: + config = yaml.safe_load(f) + +# Create dummy model wrapper +class DummyModelWrapper: + def __init__(self): + self.tokenizer = None + +model_wrapper = DummyModelWrapper() + +# Create trainer +trainer = ProgressiveTrainer(model_wrapper, config) + +# Test data loading +stage_config = config["progressive_stages"][0] +dataset_path = stage_config["dataset_path"] +print(f"Loading dataset from: {dataset_path}") + +dataset = trainer.load_dataset(dataset_path) +print(f"Loaded {len(dataset)} examples") + +if len(dataset) > 0: + print(f"First example: {dataset[0]}") \ No newline at end of file