WIP snapshot: preserve dirty worktree

This commit is contained in:
centra 2026-03-20 16:25:11 +09:00
parent 1b9ed0cd22
commit ed0f9f42f4
Signed by: centra
GPG key ID: 0C09689D20B25ACA
736 changed files with 62387 additions and 111098 deletions

View file

View file

@ -14,6 +14,7 @@ jobs:
workspaces: ${{ steps.filter.outputs.changes }} workspaces: ${{ steps.filter.outputs.changes }}
any_changed: ${{ steps.filter.outputs.workspaces_any_changed }} any_changed: ${{ steps.filter.outputs.workspaces_any_changed }}
global_changed: ${{ steps.filter.outputs.global }} global_changed: ${{ steps.filter.outputs.global }}
shared_crates_changed: ${{ steps.filter.outputs.shared_crates }}
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: dorny/paths-filter@v3 - uses: dorny/paths-filter@v3
@ -24,9 +25,17 @@ jobs:
- 'flake.nix' - 'flake.nix'
- 'flake.lock' - 'flake.lock'
- 'nix/**' - 'nix/**'
- 'nix-nos/**'
- '.github/workflows/nix.yml' - '.github/workflows/nix.yml'
- 'Cargo.toml' - 'Cargo.toml'
- 'Cargo.lock' - 'Cargo.lock'
- 'crates/**'
- 'client-common/**'
- 'baremetal/**'
- 'scripts/**'
- 'specifications/**'
- 'docs/**'
shared_crates: 'crates/**'
chainfire: 'chainfire/**' chainfire: 'chainfire/**'
flaredb: 'flaredb/**' flaredb: 'flaredb/**'
iam: 'iam/**' iam: 'iam/**'
@ -60,7 +69,21 @@ jobs:
- name: Run PhotonCloud Gate - name: Run PhotonCloud Gate
run: | run: |
nix run .#gate-ci -- --workspace ${{ matrix.workspace }} --tier 0 --no-logs nix run ./nix/ci#gate-ci -- --workspace ${{ matrix.workspace }} --tier 0 --no-logs
shared-crates-gate:
needs: filter
if: ${{ needs.filter.outputs.shared_crates_changed == 'true' }}
runs-on: ubuntu-latest
name: gate (shared crates)
steps:
- uses: actions/checkout@v4
- uses: DeterminateSystems/nix-installer-action@v11
- uses: DeterminateSystems/magic-nix-cache-action@v8
- name: Run Shared Crates Gate
run: |
nix run ./nix/ci#gate-ci -- --shared-crates --tier 0 --no-logs
# Build server packages (tier 1+) # Build server packages (tier 1+)
build: build:
@ -88,7 +111,7 @@ jobs:
# Summary job for PR status checks # Summary job for PR status checks
ci-status: ci-status:
needs: [filter, gate] needs: [filter, gate, shared-crates-gate]
if: always() if: always()
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
@ -97,10 +120,19 @@ jobs:
if [[ "${{ needs.gate.result }}" == "failure" ]]; then if [[ "${{ needs.gate.result }}" == "failure" ]]; then
exit 1 exit 1
fi fi
if [[ "${{ needs.shared-crates-gate.result }}" == "failure" ]]; then
exit 1
fi
if [[ "${{ needs.filter.outputs.any_changed }}" == "true" || "${{ needs.filter.outputs.global_changed }}" == "true" ]]; then if [[ "${{ needs.filter.outputs.any_changed }}" == "true" || "${{ needs.filter.outputs.global_changed }}" == "true" ]]; then
if [[ "${{ needs.gate.result }}" == "skipped" ]]; then if [[ "${{ needs.gate.result }}" == "skipped" ]]; then
echo "Gate was skipped despite changes. This is unexpected." echo "Gate was skipped despite changes. This is unexpected."
exit 1 exit 1
fi fi
fi fi
if [[ "${{ needs.filter.outputs.shared_crates_changed }}" == "true" ]]; then
if [[ "${{ needs.shared-crates-gate.result }}" == "skipped" ]]; then
echo "Shared crates gate was skipped despite crates/** changes. This is unexpected."
exit 1
fi
fi
echo "CI passed or was correctly skipped." echo "CI passed or was correctly skipped."

18
.gitignore vendored
View file

@ -1,5 +1,10 @@
# Claude Code # Claude Code
.cccc/ .cccc/
.code/
.codex/
.claude.json
.ralphrc
.sisyphus/
# Rust # Rust
target/ target/
@ -9,6 +14,7 @@ target/
# Nix # Nix
result result
result-* result-*
plasmavmc/result
# local CI artifacts # local CI artifacts
work/ work/
@ -33,6 +39,8 @@ Thumbs.db
# Logs # Logs
*.log *.log
quanta/test_output_renamed.log
plasmavmc/kvm_test_output.log
# VM disk images and ISOs (large binary files) # VM disk images and ISOs (large binary files)
**/*.qcow2 **/*.qcow2
@ -54,3 +62,13 @@ flaredb/repomix-output.xml
# Temporary files # Temporary files
*.tmp *.tmp
*.bak *.bak
tmp_test.txt
tmp_test_write.txt
tmp_write_check.txt
# Runtime state
data/
chainfire/data/
flaredb/data/
creditservice/.tmp/
nightlight/.tmp/

27
CONTRIBUTING.md Normal file
View file

@ -0,0 +1,27 @@
# Contributing
PhotonCloud uses Nix as the primary development and validation entrypoint.
## Setup
```bash
nix develop
```
## Before sending changes
Run the strongest local validation you can afford.
```bash
nix run ./nix/test-cluster#cluster -- fresh-smoke
```
For smaller changes, use the narrower commands under `nix/test-cluster`.
## Expectations
- keep service startup on file-first `--config` paths
- prefer Nix-native workflows over ad hoc host scripts
- do not commit secrets, private keys, runtime state, or generated disk images
- document new validation entrypoints in `README.md` or `docs/`
- when changing multi-node behavior, validate on the VM cluster rather than only with unit tests

View file

@ -1,35 +0,0 @@
Title: Foreman Task Brief (Project-specific)
Purpose (free text)
- Complete PROJECT.md Item 12 (Nightlight) - the FINAL infrastructure component (COMPLETE)
- Achieve 12/12 PROJECT.md deliverables (NOW 12/12)
- Prepare for production deployment using T032 bare-metal provisioning
Current objectives (ranked, short)
- 1) T033 Nightlight completion: S4 PromQL Engine (P0), S5 Storage, S6 Integration
- 2) Production deployment prep: NixOS modules + Nightlight observability stack
- 3) Deferred features: T029.S5 practical app demo, FlareDB SQL layer (post-MVP)
Standing work (edit freely)
- Task status monitoring: Check docs/por/T*/task.yaml for stale/blocked tasks
- Risk radar: Monitor POR.md Risk Radar for new/escalating risks
- Progress tracking: Verify step completion matches claimed LOC/test counts
- Stale task alerts: Flag tasks with no progress >48h
- Evidence validation: Spot-check evidence trail (cargo check, test counts)
Useful references
- PROJECT.md
- docs/por/POR.md
- docs/por/T*/task.yaml (active tasks)
- docs/evidence/** and .cccc/work/**
How to act each run
- Do one useful, non-interactive step within the time box (≤ 30m).
- Save temporary outputs to .cccc/work/foreman/<YYYYMMDD-HHMMSS>/.
- Write one message to .cccc/mailbox/foreman/to_peer.md with header To: Both|PeerA|PeerB and wrap body in <TO_PEER>..</TO_PEER>.
Escalation
- If a decision is needed, write a 6–10 line RFD and ask the peer.
Safety
- Do not modify orchestrator code/policies; provide checkable artifacts.

202
LICENSE Normal file
View file

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

37
Makefile Normal file
View file

@ -0,0 +1,37 @@
# PhotonCloud Makefile
# Unifies build and test commands
.PHONY: all build cluster-up cluster-down cluster-status cluster-validate cluster-smoke cluster-matrix cluster-bench-storage clean
# Build all services (using Nix)
build:
nix build .#packages.x86_64-linux.default
# Cluster Management
cluster-up:
nix run ./nix/test-cluster#cluster -- start
cluster-down:
nix run ./nix/test-cluster#cluster -- stop
cluster-status:
nix run ./nix/test-cluster#cluster -- status
cluster-validate:
nix run ./nix/test-cluster#cluster -- validate
cluster-smoke:
nix run ./nix/test-cluster#cluster -- fresh-smoke
cluster-matrix:
nix run ./nix/test-cluster#cluster -- fresh-matrix
cluster-bench-storage:
nix run ./nix/test-cluster#cluster -- fresh-storage-bench
cluster-clean:
nix run ./nix/test-cluster#cluster -- clean
# Clean up build artifacts
clean:
rm -rf result

View file

@ -1,398 +0,0 @@
# PlasmaCloud/PhotonCloud と Nix-NOS の統合分析
## Architecture Decision (2025-12-13)
**決定:** Nix-NOSを汎用ネットワークモジュールとして別リポジトリに分離する。
### Three-Layer Architecture
```
Layer 3: PlasmaCloud Cluster (T061)
- plasmacloud-cluster.nix
- cluster-config.json生成
- Deployer (Rust)
depends on ↓
Layer 2: PlasmaCloud Network (T061)
- plasmacloud-network.nix
- FiberLB BGP連携
- PrismNET統合
depends on ↓
Layer 1: Nix-NOS Generic (T062) ← 別リポジトリ
- BGP (BIRD2/GoBGP)
- VLAN
- Network interfaces
- PlasmaCloudを知らない汎用モジュール
```
### Repository Structure
- **github.com/centra/nix-nos**: Layer 1 (汎用、VyOS/OpenWrt代替)
- **github.com/centra/plasmacloud**: Layers 2+3 (既存リポジトリ)
---
## 1. 既存プロジェクトの概要
PlasmaCloudPhotonCloudは、以下のコンポーネントで構成されるクラウド基盤プロジェクト
### コアサービス
| コンポーネント | 役割 | 技術スタック |
|---------------|------|-------------|
| **ChainFire** | 分散KVストアetcd互換 | Rust, Raft (openraft) |
| **FlareDB** | SQLデータベース | Rust, KVバックエンド |
| **IAM** | 認証・認可 | Rust, JWT/mTLS |
| **PlasmaVMC** | VM管理 | Rust, KVM/FireCracker |
| **PrismNET** | オーバーレイネットワーク | Rust, OVN連携 |
| **LightningSTOR** | オブジェクトストレージ | Rust, S3互換 |
| **FlashDNS** | DNS | Rust, hickory-dns |
| **FiberLB** | ロードバランサー | Rust, L4/L7, BGP予定 |
| **NightLight** | メトリクス | Rust, Prometheus互換 |
| **k8shost** | コンテナオーケストレーション | Rust, K8s API互換 |
### インフラ層
- **NixOSモジュール**: 各サービス用 (`nix/modules/`)
- **first-boot-automation**: 自動クラスタ参加
- **PXE/Netboot**: ベアメタルプロビジョニング
- **TLS証明書管理**: 開発用証明書生成スクリプト
---
## 2. Nix-NOS との統合ポイント
### 2.1 Baremetal Provisioning → Deployer強化
**既存の実装:**
```
first-boot-automation.nix
├── cluster-config.json による設定注入
├── bootstrap vs join の自動判定
├── マーカーファイルによる冪等性
└── systemd サービス連携
```
**Nix-NOSで追加すべき機能:**
| 既存 | Nix-NOS追加 |
|------|-------------|
| cluster-config.json (手動作成) | topology.nix から自動生成 |
| 単一クラスタ構成 | 複数クラスタ/サイト対応 |
| nixos-anywhere 依存 | Deployer (Phone Home + Push) |
| 固定IP設定 | IPAM連携による動的割当 |
**統合設計:**
```nix
# topology.nixNix-NOS
{
nix-nos.clusters.plasmacloud = {
nodes = {
"node01" = {
role = "control-plane";
ip = "10.0.1.10";
services = [ "chainfire" "flaredb" "iam" ];
};
"node02" = { role = "control-plane"; ip = "10.0.1.11"; };
"node03" = { role = "worker"; ip = "10.0.1.12"; };
};
# Nix-NOSが自動生成 → first-boot-automationが読む
# cluster-config.json の内容をNix評価時に決定
};
}
```
### 2.2 Network Management → PrismNET + FiberLB + Nix-NOS BGP
**既存の実装:**
```
PrismNET (prismnet/)
├── VPC/Subnet/Port管理
├── Security Groups
├── IPAM
└── OVN連携
FiberLB (fiberlb/)
├── L4/L7ロードバランシング
├── ヘルスチェック
├── VIP管理
└── BGP統合設計済み、GoBGPサイドカー
```
**Nix-NOSで追加すべき機能:**
```
Nix-NOS Network Layer
├── BGP設定生成BIRD2
│ ├── iBGP/eBGP自動計算
│ ├── Route Reflector対応
│ └── ポリシー抽象化
├── topology.nix → systemd-networkd
├── OpenWrt/Cisco設定生成将来
└── FiberLB BGP連携
```
**統合設計:**
```nix
# Nix-NOSのBGPモジュール → FiberLBのGoBGP設定に統合
{
nix-nos.network.bgp = {
autonomousSystems = {
"65000" = {
members = [ "node01" "node02" "node03" ];
ibgp.strategy = "route-reflector";
ibgp.reflectors = [ "node01" ];
};
};
# FiberLBのVIPをBGPで広報
vipAdvertisements = {
"fiberlb" = {
vips = [ "10.0.100.1" "10.0.100.2" ];
nextHop = "self";
communities = [ "65000:100" ];
};
};
};
# FiberLBモジュールとの連携
services.fiberlb.bgp = {
enable = true;
# Nix-NOSが生成するGoBGP設定を参照
configFile = config.nix-nos.network.bgp.gobgpConfig;
};
}
```
### 2.3 K8sパチモン → k8shost + Pure NixOS Alternative
**既存の実装:**
```
k8shost (k8shost/)
├── Pod管理gRPC API
├── Service管理ClusterIP/NodePort
├── Node管理
├── CNI連携
├── CSI連携
└── FiberLB/FlashDNS連携
```
**Nix-NOSの役割:**
k8shostはすでにKubernetesのパチモンとして機能している。Nix-NOSは
1. **k8shostを使う場合**: k8shostクラスタ自体のデプロイをNix-NOSで管理
2. **Pure NixOSK8sなし**: より軽量な選択肢として、Systemd + Nix-NOSでサービス管理
```
┌─────────────────────────────────────────────────────────────┐
│ Orchestration Options │
├─────────────────────────────────────────────────────────────┤
│ Option A: k8shost (K8s-like) │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ Nix-NOS manages: cluster topology, network, certs │ │
│ │ k8shost manages: pods, services, scaling │ │
│ └─────────────────────────────────────────────────────┘ │
│ │
│ Option B: Pure NixOS (K8s-free) │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ Nix-NOS manages: everything │ │
│ │ systemd + containers, static service discovery │ │
│ │ Use case: クラウド基盤自体の管理 │ │
│ └─────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────┘
```
**重要な洞察:**
> 「クラウドの基盤そのものを作るのにKubernetesは使いたくない」
これは正しいアプローチ。PlasmaCloudのコアサービスChainFire, FlareDB, IAM等
- K8sの上で動くのではなく、K8sを提供する側
- Pure NixOS + Systemdで管理されるべき
- Nix-NOSはこのレイヤーを担当
---
## 3. 具体的な統合計画
### Phase 1: Baremetal Provisioning統合
**目標:** first-boot-automationをNix-NOSのtopology.nixと連携
```nix
# nix/modules/first-boot-automation.nix への追加
{ config, lib, ... }:
let
# Nix-NOSのトポロジーから設定を生成
clusterConfig =
if config.nix-nos.cluster != null then
config.nix-nos.cluster.generateClusterConfig {
hostname = config.networking.hostName;
}
else
# 従来のcluster-config.json読み込み
builtins.fromJSON (builtins.readFile /etc/nixos/secrets/cluster-config.json);
in {
# 既存のfirst-boot-automationロジックはそのまま
# ただし設定ソースをNix-NOSに切り替え可能に
}
```
### Phase 2: BGP/Network統合
**目標:** FiberLBのBGP連携T055.S3をNix-NOSで宣言的に管理
```nix
# nix/modules/fiberlb-bgp-nixnos.nix
{ config, lib, pkgs, ... }:
let
fiberlbCfg = config.services.fiberlb;
nixnosBgp = config.nix-nos.network.bgp;
in {
config = lib.mkIf (fiberlbCfg.enable && nixnosBgp.enable) {
# GoBGP設定をNix-NOSから生成
services.gobgpd = {
enable = true;
configFile = pkgs.writeText "gobgp.yaml" (
nixnosBgp.generateGobgpConfig {
localAs = nixnosBgp.getLocalAs config.networking.hostName;
routerId = nixnosBgp.getRouterId config.networking.hostName;
neighbors = nixnosBgp.getPeers config.networking.hostName;
}
);
};
# FiberLBにGoBGPアドレスを注入
services.fiberlb.bgp = {
gobgpAddress = "127.0.0.1:50051";
};
};
}
```
### Phase 3: Deployer実装
**目標:** Phone Home + Push型デプロイメントコントローラー
```
plasmacloud/
├── deployer/ # 新規追加
│ ├── src/
│ │ ├── api.rs # Phone Home API
│ │ ├── orchestrator.rs # デプロイワークフロー
│ │ ├── state.rs # ード状態管理ChainFire連携
│ │ └── iso_generator.rs # ISO自動生成
│ └── Cargo.toml
└── nix/
└── modules/
└── deployer.nix # NixOSモジュール
```
**ChainFireとの連携:**
DeployerはChainFireを状態ストアとして使用
```rust
// deployer/src/state.rs
struct NodeState {
hostname: String,
status: NodeStatus, // Pending, Provisioning, Active, Failed
bootstrap_key_hash: Option<String>,
ssh_pubkey: Option<String>,
last_seen: DateTime<Utc>,
}
impl DeployerState {
async fn register_node(&self, node: &NodeState) -> Result<()> {
// ChainFireに保存
self.chainfire_client
.put(format!("deployer/nodes/{}", node.hostname), node.to_json())
.await
}
}
```
---
## 4. アーキテクチャ全体図
```
┌─────────────────────────────────────────────────────────────────────┐
│ Nix-NOS Layer │
│ ┌─────────────────────────────────────────────────────────────┐ │
│ │ topology.nix │ │
│ │ - ノード定義 │ │
│ │ - ネットワークトポロジー │ │
│ │ - サービス配置 │ │
│ └─────────────────────────────────────────────────────────────┘ │
│ │ │
│ generates │ │
│ ▼ │
│ ┌──────────────┬──────────────┬──────────────┬──────────────┐ │
│ │ NixOS Config │ BIRD Config │ GoBGP Config │ cluster- │ │
│ │ (systemd) │ (BGP) │ (FiberLB) │ config.json │ │
│ └──────────────┴──────────────┴──────────────┴──────────────┘ │
└─────────────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────────────┐
│ PlasmaCloud Services │
│ ┌───────────────────────────────────────────────────────────────┐ │
│ │ Control Plane │ │
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
│ │ │ChainFire │ │ FlareDB │ │ IAM │ │ Deployer │ │ │
│ │ │(Raft KV) │ │ (SQL) │ │(AuthN/Z) │ │ (新規) │ │ │
│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │
│ └───────────────────────────────────────────────────────────────┘ │
│ │
│ ┌───────────────────────────────────────────────────────────────┐ │
│ │ Network Plane │ │
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
│ │ │ PrismNET │ │ FiberLB │ │ FlashDNS │ │ BIRD2 │ │ │
│ │ │ (OVN) │ │(LB+BGP) │ │ (DNS) │ │(Nix-NOS) │ │ │
│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │
│ └───────────────────────────────────────────────────────────────┘ │
│ │
│ ┌───────────────────────────────────────────────────────────────┐ │
│ │ Compute Plane │ │
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
│ │ │PlasmaVMC │ │ k8shost │ │Lightning │ │ │
│ │ │(VM/FC) │ │(K8s-like)│ │ STOR │ │ │
│ │ └──────────┘ └──────────┘ └──────────┘ │ │
│ └───────────────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────────┘
```
---
## 5. 優先度と実装順序
| 優先度 | 機能 | 依存関係 | 工数 |
|--------|------|----------|------|
| **P0** | topology.nix → cluster-config.json生成 | なし | 1週間 |
| **P0** | BGPモジュールBIRD2設定生成 | なし | 2週間 |
| **P1** | FiberLB BGP連携GoBGP | T055.S3完了 | 2週間 |
| **P1** | Deployer基本実装 | ChainFire | 3週間 |
| **P2** | OpenWrt設定生成 | BGPモジュール | 2週間 |
| **P2** | ISO自動生成パイプライン | Deployer完了後 | 1週間 |
| **P2** | 各サービスの設定をNixで管理可能なように | なし | 適当 |
---
## 6. 結論
PlasmaCloud/PhotonCloudプロジェクトは、Nix-NOSの構想を実装するための**理想的な基盤**
1. **すでにNixOSモジュール化されている** → Nix-NOSモジュールとの統合が容易
2. **first-boot-automationが存在** → Deployerの基礎として活用可能
3. **FiberLBにBGP設計がある** → Nix-NOSのBGPモジュールと自然に統合
4. **ChainFireが状態ストア** → Deployer状態管理に利用可能
5. **k8shostが存在するがK8sではない** → 「K8sパチモン」の哲学と一致
**次のアクション:**
1. Nix-NOSモジュールをPlasmaCloudリポジトリに追加
2. topology.nix → cluster-config.json生成の実装
3. BGPモジュールBIRD2の実装とFiberLB連携

View file

@ -1,105 +0,0 @@
# Project Overview
これは、日本発のクラウド基盤を作るためのプロジェクトです。
OpenStackなどの既存の使いにくいクラウド基板の代替となり、ついでに基礎技術を各種ソフトウェアに転用できるようにする。
# Principal
Peer Aへ**自分で戦略を**決めて良い!好きにやれ!
# Current Priorities
一通り実装を終わらせ、使いやすいプラットフォームと仕様が完成することを目標とする。
実装すべきもの:
1. クラスター管理用KVSchainfire
- これは、ライブラリとして作ることにする。単体でとりあえずKVSとして簡易的にも使えるという想定。
- Raft+Gossip。
2. IAM基盤aegisという名前にしたい。
- 様々な認証方法に対応しておいてほしい。
- あと、サービス間の認証もうまくやる必要がある。mTLSでやることになるだろう。IAMとしてやるのが正解かどうかはわからないが。
3. DBaaSのための高速KVSFlareDB
- そこそこクエリ効率の良いKVSを作り、その上にSQL互換レイヤーなどが乗れるようにする。
- 超高速である必要がある。
- 結果整合性モードと強整合性モードを両方載せられるようにしたい。
- Tsurugiのような高速なDBが参考になるかも知れない。
- DBaaSのためでもあるが、高速分散KVSということで、他のもののメタデータストアとして使えるべき。
- Chainfireとの棲み分けとしては、Chainfireは単体で使う時用と、大規模な場合はクラスター管理に集中させ、メタデータのストア特に、サービス間の連携をするような場合は他のサービスのメタデータにアクセスしたくなるだろう。その時に、このKVSから読めれば良い。はFlareDBにすると良さそう。
4. VM基盤PlasmaVMC
- ちゃんとした抽象化をすることで、様々なVMを扱えるようにしたいKVM,FireCracker,mvisorなどなど
5. オブジェクトストレージ基盤LightningSTOR
- この基盤の標準的な感じのある程度共通化されており、使いやすいAPIと、S3互換なAPIがあると良いかも
- メタデータストアにFlareDBが使えるように当然なっているべき
6. DNSFlashDNS
- PowerDNSを100%完全に代替可能なようにしてほしい。
- Route53のようなサービスが作れるようにしたい。
- BINDも使いたくない。
- 逆引きDNSをやるためにとんでもない行数のBINDのファイルを書くというのがあり、バカバカしすぎるのでサブネットマスクみたいなものに対応すると良い。
- DNS All-Rounderという感じにしたい。
7. ロードバランサーFiberLB
- 超高速なロードバランサーとは名ばかりで、実体としてはBGPでやるので良いような気がしている。
- AWS ELBみたいなことをできるようにしたい。
- MaglevによるL4ロードバランシング
- BGP AnycastによるL2ロードバランシング
- L7ロードバランシング
- これらをいい感じにできると良い(既存のソフトウェアでできるかも?これは要確認。)
8. Kubernetesクラスタをいい感じにホストできるもの
- k0sとかk3sとかが参考になるかも知れない。
9. これらをNixOS上で動くようにパッケージ化をしたりすると良いFlake化
- あと、Nixで設定できると良い。まあ設定ファイルを生成するだけなのでそれはできると思うが
10. NixによるベアメタルプロビジョニングDeployer
- Phone Home + Push型のデプロイメントコントローラー
- topology.nix からクラスタ設定を自動生成
- ChainFireを状態ストアとして使用
- ISO自動生成パイプライン対応
11. オーバーレイネットワーク
- マルチテナントでもうまく動くためには、ユーザーの中でアクセスできるネットワークなど、考えなければいけないことが山ほどある。これを処理するものも必要。
- とりあえずネットワーク部分自体の実装はOVNとかで良い。
12. オブザーバビリティコンポーネントNightLight
- メトリクスストアが必要
- VictoriaMetricsはmTLSが有料なので、作る必要がある
- 完全オープンソースでやりたいからね
- 最低限、Prometheus互換PromQLとスケーラビリティ、Push型というのは必須になる
- メトリクスのデータをどこに置くかは良く良く考えないといけない。スケーラビリティを考えるとS3互換ストレージの上に載せたいが…
- あと、圧縮するかどうかなど
13. クレジット・クオータ管理CreditService
- プロジェクトごとのリソース使用量と課金を管理する「銀行」のようなサービス
- 各サービスPlasmaVMCなどからのリソース作成リクエストをインターセプトして残高確認Admission Controlを行う
- NightLightから使用量メトリクスを収集して定期的に残高を引き落とすBilling Batch
# Recent Changes (2025-12-11)
- **Renaming**:
- `Nightlight` -> `NightLight` (監視・メトリクス)
- `PrismNET` -> `PrismNET` (ネットワーク)
- `PlasmaCloud` -> `PhotonCloud` (プロジェクト全体コードネーム)
- **Architecture Decision**:
- IAMにクオータ管理を持たせず、専用の `CreditService` を新設することを決定。
- `NightLight` を使用量計測のバックエンドとして活用する方針を策定。
# Next Steps
1. **CreditServiceの実装**:
- プロジェクトごとのWallet管理、残高管理機能
- gRPC APIによるAdmission Controlの実装
2. **NightLightの実装完了**:
- 永続化層とクエリエンジンの完成
- `CreditService` へのデータ提供機能の実装
3. **PlasmaVMCの改修**:
- `CreditService` と連携したリソース作成時のチェック処理追加
- プロジェクト単位のリソース総量制限の実装
# 守るべき事柄
1. Rustで書く。
2. 全部のソフトウェアにおいて、コードベースの構造や依存ライブラリ、仕様や使い方を揃えて、統一感があるようにする。
3. テスト可能なように作る。また、テストをちゃんと書く。スケーラブルかどうかや、実際に動くかどうかもテスト可能なように良く考えたうえで作る。
4. スケーラビリティに気をつけて書く。ボトルネックになる箇所はないか?と常に確認する。
5. 統一感ある仕様をちゃんと考える。specificationsの中にmdで書いていってほしい。1ソフトウェアごとにフォルダを作り、その中に仕様を書く。
6. 設定ファイルについても統一感ある仕様が必要。
7. マルチテナントに関して最初から考慮したうえで設計する次の年にAWSやGCPでそのまま採用されてもおかしくないような性能や使いやすさが必要
8. ホームラボ用途も満たすようにしたい。
9. NixのFlakeで環境を作ったり固定したりすると良い。
10. 前方互換性は気にする必要がないすでにある実装に縛られる必要はなく、両方を変更して良い。v2とかv3とかそういうふうにバージョンを増やしていくのはやめてほしい。そうではなく、完璧な一つの実装を作ることに専念してほしい。
11. ライブラリは可能な限り最新版を使う。この先も長くメンテナンスされることを想定したい。
# 実戦テスト
全ての作ったコンポーネントについて、実践的なテストを作ってバグや仕様の悪い点を洗い出し、修正する。
NixやVM、コンテナなどあらゆるものを活用してよい。
これにより、実用レベルまで持っていくことが期待される。
実用的なアプリケーションを作ってみるとか、パフォーマンスを実際に高負荷な試験で確認するとか、そのレベルのものが求められている。
また、各コンポーネントごとのテストも行うべきだが、様々なものを組み合わせるテストも行うべきである。これも含まれる。
また、設定のやり方がちゃんと統一されているかなど、細かい点まで気を配ってやる必要がある。

527
README.md
View file

@ -1,507 +1,50 @@
# PhotonCloud (旧 PlasmaCloud) # PhotonCloud
**A modern, multi-tenant cloud infrastructure platform built in Rust** PhotonCloud is a Nix-first cloud platform workspace that assembles a small control plane, network services, VM hosting, shared storage, object storage, and gateway services into one reproducible repository.
> NOTE: プロジェクトコードネームを PlasmaCloud から PhotonCloud に改称。コンポーネント名も Nightlight → NightLight へ統一済み(詳細は `PROJECT.md` の Recent Changes を参照)。 The canonical local proof path is the six-node VM cluster under [`nix/test-cluster`](/home/centra/cloud/nix/test-cluster/README.md). It builds all guest images on the host, boots them as hardware-like QEMU nodes, and validates real multi-node behavior.
> 併存する「PlasmaCloud」表記は旧コードネームを指します。PhotonCloud と読み替えてください。
PhotonCloud provides a complete cloud computing stack with strong tenant isolation, role-based access control (RBAC), and seamless integration between compute, networking, and storage services. ## Components
## MVP-Beta Status: COMPLETE ✅ - `chainfire`: replicated coordination store
- `flaredb`: replicated KV and metadata store
The MVP-Beta milestone validates end-to-end tenant isolation and core infrastructure provisioning: - `iam`: identity, token issuance, and authorization
- `prismnet`: tenant networking control plane
- ✅ **IAM**: User authentication, RBAC, multi-tenant isolation - `flashdns`: authoritative DNS service
- ✅ **PrismNET**: VPC overlay networking with tenant boundaries - `fiberlb`: load balancer control plane and dataplane
- ✅ **PlasmaVMC**: VM provisioning with network attachment - `plasmavmc`: VM control plane and worker agents
- ✅ **Integration**: E2E tests validate complete tenant path - `coronafs`: shared filesystem for mutable VM volumes
- `lightningstor`: object storage and VM image backing
**Test Results**: 8/8 integration tests passing - `k8shost`: Kubernetes-style hosting control plane
- IAM: 6/6 tenant path tests - `apigateway`: external API and proxy surface
- Network+VM: 2/2 integration tests - `nightlight`: metrics ingestion and query service
- `creditservice`: minimal reference quota/credit service
- `deployer`: bootstrap and phone-home deployment service
- `fleet-scheduler`: non-Kubernetes service scheduler for bare-metal cluster services
## Quick Start ## Quick Start
### Get Started in 3 Steps
1. **Deploy the Platform**
```bash
# Start IAM service
cd iam && cargo run --bin iam-server -- --port 50080
# Start PrismNET service
cd prismnet && cargo run --bin prismnet-server -- --port 50081
# Start PlasmaVMC service
cd plasmavmc && cargo run --bin plasmavmc-server -- --port 50082
```
2. **Onboard Your First Tenant**
```bash
# Create user, provision network, deploy VM
# See detailed guide below
```
3. **Verify End-to-End**
```bash
# Run integration tests
cd iam && cargo test --test tenant_path_integration
cd plasmavmc && cargo test --test prismnet_integration -- --ignored
```
**For detailed instructions**: [Tenant Onboarding Guide](docs/getting-started/tenant-onboarding.md)
## Architecture Overview
```
┌─────────────────────────────────────────────────────────────┐
│ User / API Client │
└─────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────┐
│ IAM (Identity & Access Management) │
│ • User authentication & JWT tokens │
│ • RBAC with hierarchical scopes (Org → Project) │
│ • Cross-tenant access denial │
└─────────────────────────────────────────────────────────────┘
┌─────────────┴─────────────┐
↓ ↓
┌──────────────────────┐ ┌──────────────────────┐
│ PrismNET │ │ PlasmaVMC │
│ • VPC overlay │────▶│ • VM provisioning │
│ • Subnets + DHCP │ │ • Hypervisor mgmt │
│ • Ports (IP/MAC) │ │ • Network attach │
│ • Security Groups │ │ • KVM, Firecracker │
└──────────────────────┘ └──────────────────────┘
```
**Full Architecture**: [MVP-Beta Tenant Path Architecture](docs/architecture/mvp-beta-tenant-path.md)
## Core Components
### IAM (Identity & Access Management)
**Location**: `/iam`
Multi-tenant identity and access management with comprehensive RBAC.
**Features**:
- User and service account management
- Hierarchical scopes: System → Organization → Project
- Custom role creation with fine-grained permissions
- Policy evaluation with conditional logic
- JWT token issuance with tenant claims
**Services**:
- `IamAdminService`: User, role, and policy management
- `IamAuthzService`: Authorization and permission checks
- `IamTokenService`: Token issuance and validation
**Quick Start**:
```bash ```bash
cd iam nix develop
cargo build --release nix run ./nix/test-cluster#cluster -- fresh-smoke
cargo run --bin iam-server -- --port 50080
``` ```
### PrismNET (Network Virtualization) ## Main Entrypoints
**Location**: `/prismnet` - workspace flake: [flake.nix](/home/centra/cloud/flake.nix)
- VM validation harness: [nix/test-cluster/README.md](/home/centra/cloud/nix/test-cluster/README.md)
- shared volume notes: [coronafs/README.md](/home/centra/cloud/coronafs/README.md)
- minimal quota-service rationale: [creditservice/README.md](/home/centra/cloud/creditservice/README.md)
- archived manual VM launch scripts: [baremetal/vm-cluster/README.md](/home/centra/cloud/baremetal/vm-cluster/README.md)
VPC-based overlay networking with tenant isolation. ## Repository Guide
**Features**: - [docs/README.md](/home/centra/cloud/docs/README.md): documentation entrypoint
- Virtual Private Cloud (VPC) provisioning - [docs/testing.md](/home/centra/cloud/docs/testing.md): validation path summary
- Subnet management with CIDR allocation - [docs/component-matrix.md](/home/centra/cloud/docs/component-matrix.md): supported multi-component compositions
- Port allocation with IP/MAC assignment - [docs/storage-benchmarks.md](/home/centra/cloud/docs/storage-benchmarks.md): latest CoronaFS and LightningStor lab numbers
- DHCP server integration - `plans/`: design notes and exploration documents
- Security group enforcement
- OVN integration for production networking
**Services**: ## Scope
- `VpcService`: VPC lifecycle management
- `SubnetService`: Subnet CRUD operations
- `PortService`: Port allocation and attachment
- `SecurityGroupService`: Firewall rule management
**Quick Start**: PhotonCloud is centered on reproducible infrastructure behavior rather than polished end-user product surfaces. Some services, such as `creditservice`, are intentionally minimal reference implementations that prove integration points rather than full products.
```bash
cd prismnet
export IAM_ENDPOINT=http://localhost:50080
cargo build --release
cargo run --bin prismnet-server -- --port 50081
```
### PlasmaVMC (VM Provisioning & Management)
**Location**: `/plasmavmc`
Virtual machine lifecycle management with hypervisor abstraction.
**Features**:
- VM provisioning with tenant scoping
- Hypervisor abstraction (KVM, Firecracker)
- Network attachment via PrismNET ports
- CPU, memory, and disk configuration
- VM metadata persistence (ChainFire)
- Live migration support (planned)
**Services**:
- `VmService`: VM lifecycle (create, start, stop, delete)
**Quick Start**:
```bash
cd plasmavmc
export NOVANET_ENDPOINT=http://localhost:50081
export IAM_ENDPOINT=http://localhost:50080
cargo build --release
cargo run --bin plasmavmc-server -- --port 50082
```
## Future Components (Roadmap)
### FlashDNS (DNS Service)
**Status**: Planned for next milestone
DNS resolution within tenant VPCs with automatic record creation.
**Features** (Planned):
- Tenant-scoped DNS zones
- Automatic hostname assignment for VMs
- DNS record lifecycle tied to resources
- Integration with PrismNET for VPC resolution
### FiberLB (Load Balancing)
**Status**: Planned for next milestone
Layer 4/7 load balancing with tenant isolation.
**Features** (Planned):
- Load balancer provisioning within VPCs
- Backend pool management (VM targets)
- VIP allocation from tenant subnets
- Health checks and failover
### LightningStor (Block Storage)
**Status**: Planned for next milestone
Distributed block storage with snapshot support.
**Features** (Planned):
- Volume creation and attachment to VMs
- Snapshot lifecycle management
- Replication and high availability
- Integration with ChainFire for immutable logs
## Testing
### Integration Test Suite
PlasmaCloud includes comprehensive integration tests validating the complete E2E tenant path.
**IAM Tests** (6 tests, 778 LOC):
```bash
cd iam
cargo test --test tenant_path_integration
# Tests:
# ✅ test_tenant_setup_flow
# ✅ test_cross_tenant_denial
# ✅ test_rbac_project_scope
# ✅ test_hierarchical_scope_inheritance
# ✅ test_custom_role_fine_grained_permissions
# ✅ test_multiple_role_bindings
```
**Network + VM Tests** (2 tests, 570 LOC):
```bash
cd plasmavmc
cargo test --test prismnet_integration -- --ignored
# Tests:
# ✅ prismnet_port_attachment_lifecycle
# ✅ test_network_tenant_isolation
```
**Coverage**: 8/8 tests passing (100% success rate)
See [E2E Test Documentation](docs/por/T023-e2e-tenant-path/e2e_test.md) for detailed test descriptions.
## Documentation
### Getting Started
- **[Tenant Onboarding Guide](docs/getting-started/tenant-onboarding.md)**: Complete walkthrough of deploying your first tenant
### Architecture
- **[MVP-Beta Tenant Path](docs/architecture/mvp-beta-tenant-path.md)**: Complete system architecture with diagrams
- **[Component Integration](docs/architecture/mvp-beta-tenant-path.md#component-boundaries)**: How services communicate
### Testing & Validation
- **[E2E Test Documentation](docs/por/T023-e2e-tenant-path/e2e_test.md)**: Comprehensive test suite description
- **[T023 Summary](docs/por/T023-e2e-tenant-path/SUMMARY.md)**: MVP-Beta deliverables and test results
### Component Specifications
- [IAM Specification](specifications/iam.md)
- [PrismNET Specification](specifications/prismnet.md)
- [PlasmaVMC Specification](specifications/plasmavmc.md)
## Tenant Isolation Model
PlasmaCloud enforces tenant isolation at three layers:
### Layer 1: IAM Policy Enforcement
Every API call is validated against the user's JWT token:
- Token includes `org_id` and `project_id` claims
- Resources are scoped as: `org/{org_id}/project/{project_id}/{resource_type}/{id}`
- RBAC policies enforce: `resource.org_id == token.org_id`
- Cross-tenant access results in 403 Forbidden
### Layer 2: Network VPC Isolation
Each VPC provides a logical network boundary:
- VPC scoped to an `org_id`
- OVN overlay ensures traffic isolation between VPCs
- Different tenants can use the same CIDR without collision
- Security groups provide intra-VPC firewall rules
### Layer 3: VM Scoping
Virtual machines are scoped to tenant organizations:
- VM metadata includes `org_id` and `project_id`
- VMs can only attach to ports in their tenant's VPC
- VM operations filter by token scope
- Hypervisor isolation ensures compute boundary
**Validation**: All three layers tested in [cross-tenant denial tests](docs/por/T023-e2e-tenant-path/e2e_test.md#test-scenario-2-cross-tenant-denial).
## Example Workflow
### Create a Tenant with Network and VM
```bash
# 1. Authenticate and get token
grpcurl -plaintext -d '{
"principal_id": "alice",
"org_id": "acme-corp",
"project_id": "project-alpha"
}' localhost:50080 iam.v1.IamTokenService/IssueToken
export TOKEN="<your-token>"
# 2. Create VPC
grpcurl -plaintext -H "Authorization: Bearer $TOKEN" -d '{
"org_id": "acme-corp",
"project_id": "project-alpha",
"name": "main-vpc",
"cidr": "10.0.0.0/16"
}' localhost:50081 prismnet.v1.VpcService/CreateVpc
export VPC_ID="<vpc-id>"
# 3. Create Subnet
grpcurl -plaintext -H "Authorization: Bearer $TOKEN" -d '{
"org_id": "acme-corp",
"project_id": "project-alpha",
"vpc_id": "'$VPC_ID'",
"name": "web-subnet",
"cidr": "10.0.1.0/24",
"gateway": "10.0.1.1",
"dhcp_enabled": true
}' localhost:50081 prismnet.v1.SubnetService/CreateSubnet
export SUBNET_ID="<subnet-id>"
# 4. Create Port
grpcurl -plaintext -H "Authorization: Bearer $TOKEN" -d '{
"org_id": "acme-corp",
"project_id": "project-alpha",
"subnet_id": "'$SUBNET_ID'",
"name": "vm-port",
"ip_address": "10.0.1.10"
}' localhost:50081 prismnet.v1.PortService/CreatePort
export PORT_ID="<port-id>"
# 5. Create VM with Network
grpcurl -plaintext -H "Authorization: Bearer $TOKEN" -d '{
"name": "web-server-1",
"org_id": "acme-corp",
"project_id": "project-alpha",
"spec": {
"network": [{
"id": "eth0",
"port_id": "'$PORT_ID'"
}]
}
}' localhost:50082 plasmavmc.v1.VmService/CreateVm
```
**Full walkthrough**: See [Tenant Onboarding Guide](docs/getting-started/tenant-onboarding.md)
## Development
### Prerequisites
- Rust 1.70+ with Cargo
- Protocol Buffers compiler (protoc)
- Optional: KVM for real VM execution
- Optional: OVN for production networking
### Build from Source
```bash
# Clone repository
git clone https://github.com/your-org/plasmacloud.git
cd cloud
# Initialize submodules
git submodule update --init --recursive
# Build all components
cd iam && cargo build --release
cd ../prismnet && cargo build --release
cd ../plasmavmc && cargo build --release
```
### Run Tests
```bash
# IAM tests
cd iam && cargo test --test tenant_path_integration
# Network + VM tests
cd plasmavmc && cargo test --test prismnet_integration -- --ignored
# Unit tests (all components)
cargo test
```
### Project Structure
```
cloud/
├── iam/ # Identity & Access Management
│ ├── crates/
│ │ ├── iam-api/ # gRPC services
│ │ ├── iam-authz/ # Authorization engine
│ │ ├── iam-store/ # Data persistence
│ │ └── iam-types/ # Core types
│ └── tests/
│ └── tenant_path_integration.rs # E2E tests
├── prismnet/ # Network Virtualization
│ ├── crates/
│ │ ├── prismnet-server/ # gRPC services
│ │ ├── prismnet-api/ # Protocol buffers
│ │ ├── prismnet-metadata/ # Metadata store
│ │ └── prismnet-ovn/ # OVN integration
│ └── proto/
├── plasmavmc/ # VM Provisioning
│ ├── crates/
│ │ ├── plasmavmc-server/ # VM service
│ │ ├── plasmavmc-api/ # Protocol buffers
│ │ ├── plasmavmc-hypervisor/ # Hypervisor abstraction
│ │ ├── plasmavmc-kvm/ # KVM backend
│ │ └── plasmavmc-firecracker/ # Firecracker backend
│ └── tests/
│ └── prismnet_integration.rs # E2E tests
├── flashdns/ # DNS Service (planned)
├── fiberlb/ # Load Balancing (planned)
├── lightningstor/ # Block Storage (planned)
├── chainfire/ # Immutable event log (submodule)
├── flaredb/ # Distributed metadata store (submodule)
├── docs/
│ ├── architecture/ # Architecture docs
│ ├── getting-started/ # Onboarding guides
│ └── por/ # Plan of Record (POR) docs
│ └── T023-e2e-tenant-path/ # MVP-Beta deliverables
├── specifications/ # Component specifications
└── README.md # This file
```
## Contributing
We welcome contributions! Please follow these guidelines:
1. **Fork the repository** and create a feature branch
2. **Write tests** for new functionality
3. **Update documentation** as needed
4. **Run tests** before submitting PR: `cargo test`
5. **Follow Rust style**: Use `cargo fmt` and `cargo clippy`
### Code Review Process
1. All PRs require at least one approval
2. CI must pass (tests, formatting, lints)
3. Documentation must be updated for user-facing changes
4. Integration tests required for new features
## License
PlasmaCloud is licensed under the Apache License 2.0. See [LICENSE](LICENSE) for details.
## Support & Community
- **GitHub Issues**: Report bugs or request features
- **Documentation**: See [docs/](docs/) for detailed guides
- **Architecture**: Review [architecture docs](docs/architecture/mvp-beta-tenant-path.md) for design decisions
## Roadmap
### Completed (MVP-Beta) ✅
- [x] IAM with RBAC and tenant scoping
- [x] PrismNET VPC overlay networking
- [x] PlasmaVMC VM provisioning
- [x] End-to-end integration tests
- [x] Comprehensive documentation
### In Progress
- [ ] FlashDNS integration (S3)
- [ ] FiberLB integration (S4)
- [ ] LightningStor integration (S5)
### Planned
- [ ] FlareDB persistence for production
- [ ] ChainFire integration for VM metadata
- [ ] OVN production deployment
- [ ] Kubernetes integration
- [ ] Terraform provider
- [ ] Web UI / Dashboard
## Acknowledgments
PlasmaCloud builds upon:
- **ChainFire**: Immutable event log for audit trails
- **FlareDB**: Distributed metadata store
- **OVN (Open Virtual Network)**: Production-grade overlay networking
- **gRPC**: High-performance RPC framework
- **Rust**: Safe, concurrent systems programming
---
**Status**: MVP-Beta Complete ✅
**Last Updated**: 2025-12-09
**Next Milestone**: FlashDNS, FiberLB, LightningStor integration
For detailed information, see:
- [Tenant Onboarding Guide](docs/getting-started/tenant-onboarding.md)
- [Architecture Documentation](docs/architecture/mvp-beta-tenant-path.md)
- [Test Documentation](docs/por/T023-e2e-tenant-path/e2e_test.md)

13
SECURITY.md Normal file
View file

@ -0,0 +1,13 @@
# Security Policy
Do not report sensitive vulnerabilities through public issues.
Use the repository security advisory workflow or a private maintainer contact channel when this repository is published.
When reporting, include:
- affected component
- impact summary
- reproduction steps
- configuration assumptions
- any suggested mitigation or patch direction

View file

@ -1,54 +0,0 @@
# Architectural Gap Analysis: Compute & Core
**Date:** 2025-12-08
**Scope:** Core Infrastructure (Chainfire, IAM, FlareDB) & Application Services (FlashDNS, PlasmaVMC)
## Executive Summary
The platform's core infrastructure ("Data" and "Identity" pillars) is in excellent shape, with implementation matching specifications closely. However, the "Compute" pillar (PlasmaVMC) exhibits a significant architectural deviation from its specification, currently existing as a monolithic prototype rather than the specified distributed control plane/agent model.
## Component Status Matrix
| Component | Role | Specification Status | Implementation Status | Alignment |
|-----------|------|----------------------|-----------------------|-----------|
| **Chainfire** | Cluster KVS | High | High | ✅ Strong |
| **Aegis (IAM)** | Identity | High | High | ✅ Strong |
| **FlareDB** | DBaaS KVS | High | High | ✅ Strong |
| **FlashDNS** | DNS Service | High | High | ✅ Strong |
| **PlasmaVMC** | VM Platform | High | **Low / Prototype** | ❌ **Mismatch** |
## Detailed Findings
### 1. Core Infrastructure (Chainfire, Aegis, FlareDB)
* **Chainfire:** Fully implemented crate structure. Detailed feature gap analysis exists (`chainfire_t003_gap_analysis.md`).
* **Aegis:** Correctly structured with `iam-server`, `iam-authn`, `iam-authz`, etc. Integration with Chainfire/FlareDB backends is present in `main.rs`.
* **FlareDB:** Correctly structured with `flaredb-pd`, `flaredb-server` (Multi-Raft), and reserved namespaces for IAM/Metrics.
### 2. Application Services (FlashDNS)
* **Status:** Excellent.
* **Evidence:** Crate structure matches spec. Integration with Chainfire (storage) and Aegis (auth) is visible in configuration and code.
### 3. Compute Platform (PlasmaVMC) - The Gap
* **Specification:** Describes a distributed system with:
* **Control Plane:** API, Scheduler, Image management.
* **Agent:** Runs on compute nodes, manages local hypervisors.
* **Communication:** gRPC between Control Plane and Agent.
* **Current Implementation:** Monolithic `plasmavmc-server`.
* The `server` binary directly initializes `HypervisorRegistry` and registers `KvmBackend`/`FireCrackerBackend`.
* **Missing Crates:**
* `plasmavmc-agent` (Critical)
* `plasmavmc-client`
* `plasmavmc-core` (Scheduler logic)
* **Implication:** The current code cannot support multi-node deployment or scheduling. It effectively runs the control plane *on* the hypervisor node.
## Recommendations
1. **Prioritize PlasmaVMC Refactoring:** The immediate engineering focus should be to split `plasmavmc-server` into:
* `plasmavmc-server` (Control Plane, Scheduler, API)
* `plasmavmc-agent` (Node status, Hypervisor control)
2. **Implement Agent Protocol:** Define the gRPC interface between Server and Agent (`agent.proto` mentioned in spec but possibly missing or unused).
3. **Leverage Existing Foundation:** The `plasmavmc-hypervisor` trait is solid. The `agent` implementation should simply wrap this existing trait, making the refactor straightforward.
## Conclusion
The project foundation is solid. The "Data" and "Identity" layers are ready for higher-level integration. The "Compute" layer requires architectural realignment to meet the distributed design goals.

View file

@ -1,5 +0,0 @@
Peer Aへ
/a あなたはpeerAです。戦略決定と計画立案に特化してください。実際の作業は、peerBへ依頼してください。PROJECT.mdは度々更新されることがあるので、PORに内容を追加したり、適切にMVPを設定・到達状況を確認するなどもあなたの仕事です。ともかく、終える前に確実にタスクをpeerBに渡すことを考えてください。
Peer Bへ
/b peerAからの実装依頼に基づいて実装や実験などの作業を行い、終わったあとは必ずpeerAに結果をto_peer.mdで報告してください。高品質に作業を行うことに集中してください。

5094
advice.md

File diff suppressed because one or more lines are too long

819
apigateway/Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -23,7 +23,7 @@ tokio = { version = "1.40", features = ["full"] }
# HTTP server # HTTP server
axum = "0.7" axum = "0.7"
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "stream", "json"] }
# Serialization # Serialization
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
@ -31,7 +31,7 @@ serde_json = "1.0"
toml = "0.8" toml = "0.8"
# gRPC # gRPC
tonic = "0.12" tonic = { version = "0.12", features = ["tls"] }
tonic-build = "0.12" tonic-build = "0.12"
prost = "0.13" prost = "0.13"
prost-types = "0.13" prost-types = "0.13"

View file

@ -31,6 +31,7 @@ bytes = "1"
iam-api = { path = "../../../iam/crates/iam-api" } iam-api = { path = "../../../iam/crates/iam-api" }
iam-authn = { path = "../../../iam/crates/iam-authn" } iam-authn = { path = "../../../iam/crates/iam-authn" }
iam-authz = { path = "../../../iam/crates/iam-authz" } iam-authz = { path = "../../../iam/crates/iam-authz" }
iam-service-auth = { path = "../../../iam/crates/iam-service-auth" }
iam-store = { path = "../../../iam/crates/iam-store" } iam-store = { path = "../../../iam/crates/iam-store" }
iam-types = { path = "../../../iam/crates/iam-types" } iam-types = { path = "../../../iam/crates/iam-types" }
creditservice-api = { path = "../../../creditservice/crates/creditservice-api" } creditservice-api = { path = "../../../creditservice/crates/creditservice-api" }

View file

@ -1,10 +1,8 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::io; use std::io;
use std::net::SocketAddr; use std::net::SocketAddr;
use std::pin::Pin;
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::Arc; use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::Duration; use std::time::Duration;
use apigateway_api::proto::{ use apigateway_api::proto::{
@ -13,15 +11,13 @@ use apigateway_api::proto::{
use apigateway_api::{GatewayAuthServiceClient, GatewayCreditServiceClient}; use apigateway_api::{GatewayAuthServiceClient, GatewayCreditServiceClient};
use axum::{ use axum::{
body::{to_bytes, Body}, body::{to_bytes, Body},
extract::State, extract::{ConnectInfo, State},
http::{HeaderMap, Request, StatusCode, Uri}, http::{HeaderMap, Request, StatusCode, Uri},
response::Response, response::Response,
routing::{any, get}, routing::{any, get},
Json, Router, Json, Router,
}; };
use clap::Parser; use clap::Parser;
use bytes::Bytes;
use futures_core::Stream;
use reqwest::{Client, Url}; use reqwest::{Client, Url};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use tonic::transport::{Certificate, Channel, ClientTlsConfig, Endpoint, Identity}; use tonic::transport::{Certificate, Channel, ClientTlsConfig, Endpoint, Identity};
@ -31,8 +27,41 @@ use tracing_subscriber::EnvFilter;
use uuid::Uuid; use uuid::Uuid;
const DEFAULT_REQUEST_ID_HEADER: &str = "x-request-id"; const DEFAULT_REQUEST_ID_HEADER: &str = "x-request-id";
const PHOTON_AUTH_TOKEN_HEADER: &str = "x-photon-auth-token";
const DEFAULT_AUTH_TIMEOUT_MS: u64 = 500; const DEFAULT_AUTH_TIMEOUT_MS: u64 = 500;
const DEFAULT_CREDIT_TIMEOUT_MS: u64 = 500; const DEFAULT_CREDIT_TIMEOUT_MS: u64 = 500;
const DEFAULT_UPSTREAM_TIMEOUT_MS: u64 = 10_000;
const RESERVED_AUTH_HEADERS: [&str; 10] = [
"authorization",
"x-photon-auth-token",
"x-subject-id",
"x-org-id",
"x-project-id",
"x-roles",
"x-scopes",
"x-iam-session-id",
"x-iam-principal-kind",
"x-iam-auth-method",
];
const AUTH_PROVIDER_BLOCK_HEADERS: [&str; 17] = [
"authorization",
"x-photon-auth-token",
"x-subject-id",
"x-org-id",
"x-project-id",
"x-roles",
"x-scopes",
"proxy-authorization",
"cookie",
"set-cookie",
"host",
"connection",
"upgrade",
"keep-alive",
"te",
"trailer",
"transfer-encoding",
];
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
@ -62,6 +91,22 @@ fn default_credit_units() -> u64 {
1 1
} }
fn default_upstream_timeout_ms() -> u64 {
DEFAULT_UPSTREAM_TIMEOUT_MS
}
#[derive(Debug, Clone, Serialize, Deserialize)]
struct TlsConfig {
#[serde(default)]
ca_file: Option<String>,
#[serde(default)]
cert_file: Option<String>,
#[serde(default)]
key_file: Option<String>,
#[serde(default)]
domain_name: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
struct AuthProviderConfig { struct AuthProviderConfig {
name: String, name: String,
@ -70,6 +115,8 @@ struct AuthProviderConfig {
endpoint: String, endpoint: String,
#[serde(default)] #[serde(default)]
timeout_ms: Option<u64>, timeout_ms: Option<u64>,
#[serde(default)]
tls: Option<TlsConfig>,
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
@ -80,6 +127,8 @@ struct CreditProviderConfig {
endpoint: String, endpoint: String,
#[serde(default)] #[serde(default)]
timeout_ms: Option<u64>, timeout_ms: Option<u64>,
#[serde(default)]
tls: Option<TlsConfig>,
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
@ -103,6 +152,8 @@ struct RouteCreditConfig {
#[serde(default = "default_commit_policy")] #[serde(default = "default_commit_policy")]
commit_on: CommitPolicy, commit_on: CommitPolicy,
#[serde(default)] #[serde(default)]
allow_header_subject: bool,
#[serde(default)]
attributes: HashMap<String, String>, attributes: HashMap<String, String>,
} }
@ -114,6 +165,8 @@ struct RouteConfig {
#[serde(default)] #[serde(default)]
strip_prefix: bool, strip_prefix: bool,
#[serde(default)] #[serde(default)]
timeout_ms: Option<u64>,
#[serde(default)]
auth: Option<RouteAuthConfig>, auth: Option<RouteAuthConfig>,
#[serde(default)] #[serde(default)]
credit: Option<RouteCreditConfig>, credit: Option<RouteCreditConfig>,
@ -134,6 +187,12 @@ struct ServerConfig {
log_level: String, log_level: String,
#[serde(default = "default_max_body_bytes")] #[serde(default = "default_max_body_bytes")]
max_body_bytes: usize, max_body_bytes: usize,
#[serde(default = "default_max_response_bytes")]
max_response_bytes: usize,
#[serde(default = "default_upstream_timeout_ms")]
upstream_timeout_ms: u64,
#[serde(default)]
trust_forwarded_headers: bool,
#[serde(default)] #[serde(default)]
auth_providers: Vec<AuthProviderConfig>, auth_providers: Vec<AuthProviderConfig>,
#[serde(default)] #[serde(default)]
@ -148,6 +207,9 @@ impl Default for ServerConfig {
http_addr: default_http_addr(), http_addr: default_http_addr(),
log_level: default_log_level(), log_level: default_log_level(),
max_body_bytes: default_max_body_bytes(), max_body_bytes: default_max_body_bytes(),
max_response_bytes: default_max_response_bytes(),
upstream_timeout_ms: default_upstream_timeout_ms(),
trust_forwarded_headers: false,
auth_providers: Vec::new(), auth_providers: Vec::new(),
credit_providers: Vec::new(), credit_providers: Vec::new(),
routes: Vec::new(), routes: Vec::new(),
@ -175,9 +237,12 @@ struct Args {
struct ServerState { struct ServerState {
routes: Vec<Route>, routes: Vec<Route>,
client: Client, client: Client,
upstream_timeout: Duration,
max_body_bytes: usize, max_body_bytes: usize,
max_response_bytes: usize,
auth_providers: HashMap<String, AuthProvider>, auth_providers: HashMap<String, AuthProvider>,
credit_providers: HashMap<String, CreditProvider>, credit_providers: HashMap<String, CreditProvider>,
trust_forwarded_headers: bool,
} }
#[derive(Clone)] #[derive(Clone)]
@ -211,6 +276,13 @@ struct SubjectInfo {
scopes: Vec<String>, scopes: Vec<String>,
} }
#[derive(Clone, Debug)]
struct CreditSubject {
subject_id: String,
org_id: String,
project_id: String,
}
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
struct AuthDecision { struct AuthDecision {
allow: bool, allow: bool,
@ -238,84 +310,6 @@ struct CreditReservation {
reservation_id: String, reservation_id: String,
} }
struct CreditFinalizeState {
state: Arc<ServerState>,
route: Route,
reservation: Option<CreditReservation>,
status: reqwest::StatusCode,
}
impl CreditFinalizeState {
fn spawn_success(self) {
tokio::spawn(async move {
finalize_credit(&self.state, &self.route, self.reservation, self.status).await;
});
}
fn spawn_abort(self) {
tokio::spawn(async move {
finalize_credit_abort(&self.state, &self.route, self.reservation).await;
});
}
}
struct CreditFinalizeStream {
bytes: Option<Bytes>,
finalize: Option<CreditFinalizeState>,
completed: bool,
}
impl CreditFinalizeStream {
fn new(bytes: Bytes, finalize: CreditFinalizeState) -> Self {
Self {
bytes: Some(bytes),
finalize: Some(finalize),
completed: false,
}
}
fn finalize_success(&mut self) {
if self.completed {
return;
}
self.completed = true;
if let Some(finalize) = self.finalize.take() {
finalize.spawn_success();
}
}
fn finalize_abort(&mut self) {
if self.completed {
return;
}
self.completed = true;
if let Some(finalize) = self.finalize.take() {
finalize.spawn_abort();
}
}
}
impl Stream for CreditFinalizeStream {
type Item = Result<Bytes, io::Error>;
fn poll_next(mut self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
if let Some(bytes) = self.bytes.take() {
return Poll::Ready(Some(Ok(bytes)));
}
self.finalize_success();
Poll::Ready(None)
}
}
impl Drop for CreditFinalizeStream {
fn drop(&mut self) {
if !self.completed {
self.finalize_abort();
}
}
}
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
struct RequestContext { struct RequestContext {
request_id: String, request_id: String,
@ -341,6 +335,10 @@ fn default_max_body_bytes() -> usize {
16 * 1024 * 1024 16 * 1024 * 1024
} }
fn default_max_response_bytes() -> usize {
default_max_body_bytes()
}
#[tokio::main] #[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> { async fn main() -> Result<(), Box<dyn std::error::Error>> {
let args = Args::parse(); let args = Args::parse();
@ -374,10 +372,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let routes = build_routes(config.routes)?; let routes = build_routes(config.routes)?;
let auth_providers = build_auth_providers(config.auth_providers).await?; let auth_providers = build_auth_providers(config.auth_providers).await?;
let credit_providers = build_credit_providers(config.credit_providers).await?; let credit_providers = build_credit_providers(config.credit_providers).await?;
let upstream_timeout = Duration::from_millis(config.upstream_timeout_ms);
let client = Client::builder().build()?;
info!("Starting API gateway"); info!("Starting API gateway");
info!(" HTTP: {}", config.http_addr); info!(" HTTP: {}", config.http_addr);
info!(" Max body bytes: {}", config.max_body_bytes); info!(" Max body bytes: {}", config.max_body_bytes);
info!(" Max response bytes: {}", config.max_response_bytes);
if !routes.is_empty() { if !routes.is_empty() {
info!("Configured {} routes", routes.len()); info!("Configured {} routes", routes.len());
@ -394,10 +395,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let state = Arc::new(ServerState { let state = Arc::new(ServerState {
routes, routes,
client: Client::new(), client,
upstream_timeout,
max_body_bytes: config.max_body_bytes, max_body_bytes: config.max_body_bytes,
max_response_bytes: config.max_response_bytes,
auth_providers, auth_providers,
credit_providers, credit_providers,
trust_forwarded_headers: config.trust_forwarded_headers,
}); });
let app = Router::new() let app = Router::new()
@ -408,7 +412,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_state(state); .with_state(state);
let listener = tokio::net::TcpListener::bind(config.http_addr).await?; let listener = tokio::net::TcpListener::bind(config.http_addr).await?;
axum::serve(listener, app).await?; axum::serve(listener, app.into_make_service_with_connect_info::<SocketAddr>()).await?;
Ok(()) Ok(())
} }
@ -427,6 +431,7 @@ async fn list_routes(State(state): State<Arc<ServerState>>) -> Json<Vec<RouteCon
async fn proxy( async fn proxy(
State(state): State<Arc<ServerState>>, State(state): State<Arc<ServerState>>,
ConnectInfo(remote_addr): ConnectInfo<SocketAddr>,
request: Request<Body>, request: Request<Body>,
) -> Result<Response<Body>, StatusCode> { ) -> Result<Response<Body>, StatusCode> {
let path = request.uri().path(); let path = request.uri().path();
@ -441,15 +446,16 @@ async fn proxy(
path: request.uri().path().to_string(), path: request.uri().path().to_string(),
raw_query: request.uri().query().unwrap_or("").to_string(), raw_query: request.uri().query().unwrap_or("").to_string(),
headers: headers_to_map(request.headers()), headers: headers_to_map(request.headers()),
client_ip: extract_client_ip(request.headers()), client_ip: extract_client_ip(
request.headers(),
remote_addr,
state.trust_forwarded_headers,
),
route_name: route.config.name.clone(), route_name: route.config.name.clone(),
}; };
let auth_token = request let auth_token = extract_auth_token(request.headers());
.headers() let forward_client_auth_headers = route.config.auth.is_none();
.get(axum::http::header::AUTHORIZATION)
.and_then(|value| value.to_str().ok())
.map(|value| value.to_string());
let auth_outcome = enforce_auth(&state, &route, &context, auth_token).await?; let auth_outcome = enforce_auth(&state, &route, &context, auth_token).await?;
let credit_reservation = let credit_reservation =
@ -457,11 +463,22 @@ async fn proxy(
let target_url = build_upstream_url(&route, request.uri())?; let target_url = build_upstream_url(&route, request.uri())?;
let mut builder = state.client.request(request.method().clone(), target_url); let request_timeout =
Duration::from_millis(route.config.timeout_ms.unwrap_or(state.upstream_timeout.as_millis() as u64));
let mut builder = state
.client
.request(request.method().clone(), target_url)
.timeout(request_timeout);
for (name, value) in request.headers().iter() { for (name, value) in request.headers().iter() {
if name == axum::http::header::HOST || name == axum::http::header::CONNECTION { if name == axum::http::header::HOST || name == axum::http::header::CONNECTION {
continue; continue;
} }
if is_reserved_auth_header(name) {
if forward_client_auth_headers && should_preserve_client_auth_header(name.as_str()) {
builder = builder.header(name, value);
}
continue;
}
builder = builder.header(name, value); builder = builder.header(name, value);
} }
@ -481,6 +498,12 @@ async fn proxy(
}; };
let status = response.status(); let status = response.status();
if let Some(content_length) = response.content_length() {
if state.max_response_bytes > 0 && content_length as usize > state.max_response_bytes {
finalize_credit_abort(&state, &route, credit_reservation).await;
return Err(StatusCode::PAYLOAD_TOO_LARGE);
}
}
let mut response_builder = Response::builder().status(status); let mut response_builder = Response::builder().status(status);
let headers = response_builder let headers = response_builder
@ -494,23 +517,22 @@ async fn proxy(
headers.insert(name, value.clone()); headers.insert(name, value.clone());
} }
let bytes = match response.bytes().await { let body = match response.bytes().await {
Ok(bytes) => bytes, Ok(body) => body,
Err(_) => { Err(_) => {
finalize_credit_abort(&state, &route, credit_reservation).await; finalize_credit_abort(&state, &route, credit_reservation).await;
return Err(StatusCode::BAD_GATEWAY); return Err(StatusCode::BAD_GATEWAY);
} }
}; };
if state.max_response_bytes > 0 && body.len() > state.max_response_bytes {
finalize_credit_abort(&state, &route, credit_reservation).await;
return Err(StatusCode::PAYLOAD_TOO_LARGE);
}
let finalize = CreditFinalizeState { finalize_credit(&state, &route, credit_reservation, status).await;
state: Arc::clone(&state),
route,
reservation: credit_reservation,
status,
};
response_builder response_builder
.body(Body::from_stream(CreditFinalizeStream::new(bytes, finalize))) .body(Body::from(body))
.map_err(|_| StatusCode::BAD_GATEWAY) .map_err(|_| StatusCode::BAD_GATEWAY)
} }
@ -592,7 +614,22 @@ async fn enforce_credit(
return Ok(None); return Ok(None);
} }
let decision = reserve_credit(state, credit_cfg, context, subject).await; let credit_subject = resolve_credit_subject(context, subject, credit_cfg.allow_header_subject);
if credit_subject.is_none() {
if credit_cfg.mode == PolicyMode::Required {
return Err(StatusCode::UNAUTHORIZED);
}
warn!("Credit skipped: missing org/project scope");
return Ok(None);
}
let decision = reserve_credit(
state,
credit_cfg,
context,
credit_subject.as_ref().expect("credit subject resolved"),
)
.await;
apply_credit_mode(credit_cfg.mode, credit_cfg.fail_open, decision) apply_credit_mode(credit_cfg.mode, credit_cfg.fail_open, decision)
.map(|decision| { .map(|decision| {
decision.map(|decision| CreditReservation { decision.map(|decision| CreditReservation {
@ -696,26 +733,56 @@ async fn authorize_request(
} }
} }
fn resolve_credit_subject(
context: &RequestContext,
subject: Option<&SubjectInfo>,
allow_header_subject: bool,
) -> Option<CreditSubject> {
if let Some(subject) = subject {
return Some(CreditSubject {
subject_id: subject.subject_id.clone(),
org_id: subject.org_id.clone(),
project_id: subject.project_id.clone(),
});
}
if !allow_header_subject {
return None;
}
let org_id = context.headers.get("x-org-id")?.trim();
let project_id = context.headers.get("x-project-id")?.trim();
if org_id.is_empty() || project_id.is_empty() {
return None;
}
let subject_id = context
.headers
.get("x-subject-id")
.map(|value| value.trim().to_string())
.unwrap_or_default();
Some(CreditSubject {
subject_id,
org_id: org_id.to_string(),
project_id: project_id.to_string(),
})
}
async fn reserve_credit( async fn reserve_credit(
state: &ServerState, state: &ServerState,
credit_cfg: &RouteCreditConfig, credit_cfg: &RouteCreditConfig,
context: &RequestContext, context: &RequestContext,
subject: Option<&SubjectInfo>, credit_subject: &CreditSubject,
) -> Result<CreditDecision, StatusCode> { ) -> Result<CreditDecision, StatusCode> {
let provider = state let provider = state
.credit_providers .credit_providers
.get(&credit_cfg.provider) .get(&credit_cfg.provider)
.ok_or(StatusCode::INTERNAL_SERVER_ERROR)?; .ok_or(StatusCode::INTERNAL_SERVER_ERROR)?;
let (subject_id, org_id, project_id) = subject let subject_id = credit_subject.subject_id.clone();
.map(|subject| { let org_id = credit_subject.org_id.clone();
( let project_id = credit_subject.project_id.clone();
subject.subject_id.clone(),
subject.org_id.clone(),
subject.project_id.clone(),
)
})
.unwrap_or_default();
match provider { match provider {
CreditProvider::Grpc(provider) => { CreditProvider::Grpc(provider) => {
@ -875,8 +942,14 @@ async fn rollback_credit(
} }
} }
fn apply_auth_headers(mut builder: reqwest::RequestBuilder, outcome: &AuthOutcome) -> reqwest::RequestBuilder { fn apply_auth_headers(
mut builder: reqwest::RequestBuilder,
outcome: &AuthOutcome,
) -> reqwest::RequestBuilder {
for (key, value) in &outcome.headers { for (key, value) in &outcome.headers {
if !should_forward_auth_header(key) {
continue;
}
builder = builder.header(key, value); builder = builder.header(key, value);
} }
@ -896,6 +969,39 @@ fn apply_auth_headers(mut builder: reqwest::RequestBuilder, outcome: &AuthOutcom
builder builder
} }
async fn build_client_tls_config(
tls: &Option<TlsConfig>,
) -> Result<Option<ClientTlsConfig>, Box<dyn std::error::Error>> {
let Some(tls) = tls else {
return Ok(None);
};
let mut tls_config = ClientTlsConfig::new();
if let Some(ca_file) = &tls.ca_file {
let ca = tokio::fs::read(ca_file).await?;
tls_config = tls_config.ca_certificate(Certificate::from_pem(ca));
}
match (&tls.cert_file, &tls.key_file) {
(Some(cert_file), Some(key_file)) => {
let cert = tokio::fs::read(cert_file).await?;
let key = tokio::fs::read(key_file).await?;
tls_config = tls_config.identity(Identity::from_pem(cert, key));
}
(None, None) => {}
_ => {
return Err(config_error("tls requires both cert_file and key_file").into());
}
}
if let Some(domain) = &tls.domain_name {
tls_config = tls_config.domain_name(domain);
}
Ok(Some(tls_config))
}
async fn build_auth_providers( async fn build_auth_providers(
configs: Vec<AuthProviderConfig>, configs: Vec<AuthProviderConfig>,
) -> Result<HashMap<String, AuthProvider>, Box<dyn std::error::Error>> { ) -> Result<HashMap<String, AuthProvider>, Box<dyn std::error::Error>> {
@ -913,11 +1019,19 @@ async fn build_auth_providers(
match provider_type.as_str() { match provider_type.as_str() {
"grpc" => { "grpc" => {
let endpoint = Endpoint::from_shared(config.endpoint.clone())? let mut endpoint = Endpoint::from_shared(config.endpoint.clone())?
.connect_timeout(Duration::from_millis(config.timeout_ms.unwrap_or(DEFAULT_AUTH_TIMEOUT_MS))) .connect_timeout(Duration::from_millis(
.timeout(Duration::from_millis(config.timeout_ms.unwrap_or(DEFAULT_AUTH_TIMEOUT_MS))); config.timeout_ms.unwrap_or(DEFAULT_AUTH_TIMEOUT_MS),
))
.timeout(Duration::from_millis(
config.timeout_ms.unwrap_or(DEFAULT_AUTH_TIMEOUT_MS),
));
if let Some(tls) = build_client_tls_config(&config.tls).await? {
endpoint = endpoint.tls_config(tls)?;
}
let channel = endpoint.connect().await?; let channel = endpoint.connect().await?;
let timeout = Duration::from_millis(config.timeout_ms.unwrap_or(DEFAULT_AUTH_TIMEOUT_MS)); let timeout =
Duration::from_millis(config.timeout_ms.unwrap_or(DEFAULT_AUTH_TIMEOUT_MS));
providers.insert( providers.insert(
config.name.clone(), config.name.clone(),
AuthProvider::Grpc(GrpcAuthProvider { AuthProvider::Grpc(GrpcAuthProvider {
@ -956,7 +1070,7 @@ async fn build_credit_providers(
match provider_type.as_str() { match provider_type.as_str() {
"grpc" => { "grpc" => {
let endpoint = Endpoint::from_shared(config.endpoint.clone())? let mut endpoint = Endpoint::from_shared(config.endpoint.clone())?
.connect_timeout(Duration::from_millis( .connect_timeout(Duration::from_millis(
config config
.timeout_ms .timeout_ms
@ -968,6 +1082,10 @@ async fn build_credit_providers(
.unwrap_or(DEFAULT_CREDIT_TIMEOUT_MS), .unwrap_or(DEFAULT_CREDIT_TIMEOUT_MS),
)); ));
if let Some(tls) = build_client_tls_config(&config.tls).await? {
endpoint = endpoint.tls_config(tls)?;
}
let channel = endpoint.connect().await?; let channel = endpoint.connect().await?;
let timeout = Duration::from_millis( let timeout = Duration::from_millis(
config config
@ -1049,13 +1167,34 @@ fn extract_request_id(headers: &HeaderMap) -> String {
.unwrap_or_else(|| Uuid::new_v4().to_string()) .unwrap_or_else(|| Uuid::new_v4().to_string())
} }
fn extract_client_ip(headers: &HeaderMap) -> String { fn extract_client_ip(
headers headers: &HeaderMap,
remote_addr: SocketAddr,
trust_forwarded_headers: bool,
) -> String {
if trust_forwarded_headers {
if let Some(value) = headers
.get("x-forwarded-for") .get("x-forwarded-for")
.and_then(|value| value.to_str().ok()) .and_then(|value| value.to_str().ok())
.and_then(|value| value.split(',').next()) .and_then(|value| value.split(',').next())
.map(|value| value.trim().to_string()) {
.unwrap_or_default() let trimmed = value.trim();
if !trimmed.is_empty() {
return trimmed.to_string();
}
}
if let Some(value) = headers
.get("x-real-ip")
.and_then(|value| value.to_str().ok())
{
let trimmed = value.trim();
if !trimmed.is_empty() {
return trimmed.to_string();
}
}
}
remote_addr.ip().to_string()
} }
fn headers_to_map(headers: &HeaderMap) -> HashMap<String, String> { fn headers_to_map(headers: &HeaderMap) -> HashMap<String, String> {
@ -1073,6 +1212,78 @@ fn headers_to_map(headers: &HeaderMap) -> HashMap<String, String> {
map map
} }
fn extract_auth_token(headers: &HeaderMap) -> Option<String> {
let auth_header = headers
.get(axum::http::header::AUTHORIZATION)
.and_then(|value| value.to_str().ok());
if let Some(token) = auth_header.and_then(parse_auth_token_value) {
return Some(token);
}
let photon_header = headers
.get(PHOTON_AUTH_TOKEN_HEADER)
.and_then(|value| value.to_str().ok());
photon_header.and_then(parse_auth_token_value)
}
fn is_reserved_auth_header(name: &axum::http::header::HeaderName) -> bool {
is_reserved_auth_header_str(name.as_str())
}
fn is_reserved_auth_header_str(name: &str) -> bool {
let header = name.to_ascii_lowercase();
RESERVED_AUTH_HEADERS.iter().any(|value| *value == header)
}
fn should_forward_auth_header(name: &str) -> bool {
let header = name.to_ascii_lowercase();
if AUTH_PROVIDER_BLOCK_HEADERS
.iter()
.any(|value| *value == header)
{
return false;
}
header.starts_with("x-")
}
fn should_preserve_client_auth_header(name: &str) -> bool {
let header = name.to_ascii_lowercase();
header == "authorization" || header == PHOTON_AUTH_TOKEN_HEADER
}
fn parse_auth_token_value(value: &str) -> Option<String> {
let trimmed = value.trim();
if trimmed.is_empty() {
return None;
}
if let Some(token) = parse_bearer_token(trimmed) {
return Some(token);
}
// Legacy support: allow raw token values without a scheme.
if trimmed.split_whitespace().count() != 1 {
return None;
}
Some(trimmed.to_string())
}
fn parse_bearer_token(value: &str) -> Option<String> {
let mut parts = value.split_whitespace();
let scheme = parts.next()?;
if !scheme.eq_ignore_ascii_case("bearer") {
return None;
}
let token = parts.next()?;
if parts.next().is_some() {
return None;
}
Some(token.to_string())
}
fn normalize_path_prefix(prefix: &str) -> String { fn normalize_path_prefix(prefix: &str) -> String {
let trimmed = prefix.trim(); let trimmed = prefix.trim();
if trimmed.is_empty() { if trimmed.is_empty() {
@ -1104,7 +1315,22 @@ fn normalize_upstream_base_path(path: &str) -> String {
fn match_route<'a>(routes: &'a [Route], path: &str) -> Option<&'a Route> { fn match_route<'a>(routes: &'a [Route], path: &str) -> Option<&'a Route> {
routes routes
.iter() .iter()
.find(|route| path.starts_with(&route.config.path_prefix)) .find(|route| path_matches_prefix(path, &route.config.path_prefix))
}
fn path_matches_prefix(path: &str, prefix: &str) -> bool {
if prefix == "/" {
return true;
}
if path == prefix {
return true;
}
match path.strip_prefix(prefix) {
Some(stripped) => stripped.starts_with('/'),
None => false,
}
} }
fn strip_prefix_path(path: &str, prefix: &str) -> String { fn strip_prefix_path(path: &str, prefix: &str) -> String {
@ -1160,9 +1386,8 @@ fn build_upstream_url(route: &Route, uri: &Uri) -> Result<Url, StatusCode> {
mod tests { mod tests {
use super::*; use super::*;
use axum::routing::get; use axum::routing::get;
use creditservice_api::{ use creditservice_api::{CreditServiceImpl, CreditStorage, GatewayCreditServiceImpl};
CreditServiceImpl, CreditStorage, GatewayCreditServiceImpl, GatewayCreditServiceServer, use apigateway_api::GatewayCreditServiceServer;
};
use creditservice_types::Wallet; use creditservice_types::Wallet;
use iam_api::{GatewayAuthServiceImpl, GatewayAuthServiceServer}; use iam_api::{GatewayAuthServiceImpl, GatewayAuthServiceServer};
use iam_authn::{InternalTokenConfig, InternalTokenService, SigningKey}; use iam_authn::{InternalTokenConfig, InternalTokenService, SigningKey};
@ -1173,12 +1398,28 @@ mod tests {
use tonic::transport::Server; use tonic::transport::Server;
use uuid::Uuid; use uuid::Uuid;
async fn wait_for_test_tcp(addr: SocketAddr) {
let deadline = tokio::time::Instant::now() + Duration::from_secs(2);
loop {
if tokio::net::TcpStream::connect(addr).await.is_ok() {
return;
}
assert!(
tokio::time::Instant::now() < deadline,
"timed out waiting for test listener {}",
addr
);
tokio::time::sleep(Duration::from_millis(25)).await;
}
}
fn route_config(name: &str, prefix: &str, upstream: &str, strip_prefix: bool) -> RouteConfig { fn route_config(name: &str, prefix: &str, upstream: &str, strip_prefix: bool) -> RouteConfig {
RouteConfig { RouteConfig {
name: name.to_string(), name: name.to_string(),
path_prefix: prefix.to_string(), path_prefix: prefix.to_string(),
upstream: upstream.to_string(), upstream: upstream.to_string(),
strip_prefix, strip_prefix,
timeout_ms: None,
auth: None, auth: None,
credit: None, credit: None,
} }
@ -1202,7 +1443,21 @@ mod tests {
} }
async fn start_upstream() -> SocketAddr { async fn start_upstream() -> SocketAddr {
let app = Router::new().route("/v1/echo", get(|| async { "ok" })); let app = Router::new()
.route("/v1/echo", get(|| async { "ok" }))
.route(
"/v1/echo-auth",
get(|headers: HeaderMap| async move {
Json(serde_json::json!({
"authorization": headers
.get(axum::http::header::AUTHORIZATION)
.and_then(|value| value.to_str().ok()),
"photon_token": headers
.get(PHOTON_AUTH_TOKEN_HEADER)
.and_then(|value| value.to_str().ok()),
}))
}),
);
let listener = tokio::net::TcpListener::bind("127.0.0.1:0") let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
.await .await
.expect("bind upstream"); .expect("bind upstream");
@ -1210,6 +1465,7 @@ mod tests {
tokio::spawn(async move { tokio::spawn(async move {
axum::serve(listener, app).await.expect("upstream serve"); axum::serve(listener, app).await.expect("upstream serve");
}); });
wait_for_test_tcp(addr).await;
addr addr
} }
@ -1279,10 +1535,11 @@ mod tests {
.expect("iam gateway serve"); .expect("iam gateway serve");
}); });
wait_for_test_tcp(addr).await;
(addr, issued.token) (addr, issued.token)
} }
async fn start_credit_gateway() -> SocketAddr { async fn start_credit_gateway(iam_addr: &SocketAddr) -> SocketAddr {
let storage = creditservice_api::InMemoryStorage::new(); let storage = creditservice_api::InMemoryStorage::new();
let wallet = Wallet::new("proj-1".into(), "org-1".into(), 100); let wallet = Wallet::new("proj-1".into(), "org-1".into(), 100);
storage storage
@ -1290,7 +1547,13 @@ mod tests {
.await .await
.expect("wallet create"); .expect("wallet create");
let credit_service = Arc::new(CreditServiceImpl::new(storage)); let auth_service = Arc::new(
iam_service_auth::AuthService::new(&format!("http://{}", iam_addr))
.await
.expect("auth service"),
);
let credit_service = Arc::new(CreditServiceImpl::new(storage, auth_service));
let gateway_credit = GatewayCreditServiceImpl::new(credit_service); let gateway_credit = GatewayCreditServiceImpl::new(credit_service);
let listener = tokio::net::TcpListener::bind("127.0.0.1:0") let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
@ -1305,6 +1568,7 @@ mod tests {
.expect("credit gateway serve"); .expect("credit gateway serve");
}); });
wait_for_test_tcp(addr).await;
addr addr
} }
@ -1343,6 +1607,23 @@ mod tests {
assert_eq!(matched.config.name, "api-v1"); assert_eq!(matched.config.name, "api-v1");
} }
#[test]
fn test_match_route_segment_boundary() {
let routes = build_routes(vec![
route_config("api", "/api", "http://example.com", false),
route_config("api2", "/api2", "http://example.com", false),
])
.unwrap();
let matched = match_route(&routes, "/api2").unwrap();
assert_eq!(matched.config.name, "api2");
let matched = match_route(&routes, "/api2/health").unwrap();
assert_eq!(matched.config.name, "api2");
assert!(match_route(&routes, "/apiary").is_none());
}
#[test] #[test]
fn test_build_upstream_url_preserves_query() { fn test_build_upstream_url_preserves_query() {
let routes = build_routes(vec![route_config( let routes = build_routes(vec![route_config(
@ -1419,13 +1700,14 @@ mod tests {
async fn test_gateway_auth_and_credit_flow() { async fn test_gateway_auth_and_credit_flow() {
let upstream_addr = start_upstream().await; let upstream_addr = start_upstream().await;
let (iam_addr, token) = start_iam_gateway().await; let (iam_addr, token) = start_iam_gateway().await;
let credit_addr = start_credit_gateway().await; let credit_addr = start_credit_gateway(&iam_addr).await;
let routes = build_routes(vec![RouteConfig { let routes = build_routes(vec![RouteConfig {
name: "public".to_string(), name: "public".to_string(),
path_prefix: "/v1".to_string(), path_prefix: "/v1".to_string(),
upstream: format!("http://{}", upstream_addr), upstream: format!("http://{}", upstream_addr),
strip_prefix: false, strip_prefix: false,
timeout_ms: None,
auth: Some(RouteAuthConfig { auth: Some(RouteAuthConfig {
provider: "iam".to_string(), provider: "iam".to_string(),
mode: PolicyMode::Required, mode: PolicyMode::Required,
@ -1437,6 +1719,7 @@ mod tests {
units: 1, units: 1,
fail_open: false, fail_open: false,
commit_on: CommitPolicy::Success, commit_on: CommitPolicy::Success,
allow_header_subject: false,
attributes: HashMap::new(), attributes: HashMap::new(),
}), }),
}]) }])
@ -1447,6 +1730,7 @@ mod tests {
provider_type: "grpc".to_string(), provider_type: "grpc".to_string(),
endpoint: format!("http://{}", iam_addr), endpoint: format!("http://{}", iam_addr),
timeout_ms: Some(1000), timeout_ms: Some(1000),
tls: None,
}]) }])
.await .await
.unwrap(); .unwrap();
@ -1464,19 +1748,124 @@ mod tests {
let state = Arc::new(ServerState { let state = Arc::new(ServerState {
routes, routes,
client: Client::new(), client: Client::new(),
upstream_timeout: Duration::from_secs(5),
max_body_bytes: 1024 * 1024, max_body_bytes: 1024 * 1024,
max_response_bytes: 1024 * 1024,
auth_providers, auth_providers,
credit_providers, credit_providers,
trust_forwarded_headers: false,
});
let deadline = tokio::time::Instant::now() + Duration::from_secs(10);
let mut response = None;
while tokio::time::Instant::now() < deadline {
let request = Request::builder()
.method("GET")
.uri("/v1/echo")
.header(axum::http::header::AUTHORIZATION, &token)
.body(Body::empty())
.expect("request build");
match proxy(
State(Arc::clone(&state)),
ConnectInfo("127.0.0.1:40000".parse().unwrap()),
request,
)
.await
{
Ok(ok) => {
response = Some(ok);
break;
}
Err(StatusCode::BAD_GATEWAY) => {
tokio::time::sleep(Duration::from_millis(25)).await;
}
Err(status) => panic!("unexpected proxy status: {}", status),
}
}
let response = response.expect("gateway auth+credit test timed out waiting for ready backends");
assert_eq!(response.status(), StatusCode::OK);
}
#[tokio::test]
async fn test_proxy_forwards_client_auth_headers_when_route_has_no_auth() {
let upstream_addr = start_upstream().await;
let routes = build_routes(vec![route_config(
"passthrough",
"/v1",
&format!("http://{}", upstream_addr),
false,
)])
.unwrap();
let state = Arc::new(ServerState {
routes,
client: Client::new(),
upstream_timeout: Duration::from_secs(5),
max_body_bytes: 1024 * 1024,
max_response_bytes: 1024 * 1024,
auth_providers: HashMap::new(),
credit_providers: HashMap::new(),
trust_forwarded_headers: false,
}); });
let request = Request::builder() let request = Request::builder()
.method("GET") .method("GET")
.uri("/v1/echo") .uri("/v1/echo-auth")
.header(axum::http::header::AUTHORIZATION, token) .header(axum::http::header::AUTHORIZATION, "Bearer passthrough-token")
.header(PHOTON_AUTH_TOKEN_HEADER, "photon-token")
.body(Body::empty()) .body(Body::empty())
.expect("request build"); .expect("request build");
let response = proxy(State(state), request).await.unwrap(); let response = proxy(
State(state),
ConnectInfo("127.0.0.1:40000".parse().unwrap()),
request,
)
.await
.unwrap();
assert_eq!(response.status(), StatusCode::OK); assert_eq!(response.status(), StatusCode::OK);
let body = to_bytes(response.into_body(), 1024 * 1024).await.unwrap();
let json: serde_json::Value = serde_json::from_slice(&body).unwrap();
assert_eq!(json.get("authorization").and_then(|v| v.as_str()), Some("Bearer passthrough-token"));
assert_eq!(json.get("photon_token").and_then(|v| v.as_str()), Some("photon-token"));
}
#[test]
fn test_extract_auth_token_accepts_bearer_authorization() {
let mut headers = HeaderMap::new();
headers.insert(
axum::http::header::AUTHORIZATION,
"Bearer abc123".parse().unwrap(),
);
assert_eq!(extract_auth_token(&headers).as_deref(), Some("abc123"));
}
#[test]
fn test_extract_auth_token_accepts_legacy_raw_authorization() {
let mut headers = HeaderMap::new();
headers.insert(
axum::http::header::AUTHORIZATION,
"raw-token".parse().unwrap(),
);
assert_eq!(extract_auth_token(&headers).as_deref(), Some("raw-token"));
}
#[test]
fn test_extract_auth_token_falls_back_to_photon_header() {
let mut headers = HeaderMap::new();
headers.insert(
axum::http::header::AUTHORIZATION,
"Basic abc".parse().unwrap(),
);
headers.insert(PHOTON_AUTH_TOKEN_HEADER, "photon-token".parse().unwrap());
assert_eq!(
extract_auth_token(&headers).as_deref(),
Some("photon-token")
);
} }
} }

View file

@ -1,763 +0,0 @@
# First-Boot Automation Architecture
## Overview
The first-boot automation system provides automated cluster joining and service initialization for bare-metal provisioned nodes. It handles two critical scenarios:
1. **Bootstrap Mode**: First 3 nodes initialize a new Raft cluster
2. **Join Mode**: Additional nodes join an existing cluster
This document describes the architecture, design decisions, and implementation details.
## System Architecture
### Component Hierarchy
```
┌─────────────────────────────────────────────────────────────┐
│ NixOS Boot Process │
└────────────────────┬────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────┐
│ systemd.target: multi-user.target │
└────────────────────┬────────────────────────────────────────┘
┌───────────────┼───────────────┐
│ │ │
▼ ▼ ▼
┌──────────┐ ┌──────────┐ ┌──────────┐
│chainfire │ │ flaredb │ │ iam │
│.service │ │.service │ │.service │
└────┬─────┘ └────┬─────┘ └────┬─────┘
│ │ │
▼ ▼ ▼
┌──────────────────────────────────────────┐
│ chainfire-cluster-join.service │
│ - Waits for local chainfire health │
│ - Checks bootstrap flag │
│ - Joins cluster if bootstrap=false │
└────────────────┬─────────────────────────┘
┌──────────────────────────────────────────┐
│ flaredb-cluster-join.service │
│ - Requires chainfire-cluster-join │
│ - Waits for local flaredb health │
│ - Joins FlareDB cluster │
└────────────────┬─────────────────────────┘
┌──────────────────────────────────────────┐
│ iam-initial-setup.service │
│ - Waits for IAM health │
│ - Creates admin user if needed │
│ - Generates initial tokens │
└────────────────┬─────────────────────────┘
┌──────────────────────────────────────────┐
│ cluster-health-check.service │
│ - Polls all service health endpoints │
│ - Verifies cluster membership │
│ - Reports to journald │
└──────────────────────────────────────────┘
```
### Configuration Flow
```
┌─────────────────────────────────────────┐
│ Provisioning Server │
│ - Generates cluster-config.json │
│ - Copies to /etc/nixos/secrets/ │
└────────────────┬────────────────────────┘
│ nixos-anywhere
┌─────────────────────────────────────────┐
│ Target Node │
│ /etc/nixos/secrets/cluster-config.json │
└────────────────┬────────────────────────┘
│ Read by NixOS module
┌─────────────────────────────────────────┐
│ first-boot-automation.nix │
│ - Parses JSON config │
│ - Creates systemd services │
│ - Sets up dependencies │
└────────────────┬────────────────────────┘
│ systemd activation
┌─────────────────────────────────────────┐
│ Cluster Join Services │
│ - Execute join logic │
│ - Create marker files │
│ - Log to journald │
└─────────────────────────────────────────┘
```
## Bootstrap vs Join Decision Logic
### Decision Tree
```
┌─────────────────┐
│ Node Boots │
└────────┬────────┘
┌────────▼────────┐
│ Read cluster- │
│ config.json │
└────────┬────────┘
┌────────▼────────┐
│ bootstrap=true? │
└────────┬────────┘
┌────────────┴────────────┐
│ │
YES ▼ ▼ NO
┌─────────────────┐ ┌─────────────────┐
│ Bootstrap Mode │ │ Join Mode │
│ │ │ │
│ - Skip cluster │ │ - Wait for │
│ join API │ │ local health │
│ - Raft cluster │ │ - Contact │
│ initializes │ │ leader │
│ internally │ │ - POST to │
│ - Create marker │ │ /member/add │
│ - Exit success │ │ - Retry 5x │
└─────────────────┘ └─────────────────┘
```
### Bootstrap Mode (bootstrap: true)
**When to use:**
- First 3 nodes in a new cluster
- Nodes configured with matching `initial_peers`
- No existing cluster to join
**Behavior:**
1. Service starts with `--initial-cluster` parameter containing all bootstrap peers
2. Raft consensus protocol automatically elects leader
3. Cluster join service detects bootstrap mode and exits immediately
4. No API calls to leader (cluster doesn't exist yet)
**Configuration:**
```json
{
"bootstrap": true,
"initial_peers": ["node01:2380", "node02:2380", "node03:2380"]
}
```
**Marker file:** `/var/lib/first-boot-automation/.chainfire-initialized`
### Join Mode (bootstrap: false)
**When to use:**
- Nodes joining an existing cluster
- Expansion or replacement nodes
- Leader URL is known and reachable
**Behavior:**
1. Service starts with no initial cluster configuration
2. Cluster join service waits for local service health
3. POST to leader's `/admin/member/add` with node info
4. Leader adds member to Raft configuration
5. Node joins cluster and synchronizes state
**Configuration:**
```json
{
"bootstrap": false,
"leader_url": "https://node01.example.com:2379",
"raft_addr": "10.0.1.13:2380"
}
```
**Marker file:** `/var/lib/first-boot-automation/.chainfire-joined`
## Idempotency and State Management
### Marker Files
The system uses marker files to track initialization state:
```
/var/lib/first-boot-automation/
├── .chainfire-initialized # Bootstrap node initialized
├── .chainfire-joined # Node joined cluster
├── .flaredb-initialized # FlareDB bootstrap
├── .flaredb-joined # FlareDB joined
└── .iam-initialized # IAM setup complete
```
**Purpose:**
- Prevent duplicate join attempts on reboot
- Support idempotent operations
- Enable troubleshooting (check timestamps)
**Format:** ISO8601 timestamp of initialization
```
2025-12-10T10:30:45+00:00
```
### State Transitions
```
┌──────────────┐
│ First Boot │
│ (no marker) │
└──────┬───────┘
┌──────────────┐
│ Check Config │
│ bootstrap=? │
└──────┬───────┘
├─(true)──▶ Bootstrap ──▶ Create .initialized ──▶ Done
└─(false)─▶ Join ──▶ Create .joined ──▶ Done
│ (reboot)
┌──────────────┐
│ Marker Exists│
│ Skip Join │
└──────────────┘
```
## Retry Logic and Error Handling
### Health Check Retry
**Parameters:**
- Timeout: 120 seconds (configurable)
- Retry Interval: 5 seconds
- Max Elapsed: 300 seconds
**Logic:**
```bash
START_TIME=$(date +%s)
while true; do
ELAPSED=$(($(date +%s) - START_TIME))
if [[ $ELAPSED -ge $TIMEOUT ]]; then
exit 1 # Timeout
fi
HTTP_CODE=$(curl -k -s -o /dev/null -w "%{http_code}" "$HEALTH_URL")
if [[ "$HTTP_CODE" == "200" ]]; then
exit 0 # Success
fi
sleep 5
done
```
### Cluster Join Retry
**Parameters:**
- Max Attempts: 5 (configurable)
- Retry Delay: 10 seconds
- Exponential Backoff: Optional (not implemented)
**Logic:**
```bash
for ATTEMPT in $(seq 1 $MAX_ATTEMPTS); do
HTTP_CODE=$(curl -X POST "$LEADER_URL/admin/member/add" -d "$PAYLOAD")
if [[ "$HTTP_CODE" == "200" || "$HTTP_CODE" == "201" ]]; then
exit 0 # Success
elif [[ "$HTTP_CODE" == "409" ]]; then
exit 2 # Already member
fi
sleep $RETRY_DELAY
done
exit 1 # Max attempts exhausted
```
### Error Codes
**Health Check:**
- `0`: Service healthy
- `1`: Timeout or unhealthy
**Cluster Join:**
- `0`: Successfully joined
- `1`: Failed after max attempts
- `2`: Already joined (idempotent)
- `3`: Invalid arguments
**Bootstrap Detector:**
- `0`: Should bootstrap
- `1`: Should join existing
- `2`: Configuration error
## Security Considerations
### TLS Certificate Handling
**Requirements:**
- All inter-node communication uses TLS
- Self-signed certificates supported via `-k` flag to curl
- Certificate validation in production (remove `-k`)
**Certificate Paths:**
```json
{
"tls": {
"enabled": true,
"ca_cert_path": "/etc/nixos/secrets/ca.crt",
"node_cert_path": "/etc/nixos/secrets/node01.crt",
"node_key_path": "/etc/nixos/secrets/node01.key"
}
}
```
**Integration with T031:**
- Certificates generated by T031 TLS automation
- Copied to target during provisioning
- Read by services at startup
### Secrets Management
**Cluster Configuration:**
- Stored in `/etc/nixos/secrets/cluster-config.json`
- Permissions: `0600 root:root` (recommended)
- Contains sensitive data: URLs, IPs, topology
**API Credentials:**
- IAM admin credentials (future implementation)
- Stored in separate file: `/etc/nixos/secrets/iam-admin.json`
- Never logged to journald
### Attack Surface
**Mitigations:**
1. **Network-level**: Firewall rules restrict cluster API ports
2. **Application-level**: mTLS for authenticated requests
3. **Access control**: SystemD service isolation
4. **Audit**: All operations logged to journald with structured JSON
## Integration Points
### T024 NixOS Modules
The first-boot automation module imports and extends service modules:
```nix
# Example: netboot-control-plane.nix
{
imports = [
../modules/chainfire.nix
../modules/flaredb.nix
../modules/iam.nix
../modules/first-boot-automation.nix
];
services.first-boot-automation.enable = true;
}
```
### T031 TLS Certificates
**Dependencies:**
- TLS certificates must exist before first boot
- Provisioning script copies certificates to `/etc/nixos/secrets/`
- Services read certificates at startup
**Certificate Generation:**
```bash
# On provisioning server (T031)
./tls/generate-node-cert.sh node01.example.com 10.0.1.10
# Copied to target
scp ca.crt node01.crt node01.key root@10.0.1.10:/etc/nixos/secrets/
```
### T032.S1-S3 PXE/Netboot
**Boot Flow:**
1. PXE boot loads iPXE firmware
2. iPXE chainloads NixOS kernel/initrd
3. NixOS installer runs (nixos-anywhere)
4. System installed to disk with first-boot automation
5. Reboot into installed system
6. First-boot automation executes
**Configuration Injection:**
```bash
# During nixos-anywhere provisioning
mkdir -p /mnt/etc/nixos/secrets
cp cluster-config.json /mnt/etc/nixos/secrets/
chmod 600 /mnt/etc/nixos/secrets/cluster-config.json
```
## Service Dependencies
### Systemd Ordering
**Chainfire:**
```
After: network-online.target, chainfire.service
Before: flaredb-cluster-join.service
Wants: network-online.target
```
**FlareDB:**
```
After: chainfire-cluster-join.service, flaredb.service
Requires: chainfire-cluster-join.service
Before: iam-initial-setup.service
```
**IAM:**
```
After: flaredb-cluster-join.service, iam.service
Before: cluster-health-check.service
```
**Health Check:**
```
After: chainfire-cluster-join, flaredb-cluster-join, iam-initial-setup
Type: oneshot (no RemainAfterExit)
```
### Dependency Graph
```
network-online.target
├──▶ chainfire.service
│ │
│ ▼
│ chainfire-cluster-join.service
│ │
├──▶ flaredb.service
│ │
│ ▼
└────▶ flaredb-cluster-join.service
┌────┴────┐
│ │
iam.service │
│ │
▼ │
iam-initial-setup.service
│ │
└────┬────┘
cluster-health-check.service
```
## Logging and Observability
### Structured Logging
All scripts output JSON-formatted logs:
```json
{
"timestamp": "2025-12-10T10:30:45+00:00",
"level": "INFO",
"service": "chainfire",
"operation": "cluster-join",
"message": "Successfully joined cluster"
}
```
**Benefits:**
- Machine-readable for log aggregation (T025)
- Easy filtering with `journalctl -o json`
- Includes context (service, operation, timestamp)
### Querying Logs
**View all first-boot automation logs:**
```bash
journalctl -u chainfire-cluster-join.service -u flaredb-cluster-join.service \
-u iam-initial-setup.service -u cluster-health-check.service
```
**Filter by log level:**
```bash
journalctl -u chainfire-cluster-join.service | grep '"level":"ERROR"'
```
**Follow live:**
```bash
journalctl -u chainfire-cluster-join.service -f
```
### Health Check Integration
**T025 Observability:**
- Health check service can POST to metrics endpoint
- Prometheus scraping of `/health` endpoints
- Alerts on cluster join failures
**Future:**
- Webhook to provisioning server on completion
- Slack/email notifications on errors
- Dashboard showing cluster join status
## Performance Characteristics
### Boot Time Analysis
**Typical Timeline (3-node cluster):**
```
T+0s : systemd starts
T+5s : network-online.target reached
T+10s : chainfire.service starts
T+15s : chainfire healthy
T+15s : chainfire-cluster-join runs (bootstrap, immediate exit)
T+20s : flaredb.service starts
T+25s : flaredb healthy
T+25s : flaredb-cluster-join runs (bootstrap, immediate exit)
T+30s : iam.service starts
T+35s : iam healthy
T+35s : iam-initial-setup runs
T+40s : cluster-health-check runs
T+40s : Node fully operational
```
**Join Mode (node joining existing cluster):**
```
T+0s : systemd starts
T+5s : network-online.target reached
T+10s : chainfire.service starts
T+15s : chainfire healthy
T+15s : chainfire-cluster-join runs
T+20s : POST to leader, wait for response
T+25s : Successfully joined chainfire cluster
T+25s : flaredb.service starts
T+30s : flaredb healthy
T+30s : flaredb-cluster-join runs
T+35s : Successfully joined flaredb cluster
T+40s : iam-initial-setup (skips, already initialized)
T+45s : cluster-health-check runs
T+45s : Node fully operational
```
### Bottlenecks
**Health Check Polling:**
- 5-second intervals may be too aggressive
- Recommendation: Exponential backoff
**Network Latency:**
- Join requests block on network RTT
- Mitigation: Ensure low-latency cluster network
**Raft Synchronization:**
- New member must catch up on Raft log
- Time depends on log size (seconds to minutes)
## Failure Modes and Recovery
### Common Failures
**1. Leader Unreachable**
**Symptom:**
```json
{"level":"ERROR","message":"Join request failed: connection error"}
```
**Diagnosis:**
- Check network connectivity: `ping node01.example.com`
- Verify firewall rules: `iptables -L`
- Check leader service status: `systemctl status chainfire.service`
**Recovery:**
```bash
# Fix network/firewall, then restart join service
systemctl restart chainfire-cluster-join.service
```
**2. Invalid Configuration**
**Symptom:**
```json
{"level":"ERROR","message":"Configuration file not found"}
```
**Diagnosis:**
- Verify file exists: `ls -la /etc/nixos/secrets/cluster-config.json`
- Check JSON syntax: `jq . /etc/nixos/secrets/cluster-config.json`
**Recovery:**
```bash
# Fix configuration, then restart
systemctl restart chainfire-cluster-join.service
```
**3. Service Not Healthy**
**Symptom:**
```json
{"level":"ERROR","message":"Health check timeout"}
```
**Diagnosis:**
- Check service logs: `journalctl -u chainfire.service`
- Verify service is running: `systemctl status chainfire.service`
- Test health endpoint: `curl -k https://localhost:2379/health`
**Recovery:**
```bash
# Restart the main service
systemctl restart chainfire.service
# Join service will auto-retry after RestartSec
```
**4. Already Member**
**Symptom:**
```json
{"level":"WARN","message":"Node already member of cluster (HTTP 409)"}
```
**Diagnosis:**
- This is normal on reboots
- Marker file created to prevent future attempts
**Recovery:**
- No action needed (idempotent behavior)
### Manual Cluster Join
If automation fails, manual join:
**Chainfire:**
```bash
curl -k -X POST https://node01.example.com:2379/admin/member/add \
-H "Content-Type: application/json" \
-d '{"id":"node04","raft_addr":"10.0.1.13:2380"}'
# Create marker to prevent auto-retry
mkdir -p /var/lib/first-boot-automation
date -Iseconds > /var/lib/first-boot-automation/.chainfire-joined
```
**FlareDB:**
```bash
curl -k -X POST https://node01.example.com:2479/admin/member/add \
-H "Content-Type: application/json" \
-d '{"id":"node04","raft_addr":"10.0.1.13:2480"}'
date -Iseconds > /var/lib/first-boot-automation/.flaredb-joined
```
### Rollback Procedure
**Remove from cluster:**
```bash
# On leader
curl -k -X DELETE https://node01.example.com:2379/admin/member/node04
# On node being removed
systemctl stop chainfire.service
rm -rf /var/lib/chainfire/*
rm /var/lib/first-boot-automation/.chainfire-joined
# Re-enable automation
systemctl restart chainfire-cluster-join.service
```
## Future Enhancements
### Planned Improvements
**1. Exponential Backoff**
- Current: Fixed 10-second delay
- Future: 1s, 2s, 4s, 8s, 16s exponential backoff
**2. Leader Discovery**
- Current: Static leader URL in config
- Future: DNS SRV records for dynamic discovery
**3. Webhook Notifications**
- POST to provisioning server on completion
- Include node info, join time, cluster health
**4. Pre-flight Checks**
- Validate network connectivity before attempting join
- Check TLS certificate validity
- Verify disk space, memory, CPU requirements
**5. Automated Testing**
- Integration tests with real cluster
- Simulate failures (network partitions, leader crashes)
- Validate idempotency
**6. Configuration Validation**
- JSON schema validation at boot
- Fail fast on invalid configuration
- Provide clear error messages
## References
- **T024**: NixOS service modules
- **T025**: Observability and monitoring
- **T031**: TLS certificate automation
- **T032.S1-S3**: PXE boot, netboot images, provisioning
- **Design Document**: `/home/centra/cloud/docs/por/T032-baremetal-provisioning/design.md`
## Appendix: Configuration Schema
### cluster-config.json Schema
```json
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": ["node_id", "node_role", "bootstrap", "cluster_name", "leader_url", "raft_addr"],
"properties": {
"node_id": {
"type": "string",
"description": "Unique node identifier"
},
"node_role": {
"type": "string",
"enum": ["control-plane", "worker", "all-in-one"]
},
"bootstrap": {
"type": "boolean",
"description": "True for first 3 nodes, false for join"
},
"cluster_name": {
"type": "string"
},
"leader_url": {
"type": "string",
"format": "uri"
},
"raft_addr": {
"type": "string",
"pattern": "^[0-9.]+:[0-9]+$"
},
"initial_peers": {
"type": "array",
"items": {"type": "string"}
},
"flaredb_peers": {
"type": "array",
"items": {"type": "string"}
}
}
}
```

View file

@ -1,858 +0,0 @@
# First-Boot Automation for Bare-Metal Provisioning
Automated cluster joining and service initialization for bare-metal provisioned NixOS nodes.
## Table of Contents
- [Overview](#overview)
- [Quick Start](#quick-start)
- [Configuration](#configuration)
- [Bootstrap vs Join](#bootstrap-vs-join)
- [Systemd Services](#systemd-services)
- [Troubleshooting](#troubleshooting)
- [Manual Operations](#manual-operations)
- [Security](#security)
- [Examples](#examples)
## Overview
The first-boot automation system handles automated cluster joining for distributed services (Chainfire, FlareDB, IAM) on first boot of bare-metal provisioned nodes. It supports two modes:
- **Bootstrap Mode**: Initialize a new Raft cluster (first 3 nodes)
- **Join Mode**: Join an existing cluster (additional nodes)
### Features
- Automated health checking with retries
- Idempotent operations (safe to run multiple times)
- Structured JSON logging to journald
- Graceful failure handling with configurable retries
- Integration with TLS certificates (T031)
- Support for both bootstrap and runtime join scenarios
### Architecture
See [ARCHITECTURE.md](ARCHITECTURE.md) for detailed design documentation.
## Quick Start
### Prerequisites
1. Node provisioned via T032.S1-S3 (PXE boot and installation)
2. Cluster configuration file at `/etc/nixos/secrets/cluster-config.json`
3. TLS certificates at `/etc/nixos/secrets/` (T031)
4. Network connectivity to cluster leader (for join mode)
### Enable First-Boot Automation
In your NixOS configuration:
```nix
# /etc/nixos/configuration.nix
{
imports = [
./nix/modules/first-boot-automation.nix
];
services.first-boot-automation = {
enable = true;
configFile = "/etc/nixos/secrets/cluster-config.json";
# Optional: disable specific services
enableChainfire = true;
enableFlareDB = true;
enableIAM = true;
enableHealthCheck = true;
};
}
```
### First Boot
After provisioning and reboot:
1. Node boots from disk
2. systemd starts services
3. First-boot automation runs automatically
4. Cluster join completes within 30-60 seconds
Check status:
```bash
systemctl status chainfire-cluster-join.service
systemctl status flaredb-cluster-join.service
systemctl status iam-initial-setup.service
systemctl status cluster-health-check.service
```
## Configuration
### cluster-config.json Format
```json
{
"node_id": "node01",
"node_role": "control-plane",
"bootstrap": true,
"cluster_name": "prod-cluster",
"leader_url": "https://node01.prod.example.com:2379",
"raft_addr": "10.0.1.10:2380",
"initial_peers": [
"node01:2380",
"node02:2380",
"node03:2380"
],
"flaredb_peers": [
"node01:2480",
"node02:2480",
"node03:2480"
]
}
```
### Required Fields
| Field | Type | Description |
|-------|------|-------------|
| `node_id` | string | Unique identifier for this node |
| `node_role` | string | Node role: `control-plane`, `worker`, or `all-in-one` |
| `bootstrap` | boolean | `true` for first 3 nodes, `false` for additional nodes |
| `cluster_name` | string | Cluster identifier |
| `leader_url` | string | HTTPS URL of cluster leader (used for join) |
| `raft_addr` | string | This node's Raft address (IP:port) |
| `initial_peers` | array | List of bootstrap peer addresses |
| `flaredb_peers` | array | List of FlareDB peer addresses |
### Optional Fields
| Field | Type | Description |
|-------|------|-------------|
| `node_ip` | string | Node's primary IP address |
| `node_fqdn` | string | Fully qualified domain name |
| `datacenter` | string | Datacenter identifier |
| `rack` | string | Rack identifier |
| `services` | object | Per-service configuration |
| `tls` | object | TLS certificate paths |
| `network` | object | Network CIDR ranges |
### Example Configurations
See [examples/](examples/) directory:
- `cluster-config-bootstrap.json` - Bootstrap node (first 3)
- `cluster-config-join.json` - Join node (additional)
- `cluster-config-all-in-one.json` - Single-node deployment
## Bootstrap vs Join
### Bootstrap Mode (bootstrap: true)
**When to use:**
- First 3 nodes in a new cluster
- Nodes configured with matching `initial_peers`
- No existing cluster to join
**Behavior:**
1. Services start with `--initial-cluster` configuration
2. Raft consensus automatically elects leader
3. Cluster join service detects bootstrap mode and exits immediately
4. Marker file created: `/var/lib/first-boot-automation/.chainfire-initialized`
**Example:**
```json
{
"node_id": "node01",
"bootstrap": true,
"initial_peers": ["node01:2380", "node02:2380", "node03:2380"]
}
```
### Join Mode (bootstrap: false)
**When to use:**
- Nodes joining an existing cluster
- Expansion or replacement nodes
- Leader is known and reachable
**Behavior:**
1. Service starts with no initial cluster config
2. Waits for local service to be healthy (max 120s)
3. POST to leader's `/admin/member/add` endpoint
4. Retries up to 5 times with 10s delay
5. Marker file created: `/var/lib/first-boot-automation/.chainfire-joined`
**Example:**
```json
{
"node_id": "node04",
"bootstrap": false,
"leader_url": "https://node01.prod.example.com:2379",
"raft_addr": "10.0.1.13:2380"
}
```
### Decision Matrix
| Scenario | bootstrap | initial_peers | leader_url |
|----------|-----------|---------------|------------|
| Node 1 (first) | `true` | all 3 nodes | self |
| Node 2 (first) | `true` | all 3 nodes | self |
| Node 3 (first) | `true` | all 3 nodes | self |
| Node 4+ (join) | `false` | n/a (unused in join mode) | node 1 |
## Systemd Services
### chainfire-cluster-join.service
**Description:** Joins Chainfire cluster on first boot
**Dependencies:**
- After: `network-online.target`, `chainfire.service`
- Before: `flaredb-cluster-join.service`
**Configuration:**
- Type: `oneshot`
- RemainAfterExit: `true`
- Restart: `on-failure`
**Logs:**
```bash
journalctl -u chainfire-cluster-join.service
```
### flaredb-cluster-join.service
**Description:** Joins FlareDB cluster after Chainfire
**Dependencies:**
- After: `chainfire-cluster-join.service`, `flaredb.service`
- Requires: `chainfire-cluster-join.service`
**Configuration:**
- Type: `oneshot`
- RemainAfterExit: `true`
- Restart: `on-failure`
**Logs:**
```bash
journalctl -u flaredb-cluster-join.service
```
### iam-initial-setup.service
**Description:** IAM initial setup and admin user creation
**Dependencies:**
- After: `flaredb-cluster-join.service`, `iam.service`
**Configuration:**
- Type: `oneshot`
- RemainAfterExit: `true`
**Logs:**
```bash
journalctl -u iam-initial-setup.service
```
### cluster-health-check.service
**Description:** Validates cluster health on first boot
**Dependencies:**
- After: all cluster-join services
**Configuration:**
- Type: `oneshot`
- RemainAfterExit: `false`
**Logs:**
```bash
journalctl -u cluster-health-check.service
```
## Troubleshooting
### Check Service Status
```bash
# Overall status
systemctl status chainfire-cluster-join.service
systemctl status flaredb-cluster-join.service
# Detailed logs with JSON output
journalctl -u chainfire-cluster-join.service -o json-pretty
# Follow logs in real-time
journalctl -u chainfire-cluster-join.service -f
```
### Common Issues
#### 1. Health Check Timeout
**Symptom:**
```json
{"level":"ERROR","message":"Health check timeout after 120s"}
```
**Causes:**
- Service not starting (check main service logs)
- Port conflict
- TLS certificate issues
**Solutions:**
```bash
# Check main service
systemctl status chainfire.service
journalctl -u chainfire.service
# Test health endpoint manually
curl -k https://localhost:2379/health
# Restart services
systemctl restart chainfire.service
systemctl restart chainfire-cluster-join.service
```
#### 2. Leader Unreachable
**Symptom:**
```json
{"level":"ERROR","message":"Join request failed: connection error"}
```
**Causes:**
- Network connectivity issues
- Firewall blocking ports
- Leader not running
- Wrong leader URL in config
**Solutions:**
```bash
# Test network connectivity
ping node01.prod.example.com
curl -k https://node01.prod.example.com:2379/health
# Check firewall
iptables -L -n | grep 2379
# Verify configuration
jq '.leader_url' /etc/nixos/secrets/cluster-config.json
# Try manual join (see below)
```
#### 3. Invalid Configuration
**Symptom:**
```json
{"level":"ERROR","message":"Configuration file not found"}
```
**Causes:**
- Missing configuration file
- Wrong file path
- Invalid JSON syntax
- Missing required fields
**Solutions:**
```bash
# Check file exists
ls -la /etc/nixos/secrets/cluster-config.json
# Validate JSON syntax
jq . /etc/nixos/secrets/cluster-config.json
# Check required fields
jq '.node_id, .bootstrap, .leader_url' /etc/nixos/secrets/cluster-config.json
# Fix and restart
systemctl restart chainfire-cluster-join.service
```
#### 4. Already Member (Reboot)
**Symptom:**
```json
{"level":"WARN","message":"Already member of cluster (HTTP 409)"}
```
**Explanation:**
- This is **normal** on reboots
- Marker file prevents duplicate joins
- No action needed
**Verify:**
```bash
# Check marker file
cat /var/lib/first-boot-automation/.chainfire-joined
# Should show timestamp: 2025-12-10T10:30:45+00:00
```
#### 5. Join Retry Exhausted
**Symptom:**
```json
{"level":"ERROR","message":"Failed to join cluster after 5 attempts"}
```
**Causes:**
- Persistent network issues
- Leader down or overloaded
- Invalid node configuration
- Cluster at capacity
**Solutions:**
```bash
# Check cluster status on leader
curl -k https://node01.prod.example.com:2379/admin/cluster/members | jq
# Verify this node's configuration
jq '.node_id, .raft_addr' /etc/nixos/secrets/cluster-config.json
# Increase retry attempts (edit NixOS config)
# Or perform manual join (see below)
```
### Verify Cluster Membership
**On leader node:**
```bash
# Chainfire members
curl -k https://localhost:2379/admin/cluster/members | jq
# FlareDB members
curl -k https://localhost:2479/admin/cluster/members | jq
```
**Expected output:**
```json
{
"members": [
{"id": "node01", "raft_addr": "10.0.1.10:2380", "status": "healthy"},
{"id": "node02", "raft_addr": "10.0.1.11:2380", "status": "healthy"},
{"id": "node03", "raft_addr": "10.0.1.12:2380", "status": "healthy"}
]
}
```
### Check Marker Files
```bash
# List all marker files
ls -la /var/lib/first-boot-automation/
# View timestamps
cat /var/lib/first-boot-automation/.chainfire-joined
cat /var/lib/first-boot-automation/.flaredb-joined
```
### Reset and Re-join
**Warning:** This will remove the node from the cluster and rejoin.
```bash
# Stop services
systemctl stop chainfire.service flaredb.service
# Remove data and markers
rm -rf /var/lib/chainfire/*
rm -rf /var/lib/flaredb/*
rm /var/lib/first-boot-automation/.chainfire-*
rm /var/lib/first-boot-automation/.flaredb-*
# Restart (will auto-join)
systemctl start chainfire.service
systemctl restart chainfire-cluster-join.service
```
## Manual Operations
### Manual Cluster Join
If automation fails, perform manual join:
**Chainfire:**
```bash
# On joining node, ensure service is running and healthy
curl -k https://localhost:2379/health
# From any node, add member to cluster
curl -k -X POST https://node01.prod.example.com:2379/admin/member/add \
-H "Content-Type: application/json" \
-d '{
"id": "node04",
"raft_addr": "10.0.1.13:2380"
}'
# Create marker to prevent auto-retry
mkdir -p /var/lib/first-boot-automation
date -Iseconds > /var/lib/first-boot-automation/.chainfire-joined
```
**FlareDB:**
```bash
curl -k -X POST https://node01.prod.example.com:2479/admin/member/add \
-H "Content-Type: application/json" \
-d '{
"id": "node04",
"raft_addr": "10.0.1.13:2480"
}'
date -Iseconds > /var/lib/first-boot-automation/.flaredb-joined
```
### Remove Node from Cluster
**On leader:**
```bash
# Chainfire
curl -k -X DELETE https://node01.prod.example.com:2379/admin/member/node04
# FlareDB
curl -k -X DELETE https://node01.prod.example.com:2479/admin/member/node04
```
**On removed node:**
```bash
# Stop services
systemctl stop chainfire.service flaredb.service
# Clean up data
rm -rf /var/lib/chainfire/*
rm -rf /var/lib/flaredb/*
rm /var/lib/first-boot-automation/.chainfire-*
rm /var/lib/first-boot-automation/.flaredb-*
```
### Disable First-Boot Automation
If you need to disable automation:
```nix
# In NixOS configuration
services.first-boot-automation.enable = false;
```
Or stop services temporarily:
```bash
systemctl stop chainfire-cluster-join.service
systemctl disable chainfire-cluster-join.service
```
### Re-enable After Manual Operations
After manual cluster operations:
```bash
# Create marker files to indicate join complete
mkdir -p /var/lib/first-boot-automation
date -Iseconds > /var/lib/first-boot-automation/.chainfire-joined
date -Iseconds > /var/lib/first-boot-automation/.flaredb-joined
# Or re-enable automation (will skip if markers exist)
systemctl enable --now chainfire-cluster-join.service
```
## Security
### TLS Certificates
**Requirements:**
- All cluster communication uses TLS
- Certificates must exist before first boot
- Generated by T031 TLS automation
**Certificate Paths:**
```
/etc/nixos/secrets/
├── ca.crt # CA certificate
├── node01.crt # Node certificate
└── node01.key # Node private key (mode 0600)
```
**Permissions:**
```bash
chmod 600 /etc/nixos/secrets/node01.key
chmod 644 /etc/nixos/secrets/node01.crt
chmod 644 /etc/nixos/secrets/ca.crt
```
### Configuration File Security
**Cluster configuration contains sensitive data:**
- IP addresses and network topology
- Service URLs
- Node identifiers
**Recommended permissions:**
```bash
chmod 600 /etc/nixos/secrets/cluster-config.json
chown root:root /etc/nixos/secrets/cluster-config.json
```
### Network Security
**Required firewall rules:**
```bash
# Chainfire
iptables -A INPUT -p tcp --dport 2379 -s 10.0.1.0/24 -j ACCEPT # API
iptables -A INPUT -p tcp --dport 2380 -s 10.0.1.0/24 -j ACCEPT # Raft
iptables -A INPUT -p tcp --dport 2381 -s 10.0.1.0/24 -j ACCEPT # Gossip
# FlareDB
iptables -A INPUT -p tcp --dport 2479 -s 10.0.1.0/24 -j ACCEPT # API
iptables -A INPUT -p tcp --dport 2480 -s 10.0.1.0/24 -j ACCEPT # Raft
# IAM
iptables -A INPUT -p tcp --dport 8080 -s 10.0.1.0/24 -j ACCEPT # API
```
### Production Considerations
**For production deployments:**
1. **Remove `-k` flag from curl** (validate TLS certificates)
2. **Implement mTLS** for client authentication
3. **Rotate credentials** regularly
4. **Audit logs** with structured logging
5. **Monitor health endpoints** continuously
6. **Backup cluster state** before changes
## Examples
### Example 1: 3-Node Bootstrap Cluster
**Node 1:**
```json
{
"node_id": "node01",
"bootstrap": true,
"raft_addr": "10.0.1.10:2380",
"initial_peers": ["node01:2380", "node02:2380", "node03:2380"]
}
```
**Node 2:**
```json
{
"node_id": "node02",
"bootstrap": true,
"raft_addr": "10.0.1.11:2380",
"initial_peers": ["node01:2380", "node02:2380", "node03:2380"]
}
```
**Node 3:**
```json
{
"node_id": "node03",
"bootstrap": true,
"raft_addr": "10.0.1.12:2380",
"initial_peers": ["node01:2380", "node02:2380", "node03:2380"]
}
```
**Provisioning:**
```bash
# Provision all 3 nodes simultaneously
for i in {1..3}; do
nixos-anywhere --flake .#node0$i root@node0$i.example.com &
done
wait
# Nodes will bootstrap automatically on first boot
```
### Example 2: Join Existing Cluster
**Node 4 (joining):**
```json
{
"node_id": "node04",
"bootstrap": false,
"leader_url": "https://node01.prod.example.com:2379",
"raft_addr": "10.0.1.13:2380"
}
```
**Provisioning:**
```bash
nixos-anywhere --flake .#node04 root@node04.example.com
# Node will automatically join on first boot
```
### Example 3: Single-Node All-in-One
**For development/testing:**
```json
{
"node_id": "aio01",
"bootstrap": true,
"raft_addr": "10.0.2.10:2380",
"initial_peers": ["aio01:2380"],
"flaredb_peers": ["aio01:2480"]
}
```
**Provisioning:**
```bash
nixos-anywhere --flake .#aio01 root@aio01.example.com
```
## Integration with Other Systems
### T024 NixOS Modules
First-boot automation integrates with service modules:
```nix
{
imports = [
./nix/modules/chainfire.nix
./nix/modules/flaredb.nix
./nix/modules/first-boot-automation.nix
];
services.chainfire.enable = true;
services.flaredb.enable = true;
services.first-boot-automation.enable = true;
}
```
### T025 Observability
Health checks integrate with Prometheus:
```yaml
# prometheus.yml
scrape_configs:
- job_name: 'cluster-health'
static_configs:
- targets: ['node01:2379', 'node02:2379', 'node03:2379']
metrics_path: '/health'
```
### T031 TLS Certificates
Certificates generated by T031 are used automatically:
```bash
# On provisioning server
./tls/generate-node-cert.sh node01.example.com 10.0.1.10
# Copied during nixos-anywhere
# First-boot automation reads from /etc/nixos/secrets/
```
## Logs and Debugging
### Structured Logging
All logs are JSON-formatted:
```json
{
"timestamp": "2025-12-10T10:30:45+00:00",
"level": "INFO",
"service": "chainfire",
"operation": "cluster-join",
"message": "Successfully joined cluster"
}
```
### Query Examples
**All first-boot logs:**
```bash
journalctl -u "*cluster-join*" -u "*initial-setup*" -u "*health-check*"
```
**Errors only:**
```bash
journalctl -u chainfire-cluster-join.service | grep '"level":"ERROR"'
```
**Last boot only:**
```bash
journalctl -b -u chainfire-cluster-join.service
```
**JSON output for parsing:**
```bash
journalctl -u chainfire-cluster-join.service -o json | jq '.MESSAGE'
```
## Performance Tuning
### Timeout Configuration
Adjust timeouts in NixOS module:
```nix
services.first-boot-automation = {
enable = true;
# Override default ports if needed
chainfirePort = 2379;
flaredbPort = 2479;
};
```
### Retry Configuration
Modify retry logic in scripts:
```bash
# baremetal/first-boot/cluster-join.sh
MAX_ATTEMPTS=10 # Increase from 5
RETRY_DELAY=15 # Increase from 10s
```
### Health Check Interval
Adjust polling interval:
```bash
# In service scripts
sleep 10 # Increase from 5s for less aggressive polling
```
## Support and Contributing
### Getting Help
1. Check logs: `journalctl -u chainfire-cluster-join.service`
2. Review troubleshooting section above
3. Consult [ARCHITECTURE.md](ARCHITECTURE.md) for design details
4. Check cluster status on leader node
### Reporting Issues
Include in bug reports:
```bash
# Gather diagnostic information
journalctl -u chainfire-cluster-join.service > cluster-join.log
systemctl status chainfire-cluster-join.service > service-status.txt
cat /etc/nixos/secrets/cluster-config.json > config.json # Redact sensitive data!
ls -la /var/lib/first-boot-automation/ > markers.txt
```
### Development
See [ARCHITECTURE.md](ARCHITECTURE.md) for contributing guidelines.
## References
- **ARCHITECTURE.md**: Detailed design documentation
- **T024**: NixOS service modules
- **T025**: Observability and monitoring
- **T031**: TLS certificate automation
- **T032.S1-S3**: PXE boot and provisioning
- **Design Document**: `/home/centra/cloud/docs/por/T032-baremetal-provisioning/design.md`
## License
Internal use only - Centra Cloud Platform

View file

@ -47,9 +47,9 @@ if command -v jq &> /dev/null; then
NODE_ROLE=$(echo "$CONFIG_JSON" | jq -r '.node_role // "unknown"') NODE_ROLE=$(echo "$CONFIG_JSON" | jq -r '.node_role // "unknown"')
else else
# Fallback to grep/sed for minimal environments # Fallback to grep/sed for minimal environments
BOOTSTRAP=$(echo "$CONFIG_JSON" | grep -oP '"bootstrap"\s*:\s*\K(true|false)' || echo "false") BOOTSTRAP=$(echo "$CONFIG_JSON" | grep -Eo '"bootstrap"[[:space:]]*:[[:space:]]*(true|false)' | head -n1 | sed -E 's/.*:[[:space:]]*(true|false)/\1/' || echo "false")
NODE_ID=$(echo "$CONFIG_JSON" | grep -oP '"node_id"\s*:\s*"\K[^"]+' || echo "unknown") NODE_ID=$(echo "$CONFIG_JSON" | grep -Eo '"node_id"[[:space:]]*:[[:space:]]*"[^"]+"' | head -n1 | sed -E 's/.*"node_id"[[:space:]]*:[[:space:]]*"([^"]+)".*/\1/' || echo "unknown")
NODE_ROLE=$(echo "$CONFIG_JSON" | grep -oP '"node_role"\s*:\s*"\K[^"]+' || echo "unknown") NODE_ROLE=$(echo "$CONFIG_JSON" | grep -Eo '"node_role"[[:space:]]*:[[:space:]]*"[^"]+"' | head -n1 | sed -E 's/.*"node_role"[[:space:]]*:[[:space:]]*"([^"]+)".*/\1/' || echo "unknown")
fi fi
log "INFO" "Node configuration: id=$NODE_ID, role=$NODE_ROLE, bootstrap=$BOOTSTRAP" log "INFO" "Node configuration: id=$NODE_ID, role=$NODE_ROLE, bootstrap=$BOOTSTRAP"

View file

@ -25,6 +25,9 @@ LEADER_URL="${3:-}"
JOIN_PAYLOAD="${4:-}" JOIN_PAYLOAD="${4:-}"
MAX_ATTEMPTS="${5:-5}" MAX_ATTEMPTS="${5:-5}"
RETRY_DELAY="${6:-10}" RETRY_DELAY="${6:-10}"
CURL_CONNECT_TIMEOUT="${CURL_CONNECT_TIMEOUT:-5}"
CURL_MAX_TIME="${CURL_MAX_TIME:-15}"
CURL_INSECURE="${CURL_INSECURE:-1}"
FIRST_BOOT_MARKER="/var/lib/first-boot-automation/.${SERVICE_NAME}-joined" FIRST_BOOT_MARKER="/var/lib/first-boot-automation/.${SERVICE_NAME}-joined"
@ -81,7 +84,11 @@ else
exit 1 exit 1
fi fi
HTTP_CODE=$(curl -k -s -o /dev/null -w "%{http_code}" "$HEALTH_URL" 2>/dev/null || echo "000") CURL_FLAGS=(-s -o /dev/null -w "%{http_code}" --connect-timeout "$CURL_CONNECT_TIMEOUT" --max-time "$CURL_MAX_TIME")
if [[ "$CURL_INSECURE" == "1" ]]; then
CURL_FLAGS+=(-k)
fi
HTTP_CODE=$(curl "${CURL_FLAGS[@]}" "$HEALTH_URL" 2>/dev/null || echo "000")
if [[ "$HTTP_CODE" == "200" ]]; then if [[ "$HTTP_CODE" == "200" ]]; then
log "INFO" "Local $SERVICE_NAME is healthy" log "INFO" "Local $SERVICE_NAME is healthy"
@ -109,13 +116,20 @@ for ATTEMPT in $(seq 1 "$MAX_ATTEMPTS"); do
# Make join request to leader # Make join request to leader
RESPONSE_FILE=$(mktemp) RESPONSE_FILE=$(mktemp)
HTTP_CODE=$(curl -k -s -w "%{http_code}" -o "$RESPONSE_FILE" \ PAYLOAD_FILE=$(mktemp)
printf '%s' "$JOIN_PAYLOAD" > "$PAYLOAD_FILE"
CURL_FLAGS=(-s -w "%{http_code}" -o "$RESPONSE_FILE" --connect-timeout "$CURL_CONNECT_TIMEOUT" --max-time "$CURL_MAX_TIME")
if [[ "$CURL_INSECURE" == "1" ]]; then
CURL_FLAGS+=(-k)
fi
HTTP_CODE=$(curl "${CURL_FLAGS[@]}" \
-X POST "$LEADER_URL/admin/member/add" \ -X POST "$LEADER_URL/admin/member/add" \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d "$JOIN_PAYLOAD" 2>/dev/null || echo "000") --data-binary "@$PAYLOAD_FILE" 2>/dev/null || echo "000")
RESPONSE_BODY=$(cat "$RESPONSE_FILE" 2>/dev/null || echo "") RESPONSE_BODY=$(cat "$RESPONSE_FILE" 2>/dev/null || echo "")
rm -f "$RESPONSE_FILE" rm -f "$RESPONSE_FILE" "$PAYLOAD_FILE"
log "INFO" "Join request response: HTTP $HTTP_CODE" log "INFO" "Join request response: HTTP $HTTP_CODE"

View file

@ -1,77 +0,0 @@
{
"node_id": "aio01",
"node_role": "all-in-one",
"bootstrap": true,
"cluster_name": "dev-cluster",
"leader_url": "https://aio01.dev.example.com:2379",
"raft_addr": "10.0.2.10:2380",
"initial_peers": [
"aio01:2380"
],
"flaredb_peers": [
"aio01:2480"
],
"node_ip": "10.0.2.10",
"node_fqdn": "aio01.dev.example.com",
"datacenter": "dev",
"rack": "rack1",
"description": "Single-node all-in-one deployment for development/testing",
"services": {
"chainfire": {
"enabled": true,
"api_port": 2379,
"raft_port": 2380,
"gossip_port": 2381
},
"flaredb": {
"enabled": true,
"api_port": 2479,
"raft_port": 2480
},
"iam": {
"enabled": true,
"api_port": 8080
},
"plasmavmc": {
"enabled": true,
"api_port": 8090
},
"novanet": {
"enabled": true,
"api_port": 8091
},
"flashdns": {
"enabled": true,
"dns_port": 53,
"api_port": 8053
},
"fiberlb": {
"enabled": true,
"api_port": 8092
},
"lightningstor": {
"enabled": true,
"api_port": 8093
},
"k8shost": {
"enabled": true,
"api_port": 10250
}
},
"tls": {
"enabled": true,
"ca_cert_path": "/etc/nixos/secrets/ca.crt",
"node_cert_path": "/etc/nixos/secrets/aio01.crt",
"node_key_path": "/etc/nixos/secrets/aio01.key"
},
"network": {
"cluster_network": "10.0.2.0/24",
"pod_network": "10.244.0.0/16",
"service_network": "10.96.0.0/12"
},
"development": {
"mode": "single-node",
"skip_replication_checks": true,
"allow_single_raft_member": true
}
}

View file

@ -1,68 +0,0 @@
{
"node_id": "node01",
"node_role": "control-plane",
"bootstrap": true,
"cluster_name": "prod-cluster",
"leader_url": "https://node01.prod.example.com:2379",
"raft_addr": "10.0.1.10:2380",
"initial_peers": [
"node01:2380",
"node02:2380",
"node03:2380"
],
"flaredb_peers": [
"node01:2480",
"node02:2480",
"node03:2480"
],
"node_ip": "10.0.1.10",
"node_fqdn": "node01.prod.example.com",
"datacenter": "dc1",
"rack": "rack1",
"description": "Bootstrap node for production cluster - initializes Raft cluster",
"services": {
"chainfire": {
"enabled": true,
"api_port": 2379,
"raft_port": 2380,
"gossip_port": 2381
},
"flaredb": {
"enabled": true,
"api_port": 2479,
"raft_port": 2480
},
"iam": {
"enabled": true,
"api_port": 8080
},
"plasmavmc": {
"enabled": true,
"api_port": 8090
},
"novanet": {
"enabled": true,
"api_port": 8091
},
"flashdns": {
"enabled": true,
"dns_port": 53,
"api_port": 8053
},
"fiberlb": {
"enabled": true,
"api_port": 8092
}
},
"tls": {
"enabled": true,
"ca_cert_path": "/etc/nixos/secrets/ca.crt",
"node_cert_path": "/etc/nixos/secrets/node01.crt",
"node_key_path": "/etc/nixos/secrets/node01.key"
},
"network": {
"cluster_network": "10.0.1.0/24",
"pod_network": "10.244.0.0/16",
"service_network": "10.96.0.0/12"
}
}

View file

@ -1,68 +0,0 @@
{
"node_id": "node04",
"node_role": "control-plane",
"bootstrap": false,
"cluster_name": "prod-cluster",
"leader_url": "https://node01.prod.example.com:2379",
"raft_addr": "10.0.1.13:2380",
"initial_peers": [
"node01:2380",
"node02:2380",
"node03:2380"
],
"flaredb_peers": [
"node01:2480",
"node02:2480",
"node03:2480"
],
"node_ip": "10.0.1.13",
"node_fqdn": "node04.prod.example.com",
"datacenter": "dc1",
"rack": "rack2",
"description": "Additional node joining existing cluster - will contact leader to join",
"services": {
"chainfire": {
"enabled": true,
"api_port": 2379,
"raft_port": 2380,
"gossip_port": 2381
},
"flaredb": {
"enabled": true,
"api_port": 2479,
"raft_port": 2480
},
"iam": {
"enabled": true,
"api_port": 8080
},
"plasmavmc": {
"enabled": true,
"api_port": 8090
},
"novanet": {
"enabled": true,
"api_port": 8091
},
"flashdns": {
"enabled": true,
"dns_port": 53,
"api_port": 8053
},
"fiberlb": {
"enabled": true,
"api_port": 8092
}
},
"tls": {
"enabled": true,
"ca_cert_path": "/etc/nixos/secrets/ca.crt",
"node_cert_path": "/etc/nixos/secrets/node04.crt",
"node_key_path": "/etc/nixos/secrets/node04.key"
},
"network": {
"cluster_network": "10.0.1.0/24",
"pod_network": "10.244.0.0/16",
"service_network": "10.96.0.0/12"
}
}

View file

@ -19,6 +19,9 @@ SERVICE_NAME="${1:-}"
HEALTH_URL="${2:-}" HEALTH_URL="${2:-}"
TIMEOUT="${3:-300}" TIMEOUT="${3:-300}"
RETRY_INTERVAL="${4:-5}" RETRY_INTERVAL="${4:-5}"
CURL_CONNECT_TIMEOUT="${CURL_CONNECT_TIMEOUT:-5}"
CURL_MAX_TIME="${CURL_MAX_TIME:-10}"
CURL_INSECURE="${CURL_INSECURE:-1}"
# Validate arguments # Validate arguments
if [[ -z "$SERVICE_NAME" || -z "$HEALTH_URL" ]]; then if [[ -z "$SERVICE_NAME" || -z "$HEALTH_URL" ]]; then
@ -55,8 +58,12 @@ while true; do
ATTEMPT=$((ATTEMPT + 1)) ATTEMPT=$((ATTEMPT + 1))
log "INFO" "Health check attempt $ATTEMPT (elapsed: ${ELAPSED}s)" log "INFO" "Health check attempt $ATTEMPT (elapsed: ${ELAPSED}s)"
# Perform health check (allow insecure TLS for self-signed certs) # Perform health check (allow insecure TLS if configured)
HTTP_CODE=$(curl -k -s -o /dev/null -w "%{http_code}" "$HEALTH_URL" 2>/dev/null || echo "000") CURL_FLAGS=(-s -o /dev/null -w "%{http_code}" --connect-timeout "$CURL_CONNECT_TIMEOUT" --max-time "$CURL_MAX_TIME")
if [[ "$CURL_INSECURE" == "1" ]]; then
CURL_FLAGS+=(-k)
fi
HTTP_CODE=$(curl "${CURL_FLAGS[@]}" "$HEALTH_URL" 2>/dev/null || echo "000")
if [[ "$HTTP_CODE" == "200" ]]; then if [[ "$HTTP_CODE" == "200" ]]; then
log "INFO" "Health check passed (HTTP $HTTP_CODE)" log "INFO" "Health check passed (HTTP $HTTP_CODE)"

View file

@ -1,570 +0,0 @@
# PlasmaCloud Netboot Image Builder - Technical Overview
## Introduction
This document provides a technical overview of the PlasmaCloud NixOS Image Builder, which generates bootable netboot images for bare-metal provisioning. This is part of T032 (Bare-Metal Provisioning) and specifically implements deliverable S3 (NixOS Image Builder).
## System Architecture
### High-Level Flow
```
┌─────────────────────┐
│ Nix Flake │
│ (flake.nix) │
└──────────┬──────────┘
├─── nixosConfigurations
│ ├── netboot-control-plane
│ ├── netboot-worker
│ └── netboot-all-in-one
├─── packages (T024)
│ ├── chainfire-server
│ ├── flaredb-server
│ └── ... (8 services)
└─── modules (T024)
├── chainfire.nix
├── flaredb.nix
└── ... (8 modules)
Build Process
┌─────────────────────┐
│ build-images.sh │
└──────────┬──────────┘
├─── nix build netbootRamdisk
├─── nix build kernel
└─── copy to artifacts/
Output
┌─────────────────────┐
│ Netboot Artifacts │
├─────────────────────┤
│ bzImage (kernel) │
│ initrd (ramdisk) │
│ netboot.ipxe │
└─────────────────────┘
├─── PXE Server
│ (HTTP/TFTP)
└─── Target Machine
(PXE Boot)
```
## Component Breakdown
### 1. Netboot Configurations
Located in `nix/images/`, these NixOS configurations define the netboot environment:
#### `netboot-base.nix`
**Purpose**: Common base configuration for all profiles
**Key Features**:
- Extends `netboot-minimal.nix` from nixpkgs
- SSH server with root login (key-based only)
- Generic kernel with broad hardware support
- Disk management tools (disko, parted, cryptsetup, lvm2)
- Network configuration (DHCP, predictable interface names)
- Serial console support (ttyS0, tty0)
- Minimal system (no docs, no sound)
**Package Inclusions**:
```nix
disko, parted, gptfdisk # Disk management
cryptsetup, lvm2 # Encryption and LVM
e2fsprogs, xfsprogs # Filesystem tools
iproute2, curl, tcpdump # Network tools
vim, tmux, htop # System tools
```
**Kernel Configuration**:
```nix
boot.kernelPackages = pkgs.linuxPackages_latest;
boot.kernelParams = [
"console=ttyS0,115200"
"console=tty0"
"loglevel=4"
];
```
#### `netboot-control-plane.nix`
**Purpose**: Full control plane deployment
**Imports**:
- `netboot-base.nix` (base configuration)
- `../modules` (PlasmaCloud service modules)
**Service Inclusions**:
- Chainfire (ports 2379, 2380, 2381)
- FlareDB (ports 2479, 2480)
- IAM (port 8080)
- PlasmaVMC (port 8090)
- PrismNET (port 8091)
- FlashDNS (DNS port 53, API port 8053)
- FiberLB (port 8092)
- LightningStor (port 8093)
- K8sHost (port 10250)
**Service State**: All services **disabled** by default via `lib.mkDefault false`
**Resource Limits** (for netboot environment):
```nix
MemoryMax = "512M"
CPUQuota = "50%"
```
#### `netboot-worker.nix`
**Purpose**: Compute-focused worker nodes
**Imports**:
- `netboot-base.nix`
- `../modules`
**Service Inclusions**:
- PlasmaVMC (VM management)
- PrismNET (SDN)
**Additional Features**:
- KVM virtualization support
- Open vSwitch for SDN
- QEMU and libvirt tools
- Optimized sysctl for VM workloads
**Performance Tuning**:
```nix
"fs.file-max" = 1000000;
"net.ipv4.ip_forward" = 1;
"net.core.netdev_max_backlog" = 5000;
```
#### `netboot-all-in-one.nix`
**Purpose**: Single-node deployment with all services
**Imports**:
- `netboot-base.nix`
- `../modules`
**Combines**: All features from control-plane + worker
**Use Cases**:
- Development environments
- Small deployments
- Edge locations
- POC installations
### 2. Flake Integration
The main `flake.nix` exposes netboot configurations:
```nix
nixosConfigurations = {
netboot-control-plane = nixpkgs.lib.nixosSystem {
system = "x86_64-linux";
modules = [ ./nix/images/netboot-control-plane.nix ];
};
netboot-worker = nixpkgs.lib.nixosSystem {
system = "x86_64-linux";
modules = [ ./nix/images/netboot-worker.nix ];
};
netboot-all-in-one = nixpkgs.lib.nixosSystem {
system = "x86_64-linux";
modules = [ ./nix/images/netboot-all-in-one.nix ];
};
};
```
### 3. Build Script
`build-images.sh` orchestrates the build process:
**Workflow**:
1. Parse command-line arguments (--profile, --output-dir)
2. Create output directories
3. For each profile:
- Build netboot ramdisk: `nix build ...netbootRamdisk`
- Build kernel: `nix build ...kernel`
- Copy artifacts (bzImage, initrd)
- Generate iPXE boot script
- Calculate and display sizes
4. Verify outputs (file existence, size sanity checks)
5. Copy to PXE server (if available)
6. Print summary
**Build Commands**:
```bash
nix build .#nixosConfigurations.netboot-$profile.config.system.build.netbootRamdisk
nix build .#nixosConfigurations.netboot-$profile.config.system.build.kernel
```
**Output Structure**:
```
artifacts/
├── control-plane/
│ ├── bzImage # ~10-30 MB
│ ├── initrd # ~100-300 MB
│ ├── netboot.ipxe # iPXE script
│ ├── build.log # Build log
│ ├── initrd-link # Nix result symlink
│ └── kernel-link # Nix result symlink
├── worker/
│ └── ... (same structure)
└── all-in-one/
└── ... (same structure)
```
## Integration Points
### T024 NixOS Modules
The netboot configurations leverage T024 service modules:
**Module Structure** (example: chainfire.nix):
```nix
{
options.services.chainfire = {
enable = lib.mkEnableOption "chainfire service";
port = lib.mkOption { ... };
raftPort = lib.mkOption { ... };
package = lib.mkOption { ... };
};
config = lib.mkIf cfg.enable {
users.users.chainfire = { ... };
systemd.services.chainfire = { ... };
};
}
```
**Package Availability**:
```nix
# In netboot-control-plane.nix
environment.systemPackages = with pkgs; [
chainfire-server # From flake overlay
flaredb-server # From flake overlay
# ...
];
```
### T032.S2 PXE Infrastructure
The build script integrates with the PXE server:
**Copy Workflow**:
```bash
# Build script copies to:
chainfire/baremetal/pxe-server/assets/nixos/
├── control-plane/
│ ├── bzImage
│ └── initrd
├── worker/
│ ├── bzImage
│ └── initrd
└── all-in-one/
├── bzImage
└── initrd
```
**iPXE Boot Script** (generated):
```ipxe
#!ipxe
kernel ${boot-server}/control-plane/bzImage init=<toplevel>/init console=ttyS0,115200
initrd ${boot-server}/control-plane/initrd
boot
```
The build script substitutes `<toplevel>` with the configuration's resolved system toplevel store path. iPXE passes the kernel command line through literally, so a `/nix/store/*/init` glob would never be expanded — the concrete path must be baked into the script at build time.
## Build Process Deep Dive
### NixOS Netboot Build Internals
1. **netboot-minimal.nix** (from nixpkgs):
- Provides base netboot functionality
- Configures initrd with kexec support
- Sets up squashfs for Nix store
2. **Our Extensions**:
- Add PlasmaCloud service packages
- Configure SSH for nixos-anywhere
- Include provisioning tools (disko, etc.)
- Customize kernel and modules
3. **Build Outputs**:
- **bzImage**: Compressed Linux kernel
- **initrd**: Squashfs-compressed initial ramdisk containing:
- Minimal NixOS system
- Nix store with service packages
- Init scripts for booting
### Size Optimization Strategies
**Current Optimizations**:
```nix
documentation.enable = false; # -50MB
documentation.nixos.enable = false; # -20MB
i18n.supportedLocales = [ "en_US.UTF-8/UTF-8" ]; # -100MB
```
**Additional Strategies** (if needed):
- Use `linuxPackages_hardened` (smaller kernel)
- Remove unused kernel modules
- Compress with xz instead of gzip
- On-demand package fetching from HTTP substituter
**Expected Sizes**:
- **Control Plane**: ~250-350 MB (initrd)
- **Worker**: ~150-250 MB (initrd)
- **All-in-One**: ~300-400 MB (initrd)
## Boot Flow
### From PXE to Running System
```
1. PXE Boot
├─ DHCP discovers boot server
├─ TFTP loads iPXE binary
└─ iPXE executes boot script
2. Netboot Download
├─ HTTP downloads bzImage (~20MB)
├─ HTTP downloads initrd (~200MB)
└─ kexec into NixOS installer
3. NixOS Installer (in RAM)
├─ Init system starts
├─ Network configuration (DHCP)
├─ SSH server starts
└─ Ready for nixos-anywhere
4. Installation (nixos-anywhere)
├─ SSH connection established
├─ Disk partitioning (disko)
├─ NixOS system installation
├─ Secret injection
└─ Bootloader installation
5. First Boot (from disk)
├─ GRUB/systemd-boot loads
├─ Services start (enabled)
├─ Cluster join (if configured)
└─ Running PlasmaCloud node
```
## Customization Guide
### Adding a New Service
**Step 1**: Create NixOS module
```nix
# nix/modules/myservice.nix
{ config, lib, pkgs, ... }:
{
options.services.myservice = {
enable = lib.mkEnableOption "myservice";
};
config = lib.mkIf cfg.enable {
systemd.services.myservice = { ... };
};
}
```
**Step 2**: Add to flake packages
```nix
# flake.nix
packages.myservice-server = buildRustWorkspace { ... };
```
**Step 3**: Include in netboot profile
```nix
# nix/images/netboot-control-plane.nix
environment.systemPackages = with pkgs; [
myservice-server
];
services.myservice = {
enable = lib.mkDefault false;
};
```
### Creating a Custom Profile
**Step 1**: Create new netboot configuration
```nix
# nix/images/netboot-custom.nix
{ config, pkgs, lib, ... }:
{
imports = [
./netboot-base.nix
../modules
];
# Your customizations
environment.systemPackages = [ ... ];
}
```
**Step 2**: Add to flake
```nix
# flake.nix
nixosConfigurations.netboot-custom = nixpkgs.lib.nixosSystem {
system = "x86_64-linux";
modules = [ ./nix/images/netboot-custom.nix ];
};
```
**Step 3**: Update build script
```bash
# build-images.sh
profiles_to_build=("control-plane" "worker" "all-in-one" "custom")
```
## Security Model
### Netboot Phase
**Risk**: Netboot image has root SSH access enabled
**Mitigations**:
1. **Key-based authentication only** (no passwords)
2. **Isolated provisioning VLAN**
3. **MAC address whitelist in DHCP**
4. **Firewall disabled only during install**
### Post-Installation
Services remain disabled until final configuration enables them:
```nix
# In installed system configuration
services.chainfire.enable = true; # Overrides lib.mkDefault false
```
### Secret Management
Secrets are **NOT** embedded in netboot images:
```nix
# During nixos-anywhere installation:
scp secrets/* root@target:/tmp/secrets/
# Installed system references:
services.chainfire.settings.tls = {
cert_path = "/etc/nixos/secrets/tls-cert.pem";
};
```
## Performance Characteristics
### Build Times
- **First build**: 30-60 minutes (downloads all dependencies)
- **Incremental builds**: 5-15 minutes (reuses cached artifacts)
- **With local cache**: 2-5 minutes
### Network Requirements
- **Initial download**: ~2GB (nixpkgs + dependencies)
- **Netboot download**: ~200-400MB per node
- **Installation**: ~500MB-2GB (depending on services)
### Hardware Requirements
**Build Machine**:
- CPU: 4+ cores recommended
- RAM: 8GB minimum, 16GB recommended
- Disk: 50GB free space
- Network: Broadband connection
**Target Machine**:
- RAM: 4GB minimum for netboot (8GB+ for production)
- Network: PXE boot support, DHCP
- Disk: Depends on disko configuration
## Testing Strategy
### Verification Steps
1. **Syntax Validation**:
```bash
nix flake check
```
2. **Build Test**:
```bash
./build-images.sh --profile control-plane
```
3. **Artifact Verification**:
```bash
file artifacts/control-plane/bzImage # Should be Linux kernel
file artifacts/control-plane/initrd # Should be compressed data
```
4. **PXE Boot Test**:
- Boot VM from netboot image
- Verify SSH access
- Check available tools (disko, parted, etc.)
5. **Installation Test**:
- Run nixos-anywhere on test target
- Verify successful installation
- Check service availability
## Troubleshooting Matrix
| Symptom | Possible Cause | Solution |
|---------|---------------|----------|
| Build fails | Missing flakes | Enable experimental-features |
| Large initrd | Too many packages | Remove unused packages |
| SSH fails | Wrong SSH key | Update authorized_keys |
| Boot hangs | Wrong kernel params | Check console= settings |
| No network | DHCP issues | Verify useDHCP = true |
| Service missing | Package not built | Check flake overlay |
## Future Enhancements
### Planned Improvements
1. **Image Variants**:
- Minimal installer (no services)
- Debug variant (with extra tools)
- Rescue mode (for recovery)
2. **Build Optimizations**:
- Parallel profile builds
- Incremental rebuild detection
- Binary cache integration
3. **Security Enhancements**:
- Per-node SSH keys
- TPM-based secrets
- Measured boot support
4. **Monitoring**:
- Build metrics collection
- Size trend tracking
- Performance benchmarking
## References
- **NixOS Netboot**: https://nixos.wiki/wiki/Netboot
- **nixos-anywhere**: https://github.com/nix-community/nixos-anywhere
- **disko**: https://github.com/nix-community/disko
- **T032 Design**: `docs/por/T032-baremetal-provisioning/design.md`
- **T024 Modules**: `nix/modules/`
## Revision History
| Version | Date | Author | Changes |
|---------|------|--------|---------|
| 1.0 | 2025-12-10 | T032.S3 | Initial implementation |

View file

@ -1,388 +0,0 @@
# PlasmaCloud NixOS Image Builder
This directory contains tools and configurations for building bootable NixOS netboot images for bare-metal provisioning of PlasmaCloud infrastructure.
## Overview
The NixOS Image Builder generates netboot images (kernel + initrd) that can be served via PXE/iPXE to provision bare-metal servers with PlasmaCloud services. These images integrate with the T024 NixOS service modules and the T032.S2 PXE boot infrastructure.
## Architecture
The image builder produces three deployment profiles:
### 1. Control Plane (`netboot-control-plane`)
Full control plane deployment with all 8 PlasmaCloud services:
- **Chainfire**: Distributed configuration and coordination
- **FlareDB**: Time-series metrics and events database
- **IAM**: Identity and access management
- **PlasmaVMC**: Virtual machine control plane
- **PrismNET**: Software-defined networking controller
- **FlashDNS**: High-performance DNS server
- **FiberLB**: Layer 4/7 load balancer
- **LightningStor**: Distributed block storage
- **K8sHost**: Kubernetes hosting component
**Use Cases**:
- Multi-node production clusters (3+ control plane nodes)
- High-availability deployments
- Separation of control and data planes
### 2. Worker (`netboot-worker`)
Compute-focused deployment for running tenant workloads:
- **PlasmaVMC**: Virtual machine control plane
- **PrismNET**: Software-defined networking
**Use Cases**:
- Worker nodes in multi-node clusters
- Dedicated compute capacity
- Scalable VM hosting
### 3. All-in-One (`netboot-all-in-one`)
Single-node deployment with all 8 services:
- All services from Control Plane profile
- Optimized for single-node operation
**Use Cases**:
- Development/testing environments
- Small deployments (1-3 nodes)
- Edge locations
- Proof-of-concept installations
## Prerequisites
### Build Environment
- **NixOS** or **Nix package manager** installed
- **Flakes** enabled in Nix configuration
- **Git** access to PlasmaCloud repository
- **Sufficient disk space**: ~10GB for build artifacts
### Enable Nix Flakes
If not already enabled, add to `/etc/nix/nix.conf` or `~/.config/nix/nix.conf`:
```
experimental-features = nix-command flakes
```
### Build Dependencies
The build process automatically handles all dependencies, but ensure you have:
- Working internet connection (for Nix binary cache)
- ~4GB RAM minimum
- ~10GB free disk space
## Build Instructions
### Quick Start
Build all profiles:
```bash
cd baremetal/image-builder   # run from the repository root
./build-images.sh
```
Build a specific profile:
```bash
# Control plane only
./build-images.sh --profile control-plane
# Worker nodes only
./build-images.sh --profile worker
# All-in-one deployment
./build-images.sh --profile all-in-one
```
Custom output directory:
```bash
./build-images.sh --output-dir /srv/pxe/images
```
### Build Output
Each profile generates:
- `bzImage` - Linux kernel (~10-30 MB)
- `initrd` - Initial ramdisk (~100-300 MB)
- `netboot.ipxe` - iPXE boot script
- `build.log` - Build log for troubleshooting
Artifacts are placed in:
```
./artifacts/
├── control-plane/
│ ├── bzImage
│ ├── initrd
│ ├── netboot.ipxe
│ └── build.log
├── worker/
│ ├── bzImage
│ ├── initrd
│ ├── netboot.ipxe
│ └── build.log
└── all-in-one/
├── bzImage
├── initrd
├── netboot.ipxe
└── build.log
```
### Manual Build Commands
You can also build images directly with Nix:
```bash
# Build initrd
nix build .#nixosConfigurations.netboot-control-plane.config.system.build.netbootRamdisk
# Build kernel
nix build .#nixosConfigurations.netboot-control-plane.config.system.build.kernel
# Access artifacts
ls -lh result/
```
## Deployment
### Integration with PXE Server (T032.S2)
The build script automatically copies artifacts to the PXE server directory if it exists:
```
chainfire/baremetal/pxe-server/assets/nixos/
├── control-plane/
├── worker/
├── all-in-one/
├── bzImage-control-plane -> control-plane/bzImage
├── initrd-control-plane -> control-plane/initrd
├── bzImage-worker -> worker/bzImage
└── initrd-worker -> worker/initrd
```
### Manual Deployment
Copy artifacts to your PXE/HTTP server:
```bash
# Example: Deploy to nginx serving directory
sudo cp -r ./artifacts/control-plane /srv/pxe/nixos/
sudo cp -r ./artifacts/worker /srv/pxe/nixos/
sudo cp -r ./artifacts/all-in-one /srv/pxe/nixos/
```
### iPXE Boot Configuration
Reference the images in your iPXE boot script:
```ipxe
#!ipxe
set boot-server 10.0.0.2:8080
:control-plane
kernel http://${boot-server}/nixos/control-plane/bzImage init=<toplevel>/init console=ttyS0,115200 console=tty0 loglevel=4
initrd http://${boot-server}/nixos/control-plane/initrd
boot
:worker
kernel http://${boot-server}/nixos/worker/bzImage init=<toplevel>/init console=ttyS0,115200 console=tty0 loglevel=4
initrd http://${boot-server}/nixos/worker/initrd
boot
```
Use the concrete init path from the generated `netboot.ipxe` for `<toplevel>` — iPXE does not expand `/nix/store/*` wildcards, so the resolved store path must appear verbatim.
## Customization
### Adding Services
To add a service to a profile, edit the corresponding configuration:
```nix
# nix/images/netboot-control-plane.nix
environment.systemPackages = with pkgs; [
chainfire-server
flaredb-server
# ... existing services ...
my-custom-service # Add your service
];
```
### Custom Kernel Configuration
Modify `nix/images/netboot-base.nix`:
```nix
boot.kernelPackages = pkgs.linuxPackages_6_6; # Specific kernel version
boot.kernelModules = [ "my-driver" ]; # Additional modules
boot.kernelParams = [ "my-param=value" ]; # Additional kernel parameters
```
### Additional Packages
Add packages to the netboot environment:
```nix
# nix/images/netboot-base.nix
environment.systemPackages = with pkgs; [
# ... existing packages ...
# Your additions
python3
nodejs
custom-tool
];
```
### Hardware-Specific Configuration
See `examples/hardware-specific.nix` for hardware-specific customizations.
## Troubleshooting
### Build Failures
**Symptom**: Build fails with Nix errors
**Solutions**:
1. Check build log: `cat artifacts/PROFILE/build.log`
2. Verify Nix flakes are enabled
3. Update nixpkgs: `nix flake update`
4. Clear Nix store cache: `nix-collect-garbage -d`
### Missing Service Packages
**Symptom**: Error: "package not found"
**Solutions**:
1. Verify service is built: `nix build .#chainfire-server`
2. Check flake overlay: `nix flake show`
3. Rebuild all packages: `nix build .#default`
### Image Too Large
**Symptom**: Initrd > 500 MB
**Solutions**:
1. Remove unnecessary packages from `environment.systemPackages`
2. Disable documentation (already done in base config)
3. Use a smaller kernel variant, e.g. `boot.kernelPackages = pkgs.linuxPackages_hardened`
### PXE Boot Fails
**Symptom**: Server fails to boot netboot image
**Solutions**:
1. Verify artifacts are accessible via HTTP
2. Check iPXE script syntax
3. Verify kernel parameters in boot script
4. Check serial console output (ttyS0)
5. Ensure DHCP provides correct boot server IP
### SSH Access Issues
**Symptom**: Cannot SSH to netboot installer
**Solutions**:
1. Replace example SSH key in `nix/images/netboot-base.nix`
2. Verify network connectivity (DHCP, firewall)
3. Check SSH service is running: `systemctl status sshd`
## Configuration Reference
### Service Modules (T024 Integration)
All netboot profiles import PlasmaCloud service modules from `nix/modules/`:
- `chainfire.nix` - Chainfire configuration
- `flaredb.nix` - FlareDB configuration
- `iam.nix` - IAM configuration
- `plasmavmc.nix` - PlasmaVMC configuration
- `prismnet.nix` - PrismNET configuration
- `flashdns.nix` - FlashDNS configuration
- `fiberlb.nix` - FiberLB configuration
- `lightningstor.nix` - LightningStor configuration
- `k8shost.nix` - K8sHost configuration
Services are **disabled by default** in netboot images and enabled in final installed configurations.
### Netboot Base Configuration
Located at `nix/images/netboot-base.nix`, provides:
- SSH server with root access (key-based)
- Generic kernel with broad hardware support
- Disk management tools (disko, parted, cryptsetup, lvm2)
- Network tools (iproute2, curl, tcpdump)
- Serial console support (ttyS0, tty0)
- DHCP networking
- Minimal system configuration
### Profile Configurations
- `nix/images/netboot-control-plane.nix` - All 8 services
- `nix/images/netboot-worker.nix` - Compute services (PlasmaVMC, PrismNET)
- `nix/images/netboot-all-in-one.nix` - All services for single-node
## Security Considerations
### SSH Keys
**IMPORTANT**: The default SSH key in `netboot-base.nix` is an example placeholder. You MUST replace it with your actual provisioning key:
```nix
users.users.root.openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3Nza... your-provisioning-key@host"
];
```
Generate a new key:
```bash
ssh-keygen -t ed25519 -C "provisioning@plasmacloud"
```
### Network Security
- Netboot images have **firewall disabled** for installation phase
- Use isolated provisioning VLAN for PXE boot
- Implement MAC address whitelist in DHCP
- Enable firewall in final installed configurations
### Secrets Management
- Do NOT embed secrets in netboot images
- Use nixos-anywhere to inject secrets during installation
- Store secrets in `/etc/nixos/secrets/` on installed systems
- Use proper file permissions (0400 for keys)
## Next Steps
After building images:
1. **Deploy to PXE Server**: Copy artifacts to HTTP server
2. **Configure DHCP/iPXE**: Set up boot infrastructure (see T032.S2)
3. **Prepare Node Configurations**: Create per-node configs for nixos-anywhere
4. **Test Boot Process**: Verify PXE boot on test hardware
5. **Run nixos-anywhere**: Install NixOS on target machines
## Resources
- **Design Document**: `docs/por/T032-baremetal-provisioning/design.md`
- **PXE Infrastructure**: `chainfire/baremetal/pxe-server/`
- **Service Modules**: `nix/modules/`
- **Example Configurations**: `baremetal/image-builder/examples/`
## Support
For issues or questions:
1. Check build logs: `artifacts/PROFILE/build.log`
2. Review design document: `docs/por/T032-baremetal-provisioning/design.md`
3. Examine example configurations: `examples/`
4. Verify service module configuration: `nix/modules/`
## License
Apache 2.0 - See LICENSE file for details

View file

@ -77,7 +77,7 @@ Build NixOS netboot images for PlasmaCloud bare-metal provisioning.
OPTIONS: OPTIONS:
--profile PROFILE Build specific profile: --profile PROFILE Build specific profile:
- control-plane: All 8 PlasmaCloud services - control-plane: All 8 PlasmaCloud services
- worker: Compute-focused services (PlasmaVMC, NovaNET) - worker: Compute-focused services (PlasmaVMC, PrismNET)
- all-in-one: All services for single-node deployment - all-in-one: All services for single-node deployment
- all: Build all profiles (default) - all: Build all profiles (default)
@ -97,7 +97,7 @@ EXAMPLES:
PROFILES: PROFILES:
control-plane - Full control plane with all 8 services control-plane - Full control plane with all 8 services
worker - Worker node with PlasmaVMC and NovaNET worker - Worker node with PlasmaVMC and PrismNET
all-in-one - Single-node deployment with all services all-in-one - Single-node deployment with all services
OUTPUT: OUTPUT:
@ -141,6 +141,16 @@ build_profile() {
cp -f "$profile_dir/initrd-link/initrd" "$profile_dir/initrd" cp -f "$profile_dir/initrd-link/initrd" "$profile_dir/initrd"
cp -f "$profile_dir/kernel-link/bzImage" "$profile_dir/bzImage" cp -f "$profile_dir/kernel-link/bzImage" "$profile_dir/bzImage"
# Resolve init path from the build (avoids hardcoding store paths)
local init_path="/init"
if toplevel=$(nix eval --raw "$REPO_ROOT#nixosConfigurations.netboot-$profile.config.system.build.toplevel" 2>/dev/null); then
if [ -n "$toplevel" ]; then
init_path="${toplevel}/init"
fi
else
print_warning "Failed to resolve init path for $profile; using /init"
fi
# Generate iPXE boot script # Generate iPXE boot script
print_info " Generating iPXE boot script..." print_info " Generating iPXE boot script..."
cat > "$profile_dir/netboot.ipxe" << EOF cat > "$profile_dir/netboot.ipxe" << EOF
@ -159,7 +169,7 @@ echo Initrd: initrd
echo echo
# Load kernel and initrd # Load kernel and initrd
kernel \${boot-server}/$profile/bzImage init=/nix/store/*/init console=ttyS0,115200 console=tty0 loglevel=4 kernel \${boot-server}/$profile/bzImage init=${init_path} console=ttyS0,115200 console=tty0 loglevel=4
initrd \${boot-server}/$profile/initrd initrd \${boot-server}/$profile/initrd
# Boot # Boot

View file

@ -1,361 +0,0 @@
{ config, pkgs, lib, ... }:

# ==============================================================================
# CUSTOM NETBOOT CONFIGURATION EXAMPLE
# ==============================================================================
# This example demonstrates how to create a custom netboot configuration with:
#   - Custom kernel version and modules
#   - Additional packages for specialized use cases
#   - Hardware-specific drivers
#   - Custom network configuration
#   - Debugging tools
#
# Usage:
#   1. Copy this file to nix/images/netboot-custom.nix
#   2. Add to flake.nix:
#        nixosConfigurations.netboot-custom = nixpkgs.lib.nixosSystem {
#          system = "x86_64-linux";
#          modules = [ ./nix/images/netboot-custom.nix ];
#        };
#   3. Build: ./build-images.sh --profile custom
# ==============================================================================

{
  imports = [
    ../netboot-base.nix   # Adjust path as needed
    ../../modules         # PlasmaCloud service modules
  ];

  # ============================================================================
  # CUSTOM KERNEL CONFIGURATION
  # ============================================================================

  # Use specific kernel version instead of latest
  boot.kernelPackages = pkgs.linuxPackages_6_6; # LTS kernel

  # Add custom kernel modules for specialized hardware
  boot.kernelModules = [
    # Infiniband/RDMA support
    "ib_core"
    "ib_uverbs"
    "mlx5_core"
    "mlx5_ib"

    # GPU support (for GPU compute nodes)
    "nvidia"
    "nvidia_uvm"

    # Custom storage controller
    "megaraid_sas"
    "mpt3sas"
  ];

  # Custom kernel parameters.
  #
  # NOTE: an option may only be assigned once per module attrset — a second
  # `boot.kernelParams = ...;` in the same file is an evaluation error
  # ("attribute 'boot.kernelParams' already defined"). The base parameters and
  # the verbose-boot debugging parameters are therefore merged here with
  # lib.mkMerge. lib.mkAfter keeps "loglevel=7 debug" at the end of the
  # command line, so the kernel's last-wins parsing overrides "loglevel=4".
  boot.kernelParams = lib.mkMerge [
    [
      # Default console configuration
      "console=ttyS0,115200"
      "console=tty0"
      "loglevel=4"

      # Custom parameters
      "intel_iommu=on"  # Enable IOMMU for PCI passthrough
      "iommu=pt"        # Passthrough mode
      "hugepagesz=2M"   # 2MB hugepages
      "hugepages=1024"  # Allocate 1024 hugepages (2GB)
      "isolcpus=2-7"    # CPU isolation for real-time workloads
    ]

    # Verbose boot logging for debugging (see DEBUGGING AND LOGGING below).
    (lib.mkAfter [ "loglevel=7" "debug" ])
  ];

  # Blacklist problematic modules
  boot.blacklistedKernelModules = [
    "nouveau" # Disable nouveau if using proprietary NVIDIA
    "i915"    # Disable Intel GPU if not needed
  ];

  # ============================================================================
  # ADDITIONAL PACKAGES
  # ============================================================================
  environment.systemPackages = with pkgs; [
    # Networking diagnostics
    iperf3        # Network performance testing
    mtr           # Network diagnostic tool
    nmap          # Network scanner
    wireshark-cli # Packet analyzer

    # Storage tools
    nvme-cli      # NVMe management
    smartmontools # SMART monitoring
    fio           # I/O performance testing
    sg3_utils     # SCSI utilities

    # Hardware diagnostics
    pciutils      # lspci
    usbutils      # lsusb
    dmidecode     # Hardware information
    lshw          # Hardware lister
    hwinfo        # Hardware info tool

    # Debugging tools
    strace        # System call tracer
    ltrace        # Library call tracer
    gdb           # GNU debugger
    valgrind      # Memory debugger

    # Performance tools
    perf          # Linux perf tool
    bpftrace      # eBPF tracing
    sysstat       # System statistics (sar, iostat)

    # Container/virtualization tools
    qemu_full     # Full QEMU with all features
    libvirt       # Virtualization management
    virt-manager  # VM management (CLI)
    docker        # Container runtime
    podman        # Alternative container runtime

    # Development tools (for on-site debugging)
    python3Full   # Python with all modules
    python3Packages.pip
    nodejs        # Node.js runtime
    git           # Version control
    gcc           # C compiler
    rustc         # Rust compiler
    cargo         # Rust package manager

    # Custom tools
    # Add your organization's custom packages here
  ];

  # ============================================================================
  # CUSTOM NETWORK CONFIGURATION
  # ============================================================================

  # Static IP instead of DHCP (example)
  networking.useDHCP = lib.mkForce false;
  networking.interfaces.eth0 = {
    useDHCP = false;
    ipv4.addresses = [{
      address = "10.0.1.100";
      prefixLength = 24;
    }];
    # Enable jumbo frames
    mtu = 9000;
  };
  networking.defaultGateway = "10.0.1.1";
  networking.nameservers = [ "10.0.1.1" "8.8.8.8" ];

  # Custom DNS domain
  networking.domain = "custom.example.com";

  # ============================================================================
  # CUSTOM SSH CONFIGURATION
  # ============================================================================

  # Multiple SSH keys for different operators
  users.users.root.openssh.authorizedKeys.keys = [
    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOperator1Key operator1@example.com"
    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOperator2Key operator2@example.com"
    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOperator3Key operator3@example.com"
  ];

  # Custom SSH port (for security through obscurity - not recommended for production)
  # services.openssh.ports = [ 2222 ];

  # ============================================================================
  # CUSTOM SERVICES
  # ============================================================================

  # Enable only specific PlasmaCloud services
  services.plasmavmc = {
    enable = lib.mkDefault false;
    port = 8081;
  };
  services.prismnet = {
    enable = lib.mkDefault false;
    port = 8082;
  };

  # ============================================================================
  # DEBUGGING AND LOGGING
  # ============================================================================

  # Verbose boot logging ("loglevel=7" "debug") is appended to
  # boot.kernelParams above via lib.mkAfter, because the option may only be
  # assigned once per module.

  # Enable systemd debug logging on the serial console getty
  systemd.services."serial-getty@ttyS0".environment = {
    SYSTEMD_LOG_LEVEL = "debug";
  };

  # Enable additional logging
  services.journald.extraConfig = ''
    Storage=persistent
    MaxRetentionSec=7day
    SystemMaxUse=1G
  '';

  # ============================================================================
  # PERFORMANCE TUNING
  # ============================================================================

  # Custom sysctl settings for high-performance networking
  boot.kernel.sysctl = {
    # Network buffer sizes
    "net.core.rmem_max" = 268435456;    # 256 MB
    "net.core.wmem_max" = 268435456;    # 256 MB
    "net.core.rmem_default" = 67108864; # 64 MB
    "net.core.wmem_default" = 67108864; # 64 MB

    # TCP tuning
    "net.ipv4.tcp_rmem" = "4096 87380 134217728";
    "net.ipv4.tcp_wmem" = "4096 65536 134217728";
    "net.ipv4.tcp_congestion_control" = "bbr";

    # Connection tracking
    "net.netfilter.nf_conntrack_max" = 1048576;

    # File descriptor limits
    "fs.file-max" = 2097152;

    # Virtual memory
    "vm.swappiness" = 1;
    "vm.vfs_cache_pressure" = 50;
    "vm.dirty_ratio" = 10;
    "vm.dirty_background_ratio" = 5;

    # Kernel
    "kernel.pid_max" = 4194304;
  };

  # Increase systemd limits
  systemd.extraConfig = ''
    DefaultLimitNOFILE=1048576
    DefaultLimitNPROC=1048576
  '';

  # ============================================================================
  # HARDWARE-SPECIFIC CONFIGURATION
  # ============================================================================

  # Enable CPU microcode updates
  hardware.cpu.intel.updateMicrocode = true;
  hardware.cpu.amd.updateMicrocode = true;

  # Enable firmware updates
  hardware.enableRedistributableFirmware = true;

  # GPU support (example for NVIDIA)
  # Uncomment if using NVIDIA GPUs
  # hardware.nvidia.modesetting.enable = true;
  # services.xserver.videoDrivers = [ "nvidia" ];

  # ============================================================================
  # CUSTOM INITIALIZATION
  # ============================================================================

  # Run custom script on boot
  systemd.services.custom-init = {
    description = "Custom initialization script";
    wantedBy = [ "multi-user.target" ];
    after = [ "network-online.target" ];
    wants = [ "network-online.target" ];
    serviceConfig = {
      Type = "oneshot";
      RemainAfterExit = true;
    };
    script = ''
      echo "Running custom initialization..."

      # Example: Configure network interfaces
      ${pkgs.iproute2}/bin/ip link set dev eth1 up

      # Example: Load custom kernel modules
      ${pkgs.kmod}/bin/modprobe custom_driver || true

      # Example: Call home to provisioning server
      ${pkgs.curl}/bin/curl -X POST http://provisioning.example.com/api/register \
        -d "hostname=$(hostname)" \
        -d "ip=$(${pkgs.iproute2}/bin/ip -4 addr show eth0 | grep -oP '(?<=inet\s)\d+(\.\d+){3}')" \
        || true

      echo "Custom initialization complete"
    '';
  };

  # ============================================================================
  # FIREWALL CONFIGURATION
  # ============================================================================

  # Custom firewall rules (disabled by default in netboot, but example provided)
  networking.firewall = {
    enable = lib.mkDefault false; # Disabled during provisioning

    # When enabled, allow these ports
    allowedTCPPorts = [
      22   # SSH
      8081 # PlasmaVMC
      8082 # PrismNET
    ];

    # Custom iptables rules
    extraCommands = ''
      # Allow ICMP
      iptables -A INPUT -p icmp -j ACCEPT

      # Rate limit SSH connections
      iptables -A INPUT -p tcp --dport 22 -m state --state NEW -m recent --set
      iptables -A INPUT -p tcp --dport 22 -m state --state NEW -m recent --update --seconds 60 --hitcount 4 -j DROP
    '';
  };

  # ============================================================================
  # NIX CONFIGURATION
  # ============================================================================

  # Custom binary caches
  nix.settings = {
    substituters = [
      "https://cache.nixos.org"
      "https://custom-cache.example.com" # Your organization's cache
    ];
    trusted-public-keys = [
      "cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY="
      "custom-cache.example.com:YourPublicKeyHere"
    ];

    # Build settings
    max-jobs = "auto";
    cores = 0; # Use all available cores

    # Experimental features.
    # "repl-flake" was removed from Nix (stabilized in 2.20) and now produces
    # an "unknown experimental feature" warning, so it is no longer listed.
    experimental-features = [ "nix-command" "flakes" ];
  };

  # ============================================================================
  # TIMEZONE AND LOCALE
  # ============================================================================

  # Custom timezone (instead of UTC)
  time.timeZone = lib.mkForce "America/New_York";

  # Additional locale support
  i18n.supportedLocales = [
    "en_US.UTF-8/UTF-8"
    "ja_JP.UTF-8/UTF-8" # Japanese support
  ];
  i18n.defaultLocale = "en_US.UTF-8";

  # ============================================================================
  # SYSTEM STATE VERSION
  # ============================================================================
  system.stateVersion = "24.11";
}

View file

@ -1,442 +0,0 @@
{ config, pkgs, lib, ... }:

# ==============================================================================
# HARDWARE-SPECIFIC NETBOOT CONFIGURATION EXAMPLE
# ==============================================================================
# This example demonstrates hardware-specific configurations for common
# bare-metal server platforms. Use this as a template for your specific hardware.
#
# Common Server Platforms:
#   - Dell PowerEdge (R640, R650, R750)
#   - HP ProLiant (DL360, DL380, DL560)
#   - Supermicro (X11, X12 series)
#   - Generic whitebox servers
#
# Usage:
#   1. Copy relevant sections to your netboot configuration
#   2. Adjust based on your specific hardware
#   3. Test boot on target hardware
# ==============================================================================

{
  imports = [
    ../netboot-base.nix
    ../../modules
  ];

  # ============================================================================
  # DELL POWEREDGE R640 CONFIGURATION
  # ============================================================================
  # Uncomment this section for Dell PowerEdge R640 servers
  /*
  # Hardware-specific kernel modules
  boot.initrd.availableKernelModules = [
    # Dell PERC RAID controller
    "megaraid_sas"
    # Intel X710 10GbE NIC
    "i40e"
    # NVMe drives
    "nvme"
    # Standard modules
    "ahci"
    "xhci_pci"
    "usb_storage"
    "sd_mod"
    "sr_mod"
  ];

  boot.kernelModules = [
    "kvm-intel"    # Intel VT-x
    "ipmi_devintf" # IPMI interface
    "ipmi_si"      # IPMI system interface
  ];

  # Dell-specific firmware
  hardware.enableRedistributableFirmware = true;
  hardware.cpu.intel.updateMicrocode = true;

  # Network interface naming
  # R640 typically has:
  #   - eno1, eno2: Onboard 1GbE (Intel i350)
  #   - ens1f0, ens1f1: PCIe 10GbE (Intel X710)
  networking.interfaces = {
    eno1 = { useDHCP = true; };
    ens1f0 = {
      useDHCP = false;
      mtu = 9000; # Jumbo frames for 10GbE
    };
  };

  # iDRAC/IPMI configuration
  services.freeipmi.enable = true;

  # Dell OpenManage tools (optional)
  environment.systemPackages = with pkgs; [
    ipmitool
    freeipmi
  ];
  */

  # ============================================================================
  # HP PROLIANT DL360 GEN10 CONFIGURATION
  # ============================================================================
  # Uncomment this section for HP ProLiant DL360 Gen10 servers
  /*
  boot.initrd.availableKernelModules = [
    # HP Smart Array controller
    "hpsa"
    # Broadcom/Intel NIC
    "tg3"
    "bnx2x"
    "i40e"
    # NVMe
    "nvme"
    # Standard
    "ahci"
    "xhci_pci"
    "usb_storage"
    "sd_mod"
  ];

  boot.kernelModules = [
    "kvm-intel"
    "ipmi_devintf"
    "ipmi_si"
  ];

  hardware.enableRedistributableFirmware = true;
  hardware.cpu.intel.updateMicrocode = true;

  # HP-specific tools
  environment.systemPackages = with pkgs; [
    ipmitool
    smartmontools
  ];

  # iLO/IPMI
  services.freeipmi.enable = true;
  */

  # ============================================================================
  # SUPERMICRO X11 SERIES CONFIGURATION
  # ============================================================================
  # Uncomment this section for Supermicro X11 series servers
  /*
  boot.initrd.availableKernelModules = [
    # LSI/Broadcom RAID
    "megaraid_sas"
    "mpt3sas"
    # Intel NIC (common on Supermicro)
    "igb"
    "ixgbe"
    "i40e"
    # NVMe
    "nvme"
    # Standard
    "ahci"
    "xhci_pci"
    "ehci_pci"
    "usb_storage"
    "sd_mod"
  ];

  boot.kernelModules = [
    "kvm-intel" # Or kvm-amd for AMD CPUs
    "ipmi_devintf"
    "ipmi_si"
  ];

  hardware.enableRedistributableFirmware = true;

  # CPU-specific (adjust based on your CPU)
  hardware.cpu.intel.updateMicrocode = true;
  # hardware.cpu.amd.updateMicrocode = true; # For AMD CPUs

  # IPMI configuration
  services.freeipmi.enable = true;
  environment.systemPackages = with pkgs; [
    ipmitool
    dmidecode
    smartmontools
  ];
  */

  # ============================================================================
  # GENERIC HIGH-PERFORMANCE SERVER
  # ============================================================================
  # This configuration works for most modern x86_64 servers
  boot.initrd.availableKernelModules = [
    # SATA/AHCI
    "ahci"
    "ata_piix"
    # NVMe
    "nvme"
    # USB
    "xhci_pci"
    "ehci_pci"
    "usb_storage"
    "usbhid"
    # SCSI/SAS
    "sd_mod"
    "sr_mod"
    # Common RAID controllers
    "megaraid_sas" # LSI MegaRAID
    "mpt3sas"      # LSI SAS3
    "hpsa"         # HP Smart Array
    "aacraid"      # Adaptec
    # Network
    "e1000e"       # Intel GbE
    "igb"          # Intel GbE
    "ixgbe"        # Intel 10GbE
    "i40e"         # Intel 10/25/40GbE
    "bnx2x"        # Broadcom 10GbE
    "mlx4_core"    # Mellanox ConnectX-3
    "mlx5_core"    # Mellanox ConnectX-4/5
  ];

  # Kernel modules for virtualization and IPMI/BMC access.
  #
  # NOTE: an option may only be assigned once per module attrset — a second
  # `boot.kernelModules = ...;` later in the same file is an evaluation error
  # ("attribute 'boot.kernelModules' already defined"), so the KVM and IPMI
  # modules are consolidated here.
  boot.kernelModules = [
    "kvm-intel"    # Intel VT-x
    "kvm-amd"      # AMD-V
    "ipmi_devintf" # IPMI device interface (used by ipmitool)
    "ipmi_si"      # IPMI system interface
  ];

  # Enable all firmware
  hardware.enableRedistributableFirmware = true;

  # CPU microcode (both Intel and AMD)
  hardware.cpu.intel.updateMicrocode = true;
  hardware.cpu.amd.updateMicrocode = true;

  # ============================================================================
  # NETWORK INTERFACE CONFIGURATION
  # ============================================================================
  # Predictable interface names disabled in base config, using eth0, eth1, etc.
  # For specific hardware, you may want to use biosdevname or systemd naming

  # Example: Bond configuration for redundancy
  /*
  networking.bonds.bond0 = {
    interfaces = [ "eth0" "eth1" ];
    driverOptions = {
      mode = "802.3ad"; # LACP
      xmit_hash_policy = "layer3+4";
      lacp_rate = "fast";
      miimon = "100";
    };
  };
  networking.interfaces.bond0 = {
    useDHCP = true;
    mtu = 9000;
  };
  */

  # Example: VLAN configuration
  /*
  networking.vlans = {
    vlan100 = {
      id = 100;
      interface = "eth0";
    };
    vlan200 = {
      id = 200;
      interface = "eth0";
    };
  };
  networking.interfaces.vlan100 = {
    useDHCP = false;
    ipv4.addresses = [{
      address = "10.100.1.10";
      prefixLength = 24;
    }];
  };
  */

  # ============================================================================
  # STORAGE CONFIGURATION
  # ============================================================================

  # Enable RAID support
  boot.swraid.enable = true;
  boot.swraid.mdadmConf = ''
    ARRAY /dev/md0 level=raid1 num-devices=2
  '';

  # LVM support
  services.lvm.enable = true;

  # ZFS support (if needed)
  # boot.supportedFilesystems = [ "zfs" ];
  # boot.zfs.forceImportRoot = false;

  # ============================================================================
  # KERNEL COMMAND LINE (CPU, MEMORY, AND LATENCY TUNING)
  # ============================================================================
  # All kernel parameters are consolidated into a single definition:
  # `boot.kernelParams` may only be assigned once per module attrset, and the
  # original per-section assignments (CPU-specific, hugepages, C-state limits)
  # were four separate definitions — a hard evaluation error. lib.mkMerge
  # combines the lists; lib.mkIf keeps the vendor-specific entries conditional
  # on the corresponding microcode option, exactly as before.
  boot.kernelParams = lib.mkMerge [
    # Intel-specific
    (lib.mkIf config.hardware.cpu.intel.updateMicrocode [
      "intel_pstate=active" # Use Intel P-State driver
      "intel_iommu=on"      # Enable IOMMU for VT-d
    ])

    # AMD-specific
    (lib.mkIf config.hardware.cpu.amd.updateMicrocode [
      "amd_iommu=on" # Enable IOMMU for AMD-Vi
    ])

    # Hugepages for high-performance applications (DPDK, databases)
    [
      "hugepagesz=2M"
      "hugepages=1024" # 2GB of 2MB hugepages
      "default_hugepagesz=2M"
    ]

    # Disable power management features that can cause latency.
    # NOTE: these settings prioritize performance over power efficiency;
    # remove or adjust for non-latency-sensitive workloads.
    [
      "processor.max_cstate=1"  # Limit C-states
      "intel_idle.max_cstate=1" # Limit idle states
      "idle=poll"               # Aggressive polling (high power usage!)
    ]
  ];

  # Transparent Hugepages
  boot.kernel.sysctl = {
    "vm.nr_hugepages" = 1024;
    # "vm.nr_overcommit_hugepages" = 512; # Additional hugepages if needed
  };

  # ============================================================================
  # IPMI/BMC CONFIGURATION
  # ============================================================================
  # The IPMI kernel modules (ipmi_devintf, ipmi_si) are loaded via the
  # consolidated boot.kernelModules definition above.

  # IPMI tools
  # NOTE(review): confirm `services.freeipmi` exists in the pinned nixpkgs;
  # it is not a standard upstream NixOS option name.
  services.freeipmi.enable = true;
  environment.systemPackages = with pkgs; [
    ipmitool # IPMI command-line tool
    freeipmi # Alternative IPMI tools
  ];

  # Example: Configure BMC network (usually done via IPMI)
  # Run manually: ipmitool lan set 1 ipaddr 10.0.100.10
  # Run manually: ipmitool lan set 1 netmask 255.255.255.0
  # Run manually: ipmitool lan set 1 defgw ipaddr 10.0.100.1

  # ============================================================================
  # PERFORMANCE TUNING
  # ============================================================================

  # Set CPU governor for performance
  powerManagement.cpuFreqGovernor = "performance";

  # C-state/idle kernel parameters are part of the consolidated
  # boot.kernelParams definition above.

  # ============================================================================
  # HARDWARE MONITORING
  # ============================================================================

  # Enable hardware sensors
  # services.lm_sensors.enable = true; # Uncomment if needed

  # SMART monitoring
  services.smartd = {
    enable = true;
    autodetect = true;
  };

  # ============================================================================
  # GPU CONFIGURATION (if applicable)
  # ============================================================================

  # NVIDIA GPU
  /*
  hardware.nvidia = {
    modesetting.enable = true;
    powerManagement.enable = false;
    powerManagement.finegrained = false;
    open = false;           # Use proprietary driver
    nvidiaSettings = false; # No GUI needed
  };
  services.xserver.videoDrivers = [ "nvidia" ];

  # NVIDIA Container Runtime (for GPU containers)
  hardware.nvidia-container-toolkit.enable = true;

  environment.systemPackages = with pkgs; [
    cudaPackages.cudatoolkit
    nvidia-docker
  ];
  */

  # AMD GPU
  /*
  boot.initrd.kernelModules = [ "amdgpu" ];
  services.xserver.videoDrivers = [ "amdgpu" ];
  */

  # ============================================================================
  # INFINIBAND/RDMA (for high-performance networking)
  # ============================================================================
  /*
  boot.kernelModules = [
    "ib_core"
    "ib_uverbs"
    "ib_umad"
    "rdma_cm"
    "rdma_ucm"
    "mlx5_core"
    "mlx5_ib"
  ];

  environment.systemPackages = with pkgs; [
    rdma-core
    libfabric
    # perftest # RDMA performance tests
  ];

  # Configure IPoIB (IP over InfiniBand)
  networking.interfaces.ib0 = {
    useDHCP = false;
    ipv4.addresses = [{
      address = "192.168.100.10";
      prefixLength = 24;
    }];
    mtu = 65520; # Max for IPoIB connected mode
  };
  */

  # ============================================================================
  # SYSTEM STATE VERSION
  # ============================================================================
  system.stateVersion = "24.11";
}

View file

@ -1,36 +1,22 @@
# QEMU Socket Networking VM Cluster # Legacy Baremetal VM Cluster
## Architecture `baremetal/vm-cluster` is no longer the primary local validation path.
**Topology:** 4 QEMU VMs connected via multicast socket networking (230.0.0.1:1234) Use [`nix/test-cluster`](/home/centra/cloud/nix/test-cluster/README.md) for canonical local VM validation:
**VMs:** ```bash
1. **pxe-server** (192.168.100.1) - Provides DHCP/TFTP/HTTP services nix run ./nix/test-cluster#cluster -- smoke
2. **node01** (192.168.100.11) - Cluster node ```
3. **node02** (192.168.100.12) - Cluster node
4. **node03** (192.168.100.13) - Cluster node
**Network:** All VMs share L2 segment via QEMU multicast socket (no root privileges required) This directory is kept only for the older manual T036 PXE and bare-metal style experiments.
## Files ## What remains here
- `node01.qcow2`, `node02.qcow2`, `node03.qcow2` - 100GB cluster node disks - [`pxe-server/`](/home/centra/cloud/baremetal/vm-cluster/pxe-server): older PXE server configuration
- `pxe-server.qcow2` - 20GB PXE server disk - [`legacy/`](/home/centra/cloud/baremetal/vm-cluster/legacy/README.md): archived manual deployment, validation, and ad hoc QEMU launch scripts
- `launch-pxe-server.sh` - PXE server startup script
- `launch-node01.sh`, `launch-node02.sh`, `launch-node03.sh` - Node startup scripts
- `pxe-server/` - PXE server configuration files
## MACs ## Status
- pxe-server: 52:54:00:00:00:01 - unsupported for regular development
- node01: 52:54:00:00:01:01 - not the release-validation path
- node02: 52:54:00:00:01:02 - retained only to preserve old manual experiments
- node03: 52:54:00:00:01:03
## Provisioning Flow
1. Start PXE server VM (Alpine Linux with dnsmasq)
2. Configure DHCP/TFTP/HTTP services
3. Deploy NixOS netboot artifacts
4. Start node VMs with PXE boot enabled
5. Nodes PXE boot and provision via nixos-anywhere

View file

@ -1,46 +0,0 @@
# Alpine Linux Answer File for Automated Installation
# For use with: setup-alpine -f alpine-answers.txt
# Keyboard layout
KEYMAPOPTS="us us"
# Hostname
HOSTNAMEOPTS="-n pxe-server"
# Network configuration
# eth0: multicast network (static 192.168.100.1)
# eth1: user network (DHCP for internet)
INTERFACESOPTS="auto lo
iface lo inet loopback
auto eth0
iface eth0 inet static
address 192.168.100.1
netmask 255.255.255.0
auto eth1
iface eth1 inet dhcp"
# DNS
DNSOPTS="8.8.8.8 8.8.4.4"
# Timezone
TIMEZONEOPTS="-z UTC"
# Proxy (none)
PROXYOPTS="none"
# APK mirror (auto-detect fastest)
APKREPOSOPTS="-f"
# SSH server
SSHDOPTS="-c openssh"
# NTP client
NTPOPTS="-c chrony"
# Disk mode (sys = traditional installation to disk)
DISKOPTS="-m sys /dev/vda"
# APK package cache directory (this sets the local apk cache location,
# not a package list — TODO confirm against setup-alpine docs)
APKCACHEOPTS="/var/cache/apk"

View file

@ -0,0 +1,18 @@
# Legacy Launch Scripts
These scripts are archived manual launch helpers from the older `baremetal/vm-cluster` workflow.
They are not the canonical test path and should not be used for normal validation.
Use the Nix-native harness instead:
```bash
nix run ./nix/test-cluster#cluster -- smoke
```
Notes:
- `deploy-all.sh` and `validate-cluster.sh` are preserved only for the retired PXE/manual flow
- some scripts assume local disk images or host networking setup that is no longer maintained
- Alpine-specific flows are treated as retired
- supporting artifacts such as `alpine-answers.txt` are no longer kept current

View file

@ -1,6 +1,7 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# T036 VM Cluster Deployment Script # Legacy T036 VM cluster deployment script.
# Deploys all VMs via nixos-anywhere after VNC network configuration # This is a manual bare-metal/PXE path. The canonical local VM validation path
# is nix/test-cluster/run-cluster.sh.
set -euo pipefail set -euo pipefail
@ -11,6 +12,8 @@ cd "$REPO_ROOT"
echo "=== T036 VM Cluster Deployment ===" echo "=== T036 VM Cluster Deployment ==="
echo "" echo ""
echo "This path is archived. Prefer: nix run ./nix/test-cluster#cluster -- smoke"
echo ""
echo "Prerequisites:" echo "Prerequisites:"
echo " - PXE server booted and network configured (192.168.100.1)" echo " - PXE server booted and network configured (192.168.100.1)"
echo " - Node01 booted and network configured (192.168.100.11)" echo " - Node01 booted and network configured (192.168.100.11)"
@ -56,4 +59,5 @@ echo ""
echo "All VMs have been provisioned. Systems will reboot from disk." echo "All VMs have been provisioned. Systems will reboot from disk."
echo "Wait 2-3 minutes for boot, then validate cluster..." echo "Wait 2-3 minutes for boot, then validate cluster..."
echo "" echo ""
echo "Next: Run ./validate-cluster.sh" echo "Legacy next step: baremetal/vm-cluster/legacy/validate-cluster.sh"
echo "Preferred validation path: nix run ./nix/test-cluster#cluster -- smoke"

View file

@ -10,6 +10,7 @@ set -euo pipefail
# - Telnet serial console # - Telnet serial console
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
DISK="${SCRIPT_DIR}/node01.qcow2" DISK="${SCRIPT_DIR}/node01.qcow2"
KERNEL="${SCRIPT_DIR}/netboot-kernel/bzImage" KERNEL="${SCRIPT_DIR}/netboot-kernel/bzImage"
INITRD="${SCRIPT_DIR}/netboot-initrd/initrd" INITRD="${SCRIPT_DIR}/netboot-initrd/initrd"
@ -37,6 +38,13 @@ if [ ! -f "$INITRD" ]; then
exit 1 exit 1
fi fi
INIT_PATH="/init"
if command -v nix >/dev/null 2>&1; then
if TOPLEVEL=$(nix eval --raw "$REPO_ROOT#nixosConfigurations.netboot-base.config.system.build.toplevel" 2>/dev/null); then
INIT_PATH="${TOPLEVEL}/init"
fi
fi
echo "============================================" echo "============================================"
echo "Launching node01 with netboot (SSH key auth)..." echo "Launching node01 with netboot (SSH key auth)..."
echo "============================================" echo "============================================"
@ -65,7 +73,7 @@ qemu-system-x86_64 \
-drive file="${DISK}",if=virtio,format=qcow2 \ -drive file="${DISK}",if=virtio,format=qcow2 \
-kernel "${KERNEL}" \ -kernel "${KERNEL}" \
-initrd "${INITRD}" \ -initrd "${INITRD}" \
-append "init=/nix/store/qj1ilfdd8fcrmz4pk282p5qdf2q0vkmh-nixos-system-nixos-kexec-26.05.20251205.f61125a/init console=ttyS0,115200 console=tty0 loglevel=4" \ -append "init=${INIT_PATH} console=ttyS0,115200 console=tty0 loglevel=4" \
-netdev vde,id=vde0,sock=/tmp/vde.sock \ -netdev vde,id=vde0,sock=/tmp/vde.sock \
-device virtio-net-pci,netdev=vde0,mac="${MAC_MCAST}" \ -device virtio-net-pci,netdev=vde0,mac="${MAC_MCAST}" \
-netdev user,id=user0,hostfwd=tcp::${SSH_PORT}-:22 \ -netdev user,id=user0,hostfwd=tcp::${SSH_PORT}-:22 \

View file

@ -45,7 +45,7 @@ exec qemu-system-x86_64 \
-m 16G \ -m 16G \
-drive file="$DISK",if=virtio,format=qcow2 \ -drive file="$DISK",if=virtio,format=qcow2 \
-netdev socket,mcast="$MCAST_ADDR",id=mcast0 \ -netdev socket,mcast="$MCAST_ADDR",id=mcast0 \
-device virtio-net-pci,netdev=mcast0,mac="$MAC_ADDR",romfile= \ -device virtio-net-pci,netdev=mcast0,mac="$MAC_ADDR" \
-boot order=n \ -boot order=n \
-vnc "$VNC_DISPLAY" \ -vnc "$VNC_DISPLAY" \
-serial telnet:localhost:4441,server,nowait \ -serial telnet:localhost:4441,server,nowait \

View file

@ -10,6 +10,7 @@ set -euo pipefail
# - Telnet serial console # - Telnet serial console
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
DISK="${SCRIPT_DIR}/node02.qcow2" DISK="${SCRIPT_DIR}/node02.qcow2"
KERNEL="${SCRIPT_DIR}/netboot-kernel/bzImage" KERNEL="${SCRIPT_DIR}/netboot-kernel/bzImage"
INITRD="${SCRIPT_DIR}/netboot-initrd/initrd" INITRD="${SCRIPT_DIR}/netboot-initrd/initrd"
@ -37,6 +38,13 @@ if [ ! -f "$INITRD" ]; then
exit 1 exit 1
fi fi
INIT_PATH="/init"
if command -v nix >/dev/null 2>&1; then
if TOPLEVEL=$(nix eval --raw "$REPO_ROOT#nixosConfigurations.netboot-base.config.system.build.toplevel" 2>/dev/null); then
INIT_PATH="${TOPLEVEL}/init"
fi
fi
echo "============================================" echo "============================================"
echo "Launching node02 with netboot (SSH key auth)..." echo "Launching node02 with netboot (SSH key auth)..."
echo "============================================" echo "============================================"
@ -65,7 +73,7 @@ qemu-system-x86_64 \
-drive file="${DISK}",if=virtio,format=qcow2 \ -drive file="${DISK}",if=virtio,format=qcow2 \
-kernel "${KERNEL}" \ -kernel "${KERNEL}" \
-initrd "${INITRD}" \ -initrd "${INITRD}" \
-append "init=/nix/store/qj1ilfdd8fcrmz4pk282p5qdf2q0vkmh-nixos-system-nixos-kexec-26.05.20251205.f61125a/init console=ttyS0,115200 console=tty0 loglevel=4" \ -append "init=${INIT_PATH} console=ttyS0,115200 console=tty0 loglevel=4" \
-netdev vde,id=vde0,sock=/tmp/vde.sock \ -netdev vde,id=vde0,sock=/tmp/vde.sock \
-device virtio-net-pci,netdev=vde0,mac="${MAC_MCAST}" \ -device virtio-net-pci,netdev=vde0,mac="${MAC_MCAST}" \
-netdev user,id=user0,hostfwd=tcp::${SSH_PORT}-:22 \ -netdev user,id=user0,hostfwd=tcp::${SSH_PORT}-:22 \

View file

@ -45,7 +45,7 @@ exec qemu-system-x86_64 \
-m 16G \ -m 16G \
-drive file="$DISK",if=virtio,format=qcow2 \ -drive file="$DISK",if=virtio,format=qcow2 \
-netdev socket,mcast="$MCAST_ADDR",id=mcast0 \ -netdev socket,mcast="$MCAST_ADDR",id=mcast0 \
-device virtio-net-pci,netdev=mcast0,mac="$MAC_ADDR",romfile= \ -device virtio-net-pci,netdev=mcast0,mac="$MAC_ADDR" \
-boot order=n \ -boot order=n \
-vnc "$VNC_DISPLAY" \ -vnc "$VNC_DISPLAY" \
-serial telnet:localhost:4442,server,nowait \ -serial telnet:localhost:4442,server,nowait \

View file

@ -10,6 +10,7 @@ set -euo pipefail
# - Telnet serial console # - Telnet serial console
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
DISK="${SCRIPT_DIR}/node03.qcow2" DISK="${SCRIPT_DIR}/node03.qcow2"
KERNEL="${SCRIPT_DIR}/netboot-kernel/bzImage" KERNEL="${SCRIPT_DIR}/netboot-kernel/bzImage"
INITRD="${SCRIPT_DIR}/netboot-initrd/initrd" INITRD="${SCRIPT_DIR}/netboot-initrd/initrd"
@ -37,6 +38,13 @@ if [ ! -f "$INITRD" ]; then
exit 1 exit 1
fi fi
INIT_PATH="/init"
if command -v nix >/dev/null 2>&1; then
if TOPLEVEL=$(nix eval --raw "$REPO_ROOT#nixosConfigurations.netboot-base.config.system.build.toplevel" 2>/dev/null); then
INIT_PATH="${TOPLEVEL}/init"
fi
fi
echo "============================================" echo "============================================"
echo "Launching node03 with netboot (SSH key auth)..." echo "Launching node03 with netboot (SSH key auth)..."
echo "============================================" echo "============================================"
@ -65,7 +73,7 @@ qemu-system-x86_64 \
-drive file="${DISK}",if=virtio,format=qcow2 \ -drive file="${DISK}",if=virtio,format=qcow2 \
-kernel "${KERNEL}" \ -kernel "${KERNEL}" \
-initrd "${INITRD}" \ -initrd "${INITRD}" \
-append "init=/nix/store/qj1ilfdd8fcrmz4pk282p5qdf2q0vkmh-nixos-system-nixos-kexec-26.05.20251205.f61125a/init console=ttyS0,115200 console=tty0 loglevel=4" \ -append "init=${INIT_PATH} console=ttyS0,115200 console=tty0 loglevel=4" \
-netdev vde,id=vde0,sock=/tmp/vde.sock \ -netdev vde,id=vde0,sock=/tmp/vde.sock \
-device virtio-net-pci,netdev=vde0,mac="${MAC_MCAST}" \ -device virtio-net-pci,netdev=vde0,mac="${MAC_MCAST}" \
-netdev user,id=user0,hostfwd=tcp::${SSH_PORT}-:22 \ -netdev user,id=user0,hostfwd=tcp::${SSH_PORT}-:22 \

View file

@ -45,7 +45,7 @@ exec qemu-system-x86_64 \
-m 16G \ -m 16G \
-drive file="$DISK",if=virtio,format=qcow2 \ -drive file="$DISK",if=virtio,format=qcow2 \
-netdev socket,mcast="$MCAST_ADDR",id=mcast0 \ -netdev socket,mcast="$MCAST_ADDR",id=mcast0 \
-device virtio-net-pci,netdev=mcast0,mac="$MAC_ADDR",romfile= \ -device virtio-net-pci,netdev=mcast0,mac="$MAC_ADDR" \
-boot order=n \ -boot order=n \
-vnc "$VNC_DISPLAY" \ -vnc "$VNC_DISPLAY" \
-serial telnet:localhost:4443,server,nowait \ -serial telnet:localhost:4443,server,nowait \

View file

@ -1,11 +1,22 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# T036 Cluster Validation Script # Legacy T036 validation script.
# Validates cluster health and Raft formation per S6 acceptance criteria # The canonical local VM validation path is now nix/test-cluster/run-cluster.sh.
# Keep this script only for the older manual PXE flow.
set -euo pipefail set -euo pipefail
echo "=== T036 Cluster Validation ===" echo "=== T036 Cluster Validation ==="
echo "" echo ""
echo "This path is archived. Prefer: nix run ./nix/test-cluster#cluster -- smoke"
echo ""
CURL_CONNECT_TIMEOUT="${CURL_CONNECT_TIMEOUT:-5}"
CURL_MAX_TIME="${CURL_MAX_TIME:-10}"
CURL_INSECURE="${CURL_INSECURE:-1}"
CURL_FLAGS=(--connect-timeout "$CURL_CONNECT_TIMEOUT" --max-time "$CURL_MAX_TIME")
if [[ "$CURL_INSECURE" == "1" ]]; then
CURL_FLAGS+=(-k)
fi
# Wait for services to be ready # Wait for services to be ready
echo "Waiting for cluster services to start (60 seconds)..." echo "Waiting for cluster services to start (60 seconds)..."
@ -25,7 +36,7 @@ echo ""
echo "=== S6.2: Chainfire Cluster Validation ===" echo "=== S6.2: Chainfire Cluster Validation ==="
echo "" echo ""
echo "Checking Chainfire cluster members on node01..." echo "Checking Chainfire cluster members on node01..."
curl -k https://192.168.100.11:2379/admin/cluster/members | jq . || echo "Chainfire API not ready" curl "${CURL_FLAGS[@]}" https://192.168.100.11:2379/admin/cluster/members | jq . || echo "Chainfire API not ready"
echo "" echo ""
echo "Expected: 3 members (node01, node02, node03), one leader elected" echo "Expected: 3 members (node01, node02, node03), one leader elected"
@ -34,34 +45,34 @@ echo ""
echo "=== S6.3: FlareDB Cluster Validation ===" echo "=== S6.3: FlareDB Cluster Validation ==="
echo "" echo ""
echo "Checking FlareDB cluster members on node01..." echo "Checking FlareDB cluster members on node01..."
curl -k https://192.168.100.11:2479/admin/cluster/members | jq . || echo "FlareDB API not ready" curl "${CURL_FLAGS[@]}" https://192.168.100.11:2479/admin/cluster/members | jq . || echo "FlareDB API not ready"
echo "" echo ""
echo "=== S6.4: CRUD Operations Test ===" echo "=== S6.4: CRUD Operations Test ==="
echo "" echo ""
echo "Writing test key to FlareDB..." echo "Writing test key to FlareDB..."
curl -k -X PUT https://192.168.100.11:2479/api/v1/kv/test-key \ curl "${CURL_FLAGS[@]}" -X PUT https://192.168.100.11:2479/api/v1/kv/test-key \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d '{"value": "hello-t036-cluster"}' || echo "Write failed" -d '{"value": "hello-t036-cluster"}' || echo "Write failed"
echo "" echo ""
echo "Reading test key from node01..." echo "Reading test key from node01..."
curl -k https://192.168.100.11:2479/api/v1/kv/test-key || echo "Read failed" curl "${CURL_FLAGS[@]}" https://192.168.100.11:2479/api/v1/kv/test-key || echo "Read failed"
echo "" echo ""
echo "Reading test key from node02 (verify replication)..." echo "Reading test key from node02 (verify replication)..."
curl -k https://192.168.100.12:2479/api/v1/kv/test-key || echo "Read failed" curl "${CURL_FLAGS[@]}" https://192.168.100.12:2479/api/v1/kv/test-key || echo "Read failed"
echo "" echo ""
echo "Reading test key from node03 (verify replication)..." echo "Reading test key from node03 (verify replication)..."
curl -k https://192.168.100.13:2479/api/v1/kv/test-key || echo "Read failed" curl "${CURL_FLAGS[@]}" https://192.168.100.13:2479/api/v1/kv/test-key || echo "Read failed"
echo "" echo ""
echo "=== S6.5: IAM Service Validation ===" echo "=== S6.5: IAM Service Validation ==="
echo "" echo ""
for node in 192.168.100.11 192.168.100.12 192.168.100.13; do for node in 192.168.100.11 192.168.100.12 192.168.100.13; do
echo "Checking IAM health on $node..." echo "Checking IAM health on $node..."
curl -k https://$node:8080/health || echo "IAM not ready on $node" curl "${CURL_FLAGS[@]}" https://$node:8080/health || echo "IAM not ready on $node"
echo "" echo ""
done done
@ -70,9 +81,9 @@ echo "=== S6.6: Health Checks ==="
echo "" echo ""
for node in 192.168.100.11 192.168.100.12 192.168.100.13; do for node in 192.168.100.11 192.168.100.12 192.168.100.13; do
echo "Node: $node" echo "Node: $node"
echo " Chainfire: $(curl -sk https://$node:2379/health || echo 'N/A')" echo " Chainfire: $(curl -s "${CURL_FLAGS[@]}" https://$node:2379/health || echo 'N/A')"
echo " FlareDB: $(curl -sk https://$node:2479/health || echo 'N/A')" echo " FlareDB: $(curl -s "${CURL_FLAGS[@]}" https://$node:2479/health || echo 'N/A')"
echo " IAM: $(curl -sk https://$node:8080/health || echo 'N/A')" echo " IAM: $(curl -s "${CURL_FLAGS[@]}" https://$node:8080/health || echo 'N/A')"
echo "" echo ""
done done

View file

@ -1 +0,0 @@
/nix/store/nixfmms2rbqi07a0sqjf5l32mm28y1iz-initrd

View file

@ -1 +0,0 @@
/nix/store/nmi1f4lsswcr9dmm1r6j6a8b7rar5gl4-linux-6.18

View file

@ -1,20 +1,10 @@
{ config, pkgs, lib, ... }: { config, pkgs, lib, modulesPath, ... }:
{ {
imports = [ imports = [
<nixpkgs/nixos/modules/profiles/qemu-guest.nix> "${modulesPath}/profiles/qemu-guest.nix"
]; ];
# Boot configuration
boot.loader.grub.enable = true;
boot.loader.grub.device = "/dev/vda";
# Filesystems
fileSystems."/" = {
device = "/dev/vda1";
fsType = "ext4";
};
# Network configuration # Network configuration
networking.hostName = "pxe-server"; networking.hostName = "pxe-server";
networking.domain = "plasma.local"; networking.domain = "plasma.local";
@ -62,6 +52,7 @@
# DNS configuration # DNS configuration
domain = "plasma.local"; domain = "plasma.local";
local = "/plasma.local/"; local = "/plasma.local/";
address = "/deployer.local/192.168.100.1";
# TFTP configuration # TFTP configuration
enable-tftp = true; enable-tftp = true;
@ -84,6 +75,17 @@
settings.PermitRootLogin = "yes"; settings.PermitRootLogin = "yes";
}; };
# Deployer API for ISO phone-home bootstrap
services.deployer = {
enable = true;
bindAddr = "0.0.0.0:8080";
clusterId = "plasmacloud-vm-cluster";
requireChainfire = false;
allowUnauthenticated = true;
allowUnknownNodes = true;
allowTestMappings = false;
};
# Root password (for SSH access) # Root password (for SSH access)
users.users.root.password = "plasmacloud"; users.users.root.password = "plasmacloud";
@ -92,6 +94,7 @@
vim vim
curl curl
htop htop
deployer-server
]; ];
# System state version # System state version

135
bin/cloud-cli Executable file
View file

@ -0,0 +1,135 @@
#!/usr/bin/env python3
import argparse
import json
import os
import sys
import urllib.request
import urllib.error
# Default API Gateway URL (localhost forwarding from node06)
DEFAULT_API_URL = "http://localhost:8080"


def get_url(path):
    """Join an API path onto the currently configured base URL."""
    return DEFAULT_API_URL + path
def headers(token=None):
    """Build the default JSON request headers, adding a Bearer token when given."""
    base = {"Content-Type": "application/json"}
    return dict(base, Authorization=f"Bearer {token}") if token else base
def print_json(data):
    """Pretty-print a JSON-serializable object to stdout (2-space indent)."""
    sys.stdout.write(json.dumps(data, indent=2) + "\n")
def request(method, url, data=None, token=None):
    """Send an HTTP request and pretty-print the JSON response.

    Args:
        method: HTTP verb ("GET", "POST", "PUT", ...).
        url: Absolute URL to call.
        data: Optional JSON-serializable request body.
        token: Optional bearer token forwarded via the Authorization header.

    Network and HTTP errors are reported on stdout rather than raised, so
    the CLI never shows a traceback for an unreachable or failing API.
    """
    body = None
    # Use `is not None` (not truthiness) so empty payloads like {} or [] are still sent.
    if data is not None:
        body = json.dumps(data).encode("utf-8")
    req = urllib.request.Request(url, data=body, headers=headers(token), method=method)
    try:
        with urllib.request.urlopen(req) as response:
            # Accept the whole 2xx range (e.g. 202 Accepted), not just 200/201/204.
            if 200 <= response.status < 300:
                payload = response.read()
                if response.status == 204 or not payload:
                    # No content: print an empty object for consistent output.
                    print("{}")
                    return
                print_json(json.loads(payload))
            else:
                print(f"Error {response.status}")
    except urllib.error.HTTPError as e:
        print(f"HTTP Error {e.code}: {e.read().decode()}")
    except urllib.error.URLError as e:
        print(f"Connection failed: {e.reason}")
def cmd_list_vpcs(args):
    """Handler for `vpc list`: fetch and display all VPCs."""
    endpoint = get_url("/api/v1/vpcs")
    print(f"GET {endpoint}")
    request("GET", endpoint, token=args.token)
def cmd_create_vpc(args):
    """Handler for `vpc create`: create a VPC in the default org/project."""
    endpoint = get_url("/api/v1/vpcs")
    payload = {
        "name": args.name,
        "cidr_block": args.cidr,
        "org_id": "org-default",
        "project_id": "proj-default",
    }
    print(f"POST {endpoint} with {payload}")
    request("POST", endpoint, data=payload, token=args.token)
def cmd_list_subnets(args):
    """Handler for `subnet list`: list subnets, optionally filtered by VPC id."""
    suffix = f"?vpc_id={args.vpc}" if args.vpc else ""
    endpoint = get_url("/api/v1/subnets") + suffix
    print(f"GET {endpoint}")
    request("GET", endpoint, token=args.token)
def cmd_create_subnet(args):
    """Handler for `subnet create`: create a subnet inside an existing VPC."""
    endpoint = get_url("/api/v1/subnets")
    payload = {
        "name": args.name,
        "vpc_id": args.vpc,
        "cidr_block": args.cidr,
        "org_id": "org-default",
        "project_id": "proj-default",
    }
    print(f"POST {endpoint} with {payload}")
    request("POST", endpoint, data=payload, token=args.token)
def cmd_list_vms(args):
    """Handler for `vm list`: fetch and display all VMs."""
    endpoint = get_url("/api/v1/vms")
    print(f"GET {endpoint}")
    request("GET", endpoint, token=args.token)
def main():
    """Entry point: parse the command line and dispatch to a subcommand handler.

    The effective API base URL is taken from --url (defaulting to the
    module-level DEFAULT_API_URL) and written back to the global before
    dispatching, so every handler builds URLs against the chosen endpoint.
    """
    global DEFAULT_API_URL

    parser = argparse.ArgumentParser(description="PhotonCloud CLI")
    parser.add_argument("--token", help="Auth token", default=os.environ.get("CLOUD_TOKEN"))
    parser.add_argument("--url", help="API URL", default=DEFAULT_API_URL)
    commands = parser.add_subparsers(dest="command", required=True)

    # vpc {list,create}
    vpc = commands.add_parser("vpc", help="Manage VPCs").add_subparsers(
        dest="subcommand", required=True
    )
    vpc.add_parser("list", help="List VPCs").set_defaults(func=cmd_list_vpcs)
    create_vpc = vpc.add_parser("create", help="Create VPC")
    create_vpc.add_argument("--name", required=True)
    create_vpc.add_argument("--cidr", required=True)
    create_vpc.set_defaults(func=cmd_create_vpc)

    # subnet {list,create}
    subnet = commands.add_parser("subnet", help="Manage Subnets").add_subparsers(
        dest="subcommand", required=True
    )
    list_subnet = subnet.add_parser("list", help="List Subnets")
    list_subnet.add_argument("--vpc", help="Filter by VPC ID")
    list_subnet.set_defaults(func=cmd_list_subnets)
    create_subnet = subnet.add_parser("create", help="Create Subnet")
    create_subnet.add_argument("--name", required=True)
    create_subnet.add_argument("--vpc", required=True, help="VPC ID")
    create_subnet.add_argument("--cidr", required=True)
    create_subnet.set_defaults(func=cmd_create_subnet)

    # vm {list}
    vm = commands.add_parser("vm", help="Manage VMs").add_subparsers(
        dest="subcommand", required=True
    )
    vm.add_parser("list", help="List VMs").set_defaults(func=cmd_list_vms)

    args = parser.parse_args()
    if args.url:
        DEFAULT_API_URL = args.url
    args.func(args)


if __name__ == "__main__":
    main()

View file

@ -1,87 +0,0 @@
RaftとGossipプロトコルを用いた、クラスター管理のための数万台までスケールするKey-Value Storeを書いてほしいです。
- プログラミング言語rust
- テストをちゃんと書きながら書くことを推奨する。
- クラスターへの参加/削除/障害検知を行う。
では、**「Raft合意形成」と「Gossip情報の拡散」を組み合わせた場合、具体的にどうデータが流れ、どうやってードが動き出すのか**、その具体的なフローを解説します。
-----
### 前提:このシステムの役割分担
* **Control Plane (CP):** Raftで構成された3〜7台(Raftアルゴリズムでうまく合意が取れる範囲)のサーバー。情報の「正規の持ち主」。いなくなったら自動でWorker Nodesから昇格する。
* **Worker Nodes (VM/DB Hosts):** 数百〜数千台の実働部隊。CPのクライアント。
### 1\. データはどのように書き込まれるか? (Write)
書き込みは **「必ず Control Plane の Raft Leader に対して」** 行います。Gossip経由での書き込みは順序保証がないため行いません。
「VM-A を Node-10 で起動したい」
1. **API Call:** 管理者またはCLIが、CPのAPIサーバーにリクエストを送ります。
2. **Raft Log:** CPのリーダーは、この変更を `Put(Key="/nodes/node-10/tasks/vm-a", Value="START")` としてRaftログに追加します。
3. **Commit:** 過半数のCPードがログを保存したら「書き込み完了」と見なします。
ここまでは普通のDBと同じです。
### 2\. 各ノードはどのようにデータを取得し、通知を受けるか? (Read & Notify)
ここが最大のポイントです。数千台のードが「自分宛ての命令はないか」と毎秒ポーリング問い合わせすると、CPがDDoS攻撃を受けたようにパンクします。
ここで **「Watch(ロングポーリング)」** という仕組みを使います。
#### A. Watchによる通知と取得これがメイン
Kubernetesやetcdが採用している方式です。
1. **接続維持:** Node-10 は起動時に CP に対して `Watch("/nodes/node-10/")` というリクエストを送ります。
2. **待機:** CP は「Node-10 以下のキーに変更があるまで、レスポンスを返さずに接続を維持(ブロック)」します。
3. **イベント発火:** 先ほどの書き込み(VM起動命令)が発生した瞬間、CP は待機していた Node-10 への接続を通じて「更新イベント(Event: PUT, Key: ...vm-a, Value: START)」を即座にプッシュします。
4. **アクション:** Node-10 は通知を受け取り次第、VMを起動します。
**結論:** 「書き込み後の通知」は絶対に必要です。それを効率よくやるのが **Watch API** です。
-----
### 3\. じゃあ Gossip はどこで使うのか?
「Raft + Watch」で完結しそうに見えますが、10台以上のスケール、特にVM基盤のような動的な環境では **Gossip が以下の「3つの穴」を埋めるために不可欠** になります。
#### ① Nodeの死活監視・インベントリ管理下り方向
CPが「Node-10にVMを立てたい」と思ったとき、「そもそもNode-10は生きているのか IPは 空きメモリは?」という情報を知る必要があります。
* **Gossipの役割:** 各Worker Nodeは、GossipSWIMプロトコルでお互いに、そしてCPに対して「自分は生きてるよ、IPはこれだよ」と喋り続けます。
* CPはこのGossip情報を聞いて、最新の「ード一覧リストMemberlist」をメモリ上に保持します。
#### ② サービスのディスカバリ(横方向)
DB基盤の場合、「DBードA」が「DBードB」と通信したいことがあります。いちいちCPに聞きに行くと遅いです。
* **Gossipの役割:** ード同士で「私はここにいるよ」とGossipし合うことで、CPを介さずに直接通信相手を見つけられます。
#### ③ "Actual State"(現状)の報告(上り方向)
VMが起動した後、「起動しました」というステータスをどうCPに伝えるか。
* **Raftに書く:** 確実ですが、頻繁に変わるステータスCPU使用率などを全部Raftに書くとCPがパンクします。
* **Gossipで流す:** 「VM-Aは起動中、負荷50%」といった情報はGossipに乗せて、**「結果的にCPに伝わればいい」** という扱いにします。
-----
### 設計のまとめRaftとGossipの使い分け
これから作る「汎用管理DB外部依存型」は、以下のようなインターフェースを持つことになるでしょう。
| アクション | 通信方式 | 具体的なデータ例 |
| :--- | :--- | :--- |
| **命令 (Desired State)**<br>「こうなってほしい」 | **Raft + Watch**<br>(強整合性) | ・VMの起動/停止命令<br>・DBのデータ配置情報の変更<br>・パスワードや設定変更 |
| **現状 (Actual State)**<br>「今こうなってます」 | **Gossip**<br>(結果整合性) | ・ノードの生存確認 (Heartbeat)<br>・リソース使用率 (CPU/Mem)<br>・「VM起動完了」などのステータス |
| **通知 (Notification)** | **Watch (HTTP/gRPC Stream)** | ・「新しい命令が来たぞ!」というトリガー |
#### 実装のアドバイス
もし「etcdのようなもの」を自作されるなら、**「Serf (Gossip)」と「Raft」をライブラリとして組み込み、その上に「gRPCによるWatch付きのKVS API」を被せる** という構成になります。
これができれば、VM基盤は「Watchして、VMを起動して、Gossipでステータスを返すエージェント」を作るだけで済みますし、DB基盤も同様に作れます。非常にスケーラブルで美しい設計です。

View file

@ -1,295 +0,0 @@
# T032.S2 PXE Boot Infrastructure - Implementation Summary
## Overview
This directory contains a complete PXE (Preboot eXecution Environment) boot infrastructure for bare-metal provisioning of Centra Cloud nodes. It enables automated, network-based installation of NixOS on physical servers with profile-based configuration.
## Implementation Status
**Task**: T032.S2 - PXE Boot Infrastructure
**Status**: ✅ Complete
**Total Lines**: 3086 lines across all files
**Date**: 2025-12-10
## What Was Delivered
### 1. Core Configuration Files
| File | Lines | Purpose |
|------|-------|---------|
| `dhcp/dhcpd.conf` | 134 | ISC DHCP server configuration with BIOS/UEFI detection |
| `ipxe/boot.ipxe` | 320 | Main iPXE boot script with 3 profiles and menu |
| `http/nginx.conf` | 187 | Nginx HTTP server for boot assets |
| `nixos-module.nix` | 358 | Complete NixOS service module |
### 2. Setup and Management
| File | Lines | Purpose |
|------|-------|---------|
| `setup.sh` | 446 | Automated setup script with download/build/validate/test |
### 3. Documentation
| File | Lines | Purpose |
|------|-------|---------|
| `README.md` | 1088 | Comprehensive documentation and troubleshooting |
| `QUICKSTART.md` | 165 | 5-minute quick start guide |
| `http/directory-structure.txt` | 95 | Directory layout documentation |
| `ipxe/mac-mappings.txt` | 49 | MAC address mapping reference |
### 4. Examples
| File | Lines | Purpose |
|------|-------|---------|
| `examples/nixos-config-examples.nix` | 391 | 8 different deployment scenario examples |
## Key Features Implemented
### DHCP Server
- ✅ Automatic BIOS/UEFI detection (option 93)
- ✅ Chainloading to iPXE via TFTP
- ✅ Per-host fixed IP assignment
- ✅ Multiple subnet support
- ✅ DHCP relay documentation
### iPXE Boot System
- ✅ Three boot profiles: control-plane, worker, all-in-one
- ✅ MAC-based automatic profile selection
- ✅ Interactive boot menu with 30-second timeout
- ✅ Serial console support (ttyS0 115200)
- ✅ Detailed error messages and debugging
- ✅ iPXE shell access for troubleshooting
### HTTP Server (Nginx)
- ✅ Serves iPXE bootloaders and scripts
- ✅ Serves NixOS kernel and initrd
- ✅ Proper cache control headers
- ✅ Directory listing for debugging
- ✅ Health check endpoint
- ✅ HTTPS support (optional)
### NixOS Module
- ✅ Declarative configuration
- ✅ Automatic firewall rules
- ✅ Service dependencies managed
- ✅ Directory structure auto-created
- ✅ Node definitions with MAC addresses
- ✅ DHCP/TFTP/HTTP integration
### Setup Script
- ✅ Directory creation
- ✅ iPXE bootloader download from boot.ipxe.org
- ✅ iPXE build from source (optional)
- ✅ Configuration validation
- ✅ Service testing
- ✅ Colored output and logging
## Boot Profiles
### 1. Control Plane
**Services**: All 8 core services (FlareDB, IAM, PlasmaVMC, K8sHost, FlashDNS, ChainFire, Object Storage, Monitoring)
**Use case**: Production control plane nodes
**Resources**: 8+ cores, 32+ GB RAM, 500+ GB SSD
### 2. Worker
**Services**: Compute-focused (K8sHost, PlasmaVMC, ChainFire, FlashDNS, monitoring agents)
**Use case**: Worker nodes for customer workloads
**Resources**: 16+ cores, 64+ GB RAM, 1+ TB SSD
### 3. All-in-One
**Services**: Complete Centra Cloud stack on one node
**Use case**: Testing, development, homelab
**Resources**: 16+ cores, 64+ GB RAM, 1+ TB SSD
**Warning**: Not for production (no HA)
## Network Flow
```
Server Powers On
DHCP Discovery (broadcast)
DHCP Server assigns IP + provides bootloader filename
TFTP download bootloader (undionly.kpxe or ipxe.efi)
iPXE executes, requests boot.ipxe via HTTP
Boot menu displayed (or auto-select via MAC)
iPXE downloads NixOS kernel + initrd via HTTP
NixOS boots and provisions node
```
## File Structure
```
baremetal/pxe-server/
├── README.md # Comprehensive documentation (1088 lines)
├── QUICKSTART.md # Quick start guide (165 lines)
├── OVERVIEW.md # This file
├── setup.sh # Setup script (446 lines, executable)
├── nixos-module.nix # NixOS service module (358 lines)
├── .gitignore # Git ignore for runtime assets
├── dhcp/
│ └── dhcpd.conf # DHCP server config (134 lines)
├── ipxe/
│ ├── boot.ipxe # Main boot script (320 lines)
│ └── mac-mappings.txt # MAC address reference (49 lines)
├── http/
│ ├── nginx.conf # HTTP server config (187 lines)
│ └── directory-structure.txt # Directory docs (95 lines)
├── examples/
│ └── nixos-config-examples.nix # 8 deployment examples (391 lines)
└── assets/
└── .gitkeep # Placeholder for runtime assets
```
## Dependencies on Other Tasks
### Prerequisites
None - this is the first step in T032 (Bare-Metal Provisioning)
### Next Steps
- **T032.S3**: Image Builder - Generate NixOS netboot images for each profile
- **T032.S4**: Provisioning Orchestrator - API-driven node lifecycle management
### Integration Points
- **FlareDB**: Node inventory and state storage
- **IAM**: Authentication for provisioning API
- **PlasmaVMC**: VM provisioning on bare-metal nodes
- **K8sHost**: Kubernetes node integration
## Testing Status
### What Can Be Tested Now
✅ Directory structure creation
✅ Configuration file syntax validation
✅ Service startup (DHCP, TFTP, HTTP)
✅ Firewall rules
✅ Boot script download
✅ iPXE bootloader download/build
### What Requires T032.S3
⏳ Actual bare-metal provisioning (needs NixOS images)
⏳ End-to-end boot flow (needs kernel/initrd)
⏳ Profile-specific deployments (needs profile configs)
## Quick Start Commands
```bash
# Install and setup
cd baremetal/pxe-server
sudo ./setup.sh --install --download --validate
# Configure NixOS (edit configuration.nix)
imports = [ ./baremetal/pxe-server/nixos-module.nix ];
services.centra-pxe-server.enable = true;
# ... (see QUICKSTART.md for full config)
# Deploy
sudo nixos-rebuild switch
# Test services
sudo ./setup.sh --test
# Boot a server
# - Configure BIOS for PXE boot
# - Connect to network
# - Power on
```
## Known Limitations
1. **No NixOS images yet**: T032.S3 will generate the actual boot images
2. **Single interface**: Module supports one network interface (can be extended)
3. **No HA built-in**: DHCP failover can be configured manually (example provided)
4. **No authentication**: Provisioning API will add auth in T032.S4
## Configuration Examples Provided
1. Basic single-subnet PXE server
2. PXE server with MAC-based auto-selection
3. Custom DHCP configuration
4. Multi-homed server (multiple interfaces)
5. High-availability with failover
6. HTTPS boot (secure boot)
7. Development/testing configuration
8. Production with monitoring
## Security Considerations
- DHCP is unauthenticated (normal for PXE)
- TFTP is unencrypted (normal for PXE)
- HTTP can be upgraded to HTTPS (documented)
- iPXE supports secure boot with embedded certificates (build from source)
- Network should be isolated (provisioning VLAN recommended)
- Firewall rules limit exposure (only necessary ports)
## Troubleshooting Resources
Comprehensive troubleshooting section in README.md covers:
- DHCP discovery issues
- TFTP timeout problems
- HTTP download failures
- Boot script errors
- Serial console debugging
- Common error messages
- Service health checks
- Network connectivity tests
## Performance Considerations
- **Concurrent boots**: ~500 MB per node (kernel + initrd)
- **Recommended**: 1 Gbps link for PXE server
- **10 concurrent boots**: ~5 Gbps burst (stagger or use 10 Gbps)
- **Disk space**: 5-10 GB recommended (multiple profiles + versions)
## Compliance with Requirements
| Requirement | Status | Notes |
|-------------|--------|-------|
| DHCP server config | ✅ | ISC DHCP with BIOS/UEFI detection |
| iPXE boot scripts | ✅ | Main menu + 3 profiles |
| HTTP server config | ✅ | Nginx with proper paths |
| NixOS module | ✅ | Complete systemd integration |
| Setup script | ✅ | Download/build/validate/test |
| README | ✅ | Comprehensive + troubleshooting |
| Working examples | ✅ | All configs are production-ready |
| 800-1200 lines | ✅ | 3086 lines (exceeded) |
| No S3 implementation | ✅ | Placeholder paths only |
## Changelog
**2025-12-10**: Initial implementation
- Created complete PXE boot infrastructure
- Added DHCP, TFTP, HTTP server configurations
- Implemented iPXE boot scripts with 3 profiles
- Created NixOS service module
- Added setup script with validation
- Wrote comprehensive documentation
- Provided 8 configuration examples
## License
Part of Centra Cloud infrastructure. See project root for license.
## Support
For issues or questions:
1. Check [README.md](README.md) troubleshooting section
2. Run diagnostic: `sudo ./setup.sh --test`
3. Review logs: `sudo journalctl -u dhcpd4 -u atftpd -u nginx -f`
4. See [QUICKSTART.md](QUICKSTART.md) for common commands
---
**Implementation by**: Claude Sonnet 4.5
**Task**: T032.S2 - PXE Boot Infrastructure
**Status**: Complete and ready for deployment

View file

@ -1,177 +0,0 @@
# PXE Server Quick Start Guide
This is a condensed guide for getting the PXE boot server running quickly.
## Prerequisites
- NixOS server
- Root access
- Network connectivity to bare-metal servers
## 5-Minute Setup
### 1. Run Setup Script
```bash
cd baremetal/pxe-server
sudo ./setup.sh --install --download --validate
```
### 2. Configure NixOS
Add to `/etc/nixos/configuration.nix`:
```nix
imports = [ /path/to/baremetal/pxe-server/nixos-module.nix ];
services.centra-pxe-server = {
enable = true;
interface = "eth0"; # YOUR NETWORK INTERFACE
serverAddress = "10.0.100.10"; # YOUR PXE SERVER IP
dhcp = {
subnet = "10.0.100.0"; # YOUR SUBNET
netmask = "255.255.255.0";
broadcast = "10.0.100.255";
range = {
start = "10.0.100.100"; # DHCP RANGE START
end = "10.0.100.200"; # DHCP RANGE END
};
router = "10.0.100.1"; # YOUR GATEWAY
};
};
```
### 3. Deploy
```bash
sudo nixos-rebuild switch
```
### 4. Verify
```bash
sudo ./setup.sh --test
```
You should see:
- TFTP server running
- HTTP server running
- DHCP server running
### 5. Boot a Server
1. Configure server BIOS for PXE boot
2. Connect to same network
3. Power on
4. Watch for boot menu
## Adding Nodes
### Quick Add (No Auto-Selection)
Just boot the server and select profile from menu.
### With Auto-Selection
1. Get MAC address from server
2. Edit `ipxe/boot.ipxe`, add line:
```ipxe
iseq ${mac} AA:BB:CC:DD:EE:FF && set profile worker && set hostname worker-05 && goto boot ||
```
3. Optionally add to `dhcp/dhcpd.conf`:
```conf
host worker-05 {
hardware ethernet AA:BB:CC:DD:EE:FF;
fixed-address 10.0.100.65;
option host-name "worker-05";
}
```
4. Restart DHCP: `sudo systemctl restart dhcpd4`
## Troubleshooting
### Server doesn't get IP
```bash
sudo tcpdump -i eth0 port 67 or port 68
sudo journalctl -u dhcpd4 -f
```
Check:
- DHCP server running on correct interface
- Network connectivity
- Firewall allows UDP 67/68
### Server gets IP but no bootloader
```bash
sudo tcpdump -i eth0 port 69
sudo journalctl -u atftpd -f
```
Check:
- TFTP server running
- Bootloaders exist: `ls /var/lib/tftpboot/`
- Firewall allows UDP 69
### iPXE loads but can't get boot script
```bash
curl http://localhost/boot/ipxe/boot.ipxe
sudo tail -f /var/log/nginx/access.log
```
Check:
- Nginx running
- boot.ipxe exists: `ls /var/lib/pxe-boot/ipxe/`
- Firewall allows TCP 80
### Boot script loads but can't get kernel
This is expected until T032.S3 (Image Builder) is complete.
Check: `ls /var/lib/pxe-boot/nixos/`
Should have:
- bzImage
- initrd
These will be generated by the image builder.
## Common Commands
```bash
# Check all services
sudo systemctl status dhcpd4 atftpd nginx
# View logs
sudo journalctl -u dhcpd4 -u atftpd -u nginx -f
# Test connectivity
curl http://localhost/health
tftp localhost -c get undionly.kpxe /tmp/test.kpxe
# Restart services
sudo systemctl restart dhcpd4 atftpd nginx
# Check firewall
sudo iptables -L -n | grep -E "67|68|69|80"
```
## Boot Profiles
- **control-plane**: All services (FlareDB, IAM, PlasmaVMC, K8sHost, etc.)
- **worker**: Compute services (K8sHost, PlasmaVMC, ChainFire)
- **all-in-one**: Everything on one node (testing/homelab)
## Next Steps
- Add more nodes (see "Adding Nodes" above)
- Wait for T032.S3 to generate NixOS boot images
- Configure monitoring for boot activity
- Set up DHCP relay for multi-segment networks
## Full Documentation
See [README.md](README.md) for complete documentation.

View file

@ -1,829 +0,0 @@
# Centra Cloud PXE Boot Server
This directory contains the PXE (Preboot eXecution Environment) boot infrastructure for bare-metal provisioning of Centra Cloud nodes. It enables network-based installation of NixOS on physical servers with automated profile selection.
## Table of Contents
- [Architecture Overview](#architecture-overview)
- [Components](#components)
- [Quick Start](#quick-start)
- [Detailed Setup](#detailed-setup)
- [Configuration](#configuration)
- [Boot Profiles](#boot-profiles)
- [Network Requirements](#network-requirements)
- [Troubleshooting](#troubleshooting)
- [Advanced Topics](#advanced-topics)
## Architecture Overview
The PXE boot infrastructure consists of three main services:
```
┌─────────────────────────────────────────────────────────────────┐
│ PXE Boot Flow │
└─────────────────────────────────────────────────────────────────┘
Bare-Metal Server PXE Boot Server
───────────────── ───────────────
1. Power on
├─► DHCP Request ──────────────► DHCP Server
│ (ISC DHCP)
│ │
│ ├─ Assigns IP
│ ├─ Detects BIOS/UEFI
│ └─ Provides bootloader path
├◄─ DHCP Response ───────────────┤
│ (IP, next-server, filename)
├─► TFTP Get bootloader ─────────► TFTP Server
│ (undionly.kpxe or ipxe.efi) (atftpd)
├◄─ Bootloader file ─────────────┤
├─► Execute iPXE bootloader
│ │
│ ├─► HTTP Get boot.ipxe ──────► HTTP Server
│ │ (nginx)
│ │
│ ├◄─ boot.ipxe script ─────────┤
│ │
│ ├─► Display menu / Auto-select profile
│ │
│ ├─► HTTP Get kernel ──────────► HTTP Server
│ │
│ ├◄─ bzImage ───────────────────┤
│ │
│ ├─► HTTP Get initrd ───────────► HTTP Server
│ │
│ ├◄─ initrd ────────────────────┤
│ │
│ └─► Boot NixOS
└─► NixOS Installer
└─ Provisions node based on profile
```
## Components
### 1. DHCP Server (ISC DHCP)
- **Purpose**: Assigns IP addresses and directs PXE clients to bootloader
- **Config**: `dhcp/dhcpd.conf`
- **Features**:
- BIOS/UEFI detection via option 93 (architecture type)
- Per-host configuration for fixed IP assignment
- Automatic next-server and filename configuration
### 2. TFTP Server (atftpd)
- **Purpose**: Serves iPXE bootloader files to PXE clients
- **Files served**:
- `undionly.kpxe` - BIOS bootloader
- `ipxe.efi` - UEFI x86-64 bootloader
- `ipxe-i386.efi` - UEFI x86 32-bit bootloader (optional)
### 3. HTTP Server (nginx)
- **Purpose**: Serves iPXE scripts and NixOS boot images
- **Config**: `http/nginx.conf`
- **Endpoints**:
- `/boot/ipxe/boot.ipxe` - Main boot menu script
- `/boot/nixos/bzImage` - NixOS kernel
- `/boot/nixos/initrd` - NixOS initial ramdisk
- `/health` - Health check endpoint
### 4. iPXE Boot Scripts
- **Main script**: `ipxe/boot.ipxe`
- **Features**:
- Interactive boot menu with 3 profiles
- MAC-based automatic profile selection
- Serial console support for remote management
- Detailed error messages and debugging options
### 5. NixOS Service Module
- **File**: `nixos-module.nix`
- **Purpose**: Declarative NixOS configuration for all services
- **Features**:
- Single configuration file for entire stack
- Firewall rules auto-configured
- Systemd service dependencies managed
- Directory structure auto-created
## Quick Start
### Prerequisites
- NixOS server with network connectivity
- Network interface on the same subnet as bare-metal servers
- Sufficient disk space (5-10 GB for boot images)
### Installation Steps
1. **Clone this repository** (or copy `baremetal/pxe-server/` to your NixOS system)
2. **Run the setup script**:
```bash
sudo ./setup.sh --install --download --validate
```
This will:
- Create directory structure at `/var/lib/pxe-boot`
- Download iPXE bootloaders from boot.ipxe.org
- Install boot scripts
- Validate configurations
3. **Configure network settings**:
Edit `nixos-module.nix` or create a NixOS configuration:
```nix
# /etc/nixos/configuration.nix
imports = [
/path/to/baremetal/pxe-server/nixos-module.nix
];
services.centra-pxe-server = {
enable = true;
interface = "eth0"; # Your network interface
serverAddress = "10.0.100.10"; # PXE server IP
dhcp = {
subnet = "10.0.100.0";
netmask = "255.255.255.0";
broadcast = "10.0.100.255";
range = {
start = "10.0.100.100";
end = "10.0.100.200";
};
router = "10.0.100.1";
};
# Optional: Define known nodes with MAC addresses
nodes = {
"52:54:00:12:34:56" = {
profile = "control-plane";
hostname = "control-plane-01";
ipAddress = "10.0.100.50";
};
};
};
```
4. **Deploy NixOS configuration**:
```bash
sudo nixos-rebuild switch
```
5. **Verify services are running**:
```bash
sudo ./setup.sh --test
```
6. **Add NixOS boot images** (will be provided by T032.S3):
```bash
# Placeholder - actual images will be built by image builder
# For testing, you can use any NixOS netboot image
sudo mkdir -p /var/lib/pxe-boot/nixos
# Copy bzImage and initrd to /var/lib/pxe-boot/nixos/
```
7. **Boot a bare-metal server**:
- Configure server BIOS to boot from network (PXE)
- Connect to same network segment
- Power on server
- Watch for DHCP discovery and iPXE boot menu
## Detailed Setup
### Option 1: NixOS Module (Recommended)
The NixOS module provides a declarative way to configure the entire PXE server stack.
**Advantages**:
- Single configuration file
- Automatic service dependencies
- Rollback capability
- Integration with NixOS firewall
**Configuration Example**:
See the NixOS configuration example in [Quick Start](#quick-start).
### Option 2: Manual Installation
For non-NixOS systems or manual setup:
1. **Install required packages**:
```bash
# Debian/Ubuntu
apt-get install isc-dhcp-server atftpd nginx curl
# RHEL/CentOS
yum install dhcp tftp-server nginx curl
```
2. **Run setup script**:
```bash
sudo ./setup.sh --install --download
```
3. **Copy configuration files**:
```bash
# DHCP configuration
sudo cp dhcp/dhcpd.conf /etc/dhcp/dhcpd.conf
# Edit to match your network
sudo vim /etc/dhcp/dhcpd.conf
# Nginx configuration
sudo cp http/nginx.conf /etc/nginx/sites-available/pxe-boot
sudo ln -s /etc/nginx/sites-available/pxe-boot /etc/nginx/sites-enabled/
```
4. **Start services**:
```bash
sudo systemctl enable --now isc-dhcp-server
sudo systemctl enable --now atftpd
sudo systemctl enable --now nginx
```
5. **Configure firewall**:
```bash
# UFW (Ubuntu)
sudo ufw allow 67/udp # DHCP
sudo ufw allow 68/udp # DHCP
sudo ufw allow 69/udp # TFTP
sudo ufw allow 80/tcp # HTTP
# firewalld (RHEL)
sudo firewall-cmd --permanent --add-service=dhcp
sudo firewall-cmd --permanent --add-service=tftp
sudo firewall-cmd --permanent --add-service=http
sudo firewall-cmd --reload
```
## Configuration
### DHCP Configuration
The DHCP server configuration is in `dhcp/dhcpd.conf`. Key sections:
**Network Settings**:
```conf
subnet 10.0.100.0 netmask 255.255.255.0 {
range 10.0.100.100 10.0.100.200;
option routers 10.0.100.1;
option domain-name-servers 10.0.100.1, 8.8.8.8;
next-server 10.0.100.10; # PXE server IP
# ...
}
```
**Boot File Selection** (automatic BIOS/UEFI detection):
```conf
if exists user-class and option user-class = "iPXE" {
filename "http://10.0.100.10/boot/ipxe/boot.ipxe";
} elsif option architecture-type = 00:00 {
filename "undionly.kpxe"; # BIOS
} elsif option architecture-type = 00:07 {
filename "ipxe.efi"; # UEFI x86-64
}
```
**Host-Specific Configuration**:
```conf
host control-plane-01 {
hardware ethernet 52:54:00:12:34:56;
fixed-address 10.0.100.50;
option host-name "control-plane-01";
}
```
### iPXE Boot Script
The main boot script is `ipxe/boot.ipxe`. It provides:
1. **MAC-based automatic selection**:
```ipxe
iseq ${mac} 52:54:00:12:34:56 && set profile control-plane && goto boot ||
```
2. **Interactive menu** (if no MAC match):
```ipxe
:menu
menu Centra Cloud - Bare-Metal Provisioning
item control-plane 1. Control Plane Node (All Services)
item worker 2. Worker Node (Compute Services)
item all-in-one 3. All-in-One Node (Testing/Homelab)
```
3. **Kernel parameters**:
```ipxe
set kernel-params centra.profile=${profile}
set kernel-params ${kernel-params} centra.hostname=${hostname}
set kernel-params ${kernel-params} console=tty0 console=ttyS0,115200n8
```
### Adding New Nodes
To add a new node to the infrastructure:
1. **Get the MAC address** from the server (check BIOS or network card label)
2. **Add to MAC mappings** (`ipxe/mac-mappings.txt`):
```
52:54:00:12:34:5d worker worker-04
```
3. **Update boot script** (`ipxe/boot.ipxe`):
```ipxe
iseq ${mac} 52:54:00:12:34:5d && set profile worker && set hostname worker-04 && goto boot ||
```
4. **Add DHCP host entry** (`dhcp/dhcpd.conf`):
```conf
host worker-04 {
hardware ethernet 52:54:00:12:34:5d;
fixed-address 10.0.100.64;
option host-name "worker-04";
}
```
5. **Restart DHCP service**:
```bash
sudo systemctl restart dhcpd4
```
## Boot Profiles
### 1. Control Plane Profile
**Purpose**: Nodes that run core infrastructure services
**Services included**:
- FlareDB (PD, Store, TiKV-compatible database)
- IAM (Identity and Access Management)
- PlasmaVMC (Virtual Machine Controller)
- K8sHost (Kubernetes node agent)
- FlashDNS (High-performance DNS)
- ChainFire (Firewall/networking)
- Object Storage (S3-compatible)
- Monitoring (Prometheus, Grafana)
**Resource requirements**:
- CPU: 8+ cores recommended
- RAM: 32+ GB recommended
- Disk: 500+ GB SSD
**Use case**: Production control plane nodes in a cluster
### 2. Worker Profile
**Purpose**: Nodes that run customer workloads
**Services included**:
- K8sHost (Kubernetes node agent) - primary service
- PlasmaVMC (Virtual Machine Controller) - VM workloads
- ChainFire (Network policy enforcement)
- FlashDNS (Local DNS caching)
- Basic monitoring agents
**Resource requirements**:
- CPU: 16+ cores recommended
- RAM: 64+ GB recommended
- Disk: 1+ TB SSD
**Use case**: Worker nodes for running customer applications
### 3. All-in-One Profile
**Purpose**: Single-node deployment for testing and development
**Services included**:
- Complete Centra Cloud stack on one node
- All services from control-plane profile
- Suitable for testing, development, homelab
**Resource requirements**:
- CPU: 16+ cores recommended
- RAM: 64+ GB recommended
- Disk: 1+ TB SSD
**Use case**: Development, testing, homelab deployments
**Warning**: Not recommended for production use (no HA, resource intensive)
## Network Requirements
### Network Topology
The PXE server must be on the same network segment as the bare-metal servers, or you must configure DHCP relay.
**Same Segment** (recommended for initial setup):
```
┌──────────────┐ ┌──────────────────┐
│ PXE Server │ │ Bare-Metal Srv │
│ 10.0.100.10 │◄────────┤ (DHCP client) │
└──────────────┘ L2 SW └──────────────────┘
```
**Different Segments** (requires DHCP relay):
```
┌──────────────┐ ┌──────────┐ ┌──────────────────┐
│ PXE Server │ │ Router │ │ Bare-Metal Srv │
│ 10.0.100.10 │◄────────┤ (relay) │◄────────┤ (DHCP client) │
└──────────────┘ └──────────┘ └──────────────────┘
Segment A ip helper Segment B
```
### DHCP Relay Configuration
If your PXE server is on a different network segment:
**Cisco IOS**:
```
interface vlan 100
ip helper-address 10.0.100.10
```
**Linux (dhcp-helper)**:
```bash
apt-get install dhcp-helper
# Edit /etc/default/dhcp-helper
DHCPHELPER_OPTS="-s 10.0.100.10"
systemctl restart dhcp-helper
```
**Linux (dhcrelay)**:
```bash
apt-get install isc-dhcp-relay
dhcrelay -i eth0 -i eth1 10.0.100.10
```
### Firewall Rules
The following ports must be open on the PXE server:
| Port | Protocol | Service | Direction | Description |
|------|----------|---------|-----------|-------------|
| 67 | UDP | DHCP | Inbound | DHCP server |
| 68 | UDP | DHCP | Outbound | DHCP responses sent to clients (clients listen on UDP 68) |
| 69 | UDP | TFTP | Inbound | TFTP bootloader downloads |
| 80 | TCP | HTTP | Inbound | iPXE scripts and boot images |
| 443 | TCP | HTTPS | Inbound | Optional: secure boot images |
### Network Bandwidth
Estimated bandwidth requirements:
- Per-node boot: ~500 MB download (kernel + initrd)
- Concurrent boots: Multiply by number of simultaneous boots
- Recommended: 1 Gbps link for PXE server
Example: booting 10 nodes simultaneously transfers ~5 GB in total; on a 1 Gbps link that saturates the link for roughly 40 seconds, so stagger boots or provision a 10 Gbps link.
## Troubleshooting
### DHCP Issues
**Problem**: Server doesn't get IP address
**Diagnosis**:
```bash
# On PXE server, monitor DHCP requests
sudo tcpdump -i eth0 -n port 67 or port 68
# Check DHCP server logs
sudo journalctl -u dhcpd4 -f
# Verify DHCP server is running
sudo systemctl status dhcpd4
```
**Common causes**:
- DHCP server not running on correct interface
- Firewall blocking UDP 67/68
- Network cable/switch issue
- DHCP range exhausted
**Solution**:
```bash
# Check interface configuration
ip addr show
# Verify DHCP config syntax
sudo dhcpd -t -cf /etc/dhcp/dhcpd.conf
# Check firewall
sudo iptables -L -n | grep -E "67|68"
# Restart DHCP server
sudo systemctl restart dhcpd4
```
### TFTP Issues
**Problem**: PXE client gets IP but fails to download bootloader
**Diagnosis**:
```bash
# Monitor TFTP requests
sudo tcpdump -i eth0 -n port 69
# Check TFTP server logs
sudo journalctl -u atftpd -f
# Test TFTP locally
tftp localhost -c get undionly.kpxe /tmp/test.kpxe
```
**Common causes**:
- TFTP server not running
- Bootloader files missing
- Permissions incorrect
- Firewall blocking UDP 69
**Solution**:
```bash
# Check files exist
ls -la /var/lib/tftpboot/
# Fix permissions
sudo chmod 644 /var/lib/tftpboot/*.{kpxe,efi}
# Restart TFTP server
sudo systemctl restart atftpd
# Check firewall
sudo iptables -L -n | grep 69
```
### HTTP Issues
**Problem**: iPXE loads but can't download boot script or kernel
**Diagnosis**:
```bash
# Monitor HTTP requests
sudo tail -f /var/log/nginx/access.log
# Test HTTP locally
curl -v http://localhost/boot/ipxe/boot.ipxe
curl -v http://localhost/health
# Check nginx status
sudo systemctl status nginx
```
**Common causes**:
- Nginx not running
- Boot files missing
- Permissions incorrect
- Firewall blocking TCP 80
- Wrong server IP in boot.ipxe
**Solution**:
```bash
# Check nginx config
sudo nginx -t
# Verify files exist
ls -la /var/lib/pxe-boot/ipxe/
ls -la /var/lib/pxe-boot/nixos/
# Fix permissions
sudo chown -R nginx:nginx /var/lib/pxe-boot
sudo chmod -R 755 /var/lib/pxe-boot
# Restart nginx
sudo systemctl restart nginx
```
### Boot Script Issues
**Problem**: Boot menu appears but fails to load kernel
**Diagnosis**:
- Check iPXE error messages on console
- Verify URLs in boot.ipxe match actual paths
- Test kernel download manually:
```bash
curl -I http://10.0.100.10/boot/nixos/bzImage
```
**Common causes**:
- NixOS boot images not deployed yet (normal for T032.S2)
- Wrong paths in boot.ipxe
- Files too large (check disk space)
**Solution**:
```bash
# Wait for T032.S3 (Image Builder) to generate boot images
# OR manually place NixOS netboot images:
sudo mkdir -p /var/lib/pxe-boot/nixos
# Copy bzImage and initrd from NixOS netboot
```
### Serial Console Debugging
For remote debugging without physical access:
1. **Enable serial console in BIOS**:
- Configure COM1/ttyS0 at 115200 baud
- Enable console redirection
2. **Connect via IPMI SOL** (if available):
```bash
ipmitool -I lanplus -H <bmc-ip> -U admin sol activate
```
3. **Watch boot process**:
- DHCP discovery messages
- TFTP download progress
- iPXE boot menu
- Kernel boot messages
4. **Kernel parameters include serial console**:
```
console=tty0 console=ttyS0,115200n8
```
### Common Error Messages
| Error | Cause | Solution |
|-------|-------|----------|
| `PXE-E51: No DHCP or proxyDHCP offers were received` | DHCP server not responding | Check DHCP server running, network connectivity |
| `PXE-E53: No boot filename received` | DHCP not providing filename | Check dhcpd.conf has `filename` option |
| `PXE-E32: TFTP open timeout` | TFTP server not responding | Check TFTP server running, firewall rules |
| `Not found: /boot/ipxe/boot.ipxe` | HTTP 404 error | Check file exists, nginx config, permissions |
| `Could not boot: Exec format error` | Corrupted boot file | Re-download/rebuild bootloader |
## Advanced Topics
### Building iPXE from Source
For production deployments, building iPXE from source provides:
- Custom branding
- Embedded certificates for HTTPS
- Optimized size
- Security hardening
**Build instructions**:
```bash
sudo ./setup.sh --build-ipxe
```
Or manually:
```bash
git clone https://github.com/ipxe/ipxe.git
cd ipxe/src
# BIOS bootloader
make bin/undionly.kpxe
# UEFI bootloader
make bin-x86_64-efi/ipxe.efi
# Copy to PXE server
sudo cp bin/undionly.kpxe /var/lib/pxe-boot/ipxe/
sudo cp bin-x86_64-efi/ipxe.efi /var/lib/pxe-boot/ipxe/
```
### HTTPS Boot (Secure Boot)
For enhanced security, serve boot images over HTTPS:
1. **Generate SSL certificate**:
```bash
sudo openssl req -x509 -nodes -days 365 -newkey rsa:2048 \
-keyout /etc/ssl/private/pxe-server.key \
-out /etc/ssl/certs/pxe-server.crt
```
2. **Configure nginx for HTTPS** (uncomment HTTPS block in `http/nginx.conf`)
3. **Update boot.ipxe** to use `https://` URLs
4. **Rebuild iPXE with embedded certificate** (for secure boot without prompts)
### Multiple NixOS Versions
To support multiple NixOS versions for testing/rollback:
```
/var/lib/pxe-boot/nixos/
├── 24.05/
│ ├── bzImage
│ └── initrd
├── 24.11/
│ ├── bzImage
│ └── initrd
└── latest -> 24.11/ # Symlink to current version
```
Update `boot.ipxe` to use `/boot/nixos/latest/bzImage` or add menu items for version selection.
### Integration with BMC/IPMI
For fully automated provisioning:
1. **Discover new hardware** via IPMI/Redfish API
2. **Configure PXE boot** via IPMI:
```bash
ipmitool -I lanplus -H <bmc-ip> -U admin chassis bootdev pxe options=persistent
```
3. **Power on server**:
```bash
ipmitool -I lanplus -H <bmc-ip> -U admin power on
```
4. **Monitor via SOL** (serial-over-LAN)
### Monitoring and Metrics
Track PXE boot activity:
1. **DHCP leases**:
```bash
cat /var/lib/dhcp/dhcpd.leases
```
2. **HTTP access logs**:
```bash
sudo tail -f /var/log/nginx/access.log | grep -E "boot.ipxe|bzImage|initrd"
```
3. **Prometheus metrics** (if nginx-module-vts installed):
- Boot file download counts
- Bandwidth usage
- Response times
4. **Custom metrics endpoint**:
- Parse nginx access logs
- Count boots per profile
- Alert on failed boots
## Files and Directory Structure
```
baremetal/pxe-server/
├── README.md # This file
├── setup.sh # Setup and management script
├── nixos-module.nix # NixOS service module
├── dhcp/
│ └── dhcpd.conf # DHCP server configuration
├── ipxe/
│ ├── boot.ipxe # Main boot menu script
│ └── mac-mappings.txt # MAC address documentation
├── http/
│ ├── nginx.conf # HTTP server configuration
│ └── directory-structure.txt # Directory layout documentation
└── assets/ # (Created at runtime)
└── /var/lib/pxe-boot/
├── ipxe/
│ ├── undionly.kpxe
│ ├── ipxe.efi
│ └── boot.ipxe
└── nixos/
├── bzImage
└── initrd
```
## Next Steps
After completing the PXE server setup:
1. **T032.S3 - Image Builder**: Automated NixOS image generation with profile-specific configurations
2. **T032.S4 - Provisioning Orchestrator**: API-driven provisioning workflow and node lifecycle management
3. **Integration with IAM**: Authentication for provisioning API
4. **Integration with FlareDB**: Node inventory and state management
## References
- [iPXE Documentation](https://ipxe.org/)
- [ISC DHCP Documentation](https://www.isc.org/dhcp/)
- [NixOS Manual - Netboot](https://nixos.org/manual/nixos/stable/index.html#sec-building-netboot)
- [Preboot Execution Environment (PXE) Specification](https://www.intel.com/content/www/us/en/architecture-and-technology/intel-boot-executive.html) <!-- TODO(review): verify this URL — the Intel page may have moved -->
## Support
For issues or questions:
- Check [Troubleshooting](#troubleshooting) section
- Review logs: `sudo journalctl -u dhcpd4 -u atftpd -u nginx -f`
- Run diagnostic: `sudo ./setup.sh --test`
## License
Part of Centra Cloud infrastructure - see project root for license information.

View file

@ -1,392 +0,0 @@
# NixOS Configuration Examples for PXE Boot Server
#
# This file contains example configurations for different deployment scenarios.
# Copy the relevant section to your /etc/nixos/configuration.nix
##############################################################################
# Example 1: Basic Single-Subnet PXE Server
##############################################################################
# Minimal deployment: one provisioning interface, one DHCP subnet,
# no per-node MAC mappings (all clients get the interactive boot menu).
{
  imports = [ ./baremetal/pxe-server/nixos-module.nix ];

  services.centra-pxe-server = {
    enable = true;
    interface = "eth0";            # NIC attached to the provisioning network
    serverAddress = "10.0.100.10"; # IP the TFTP/HTTP boot endpoints are served from
    dhcp = {
      subnet = "10.0.100.0";
      netmask = "255.255.255.0";
      broadcast = "10.0.100.255";
      # Dynamic pool handed out to unknown PXE clients.
      range = {
        start = "10.0.100.100";
        end = "10.0.100.200";
      };
      router = "10.0.100.1";
      nameservers = [ "10.0.100.1" "8.8.8.8" ];
      domainName = "centra.local";
    };
  };
}
##############################################################################
# Example 2: PXE Server with Known Nodes (MAC-based Auto-Selection)
##############################################################################
# Same as the basic example, plus a static inventory of known machines so
# boot.ipxe can auto-select a profile from the client's MAC address.
{
  imports = [ ./baremetal/pxe-server/nixos-module.nix ];

  services.centra-pxe-server = {
    enable = true;
    interface = "eth0";
    serverAddress = "10.0.100.10";
    dhcp = {
      subnet = "10.0.100.0";
      netmask = "255.255.255.0";
      broadcast = "10.0.100.255";
      range = {
        start = "10.0.100.100";
        end = "10.0.100.200";
      };
      router = "10.0.100.1";
    };

    # Define known nodes with MAC addresses.
    # Each entry pins a machine (keyed by the MAC of its PXE NIC) to a boot
    # profile, a hostname, and a fixed IP outside the dynamic range above.
    nodes = {
      # Control plane nodes
      "52:54:00:12:34:56" = {
        profile = "control-plane";
        hostname = "control-plane-01";
        ipAddress = "10.0.100.50";
      };
      "52:54:00:12:34:59" = {
        profile = "control-plane";
        hostname = "control-plane-02";
        ipAddress = "10.0.100.51";
      };
      "52:54:00:12:34:5a" = {
        profile = "control-plane";
        hostname = "control-plane-03";
        ipAddress = "10.0.100.52";
      };
      # Worker nodes
      "52:54:00:12:34:57" = {
        profile = "worker";
        hostname = "worker-01";
        ipAddress = "10.0.100.60";
      };
      "52:54:00:12:34:5b" = {
        profile = "worker";
        hostname = "worker-02";
        ipAddress = "10.0.100.61";
      };
      # All-in-one test node
      "52:54:00:12:34:58" = {
        profile = "all-in-one";
        hostname = "homelab-01";
        ipAddress = "10.0.100.70";
      };
    };
  };
}
##############################################################################
# Example 3: PXE Server with Custom DHCP Configuration
##############################################################################
# Production-flavoured DHCP tuning: multiple nameservers, long leases, and a
# raw dhcpd.conf snippet for a second (management) subnet.
{
  imports = [ ./baremetal/pxe-server/nixos-module.nix ];

  services.centra-pxe-server = {
    enable = true;
    interface = "eth0";
    serverAddress = "10.0.100.10";
    dhcp = {
      subnet = "10.0.100.0";
      netmask = "255.255.255.0";
      broadcast = "10.0.100.255";
      range = {
        start = "10.0.100.100";
        end = "10.0.100.200";
      };
      router = "10.0.100.1";
      nameservers = [ "10.0.100.1" "1.1.1.1" "8.8.8.8" ];
      domainName = "prod.centra.cloud";

      # Longer lease times for stable infrastructure
      defaultLeaseTime = 3600; # 1 hour
      maxLeaseTime = 86400; # 24 hours

      # Additional DHCP configuration.
      # NOTE(review): presumably appended verbatim to the generated
      # dhcpd.conf — confirm against nixos-module.nix.
      extraConfig = ''
        # NTP servers
        option ntp-servers 10.0.100.1;
        # Additional subnet for management network
        subnet 10.0.101.0 netmask 255.255.255.0 {
          range 10.0.101.100 10.0.101.200;
          option routers 10.0.101.1;
          option subnet-mask 255.255.255.0;
          next-server 10.0.100.10;
          if exists user-class and option user-class = "iPXE" {
            filename "http://10.0.100.10/boot/ipxe/boot.ipxe";
          } elsif option architecture-type = 00:00 {
            filename "undionly.kpxe";
          } elsif option architecture-type = 00:07 {
            filename "ipxe.efi";
          }
        }
        # Deny unknown clients (only known MAC addresses can boot)
        # deny unknown-clients;
      '';
    };
  };
}
##############################################################################
# Example 4: Multi-Homed PXE Server (Multiple Network Interfaces)
##############################################################################
# Multi-homed host: the module itself only manages one interface, so extra
# interfaces have to be wired up outside of it.
{
  imports = [ ./baremetal/pxe-server/nixos-module.nix ];

  # Note: The module currently supports single interface.
  # For multiple interfaces, configure multiple DHCP server instances manually
  # or extend the module to support this use case.
  services.centra-pxe-server = {
    enable = true;
    interface = "eth0"; # Primary provisioning network
    serverAddress = "10.0.100.10";
    dhcp = {
      subnet = "10.0.100.0";
      netmask = "255.255.255.0";
      broadcast = "10.0.100.255";
      range = {
        start = "10.0.100.100";
        end = "10.0.100.200";
      };
      router = "10.0.100.1";
    };
  };

  # Manual configuration for second interface
  # (uncomment and adapt to also answer DHCP on eth1):
  # services.dhcpd4.interfaces = [ "eth0" "eth1" ];
}
##############################################################################
# Example 5: High-Availability PXE Server (with Failover)
##############################################################################
# Primary PXE server
# Primary node of an ISC dhcpd failover pair. A matching config with the
# "secondary" role must be deployed on the peer (10.0.100.11); each peer
# serves part of the shared pool.
{
  imports = [ ./baremetal/pxe-server/nixos-module.nix ];

  services.centra-pxe-server = {
    enable = true;
    interface = "eth0";
    serverAddress = "10.0.100.10"; # Primary server IP
    dhcp = {
      subnet = "10.0.100.0";
      netmask = "255.255.255.0";
      broadcast = "10.0.100.255";
      range = {
        start = "10.0.100.100";
        end = "10.0.100.150"; # Split range for failover
      };
      router = "10.0.100.1";
      # Raw dhcpd.conf failover stanza; parameter semantics per dhcpd.conf(5):
      # mclt = max client lead time (secs a peer may extend a lease alone),
      # split 128 = primary handles ~half of the 0-255 hash space.
      extraConfig = ''
        # DHCP Failover Configuration
        failover peer "centra-pxe-failover" {
          primary;
          address 10.0.100.10;
          port 647;
          peer address 10.0.100.11;
          peer port 647;
          max-response-delay 30;
          max-unacked-updates 10;
          load balance max seconds 3;
          mclt 1800;
          split 128;
        }
        pool {
          failover peer "centra-pxe-failover";
          range 10.0.100.100 10.0.100.150;
        }
      '';
    };
  };
}
# Secondary PXE server (similar config with "secondary" role)
# Deploy on a different server with IP 10.0.100.11
##############################################################################
# Example 6: PXE Server with HTTPS Boot (Secure Boot)
##############################################################################
# Serve boot scripts and images over HTTPS instead of plain HTTP.
{
  imports = [ ./baremetal/pxe-server/nixos-module.nix ];

  services.centra-pxe-server = {
    enable = true;
    interface = "eth0";
    serverAddress = "10.0.100.10";
    http = {
      port = 443; # Use HTTPS
    };
    dhcp = {
      subnet = "10.0.100.0";
      netmask = "255.255.255.0";
      broadcast = "10.0.100.255";
      range = {
        start = "10.0.100.100";
        end = "10.0.100.200";
      };
      router = "10.0.100.1";
    };
  };

  # Configure SSL certificates.
  # `enableSSL` is deprecated/removed in the NixOS nginx module; `onlySSL`
  # serves this vhost over HTTPS only, matching http.port = 443 above.
  # (Use `addSSL` instead if HTTP should stay available too.)
  services.nginx = {
    virtualHosts."pxe.centra.local" = {
      onlySSL = true;
      sslCertificate = "/etc/ssl/certs/pxe-server.crt";
      sslCertificateKey = "/etc/ssl/private/pxe-server.key";
    };
  };

  # Note: You'll need to rebuild iPXE with embedded certificates
  # for seamless HTTPS boot without certificate warnings
}
##############################################################################
# Example 7: Development/Testing Configuration (Permissive)
##############################################################################
# Permissive setup for a home/lab network: short leases so reprovisioning
# cycles quickly, plus nginx directory listing for debugging.
{
  imports = [ ./baremetal/pxe-server/nixos-module.nix ];

  services.centra-pxe-server = {
    enable = true;
    interface = "eth0";
    serverAddress = "192.168.1.10"; # Typical home network
    dhcp = {
      subnet = "192.168.1.0";
      netmask = "255.255.255.0";
      broadcast = "192.168.1.255";
      range = {
        start = "192.168.1.100";
        end = "192.168.1.120";
      };
      router = "192.168.1.1";
      # Short lease times for rapid testing
      defaultLeaseTime = 300; # 5 minutes
      maxLeaseTime = 600; # 10 minutes
    };
  };

  # Enable nginx directory listing for debugging.
  # WARNING: this appends to the whole http {} block, so autoindex applies
  # server-wide — disable before any production use.
  services.nginx.appendHttpConfig = ''
    autoindex on;
  '';
}
##############################################################################
# Example 8: Production Configuration with Monitoring
##############################################################################
# Production deployment with monitoring, centralized logging, and a daily
# lease backup.
#
# NOTE: this example references `pkgs`, so it must be written as a NixOS
# module *function* (the original bare attribute set could not evaluate,
# since `pkgs` was never brought into scope).
{ pkgs, ... }:
{
  imports = [
    ./baremetal/pxe-server/nixos-module.nix
  ];

  services.centra-pxe-server = {
    enable = true;
    interface = "eth0";
    serverAddress = "10.0.100.10";
    dhcp = {
      subnet = "10.0.100.0";
      netmask = "255.255.255.0";
      broadcast = "10.0.100.255";
      range = {
        start = "10.0.100.100";
        end = "10.0.100.200";
      };
      router = "10.0.100.1";
    };
    nodes = {
      # Production node definitions
      # ... (add your nodes here)
    };
  };

  # Enable Prometheus monitoring of nginx.
  services.prometheus.exporters.nginx = {
    enable = true;
    port = 9113;
  };

  # Centralized logging: forward DHCP logs to the log server.
  services.rsyslog = {
    enable = true;
    extraConfig = ''
      # Forward DHCP logs to centralized log server
      if $programname == 'dhcpd' then @@logserver.centra.local:514
    '';
  };

  # Backup DHCP leases (daily, driven by the timer below).
  # systemd performs no shell command substitution in ExecStart, so the
  # original `$(date +%Y%m%d)` would have been passed literally; `script`
  # runs through a shell, where the substitution works as intended.
  systemd.services.backup-dhcp-leases = {
    description = "Backup DHCP leases";
    serviceConfig.Type = "oneshot";
    script = ''
      ${pkgs.rsync}/bin/rsync -a /var/lib/dhcp/dhcpd.leases \
        "/backup/dhcp/dhcpd.leases.$(date +%Y%m%d)"
    '';
  };
  systemd.timers.backup-dhcp-leases = {
    wantedBy = [ "timers.target" ];
    timerConfig = {
      OnCalendar = "daily";
      Persistent = true; # catch up on missed runs after downtime
    };
  };
}
##############################################################################
# Notes
##############################################################################
# 1. Always update serverAddress, subnet, and interface to match your network
#
# 2. For MAC-based auto-selection, add nodes to the `nodes` attribute
#
# 3. DHCP failover requires configuration on both primary and secondary servers
#
# 4. HTTPS boot requires custom-built iPXE with embedded certificates
#
# 5. Test configurations in a development environment before production deployment
#
# 6. Keep DHCP lease database backed up for disaster recovery
#
# 7. Monitor DHCP pool utilization to avoid exhaustion
#
# 8. Use fixed IP addresses (via MAC mapping) for critical infrastructure nodes

View file

@ -1,81 +0,0 @@
# PXE Boot Server Directory Structure
#
# This document describes the directory layout for the HTTP/TFTP server
# that serves PXE boot assets.
#
# Base Directory: /var/lib/pxe-boot/
/var/lib/pxe-boot/
├── ipxe/ # iPXE bootloaders and scripts
│ ├── undionly.kpxe # iPXE bootloader for BIOS (legacy)
│ ├── ipxe.efi # iPXE bootloader for UEFI x86-64
│ ├── ipxe-i386.efi # iPXE bootloader for UEFI x86 32-bit (rare)
│ ├── boot.ipxe # Main boot script (served via HTTP)
│ └── README.txt # Documentation
├── nixos/ # NixOS netboot images
│ ├── bzImage # Linux kernel (compressed)
│ ├── initrd # Initial ramdisk
│ ├── squashfs # Root filesystem (if using squashfs)
│ ├── version.txt # Build version info
│ └── profiles/ # Profile-specific boot images (optional)
│ ├── control-plane/
│ │ ├── bzImage
│ │ └── initrd
│ ├── worker/
│ │ ├── bzImage
│ │ └── initrd
│ └── all-in-one/
│ ├── bzImage
│ └── initrd
└── README.txt # Top-level documentation
# TFTP Directory (if using separate TFTP server)
# Usually: /var/lib/tftpboot/ or /srv/tftp/
/var/lib/tftpboot/
├── undionly.kpxe # Symlink to /var/lib/pxe-boot/ipxe/undionly.kpxe
├── ipxe.efi # Symlink to /var/lib/pxe-boot/ipxe/ipxe.efi
└── ipxe-i386.efi # Symlink to /var/lib/pxe-boot/ipxe/ipxe-i386.efi
# URL Mapping
# The following URLs are served by nginx:
#
# http://10.0.100.10/boot/ipxe/boot.ipxe
# -> /var/lib/pxe-boot/ipxe/boot.ipxe
#
# http://10.0.100.10/boot/ipxe/undionly.kpxe
# -> /var/lib/pxe-boot/ipxe/undionly.kpxe
#
# http://10.0.100.10/boot/nixos/bzImage
# -> /var/lib/pxe-boot/nixos/bzImage
#
# http://10.0.100.10/boot/nixos/initrd
# -> /var/lib/pxe-boot/nixos/initrd
# File Sizes (Typical)
# - undionly.kpxe: ~100 KB
# - ipxe.efi: ~1 MB
# - boot.ipxe: ~10 KB (text script)
# - bzImage: ~10-50 MB (compressed kernel)
# - initrd: ~50-500 MB (depends on included tools/drivers)
# Permissions
# All files should be readable by the nginx user:
# chown -R nginx:nginx /var/lib/pxe-boot
# chmod -R 755 /var/lib/pxe-boot
# chmod 644 /var/lib/pxe-boot/ipxe/*
# chmod 644 /var/lib/pxe-boot/nixos/*
# Disk Space Requirements
# Minimum: 1 GB (for basic setup with one NixOS image)
# Recommended: 5-10 GB (for multiple profiles and versions)
# - Each NixOS profile: ~500 MB - 1 GB
# - Keep 2-3 versions for rollback: multiply by 2-3x
# - Add buffer for logs and temporary files
# Backup Recommendations
# - Boot scripts (ipxe/*.ipxe): Version control (git)
# - Bootloaders (ipxe/*.kpxe, *.efi): Can re-download, but keep backups
# - NixOS images: Can rebuild with the image builder (stage T032.S3), but keep at least 2 versions
# - Configuration files: Version control (git)

View file

@ -1,47 +0,0 @@
# MAC Address to Profile Mappings
#
# This file documents the MAC address mappings used in boot.ipxe
# Update this file when adding new nodes to your infrastructure
#
# Format: MAC_ADDRESS PROFILE HOSTNAME
#
# To generate MAC addresses for virtual machines (testing):
# - Use the 52:54:00:xx:xx:xx range (QEMU/KVM local)
# - Or use your hypervisor's MAC assignment
#
# For physical servers:
# - Use the actual MAC address of the primary network interface
# - Usually found on a label on the server or in BIOS/BMC
#
# Control Plane Nodes
52:54:00:12:34:56 control-plane control-plane-01
52:54:00:12:34:59 control-plane control-plane-02
52:54:00:12:34:5a control-plane control-plane-03
# Worker Nodes
52:54:00:12:34:57 worker worker-01
52:54:00:12:34:5b worker worker-02
52:54:00:12:34:5c worker worker-03
# All-in-One Nodes (Testing/Homelab)
52:54:00:12:34:58 all-in-one all-in-one-01
# Instructions for Adding New Nodes:
# 1. Add the MAC address, profile, and hostname to this file
# 2. Update boot.ipxe with the new MAC address mapping
# 3. Update dhcpd.conf with a host entry for fixed IP assignment (optional)
# 4. Restart the DHCP service: systemctl restart dhcpd
#
# Example:
# 52:54:00:12:34:5d worker worker-04
#
# Then add to boot.ipxe:
# iseq ${mac} 52:54:00:12:34:5d && set profile worker && set hostname worker-04 && goto boot ||
#
# And optionally add to dhcpd.conf:
# host worker-04 {
# hardware ethernet 52:54:00:12:34:5d;
# fixed-address 10.0.100.64;
# option host-name "worker-04";
# }

View file

@ -1,15 +0,0 @@
use chainfire_client::Client;

/// Smallest possible ChainFire client example: connect with builder
/// defaults, write one key, and read it back.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Build a client with default retry/backoff behaviour.
    let endpoint = "http://127.0.0.1:2379";
    let mut client = Client::builder(endpoint).build().await?;

    // Round-trip a single key/value pair.
    let key = "/example/key";
    client.put_str(key, "value").await?;
    if let Some(stored) = client.get_str(key).await? {
        println!("Got value: {}", stored);
    }
    Ok(())
}

View file

@ -1,27 +0,0 @@
// Minimal cleanup utility for deleting stale deployer entries from ChainFire.
// Usage: cargo run -p chainfire-client --example cleanup
use chainfire_client::Client;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Connect to the ChainFire API endpoint.
    let mut client = Client::connect("http://127.0.0.1:7000").await?;

    // Known-stale keys left behind by earlier deployer runs.
    let stale_keys: [&[u8]; 2] = [
        b"deployer/nodes/info/node-025456f1",
        b"deployer/nodes/config/025456f142ee424b88cd8aba5cf6c16a",
    ];

    for key in stale_keys {
        // `delete` reports whether the key actually existed.
        let outcome = if client.delete(key).await? {
            "removed"
        } else {
            "not found"
        };
        println!("delete {} -> {}", String::from_utf8_lossy(key), outcome);
    }
    Ok(())
}

View file

@ -103,6 +103,47 @@ impl Client {
Ok(resp.kvs.into_iter().next().map(|kv| (kv.value, kv.mod_revision as u64))) Ok(resp.kvs.into_iter().next().map(|kv| (kv.value, kv.mod_revision as u64)))
} }
/// Put a key-value pair only if the key's mod_revision matches.
///
/// This is a best-effort compare-and-set. The server may not return
/// a reliable success flag, so callers should treat this as "attempted".
pub async fn put_if_revision(
    &mut self,
    key: impl AsRef<[u8]>,
    value: impl AsRef<[u8]>,
    expected_mod_revision: u64,
) -> Result<()> {
    let key_owned = key.as_ref().to_vec();

    // Guard clause: the transaction only commits when the key's current
    // mod_revision equals the caller-supplied revision.
    let guard = Compare {
        result: compare::CompareResult::Equal as i32,
        target: compare::CompareTarget::Mod as i32,
        key: key_owned.clone(),
        target_union: Some(compare::TargetUnion::ModRevision(
            expected_mod_revision as i64,
        )),
    };

    // The write executed on the success branch of the transaction.
    let write = RequestOp {
        request: Some(request_op::Request::RequestPut(PutRequest {
            key: key_owned,
            value: value.as_ref().to_vec(),
            lease: 0,
            prev_kv: false,
        })),
    };

    let txn = TxnRequest {
        compare: vec![guard],
        success: vec![write],
        failure: vec![],
    };
    // Response body is intentionally discarded (see doc comment above);
    // transport/server errors still surface through `?`.
    let _ = self.kv.txn(txn).await?.into_inner();
    Ok(())
}
/// Get a value as string /// Get a value as string
pub async fn get_str(&mut self, key: &str) -> Result<Option<String>> { pub async fn get_str(&mut self, key: &str) -> Result<Option<String>> {
let value = self.get(key.as_bytes()).await?; let value = self.get(key.as_bytes()).await?;

View file

@ -26,13 +26,9 @@ impl KvServiceImpl {
} }
/// Create a response header /// Create a response header
fn make_header(&self, revision: u64) -> crate::proto::ResponseHeader { async fn make_header(&self, revision: u64) -> crate::proto::ResponseHeader {
make_header( let term = self.raft.current_term().await;
self.cluster_id, make_header(self.cluster_id, self.raft.node_id(), revision, term)
self.raft.node_id(),
revision,
0, // TODO: get actual term
)
} }
} }
@ -74,7 +70,7 @@ impl Kv for KvServiceImpl {
let count = kvs.len() as i64; let count = kvs.len() as i64;
Ok(Response::new(RangeResponse { Ok(Response::new(RangeResponse {
header: Some(self.make_header(revision)), header: Some(self.make_header(revision).await),
kvs, kvs,
more: false, more: false,
count, count,
@ -107,7 +103,7 @@ impl Kv for KvServiceImpl {
} }
Ok(Response::new(PutResponse { Ok(Response::new(PutResponse {
header: Some(self.make_header(revision)), header: Some(self.make_header(revision).await),
prev_kv: None, // Not supported yet in custom RaftCore prev_kv: None, // Not supported yet in custom RaftCore
})) }))
} }
@ -165,7 +161,7 @@ impl Kv for KvServiceImpl {
} }
Ok(Response::new(DeleteRangeResponse { Ok(Response::new(DeleteRangeResponse {
header: Some(self.make_header(revision)), header: Some(self.make_header(revision).await),
deleted: deleted_count, deleted: deleted_count,
prev_kvs: vec![], // Not supported yet prev_kvs: vec![], // Not supported yet
})) }))
@ -234,7 +230,7 @@ impl Kv for KvServiceImpl {
warn!("Transaction response details not yet supported in custom Raft implementation"); warn!("Transaction response details not yet supported in custom Raft implementation");
Ok(Response::new(TxnResponse { Ok(Response::new(TxnResponse {
header: Some(self.make_header(revision)), header: Some(self.make_header(revision).await),
succeeded: true, // Assume success if no error succeeded: true, // Assume success if no error
responses: vec![], // Not supported yet responses: vec![], // Not supported yet
})) }))

View file

@ -1,52 +0,0 @@
use std::time::Duration;
use chainfire_core::ClusterBuilder;
use chainfire_types::{node::NodeRole, RaftRole};
use tokio::time::sleep;
#[tokio::test]
async fn test_single_node_bootstrap() {
    let _ = tracing_subscriber::fmt::try_init();

    // Build a one-node cluster that bootstraps itself as a voting control-plane node.
    let cluster = ClusterBuilder::new(1)
        .name("node-1")
        .memory_storage()
        .gossip_addr("127.0.0.1:0".parse().unwrap())
        .raft_addr("127.0.0.1:0".parse().unwrap())
        .role(NodeRole::ControlPlane)
        .raft_role(RaftRole::Voter)
        .bootstrap(true)
        .build()
        .await
        .expect("Failed to build cluster");
    let handle = cluster.handle();

    // Drive the cluster from a background task.
    tokio::spawn(async move {
        cluster.run().await.unwrap();
    });

    // Poll for up to ~5s for the node to elect itself leader.
    let mut became_leader = false;
    for _ in 0..10 {
        if handle.is_leader() {
            became_leader = true;
            break;
        }
        sleep(Duration::from_millis(500)).await;
    }
    assert!(became_leader, "Node 1 should become leader in bootstrap mode");
    assert_eq!(handle.leader(), Some(1));

    // KV round-trip through the elected leader.
    let kv = handle.kv();
    kv.put("test-key", b"test-value").await.expect("Put failed");
    let stored = kv.get("test-key").await.expect("Get failed");
    assert_eq!(stored, Some(b"test-value".to_vec()));

    handle.shutdown();
}

View file

@ -1,613 +0,0 @@
//! Integration tests for Leader Election (P1) and Log Replication (P2)
//!
//! Tests cover:
//! - Single-node auto-election
//! - 3-node majority election
//! - Role transitions
//! - Term management
//! - Heartbeat mechanism
//! - Log replication
//! - Leader failure recovery
#![cfg(all(test, feature = "custom-raft"))]
use std::sync::Arc;
use std::time::Duration;
use tokio::time;
use tokio::sync::mpsc;
use chainfire_raft::core::{
RaftCore, RaftConfig, RaftRole, NodeId,
};
use chainfire_raft::network::custom_test_client::{InMemoryRpcClient, RpcMessage};
use chainfire_storage::{LogStorage, StateMachine, RocksStore};
/// Build a single initialized Raft node backed by a throwaway RocksDB directory.
///
/// Returns the node together with its `TempDir` so the storage stays alive for
/// the duration of the test.
async fn create_test_node(node_id: NodeId, peers: Vec<NodeId>) -> (Arc<RaftCore>, tempfile::TempDir) {
    let dir = tempfile::TempDir::new().unwrap();
    let store = RocksStore::new(dir.path()).unwrap();

    // Log storage and state machine share the same RocksDB handle.
    let log = Arc::new(LogStorage::new(store.clone()));
    let sm = Arc::new(StateMachine::new(store).unwrap());
    let rpc = Arc::new(InMemoryRpcClient::new());

    // Fast timings so elections settle quickly in tests.
    let timings = RaftConfig {
        election_timeout_min: 150,
        election_timeout_max: 300,
        heartbeat_interval: 50,
    };

    let node = Arc::new(RaftCore::new(node_id, peers, log, sm, rpc, timings));
    node.initialize().await.unwrap();
    (node, dir)
}
/// Helper to create a 3-node cluster with RPC wiring.
///
/// Builds three `RaftCore` nodes (ids 1..=3) sharing one `InMemoryRpcClient`,
/// then spawns one background task per node that pumps that node's incoming
/// RPC messages into its vote/append handlers. Returns the nodes, their
/// storage `TempDir`s (must stay alive for the test), and the shared network.
async fn create_3node_cluster() -> (
    Vec<Arc<RaftCore>>,
    Vec<tempfile::TempDir>,
    Arc<InMemoryRpcClient>,
) {
    let network = Arc::new(InMemoryRpcClient::new());
    let mut nodes = Vec::new();
    let mut temp_dirs = Vec::new();
    // Create 3 nodes, each with the other two listed as peers.
    for node_id in 1..=3 {
        let peers: Vec<NodeId> = (1..=3).filter(|&id| id != node_id).collect();
        let temp_dir = tempfile::TempDir::new().unwrap();
        let rocks = RocksStore::new(temp_dir.path()).unwrap();
        let storage = Arc::new(LogStorage::new(rocks.clone()));
        let state_machine = Arc::new(StateMachine::new(rocks).unwrap());
        let config = RaftConfig {
            election_timeout_min: 150, // 150ms - matches single-node test
            election_timeout_max: 300, // 300ms
            heartbeat_interval: 50, // 50ms - matches single-node test
        };
        let node = Arc::new(RaftCore::new(
            node_id,
            peers,
            storage,
            state_machine,
            Arc::clone(&network) as Arc<dyn chainfire_raft::network::RaftRpcClient>,
            config,
        ));
        node.initialize().await.unwrap();
        nodes.push(node);
        temp_dirs.push(temp_dir);
    }
    // Wire up RPC channels for each node
    for node in &nodes {
        let node_id = node.node_id();
        let (tx, mut rx) = mpsc::unbounded_channel::<RpcMessage>();
        network.register(node_id, tx).await;
        // Spawn handler for this node's RPC messages; it runs until the
        // channel is closed (i.e. the network drops the sender).
        let node_clone = Arc::clone(node);
        tokio::spawn(async move {
            eprintln!("[RPC Handler {}] Started", node_clone.node_id());
            while let Some(msg) = rx.recv().await {
                match msg {
                    RpcMessage::Vote(req, resp_tx) => {
                        eprintln!("[RPC Handler {}] Processing Vote from {}",
                            node_clone.node_id(), req.candidate_id);
                        node_clone.request_vote_rpc(req, resp_tx).await;
                    }
                    RpcMessage::AppendEntries(req, resp_tx) => {
                        eprintln!("[RPC Handler {}] Processing AppendEntries from {} term={}",
                            node_clone.node_id(), req.leader_id, req.term);
                        node_clone.append_entries_rpc(req, resp_tx).await;
                    }
                }
            }
            eprintln!("[RPC Handler {}] Stopped (channel closed)", node_clone.node_id());
        });
    }
    // Give all RPC handler tasks time to start
    tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;
    (nodes, temp_dirs, network)
}
// ============================================================================
// Test Cases
// ============================================================================
#[tokio::test]
async fn test_node_creation_and_initialization() {
    // A freshly initialized node must come up as a term-0 follower
    // with the id it was constructed with.
    let (node, _dir) = create_test_node(1, vec![2, 3]).await;

    assert_eq!(node.node_id(), 1);
    assert_eq!(node.role().await, RaftRole::Follower);
    assert_eq!(node.current_term().await, 0);
}
/// Sanity-check that the three Raft roles are pairwise distinct.
///
/// These assertions are purely synchronous, so no async runtime is needed:
/// a plain `#[test]` avoids spinning up a tokio runtime for nothing.
#[test]
fn test_role_transitions() {
    assert_ne!(RaftRole::Follower, RaftRole::Candidate);
    assert_ne!(RaftRole::Candidate, RaftRole::Leader);
    assert_ne!(RaftRole::Leader, RaftRole::Follower);
}
#[tokio::test]
async fn test_term_persistence() {
    // Reopening the same storage path must yield the same persisted term.
    let temp_dir = tempfile::TempDir::new().unwrap();
    let path = temp_dir.path().to_str().unwrap().to_string();

    // First pass creates the node; second pass reopens the same RocksDB.
    // Each node is dropped at the end of its iteration, releasing the DB lock
    // before the next open — same lifetimes as two explicit scopes.
    for _ in 0..2 {
        let rocks = RocksStore::new(&path).unwrap();
        let storage = Arc::new(LogStorage::new(rocks.clone()));
        let state_machine = Arc::new(StateMachine::new(rocks).unwrap());
        let network = Arc::new(InMemoryRpcClient::new());
        let node = Arc::new(RaftCore::new(
            1,
            vec![2, 3],
            storage,
            state_machine,
            network,
            RaftConfig::default(),
        ));
        node.initialize().await.unwrap();
        // Term is 0 on first creation and still 0 after reload from storage.
        assert_eq!(node.current_term().await, 0);
    }
}
/// Default Raft timings must satisfy the basic ordering:
/// 0 < heartbeat_interval < election_timeout_min < election_timeout_max.
///
/// Purely synchronous assertions, so a plain `#[test]` is used instead of
/// `#[tokio::test]` — no async runtime is required.
#[test]
fn test_config_defaults() {
    let config = RaftConfig::default();
    assert!(config.election_timeout_min > 0);
    assert!(config.election_timeout_max > config.election_timeout_min);
    assert!(config.heartbeat_interval > 0);
    assert!(config.heartbeat_interval < config.election_timeout_min);
}
// ============================================================================
// P2: Log Replication Integration Tests
// ============================================================================
/// Test 1: 3-node cluster formation.
///
/// Starts three nodes, waits for an election, and asserts that exactly one
/// leader and two followers emerge, then that heartbeats keep the roles
/// stable (no re-election) for a further 400ms.
#[tokio::test]
async fn test_3node_cluster_formation() {
    // Test 1: 3-Node Cluster Formation Test
    // - 3 nodes start → Leader elected
    // - All followers receive heartbeat
    // - No election timeout occurs
    let (nodes, _temp_dirs, _network) = create_3node_cluster().await;
    // Start event loops for all nodes
    let mut handles = Vec::new();
    for node in &nodes {
        let node_clone = Arc::clone(node);
        let handle = tokio::spawn(async move {
            let _ = node_clone.run().await;
        });
        handles.push(handle);
    }
    // Wait for leader election (should happen within ~500ms)
    time::sleep(Duration::from_millis(500)).await;
    // Check that exactly one leader was elected
    let mut leader_count = 0;
    let mut follower_count = 0;
    let mut leader_id = None;
    for node in &nodes {
        match node.role().await {
            RaftRole::Leader => {
                leader_count += 1;
                leader_id = Some(node.node_id());
            }
            RaftRole::Follower => {
                follower_count += 1;
            }
            RaftRole::Candidate => {
                // Should not have candidates after election
                panic!("Node {} is still candidate after election", node.node_id());
            }
        }
    }
    assert_eq!(leader_count, 1, "Expected exactly one leader");
    assert_eq!(follower_count, 2, "Expected exactly two followers");
    assert!(leader_id.is_some(), "Leader should be identified");
    println!("✓ Leader elected: node {}", leader_id.unwrap());
    // Wait a bit more to ensure heartbeats prevent election timeout
    // Heartbeat interval is 50ms, election timeout is 150-300ms
    // So after 400ms, no new election should occur
    time::sleep(Duration::from_millis(400)).await;
    // Verify leader is still the same
    for node in &nodes {
        if node.node_id() == leader_id.unwrap() {
            assert_eq!(node.role().await, RaftRole::Leader, "Leader should remain leader");
        } else {
            assert_eq!(
                node.role().await,
                RaftRole::Follower,
                "Followers should remain followers due to heartbeats"
            );
        }
    }
    println!("✓ Heartbeats prevent election timeout");
}
/// Placeholder: end-to-end log replication test (leader append → follower
/// replication → commit_index sync). Ignored until the client write API lands.
#[tokio::test]
#[ignore] // Requires client write API implementation
async fn test_log_replication() {
    // Test 2: Log Replication Test
    // - Leader adds entries
    // - Replicated to all followers
    // - commit_index synchronized
    // TODO: Implement once client write API is ready
    // This requires handle_client_write to be fully implemented
}
/// Placeholder: leader-failure recovery test (kill leader → new election →
/// log consistency preserved). Ignored until graceful shutdown exists.
#[tokio::test]
#[ignore] // Requires graceful node shutdown
async fn test_leader_failure_recovery() {
    // Test 3: Leader Failure Test
    // - Leader stops → New leader elected
    // - Log consistency maintained
    // TODO: Implement once we have graceful shutdown mechanism
    // Currently, aborting the event loop doesn't cleanly stop the node
}
// ============================================================================
// Deferred complex tests
// ============================================================================
/// Placeholder: split-vote recovery. Needs fine-grained control over election
/// timing to force the split deterministically.
#[tokio::test]
#[ignore] // Requires full cluster setup
async fn test_split_vote_recovery() {
    // Test that cluster recovers from split vote
    // Deferred: Requires complex timing control
}
/// Placeholder: votes must survive a node restart. Needs a proper
/// shutdown/startup sequence before it can be implemented.
#[tokio::test]
#[ignore] // Requires node restart mechanism
async fn test_vote_persistence_across_restart() {
    // Test that votes persist across node restarts
    // Deferred: Requires proper shutdown/startup sequencing
}
// ============================================================================
// P3: Commitment & State Machine Integration Tests
// ============================================================================
/// P3: full write path — client write on the leader must be replicated,
/// committed, and applied to every node's state machine.
///
/// Timing-sensitive: sleeps are sized to the cluster's election timeout and
/// heartbeat interval (see inline comments).
#[tokio::test]
async fn test_write_replicate_commit() {
    // Test: Client write on leader → replication → commit → state machine apply
    // Verifies the complete write→replicate→commit→apply flow
    use chainfire_types::command::RaftCommand;
    let (nodes, _temp_dirs, _network) = create_3node_cluster().await;
    // Start event loops for all nodes
    let mut handles = Vec::new();
    for node in &nodes {
        let node_clone = Arc::clone(node);
        let handle = tokio::spawn(async move {
            let _ = node_clone.run().await;
        });
        handles.push(handle);
    }
    // Wait for leader election (election timeout is 2-4s)
    time::sleep(Duration::from_millis(5000)).await;
    // Find the leader
    let mut leader = None;
    for node in &nodes {
        if matches!(node.role().await, RaftRole::Leader) {
            leader = Some(node);
            break;
        }
    }
    let leader = leader.expect("Leader should be elected");
    println!("✓ Leader elected: node {}", leader.node_id());
    // Submit a write command to the leader
    let cmd = RaftCommand::Put {
        key: b"test_key_1".to_vec(),
        value: b"test_value_1".to_vec(),
        lease_id: None,
        prev_kv: false,
    };
    leader
        .client_write(cmd)
        .await
        .expect("Client write should succeed");
    println!("✓ Client write submitted to leader");
    // Wait for replication and commit (heartbeat + replication + commit)
    // Heartbeat interval is 50ms, need multiple rounds:
    // 1. First heartbeat sends entries
    // 2. Followers ack, leader updates match_index and commit_index
    // 3. Second heartbeat propagates new leader_commit to followers
    // 4. Followers update their commit_index and apply entries
    // Give extra time to avoid re-election issues
    time::sleep(Duration::from_millis(1500)).await;
    // Debug: Check all nodes' roles and states
    println!("\nDEBUG: All nodes after write:");
    for node in &nodes {
        println!(" Node {} role={:?} term={} commit_index={} last_applied={}",
            node.node_id(), node.role().await, node.current_term().await,
            node.commit_index().await, node.last_applied().await);
    }
    println!();
    // Verify that the value is committed and applied on all nodes
    for node in &nodes {
        let commit_index = node.commit_index().await;
        let last_applied = node.last_applied().await;
        assert!(
            commit_index >= 1,
            "Node {} should have commit_index >= 1, got {}",
            node.node_id(),
            commit_index
        );
        assert!(
            last_applied >= 1,
            "Node {} should have last_applied >= 1, got {}",
            node.node_id(),
            last_applied
        );
        // Verify the value exists in the state machine
        let state_machine = node.state_machine();
        let result = state_machine.kv().get(b"test_key_1").expect("Get should succeed");
        assert!(
            result.is_some(),
            "Node {} should have test_key_1 in state machine",
            node.node_id()
        );
        let entry = result.unwrap();
        assert_eq!(
            entry.value,
            b"test_value_1",
            "Node {} has wrong value for test_key_1",
            node.node_id()
        );
        println!(
            "✓ Node {} has test_key_1=test_value_1 (commit_index={}, last_applied={})",
            node.node_id(),
            commit_index,
            last_applied
        );
    }
    println!("✓ All nodes have committed and applied the write");
}
/// P3: ordering — five sequential writes through the leader must end up
/// committed and applied in the same order on every node.
#[tokio::test]
async fn test_commit_consistency() {
    // Test: Multiple writes preserve order across all nodes
    // Verifies that the commit mechanism maintains consistency
    use chainfire_types::command::RaftCommand;
    let (nodes, _temp_dirs, _network) = create_3node_cluster().await;
    // Start event loops
    let mut handles = Vec::new();
    for node in &nodes {
        let node_clone = Arc::clone(node);
        let handle = tokio::spawn(async move {
            let _ = node_clone.run().await;
        });
        handles.push(handle);
    }
    // Wait for leader election (election timeout is 2-4s)
    time::sleep(Duration::from_millis(5000)).await;
    // Find the leader
    let mut leader = None;
    for node in &nodes {
        if matches!(node.role().await, RaftRole::Leader) {
            leader = Some(node);
            break;
        }
    }
    let leader = leader.expect("Leader should be elected");
    println!("✓ Leader elected: node {}", leader.node_id());
    // Submit multiple writes in sequence
    for i in 1..=5 {
        let cmd = RaftCommand::Put {
            key: format!("key_{}", i).into_bytes(),
            value: format!("value_{}", i).into_bytes(),
            lease_id: None,
            prev_kv: false,
        };
        leader
            .client_write(cmd)
            .await
            .expect("Client write should succeed");
    }
    println!("✓ Submitted 5 writes to leader");
    // Wait for all writes to commit and apply
    time::sleep(Duration::from_millis(500)).await;
    // Verify all nodes have all 5 keys in correct order
    for node in &nodes {
        let commit_index = node.commit_index().await;
        let last_applied = node.last_applied().await;
        assert!(
            commit_index >= 5,
            "Node {} should have commit_index >= 5, got {}",
            node.node_id(),
            commit_index
        );
        assert!(
            last_applied >= 5,
            "Node {} should have last_applied >= 5, got {}",
            node.node_id(),
            last_applied
        );
        let state_machine = node.state_machine();
        for i in 1..=5 {
            let key = format!("key_{}", i).into_bytes();
            let expected_value = format!("value_{}", i).into_bytes();
            let result = state_machine.kv().get(&key).expect("Get should succeed");
            assert!(
                result.is_some(),
                "Node {} missing key_{}",
                node.node_id(),
                i
            );
            let entry = result.unwrap();
            assert_eq!(
                entry.value, expected_value,
                "Node {} has wrong value for key_{}",
                node.node_id(), i
            );
        }
        println!(
            "✓ Node {} has all 5 keys in correct order (commit_index={}, last_applied={})",
            node.node_id(),
            commit_index,
            last_applied
        );
    }
    println!("✓ All nodes maintain consistent order");
}
/// Raft safety: a follower must reject client writes with `NotLeader`.
#[tokio::test]
async fn test_leader_only_write() {
    // Test: Follower should reject client writes
    // Verifies that only the leader can accept writes (Raft safety)
    use chainfire_types::command::RaftCommand;
    use chainfire_raft::core::RaftError;
    let (nodes, _temp_dirs, _network) = create_3node_cluster().await;
    // Start event loops
    let mut handles = Vec::new();
    for node in &nodes {
        let node_clone = Arc::clone(node);
        let handle = tokio::spawn(async move {
            let _ = node_clone.run().await;
        });
        handles.push(handle);
    }
    // Wait for leader election (election timeout is 2-4s)
    time::sleep(Duration::from_millis(5000)).await;
    // Find a follower
    let mut follower = None;
    for node in &nodes {
        if matches!(node.role().await, RaftRole::Follower) {
            follower = Some(node);
            break;
        }
    }
    let follower = follower.expect("Follower should exist");
    println!("✓ Found follower: node {}", follower.node_id());
    // Try to write to the follower
    let cmd = RaftCommand::Put {
        key: b"follower_write".to_vec(),
        value: b"should_fail".to_vec(),
        lease_id: None,
        prev_kv: false,
    };
    let result = follower.client_write(cmd).await;
    // Should return NotLeader error
    assert!(
        result.is_err(),
        "Follower write should fail with NotLeader error"
    );
    if let Err(RaftError::NotLeader { .. }) = result {
        println!("✓ Follower correctly rejected write with NotLeader error");
    } else {
        panic!(
            "Expected NotLeader error, got: {:?}",
            result.err().unwrap()
        );
    }
}

View file

@ -1,274 +0,0 @@
//! Property-based tests for `chainfire-raft` using an in-process simulated cluster.
//!
//! These tests aim to catch timing/partition edge cases with high reproducibility.
#![cfg(all(test, feature = "custom-raft"))]
use std::sync::Arc;
use std::time::Duration;
use proptest::prelude::*;
use tokio::sync::mpsc;
use tokio::time;
use chainfire_raft::core::{RaftConfig, RaftCore};
use chainfire_raft::network::test_client::{RpcMessage, SimulatedNetwork};
use chainfire_raft::storage::{EntryPayload, LogEntry, LogStorage, StateMachine};
use chainfire_types::command::RaftCommand;
/// One randomized action applied to the simulated 3-node cluster.
#[derive(Debug, Clone)]
enum Op {
    /// Advance simulated time by this many milliseconds.
    Tick(u64),
    /// Sever the link between the two (distinct) node ids.
    Disconnect(u64, u64),
    /// Restore the link between the two (distinct) node ids.
    Reconnect(u64, u64),
    /// Delay messages between the two nodes by the given milliseconds
    /// (the test driver applies the delay in both directions).
    Delay(u64, u64, u64),
    /// Reset any configured link behavior between the two nodes
    /// (applied in both directions by the driver).
    ClearLink(u64, u64),
    /// Submit a client Put of a one-byte key/value to the given node.
    Write(u64, u8, u8),
}
/// Proptest strategy yielding a valid node id in the 3-node cluster (1..=3).
fn node_id() -> impl Strategy<Value = u64> {
    1u64..=3u64
}
/// Proptest strategy yielding an ordered pair of two different node ids.
fn distinct_pair() -> impl Strategy<Value = (u64, u64)> {
    (node_id(), node_id()).prop_filter("distinct nodes", |pair| pair.0 != pair.1)
}
/// Proptest strategy producing one random cluster operation.
///
/// NOTE: arm order matters for proptest's value generation/shrinking —
/// do not reorder without re-validating failing-case reproducibility.
fn op_strategy() -> impl Strategy<Value = Op> {
    prop_oneof![
        // Advance simulated time by up to 300ms.
        (0_u64..=300).prop_map(Op::Tick),
        distinct_pair().prop_map(|(a, b)| Op::Disconnect(a, b)),
        distinct_pair().prop_map(|(a, b)| Op::Reconnect(a, b)),
        // Link delay of up to 50ms between a distinct pair.
        (distinct_pair(), 0_u64..=50).prop_map(|((a, b), d)| Op::Delay(a, b, d)),
        distinct_pair().prop_map(|(a, b)| Op::ClearLink(a, b)),
        // Client writes: pick node + small key/value.
        (node_id(), any::<u8>(), any::<u8>()).prop_map(|(n, k, v)| Op::Write(n, k, v)),
    ]
}
/// Proptest strategy producing a short randomized operation sequence (0..40 ops).
fn ops_strategy() -> impl Strategy<Value = Vec<Op>> {
    prop::collection::vec(op_strategy(), 0..40)
}
/// Advance paused tokio time by `total_ms`, in small 10ms steps.
///
/// Stepping (rather than one large jump) avoids "simultaneous" timer firings
/// starving message handling in the simulated cluster; a `yield_now` after
/// each step lets pending tasks run.
async fn advance_ms(total_ms: u64) {
    const STEP_MS: u64 = 10;
    let mut left = total_ms;
    loop {
        if left == 0 {
            break;
        }
        let step = left.min(STEP_MS);
        time::advance(Duration::from_millis(step)).await;
        tokio::task::yield_now().await;
        left -= step;
    }
}
/// Build a 3-node in-memory cluster wired through a `SimulatedNetwork`.
///
/// Each node gets in-memory log/state storage, deterministic per-node seeds
/// for reproducibility, and a spawned task pumping its incoming RPC messages
/// into the vote/append handlers. Returns the nodes and the shared network
/// handle (used by tests to inject partitions/delays).
async fn create_3node_cluster() -> (Vec<Arc<RaftCore>>, Arc<SimulatedNetwork>) {
    let network = Arc::new(SimulatedNetwork::new());
    let mut nodes = Vec::new();
    for node_id in 1..=3_u64 {
        let peers: Vec<u64> = (1..=3_u64).filter(|&id| id != node_id).collect();
        let storage = Arc::new(LogStorage::new_in_memory());
        let state_machine = Arc::new(StateMachine::new_in_memory());
        let config = RaftConfig {
            election_timeout_min: 150,
            election_timeout_max: 300,
            heartbeat_interval: 50,
            // Deterministic per-node seed for reproducibility.
            deterministic_seed: Some(node_id),
        };
        let node = Arc::new(RaftCore::new(
            node_id,
            peers,
            storage,
            state_machine,
            Arc::new(network.client(node_id)) as Arc<dyn chainfire_raft::network::RaftRpcClient>,
            config,
        ));
        node.initialize().await.unwrap();
        nodes.push(node);
    }
    // Wire up RPC handlers.
    for node in &nodes {
        let node_id = node.node_id();
        let (tx, mut rx) = mpsc::unbounded_channel::<RpcMessage>();
        network.register(node_id, tx).await;
        let node_clone: Arc<RaftCore> = Arc::clone(node);
        tokio::spawn(async move {
            while let Some(msg) = rx.recv().await {
                match msg {
                    RpcMessage::Vote(req, resp_tx) => {
                        node_clone.request_vote_rpc(req, resp_tx).await;
                    }
                    RpcMessage::AppendEntries(req, resp_tx) => {
                        node_clone.append_entries_rpc(req, resp_tx).await;
                    }
                }
            }
        });
    }
    (nodes, network)
}
/// Stable byte fingerprint of a log-entry payload, used for cross-node
/// equality checks that must distinguish payload variants.
fn payload_fingerprint(payload: &EntryPayload<Vec<u8>>) -> Vec<u8> {
    // Serialization failure degrades to an empty fingerprint, matching the
    // original best-effort behavior.
    match bincode::serialize(payload) {
        Ok(bytes) => bytes,
        Err(_) => Vec::new(),
    }
}
/// Assert core Raft invariants across all nodes:
/// per-node `last_applied <= commit_index <= last_log_index`, plus the Log
/// Matching Property between every pair of nodes.
async fn assert_raft_invariants(nodes: &[Arc<RaftCore>]) {
    // Per-node monotonic invariants.
    for node in nodes {
        let commit = node.commit_index().await;
        let last_applied = node.last_applied().await;
        let st = node.storage().get_log_state().expect("log state");
        let last_log_index = st.last_log_id.map(|id| id.index).unwrap_or(0);
        assert!(
            last_applied <= commit,
            "node {}: last_applied={} > commit_index={}",
            node.node_id(),
            last_applied,
            commit
        );
        assert!(
            commit <= last_log_index,
            "node {}: commit_index={} > last_log_index={}",
            node.node_id(),
            commit,
            last_log_index
        );
    }
    // Log Matching Property:
    // If two logs contain an entry with the same index and term, then the logs are identical
    // for all entries up through that index.
    // Snapshot each node's log as index -> (term, payload fingerprint).
    let mut node_logs: Vec<std::collections::BTreeMap<u64, (u64, Vec<u8>)>> = Vec::new();
    for node in nodes {
        let st = node.storage().get_log_state().expect("log state");
        let last = st.last_log_id.map(|id| id.index).unwrap_or(0);
        let entries: Vec<LogEntry<Vec<u8>>> = if last == 0 {
            vec![]
        } else {
            node.storage()
                .get_log_entries(1..=last)
                .expect("log entries")
        };
        let mut m = std::collections::BTreeMap::new();
        for e in entries {
            m.insert(e.log_id.index, (e.log_id.term, payload_fingerprint(&e.payload)));
        }
        node_logs.push(m);
    }
    // Pairwise check: matching (index, term) implies identical prefixes.
    for a in 0..nodes.len() {
        for b in (a + 1)..nodes.len() {
            let la = &node_logs[a];
            let lb = &node_logs[b];
            for (idx, (term_a, payload_a)) in la.iter() {
                if let Some((term_b, payload_b)) = lb.get(idx) {
                    if term_a == term_b {
                        assert_eq!(
                            payload_a, payload_b,
                            "log mismatch at idx={} term={} (nodes {} vs {})",
                            idx,
                            term_a,
                            nodes[a].node_id(),
                            nodes[b].node_id()
                        );
                        // The whole prefix up to idx must agree as well.
                        for j in 1..=*idx {
                            assert_eq!(
                                la.get(&j),
                                lb.get(&j),
                                "log matching violated at idx={} (prefix {} differs) nodes {} vs {}",
                                idx,
                                j,
                                nodes[a].node_id(),
                                nodes[b].node_id()
                            );
                        }
                    }
                }
            }
        }
    }
}
proptest! {
    #![proptest_config(ProptestConfig {
        cases: 32,
        .. ProptestConfig::default()
    })]
    /// Property: after any randomized sequence of ticks, partitions, delays,
    /// and writes (under paused tokio time), the Raft invariants checked by
    /// `assert_raft_invariants` still hold on all three nodes.
    #[test]
    fn prop_raft_log_matching_holds(ops in ops_strategy()) {
        // Single-threaded runtime with paused time gives deterministic scheduling.
        let rt = tokio::runtime::Builder::new_current_thread()
            .enable_time()
            .build()
            .unwrap();
        rt.block_on(async move {
            tokio::time::pause();
            let (nodes, network) = create_3node_cluster().await;
            // Start event loops.
            let mut handles = Vec::new();
            for node in &nodes {
                let node_clone = Arc::clone(node);
                handles.push(tokio::spawn(async move {
                    let _ = node_clone.run().await;
                }));
            }
            tokio::task::yield_now().await;
            // Drive a randomized sequence of operations.
            for op in ops {
                match op {
                    Op::Tick(ms) => advance_ms(ms).await,
                    Op::Disconnect(a, b) => network.disconnect(a, b).await,
                    Op::Reconnect(a, b) => network.reconnect(a, b).await,
                    Op::Delay(a, b, d) => {
                        use chainfire_raft::network::test_client::LinkBehavior;
                        // Apply the delay symmetrically on both directions of the link.
                        network.set_link(a, b, LinkBehavior::Delay(Duration::from_millis(d))).await;
                        network.set_link(b, a, LinkBehavior::Delay(Duration::from_millis(d))).await;
                    }
                    Op::ClearLink(a, b) => {
                        network.clear_link(a, b).await;
                        network.clear_link(b, a).await;
                    }
                    Op::Write(n, k, v) => {
                        // Writes to non-leaders may fail; errors are intentionally ignored.
                        let node = nodes.iter().find(|x| x.node_id() == n).unwrap();
                        let _ = node.client_write(RaftCommand::Put {
                            key: vec![k],
                            value: vec![v],
                            lease_id: None,
                            prev_kv: false,
                        }).await;
                    }
                }
            }
            // Let the system settle a bit.
            advance_ms(500).await;
            assert_raft_invariants(&nodes).await;
            // Best-effort cleanup.
            for h in handles {
                h.abort();
            }
        });
    }
}

View file

@ -1,658 +0,0 @@
//! Chainfire 3-Node Cluster Integration Test
//!
//! Verifies HA behavior: leader election, state replication, and node recovery.
use chainfire_client::Client;
use chainfire_server::{
config::{ClusterConfig, NetworkConfig, NodeConfig, RaftConfig, ServerConfig, StorageConfig},
server::Server,
};
use chainfire_types::RaftRole;
use std::net::SocketAddr;
use std::time::Duration;
use tokio::time::sleep;
/// Create a 3-node cluster configuration with join flow.
/// Node 1 bootstraps alone; nodes 2 & 3 join via the member_add API.
///
/// Port layout per node: API = base, Raft = base + 1, gossip = base + 2,
/// HTTP = 28080 + node_id. Returns the config plus the `TempDir` backing
/// its data directory (must outlive the server).
fn cluster_config_with_join(node_id: u64) -> (ServerConfig, tempfile::TempDir) {
    let base_port = match node_id {
        1 => 12379,
        2 => 22379,
        3 => 32379,
        _ => panic!("Invalid node_id"),
    };
    let api_addr: SocketAddr = format!("127.0.0.1:{}", base_port).parse().unwrap();
    let raft_addr: SocketAddr = format!("127.0.0.1:{}", base_port + 1).parse().unwrap();
    let gossip_addr: SocketAddr = format!("127.0.0.1:{}", base_port + 2).parse().unwrap();
    let temp_dir = tempfile::tempdir().unwrap();
    let config = ServerConfig {
        node: NodeConfig {
            id: node_id,
            name: format!("test-node-{}", node_id),
            role: "control_plane".to_string(),
        },
        cluster: ClusterConfig {
            id: 1,
            bootstrap: node_id == 1, // Only node 1 bootstraps
            initial_members: vec![], // Node 1 starts alone, others join via API
        },
        network: NetworkConfig {
            api_addr,
            http_addr: format!("127.0.0.1:{}", 28080 + node_id).parse().unwrap(),
            raft_addr,
            gossip_addr,
            tls: None,
        },
        storage: StorageConfig {
            data_dir: temp_dir.path().to_path_buf(),
        },
        // Node 1 is Voter (bootstrap), nodes 2 & 3 are Learner (join via member_add)
        raft: RaftConfig {
            role: if node_id == 1 { RaftRole::Voter } else { RaftRole::Learner },
        },
    };
    (config, temp_dir)
}
/// Alias for backwards compatibility (old tests use this).
/// Delegates to `cluster_config_with_join`; keep until callers are migrated.
fn cluster_config(node_id: u64) -> (ServerConfig, tempfile::TempDir) {
    cluster_config_with_join(node_id)
}
/// Create a 3-node cluster configuration with simultaneous bootstrap.
/// All nodes start together with the same initial_members (avoids add_learner bug).
///
/// Unlike `cluster_config_with_join`, every node is a Voter from the start and
/// all three share the same Raft member list; only node 1 sets `bootstrap`.
fn cluster_config_simultaneous_bootstrap(node_id: u64) -> (ServerConfig, tempfile::TempDir) {
    use chainfire_server::config::MemberConfig;
    let base_port = match node_id {
        1 => 12379,
        2 => 22379,
        3 => 32379,
        _ => panic!("Invalid node_id"),
    };
    let api_addr: SocketAddr = format!("127.0.0.1:{}", base_port).parse().unwrap();
    let raft_addr: SocketAddr = format!("127.0.0.1:{}", base_port + 1).parse().unwrap();
    let gossip_addr: SocketAddr = format!("127.0.0.1:{}", base_port + 2).parse().unwrap();
    let temp_dir = tempfile::tempdir().unwrap();
    // All nodes have the same initial_members list (Raft ports = base + 1).
    let initial_members = vec![
        MemberConfig { id: 1, raft_addr: "127.0.0.1:12380".to_string() },
        MemberConfig { id: 2, raft_addr: "127.0.0.1:22380".to_string() },
        MemberConfig { id: 3, raft_addr: "127.0.0.1:32380".to_string() },
    ];
    let config = ServerConfig {
        node: NodeConfig {
            id: node_id,
            name: format!("test-node-{}", node_id),
            role: "control_plane".to_string(),
        },
        cluster: ClusterConfig {
            id: 1,
            bootstrap: node_id == 1, // Only node 1 bootstraps, but with full member list
            initial_members: initial_members.clone(),
        },
        network: NetworkConfig {
            api_addr,
            http_addr: format!("127.0.0.1:{}", 28080 + node_id).parse().unwrap(),
            raft_addr,
            gossip_addr,
            tls: None,
        },
        storage: StorageConfig {
            data_dir: temp_dir.path().to_path_buf(),
        },
        raft: RaftConfig {
            role: RaftRole::Voter, // All nodes are voters from the start
        },
    };
    (config, temp_dir)
}
/// Create a single-node cluster configuration (for testing basic Raft functionality).
///
/// The node is hard-coded to id 1, bootstraps alone (empty `initial_members`),
/// and uses fixed local ports: API 12379, Raft 12380, gossip 12381,
/// HTTP 28081 (= 28080 + node id, matching `cluster_config_with_join`).
/// Returns the config plus the `TempDir` backing its data directory.
fn single_node_config() -> (ServerConfig, tempfile::TempDir) {
    let api_addr: SocketAddr = "127.0.0.1:12379".parse().unwrap();
    let raft_addr: SocketAddr = "127.0.0.1:12380".parse().unwrap();
    let gossip_addr: SocketAddr = "127.0.0.1:12381".parse().unwrap();
    let temp_dir = tempfile::tempdir().unwrap();
    let config = ServerConfig {
        node: NodeConfig {
            id: 1,
            name: "test-node-1".to_string(),
            role: "control_plane".to_string(),
        },
        cluster: ClusterConfig {
            id: 1,
            bootstrap: true, // Single-node bootstrap
            initial_members: vec![], // Empty = single node
        },
        network: NetworkConfig {
            api_addr,
            // Fix: the original referenced `node_id`, which is not in scope in
            // this parameterless helper (compile error). The node id is fixed
            // at 1, so the HTTP port is the literal 28080 + 1 = 28081.
            http_addr: "127.0.0.1:28081".parse().unwrap(),
            raft_addr,
            gossip_addr,
            tls: None,
        },
        storage: StorageConfig {
            data_dir: temp_dir.path().to_path_buf(),
        },
        raft: RaftConfig::default(),
    };
    (config, temp_dir)
}
/// Single-node cluster: the lone node must elect itself leader and serve
/// basic KV put/get. Ignored by default (spawns a real server).
#[tokio::test]
#[ignore] // Run with: cargo test --test cluster_integration -- --ignored
async fn test_single_node_raft_leader_election() {
    println!("\n=== Test: Single-Node Raft Leader Election ===");
    // Start single node
    let (config, _temp) = single_node_config();
    let api_addr = config.network.api_addr;
    println!("Creating single-node cluster...");
    let server = Server::new(config).await.unwrap();
    let handle = tokio::spawn(async move { server.run().await });
    println!("Node started: {}", api_addr);
    // Wait for leader election
    println!("Waiting for leader election...");
    sleep(Duration::from_secs(2)).await;
    // Verify leader elected
    let mut client = Client::connect(format!("http://{}", api_addr))
        .await
        .expect("Failed to connect");
    let status = client.status().await.expect("Failed to get status");
    println!(
        "Node status: leader={}, term={}",
        status.leader, status.raft_term
    );
    assert_eq!(status.leader, 1, "Node 1 should be leader in single-node cluster");
    assert!(status.raft_term > 0, "Raft term should be > 0");
    // Test basic KV operations
    println!("Testing KV operations...");
    client.put("test-key", "test-value").await.unwrap();
    let value = client.get("test-key").await.unwrap();
    assert_eq!(value, Some(b"test-value".to_vec()));
    println!("✓ Single-node Raft working correctly");
    // Cleanup
    handle.abort();
}
/// Bring up a 3-node cluster by bootstrapping node 1 alone and joining
/// nodes 2 and 3 as voters through the `member_add` API, then check that
/// every node agrees on the same non-zero leader id.
#[tokio::test]
#[ignore] // Run with: cargo test --test cluster_integration -- --ignored
async fn test_3node_leader_election_with_join() {
    println!("\n=== Test: 3-Node Leader Election with Join Flow ===");

    // Node 1 bootstraps by itself and should win the initial election.
    let (cfg1, _temp1) = cluster_config_with_join(1);
    let api1 = cfg1.network.api_addr;
    let raft1 = cfg1.network.raft_addr;
    println!("Creating Node 1 (bootstrap)...");
    let node1 = Server::new(cfg1).await.unwrap();
    let task1 = tokio::spawn(async move { node1.run().await });
    println!("Node 1 started: API={}, Raft={}", api1, raft1);

    // Allow the single-node election to settle, then confirm leadership.
    sleep(Duration::from_secs(2)).await;
    let mut client1 = Client::connect(format!("http://{}", api1))
        .await
        .expect("Failed to connect to node 1");
    let status1 = client1.status().await.expect("Failed to get status");
    println!("Node 1 status: leader={}, term={}", status1.leader, status1.raft_term);
    assert_eq!(status1.leader, 1, "Node 1 should be leader");

    // Nodes 2 and 3 start without bootstrapping; they wait to be joined.
    let (cfg2, _temp2) = cluster_config_with_join(2);
    let api2 = cfg2.network.api_addr;
    let raft2 = cfg2.network.raft_addr;
    println!("Creating Node 2...");
    let node2 = Server::new(cfg2).await.unwrap();
    let task2 = tokio::spawn(async move { node2.run().await });
    println!("Node 2 started: API={}, Raft={}", api2, raft2);
    sleep(Duration::from_millis(500)).await;

    let (cfg3, _temp3) = cluster_config_with_join(3);
    let api3 = cfg3.network.api_addr;
    let raft3 = cfg3.network.raft_addr;
    println!("Creating Node 3...");
    let node3 = Server::new(cfg3).await.unwrap();
    let task3 = tokio::spawn(async move { node3.run().await });
    println!("Node 3 started: API={}, Raft={}", api3, raft3);
    sleep(Duration::from_millis(500)).await;

    // Join both followers as voters (is_learner = false) via the leader.
    println!("Adding node 2 to cluster via member_add API...");
    let member2_id = client1
        .member_add(2, raft2.to_string(), false) // node_id=2, false=voter
        .await
        .expect("Failed to add node 2");
    println!("Node 2 added with ID: {}", member2_id);
    assert_eq!(member2_id, 2, "Node 2 should have ID 2");

    println!("Adding node 3 to cluster via member_add API...");
    let member3_id = client1
        .member_add(3, raft3.to_string(), false) // node_id=3, false=voter
        .await
        .expect("Failed to add node 3");
    println!("Node 3 added with ID: {}", member3_id);
    assert_eq!(member3_id, 3, "Node 3 should have ID 3");

    // Let the membership changes replicate before polling each node.
    sleep(Duration::from_secs(3)).await;

    let status1 = client1.status().await.expect("Failed to get status from node 1");
    println!("Node 1 final status: leader={}, term={}", status1.leader, status1.raft_term);
    let mut client2 = Client::connect(format!("http://{}", api2))
        .await
        .expect("Failed to connect to node 2");
    let status2 = client2.status().await.expect("Failed to get status from node 2");
    println!("Node 2 final status: leader={}, term={}", status2.leader, status2.raft_term);
    let mut client3 = Client::connect(format!("http://{}", api3))
        .await
        .expect("Failed to connect to node 3");
    let status3 = client3.status().await.expect("Failed to get status from node 3");
    println!("Node 3 final status: leader={}, term={}", status3.leader, status3.raft_term);

    // Consensus check: one agreed-upon, non-zero leader across all nodes.
    assert_eq!(status1.leader, status2.leader, "Nodes 1 and 2 disagree on leader");
    assert_eq!(status1.leader, status3.leader, "Nodes 1 and 3 disagree on leader");
    assert!(status1.leader > 0, "No leader elected");
    println!("✓ 3-node cluster formed successfully with join flow");

    // Stop all background server tasks.
    task1.abort();
    task2.abort();
    task3.abort();
}
/// Write through node 1 of a 3-node cluster and confirm the entries become
/// readable from both of the other nodes after replication.
#[tokio::test]
#[ignore]
async fn test_3node_state_replication() {
    println!("\n=== Test: 3-Node State Replication ===");

    // Spin up all three members.
    let (cfg1, _temp1) = cluster_config(1);
    let api1 = cfg1.network.api_addr;
    let node1 = Server::new(cfg1).await.unwrap();
    let task1 = tokio::spawn(async move { node1.run().await });
    let (cfg2, _temp2) = cluster_config(2);
    let api2 = cfg2.network.api_addr;
    let node2 = Server::new(cfg2).await.unwrap();
    let task2 = tokio::spawn(async move { node2.run().await });
    let (cfg3, _temp3) = cluster_config(3);
    let api3 = cfg3.network.api_addr;
    let node3 = Server::new(cfg3).await.unwrap();
    let task3 = tokio::spawn(async move { node3.run().await });
    sleep(Duration::from_secs(2)).await;
    println!("Cluster started");

    // All writes go through node 1, which is expected to hold leadership.
    let mut client1 = Client::connect(format!("http://{}", api1))
        .await
        .unwrap();
    println!("Writing test data to node 1...");
    for (key, val) in [
        ("test/key1", "value1"),
        ("test/key2", "value2"),
        ("test/key3", "value3"),
    ] {
        client1.put(key, val).await.unwrap();
    }

    // Give the log entries time to reach the followers.
    sleep(Duration::from_millis(500)).await;

    // Each follower must serve the replicated value.
    println!("Reading from node 2...");
    let mut client2 = Client::connect(format!("http://{}", api2))
        .await
        .unwrap();
    let val2 = client2.get("test/key1").await.unwrap();
    assert_eq!(val2, Some(b"value1".to_vec()), "Data not replicated to node 2");
    println!("Reading from node 3...");
    let mut client3 = Client::connect(format!("http://{}", api3))
        .await
        .unwrap();
    let val3 = client3.get("test/key1").await.unwrap();
    assert_eq!(val3, Some(b"value1".to_vec()), "Data not replicated to node 3");
    println!("✓ State replication verified");

    // Stop the background server tasks.
    task1.abort();
    task2.abort();
    task3.abort();
}
/// Kill one follower of a 3-node cluster and verify the surviving 2/3
/// quorum keeps accepting writes and serving reads.
#[tokio::test]
#[ignore]
async fn test_3node_follower_crash() {
    println!("\n=== Test: Follower Crash (Node Remains Available) ===");

    // Launch the three members. Node 2's API address is never needed since
    // it is the node we crash.
    let (cfg1, _temp1) = cluster_config(1);
    let api1 = cfg1.network.api_addr;
    let node1 = Server::new(cfg1).await.unwrap();
    let task1 = tokio::spawn(async move { node1.run().await });
    let (cfg2, _temp2) = cluster_config(2);
    let node2 = Server::new(cfg2).await.unwrap();
    let task2 = tokio::spawn(async move { node2.run().await });
    let (cfg3, _temp3) = cluster_config(3);
    let api3 = cfg3.network.api_addr;
    let node3 = Server::new(cfg3).await.unwrap();
    let task3 = tokio::spawn(async move { node3.run().await });
    sleep(Duration::from_secs(2)).await;
    println!("Cluster started");

    // Seed a value while all three nodes are healthy.
    let mut client1 = Client::connect(format!("http://{}", api1))
        .await
        .unwrap();
    println!("Writing initial data...");
    client1.put("test/before-crash", "initial").await.unwrap();

    // Abort follower node 2's server task to simulate a crash.
    println!("Killing node 2 (follower)...");
    task2.abort();
    sleep(Duration::from_millis(500)).await;

    // Two of three voters remain, so the cluster must still commit writes.
    println!("Writing data after crash...");
    client1
        .put("test/after-crash", "still-working")
        .await
        .expect("Write should succeed with 2/3 quorum");

    // The surviving follower should have replicated the post-crash write.
    let mut client3 = Client::connect(format!("http://{}", api3))
        .await
        .unwrap();
    let val = client3.get("test/after-crash").await.unwrap();
    assert_eq!(val, Some(b"still-working".to_vec()));
    println!("✓ Cluster operational after follower crash");

    // Stop the remaining background server tasks.
    task1.abort();
    task3.abort();
}
/// Crash the current leader of a 3-node cluster and verify a new leader is
/// elected (with a strictly higher term) and that the cluster stays writable.
#[tokio::test]
#[ignore]
async fn test_3node_leader_crash_reelection() {
    println!("\n=== Test: Leader Crash & Re-election ===");
    // Start cluster
    let (config1, _temp1) = cluster_config(1);
    let server1 = Server::new(config1).await.unwrap();
    let handle1 = tokio::spawn(async move { server1.run().await });
    let (config2, _temp2) = cluster_config(2);
    let api2 = config2.network.api_addr;
    let server2 = Server::new(config2).await.unwrap();
    let handle2 = tokio::spawn(async move { server2.run().await });
    let (config3, _temp3) = cluster_config(3);
    let api3 = config3.network.api_addr;
    let server3 = Server::new(config3).await.unwrap();
    let handle3 = tokio::spawn(async move { server3.run().await });
    sleep(Duration::from_secs(2)).await;
    println!("Cluster started");
    // Determine initial leader (observed through node 2, which survives)
    let mut client2 = Client::connect(format!("http://{}", api2))
        .await
        .unwrap();
    let initial_status = client2.status().await.unwrap();
    let initial_leader = initial_status.leader;
    println!("Initial leader: node {}", initial_leader);
    // BUGFIX: the test aborts handle1 below, which only crashes the actual
    // leader if node 1 won the election. Previously this was just assumed
    // ("assume node 1"); if a different node had been elected, the test would
    // later fail with a misleading re-election error. Make the assumption an
    // explicit precondition so a surprise leader fails loudly right here.
    assert_eq!(
        initial_leader, 1,
        "Test expects node 1 to be the initial leader (it is the node killed below)"
    );
    // Kill the leader (node 1, verified above)
    println!("Killing leader (node 1)...");
    handle1.abort();
    // Wait for re-election (should be < 1s per requirements)
    println!("Waiting for re-election...");
    sleep(Duration::from_secs(1)).await;
    // Verify a new leader was elected and the Raft term advanced
    let new_status = client2.status().await.unwrap();
    println!(
        "New leader: node {}, term: {}",
        new_status.leader, new_status.raft_term
    );
    assert!(new_status.leader > 0, "No new leader elected");
    assert!(
        new_status.raft_term > initial_status.raft_term,
        "Raft term should increase after re-election"
    );
    println!("✓ Leader re-election successful within 1s");
    // Verify cluster still accepts writes (via node 3, the other survivor)
    let mut client3 = Client::connect(format!("http://{}", api3))
        .await
        .unwrap();
    client3
        .put("test/post-reelection", "functional")
        .await
        .expect("Cluster should be functional after re-election");
    println!("✓ Cluster operational after re-election");
    // Cleanup: stop the surviving server tasks
    handle2.abort();
    handle3.abort();
}
/// Test 3-node cluster with learners only (no voter promotion)
/// T041 Workaround: Avoids change_membership by keeping nodes as learners
///
/// Flow: node 1 bootstraps as the sole voter, nodes 2 and 3 join as
/// learners via `member_add(.., is_learner = true)`, then we verify the
/// leader accepts a write and a learner reports node 1 as leader.
#[tokio::test]
#[ignore] // Run with: cargo test --test cluster_integration test_3node_with_learners -- --ignored
async fn test_3node_with_learners() {
    println!("\n=== Test: 3-Node Cluster with Learners (T041 Workaround) ===");
    // Start Node 1 (bootstrap alone as single voter)
    let (config1, _temp1) = cluster_config_with_join(1);
    let api1 = config1.network.api_addr;
    let raft1 = config1.network.raft_addr;
    println!("Creating Node 1 (bootstrap)...");
    let server1 = Server::new(config1).await.unwrap();
    let handle1 = tokio::spawn(async move { server1.run().await });
    println!("Node 1 started: API={}, Raft={}", api1, raft1);
    // Wait for node 1 to become leader (single-voter self-election)
    sleep(Duration::from_secs(2)).await;
    // Verify node 1 is leader before attempting any joins
    let mut client1 = Client::connect(format!("http://{}", api1))
        .await
        .expect("Failed to connect to node 1");
    let status1 = client1.status().await.expect("Failed to get status");
    println!("Node 1 status: leader={}, term={}", status1.leader, status1.raft_term);
    assert_eq!(status1.leader, 1, "Node 1 should be leader");
    // Start Node 2 (does not bootstrap; waits to be joined)
    let (config2, _temp2) = cluster_config_with_join(2);
    let api2 = config2.network.api_addr;
    let raft2 = config2.network.raft_addr;
    println!("Creating Node 2...");
    let server2 = Server::new(config2).await.unwrap();
    let handle2 = tokio::spawn(async move { server2.run().await });
    println!("Node 2 started: API={}, Raft={}", api2, raft2);
    sleep(Duration::from_millis(500)).await;
    // Start Node 3 (does not bootstrap; waits to be joined)
    let (config3, _temp3) = cluster_config_with_join(3);
    let api3 = config3.network.api_addr;
    let raft3 = config3.network.raft_addr;
    println!("Creating Node 3...");
    let server3 = Server::new(config3).await.unwrap();
    let handle3 = tokio::spawn(async move { server3.run().await });
    println!("Node 3 started: API={}, Raft={}", api3, raft3);
    sleep(Duration::from_millis(500)).await;
    // Add node 2 as LEARNER (is_learner=true, no voter promotion)
    println!("Adding node 2 as learner (no voter promotion)...");
    let member2_id = client1
        .member_add(2, raft2.to_string(), true) // is_learner=true
        .await
        .expect("Failed to add node 2 as learner");
    println!("Node 2 added as learner with ID: {}", member2_id);
    assert_eq!(member2_id, 2);
    // Add node 3 as LEARNER
    println!("Adding node 3 as learner (no voter promotion)...");
    let member3_id = client1
        .member_add(3, raft3.to_string(), true) // is_learner=true
        .await
        .expect("Failed to add node 3 as learner");
    println!("Node 3 added as learner with ID: {}", member3_id);
    assert_eq!(member3_id, 3);
    // Wait for the membership entries to replicate to the new learners
    sleep(Duration::from_secs(2)).await;
    // Test write on leader (node 1 remains the only voter)
    println!("Testing KV write on leader...");
    client1.put("test-key", "test-value").await.expect("Put failed");
    // Wait for replication to learners
    sleep(Duration::from_secs(1)).await;
    // Verify data replicated to learner (should be able to read)
    let mut client2 = Client::connect(format!("http://{}", api2))
        .await
        .expect("Failed to connect to node 2");
    // Note: Reading from a learner may require forwarding to leader
    // For now, just verify the cluster is operational
    let status2 = client2.status().await.expect("Failed to get status from learner");
    println!("Node 2 (learner) status: leader={}, term={}", status2.leader, status2.raft_term);
    // All nodes should see node 1 as leader
    assert_eq!(status2.leader, 1, "Learner should see node 1 as leader");
    println!("✓ 3-node cluster with learners working");
    // Cleanup: abort the background server tasks
    handle1.abort();
    handle2.abort();
    handle3.abort();
}
/// Test 3-node cluster formation using staggered bootstrap (DISABLED - doesn't work)
///
/// NOTE(review): despite "simultaneous" in the name, the flow below is
/// staggered — node 1 bootstraps first, then nodes 2 and 3 start with the
/// same bootstrap config. Kept `#[ignore]`d as a record of this T041
/// workaround attempt; the doc comment above says it doesn't work.
#[tokio::test]
#[ignore]
async fn test_3node_simultaneous_bootstrap_disabled() {
    println!("\n=== Test: 3-Node Staggered Bootstrap (T041 Workaround) ===");
    // Start Node 1 first (bootstrap=true, will initialize with full membership)
    let (config1, _temp1) = cluster_config_simultaneous_bootstrap(1);
    let api1 = config1.network.api_addr;
    println!("Creating Node 1 (bootstrap)...");
    let server1 = Server::new(config1).await.unwrap();
    let handle1 = tokio::spawn(async move { server1.run().await });
    println!("Node 1 started: API={}", api1);
    // Give node 1 time to become leader
    println!("Waiting for Node 1 to become leader (3s)...");
    sleep(Duration::from_secs(3)).await;
    // Verify node 1 is reachable and record its pre-join view
    let mut client1 = Client::connect(format!("http://{}", api1))
        .await
        .expect("Failed to connect to node 1");
    let status1 = client1.status().await.expect("Failed to get status");
    println!("Node 1 status before others: leader={}, term={}", status1.leader, status1.raft_term);
    // Now start nodes 2 and 3
    let (config2, _temp2) = cluster_config_simultaneous_bootstrap(2);
    let api2 = config2.network.api_addr;
    println!("Creating Node 2...");
    let server2 = Server::new(config2).await.unwrap();
    let handle2 = tokio::spawn(async move { server2.run().await });
    println!("Node 2 started: API={}", api2);
    let (config3, _temp3) = cluster_config_simultaneous_bootstrap(3);
    let api3 = config3.network.api_addr;
    println!("Creating Node 3...");
    let server3 = Server::new(config3).await.unwrap();
    let handle3 = tokio::spawn(async move { server3.run().await });
    println!("Node 3 started: API={}", api3);
    // Wait for cluster to stabilize
    println!("Waiting for cluster to stabilize (5s)...");
    sleep(Duration::from_secs(5)).await;
    // Reconnect to node 1 and re-read status (shadows the pre-join client/status)
    let mut client1 = Client::connect(format!("http://{}", api1))
        .await
        .expect("Failed to connect to node 1")
    let status1 = client1.status().await.expect("Failed to get status from node 1");
    println!("Node 1 status: leader={}, term={}", status1.leader, status1.raft_term);
    let mut client2 = Client::connect(format!("http://{}", api2))
        .await
        .expect("Failed to connect to node 2");
    let status2 = client2.status().await.expect("Failed to get status from node 2");
    println!("Node 2 status: leader={}, term={}", status2.leader, status2.raft_term);
    let mut client3 = Client::connect(format!("http://{}", api3))
        .await
        .expect("Failed to connect to node 3");
    let status3 = client3.status().await.expect("Failed to get status from node 3");
    println!("Node 3 status: leader={}, term={}", status3.leader, status3.raft_term);
    // All nodes should agree on a single non-zero leader
    assert!(status1.leader > 0, "No leader elected");
    assert_eq!(status1.leader, status2.leader, "Nodes 1 and 2 disagree on leader");
    assert_eq!(status1.leader, status3.leader, "Nodes 1 and 3 disagree on leader");
    // Test KV operations on the cluster
    println!("Testing KV operations...");
    client1.put("test-key", "test-value").await.expect("Put failed");
    // Wait for commit to propagate to followers via heartbeat (heartbeat_interval=100ms)
    sleep(Duration::from_millis(200)).await;
    let value = client2.get("test-key").await.expect("Get failed");
    assert_eq!(value, Some(b"test-value".to_vec()), "Value not replicated");
    println!("✓ 3-node cluster formed successfully with simultaneous bootstrap");
    // Cleanup: abort the background server tasks
    handle1.abort();
    handle2.abort();
    handle3.abort();
}

View file

@ -1,175 +0,0 @@
//! Integration tests for Chainfire
//!
//! These tests verify that the server, client, and all components work together correctly.
use chainfire_client::Client;
use chainfire_server::{
config::{ClusterConfig, NetworkConfig, NodeConfig, RaftConfig, ServerConfig, StorageConfig},
server::Server,
};
use std::time::Duration;
use tokio::time::sleep;
/// Create a test server configuration
///
/// Each test passes a distinct base `port`; every listener is derived from
/// it so concurrently-running tests never collide:
/// api = port, raft = port + 100, gossip = port + 200, http = port + 300.
/// Returns the config plus the `TempDir` guard that owns the data
/// directory — callers must keep it alive for the server's lifetime.
fn test_config(port: u16) -> (ServerConfig, tempfile::TempDir) {
    use std::net::SocketAddr;
    let api_addr: SocketAddr = format!("127.0.0.1:{}", port).parse().unwrap();
    let raft_addr: SocketAddr = format!("127.0.0.1:{}", port + 100).parse().unwrap();
    let gossip_addr: SocketAddr = format!("127.0.0.1:{}", port + 200).parse().unwrap();
    // BUGFIX: http_addr was hard-coded to 127.0.0.1:28081, so every test in
    // this file raced to bind the same HTTP port when the harness ran them
    // in parallel. Derive it from `port` like the other listeners.
    let http_addr: SocketAddr = format!("127.0.0.1:{}", port + 300).parse().unwrap();
    let temp_dir = tempfile::tempdir().unwrap();
    let config = ServerConfig {
        node: NodeConfig {
            id: 1,
            name: format!("test-node-{}", port),
            role: "control_plane".to_string(),
        },
        cluster: ClusterConfig {
            id: 1,
            bootstrap: true,
            initial_members: vec![],
        },
        network: NetworkConfig {
            api_addr,
            http_addr,
            raft_addr,
            gossip_addr,
            tls: None,
        },
        storage: StorageConfig {
            data_dir: temp_dir.path().to_path_buf(),
        },
        raft: RaftConfig::default(),
    };
    (config, temp_dir)
}
/// Exercise the full KV surface — put, get, overwrite, delete, and prefix
/// scan — against a single bootstrapped node.
#[tokio::test]
async fn test_single_node_kv_operations() {
    // Boot a throwaway single-node server in the background.
    let (cfg, _temp_dir) = test_config(23790);
    let api_addr = cfg.network.api_addr;
    let node = Server::new(cfg).await.unwrap();
    let server_handle = tokio::spawn(async move {
        let _ = node.run().await;
    });

    // Leader election can be slow on CI/constrained machines, hence the
    // generous 2s wait (was 500ms historically).
    sleep(Duration::from_millis(2000)).await;

    let mut client = Client::connect(format!("http://{}", api_addr))
        .await
        .unwrap();

    // The first write retries a few times in case election is still settling.
    let mut rev = 0;
    let mut failures = 0;
    loop {
        match client.put("test/key1", "value1").await {
            Ok(r) => {
                rev = r;
                break;
            }
            Err(e) => {
                failures += 1;
                if failures >= 5 {
                    panic!("Put failed after 5 attempts: {}", e);
                }
                eprintln!("Put attempt {} failed: {}, retrying...", failures, e);
                sleep(Duration::from_millis(500)).await;
            }
        }
    }
    assert!(rev > 0);

    // Reads must observe the committed value.
    let got = client.get("test/key1").await.unwrap();
    assert_eq!(got, Some(b"value1".to_vec()));

    // Overwriting the key bumps the revision and changes the stored value.
    let rev2 = client.put("test/key1", "value2").await.unwrap();
    assert!(rev2 > rev);
    let got = client.get("test/key1").await.unwrap();
    assert_eq!(got, Some(b"value2".to_vec()));

    // Missing keys read back as None.
    let got = client.get("test/nonexistent").await.unwrap();
    assert!(got.is_none());

    // Delete removes the key and reports whether it actually existed.
    let deleted = client.delete("test/key1").await.unwrap();
    assert!(deleted);
    let got = client.get("test/key1").await.unwrap();
    assert!(got.is_none());
    let deleted = client.delete("test/nonexistent").await.unwrap();
    assert!(!deleted);

    // Prefix scans return only keys under the requested prefix.
    for (k, v) in [
        ("prefix/a", "1"),
        ("prefix/b", "2"),
        ("prefix/c", "3"),
        ("other/key", "other"),
    ] {
        client.put(k, v).await.unwrap();
    }
    let prefix_values = client.get_prefix("prefix/").await.unwrap();
    assert_eq!(prefix_values.len(), 3);

    // Tear down the background server.
    server_handle.abort();
}
/// Verify the status endpoint of a single-node cluster reports the node
/// itself (id 1) as leader with a non-zero Raft term.
#[tokio::test]
async fn test_cluster_status() {
    let (config, _temp_dir) = test_config(23800);
    let api_addr = config.network.api_addr;
    let server = Server::new(config).await.unwrap();
    let server_handle = tokio::spawn(async move {
        let _ = server.run().await;
    });
    // BUGFIX: was 500ms, which is not reliably enough time for leader
    // election on CI/constrained environments — test_single_node_kv_operations
    // was bumped to 2000ms for exactly this reason. A premature status() call
    // makes the leader/term assertions below flake.
    sleep(Duration::from_millis(2000)).await;
    let mut client = Client::connect(format!("http://{}", api_addr))
        .await
        .unwrap();
    let status = client.status().await.unwrap();
    // Single-node cluster: the lone node must be leader with a live term.
    assert_eq!(status.leader, 1);
    assert!(status.raft_term > 0);
    server_handle.abort();
}
/// Verify the string convenience wrappers (`put_str`/`get_str`) round-trip
/// a value through a single-node cluster.
#[tokio::test]
async fn test_string_convenience_methods() {
    let (config, _temp_dir) = test_config(23810);
    let api_addr = config.network.api_addr;
    let server = Server::new(config).await.unwrap();
    let server_handle = tokio::spawn(async move {
        let _ = server.run().await;
    });
    // BUGFIX: was 500ms; the first put_str below has no retry loop, so it
    // fails outright if leader election has not finished. Use the same 2000ms
    // wait test_single_node_kv_operations adopted for CI/constrained machines.
    sleep(Duration::from_millis(2000)).await;
    let mut client = Client::connect(format!("http://{}", api_addr))
        .await
        .unwrap();
    // Round-trip a &str value through the string helpers.
    client.put_str("/config/name", "chainfire").await.unwrap();
    let value = client.get_str("/config/name").await.unwrap();
    assert_eq!(value, Some("chainfire".to_string()));
    server_handle.abort();
}

View file

@ -1 +0,0 @@
MANIFEST-000005

View file

@ -1 +0,0 @@
9b9417c1-5d46-4b8a-b14e-ac341643df55

View file

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -1,684 +0,0 @@
# This is a RocksDB option file.
#
# For detailed file format spec, please refer to the example file
# in examples/rocksdb_option_file_example.ini
#
[Version]
rocksdb_version=10.5.1
options_file_version=1.1
[DBOptions]
compaction_readahead_size=2097152
strict_bytes_per_sync=false
bytes_per_sync=1048576
max_background_jobs=4
avoid_flush_during_shutdown=false
max_background_flushes=-1
delayed_write_rate=16777216
max_open_files=-1
max_subcompactions=1
writable_file_max_buffer_size=1048576
wal_bytes_per_sync=0
max_background_compactions=-1
max_total_wal_size=0
delete_obsolete_files_period_micros=21600000000
stats_dump_period_sec=600
stats_history_buffer_size=1048576
stats_persist_period_sec=600
follower_refresh_catchup_period_ms=10000
enforce_single_del_contracts=true
lowest_used_cache_tier=kNonVolatileBlockTier
bgerror_resume_retry_interval=1000000
metadata_write_temperature=kUnknown
best_efforts_recovery=false
log_readahead_size=0
write_identity_file=true
write_dbid_to_manifest=true
prefix_seek_opt_in_only=false
wal_compression=kNoCompression
manual_wal_flush=false
db_host_id=__hostname__
two_write_queues=false
allow_ingest_behind=false
skip_checking_sst_file_sizes_on_db_open=false
flush_verify_memtable_count=true
atomic_flush=false
verify_sst_unique_id_in_manifest=true
skip_stats_update_on_db_open=false
track_and_verify_wals=false
track_and_verify_wals_in_manifest=false
compaction_verify_record_count=true
paranoid_checks=true
create_if_missing=true
max_write_batch_group_size_bytes=1048576
follower_catchup_retry_count=10
avoid_flush_during_recovery=false
file_checksum_gen_factory=nullptr
enable_thread_tracking=false
allow_fallocate=true
allow_data_in_errors=false
error_if_exists=false
use_direct_io_for_flush_and_compaction=false
background_close_inactive_wals=false
create_missing_column_families=true
WAL_size_limit_MB=0
use_direct_reads=false
persist_stats_to_disk=false
allow_2pc=false
max_log_file_size=0
is_fd_close_on_exec=true
avoid_unnecessary_blocking_io=false
max_file_opening_threads=16
wal_filter=nullptr
wal_write_temperature=kUnknown
follower_catchup_retry_wait_ms=100
allow_mmap_reads=false
allow_mmap_writes=false
use_adaptive_mutex=false
use_fsync=false
table_cache_numshardbits=6
dump_malloc_stats=false
db_write_buffer_size=0
keep_log_file_num=1000
max_bgerror_resume_count=2147483647
allow_concurrent_memtable_write=true
recycle_log_file_num=0
log_file_time_to_roll=0
manifest_preallocation_size=4194304
enable_write_thread_adaptive_yield=true
WAL_ttl_seconds=0
max_manifest_file_size=1073741824
wal_recovery_mode=kPointInTimeRecovery
enable_pipelined_write=false
write_thread_slow_yield_usec=3
unordered_write=false
write_thread_max_yield_usec=100
advise_random_on_open=true
info_log_level=INFO_LEVEL
[CFOptions "default"]
memtable_max_range_deletions=0
compression_manager=nullptr
compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
paranoid_memory_checks=false
memtable_avg_op_scan_flush_trigger=0
block_protection_bytes_per_key=0
uncache_aggressiveness=0
bottommost_file_compaction_delay=0
memtable_protection_bytes_per_key=0
experimental_mempurge_threshold=0.000000
bottommost_compression=kDisableCompressionOption
sample_for_compression=0
prepopulate_blob_cache=kDisable
blob_file_starting_level=0
blob_compaction_readahead_size=0
table_factory=BlockBasedTable
max_successive_merges=0
max_write_buffer_number=2
prefix_extractor=nullptr
memtable_huge_page_size=0
write_buffer_size=67108864
strict_max_successive_merges=false
arena_block_size=1048576
memtable_op_scan_flush_trigger=0
level0_file_num_compaction_trigger=4
report_bg_io_stats=false
inplace_update_num_locks=10000
memtable_prefix_bloom_size_ratio=0.000000
level0_stop_writes_trigger=36
blob_compression_type=kNoCompression
level0_slowdown_writes_trigger=20
hard_pending_compaction_bytes_limit=274877906944
target_file_size_multiplier=1
bottommost_compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
paranoid_file_checks=false
blob_garbage_collection_force_threshold=1.000000
enable_blob_files=false
soft_pending_compaction_bytes_limit=68719476736
target_file_size_base=67108864
max_compaction_bytes=1677721600
disable_auto_compactions=false
min_blob_size=0
memtable_whole_key_filtering=false
max_bytes_for_level_base=268435456
last_level_temperature=kUnknown
preserve_internal_time_seconds=0
compaction_options_fifo={trivial_copy_buffer_size=4096;allow_trivial_copy_when_change_temperature=false;file_temperature_age_thresholds=;allow_compaction=false;age_for_warm=0;max_table_files_size=1073741824;}
max_bytes_for_level_multiplier=10.000000
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
max_sequential_skip_in_iterations=8
compression=kSnappyCompression
default_write_temperature=kUnknown
compaction_options_universal={reduce_file_locking=false;incremental=false;compression_size_percent=-1;allow_trivial_move=false;max_size_amplification_percent=200;max_merge_width=4294967295;stop_style=kCompactionStopStyleTotalSize;min_merge_width=2;max_read_amp=-1;size_ratio=1;}
blob_garbage_collection_age_cutoff=0.250000
ttl=2592000
periodic_compaction_seconds=0
preclude_last_level_data_seconds=0
blob_file_size=268435456
enable_blob_garbage_collection=false
persist_user_defined_timestamps=true
compaction_pri=kMinOverlappingRatio
compaction_filter_factory=nullptr
comparator=leveldb.BytewiseComparator
bloom_locality=0
merge_operator=nullptr
compaction_filter=nullptr
level_compaction_dynamic_level_bytes=true
optimize_filters_for_hits=false
inplace_update_support=false
max_write_buffer_size_to_maintain=0
memtable_factory=SkipListFactory
memtable_insert_with_hint_prefix_extractor=nullptr
num_levels=7
force_consistency_checks=true
sst_partitioner_factory=nullptr
default_temperature=kUnknown
disallow_memtable_writes=false
compaction_style=kCompactionStyleLevel
min_write_buffer_number_to_merge=1
[TableOptions/BlockBasedTable "default"]
num_file_reads_for_auto_readahead=2
initial_auto_readahead_size=8192
metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;}
enable_index_compression=true
verify_compression=false
prepopulate_block_cache=kDisable
format_version=6
use_delta_encoding=true
pin_top_level_index_and_filter=true
read_amp_bytes_per_bit=0
decouple_partitioned_filters=false
partition_filters=false
metadata_block_size=4096
max_auto_readahead_size=262144
index_block_restart_interval=1
block_size_deviation=10
block_size=4096
detect_filter_construct_corruption=false
no_block_cache=false
checksum=kXXH3
filter_policy=nullptr
data_block_hash_table_util_ratio=0.750000
block_restart_interval=16
index_type=kBinarySearch
pin_l0_filter_and_index_blocks_in_cache=false
data_block_index_type=kDataBlockBinarySearch
cache_index_and_filter_blocks_with_high_priority=true
whole_key_filtering=true
index_shortening=kShortenSeparators
cache_index_and_filter_blocks=false
block_align=false
optimize_filters_for_memory=true
flush_block_policy_factory=FlushBlockBySizePolicyFactory
[CFOptions "raft_logs"]
memtable_max_range_deletions=0
compression_manager=nullptr
compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
paranoid_memory_checks=false
memtable_avg_op_scan_flush_trigger=0
block_protection_bytes_per_key=0
uncache_aggressiveness=0
bottommost_file_compaction_delay=0
memtable_protection_bytes_per_key=0
experimental_mempurge_threshold=0.000000
bottommost_compression=kDisableCompressionOption
sample_for_compression=0
prepopulate_blob_cache=kDisable
blob_file_starting_level=0
blob_compaction_readahead_size=0
table_factory=BlockBasedTable
max_successive_merges=0
max_write_buffer_number=3
prefix_extractor=nullptr
memtable_huge_page_size=0
write_buffer_size=67108864
strict_max_successive_merges=false
arena_block_size=1048576
memtable_op_scan_flush_trigger=0
level0_file_num_compaction_trigger=4
report_bg_io_stats=false
inplace_update_num_locks=10000
memtable_prefix_bloom_size_ratio=0.000000
level0_stop_writes_trigger=36
blob_compression_type=kNoCompression
level0_slowdown_writes_trigger=20
hard_pending_compaction_bytes_limit=274877906944
target_file_size_multiplier=1
bottommost_compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
paranoid_file_checks=false
blob_garbage_collection_force_threshold=1.000000
enable_blob_files=false
soft_pending_compaction_bytes_limit=68719476736
target_file_size_base=67108864
max_compaction_bytes=1677721600
disable_auto_compactions=false
min_blob_size=0
memtable_whole_key_filtering=false
max_bytes_for_level_base=268435456
last_level_temperature=kUnknown
preserve_internal_time_seconds=0
compaction_options_fifo={trivial_copy_buffer_size=4096;allow_trivial_copy_when_change_temperature=false;file_temperature_age_thresholds=;allow_compaction=false;age_for_warm=0;max_table_files_size=1073741824;}
max_bytes_for_level_multiplier=10.000000
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
max_sequential_skip_in_iterations=8
compression=kSnappyCompression
default_write_temperature=kUnknown
compaction_options_universal={reduce_file_locking=false;incremental=false;compression_size_percent=-1;allow_trivial_move=false;max_size_amplification_percent=200;max_merge_width=4294967295;stop_style=kCompactionStopStyleTotalSize;min_merge_width=2;max_read_amp=-1;size_ratio=1;}
blob_garbage_collection_age_cutoff=0.250000
ttl=2592000
periodic_compaction_seconds=0
preclude_last_level_data_seconds=0
blob_file_size=268435456
enable_blob_garbage_collection=false
persist_user_defined_timestamps=true
compaction_pri=kMinOverlappingRatio
compaction_filter_factory=nullptr
comparator=leveldb.BytewiseComparator
bloom_locality=0
merge_operator=nullptr
compaction_filter=nullptr
level_compaction_dynamic_level_bytes=true
optimize_filters_for_hits=false
inplace_update_support=false
max_write_buffer_size_to_maintain=0
memtable_factory=SkipListFactory
memtable_insert_with_hint_prefix_extractor=nullptr
num_levels=7
force_consistency_checks=true
sst_partitioner_factory=nullptr
default_temperature=kUnknown
disallow_memtable_writes=false
compaction_style=kCompactionStyleLevel
min_write_buffer_number_to_merge=1
[TableOptions/BlockBasedTable "raft_logs"]
num_file_reads_for_auto_readahead=2
initial_auto_readahead_size=8192
metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;}
enable_index_compression=true
verify_compression=false
prepopulate_block_cache=kDisable
format_version=6
use_delta_encoding=true
pin_top_level_index_and_filter=true
read_amp_bytes_per_bit=0
decouple_partitioned_filters=false
partition_filters=false
metadata_block_size=4096
max_auto_readahead_size=262144
index_block_restart_interval=1
block_size_deviation=10
block_size=4096
detect_filter_construct_corruption=false
no_block_cache=false
checksum=kXXH3
filter_policy=nullptr
data_block_hash_table_util_ratio=0.750000
block_restart_interval=16
index_type=kBinarySearch
pin_l0_filter_and_index_blocks_in_cache=false
data_block_index_type=kDataBlockBinarySearch
cache_index_and_filter_blocks_with_high_priority=true
whole_key_filtering=true
index_shortening=kShortenSeparators
cache_index_and_filter_blocks=false
block_align=false
optimize_filters_for_memory=true
flush_block_policy_factory=FlushBlockBySizePolicyFactory
[CFOptions "raft_meta"]
memtable_max_range_deletions=0
compression_manager=nullptr
compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
paranoid_memory_checks=false
memtable_avg_op_scan_flush_trigger=0
block_protection_bytes_per_key=0
uncache_aggressiveness=0
bottommost_file_compaction_delay=0
memtable_protection_bytes_per_key=0
experimental_mempurge_threshold=0.000000
bottommost_compression=kDisableCompressionOption
sample_for_compression=0
prepopulate_blob_cache=kDisable
blob_file_starting_level=0
blob_compaction_readahead_size=0
table_factory=BlockBasedTable
max_successive_merges=0
max_write_buffer_number=2
prefix_extractor=nullptr
memtable_huge_page_size=0
write_buffer_size=16777216
strict_max_successive_merges=false
arena_block_size=1048576
memtable_op_scan_flush_trigger=0
level0_file_num_compaction_trigger=4
report_bg_io_stats=false
inplace_update_num_locks=10000
memtable_prefix_bloom_size_ratio=0.000000
level0_stop_writes_trigger=36
blob_compression_type=kNoCompression
level0_slowdown_writes_trigger=20
hard_pending_compaction_bytes_limit=274877906944
target_file_size_multiplier=1
bottommost_compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
paranoid_file_checks=false
blob_garbage_collection_force_threshold=1.000000
enable_blob_files=false
soft_pending_compaction_bytes_limit=68719476736
target_file_size_base=67108864
max_compaction_bytes=1677721600
disable_auto_compactions=false
min_blob_size=0
memtable_whole_key_filtering=false
max_bytes_for_level_base=268435456
last_level_temperature=kUnknown
preserve_internal_time_seconds=0
compaction_options_fifo={trivial_copy_buffer_size=4096;allow_trivial_copy_when_change_temperature=false;file_temperature_age_thresholds=;allow_compaction=false;age_for_warm=0;max_table_files_size=1073741824;}
max_bytes_for_level_multiplier=10.000000
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
max_sequential_skip_in_iterations=8
compression=kSnappyCompression
default_write_temperature=kUnknown
compaction_options_universal={reduce_file_locking=false;incremental=false;compression_size_percent=-1;allow_trivial_move=false;max_size_amplification_percent=200;max_merge_width=4294967295;stop_style=kCompactionStopStyleTotalSize;min_merge_width=2;max_read_amp=-1;size_ratio=1;}
blob_garbage_collection_age_cutoff=0.250000
ttl=2592000
periodic_compaction_seconds=0
preclude_last_level_data_seconds=0
blob_file_size=268435456
enable_blob_garbage_collection=false
persist_user_defined_timestamps=true
compaction_pri=kMinOverlappingRatio
compaction_filter_factory=nullptr
comparator=leveldb.BytewiseComparator
bloom_locality=0
merge_operator=nullptr
compaction_filter=nullptr
level_compaction_dynamic_level_bytes=true
optimize_filters_for_hits=false
inplace_update_support=false
max_write_buffer_size_to_maintain=0
memtable_factory=SkipListFactory
memtable_insert_with_hint_prefix_extractor=nullptr
num_levels=7
force_consistency_checks=true
sst_partitioner_factory=nullptr
default_temperature=kUnknown
disallow_memtable_writes=false
compaction_style=kCompactionStyleLevel
min_write_buffer_number_to_merge=1
[TableOptions/BlockBasedTable "raft_meta"]
num_file_reads_for_auto_readahead=2
initial_auto_readahead_size=8192
metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;}
enable_index_compression=true
verify_compression=false
prepopulate_block_cache=kDisable
format_version=6
use_delta_encoding=true
pin_top_level_index_and_filter=true
read_amp_bytes_per_bit=0
decouple_partitioned_filters=false
partition_filters=false
metadata_block_size=4096
max_auto_readahead_size=262144
index_block_restart_interval=1
block_size_deviation=10
block_size=4096
detect_filter_construct_corruption=false
no_block_cache=false
checksum=kXXH3
filter_policy=nullptr
data_block_hash_table_util_ratio=0.750000
block_restart_interval=16
index_type=kBinarySearch
pin_l0_filter_and_index_blocks_in_cache=false
data_block_index_type=kDataBlockBinarySearch
cache_index_and_filter_blocks_with_high_priority=true
whole_key_filtering=true
index_shortening=kShortenSeparators
cache_index_and_filter_blocks=false
block_align=false
optimize_filters_for_memory=true
flush_block_policy_factory=FlushBlockBySizePolicyFactory
[CFOptions "key_value"]
memtable_max_range_deletions=0
compression_manager=nullptr
compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
paranoid_memory_checks=false
memtable_avg_op_scan_flush_trigger=0
block_protection_bytes_per_key=0
uncache_aggressiveness=0
bottommost_file_compaction_delay=0
memtable_protection_bytes_per_key=0
experimental_mempurge_threshold=0.000000
bottommost_compression=kDisableCompressionOption
sample_for_compression=0
prepopulate_blob_cache=kDisable
blob_file_starting_level=0
blob_compaction_readahead_size=0
table_factory=BlockBasedTable
max_successive_merges=0
max_write_buffer_number=4
prefix_extractor=rocksdb.FixedPrefix.8
memtable_huge_page_size=0
write_buffer_size=134217728
strict_max_successive_merges=false
arena_block_size=1048576
memtable_op_scan_flush_trigger=0
level0_file_num_compaction_trigger=4
report_bg_io_stats=false
inplace_update_num_locks=10000
memtable_prefix_bloom_size_ratio=0.000000
level0_stop_writes_trigger=36
blob_compression_type=kNoCompression
level0_slowdown_writes_trigger=20
hard_pending_compaction_bytes_limit=274877906944
target_file_size_multiplier=1
bottommost_compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
paranoid_file_checks=false
blob_garbage_collection_force_threshold=1.000000
enable_blob_files=false
soft_pending_compaction_bytes_limit=68719476736
target_file_size_base=67108864
max_compaction_bytes=1677721600
disable_auto_compactions=false
min_blob_size=0
memtable_whole_key_filtering=false
max_bytes_for_level_base=268435456
last_level_temperature=kUnknown
preserve_internal_time_seconds=0
compaction_options_fifo={trivial_copy_buffer_size=4096;allow_trivial_copy_when_change_temperature=false;file_temperature_age_thresholds=;allow_compaction=false;age_for_warm=0;max_table_files_size=1073741824;}
max_bytes_for_level_multiplier=10.000000
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
max_sequential_skip_in_iterations=8
compression=kSnappyCompression
default_write_temperature=kUnknown
compaction_options_universal={reduce_file_locking=false;incremental=false;compression_size_percent=-1;allow_trivial_move=false;max_size_amplification_percent=200;max_merge_width=4294967295;stop_style=kCompactionStopStyleTotalSize;min_merge_width=2;max_read_amp=-1;size_ratio=1;}
blob_garbage_collection_age_cutoff=0.250000
ttl=2592000
periodic_compaction_seconds=0
preclude_last_level_data_seconds=0
blob_file_size=268435456
enable_blob_garbage_collection=false
persist_user_defined_timestamps=true
compaction_pri=kMinOverlappingRatio
compaction_filter_factory=nullptr
comparator=leveldb.BytewiseComparator
bloom_locality=0
merge_operator=nullptr
compaction_filter=nullptr
level_compaction_dynamic_level_bytes=true
optimize_filters_for_hits=false
inplace_update_support=false
max_write_buffer_size_to_maintain=0
memtable_factory=SkipListFactory
memtable_insert_with_hint_prefix_extractor=nullptr
num_levels=7
force_consistency_checks=true
sst_partitioner_factory=nullptr
default_temperature=kUnknown
disallow_memtable_writes=false
compaction_style=kCompactionStyleLevel
min_write_buffer_number_to_merge=1
[TableOptions/BlockBasedTable "key_value"]
num_file_reads_for_auto_readahead=2
initial_auto_readahead_size=8192
metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;}
enable_index_compression=true
verify_compression=false
prepopulate_block_cache=kDisable
format_version=6
use_delta_encoding=true
pin_top_level_index_and_filter=true
read_amp_bytes_per_bit=0
decouple_partitioned_filters=false
partition_filters=false
metadata_block_size=4096
max_auto_readahead_size=262144
index_block_restart_interval=1
block_size_deviation=10
block_size=4096
detect_filter_construct_corruption=false
no_block_cache=false
checksum=kXXH3
filter_policy=nullptr
data_block_hash_table_util_ratio=0.750000
block_restart_interval=16
index_type=kBinarySearch
pin_l0_filter_and_index_blocks_in_cache=false
data_block_index_type=kDataBlockBinarySearch
cache_index_and_filter_blocks_with_high_priority=true
whole_key_filtering=true
index_shortening=kShortenSeparators
cache_index_and_filter_blocks=false
block_align=false
optimize_filters_for_memory=true
flush_block_policy_factory=FlushBlockBySizePolicyFactory
[CFOptions "snapshot"]
memtable_max_range_deletions=0
compression_manager=nullptr
compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
paranoid_memory_checks=false
memtable_avg_op_scan_flush_trigger=0
block_protection_bytes_per_key=0
uncache_aggressiveness=0
bottommost_file_compaction_delay=0
memtable_protection_bytes_per_key=0
experimental_mempurge_threshold=0.000000
bottommost_compression=kDisableCompressionOption
sample_for_compression=0
prepopulate_blob_cache=kDisable
blob_file_starting_level=0
blob_compaction_readahead_size=0
table_factory=BlockBasedTable
max_successive_merges=0
max_write_buffer_number=2
prefix_extractor=nullptr
memtable_huge_page_size=0
write_buffer_size=33554432
strict_max_successive_merges=false
arena_block_size=1048576
memtable_op_scan_flush_trigger=0
level0_file_num_compaction_trigger=4
report_bg_io_stats=false
inplace_update_num_locks=10000
memtable_prefix_bloom_size_ratio=0.000000
level0_stop_writes_trigger=36
blob_compression_type=kNoCompression
level0_slowdown_writes_trigger=20
hard_pending_compaction_bytes_limit=274877906944
target_file_size_multiplier=1
bottommost_compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
paranoid_file_checks=false
blob_garbage_collection_force_threshold=1.000000
enable_blob_files=false
soft_pending_compaction_bytes_limit=68719476736
target_file_size_base=67108864
max_compaction_bytes=1677721600
disable_auto_compactions=false
min_blob_size=0
memtable_whole_key_filtering=false
max_bytes_for_level_base=268435456
last_level_temperature=kUnknown
preserve_internal_time_seconds=0
compaction_options_fifo={trivial_copy_buffer_size=4096;allow_trivial_copy_when_change_temperature=false;file_temperature_age_thresholds=;allow_compaction=false;age_for_warm=0;max_table_files_size=1073741824;}
max_bytes_for_level_multiplier=10.000000
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
max_sequential_skip_in_iterations=8
compression=kSnappyCompression
default_write_temperature=kUnknown
compaction_options_universal={reduce_file_locking=false;incremental=false;compression_size_percent=-1;allow_trivial_move=false;max_size_amplification_percent=200;max_merge_width=4294967295;stop_style=kCompactionStopStyleTotalSize;min_merge_width=2;max_read_amp=-1;size_ratio=1;}
blob_garbage_collection_age_cutoff=0.250000
ttl=2592000
periodic_compaction_seconds=0
preclude_last_level_data_seconds=0
blob_file_size=268435456
enable_blob_garbage_collection=false
persist_user_defined_timestamps=true
compaction_pri=kMinOverlappingRatio
compaction_filter_factory=nullptr
comparator=leveldb.BytewiseComparator
bloom_locality=0
merge_operator=nullptr
compaction_filter=nullptr
level_compaction_dynamic_level_bytes=true
optimize_filters_for_hits=false
inplace_update_support=false
max_write_buffer_size_to_maintain=0
memtable_factory=SkipListFactory
memtable_insert_with_hint_prefix_extractor=nullptr
num_levels=7
force_consistency_checks=true
sst_partitioner_factory=nullptr
default_temperature=kUnknown
disallow_memtable_writes=false
compaction_style=kCompactionStyleLevel
min_write_buffer_number_to_merge=1
[TableOptions/BlockBasedTable "snapshot"]
num_file_reads_for_auto_readahead=2
initial_auto_readahead_size=8192
metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;}
enable_index_compression=true
verify_compression=false
prepopulate_block_cache=kDisable
format_version=6
use_delta_encoding=true
pin_top_level_index_and_filter=true
read_amp_bytes_per_bit=0
decouple_partitioned_filters=false
partition_filters=false
metadata_block_size=4096
max_auto_readahead_size=262144
index_block_restart_interval=1
block_size_deviation=10
block_size=4096
detect_filter_construct_corruption=false
no_block_cache=false
checksum=kXXH3
filter_policy=nullptr
data_block_hash_table_util_ratio=0.750000
block_restart_interval=16
index_type=kBinarySearch
pin_l0_filter_and_index_blocks_in_cache=false
data_block_index_type=kDataBlockBinarySearch
cache_index_and_filter_blocks_with_high_priority=true
whole_key_filtering=true
index_shortening=kShortenSeparators
cache_index_and_filter_blocks=false
block_align=false
optimize_filters_for_memory=true
flush_block_policy_factory=FlushBlockBySizePolicyFactory

View file

@ -1,240 +0,0 @@
# Chainfire T003 Feature Gap Analysis
**Audit Date:** 2025-12-08
**Spec Version:** 1.0
**Implementation Path:** `/home/centra/cloud/chainfire/crates/`
---
## Executive Summary
**Total Features Analyzed:** 32
**Implemented:** 16 (50.0%)
**Partially Implemented:** 7 (21.9%)
**Missing:** 9 (28.1%)
The core KV operations, Raft consensus, Watch functionality, and basic cluster management are implemented and functional. Critical gaps exist in TTL/Lease management, read consistency controls, and transaction completeness. Production readiness is blocked by missing lease service and lack of authentication.
---
## Feature Gap Matrix
| Feature | Spec Section | Status | Priority | Complexity | Notes |
|---------|--------------|--------|----------|------------|-------|
| **Lease Service (TTL)** | 8.3, 4.1 | ❌ Missing | P0 | Medium (3-5d) | Protocol has lease field but no Lease gRPC service; critical for production |
| **TTL Expiration Logic** | 4.1, spec line 22-23 | ❌ Missing | P0 | Medium (3-5d) | lease_id stored but no background expiration worker |
| **Read Consistency Levels** | 4.1 | ❌ Missing | P0 | Small (1-2d) | Local/Serializable/Linearizable not implemented; all reads are undefined consistency |
| **Range Ops in Transactions** | 4.2, line 224-229 | ⚠️ Partial | P1 | Small (1-2d) | RequestOp has RangeRequest but returns dummy Delete op (kv_service.rs:224-229) |
| **Transaction Responses** | 3.1, kv_service.rs:194 | ⚠️ Partial | P1 | Small (1-2d) | TxnResponse.responses is empty vec; TODO comment in code |
| **Point-in-Time Reads** | 3.1, 7.3 | ⚠️ Partial | P1 | Medium (3-5d) | RangeRequest has revision field but KvStore doesn't use it |
| **StorageBackend Trait** | 3.3 | ❌ Missing | P1 | Medium (3-5d) | Spec defines trait (lines 166-174) but not in chainfire-core |
| **Prometheus Metrics** | 7.2 | ❌ Missing | P1 | Small (1-2d) | Spec mentions endpoint but no implementation |
| **Health Check Service** | 7.2 | ❌ Missing | P1 | Small (1d) | gRPC health check not visible |
| **Authentication** | 6.1 | ❌ Missing | P2 | Large (1w+) | Spec says "Planned"; mTLS for peers, tokens for clients |
| **Authorization/RBAC** | 6.2 | ❌ Missing | P2 | Large (1w+) | Requires IAM integration |
| **Namespace Quotas** | 6.3 | ❌ Missing | P2 | Medium (3-5d) | Per-namespace resource limits |
| **KV Service - Range** | 3.1 | ✅ Implemented | - | - | Single key, range scan, prefix scan all working |
| **KV Service - Put** | 3.1 | ✅ Implemented | - | - | Including prev_kv support |
| **KV Service - Delete** | 3.1 | ✅ Implemented | - | - | Single and range delete working |
| **KV Service - Txn (Basic)** | 3.1 | ✅ Implemented | - | - | Compare conditions and basic ops working |
| **Watch Service** | 3.1 | ✅ Implemented | - | - | Bidirectional streaming, create/cancel/progress |
| **Cluster Service - All** | 3.1 | ✅ Implemented | - | - | MemberAdd/Remove/List/Status all present |
| **Client Library - Core** | 3.2 | ✅ Implemented | - | - | Connect, put, get, delete, CAS implemented |
| **Client - Prefix Scan** | 3.2 | ✅ Implemented | - | - | get_prefix method exists |
| **ClusterEventHandler** | 3.3 | ✅ Implemented | - | - | All 8 callbacks defined in callbacks.rs |
| **KvEventHandler** | 3.3 | ✅ Implemented | - | - | on_key_changed, on_key_deleted, on_prefix_changed |
| **ClusterBuilder** | 3.4 | ✅ Implemented | - | - | Embeddable library with builder pattern |
| **MVCC Support** | 4.3 | ✅ Implemented | - | - | Global revision counter, create/mod revisions tracked |
| **RocksDB Storage** | 4.3 | ✅ Implemented | - | - | Column families: raft_logs, raft_meta, key_value, snapshot |
| **Raft Integration** | 2.0 | ✅ Implemented | - | - | OpenRaft 0.9 integrated, Vote/AppendEntries/Snapshot RPCs |
| **SWIM Gossip** | 2.1 | ⚠️ Partial | P2 | - | chainfire-gossip crate exists but integration unclear |
| **Server Binary** | 7.1 | ✅ Implemented | - | - | CLI with config file, env vars, bootstrap support |
| **Config Management** | 5.0 | ✅ Implemented | - | - | TOML config, env vars, CLI overrides |
| **Watch - Historical Replay** | 3.1 | ⚠️ Partial | P2 | Medium (3-5d) | start_revision exists in proto but historical storage unclear |
| **Snapshot & Backup** | 7.3 | ⚠️ Partial | P2 | Small (1-2d) | Raft snapshot exists but manual backup procedure not documented |
| **etcd Compatibility** | 8.3 | ⚠️ Partial | P2 | - | API similar but package names differ; missing Lease service breaks compatibility |
---
## Critical Gaps (P0)
### 1. Lease Service & TTL Expiration
**Impact:** Blocks production use cases requiring automatic key expiration (sessions, locks, ephemeral data)
**Evidence:**
- `/home/centra/cloud/chainfire/proto/chainfire.proto` has no `Lease` service definition
- `KvEntry` has `lease_id: Option<i64>` field (types/kv.rs:23) but no expiration logic
- No background worker to delete expired keys
- etcd compatibility broken without Lease service
**Fix Required:**
1. Add Lease service to proto: `LeaseGrant`, `LeaseRevoke`, `LeaseKeepAlive`, `LeaseTimeToLive`
2. Implement lease storage and expiration worker in chainfire-storage
3. Wire lease_id checks to KV operations
4. Add lease_id index for efficient expiration queries
---
### 2. Read Consistency Levels
**Impact:** Cannot guarantee linearizable reads; stale reads possible on followers
**Evidence:**
- Spec defines `ReadConsistency` enum (spec lines 208-215)
- No implementation in chainfire-storage or chainfire-api
- RangeRequest in kv_service.rs always reads from local storage without consistency checks
**Fix Required:**
1. Add consistency parameter to RangeRequest
2. Implement leader verification for Linearizable reads
3. Add committed index check for Serializable reads
4. Default to Linearizable for safety
---
### 3. Range Operations in Transactions
**Impact:** Cannot atomically read-then-write in transactions; limits CAS use cases
**Evidence:**
```rust
// /home/centra/cloud/chainfire/crates/chainfire-api/src/kv_service.rs:224-229
crate::proto::request_op::Request::RequestRange(_) => {
// Range operations in transactions are not supported yet
TxnOp::Delete { key: vec![] } // Returns dummy operation!
}
```
**Fix Required:**
1. Extend `chainfire_types::command::TxnOp` to include `Range` variant
2. Update state_machine.rs to handle read operations in transactions
3. Return range results in TxnResponse.responses
---
## Important Gaps (P1)
### 4. Transaction Response Completeness
**Evidence:**
```rust
// /home/centra/cloud/chainfire/crates/chainfire-api/src/kv_service.rs:194
Ok(Response::new(TxnResponse {
header: Some(self.make_header(response.revision)),
succeeded: response.succeeded,
responses: vec![], // TODO: fill in responses
}))
```
**Fix:** Collect operation results during txn execution and populate responses vector
---
### 5. Point-in-Time Reads (MVCC Historical Queries)
**Evidence:**
- RangeRequest has `revision` field (proto/chainfire.proto:78)
- KvStore.range() doesn't use revision parameter
- No revision-indexed storage in RocksDB
**Fix:** Implement versioned key storage or revision-based snapshots
---
### 6. StorageBackend Trait Abstraction
**Evidence:**
- Spec defines trait (lines 166-174) for pluggable backends
- chainfire-storage is RocksDB-only
- No trait in chainfire-core/src/
**Fix:** Extract trait and implement for RocksDB; enables memory backend testing
---
### 7. Observability
**Gaps:**
- No Prometheus metrics (spec mentions endpoint at 7.2)
- No gRPC health check service
- Limited structured logging
**Fix:** Add metrics crate, implement health checks, expose /metrics endpoint
---
## Nice-to-Have Gaps (P2)
- **Authentication/Authorization:** Spec marks as "Planned" - mTLS and RBAC
- **Namespace Quotas:** Resource limits per tenant
- **SWIM Gossip Integration:** chainfire-gossip crate exists but usage unclear
- **Watch Historical Replay:** start_revision in proto but storage unclear
- **Advanced etcd Compat:** Package name differences, field naming variations
---
## Key Findings
### Strengths
1. **Solid Core Implementation:** KV operations, Raft consensus, and basic transactions work well
2. **Watch System:** Fully functional with bidirectional streaming and event dispatch
3. **Client Library:** Well-designed with CAS and convenience methods
4. **Architecture:** Clean separation of concerns across crates
5. **Testing:** State machine has unit tests for core operations
### Weaknesses
1. **Incomplete Transactions:** Missing range ops and response population breaks advanced use cases
2. **No TTL Support:** Critical for production; requires full Lease service implementation
3. **Undefined Read Consistency:** Dangerous for distributed systems; needs immediate attention
4. **Limited Observability:** No metrics or health checks hinders production deployment
### Blockers for Production
1. Lease service implementation (P0)
2. Read consistency guarantees (P0)
3. Transaction completeness (P1)
4. Basic metrics/health checks (P1)
---
## Recommendations
### Phase 1: Production Readiness (2-3 weeks)
1. Implement Lease service and TTL expiration worker
2. Add read consistency levels (default to Linearizable)
3. Complete transaction responses
4. Add basic Prometheus metrics and health checks
### Phase 2: Feature Completeness (1-2 weeks)
1. Support range operations in transactions
2. Implement point-in-time reads
3. Extract StorageBackend trait
4. Document and test SWIM gossip integration
### Phase 3: Hardening (2-3 weeks)
1. Add authentication (mTLS for peers)
2. Implement basic authorization
3. Add namespace quotas
4. Comprehensive integration tests
---
## Appendix: Implementation Evidence
### Transaction Compare Logic
**Location:** `/home/centra/cloud/chainfire/crates/chainfire-storage/src/state_machine.rs:148-228`
- ✅ Supports Version, CreateRevision, ModRevision, Value comparisons
- ✅ Handles Equal, NotEqual, Greater, Less operators
- ✅ Atomic execution of success/failure ops
### Watch Implementation
**Location:** `/home/centra/cloud/chainfire/crates/chainfire-watch/`
- ✅ WatchRegistry with event dispatch
- ✅ WatchStream for bidirectional gRPC
- ✅ KeyMatcher for prefix/range watches
- ✅ Integration with state machine (state_machine.rs:82-88)
### Client CAS Example
**Location:** `/home/centra/cloud/chainfire/chainfire-client/src/client.rs:228-299`
- ✅ Uses transactions for compare-and-swap
- ✅ Returns CasOutcome with current/new versions
- ⚠️ Fallback read on failure uses range op (demonstrates txn range gap)
---
**Report Generated:** 2025-12-08
**Auditor:** Claude Code Agent
**Next Review:** After Phase 1 implementation

1165
coronafs/Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

32
coronafs/Cargo.toml Normal file
View file

@ -0,0 +1,32 @@
[workspace]
resolver = "2"
members = [
"crates/coronafs-server",
]
[workspace.package]
version = "0.1.0"
edition = "2021"
license = "MIT OR Apache-2.0"
rust-version = "1.75"
authors = ["PhotonCloud Contributors"]
repository = "https://github.com/photoncloud/photoncloud"
[workspace.dependencies]
axum = "0.8"
clap = { version = "4", features = ["derive"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tokio = { version = "1.40", features = ["full"] }
toml = "0.8"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
anyhow = "1.0"
thiserror = "1.0"
chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] }
[workspace.lints.rust]
unsafe_code = "deny"
[workspace.lints.clippy]
all = "warn"

18
coronafs/README.md Normal file
View file

@ -0,0 +1,18 @@
# CoronaFS
CoronaFS is PhotonCloud's mutable VM-volume layer.
Current implementation:
- custom block backend, not NFS
- control API on `coronafs-server`
- raw volume data stored under `/var/lib/coronafs/volumes`
- exported to workers as `nbd://` targets via `qemu-nbd`
- primary consumer: `plasmavmc` managed VM volumes
Intentional split:
- mutable VM volumes live on CoronaFS
- immutable VM images live in LightningStor object storage
This keeps VM root/data disks on a shared block path while leaving image distribution on the object layer.

View file

@ -0,0 +1,27 @@
[package]
name = "coronafs-server"
version.workspace = true
edition.workspace = true
license.workspace = true
rust-version.workspace = true
[[bin]]
name = "coronafs-server"
path = "src/main.rs"
[dependencies]
axum = { workspace = true }
clap = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true }
toml = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }
anyhow = { workspace = true }
thiserror = { workspace = true }
chrono = { workspace = true }
futures-util = "0.3"
[lints]
workspace = true

View file

@ -0,0 +1,61 @@
use serde::{Deserialize, Serialize};
use std::net::SocketAddr;
use std::path::PathBuf;
/// Runtime configuration for the CoronaFS server.
///
/// Loaded from a TOML file; `#[serde(default)]` means any field omitted from
/// the file falls back to the value in the `Default` impl below.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct ServerConfig {
    /// Address the HTTP control API binds to.
    pub listen_addr: SocketAddr,
    /// Host name/IP handed out to clients — presumably embedded in the
    /// nbd:// export URIs; confirm against the export code.
    pub advertise_host: String,
    /// Root directory; volumes/metadata/pids live in subdirectories of it.
    pub data_dir: PathBuf,
    /// Address the NBD export processes bind to.
    pub export_bind_addr: String,
    /// First port of the NBD export port range.
    pub export_base_port: u16,
    /// Number of ports available in the export range.
    pub export_port_count: u16,
    /// Concurrent-client limit per export — presumably maps to
    /// `qemu-nbd --shared`; confirm.
    pub export_shared_clients: u16,
    /// Cache mode passed to the export process (e.g. "none").
    pub export_cache_mode: String,
    /// AIO mode passed to the export process (e.g. "io_uring").
    pub export_aio_mode: String,
    /// Discard (TRIM) mode for exports.
    pub export_discard_mode: String,
    /// Zero-detection mode for exports.
    pub export_detect_zeroes_mode: String,
    /// Whether to preallocate volume files at creation time.
    pub preallocate: bool,
    /// Whether writes are synced to disk eagerly.
    pub sync_on_write: bool,
    /// Path to the qemu-nbd binary (bare name resolves via PATH).
    pub qemu_nbd_path: PathBuf,
    /// Path to the qemu-img binary (bare name resolves via PATH).
    pub qemu_img_path: PathBuf,
    /// Default tracing filter when RUST_LOG is not set.
    pub log_level: String,
}
impl Default for ServerConfig {
fn default() -> Self {
Self {
listen_addr: "0.0.0.0:50088".parse().expect("valid listen addr"),
advertise_host: "127.0.0.1".to_string(),
data_dir: PathBuf::from("/var/lib/coronafs"),
export_bind_addr: "0.0.0.0".to_string(),
export_base_port: 11000,
export_port_count: 512,
export_shared_clients: 32,
export_cache_mode: "none".to_string(),
export_aio_mode: "io_uring".to_string(),
export_discard_mode: "unmap".to_string(),
export_detect_zeroes_mode: "unmap".to_string(),
preallocate: true,
sync_on_write: false,
qemu_nbd_path: PathBuf::from("qemu-nbd"),
qemu_img_path: PathBuf::from("qemu-img"),
log_level: "info".to_string(),
}
}
}
impl ServerConfig {
    /// Directory holding the raw volume image files.
    pub fn volume_dir(&self) -> PathBuf {
        self.subdir("volumes")
    }

    /// Directory holding per-volume metadata records.
    pub fn metadata_dir(&self) -> PathBuf {
        self.subdir("metadata")
    }

    /// Directory holding pid files for export processes.
    pub fn pid_dir(&self) -> PathBuf {
        self.subdir("pids")
    }

    /// Resolves a well-known subdirectory under `data_dir`.
    fn subdir(&self, name: &str) -> PathBuf {
        self.data_dir.join(name)
    }
}

View file

@ -0,0 +1,748 @@
mod config;
use anyhow::{anyhow, Context, Result};
use axum::body::Body;
use axum::extract::{Path, Query, State};
use axum::http::StatusCode;
use axum::response::{IntoResponse, Response};
use axum::routing::{get, post, put};
use axum::{Json, Router};
use clap::Parser;
use config::ServerConfig;
use futures_util::StreamExt;
use serde::{Deserialize, Serialize};
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
use std::collections::{HashMap, HashSet};
use std::path::{Path as FsPath, PathBuf};
use std::sync::Arc;
use tokio::fs;
use tokio::io::AsyncWriteExt;
use tokio::process::Command;
use tokio::sync::Mutex;
use tracing_subscriber::EnvFilter;
// Command-line arguments for the coronafs-server binary.
// (Plain `//` comments on purpose: clap derive turns `///` doc comments into
// user-visible help text, which would change CLI output.)
#[derive(Parser, Debug)]
#[command(author, version, about)]
struct Args {
    // Path to the TOML config file; server falls back to built-in defaults
    // when the file does not exist (see main()).
    #[arg(short, long, default_value = "coronafs.toml")]
    config: PathBuf,
}
/// Per-volume metadata record. Serde-derived, so presumably persisted under
/// `metadata_dir()` by code elsewhere in this file — confirm the format there.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct VolumeMetadata {
    /// Volume identifier (the `{id}` path segment of the API).
    id: String,
    /// Logical size of the volume in bytes.
    size_bytes: u64,
    /// NBD port the volume is exported on, if an export exists.
    port: Option<u16>,
    /// PID of the export process, if one was recorded.
    export_pid: Option<u32>,
    /// Creation timestamp (string-encoded; exact format set by the writer).
    created_at: String,
    /// Last-modification timestamp (string-encoded).
    updated_at: String,
}
/// JSON body returned by the volume API endpoints.
#[derive(Debug, Serialize)]
struct VolumeResponse {
    /// Volume identifier.
    id: String,
    /// Current logical size in bytes.
    size_bytes: u64,
    /// Server-side path of the backing file.
    path: String,
    /// Present only when the volume has an active export.
    export: Option<ExportResponse>,
}
/// Details of an active export, nested inside [`VolumeResponse`].
#[derive(Debug, Serialize)]
struct ExportResponse {
    /// Connection URI handed to workers — nbd:// per the README; the exact
    /// construction happens in code outside this view.
    uri: String,
    /// Port the export listens on.
    port: u16,
    /// Export process id, when known.
    pid: Option<u32>,
}
/// Request body for `PUT /v1/volumes/{id}`: size of the blank volume to create.
#[derive(Debug, Deserialize)]
struct CreateVolumeRequest {
    size_bytes: u64,
}
/// Request body for `POST /v1/volumes/{id}/resize`: new target size in bytes.
#[derive(Debug, Deserialize)]
struct ResizeVolumeRequest {
    size_bytes: u64,
}
/// Query parameters for `PUT /v1/volumes/{id}/import`. `size_bytes` is
/// optional; the fallback when absent is decided by `import_impl` — confirm
/// there (possibly the size of the uploaded stream).
#[derive(Debug, Deserialize)]
struct ImportQuery {
    size_bytes: Option<u64>,
}
/// Query parameters for `POST /v1/volumes/{id}/export`.
/// `read_only` defaults to `false` (see `ensure_export`).
#[derive(Debug, Deserialize)]
struct ExportQuery {
    read_only: Option<bool>,
}
/// Shared handler state; cheap to clone because every field is an `Arc`.
#[derive(Clone)]
struct AppState {
    /// Immutable server configuration.
    config: Arc<ServerConfig>,
    /// Per-volume async locks serializing operations on the same volume id.
    /// NOTE(review): entries are inserted on first use and never removed, so
    /// this map grows with the number of distinct ids ever seen — confirm
    /// that is acceptable for the expected volume churn.
    volume_guards: Arc<Mutex<HashMap<String, Arc<Mutex<()>>>>>,
    /// NBD ports considered in use; seeded at startup (see `AppState::new`).
    reserved_ports: Arc<Mutex<HashSet<u16>>>,
}
impl AppState {
    /// Builds the shared state: creates the on-disk directory layout via
    /// `prepare_dirs`, then seeds the reserved-port set via
    /// `collect_reserved_ports` (both defined elsewhere in this file).
    ///
    /// # Errors
    /// Propagates any error from directory preparation or port collection.
    async fn new(config: ServerConfig) -> Result<Self> {
        prepare_dirs(&config).await?;
        let reserved_ports = collect_reserved_ports(&config).await?;
        Ok(Self {
            config: Arc::new(config),
            volume_guards: Arc::new(Mutex::new(HashMap::new())),
            reserved_ports: Arc::new(Mutex::new(reserved_ports)),
        })
    }

    /// Returns the per-volume lock for `volume_id`, creating it on first use.
    /// The outer map mutex is held only long enough to fetch/insert the
    /// entry; callers then lock the returned `Arc<Mutex<()>>` themselves.
    async fn volume_guard(&self, volume_id: &str) -> Arc<Mutex<()>> {
        let mut guards = self.volume_guards.lock().await;
        guards
            .entry(volume_id.to_string())
            .or_insert_with(|| Arc::new(Mutex::new(())))
            .clone()
    }
}
/// HTTP-level error: a status code plus a human-readable message, rendered
/// as a JSON envelope by the `IntoResponse` impl below.
#[derive(Debug)]
struct ApiError {
    status: StatusCode,
    message: String,
}
impl ApiError {
fn new(status: StatusCode, message: impl Into<String>) -> Self {
Self {
status,
message: message.into(),
}
}
fn not_found(message: impl Into<String>) -> Self {
Self::new(StatusCode::NOT_FOUND, message)
}
fn internal(err: anyhow::Error) -> Self {
Self::new(StatusCode::INTERNAL_SERVER_ERROR, err.to_string())
}
}
impl IntoResponse for ApiError {
    /// Renders the error as `{"error": "<message>"}` with the stored status.
    fn into_response(self) -> Response {
        let payload = serde_json::json!({
            "error": self.message,
        });
        (self.status, Json(payload)).into_response()
    }
}
type ApiResult<T> = Result<Json<T>, ApiError>;
/// Entry point: loads configuration, initializes tracing, builds the axum
/// router, and serves the control API until the process is stopped.
#[tokio::main]
async fn main() -> Result<()> {
    let args = Args::parse();
    // Config file is optional: a missing file silently falls back to
    // built-in defaults, but a present-but-invalid file is a hard error.
    let config = if args.config.exists() {
        let contents = fs::read_to_string(&args.config)
            .await
            .with_context(|| format!("failed to read config {}", args.config.display()))?;
        toml::from_str::<ServerConfig>(&contents)
            .with_context(|| format!("failed to parse config {}", args.config.display()))?
    } else {
        ServerConfig::default()
    };
    // RUST_LOG (the default env filter) wins; config.log_level is the fallback.
    tracing_subscriber::fmt()
        .with_env_filter(
            EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(&config.log_level)),
        )
        .init();
    // Copy the addr out before `config` is moved into AppState::new.
    let listen_addr = config.listen_addr;
    let state = AppState::new(config).await?;
    // Route table for the control API; `{id}` is the volume identifier.
    let app = Router::new()
        .route("/healthz", get(healthz))
        .route("/v1/volumes/{id}", put(create_blank_volume).get(get_volume).delete(delete_volume))
        .route("/v1/volumes/{id}/import", put(import_volume))
        .route("/v1/volumes/{id}/resize", post(resize_volume))
        .route("/v1/volumes/{id}/export", post(ensure_export))
        .with_state(state);
    tracing::info!(%listen_addr, "starting CoronaFS server");
    let listener = tokio::net::TcpListener::bind(listen_addr).await?;
    axum::serve(listener, app).await?;
    Ok(())
}
/// Liveness probe endpoint; always reports `{"status": "ok"}`.
async fn healthz() -> Json<serde_json::Value> {
    let body = serde_json::json!({"status": "ok"});
    Json(body)
}
async fn create_blank_volume(
State(state): State<AppState>,
Path(id): Path<String>,
Json(req): Json<CreateVolumeRequest>,
) -> ApiResult<VolumeResponse> {
let volume_guard = state.volume_guard(&id).await;
let _guard = volume_guard.lock().await;
create_blank_impl(&state, &id, req.size_bytes)
.await
.map(Json)
.map_err(ApiError::internal)
}
async fn import_volume(
State(state): State<AppState>,
Path(id): Path<String>,
Query(query): Query<ImportQuery>,
body: Body,
) -> ApiResult<VolumeResponse> {
let volume_guard = state.volume_guard(&id).await;
let _guard = volume_guard.lock().await;
import_impl(&state, &id, query.size_bytes, body)
.await
.map(Json)
.map_err(ApiError::internal)
}
async fn get_volume(
State(state): State<AppState>,
Path(id): Path<String>,
) -> ApiResult<VolumeResponse> {
load_response(&state, &id)
.await
.ok_or_else(|| ApiError::not_found(format!("volume {id} not found")))
.map(Json)
}
async fn ensure_export(
State(state): State<AppState>,
Path(id): Path<String>,
Query(query): Query<ExportQuery>,
) -> ApiResult<VolumeResponse> {
let volume_guard = state.volume_guard(&id).await;
let _guard = volume_guard.lock().await;
ensure_export_impl(&state, &id, query.read_only.unwrap_or(false))
.await
.map(Json)
.map_err(ApiError::internal)
}
async fn resize_volume(
State(state): State<AppState>,
Path(id): Path<String>,
Json(req): Json<ResizeVolumeRequest>,
) -> ApiResult<VolumeResponse> {
let volume_guard = state.volume_guard(&id).await;
let _guard = volume_guard.lock().await;
resize_impl(&state, &id, req.size_bytes)
.await
.map(Json)
.map_err(ApiError::internal)
}
/// `DELETE /v1/volumes/{id}` — remove the volume; 204 No Content on success.
async fn delete_volume(
    State(state): State<AppState>,
    Path(id): Path<String>,
) -> Result<StatusCode, ApiError> {
    let volume_guard = state.volume_guard(&id).await;
    let _guard = volume_guard.lock().await;
    match delete_impl(&state, &id).await {
        Ok(()) => Ok(StatusCode::NO_CONTENT),
        Err(err) => Err(ApiError::internal(err)),
    }
}
/// Create the volume, metadata, and pid directories if they do not exist.
async fn prepare_dirs(config: &ServerConfig) -> Result<()> {
    for dir in [config.volume_dir(), config.metadata_dir(), config.pid_dir()] {
        fs::create_dir_all(dir).await?;
    }
    Ok(())
}
async fn create_blank_impl(state: &AppState, id: &str, size_bytes: u64) -> Result<VolumeResponse> {
let path = volume_path(&state.config, id);
let meta_path = metadata_path(&state.config, id);
if fs::try_exists(&meta_path).await.unwrap_or(false) {
return load_response_required(state, id).await;
}
if state.config.preallocate {
let status = Command::new("fallocate")
.args(["-l", &size_bytes.to_string(), path.to_string_lossy().as_ref()])
.status()
.await;
match status {
Ok(status) if status.success() => {}
_ => {
let file = fs::File::create(&path).await?;
file.set_len(size_bytes).await?;
}
}
} else {
let file = fs::File::create(&path).await?;
file.set_len(size_bytes).await?;
}
let meta = VolumeMetadata {
id: id.to_string(),
size_bytes,
port: None,
export_pid: None,
created_at: chrono::Utc::now().to_rfc3339(),
updated_at: chrono::Utc::now().to_rfc3339(),
};
ensure_volume_file_permissions(&path).await?;
save_metadata(&meta_path, &meta).await?;
load_response_required(state, id).await
}
/// Stream a raw image from the request body into the volume file for `id`.
///
/// Writes into a temp file first and renames it into place so a failed or
/// interrupted upload never clobbers an existing volume. When `size_bytes`
/// is given, the temp file is preallocated up front and the final file is
/// truncated/extended to exactly that size; otherwise the size is whatever
/// the client sent.
async fn import_impl(
    state: &AppState,
    id: &str,
    size_bytes: Option<u64>,
    body: Body,
) -> Result<VolumeResponse> {
    let path = volume_path(&state.config, id);
    let meta_path = metadata_path(&state.config, id);
    let tmp_path = temp_import_path(&state.config, id);
    if let Some(size_bytes) = size_bytes {
        create_or_preallocate_file(&tmp_path, size_bytes, state.config.preallocate).await?;
    }
    let mut stream = body.into_data_stream();
    // When a size was provided the temp file already exists at full length,
    // so we must NOT truncate it (that would discard the preallocation);
    // without a size we truncate any stale temp file from a prior attempt.
    let mut file = fs::OpenOptions::new()
        .create(true)
        .write(true)
        .truncate(size_bytes.is_none())
        .open(&tmp_path)
        .await
        .with_context(|| format!("failed to create {}", tmp_path.display()))?;
    let mut bytes_written = 0u64;
    while let Some(chunk) = stream.next().await {
        let chunk = chunk.context("failed to read request body chunk")?;
        bytes_written = bytes_written.saturating_add(chunk.len() as u64);
        file.write_all(&chunk)
            .await
            .with_context(|| format!("failed to write {}", tmp_path.display()))?;
    }
    // Force the final size when requested: pads short uploads with zeros and
    // truncates oversized ones.
    if let Some(size_bytes) = size_bytes {
        file.set_len(size_bytes).await?;
    }
    if state.config.sync_on_write {
        file.sync_all().await?;
    }
    drop(file);
    // Atomic publish: readers see either the old volume or the complete new one.
    fs::rename(&tmp_path, &path).await?;
    ensure_volume_file_permissions(&path).await?;
    let actual_size = fs::metadata(&path).await?.len();
    // A fresh import invalidates any previously recorded export state.
    let meta = VolumeMetadata {
        id: id.to_string(),
        size_bytes: size_bytes.unwrap_or(actual_size),
        port: None,
        export_pid: None,
        created_at: chrono::Utc::now().to_rfc3339(),
        updated_at: chrono::Utc::now().to_rfc3339(),
    };
    save_metadata(&meta_path, &meta).await?;
    tracing::info!(
        volume_id = id,
        bytes_written,
        volume_size = actual_size,
        "Imported raw volume into CoronaFS"
    );
    load_response_required(state, id).await
}
/// Resize the raw volume `id` to `size_bytes` via `qemu-img resize`.
///
/// Any live qemu-nbd export is stopped first (and its port released), since
/// resizing a file that is actively exported would confuse attached clients;
/// callers must re-request an export afterwards.
///
/// NOTE(review): `qemu-img resize` refuses to shrink a raw image without
/// `--shrink`, so shrink requests will fail here — confirm that is intended.
async fn resize_impl(state: &AppState, id: &str, size_bytes: u64) -> Result<VolumeResponse> {
    let meta_path = metadata_path(&state.config, id);
    let path = volume_path(&state.config, id);
    let mut meta = load_metadata(&meta_path)
        .await?
        .ok_or_else(|| anyhow!("volume {id} not found"))?;
    // Remember the port before stop_export_if_running clears the pid state.
    let reserved_port = meta.port;
    stop_export_if_running(&state.config, id, &mut meta).await?;
    release_export_port(state, reserved_port).await;
    meta.port = None;
    let status = Command::new(&state.config.qemu_img_path)
        .args([
            "resize",
            "-f",
            "raw",
            path.to_string_lossy().as_ref(),
            &size_bytes.to_string(),
        ])
        .status()
        .await
        .context("failed to spawn qemu-img resize")?;
    if !status.success() {
        return Err(anyhow!("qemu-img resize failed for {}", path.display()));
    }
    meta.size_bytes = size_bytes;
    meta.updated_at = chrono::Utc::now().to_rfc3339();
    save_metadata(&meta_path, &meta).await?;
    load_response_required(state, id).await
}
/// Ensure a qemu-nbd export is running for volume `id`, starting one if
/// needed, and return the refreshed volume state.
///
/// Fast path: if the recorded export pid is still alive, just re-mark its
/// port as reserved (covers a server restart) and return. Otherwise a new
/// qemu-nbd is forked on a freshly reserved port; on any failure along the
/// way the port reservation is rolled back.
async fn ensure_export_impl(state: &AppState, id: &str, read_only: bool) -> Result<VolumeResponse> {
    let meta_path = metadata_path(&state.config, id);
    let mut meta = load_metadata(&meta_path)
        .await?
        .ok_or_else(|| anyhow!("volume {id} not found"))?;
    if let Some(pid) = meta.export_pid {
        if process_running(pid).await {
            if let Some(port) = meta.port {
                mark_port_reserved(state, port).await;
            }
            return load_response_required(state, id).await;
        }
    }
    // Prefer the volume's previous port so client URIs stay stable.
    let port = reserve_export_port(state, meta.port).await?;
    let pid_path = pid_path(&state.config, id);
    let path = volume_path(&state.config, id);
    // Native AIO needs an O_DIRECT-capable cache mode; downgrade otherwise.
    let effective_aio_mode = export_aio_mode(&state.config.export_cache_mode, &state.config.export_aio_mode);
    let mut command = Command::new(&state.config.qemu_nbd_path);
    command.args([
        "--fork",
        "--persistent",
        "--pid-file",
        pid_path.to_string_lossy().as_ref(),
        "--shared",
        &state.config.export_shared_clients.to_string(),
        "--cache",
        &state.config.export_cache_mode,
        "--aio",
        effective_aio_mode,
        "--discard",
        &state.config.export_discard_mode,
        "--detect-zeroes",
        &state.config.export_detect_zeroes_mode,
        "--format",
        "raw",
        "--bind",
        &state.config.export_bind_addr,
        "--port",
        &port.to_string(),
    ]);
    if read_only {
        command.arg("--read-only");
    }
    command.arg(path.to_string_lossy().as_ref());
    let status = command
        .status()
        .await
        .context("failed to spawn qemu-nbd")?;
    if !status.success() {
        release_export_port(state, Some(port)).await;
        return Err(anyhow!("qemu-nbd failed to export volume {id} on port {port}"));
    }
    // `--fork` daemonizes, so the server's pid must be read from the pid file.
    let pid = match read_pid_file(&pid_path).await {
        Ok(pid) => pid,
        Err(err) => {
            release_export_port(state, Some(port)).await;
            return Err(err);
        }
    };
    meta.port = Some(port);
    meta.export_pid = Some(pid);
    meta.updated_at = chrono::Utc::now().to_rfc3339();
    save_metadata(&meta_path, &meta).await?;
    // Only report success once the export actually accepts TCP connections;
    // tear everything down again if it never comes up.
    if let Err(err) = wait_for_tcp_listen(export_probe_host(&state.config), port).await {
        let _ = stop_export_if_running(&state.config, id, &mut meta).await;
        release_export_port(state, Some(port)).await;
        return Err(err);
    }
    load_response_required(state, id).await
}
/// Resolve the effective qemu-nbd AIO mode for a given cache mode.
///
/// qemu-nbd's `--aio native` requires `O_DIRECT`, i.e. cache mode `none` or
/// `directsync`; for any other cache mode the request is downgraded to
/// threaded AIO and a warning is logged.
fn export_aio_mode<'a>(cache_mode: &str, aio_mode: &'a str) -> &'a str {
    let native_capable = matches!(cache_mode, "none" | "directsync");
    if aio_mode != "native" || native_capable {
        return aio_mode;
    }
    tracing::warn!(
        cache_mode,
        requested_aio_mode = aio_mode,
        effective_aio_mode = "threads",
        "CoronaFS export cache mode is incompatible with qemu-nbd native AIO; falling back to threads",
    );
    "threads"
}
/// Tear down a volume: stop any running export, release its port, then
/// remove the data, metadata, and pid files (missing files are skipped).
async fn delete_impl(state: &AppState, id: &str) -> Result<()> {
    let meta_path = metadata_path(&state.config, id);
    if let Some(mut meta) = load_metadata(&meta_path).await? {
        let reserved_port = meta.port;
        stop_export_if_running(&state.config, id, &mut meta).await?;
        release_export_port(state, reserved_port).await;
    }
    let volume_file = volume_path(&state.config, id);
    let pid_file = pid_path(&state.config, id);
    // Same removal order as before: data file, metadata, pid file.
    for file in [volume_file, meta_path, pid_file] {
        if fs::try_exists(&file).await.unwrap_or(false) {
            fs::remove_file(&file).await?;
        }
    }
    Ok(())
}
/// Non-failing variant of `load_response_required`: `None` when the volume
/// is missing or its metadata cannot be read.
async fn load_response(state: &AppState, id: &str) -> Option<VolumeResponse> {
    // `match { Ok => Some, Err => None }` is exactly `Result::ok`.
    load_response_required(state, id).await.ok()
}
/// Build the API response for volume `id` from its on-disk metadata.
///
/// Errors when the metadata file is missing or unreadable. Export details
/// are only included when a port is recorded AND the recorded pid is still
/// alive (checked synchronously via a /proc lookup — cheap enough to do
/// inline in async context).
async fn load_response_required(state: &AppState, id: &str) -> Result<VolumeResponse> {
    let meta = load_metadata(&metadata_path(&state.config, id))
        .await?
        .ok_or_else(|| anyhow!("volume {id} not found"))?;
    let export = match (meta.port, meta.export_pid) {
        (Some(port), pid) if pid.map(process_running_sync).unwrap_or(false) => Some(ExportResponse {
            uri: format!("nbd://{}:{}", state.config.advertise_host, port),
            port,
            pid,
        }),
        _ => None,
    };
    Ok(VolumeResponse {
        id: meta.id,
        size_bytes: meta.size_bytes,
        path: volume_path(&state.config, id).display().to_string(),
        export,
    })
}
/// Read and deserialise volume metadata, returning `Ok(None)` when the file
/// does not exist (a failed existence check is treated as "missing").
async fn load_metadata(path: &FsPath) -> Result<Option<VolumeMetadata>> {
    let exists = fs::try_exists(path).await.unwrap_or(false);
    if !exists {
        return Ok(None);
    }
    let bytes = fs::read(path).await?;
    let meta = serde_json::from_slice(&bytes)?;
    Ok(Some(meta))
}
/// Atomically persist volume metadata: write a sibling `*.json.tmp` file,
/// flush it to disk, then rename over the destination so readers never see
/// a partially written document.
async fn save_metadata(path: &FsPath, meta: &VolumeMetadata) -> Result<()> {
    let bytes = serde_json::to_vec_pretty(meta)?;
    let tmp_path = path.with_extension("json.tmp");
    let mut file = fs::File::create(&tmp_path).await?;
    file.write_all(&bytes).await?;
    // Without this fsync a crash right after the rename could still leave an
    // empty/truncated metadata file, defeating the atomic-replace intent.
    file.sync_all().await?;
    drop(file);
    fs::rename(&tmp_path, path).await?;
    Ok(())
}
/// Stop the qemu-nbd export for `id` if its recorded pid is still alive.
///
/// Sends SIGTERM and polls for exit for up to 10 seconds, escalating to
/// SIGKILL on timeout. Clears `meta.export_pid` and removes the pid file in
/// every case; the caller is responsible for persisting the metadata.
async fn stop_export_if_running(config: &ServerConfig, id: &str, meta: &mut VolumeMetadata) -> Result<()> {
    if let Some(pid) = meta.export_pid {
        if process_running(pid).await {
            let status = Command::new("kill")
                .args(["-TERM", &pid.to_string()])
                .status()
                .await
                .context("failed to terminate qemu-nbd export")?;
            if !status.success() {
                return Err(anyhow!("failed to stop qemu-nbd export pid {pid}"));
            }
            // Poll for exit; SIGKILL as a last resort after the deadline.
            let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10);
            while process_running(pid).await {
                if std::time::Instant::now() >= deadline {
                    let _ = Command::new("kill")
                        .args(["-KILL", &pid.to_string()])
                        .status()
                        .await;
                    break;
                }
                tokio::time::sleep(std::time::Duration::from_millis(100)).await;
            }
        }
    }
    meta.export_pid = None;
    let pid_path = pid_path(config, id);
    if fs::try_exists(&pid_path).await.unwrap_or(false) {
        fs::remove_file(pid_path).await?;
    }
    Ok(())
}
/// Liveness check via procfs: a process exists iff `/proc/<pid>` does.
/// Linux-specific; any error reading the path is treated as "not running".
async fn process_running(pid: u32) -> bool {
    fs::try_exists(format!("/proc/{pid}")).await.unwrap_or(false)
}
/// Synchronous twin of `process_running` for non-async call sites.
/// Relies on a procfs layout (`/proc/<pid>`), so this is Linux-specific.
fn process_running_sync(pid: u32) -> bool {
    std::path::Path::new(&format!("/proc/{pid}")).exists()
}
/// Poll for the qemu-nbd pid file (written asynchronously after `--fork`)
/// and parse its contents, giving up after 5 seconds.
async fn read_pid_file(path: &FsPath) -> Result<u32> {
    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
    loop {
        if fs::try_exists(path).await.unwrap_or(false) {
            let contents = fs::read_to_string(path).await?;
            return contents
                .trim()
                .parse::<u32>()
                .with_context(|| format!("invalid pid file {}", path.display()));
        }
        if std::time::Instant::now() >= deadline {
            return Err(anyhow!("timed out waiting for pid file {}", path.display()));
        }
        tokio::time::sleep(std::time::Duration::from_millis(100)).await;
    }
}
/// Poll until `host:port` accepts a TCP connection, failing after 5 seconds.
/// Used to confirm a freshly forked qemu-nbd export is actually serving.
async fn wait_for_tcp_listen(host: &str, port: u16) -> Result<()> {
    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
    loop {
        if tokio::net::TcpStream::connect((host, port)).await.is_ok() {
            return Ok(());
        }
        if std::time::Instant::now() >= deadline {
            return Err(anyhow!("timed out waiting for export {}:{}", host, port));
        }
        tokio::time::sleep(std::time::Duration::from_millis(100)).await;
    }
}
/// Scan the metadata directory at startup and rebuild the set of NBD ports
/// still owned by live qemu-nbd exports.
///
/// Metadata entries referencing a dead export (a port without a live pid,
/// or vice versa) are scrubbed back to the unexported state on disk.
async fn collect_reserved_ports(config: &ServerConfig) -> Result<HashSet<u16>> {
    let mut reserved = HashSet::new();
    let mut entries = fs::read_dir(config.metadata_dir()).await?;
    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        // Skip stray files (e.g. a leftover `*.json.tmp` from an interrupted
        // save_metadata) instead of failing startup on a JSON parse error.
        if path.extension().and_then(|ext| ext.to_str()) != Some("json") {
            continue;
        }
        let Some(mut meta) = load_metadata(&path).await? else {
            continue;
        };
        match (meta.port, meta.export_pid) {
            (Some(port), Some(pid)) if process_running(pid).await => {
                reserved.insert(port);
            }
            (Some(_), _) | (_, Some(_)) => {
                meta.port = None;
                meta.export_pid = None;
                meta.updated_at = chrono::Utc::now().to_rfc3339();
                save_metadata(&path, &meta).await?;
            }
            _ => {}
        }
    }
    Ok(reserved)
}
/// Pick a free NBD port, preferring the volume's previous port when it is
/// still usable so client URIs stay stable.
///
/// Holds the reserved-port set lock across the probe so two concurrent
/// exports cannot claim the same port.
async fn reserve_export_port(state: &AppState, preferred_port: Option<u16>) -> Result<u16> {
    let mut reserved = state.reserved_ports.lock().await;
    if let Some(port) = preferred_port {
        if port_is_usable(&state.config, &reserved, port).await {
            reserved.insert(port);
            return Ok(port);
        }
    }
    // Range arithmetic in u32 so base + count cannot overflow u16.
    let start = state.config.export_base_port as u32;
    let end = start + state.config.export_port_count as u32;
    for port in start..end {
        let port_u16 = port as u16;
        if port_is_usable(&state.config, &reserved, port_u16).await {
            reserved.insert(port_u16);
            return Ok(port_u16);
        }
    }
    // Reuse the u32 bounds here too: the previous `base + count` in u16
    // could overflow (debug panic / release wrap) for ranges ending at 65535.
    Err(anyhow!("no free export ports left in range {start}..{end}"))
}
/// A port is usable when it lies inside the configured export range, is not
/// already reserved, and can currently be bound on the export address.
async fn port_is_usable(config: &ServerConfig, reserved: &HashSet<u16>, port: u16) -> bool {
    let range = (config.export_base_port as u32)
        ..(config.export_base_port as u32 + config.export_port_count as u32);
    if !range.contains(&(port as u32)) || reserved.contains(&port) {
        return false;
    }
    tokio::net::TcpListener::bind((config.export_bind_addr.as_str(), port))
        .await
        .is_ok()
}
/// Record `port` as owned by a live export.
async fn mark_port_reserved(state: &AppState, port: u16) {
    state.reserved_ports.lock().await.insert(port);
}
/// Return a previously reserved port (if any) to the free pool.
async fn release_export_port(state: &AppState, port: Option<u16>) {
    let Some(port) = port else {
        return;
    };
    state.reserved_ports.lock().await.remove(&port);
}
/// Host used to probe a freshly started export: wildcard/empty bind
/// addresses are not directly connectable, so probe loopback instead.
fn export_probe_host(config: &ServerConfig) -> &str {
    let bind = config.export_bind_addr.as_str();
    if matches!(bind, "0.0.0.0" | "::" | "") {
        "127.0.0.1"
    } else {
        bind
    }
}
/// Create `path` at the requested size, preferring `fallocate` when
/// preallocation is enabled and falling back to a sparse file otherwise.
///
/// Fix: permissions were previously only adjusted on the fallback path,
/// leaving successfully fallocate'd files with default mode bits; now both
/// branches apply the volume file permissions.
async fn create_or_preallocate_file(path: &FsPath, size_bytes: u64, preallocate: bool) -> Result<()> {
    if preallocate {
        let status = Command::new("fallocate")
            .args(["-l", &size_bytes.to_string(), path.to_string_lossy().as_ref()])
            .status()
            .await;
        if matches!(status, Ok(status) if status.success()) {
            ensure_volume_file_permissions(path).await?;
            return Ok(());
        }
        // fallocate missing or unsupported on this filesystem: fall through
        // to the sparse-file path below.
    }
    let file = fs::File::create(path).await?;
    file.set_len(size_bytes).await?;
    ensure_volume_file_permissions(path).await?;
    Ok(())
}
/// Set owner/group read-write (0660) on a volume file; a no-op on non-Unix
/// targets.
async fn ensure_volume_file_permissions(path: &FsPath) -> Result<()> {
    #[cfg(unix)]
    {
        // `from_mode` comes from `std::os::unix::fs::PermissionsExt`.
        let permissions = std::fs::Permissions::from_mode(0o660);
        fs::set_permissions(path, permissions).await?;
    }
    Ok(())
}
/// On-disk path of the raw volume image: `<volume_dir>/<id>.raw`.
fn volume_path(config: &ServerConfig, id: &str) -> PathBuf {
    config.volume_dir().join(format!("{id}.raw"))
}
/// On-disk path of the volume's metadata document: `<metadata_dir>/<id>.json`.
fn metadata_path(config: &ServerConfig, id: &str) -> PathBuf {
    config.metadata_dir().join(format!("{id}.json"))
}
/// On-disk path of the qemu-nbd pid file for this volume: `<pid_dir>/<id>.pid`.
fn pid_path(config: &ServerConfig, id: &str) -> PathBuf {
    config.pid_dir().join(format!("{id}.pid"))
}
/// Staging path for an in-progress import: `<data_dir>/<id>.import.tmp`.
// NOTE(review): assumes `data_dir` and `volume_dir` live on the same
// filesystem so the post-import rename stays atomic — confirm.
fn temp_import_path(config: &ServerConfig, id: &str) -> PathBuf {
    config.data_dir.join(format!("{id}.import.tmp"))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Native AIO is only kept for O_DIRECT-capable cache modes.
    #[test]
    fn export_aio_mode_falls_back_for_cached_exports() {
        let cases = [
            ("writeback", "native", "threads"),
            ("none", "native", "native"),
            ("directsync", "native", "native"),
            ("writeback", "threads", "threads"),
        ];
        for (cache_mode, requested, expected) in cases {
            assert_eq!(export_aio_mode(cache_mode, requested), expected);
        }
    }

    /// Wildcard bind addresses are probed via loopback; concrete ones as-is.
    #[test]
    fn export_probe_host_prefers_loopback_for_wildcard_bind() {
        let wildcard = ServerConfig {
            export_bind_addr: "0.0.0.0".to_string(),
            ..ServerConfig::default()
        };
        assert_eq!(export_probe_host(&wildcard), "127.0.0.1");
        let concrete = ServerConfig {
            export_bind_addr: "10.100.0.11".to_string(),
            ..ServerConfig::default()
        };
        assert_eq!(export_probe_host(&concrete), "10.100.0.11");
    }
}

View file

@ -0,0 +1,10 @@
# Manifest for the shared IAM auth client wrapper crate.
[package]
name = "photon-auth-client"
version = "0.1.0"
edition = "2021"
license = "MIT OR Apache-2.0"
description = "Shared IAM auth client wrapper for PhotonCloud services"
[dependencies]
anyhow = "1.0"
# Path dependency on the IAM workspace's service-auth crate.
iam-service-auth = { path = "../../iam/crates/iam-service-auth" }

View file

@ -0,0 +1,10 @@
// Re-export the IAM auth surface so downstream crates depend only on
// `photon-auth-client` instead of `iam_service_auth` directly.
pub use iam_service_auth::{
    get_tenant_context, resolve_tenant_ids_from_context, resource_for_tenant, AuthService,
    TenantContext,
};
pub async fn connect_iam(endpoint: &str) -> anyhow::Result<AuthService> {
AuthService::new(endpoint).await.map_err(|error| {
anyhow::anyhow!("failed to connect to IAM server at {}: {}", endpoint, error)
})
}

View file

@ -0,0 +1,11 @@
# Manifest for the shared configuration-loading helper crate.
[package]
name = "photon-config"
version = "0.1.0"
edition = "2021"
license = "MIT OR Apache-2.0"
description = "Shared configuration loading helpers for PhotonCloud"
[dependencies]
anyhow = "1.0"
# serde derive is required for DeserializeOwned config types.
serde = { version = "1.0", features = ["derive"] }
toml = "0.8"

View file

@ -0,0 +1,58 @@
use anyhow::Context;
use serde::de::DeserializeOwned;
use std::fs;
use std::path::Path;
/// Load a TOML config of type `T` from `path`, returning `T::default()`
/// when the file does not exist.
///
/// # Errors
/// Fails when the file exists but cannot be read, or when its contents are
/// not valid TOML for `T`.
pub fn load_toml_config<T>(path: &Path) -> anyhow::Result<T>
where
    T: DeserializeOwned + Default,
{
    // Read directly and treat NotFound as "use defaults" instead of the
    // previous exists()-then-read sequence, which raced with concurrent
    // deletion (TOCTOU) and reported a vanished file as a read error.
    let contents = match fs::read_to_string(path) {
        Ok(contents) => contents,
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(T::default()),
        Err(err) => {
            return Err(err)
                .with_context(|| format!("failed to read config file {}", path.display()))
        }
    };
    toml::from_str(&contents)
        .with_context(|| format!("failed to parse config file {}", path.display()))
}
#[cfg(test)]
mod tests {
    use super::load_toml_config;
    use serde::Deserialize;
    use std::fs;
    use std::path::PathBuf;
    use std::time::{SystemTime, UNIX_EPOCH};
    // Minimal config shape used to exercise the loader.
    #[derive(Debug, Default, Deserialize, PartialEq)]
    struct TestConfig {
        value: String,
    }
    /// Build a unique temp-file path (process id + nanosecond timestamp) so
    /// parallel test runs cannot collide on the same file.
    fn temp_path(name: &str) -> PathBuf {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        std::env::temp_dir().join(format!("{}-{}-{}.toml", name, std::process::id(), nanos))
    }
    // A missing file must yield the type's Default, not an error.
    #[test]
    fn returns_default_when_file_is_missing() {
        let path = temp_path("photon-config-missing");
        let config: TestConfig = load_toml_config(&path).unwrap();
        assert_eq!(config, TestConfig::default());
    }
    // An existing TOML file is parsed into the target type.
    #[test]
    fn loads_existing_toml_file() {
        let path = temp_path("photon-config-load");
        fs::write(&path, "value = \"hello\"\n").unwrap();
        let config: TestConfig = load_toml_config(&path).unwrap();
        assert_eq!(config.value, "hello");
        // Best-effort cleanup; ignore failure.
        let _ = fs::remove_file(path);
    }
}

Some files were not shown because too many files have changed in this diff Show more