WIP snapshot: preserve dirty worktree
This commit is contained in:
parent
1b9ed0cd22
commit
ed0f9f42f4
736 changed files with 62387 additions and 111098 deletions
36
.github/workflows/nix.yml
vendored
36
.github/workflows/nix.yml
vendored
|
|
@ -14,6 +14,7 @@ jobs:
|
||||||
workspaces: ${{ steps.filter.outputs.changes }}
|
workspaces: ${{ steps.filter.outputs.changes }}
|
||||||
any_changed: ${{ steps.filter.outputs.workspaces_any_changed }}
|
any_changed: ${{ steps.filter.outputs.workspaces_any_changed }}
|
||||||
global_changed: ${{ steps.filter.outputs.global }}
|
global_changed: ${{ steps.filter.outputs.global }}
|
||||||
|
shared_crates_changed: ${{ steps.filter.outputs.shared_crates }}
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- uses: dorny/paths-filter@v3
|
- uses: dorny/paths-filter@v3
|
||||||
|
|
@ -24,9 +25,17 @@ jobs:
|
||||||
- 'flake.nix'
|
- 'flake.nix'
|
||||||
- 'flake.lock'
|
- 'flake.lock'
|
||||||
- 'nix/**'
|
- 'nix/**'
|
||||||
|
- 'nix-nos/**'
|
||||||
- '.github/workflows/nix.yml'
|
- '.github/workflows/nix.yml'
|
||||||
- 'Cargo.toml'
|
- 'Cargo.toml'
|
||||||
- 'Cargo.lock'
|
- 'Cargo.lock'
|
||||||
|
- 'crates/**'
|
||||||
|
- 'client-common/**'
|
||||||
|
- 'baremetal/**'
|
||||||
|
- 'scripts/**'
|
||||||
|
- 'specifications/**'
|
||||||
|
- 'docs/**'
|
||||||
|
shared_crates: 'crates/**'
|
||||||
chainfire: 'chainfire/**'
|
chainfire: 'chainfire/**'
|
||||||
flaredb: 'flaredb/**'
|
flaredb: 'flaredb/**'
|
||||||
iam: 'iam/**'
|
iam: 'iam/**'
|
||||||
|
|
@ -60,7 +69,21 @@ jobs:
|
||||||
|
|
||||||
- name: Run PhotonCloud Gate
|
- name: Run PhotonCloud Gate
|
||||||
run: |
|
run: |
|
||||||
nix run .#gate-ci -- --workspace ${{ matrix.workspace }} --tier 0 --no-logs
|
nix run ./nix/ci#gate-ci -- --workspace ${{ matrix.workspace }} --tier 0 --no-logs
|
||||||
|
|
||||||
|
shared-crates-gate:
|
||||||
|
needs: filter
|
||||||
|
if: ${{ needs.filter.outputs.shared_crates_changed == 'true' }}
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
name: gate (shared crates)
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- uses: DeterminateSystems/nix-installer-action@v11
|
||||||
|
- uses: DeterminateSystems/magic-nix-cache-action@v8
|
||||||
|
|
||||||
|
- name: Run Shared Crates Gate
|
||||||
|
run: |
|
||||||
|
nix run ./nix/ci#gate-ci -- --shared-crates --tier 0 --no-logs
|
||||||
|
|
||||||
# Build server packages (tier 1+)
|
# Build server packages (tier 1+)
|
||||||
build:
|
build:
|
||||||
|
|
@ -88,7 +111,7 @@ jobs:
|
||||||
|
|
||||||
# Summary job for PR status checks
|
# Summary job for PR status checks
|
||||||
ci-status:
|
ci-status:
|
||||||
needs: [filter, gate]
|
needs: [filter, gate, shared-crates-gate]
|
||||||
if: always()
|
if: always()
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
|
|
@ -97,10 +120,19 @@ jobs:
|
||||||
if [[ "${{ needs.gate.result }}" == "failure" ]]; then
|
if [[ "${{ needs.gate.result }}" == "failure" ]]; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
if [[ "${{ needs.shared-crates-gate.result }}" == "failure" ]]; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
if [[ "${{ needs.filter.outputs.any_changed }}" == "true" || "${{ needs.filter.outputs.global_changed }}" == "true" ]]; then
|
if [[ "${{ needs.filter.outputs.any_changed }}" == "true" || "${{ needs.filter.outputs.global_changed }}" == "true" ]]; then
|
||||||
if [[ "${{ needs.gate.result }}" == "skipped" ]]; then
|
if [[ "${{ needs.gate.result }}" == "skipped" ]]; then
|
||||||
echo "Gate was skipped despite changes. This is unexpected."
|
echo "Gate was skipped despite changes. This is unexpected."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
if [[ "${{ needs.filter.outputs.shared_crates_changed }}" == "true" ]]; then
|
||||||
|
if [[ "${{ needs.shared-crates-gate.result }}" == "skipped" ]]; then
|
||||||
|
echo "Shared crates gate was skipped despite crates/** changes. This is unexpected."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
echo "CI passed or was correctly skipped."
|
echo "CI passed or was correctly skipped."
|
||||||
|
|
|
||||||
18
.gitignore
vendored
18
.gitignore
vendored
|
|
@ -1,5 +1,10 @@
|
||||||
# Claude Code
|
# Claude Code
|
||||||
.cccc/
|
.cccc/
|
||||||
|
.code/
|
||||||
|
.codex/
|
||||||
|
.claude.json
|
||||||
|
.ralphrc
|
||||||
|
.sisyphus/
|
||||||
|
|
||||||
# Rust
|
# Rust
|
||||||
target/
|
target/
|
||||||
|
|
@ -9,6 +14,7 @@ target/
|
||||||
# Nix
|
# Nix
|
||||||
result
|
result
|
||||||
result-*
|
result-*
|
||||||
|
plasmavmc/result
|
||||||
|
|
||||||
# local CI artifacts
|
# local CI artifacts
|
||||||
work/
|
work/
|
||||||
|
|
@ -33,6 +39,8 @@ Thumbs.db
|
||||||
|
|
||||||
# Logs
|
# Logs
|
||||||
*.log
|
*.log
|
||||||
|
quanta/test_output_renamed.log
|
||||||
|
plasmavmc/kvm_test_output.log
|
||||||
|
|
||||||
# VM disk images and ISOs (large binary files)
|
# VM disk images and ISOs (large binary files)
|
||||||
**/*.qcow2
|
**/*.qcow2
|
||||||
|
|
@ -54,3 +62,13 @@ flaredb/repomix-output.xml
|
||||||
# Temporary files
|
# Temporary files
|
||||||
*.tmp
|
*.tmp
|
||||||
*.bak
|
*.bak
|
||||||
|
tmp_test.txt
|
||||||
|
tmp_test_write.txt
|
||||||
|
tmp_write_check.txt
|
||||||
|
|
||||||
|
# Runtime state
|
||||||
|
data/
|
||||||
|
chainfire/data/
|
||||||
|
flaredb/data/
|
||||||
|
creditservice/.tmp/
|
||||||
|
nightlight/.tmp/
|
||||||
|
|
|
||||||
27
CONTRIBUTING.md
Normal file
27
CONTRIBUTING.md
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
# Contributing
|
||||||
|
|
||||||
|
PhotonCloud uses Nix as the primary development and validation entrypoint.
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nix develop
|
||||||
|
```
|
||||||
|
|
||||||
|
## Before sending changes
|
||||||
|
|
||||||
|
Run the strongest local validation you can afford.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nix run ./nix/test-cluster#cluster -- fresh-smoke
|
||||||
|
```
|
||||||
|
|
||||||
|
For smaller changes, use the narrower commands under `nix/test-cluster`.
|
||||||
|
|
||||||
|
## Expectations
|
||||||
|
|
||||||
|
- keep service startup on file-first `--config` paths
|
||||||
|
- prefer Nix-native workflows over ad hoc host scripts
|
||||||
|
- do not commit secrets, private keys, runtime state, or generated disk images
|
||||||
|
- document new validation entrypoints in `README.md` or `docs/`
|
||||||
|
- when changing multi-node behavior, validate on the VM cluster rather than only with unit tests
|
||||||
|
|
@ -1,35 +0,0 @@
|
||||||
Title: Foreman Task Brief (Project-specific)
|
|
||||||
|
|
||||||
Purpose (free text)
|
|
||||||
- Complete PROJECT.md Item 12 (Nightlight) - the FINAL infrastructure component (COMPLETE)
|
|
||||||
- Achieve 12/12 PROJECT.md deliverables (NOW 12/12)
|
|
||||||
- Prepare for production deployment using T032 bare-metal provisioning
|
|
||||||
|
|
||||||
Current objectives (ranked, short)
|
|
||||||
- 1) T033 Nightlight completion: S4 PromQL Engine (P0), S5 Storage, S6 Integration
|
|
||||||
- 2) Production deployment prep: NixOS modules + Nightlight observability stack
|
|
||||||
- 3) Deferred features: T029.S5 practical app demo, FlareDB SQL layer (post-MVP)
|
|
||||||
|
|
||||||
Standing work (edit freely)
|
|
||||||
- Task status monitoring: Check docs/por/T*/task.yaml for stale/blocked tasks
|
|
||||||
- Risk radar: Monitor POR.md Risk Radar for new/escalating risks
|
|
||||||
- Progress tracking: Verify step completion matches claimed LOC/test counts
|
|
||||||
- Stale task alerts: Flag tasks with no progress >48h
|
|
||||||
- Evidence validation: Spot-check evidence trail (cargo check, test counts)
|
|
||||||
|
|
||||||
Useful references
|
|
||||||
- PROJECT.md
|
|
||||||
- docs/por/POR.md
|
|
||||||
- docs/por/T*/task.yaml (active tasks)
|
|
||||||
- docs/evidence/** and .cccc/work/**
|
|
||||||
|
|
||||||
How to act each run
|
|
||||||
- Do one useful, non-interactive step within the time box (≤ 30m).
|
|
||||||
- Save temporary outputs to .cccc/work/foreman/<YYYYMMDD-HHMMSS>/.
|
|
||||||
- Write one message to .cccc/mailbox/foreman/to_peer.md with header To: Both|PeerA|PeerB and wrap body in <TO_PEER>..</TO_PEER>.
|
|
||||||
|
|
||||||
Escalation
|
|
||||||
- If a decision is needed, write a 6–10 line RFD and ask the peer.
|
|
||||||
|
|
||||||
Safety
|
|
||||||
- Do not modify orchestrator code/policies; provide checkable artifacts.
|
|
||||||
202
LICENSE
Normal file
202
LICENSE
Normal file
|
|
@ -0,0 +1,202 @@
|
||||||
|
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
37
Makefile
Normal file
37
Makefile
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
# PhotonCloud Makefile
|
||||||
|
# Unifies build and test commands
|
||||||
|
|
||||||
|
.PHONY: all build cluster-up cluster-down cluster-status cluster-validate cluster-smoke cluster-matrix cluster-bench-storage clean
|
||||||
|
|
||||||
|
# Build all services (using Nix)
|
||||||
|
build:
|
||||||
|
nix build .#packages.x86_64-linux.default
|
||||||
|
|
||||||
|
# Cluster Management
|
||||||
|
cluster-up:
|
||||||
|
nix run ./nix/test-cluster#cluster -- start
|
||||||
|
|
||||||
|
cluster-down:
|
||||||
|
nix run ./nix/test-cluster#cluster -- stop
|
||||||
|
|
||||||
|
cluster-status:
|
||||||
|
nix run ./nix/test-cluster#cluster -- status
|
||||||
|
|
||||||
|
cluster-validate:
|
||||||
|
nix run ./nix/test-cluster#cluster -- validate
|
||||||
|
|
||||||
|
cluster-smoke:
|
||||||
|
nix run ./nix/test-cluster#cluster -- fresh-smoke
|
||||||
|
|
||||||
|
cluster-matrix:
|
||||||
|
nix run ./nix/test-cluster#cluster -- fresh-matrix
|
||||||
|
|
||||||
|
cluster-bench-storage:
|
||||||
|
nix run ./nix/test-cluster#cluster -- fresh-storage-bench
|
||||||
|
|
||||||
|
cluster-clean:
|
||||||
|
nix run ./nix/test-cluster#cluster -- clean
|
||||||
|
|
||||||
|
# Clean up build artifacts
|
||||||
|
clean:
|
||||||
|
rm -rf result
|
||||||
398
Nix-NOS.md
398
Nix-NOS.md
|
|
@ -1,398 +0,0 @@
|
||||||
# PlasmaCloud/PhotonCloud と Nix-NOS の統合分析
|
|
||||||
|
|
||||||
## Architecture Decision (2025-12-13)
|
|
||||||
|
|
||||||
**決定:** Nix-NOSを汎用ネットワークモジュールとして別リポジトリに分離する。
|
|
||||||
|
|
||||||
### Three-Layer Architecture
|
|
||||||
|
|
||||||
```
|
|
||||||
Layer 3: PlasmaCloud Cluster (T061)
|
|
||||||
- plasmacloud-cluster.nix
|
|
||||||
- cluster-config.json生成
|
|
||||||
- Deployer (Rust)
|
|
||||||
depends on ↓
|
|
||||||
|
|
||||||
Layer 2: PlasmaCloud Network (T061)
|
|
||||||
- plasmacloud-network.nix
|
|
||||||
- FiberLB BGP連携
|
|
||||||
- PrismNET統合
|
|
||||||
depends on ↓
|
|
||||||
|
|
||||||
Layer 1: Nix-NOS Generic (T062) ← 別リポジトリ
|
|
||||||
- BGP (BIRD2/GoBGP)
|
|
||||||
- VLAN
|
|
||||||
- Network interfaces
|
|
||||||
- PlasmaCloudを知らない汎用モジュール
|
|
||||||
```
|
|
||||||
|
|
||||||
### Repository Structure
|
|
||||||
|
|
||||||
- **github.com/centra/nix-nos**: Layer 1 (汎用、VyOS/OpenWrt代替)
|
|
||||||
- **github.com/centra/plasmacloud**: Layers 2+3 (既存リポジトリ)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 1. 既存プロジェクトの概要
|
|
||||||
|
|
||||||
PlasmaCloud(PhotonCloud)は、以下のコンポーネントで構成されるクラウド基盤プロジェクト:
|
|
||||||
|
|
||||||
### コアサービス
|
|
||||||
| コンポーネント | 役割 | 技術スタック |
|
|
||||||
|---------------|------|-------------|
|
|
||||||
| **ChainFire** | 分散KVストア(etcd互換) | Rust, Raft (openraft) |
|
|
||||||
| **FlareDB** | SQLデータベース | Rust, KVバックエンド |
|
|
||||||
| **IAM** | 認証・認可 | Rust, JWT/mTLS |
|
|
||||||
| **PlasmaVMC** | VM管理 | Rust, KVM/FireCracker |
|
|
||||||
| **PrismNET** | オーバーレイネットワーク | Rust, OVN連携 |
|
|
||||||
| **LightningSTOR** | オブジェクトストレージ | Rust, S3互換 |
|
|
||||||
| **FlashDNS** | DNS | Rust, hickory-dns |
|
|
||||||
| **FiberLB** | ロードバランサー | Rust, L4/L7, BGP予定 |
|
|
||||||
| **NightLight** | メトリクス | Rust, Prometheus互換 |
|
|
||||||
| **k8shost** | コンテナオーケストレーション | Rust, K8s API互換 |
|
|
||||||
|
|
||||||
### インフラ層
|
|
||||||
- **NixOSモジュール**: 各サービス用 (`nix/modules/`)
|
|
||||||
- **first-boot-automation**: 自動クラスタ参加
|
|
||||||
- **PXE/Netboot**: ベアメタルプロビジョニング
|
|
||||||
- **TLS証明書管理**: 開発用証明書生成スクリプト
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 2. Nix-NOS との統合ポイント
|
|
||||||
|
|
||||||
### 2.1 Baremetal Provisioning → Deployer強化
|
|
||||||
|
|
||||||
**既存の実装:**
|
|
||||||
```
|
|
||||||
first-boot-automation.nix
|
|
||||||
├── cluster-config.json による設定注入
|
|
||||||
├── bootstrap vs join の自動判定
|
|
||||||
├── マーカーファイルによる冪等性
|
|
||||||
└── systemd サービス連携
|
|
||||||
```
|
|
||||||
|
|
||||||
**Nix-NOSで追加すべき機能:**
|
|
||||||
|
|
||||||
| 既存 | Nix-NOS追加 |
|
|
||||||
|------|-------------|
|
|
||||||
| cluster-config.json (手動作成) | topology.nix から自動生成 |
|
|
||||||
| 単一クラスタ構成 | 複数クラスタ/サイト対応 |
|
|
||||||
| nixos-anywhere 依存 | Deployer (Phone Home + Push) |
|
|
||||||
| 固定IP設定 | IPAM連携による動的割当 |
|
|
||||||
|
|
||||||
**統合設計:**
|
|
||||||
|
|
||||||
```nix
|
|
||||||
# topology.nix(Nix-NOS)
|
|
||||||
{
|
|
||||||
nix-nos.clusters.plasmacloud = {
|
|
||||||
nodes = {
|
|
||||||
"node01" = {
|
|
||||||
role = "control-plane";
|
|
||||||
ip = "10.0.1.10";
|
|
||||||
services = [ "chainfire" "flaredb" "iam" ];
|
|
||||||
};
|
|
||||||
"node02" = { role = "control-plane"; ip = "10.0.1.11"; };
|
|
||||||
"node03" = { role = "worker"; ip = "10.0.1.12"; };
|
|
||||||
};
|
|
||||||
|
|
||||||
# Nix-NOSが自動生成 → first-boot-automationが読む
|
|
||||||
# cluster-config.json の内容をNix評価時に決定
|
|
||||||
};
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2.2 Network Management → PrismNET + FiberLB + Nix-NOS BGP
|
|
||||||
|
|
||||||
**既存の実装:**
|
|
||||||
```
|
|
||||||
PrismNET (prismnet/)
|
|
||||||
├── VPC/Subnet/Port管理
|
|
||||||
├── Security Groups
|
|
||||||
├── IPAM
|
|
||||||
└── OVN連携
|
|
||||||
|
|
||||||
FiberLB (fiberlb/)
|
|
||||||
├── L4/L7ロードバランシング
|
|
||||||
├── ヘルスチェック
|
|
||||||
├── VIP管理
|
|
||||||
└── BGP統合(設計済み、GoBGPサイドカー)
|
|
||||||
```
|
|
||||||
|
|
||||||
**Nix-NOSで追加すべき機能:**
|
|
||||||
|
|
||||||
```
|
|
||||||
Nix-NOS Network Layer
|
|
||||||
├── BGP設定生成(BIRD2)
|
|
||||||
│ ├── iBGP/eBGP自動計算
|
|
||||||
│ ├── Route Reflector対応
|
|
||||||
│ └── ポリシー抽象化
|
|
||||||
├── topology.nix → systemd-networkd
|
|
||||||
├── OpenWrt/Cisco設定生成(将来)
|
|
||||||
└── FiberLB BGP連携
|
|
||||||
```
|
|
||||||
|
|
||||||
**統合設計:**
|
|
||||||
|
|
||||||
```nix
|
|
||||||
# Nix-NOSのBGPモジュール → FiberLBのGoBGP設定に統合
|
|
||||||
{
|
|
||||||
nix-nos.network.bgp = {
|
|
||||||
autonomousSystems = {
|
|
||||||
"65000" = {
|
|
||||||
members = [ "node01" "node02" "node03" ];
|
|
||||||
ibgp.strategy = "route-reflector";
|
|
||||||
ibgp.reflectors = [ "node01" ];
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
# FiberLBのVIPをBGPで広報
|
|
||||||
vipAdvertisements = {
|
|
||||||
"fiberlb" = {
|
|
||||||
vips = [ "10.0.100.1" "10.0.100.2" ];
|
|
||||||
nextHop = "self";
|
|
||||||
communities = [ "65000:100" ];
|
|
||||||
};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
# FiberLBモジュールとの連携
|
|
||||||
services.fiberlb.bgp = {
|
|
||||||
enable = true;
|
|
||||||
# Nix-NOSが生成するGoBGP設定を参照
|
|
||||||
configFile = config.nix-nos.network.bgp.gobgpConfig;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2.3 K8sパチモン → k8shost + Pure NixOS Alternative
|
|
||||||
|
|
||||||
**既存の実装:**
|
|
||||||
```
|
|
||||||
k8shost (k8shost/)
|
|
||||||
├── Pod管理(gRPC API)
|
|
||||||
├── Service管理(ClusterIP/NodePort)
|
|
||||||
├── Node管理
|
|
||||||
├── CNI連携
|
|
||||||
├── CSI連携
|
|
||||||
└── FiberLB/FlashDNS連携
|
|
||||||
```
|
|
||||||
|
|
||||||
**Nix-NOSの役割:**
|
|
||||||
|
|
||||||
k8shostはすでにKubernetesのパチモンとして機能している。Nix-NOSは:
|
|
||||||
|
|
||||||
1. **k8shostを使う場合**: k8shostクラスタ自体のデプロイをNix-NOSで管理
|
|
||||||
2. **Pure NixOS(K8sなし)**: より軽量な選択肢として、Systemd + Nix-NOSでサービス管理
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────────────────────────┐
|
|
||||||
│ Orchestration Options │
|
|
||||||
├─────────────────────────────────────────────────────────────┤
|
|
||||||
│ Option A: k8shost (K8s-like) │
|
|
||||||
│ ┌─────────────────────────────────────────────────────┐ │
|
|
||||||
│ │ Nix-NOS manages: cluster topology, network, certs │ │
|
|
||||||
│ │ k8shost manages: pods, services, scaling │ │
|
|
||||||
│ └─────────────────────────────────────────────────────┘ │
|
|
||||||
│ │
|
|
||||||
│ Option B: Pure NixOS (K8s-free) │
|
|
||||||
│ ┌─────────────────────────────────────────────────────┐ │
|
|
||||||
│ │ Nix-NOS manages: everything │ │
|
|
||||||
│ │ systemd + containers, static service discovery │ │
|
|
||||||
│ │ Use case: クラウド基盤自体の管理 │ │
|
|
||||||
│ └─────────────────────────────────────────────────────┘ │
|
|
||||||
└─────────────────────────────────────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
**重要な洞察:**
|
|
||||||
|
|
||||||
> 「クラウドの基盤そのものを作るのにKubernetesは使いたくない」
|
|
||||||
|
|
||||||
これは正しいアプローチ。PlasmaCloudのコアサービス(ChainFire, FlareDB, IAM等)は:
|
|
||||||
- K8sの上で動くのではなく、K8sを提供する側
|
|
||||||
- Pure NixOS + Systemdで管理されるべき
|
|
||||||
- Nix-NOSはこのレイヤーを担当
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 3. 具体的な統合計画
|
|
||||||
|
|
||||||
### Phase 1: Baremetal Provisioning統合
|
|
||||||
|
|
||||||
**目標:** first-boot-automationをNix-NOSのtopology.nixと連携
|
|
||||||
|
|
||||||
```nix
|
|
||||||
# nix/modules/first-boot-automation.nix への追加
|
|
||||||
{ config, lib, ... }:
|
|
||||||
let
|
|
||||||
# Nix-NOSのトポロジーから設定を生成
|
|
||||||
clusterConfig =
|
|
||||||
if config.nix-nos.cluster != null then
|
|
||||||
config.nix-nos.cluster.generateClusterConfig {
|
|
||||||
hostname = config.networking.hostName;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
# 従来のcluster-config.json読み込み
|
|
||||||
builtins.fromJSON (builtins.readFile /etc/nixos/secrets/cluster-config.json);
|
|
||||||
in {
|
|
||||||
# 既存のfirst-boot-automationロジックはそのまま
|
|
||||||
# ただし設定ソースをNix-NOSに切り替え可能に
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Phase 2: BGP/Network統合
|
|
||||||
|
|
||||||
**目標:** FiberLBのBGP連携(T055.S3)をNix-NOSで宣言的に管理
|
|
||||||
|
|
||||||
```nix
|
|
||||||
# nix/modules/fiberlb-bgp-nixnos.nix
|
|
||||||
{ config, lib, pkgs, ... }:
|
|
||||||
let
|
|
||||||
fiberlbCfg = config.services.fiberlb;
|
|
||||||
nixnosBgp = config.nix-nos.network.bgp;
|
|
||||||
in {
|
|
||||||
config = lib.mkIf (fiberlbCfg.enable && nixnosBgp.enable) {
|
|
||||||
# GoBGP設定をNix-NOSから生成
|
|
||||||
services.gobgpd = {
|
|
||||||
enable = true;
|
|
||||||
configFile = pkgs.writeText "gobgp.yaml" (
|
|
||||||
nixnosBgp.generateGobgpConfig {
|
|
||||||
localAs = nixnosBgp.getLocalAs config.networking.hostName;
|
|
||||||
routerId = nixnosBgp.getRouterId config.networking.hostName;
|
|
||||||
neighbors = nixnosBgp.getPeers config.networking.hostName;
|
|
||||||
}
|
|
||||||
);
|
|
||||||
};
|
|
||||||
|
|
||||||
# FiberLBにGoBGPアドレスを注入
|
|
||||||
services.fiberlb.bgp = {
|
|
||||||
gobgpAddress = "127.0.0.1:50051";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Phase 3: Deployer実装
|
|
||||||
|
|
||||||
**目標:** Phone Home + Push型デプロイメントコントローラー
|
|
||||||
|
|
||||||
```
|
|
||||||
plasmacloud/
|
|
||||||
├── deployer/ # 新規追加
|
|
||||||
│ ├── src/
|
|
||||||
│ │ ├── api.rs # Phone Home API
|
|
||||||
│ │ ├── orchestrator.rs # デプロイワークフロー
|
|
||||||
│ │ ├── state.rs # ノード状態管理(ChainFire連携)
|
|
||||||
│ │ └── iso_generator.rs # ISO自動生成
|
|
||||||
│ └── Cargo.toml
|
|
||||||
└── nix/
|
|
||||||
└── modules/
|
|
||||||
└── deployer.nix # NixOSモジュール
|
|
||||||
```
|
|
||||||
|
|
||||||
**ChainFireとの連携:**
|
|
||||||
|
|
||||||
DeployerはChainFireを状態ストアとして使用:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
// deployer/src/state.rs
|
|
||||||
struct NodeState {
|
|
||||||
hostname: String,
|
|
||||||
status: NodeStatus, // Pending, Provisioning, Active, Failed
|
|
||||||
bootstrap_key_hash: Option<String>,
|
|
||||||
ssh_pubkey: Option<String>,
|
|
||||||
last_seen: DateTime<Utc>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DeployerState {
|
|
||||||
async fn register_node(&self, node: &NodeState) -> Result<()> {
|
|
||||||
// ChainFireに保存
|
|
||||||
self.chainfire_client
|
|
||||||
.put(format!("deployer/nodes/{}", node.hostname), node.to_json())
|
|
||||||
.await
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 4. アーキテクチャ全体図
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────────────────────────────────┐
|
|
||||||
│ Nix-NOS Layer │
|
|
||||||
│ ┌─────────────────────────────────────────────────────────────┐ │
|
|
||||||
│ │ topology.nix │ │
|
|
||||||
│ │ - ノード定義 │ │
|
|
||||||
│ │ - ネットワークトポロジー │ │
|
|
||||||
│ │ - サービス配置 │ │
|
|
||||||
│ └─────────────────────────────────────────────────────────────┘ │
|
|
||||||
│ │ │
|
|
||||||
│ generates │ │
|
|
||||||
│ ▼ │
|
|
||||||
│ ┌──────────────┬──────────────┬──────────────┬──────────────┐ │
|
|
||||||
│ │ NixOS Config │ BIRD Config │ GoBGP Config │ cluster- │ │
|
|
||||||
│ │ (systemd) │ (BGP) │ (FiberLB) │ config.json │ │
|
|
||||||
│ └──────────────┴──────────────┴──────────────┴──────────────┘ │
|
|
||||||
└─────────────────────────────────────────────────────────────────────┘
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
┌─────────────────────────────────────────────────────────────────────┐
|
|
||||||
│ PlasmaCloud Services │
|
|
||||||
│ ┌───────────────────────────────────────────────────────────────┐ │
|
|
||||||
│ │ Control Plane │ │
|
|
||||||
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
|
|
||||||
│ │ │ChainFire │ │ FlareDB │ │ IAM │ │ Deployer │ │ │
|
|
||||||
│ │ │(Raft KV) │ │ (SQL) │ │(AuthN/Z) │ │ (新規) │ │ │
|
|
||||||
│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │
|
|
||||||
│ └───────────────────────────────────────────────────────────────┘ │
|
|
||||||
│ │
|
|
||||||
│ ┌───────────────────────────────────────────────────────────────┐ │
|
|
||||||
│ │ Network Plane │ │
|
|
||||||
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
|
|
||||||
│ │ │ PrismNET │ │ FiberLB │ │ FlashDNS │ │ BIRD2 │ │ │
|
|
||||||
│ │ │ (OVN) │ │(LB+BGP) │ │ (DNS) │ │(Nix-NOS) │ │ │
|
|
||||||
│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │
|
|
||||||
│ └───────────────────────────────────────────────────────────────┘ │
|
|
||||||
│ │
|
|
||||||
│ ┌───────────────────────────────────────────────────────────────┐ │
|
|
||||||
│ │ Compute Plane │ │
|
|
||||||
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
|
|
||||||
│ │ │PlasmaVMC │ │ k8shost │ │Lightning │ │ │
|
|
||||||
│ │ │(VM/FC) │ │(K8s-like)│ │ STOR │ │ │
|
|
||||||
│ │ └──────────┘ └──────────┘ └──────────┘ │ │
|
|
||||||
│ └───────────────────────────────────────────────────────────────┘ │
|
|
||||||
└─────────────────────────────────────────────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 5. 優先度と実装順序
|
|
||||||
|
|
||||||
| 優先度 | 機能 | 依存関係 | 工数 |
|
|
||||||
|--------|------|----------|------|
|
|
||||||
| **P0** | topology.nix → cluster-config.json生成 | なし | 1週間 |
|
|
||||||
| **P0** | BGPモジュール(BIRD2設定生成) | なし | 2週間 |
|
|
||||||
| **P1** | FiberLB BGP連携(GoBGP) | T055.S3完了 | 2週間 |
|
|
||||||
| **P1** | Deployer基本実装 | ChainFire | 3週間 |
|
|
||||||
| **P2** | OpenWrt設定生成 | BGPモジュール | 2週間 |
|
|
||||||
| **P2** | ISO自動生成パイプライン | Deployer完了後 | 1週間 |
|
|
||||||
| **P2** | 各サービスの設定をNixで管理可能なように | なし | 適当 |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 6. 結論
|
|
||||||
|
|
||||||
PlasmaCloud/PhotonCloudプロジェクトは、Nix-NOSの構想を実装するための**理想的な基盤**:
|
|
||||||
|
|
||||||
1. **すでにNixOSモジュール化されている** → Nix-NOSモジュールとの統合が容易
|
|
||||||
2. **first-boot-automationが存在** → Deployerの基礎として活用可能
|
|
||||||
3. **FiberLBにBGP設計がある** → Nix-NOSのBGPモジュールと自然に統合
|
|
||||||
4. **ChainFireが状態ストア** → Deployer状態管理に利用可能
|
|
||||||
5. **k8shostが存在するがK8sではない** → 「K8sパチモン」の哲学と一致
|
|
||||||
|
|
||||||
**次のアクション:**
|
|
||||||
1. Nix-NOSモジュールをPlasmaCloudリポジトリに追加
|
|
||||||
2. topology.nix → cluster-config.json生成の実装
|
|
||||||
3. BGPモジュール(BIRD2)の実装とFiberLB連携
|
|
||||||
105
PROJECT.md
105
PROJECT.md
|
|
@ -1,105 +0,0 @@
|
||||||
# Project Overview
|
|
||||||
これは、日本発のクラウド基盤を作るためのプロジェクトです。
|
|
||||||
OpenStackなどの既存の使いにくいクラウド基盤の代替となり、ついでに基礎技術を各種ソフトウェアに転用できるようにする。
|
|
||||||
|
|
||||||
# Principle
|
|
||||||
Peer Aへ:**自分で戦略を**決めて良い!好きにやれ!
|
|
||||||
|
|
||||||
# Current Priorities
|
|
||||||
一通り実装を終わらせ、使いやすいプラットフォームと仕様が完成することを目標とする。
|
|
||||||
実装すべきもの:
|
|
||||||
1. クラスター管理用KVS(chainfire)
|
|
||||||
- これは、ライブラリとして作ることにする。単体でとりあえずKVSとして簡易的にも使えるという想定。
|
|
||||||
- Raft+Gossip。
|
|
||||||
2. IAM基盤(aegisという名前にしたい。)
|
|
||||||
- 様々な認証方法に対応しておいてほしい。
|
|
||||||
- あと、サービス間の認証もうまくやる必要がある。mTLSでやることになるだろう。IAMとしてやるのが正解かどうかはわからないが。
|
|
||||||
3. DBaaSのための高速KVS(FlareDB)
|
|
||||||
- そこそこクエリ効率の良いKVSを作り、その上にSQL互換レイヤーなどが乗れるようにする。
|
|
||||||
- 超高速である必要がある。
|
|
||||||
- 結果整合性モードと強整合性モードを両方載せられるようにしたい。
|
|
||||||
- Tsurugiのような高速なDBが参考になるかも知れない。
|
|
||||||
- DBaaSのためでもあるが、高速分散KVSということで、他のもののメタデータストアとして使えるべき。
|
|
||||||
- Chainfireとの棲み分けとしては、Chainfireは単体で使う時用と、大規模な場合はクラスター管理に集中させ、メタデータのストア(特に、サービス間の連携をするような場合は他のサービスのメタデータにアクセスしたくなるだろう。その時に、このKVSから読めれば良い。)はFlareDBにすると良さそう。
|
|
||||||
4. VM基盤(PlasmaVMC)
|
|
||||||
- ちゃんとした抽象化をすることで、様々なVMを扱えるようにしたい(KVM,FireCracker,mvisorなどなど)
|
|
||||||
5. オブジェクトストレージ基盤(LightningSTOR)
|
|
||||||
- この基盤の標準的な感じの(ある程度共通化されており、使いやすい)APIと、S3互換なAPIがあると良いかも
|
|
||||||
- メタデータストアにFlareDBが使えるように当然なっているべき
|
|
||||||
6. DNS(FlashDNS)
|
|
||||||
- PowerDNSを100%完全に代替可能なようにしてほしい。
|
|
||||||
- Route53のようなサービスが作れるようにしたい。
|
|
||||||
- BINDも使いたくない。
|
|
||||||
- 逆引きDNSをやるためにとんでもない行数のBINDのファイルを書くというのがあり、バカバカしすぎるのでサブネットマスクみたいなものに対応すると良い。
|
|
||||||
- DNS All-Rounderという感じにしたい。
|
|
||||||
7. ロードバランサー(FiberLB)
|
|
||||||
- 超高速なロードバランサーとは名ばかりで、実体としてはBGPでやるので良いような気がしている。
|
|
||||||
- AWS ELBみたいなことをできるようにしたい。
|
|
||||||
- MaglevによるL4ロードバランシング
|
|
||||||
- BGP AnycastによるL3ロードバランシング
|
|
||||||
- L7ロードバランシング
|
|
||||||
- これらをいい感じにできると良い(既存のソフトウェアでできるかも?これは要確認。)
|
|
||||||
8. Kubernetesクラスタをいい感じにホストできるもの?
|
|
||||||
- k0sとかk3sとかが参考になるかも知れない。
|
|
||||||
9. これらをNixOS上で動くようにパッケージ化をしたりすると良い(Flake化?)。
|
|
||||||
- あと、Nixで設定できると良い。まあ設定ファイルを生成するだけなのでそれはできると思うが
|
|
||||||
10. Nixによるベアメタルプロビジョニング(Deployer)
|
|
||||||
- Phone Home + Push型のデプロイメントコントローラー
|
|
||||||
- topology.nix からクラスタ設定を自動生成
|
|
||||||
- ChainFireを状態ストアとして使用
|
|
||||||
- ISO自動生成パイプライン対応
|
|
||||||
11. オーバーレイネットワーク
|
|
||||||
- マルチテナントでもうまく動くためには、ユーザーの中でアクセスできるネットワークなど、考えなければいけないことが山ほどある。これを処理するものも必要。
|
|
||||||
- とりあえずネットワーク部分自体の実装はOVNとかで良い。
|
|
||||||
12. オブザーバビリティコンポーネント(NightLight)
|
|
||||||
- メトリクスストアが必要
|
|
||||||
- VictoriaMetricsはmTLSが有料なので、作る必要がある
|
|
||||||
- 完全オープンソースでやりたいからね
|
|
||||||
- 最低限、Prometheus互換(PromQL)とスケーラビリティ、Push型というのは必須になる
|
|
||||||
- メトリクスのデータをどこに置くかは良く良く考えないといけない。スケーラビリティを考えるとS3互換ストレージの上に載せたいが…?
|
|
||||||
- あと、圧縮するかどうかなど
|
|
||||||
13. クレジット・クオータ管理(CreditService)
|
|
||||||
- プロジェクトごとのリソース使用量と課金を管理する「銀行」のようなサービス
|
|
||||||
- 各サービス(PlasmaVMCなど)からのリソース作成リクエストをインターセプトして残高確認(Admission Control)を行う
|
|
||||||
- NightLightから使用量メトリクスを収集して定期的に残高を引き落とす(Billing Batch)
|
|
||||||
|
|
||||||
# Recent Changes (2025-12-11)
|
|
||||||
- **Renaming**:
|
|
||||||
- `Nightlight` -> `NightLight` (監視・メトリクス)
|
|
||||||
- `NovaNet` -> `PrismNET` (ネットワーク)
|
|
||||||
- `PlasmaCloud` -> `PhotonCloud` (プロジェクト全体コードネーム)
|
|
||||||
- **Architecture Decision**:
|
|
||||||
- IAMにクオータ管理を持たせず、専用の `CreditService` を新設することを決定。
|
|
||||||
- `NightLight` を使用量計測のバックエンドとして活用する方針を策定。
|
|
||||||
|
|
||||||
# Next Steps
|
|
||||||
1. **CreditServiceの実装**:
|
|
||||||
- プロジェクトごとのWallet管理、残高管理機能
|
|
||||||
- gRPC APIによるAdmission Controlの実装
|
|
||||||
2. **NightLightの実装完了**:
|
|
||||||
- 永続化層とクエリエンジンの完成
|
|
||||||
- `CreditService` へのデータ提供機能の実装
|
|
||||||
3. **PlasmaVMCの改修**:
|
|
||||||
- `CreditService` と連携したリソース作成時のチェック処理追加
|
|
||||||
- プロジェクト単位のリソース総量制限の実装
|
|
||||||
|
|
||||||
# 守るべき事柄
|
|
||||||
1. Rustで書く。
|
|
||||||
2. 全部のソフトウェアにおいて、コードベースの構造や依存ライブラリ、仕様や使い方を揃えて、統一感があるようにする。
|
|
||||||
3. テスト可能なように作る。また、テストをちゃんと書く。スケーラブルかどうかや、実際に動くかどうかもテスト可能なように良く考えたうえで作る。
|
|
||||||
4. スケーラビリティに気をつけて書く。ボトルネックになる箇所はないか?と常に確認する。
|
|
||||||
5. 統一感ある仕様をちゃんと考える。(specificationsの中にmdで書いていってほしい。1ソフトウェアごとにフォルダを作り、その中に仕様を書く。)
|
|
||||||
6. 設定ファイルについても統一感ある仕様が必要。
|
|
||||||
7. マルチテナントに関して最初から考慮したうえで設計する(次の年にAWSやGCPでそのまま採用されてもおかしくないような性能や使いやすさが必要)。
|
|
||||||
8. ホームラボ用途も満たすようにしたい。
|
|
||||||
9. NixのFlakeで環境を作ったり固定したりすると良い。
|
|
||||||
10. 前方互換性は気にする必要がない(すでにある実装に縛られる必要はなく、両方を変更して良い)。v2とかv3とかそういうふうにバージョンを増やしていくのはやめてほしい。そうではなく、完璧な一つの実装を作ることに専念してほしい。
|
|
||||||
11. ライブラリは可能な限り最新版を使う。この先も長くメンテナンスされることを想定したい。
|
|
||||||
|
|
||||||
# 実戦テスト
|
|
||||||
全ての作ったコンポーネントについて、実践的なテストを作ってバグや仕様の悪い点を洗い出し、修正する。
|
|
||||||
NixやVM、コンテナなどあらゆるものを活用してよい。
|
|
||||||
これにより、実用レベルまで持っていくことが期待される。
|
|
||||||
実用的なアプリケーションを作ってみるとか、パフォーマンスを実際に高負荷な試験で確認するとか、そのレベルのものが求められている。
|
|
||||||
また、各コンポーネントごとのテストも行うべきだが、様々なものを組み合わせるテストも行うべきである。これも含まれる。
|
|
||||||
また、設定のやり方がちゃんと統一されているかなど、細かい点まで気を配ってやる必要がある。
|
|
||||||
527
README.md
527
README.md
|
|
@ -1,507 +1,50 @@
|
||||||
# PhotonCloud (旧 PlasmaCloud)
|
# PhotonCloud
|
||||||
|
|
||||||
**A modern, multi-tenant cloud infrastructure platform built in Rust**
|
PhotonCloud is a Nix-first cloud platform workspace that assembles a small control plane, network services, VM hosting, shared storage, object storage, and gateway services into one reproducible repository.
|
||||||
|
|
||||||
> NOTE: プロジェクトコードネームを PlasmaCloud から PhotonCloud に改称。コンポーネント名も Nightlight → NightLight へ統一済み(詳細は `PROJECT.md` の Recent Changes を参照)。
|
The canonical local proof path is the six-node VM cluster under [`nix/test-cluster`](/home/centra/cloud/nix/test-cluster/README.md). It builds all guest images on the host, boots them as hardware-like QEMU nodes, and validates real multi-node behavior.
|
||||||
> 併存する「PlasmaCloud」表記は旧コードネームを指します。PhotonCloud と読み替えてください。
|
|
||||||
|
|
||||||
PhotonCloud provides a complete cloud computing stack with strong tenant isolation, role-based access control (RBAC), and seamless integration between compute, networking, and storage services.
|
## Components
|
||||||
|
|
||||||
## MVP-Beta Status: COMPLETE ✅
|
- `chainfire`: replicated coordination store
|
||||||
|
- `flaredb`: replicated KV and metadata store
|
||||||
The MVP-Beta milestone validates end-to-end tenant isolation and core infrastructure provisioning:
|
- `iam`: identity, token issuance, and authorization
|
||||||
|
- `prismnet`: tenant networking control plane
|
||||||
- ✅ **IAM**: User authentication, RBAC, multi-tenant isolation
|
- `flashdns`: authoritative DNS service
|
||||||
- ✅ **PrismNET**: VPC overlay networking with tenant boundaries
|
- `fiberlb`: load balancer control plane and dataplane
|
||||||
- ✅ **PlasmaVMC**: VM provisioning with network attachment
|
- `plasmavmc`: VM control plane and worker agents
|
||||||
- ✅ **Integration**: E2E tests validate complete tenant path
|
- `coronafs`: shared filesystem for mutable VM volumes
|
||||||
|
- `lightningstor`: object storage and VM image backing
|
||||||
**Test Results**: 8/8 integration tests passing
|
- `k8shost`: Kubernetes-style hosting control plane
|
||||||
- IAM: 6/6 tenant path tests
|
- `apigateway`: external API and proxy surface
|
||||||
- Network+VM: 2/2 integration tests
|
- `nightlight`: metrics ingestion and query service
|
||||||
|
- `creditservice`: minimal reference quota/credit service
|
||||||
|
- `deployer`: bootstrap and phone-home deployment service
|
||||||
|
- `fleet-scheduler`: non-Kubernetes service scheduler for bare-metal cluster services
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
### Get Started in 3 Steps
|
|
||||||
|
|
||||||
1. **Deploy the Platform**
|
|
||||||
```bash
|
|
||||||
# Start IAM service
|
|
||||||
cd iam && cargo run --bin iam-server -- --port 50080
|
|
||||||
|
|
||||||
# Start PrismNET service
|
|
||||||
cd prismnet && cargo run --bin prismnet-server -- --port 50081
|
|
||||||
|
|
||||||
# Start PlasmaVMC service
|
|
||||||
cd plasmavmc && cargo run --bin plasmavmc-server -- --port 50082
|
|
||||||
```
|
|
||||||
|
|
||||||
2. **Onboard Your First Tenant**
|
|
||||||
```bash
|
|
||||||
# Create user, provision network, deploy VM
|
|
||||||
# See detailed guide below
|
|
||||||
```
|
|
||||||
|
|
||||||
3. **Verify End-to-End**
|
|
||||||
```bash
|
|
||||||
# Run integration tests
|
|
||||||
cd iam && cargo test --test tenant_path_integration
|
|
||||||
cd plasmavmc && cargo test --test prismnet_integration -- --ignored
|
|
||||||
```
|
|
||||||
|
|
||||||
**For detailed instructions**: [Tenant Onboarding Guide](docs/getting-started/tenant-onboarding.md)
|
|
||||||
|
|
||||||
## Architecture Overview
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────────────────────────┐
|
|
||||||
│ User / API Client │
|
|
||||||
└─────────────────────────────────────────────────────────────┘
|
|
||||||
│
|
|
||||||
↓
|
|
||||||
┌─────────────────────────────────────────────────────────────┐
|
|
||||||
│ IAM (Identity & Access Management) │
|
|
||||||
│ • User authentication & JWT tokens │
|
|
||||||
│ • RBAC with hierarchical scopes (Org → Project) │
|
|
||||||
│ • Cross-tenant access denial │
|
|
||||||
└─────────────────────────────────────────────────────────────┘
|
|
||||||
│
|
|
||||||
┌─────────────┴─────────────┐
|
|
||||||
↓ ↓
|
|
||||||
┌──────────────────────┐ ┌──────────────────────┐
|
|
||||||
│ PrismNET │ │ PlasmaVMC │
|
|
||||||
│ • VPC overlay │────▶│ • VM provisioning │
|
|
||||||
│ • Subnets + DHCP │ │ • Hypervisor mgmt │
|
|
||||||
│ • Ports (IP/MAC) │ │ • Network attach │
|
|
||||||
│ • Security Groups │ │ • KVM, Firecracker │
|
|
||||||
└──────────────────────┘ └──────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
**Full Architecture**: [MVP-Beta Tenant Path Architecture](docs/architecture/mvp-beta-tenant-path.md)
|
|
||||||
|
|
||||||
## Core Components
|
|
||||||
|
|
||||||
### IAM (Identity & Access Management)
|
|
||||||
|
|
||||||
**Location**: `/iam`
|
|
||||||
|
|
||||||
Multi-tenant identity and access management with comprehensive RBAC.
|
|
||||||
|
|
||||||
**Features**:
|
|
||||||
- User and service account management
|
|
||||||
- Hierarchical scopes: System → Organization → Project
|
|
||||||
- Custom role creation with fine-grained permissions
|
|
||||||
- Policy evaluation with conditional logic
|
|
||||||
- JWT token issuance with tenant claims
|
|
||||||
|
|
||||||
**Services**:
|
|
||||||
- `IamAdminService`: User, role, and policy management
|
|
||||||
- `IamAuthzService`: Authorization and permission checks
|
|
||||||
- `IamTokenService`: Token issuance and validation
|
|
||||||
|
|
||||||
**Quick Start**:
|
|
||||||
```bash
|
```bash
|
||||||
cd iam
|
nix develop
|
||||||
cargo build --release
|
nix run ./nix/test-cluster#cluster -- fresh-smoke
|
||||||
cargo run --bin iam-server -- --port 50080
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### PrismNET (Network Virtualization)
|
## Main Entrypoints
|
||||||
|
|
||||||
**Location**: `/prismnet`
|
- workspace flake: [flake.nix](/home/centra/cloud/flake.nix)
|
||||||
|
- VM validation harness: [nix/test-cluster/README.md](/home/centra/cloud/nix/test-cluster/README.md)
|
||||||
|
- shared volume notes: [coronafs/README.md](/home/centra/cloud/coronafs/README.md)
|
||||||
|
- minimal quota-service rationale: [creditservice/README.md](/home/centra/cloud/creditservice/README.md)
|
||||||
|
- archived manual VM launch scripts: [baremetal/vm-cluster/README.md](/home/centra/cloud/baremetal/vm-cluster/README.md)
|
||||||
|
|
||||||
VPC-based overlay networking with tenant isolation.
|
## Repository Guide
|
||||||
|
|
||||||
**Features**:
|
- [docs/README.md](/home/centra/cloud/docs/README.md): documentation entrypoint
|
||||||
- Virtual Private Cloud (VPC) provisioning
|
- [docs/testing.md](/home/centra/cloud/docs/testing.md): validation path summary
|
||||||
- Subnet management with CIDR allocation
|
- [docs/component-matrix.md](/home/centra/cloud/docs/component-matrix.md): supported multi-component compositions
|
||||||
- Port allocation with IP/MAC assignment
|
- [docs/storage-benchmarks.md](/home/centra/cloud/docs/storage-benchmarks.md): latest CoronaFS and LightningStor lab numbers
|
||||||
- DHCP server integration
|
- `plans/`: design notes and exploration documents
|
||||||
- Security group enforcement
|
|
||||||
- OVN integration for production networking
|
|
||||||
|
|
||||||
**Services**:
|
## Scope
|
||||||
- `VpcService`: VPC lifecycle management
|
|
||||||
- `SubnetService`: Subnet CRUD operations
|
|
||||||
- `PortService`: Port allocation and attachment
|
|
||||||
- `SecurityGroupService`: Firewall rule management
|
|
||||||
|
|
||||||
**Quick Start**:
|
PhotonCloud is centered on reproducible infrastructure behavior rather than polished end-user product surfaces. Some services, such as `creditservice`, are intentionally minimal reference implementations that prove integration points rather than full products.
|
||||||
```bash
|
|
||||||
cd prismnet
|
|
||||||
export IAM_ENDPOINT=http://localhost:50080
|
|
||||||
cargo build --release
|
|
||||||
cargo run --bin prismnet-server -- --port 50081
|
|
||||||
```
|
|
||||||
|
|
||||||
### PlasmaVMC (VM Provisioning & Management)
|
|
||||||
|
|
||||||
**Location**: `/plasmavmc`
|
|
||||||
|
|
||||||
Virtual machine lifecycle management with hypervisor abstraction.
|
|
||||||
|
|
||||||
**Features**:
|
|
||||||
- VM provisioning with tenant scoping
|
|
||||||
- Hypervisor abstraction (KVM, Firecracker)
|
|
||||||
- Network attachment via PrismNET ports
|
|
||||||
- CPU, memory, and disk configuration
|
|
||||||
- VM metadata persistence (ChainFire)
|
|
||||||
- Live migration support (planned)
|
|
||||||
|
|
||||||
**Services**:
|
|
||||||
- `VmService`: VM lifecycle (create, start, stop, delete)
|
|
||||||
|
|
||||||
**Quick Start**:
|
|
||||||
```bash
|
|
||||||
cd plasmavmc
|
|
||||||
export NOVANET_ENDPOINT=http://localhost:50081
|
|
||||||
export IAM_ENDPOINT=http://localhost:50080
|
|
||||||
cargo build --release
|
|
||||||
cargo run --bin plasmavmc-server -- --port 50082
|
|
||||||
```
|
|
||||||
|
|
||||||
## Future Components (Roadmap)
|
|
||||||
|
|
||||||
### FlashDNS (DNS Service)
|
|
||||||
|
|
||||||
**Status**: Planned for next milestone
|
|
||||||
|
|
||||||
DNS resolution within tenant VPCs with automatic record creation.
|
|
||||||
|
|
||||||
**Features** (Planned):
|
|
||||||
- Tenant-scoped DNS zones
|
|
||||||
- Automatic hostname assignment for VMs
|
|
||||||
- DNS record lifecycle tied to resources
|
|
||||||
- Integration with PrismNET for VPC resolution
|
|
||||||
|
|
||||||
### FiberLB (Load Balancing)
|
|
||||||
|
|
||||||
**Status**: Planned for next milestone
|
|
||||||
|
|
||||||
Layer 4/7 load balancing with tenant isolation.
|
|
||||||
|
|
||||||
**Features** (Planned):
|
|
||||||
- Load balancer provisioning within VPCs
|
|
||||||
- Backend pool management (VM targets)
|
|
||||||
- VIP allocation from tenant subnets
|
|
||||||
- Health checks and failover
|
|
||||||
|
|
||||||
### LightningStor (Block Storage)
|
|
||||||
|
|
||||||
**Status**: Planned for next milestone
|
|
||||||
|
|
||||||
Distributed block storage with snapshot support.
|
|
||||||
|
|
||||||
**Features** (Planned):
|
|
||||||
- Volume creation and attachment to VMs
|
|
||||||
- Snapshot lifecycle management
|
|
||||||
- Replication and high availability
|
|
||||||
- Integration with ChainFire for immutable logs
|
|
||||||
|
|
||||||
## Testing
|
|
||||||
|
|
||||||
### Integration Test Suite
|
|
||||||
|
|
||||||
PlasmaCloud includes comprehensive integration tests validating the complete E2E tenant path.
|
|
||||||
|
|
||||||
**IAM Tests** (6 tests, 778 LOC):
|
|
||||||
```bash
|
|
||||||
cd iam
|
|
||||||
cargo test --test tenant_path_integration
|
|
||||||
|
|
||||||
# Tests:
|
|
||||||
# ✅ test_tenant_setup_flow
|
|
||||||
# ✅ test_cross_tenant_denial
|
|
||||||
# ✅ test_rbac_project_scope
|
|
||||||
# ✅ test_hierarchical_scope_inheritance
|
|
||||||
# ✅ test_custom_role_fine_grained_permissions
|
|
||||||
# ✅ test_multiple_role_bindings
|
|
||||||
```
|
|
||||||
|
|
||||||
**Network + VM Tests** (2 tests, 570 LOC):
|
|
||||||
```bash
|
|
||||||
cd plasmavmc
|
|
||||||
cargo test --test prismnet_integration -- --ignored
|
|
||||||
|
|
||||||
# Tests:
|
|
||||||
# ✅ prismnet_port_attachment_lifecycle
|
|
||||||
# ✅ test_network_tenant_isolation
|
|
||||||
```
|
|
||||||
|
|
||||||
**Coverage**: 8/8 tests passing (100% success rate)
|
|
||||||
|
|
||||||
See [E2E Test Documentation](docs/por/T023-e2e-tenant-path/e2e_test.md) for detailed test descriptions.
|
|
||||||
|
|
||||||
## Documentation
|
|
||||||
|
|
||||||
### Getting Started
|
|
||||||
|
|
||||||
- **[Tenant Onboarding Guide](docs/getting-started/tenant-onboarding.md)**: Complete walkthrough of deploying your first tenant
|
|
||||||
|
|
||||||
### Architecture
|
|
||||||
|
|
||||||
- **[MVP-Beta Tenant Path](docs/architecture/mvp-beta-tenant-path.md)**: Complete system architecture with diagrams
|
|
||||||
- **[Component Integration](docs/architecture/mvp-beta-tenant-path.md#component-boundaries)**: How services communicate
|
|
||||||
|
|
||||||
### Testing & Validation
|
|
||||||
|
|
||||||
- **[E2E Test Documentation](docs/por/T023-e2e-tenant-path/e2e_test.md)**: Comprehensive test suite description
|
|
||||||
- **[T023 Summary](docs/por/T023-e2e-tenant-path/SUMMARY.md)**: MVP-Beta deliverables and test results
|
|
||||||
|
|
||||||
### Component Specifications
|
|
||||||
|
|
||||||
- [IAM Specification](specifications/iam.md)
|
|
||||||
- [PrismNET Specification](specifications/prismnet.md)
|
|
||||||
- [PlasmaVMC Specification](specifications/plasmavmc.md)
|
|
||||||
|
|
||||||
## Tenant Isolation Model
|
|
||||||
|
|
||||||
PlasmaCloud enforces tenant isolation at three layers:
|
|
||||||
|
|
||||||
### Layer 1: IAM Policy Enforcement
|
|
||||||
|
|
||||||
Every API call is validated against the user's JWT token:
|
|
||||||
- Token includes `org_id` and `project_id` claims
|
|
||||||
- Resources are scoped as: `org/{org_id}/project/{project_id}/{resource_type}/{id}`
|
|
||||||
- RBAC policies enforce: `resource.org_id == token.org_id`
|
|
||||||
- Cross-tenant access results in 403 Forbidden
|
|
||||||
|
|
||||||
### Layer 2: Network VPC Isolation
|
|
||||||
|
|
||||||
Each VPC provides a logical network boundary:
|
|
||||||
- VPC scoped to an `org_id`
|
|
||||||
- OVN overlay ensures traffic isolation between VPCs
|
|
||||||
- Different tenants can use the same CIDR without collision
|
|
||||||
- Security groups provide intra-VPC firewall rules
|
|
||||||
|
|
||||||
### Layer 3: VM Scoping
|
|
||||||
|
|
||||||
Virtual machines are scoped to tenant organizations:
|
|
||||||
- VM metadata includes `org_id` and `project_id`
|
|
||||||
- VMs can only attach to ports in their tenant's VPC
|
|
||||||
- VM operations filter by token scope
|
|
||||||
- Hypervisor isolation ensures compute boundary
|
|
||||||
|
|
||||||
**Validation**: All three layers tested in [cross-tenant denial tests](docs/por/T023-e2e-tenant-path/e2e_test.md#test-scenario-2-cross-tenant-denial).
|
|
||||||
|
|
||||||
## Example Workflow
|
|
||||||
|
|
||||||
### Create a Tenant with Network and VM
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 1. Authenticate and get token
|
|
||||||
grpcurl -plaintext -d '{
|
|
||||||
"principal_id": "alice",
|
|
||||||
"org_id": "acme-corp",
|
|
||||||
"project_id": "project-alpha"
|
|
||||||
}' localhost:50080 iam.v1.IamTokenService/IssueToken
|
|
||||||
|
|
||||||
export TOKEN="<your-token>"
|
|
||||||
|
|
||||||
# 2. Create VPC
|
|
||||||
grpcurl -plaintext -H "Authorization: Bearer $TOKEN" -d '{
|
|
||||||
"org_id": "acme-corp",
|
|
||||||
"project_id": "project-alpha",
|
|
||||||
"name": "main-vpc",
|
|
||||||
"cidr": "10.0.0.0/16"
|
|
||||||
}' localhost:50081 prismnet.v1.VpcService/CreateVpc
|
|
||||||
|
|
||||||
export VPC_ID="<vpc-id>"
|
|
||||||
|
|
||||||
# 3. Create Subnet
|
|
||||||
grpcurl -plaintext -H "Authorization: Bearer $TOKEN" -d '{
|
|
||||||
"org_id": "acme-corp",
|
|
||||||
"project_id": "project-alpha",
|
|
||||||
"vpc_id": "'$VPC_ID'",
|
|
||||||
"name": "web-subnet",
|
|
||||||
"cidr": "10.0.1.0/24",
|
|
||||||
"gateway": "10.0.1.1",
|
|
||||||
"dhcp_enabled": true
|
|
||||||
}' localhost:50081 prismnet.v1.SubnetService/CreateSubnet
|
|
||||||
|
|
||||||
export SUBNET_ID="<subnet-id>"
|
|
||||||
|
|
||||||
# 4. Create Port
|
|
||||||
grpcurl -plaintext -H "Authorization: Bearer $TOKEN" -d '{
|
|
||||||
"org_id": "acme-corp",
|
|
||||||
"project_id": "project-alpha",
|
|
||||||
"subnet_id": "'$SUBNET_ID'",
|
|
||||||
"name": "vm-port",
|
|
||||||
"ip_address": "10.0.1.10"
|
|
||||||
}' localhost:50081 prismnet.v1.PortService/CreatePort
|
|
||||||
|
|
||||||
export PORT_ID="<port-id>"
|
|
||||||
|
|
||||||
# 5. Create VM with Network
|
|
||||||
grpcurl -plaintext -H "Authorization: Bearer $TOKEN" -d '{
|
|
||||||
"name": "web-server-1",
|
|
||||||
"org_id": "acme-corp",
|
|
||||||
"project_id": "project-alpha",
|
|
||||||
"spec": {
|
|
||||||
"network": [{
|
|
||||||
"id": "eth0",
|
|
||||||
"port_id": "'$PORT_ID'"
|
|
||||||
}]
|
|
||||||
}
|
|
||||||
}' localhost:50082 plasmavmc.v1.VmService/CreateVm
|
|
||||||
```
|
|
||||||
|
|
||||||
**Full walkthrough**: See [Tenant Onboarding Guide](docs/getting-started/tenant-onboarding.md)
|
|
||||||
|
|
||||||
## Development
|
|
||||||
|
|
||||||
### Prerequisites
|
|
||||||
|
|
||||||
- Rust 1.70+ with Cargo
|
|
||||||
- Protocol Buffers compiler (protoc)
|
|
||||||
- Optional: KVM for real VM execution
|
|
||||||
- Optional: OVN for production networking
|
|
||||||
|
|
||||||
### Build from Source
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Clone repository
|
|
||||||
git clone https://github.com/your-org/plasmacloud.git
|
|
||||||
cd cloud
|
|
||||||
|
|
||||||
# Initialize submodules
|
|
||||||
git submodule update --init --recursive
|
|
||||||
|
|
||||||
# Build all components
|
|
||||||
cd iam && cargo build --release
|
|
||||||
cd ../prismnet && cargo build --release
|
|
||||||
cd ../plasmavmc && cargo build --release
|
|
||||||
```
|
|
||||||
|
|
||||||
### Run Tests
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# IAM tests
|
|
||||||
cd iam && cargo test --test tenant_path_integration
|
|
||||||
|
|
||||||
# Network + VM tests
|
|
||||||
cd plasmavmc && cargo test --test prismnet_integration -- --ignored
|
|
||||||
|
|
||||||
# Unit tests (all components)
|
|
||||||
cargo test
|
|
||||||
```
|
|
||||||
|
|
||||||
### Project Structure
|
|
||||||
|
|
||||||
```
|
|
||||||
cloud/
|
|
||||||
├── iam/ # Identity & Access Management
|
|
||||||
│ ├── crates/
|
|
||||||
│ │ ├── iam-api/ # gRPC services
|
|
||||||
│ │ ├── iam-authz/ # Authorization engine
|
|
||||||
│ │ ├── iam-store/ # Data persistence
|
|
||||||
│ │ └── iam-types/ # Core types
|
|
||||||
│ └── tests/
|
|
||||||
│ └── tenant_path_integration.rs # E2E tests
|
|
||||||
│
|
|
||||||
├── prismnet/ # Network Virtualization
|
|
||||||
│ ├── crates/
|
|
||||||
│ │ ├── prismnet-server/ # gRPC services
|
|
||||||
│ │ ├── prismnet-api/ # Protocol buffers
|
|
||||||
│ │ ├── prismnet-metadata/ # Metadata store
|
|
||||||
│ │ └── prismnet-ovn/ # OVN integration
|
|
||||||
│ └── proto/
|
|
||||||
│
|
|
||||||
├── plasmavmc/ # VM Provisioning
|
|
||||||
│ ├── crates/
|
|
||||||
│ │ ├── plasmavmc-server/ # VM service
|
|
||||||
│ │ ├── plasmavmc-api/ # Protocol buffers
|
|
||||||
│ │ ├── plasmavmc-hypervisor/ # Hypervisor abstraction
|
|
||||||
│ │ ├── plasmavmc-kvm/ # KVM backend
|
|
||||||
│ │ └── plasmavmc-firecracker/ # Firecracker backend
|
|
||||||
│ └── tests/
|
|
||||||
│ └── prismnet_integration.rs # E2E tests
|
|
||||||
│
|
|
||||||
├── flashdns/ # DNS Service (planned)
|
|
||||||
├── fiberlb/ # Load Balancing (planned)
|
|
||||||
├── lightningstor/ # Block Storage (planned)
|
|
||||||
│
|
|
||||||
├── chainfire/ # Immutable event log (submodule)
|
|
||||||
├── flaredb/ # Distributed metadata store (submodule)
|
|
||||||
│
|
|
||||||
├── docs/
|
|
||||||
│ ├── architecture/ # Architecture docs
|
|
||||||
│ ├── getting-started/ # Onboarding guides
|
|
||||||
│ └── por/ # Plan of Record (POR) docs
|
|
||||||
│ └── T023-e2e-tenant-path/ # MVP-Beta deliverables
|
|
||||||
│
|
|
||||||
├── specifications/ # Component specifications
|
|
||||||
└── README.md # This file
|
|
||||||
```
|
|
||||||
|
|
||||||
## Contributing
|
|
||||||
|
|
||||||
We welcome contributions! Please follow these guidelines:
|
|
||||||
|
|
||||||
1. **Fork the repository** and create a feature branch
|
|
||||||
2. **Write tests** for new functionality
|
|
||||||
3. **Update documentation** as needed
|
|
||||||
4. **Run tests** before submitting PR: `cargo test`
|
|
||||||
5. **Follow Rust style**: Use `cargo fmt` and `cargo clippy`
|
|
||||||
|
|
||||||
### Code Review Process
|
|
||||||
|
|
||||||
1. All PRs require at least one approval
|
|
||||||
2. CI must pass (tests, formatting, lints)
|
|
||||||
3. Documentation must be updated for user-facing changes
|
|
||||||
4. Integration tests required for new features
|
|
||||||
|
|
||||||
## License
|
|
||||||
|
|
||||||
PlasmaCloud is licensed under the Apache License 2.0. See [LICENSE](LICENSE) for details.
|
|
||||||
|
|
||||||
## Support & Community
|
|
||||||
|
|
||||||
- **GitHub Issues**: Report bugs or request features
|
|
||||||
- **Documentation**: See [docs/](docs/) for detailed guides
|
|
||||||
- **Architecture**: Review [architecture docs](docs/architecture/mvp-beta-tenant-path.md) for design decisions
|
|
||||||
|
|
||||||
## Roadmap
|
|
||||||
|
|
||||||
### Completed (MVP-Beta) ✅
|
|
||||||
|
|
||||||
- [x] IAM with RBAC and tenant scoping
|
|
||||||
- [x] PrismNET VPC overlay networking
|
|
||||||
- [x] PlasmaVMC VM provisioning
|
|
||||||
- [x] End-to-end integration tests
|
|
||||||
- [x] Comprehensive documentation
|
|
||||||
|
|
||||||
### In Progress
|
|
||||||
|
|
||||||
- [ ] FlashDNS integration (S3)
|
|
||||||
- [ ] FiberLB integration (S4)
|
|
||||||
- [ ] LightningStor integration (S5)
|
|
||||||
|
|
||||||
### Planned
|
|
||||||
|
|
||||||
- [ ] FlareDB persistence for production
|
|
||||||
- [ ] ChainFire integration for VM metadata
|
|
||||||
- [ ] OVN production deployment
|
|
||||||
- [ ] Kubernetes integration
|
|
||||||
- [ ] Terraform provider
|
|
||||||
- [ ] Web UI / Dashboard
|
|
||||||
|
|
||||||
## Acknowledgments
|
|
||||||
|
|
||||||
PlasmaCloud builds upon:
|
|
||||||
- **ChainFire**: Immutable event log for audit trails
|
|
||||||
- **FlareDB**: Distributed metadata store
|
|
||||||
- **OVN (Open Virtual Network)**: Production-grade overlay networking
|
|
||||||
- **gRPC**: High-performance RPC framework
|
|
||||||
- **Rust**: Safe, concurrent systems programming
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
**Status**: MVP-Beta Complete ✅
|
|
||||||
**Last Updated**: 2025-12-09
|
|
||||||
**Next Milestone**: FlashDNS, FiberLB, LightningStor integration
|
|
||||||
|
|
||||||
For detailed information, see:
|
|
||||||
- [Tenant Onboarding Guide](docs/getting-started/tenant-onboarding.md)
|
|
||||||
- [Architecture Documentation](docs/architecture/mvp-beta-tenant-path.md)
|
|
||||||
- [Test Documentation](docs/por/T023-e2e-tenant-path/e2e_test.md)
|
|
||||||
|
|
|
||||||
13
SECURITY.md
Normal file
13
SECURITY.md
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
# Security Policy
|
||||||
|
|
||||||
|
Do not report sensitive vulnerabilities through public issues.
|
||||||
|
|
||||||
|
Use the repository security advisory workflow or a private maintainer contact channel when this repository is published.
|
||||||
|
|
||||||
|
When reporting, include:
|
||||||
|
|
||||||
|
- affected component
|
||||||
|
- impact summary
|
||||||
|
- reproduction steps
|
||||||
|
- configuration assumptions
|
||||||
|
- any suggested mitigation or patch direction
|
||||||
|
|
@ -1,54 +0,0 @@
|
||||||
# Architectural Gap Analysis: Compute & Core
|
|
||||||
|
|
||||||
**Date:** 2025-12-08
|
|
||||||
**Scope:** Core Infrastructure (Chainfire, IAM, FlareDB) & Application Services (FlashDNS, PlasmaVMC)
|
|
||||||
|
|
||||||
## Executive Summary
|
|
||||||
|
|
||||||
The platform's core infrastructure ("Data" and "Identity" pillars) is in excellent shape, with implementation matching specifications closely. However, the "Compute" pillar (PlasmaVMC) exhibits a significant architectural deviation from its specification, currently existing as a monolithic prototype rather than the specified distributed control plane/agent model.
|
|
||||||
|
|
||||||
## Component Status Matrix
|
|
||||||
|
|
||||||
| Component | Role | Specification Status | Implementation Status | Alignment |
|
|
||||||
|-----------|------|----------------------|-----------------------|-----------|
|
|
||||||
| **Chainfire** | Cluster KVS | High | High | ✅ Strong |
|
|
||||||
| **Aegis (IAM)** | Identity | High | High | ✅ Strong |
|
|
||||||
| **FlareDB** | DBaaS KVS | High | High | ✅ Strong |
|
|
||||||
| **FlashDNS** | DNS Service | High | High | ✅ Strong |
|
|
||||||
| **PlasmaVMC** | VM Platform | High | **Low / Prototype** | ❌ **Mismatch** |
|
|
||||||
|
|
||||||
## Detailed Findings
|
|
||||||
|
|
||||||
### 1. Core Infrastructure (Chainfire, Aegis, FlareDB)
|
|
||||||
* **Chainfire:** Fully implemented crate structure. Detailed feature gap analysis exists (`chainfire_t003_gap_analysis.md`).
|
|
||||||
* **Aegis:** Correctly structured with `iam-server`, `iam-authn`, `iam-authz`, etc. Integration with Chainfire/FlareDB backends is present in `main.rs`.
|
|
||||||
* **FlareDB:** Correctly structured with `flaredb-pd`, `flaredb-server` (Multi-Raft), and reserved namespaces for IAM/Metrics.
|
|
||||||
|
|
||||||
### 2. Application Services (FlashDNS)
|
|
||||||
* **Status:** Excellent.
|
|
||||||
* **Evidence:** Crate structure matches spec. Integration with Chainfire (storage) and Aegis (auth) is visible in configuration and code.
|
|
||||||
|
|
||||||
### 3. Compute Platform (PlasmaVMC) - The Gap
|
|
||||||
* **Specification:** Describes a distributed system with:
|
|
||||||
* **Control Plane:** API, Scheduler, Image management.
|
|
||||||
* **Agent:** Runs on compute nodes, manages local hypervisors.
|
|
||||||
* **Communication:** gRPC between Control Plane and Agent.
|
|
||||||
* **Current Implementation:** Monolithic `plasmavmc-server`.
|
|
||||||
* The `server` binary directly initializes `HypervisorRegistry` and registers `KvmBackend`/`FireCrackerBackend`.
|
|
||||||
* **Missing Crates:**
|
|
||||||
* `plasmavmc-agent` (Critical)
|
|
||||||
* `plasmavmc-client`
|
|
||||||
* `plasmavmc-core` (Scheduler logic)
|
|
||||||
* **Implication:** The current code cannot support multi-node deployment or scheduling. It effectively runs the control plane *on* the hypervisor node.
|
|
||||||
|
|
||||||
## Recommendations
|
|
||||||
|
|
||||||
1. **Prioritize PlasmaVMC Refactoring:** The immediate engineering focus should be to split `plasmavmc-server` into:
|
|
||||||
* `plasmavmc-server` (Control Plane, Scheduler, API)
|
|
||||||
* `plasmavmc-agent` (Node status, Hypervisor control)
|
|
||||||
2. **Implement Agent Protocol:** Define the gRPC interface between Server and Agent (`agent.proto` mentioned in spec but possibly missing or unused).
|
|
||||||
3. **Leverage Existing Foundation:** The `plasmavmc-hypervisor` trait is solid. The `agent` implementation should simply wrap this existing trait, making the refactor straightforward.
|
|
||||||
|
|
||||||
## Conclusion
|
|
||||||
|
|
||||||
The project foundation is solid. The "Data" and "Identity" layers are ready for higher-level integration. The "Compute" layer requires architectural realignment to meet the distributed design goals.
|
|
||||||
|
|
@ -1,5 +0,0 @@
|
||||||
Peer Aへ:
|
|
||||||
/a あなたはpeerAです。戦略決定と計画立案に特化してください。実際の作業は、peerBへ依頼してください。PROJECT.mdは度々更新されることがあるので、PORに内容を追加したり、適切にMVPを設定・到達状況を確認するなどもあなたの仕事です。ともかく、終える前に確実にタスクをpeerBに渡すことを考えてください。
|
|
||||||
|
|
||||||
Peer Bへ:
|
|
||||||
/b peerAからの実装依頼に基づいて実装や実験などの作業を行い、終わったあとは必ずpeerAに結果を(to_peer.mdで)報告してください。高品質に作業を行うことに集中してください。
|
|
||||||
5094
advice.md
5094
advice.md
File diff suppressed because one or more lines are too long
819
apigateway/Cargo.lock
generated
819
apigateway/Cargo.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -23,7 +23,7 @@ tokio = { version = "1.40", features = ["full"] }
|
||||||
|
|
||||||
# HTTP server
|
# HTTP server
|
||||||
axum = "0.7"
|
axum = "0.7"
|
||||||
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] }
|
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "stream", "json"] }
|
||||||
|
|
||||||
# Serialization
|
# Serialization
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
|
|
@ -31,7 +31,7 @@ serde_json = "1.0"
|
||||||
toml = "0.8"
|
toml = "0.8"
|
||||||
|
|
||||||
# gRPC
|
# gRPC
|
||||||
tonic = "0.12"
|
tonic = { version = "0.12", features = ["tls"] }
|
||||||
tonic-build = "0.12"
|
tonic-build = "0.12"
|
||||||
prost = "0.13"
|
prost = "0.13"
|
||||||
prost-types = "0.13"
|
prost-types = "0.13"
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@ bytes = "1"
|
||||||
iam-api = { path = "../../../iam/crates/iam-api" }
|
iam-api = { path = "../../../iam/crates/iam-api" }
|
||||||
iam-authn = { path = "../../../iam/crates/iam-authn" }
|
iam-authn = { path = "../../../iam/crates/iam-authn" }
|
||||||
iam-authz = { path = "../../../iam/crates/iam-authz" }
|
iam-authz = { path = "../../../iam/crates/iam-authz" }
|
||||||
|
iam-service-auth = { path = "../../../iam/crates/iam-service-auth" }
|
||||||
iam-store = { path = "../../../iam/crates/iam-store" }
|
iam-store = { path = "../../../iam/crates/iam-store" }
|
||||||
iam-types = { path = "../../../iam/crates/iam-types" }
|
iam-types = { path = "../../../iam/crates/iam-types" }
|
||||||
creditservice-api = { path = "../../../creditservice/crates/creditservice-api" }
|
creditservice-api = { path = "../../../creditservice/crates/creditservice-api" }
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,8 @@
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::net::SocketAddr;
|
use std::net::SocketAddr;
|
||||||
use std::pin::Pin;
|
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::task::{Context, Poll};
|
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use apigateway_api::proto::{
|
use apigateway_api::proto::{
|
||||||
|
|
@ -13,15 +11,13 @@ use apigateway_api::proto::{
|
||||||
use apigateway_api::{GatewayAuthServiceClient, GatewayCreditServiceClient};
|
use apigateway_api::{GatewayAuthServiceClient, GatewayCreditServiceClient};
|
||||||
use axum::{
|
use axum::{
|
||||||
body::{to_bytes, Body},
|
body::{to_bytes, Body},
|
||||||
extract::State,
|
extract::{ConnectInfo, State},
|
||||||
http::{HeaderMap, Request, StatusCode, Uri},
|
http::{HeaderMap, Request, StatusCode, Uri},
|
||||||
response::Response,
|
response::Response,
|
||||||
routing::{any, get},
|
routing::{any, get},
|
||||||
Json, Router,
|
Json, Router,
|
||||||
};
|
};
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use bytes::Bytes;
|
|
||||||
use futures_core::Stream;
|
|
||||||
use reqwest::{Client, Url};
|
use reqwest::{Client, Url};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use tonic::transport::{Certificate, Channel, ClientTlsConfig, Endpoint, Identity};
|
use tonic::transport::{Certificate, Channel, ClientTlsConfig, Endpoint, Identity};
|
||||||
|
|
@ -31,8 +27,41 @@ use tracing_subscriber::EnvFilter;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
const DEFAULT_REQUEST_ID_HEADER: &str = "x-request-id";
|
const DEFAULT_REQUEST_ID_HEADER: &str = "x-request-id";
|
||||||
|
const PHOTON_AUTH_TOKEN_HEADER: &str = "x-photon-auth-token";
|
||||||
const DEFAULT_AUTH_TIMEOUT_MS: u64 = 500;
|
const DEFAULT_AUTH_TIMEOUT_MS: u64 = 500;
|
||||||
const DEFAULT_CREDIT_TIMEOUT_MS: u64 = 500;
|
const DEFAULT_CREDIT_TIMEOUT_MS: u64 = 500;
|
||||||
|
const DEFAULT_UPSTREAM_TIMEOUT_MS: u64 = 10_000;
|
||||||
|
const RESERVED_AUTH_HEADERS: [&str; 10] = [
|
||||||
|
"authorization",
|
||||||
|
"x-photon-auth-token",
|
||||||
|
"x-subject-id",
|
||||||
|
"x-org-id",
|
||||||
|
"x-project-id",
|
||||||
|
"x-roles",
|
||||||
|
"x-scopes",
|
||||||
|
"x-iam-session-id",
|
||||||
|
"x-iam-principal-kind",
|
||||||
|
"x-iam-auth-method",
|
||||||
|
];
|
||||||
|
const AUTH_PROVIDER_BLOCK_HEADERS: [&str; 17] = [
|
||||||
|
"authorization",
|
||||||
|
"x-photon-auth-token",
|
||||||
|
"x-subject-id",
|
||||||
|
"x-org-id",
|
||||||
|
"x-project-id",
|
||||||
|
"x-roles",
|
||||||
|
"x-scopes",
|
||||||
|
"proxy-authorization",
|
||||||
|
"cookie",
|
||||||
|
"set-cookie",
|
||||||
|
"host",
|
||||||
|
"connection",
|
||||||
|
"upgrade",
|
||||||
|
"keep-alive",
|
||||||
|
"te",
|
||||||
|
"trailer",
|
||||||
|
"transfer-encoding",
|
||||||
|
];
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
|
||||||
#[serde(rename_all = "snake_case")]
|
#[serde(rename_all = "snake_case")]
|
||||||
|
|
@ -62,6 +91,22 @@ fn default_credit_units() -> u64 {
|
||||||
1
|
1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn default_upstream_timeout_ms() -> u64 {
|
||||||
|
DEFAULT_UPSTREAM_TIMEOUT_MS
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
struct TlsConfig {
|
||||||
|
#[serde(default)]
|
||||||
|
ca_file: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
cert_file: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
key_file: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
domain_name: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
struct AuthProviderConfig {
|
struct AuthProviderConfig {
|
||||||
name: String,
|
name: String,
|
||||||
|
|
@ -70,6 +115,8 @@ struct AuthProviderConfig {
|
||||||
endpoint: String,
|
endpoint: String,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
timeout_ms: Option<u64>,
|
timeout_ms: Option<u64>,
|
||||||
|
#[serde(default)]
|
||||||
|
tls: Option<TlsConfig>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
|
@ -80,6 +127,8 @@ struct CreditProviderConfig {
|
||||||
endpoint: String,
|
endpoint: String,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
timeout_ms: Option<u64>,
|
timeout_ms: Option<u64>,
|
||||||
|
#[serde(default)]
|
||||||
|
tls: Option<TlsConfig>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
|
@ -103,6 +152,8 @@ struct RouteCreditConfig {
|
||||||
#[serde(default = "default_commit_policy")]
|
#[serde(default = "default_commit_policy")]
|
||||||
commit_on: CommitPolicy,
|
commit_on: CommitPolicy,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
|
allow_header_subject: bool,
|
||||||
|
#[serde(default)]
|
||||||
attributes: HashMap<String, String>,
|
attributes: HashMap<String, String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -114,6 +165,8 @@ struct RouteConfig {
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
strip_prefix: bool,
|
strip_prefix: bool,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
|
timeout_ms: Option<u64>,
|
||||||
|
#[serde(default)]
|
||||||
auth: Option<RouteAuthConfig>,
|
auth: Option<RouteAuthConfig>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
credit: Option<RouteCreditConfig>,
|
credit: Option<RouteCreditConfig>,
|
||||||
|
|
@ -134,6 +187,12 @@ struct ServerConfig {
|
||||||
log_level: String,
|
log_level: String,
|
||||||
#[serde(default = "default_max_body_bytes")]
|
#[serde(default = "default_max_body_bytes")]
|
||||||
max_body_bytes: usize,
|
max_body_bytes: usize,
|
||||||
|
#[serde(default = "default_max_response_bytes")]
|
||||||
|
max_response_bytes: usize,
|
||||||
|
#[serde(default = "default_upstream_timeout_ms")]
|
||||||
|
upstream_timeout_ms: u64,
|
||||||
|
#[serde(default)]
|
||||||
|
trust_forwarded_headers: bool,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
auth_providers: Vec<AuthProviderConfig>,
|
auth_providers: Vec<AuthProviderConfig>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
|
|
@ -148,6 +207,9 @@ impl Default for ServerConfig {
|
||||||
http_addr: default_http_addr(),
|
http_addr: default_http_addr(),
|
||||||
log_level: default_log_level(),
|
log_level: default_log_level(),
|
||||||
max_body_bytes: default_max_body_bytes(),
|
max_body_bytes: default_max_body_bytes(),
|
||||||
|
max_response_bytes: default_max_response_bytes(),
|
||||||
|
upstream_timeout_ms: default_upstream_timeout_ms(),
|
||||||
|
trust_forwarded_headers: false,
|
||||||
auth_providers: Vec::new(),
|
auth_providers: Vec::new(),
|
||||||
credit_providers: Vec::new(),
|
credit_providers: Vec::new(),
|
||||||
routes: Vec::new(),
|
routes: Vec::new(),
|
||||||
|
|
@ -175,9 +237,12 @@ struct Args {
|
||||||
struct ServerState {
|
struct ServerState {
|
||||||
routes: Vec<Route>,
|
routes: Vec<Route>,
|
||||||
client: Client,
|
client: Client,
|
||||||
|
upstream_timeout: Duration,
|
||||||
max_body_bytes: usize,
|
max_body_bytes: usize,
|
||||||
|
max_response_bytes: usize,
|
||||||
auth_providers: HashMap<String, AuthProvider>,
|
auth_providers: HashMap<String, AuthProvider>,
|
||||||
credit_providers: HashMap<String, CreditProvider>,
|
credit_providers: HashMap<String, CreditProvider>,
|
||||||
|
trust_forwarded_headers: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
|
|
@ -211,6 +276,13 @@ struct SubjectInfo {
|
||||||
scopes: Vec<String>,
|
scopes: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
struct CreditSubject {
|
||||||
|
subject_id: String,
|
||||||
|
org_id: String,
|
||||||
|
project_id: String,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
struct AuthDecision {
|
struct AuthDecision {
|
||||||
allow: bool,
|
allow: bool,
|
||||||
|
|
@ -238,84 +310,6 @@ struct CreditReservation {
|
||||||
reservation_id: String,
|
reservation_id: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct CreditFinalizeState {
|
|
||||||
state: Arc<ServerState>,
|
|
||||||
route: Route,
|
|
||||||
reservation: Option<CreditReservation>,
|
|
||||||
status: reqwest::StatusCode,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl CreditFinalizeState {
|
|
||||||
fn spawn_success(self) {
|
|
||||||
tokio::spawn(async move {
|
|
||||||
finalize_credit(&self.state, &self.route, self.reservation, self.status).await;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
fn spawn_abort(self) {
|
|
||||||
tokio::spawn(async move {
|
|
||||||
finalize_credit_abort(&self.state, &self.route, self.reservation).await;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct CreditFinalizeStream {
|
|
||||||
bytes: Option<Bytes>,
|
|
||||||
finalize: Option<CreditFinalizeState>,
|
|
||||||
completed: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl CreditFinalizeStream {
|
|
||||||
fn new(bytes: Bytes, finalize: CreditFinalizeState) -> Self {
|
|
||||||
Self {
|
|
||||||
bytes: Some(bytes),
|
|
||||||
finalize: Some(finalize),
|
|
||||||
completed: false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn finalize_success(&mut self) {
|
|
||||||
if self.completed {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
self.completed = true;
|
|
||||||
if let Some(finalize) = self.finalize.take() {
|
|
||||||
finalize.spawn_success();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn finalize_abort(&mut self) {
|
|
||||||
if self.completed {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
self.completed = true;
|
|
||||||
if let Some(finalize) = self.finalize.take() {
|
|
||||||
finalize.spawn_abort();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Stream for CreditFinalizeStream {
|
|
||||||
type Item = Result<Bytes, io::Error>;
|
|
||||||
|
|
||||||
fn poll_next(mut self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
|
||||||
if let Some(bytes) = self.bytes.take() {
|
|
||||||
return Poll::Ready(Some(Ok(bytes)));
|
|
||||||
}
|
|
||||||
|
|
||||||
self.finalize_success();
|
|
||||||
Poll::Ready(None)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Drop for CreditFinalizeStream {
|
|
||||||
fn drop(&mut self) {
|
|
||||||
if !self.completed {
|
|
||||||
self.finalize_abort();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
struct RequestContext {
|
struct RequestContext {
|
||||||
request_id: String,
|
request_id: String,
|
||||||
|
|
@ -341,6 +335,10 @@ fn default_max_body_bytes() -> usize {
|
||||||
16 * 1024 * 1024
|
16 * 1024 * 1024
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn default_max_response_bytes() -> usize {
|
||||||
|
default_max_body_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let args = Args::parse();
|
let args = Args::parse();
|
||||||
|
|
@ -374,10 +372,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let routes = build_routes(config.routes)?;
|
let routes = build_routes(config.routes)?;
|
||||||
let auth_providers = build_auth_providers(config.auth_providers).await?;
|
let auth_providers = build_auth_providers(config.auth_providers).await?;
|
||||||
let credit_providers = build_credit_providers(config.credit_providers).await?;
|
let credit_providers = build_credit_providers(config.credit_providers).await?;
|
||||||
|
let upstream_timeout = Duration::from_millis(config.upstream_timeout_ms);
|
||||||
|
let client = Client::builder().build()?;
|
||||||
|
|
||||||
info!("Starting API gateway");
|
info!("Starting API gateway");
|
||||||
info!(" HTTP: {}", config.http_addr);
|
info!(" HTTP: {}", config.http_addr);
|
||||||
info!(" Max body bytes: {}", config.max_body_bytes);
|
info!(" Max body bytes: {}", config.max_body_bytes);
|
||||||
|
info!(" Max response bytes: {}", config.max_response_bytes);
|
||||||
|
|
||||||
if !routes.is_empty() {
|
if !routes.is_empty() {
|
||||||
info!("Configured {} routes", routes.len());
|
info!("Configured {} routes", routes.len());
|
||||||
|
|
@ -394,10 +395,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
|
||||||
let state = Arc::new(ServerState {
|
let state = Arc::new(ServerState {
|
||||||
routes,
|
routes,
|
||||||
client: Client::new(),
|
client,
|
||||||
|
upstream_timeout,
|
||||||
max_body_bytes: config.max_body_bytes,
|
max_body_bytes: config.max_body_bytes,
|
||||||
|
max_response_bytes: config.max_response_bytes,
|
||||||
auth_providers,
|
auth_providers,
|
||||||
credit_providers,
|
credit_providers,
|
||||||
|
trust_forwarded_headers: config.trust_forwarded_headers,
|
||||||
});
|
});
|
||||||
|
|
||||||
let app = Router::new()
|
let app = Router::new()
|
||||||
|
|
@ -408,7 +412,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
.with_state(state);
|
.with_state(state);
|
||||||
|
|
||||||
let listener = tokio::net::TcpListener::bind(config.http_addr).await?;
|
let listener = tokio::net::TcpListener::bind(config.http_addr).await?;
|
||||||
axum::serve(listener, app).await?;
|
axum::serve(listener, app.into_make_service_with_connect_info::<SocketAddr>()).await?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
@ -427,6 +431,7 @@ async fn list_routes(State(state): State<Arc<ServerState>>) -> Json<Vec<RouteCon
|
||||||
|
|
||||||
async fn proxy(
|
async fn proxy(
|
||||||
State(state): State<Arc<ServerState>>,
|
State(state): State<Arc<ServerState>>,
|
||||||
|
ConnectInfo(remote_addr): ConnectInfo<SocketAddr>,
|
||||||
request: Request<Body>,
|
request: Request<Body>,
|
||||||
) -> Result<Response<Body>, StatusCode> {
|
) -> Result<Response<Body>, StatusCode> {
|
||||||
let path = request.uri().path();
|
let path = request.uri().path();
|
||||||
|
|
@ -441,15 +446,16 @@ async fn proxy(
|
||||||
path: request.uri().path().to_string(),
|
path: request.uri().path().to_string(),
|
||||||
raw_query: request.uri().query().unwrap_or("").to_string(),
|
raw_query: request.uri().query().unwrap_or("").to_string(),
|
||||||
headers: headers_to_map(request.headers()),
|
headers: headers_to_map(request.headers()),
|
||||||
client_ip: extract_client_ip(request.headers()),
|
client_ip: extract_client_ip(
|
||||||
|
request.headers(),
|
||||||
|
remote_addr,
|
||||||
|
state.trust_forwarded_headers,
|
||||||
|
),
|
||||||
route_name: route.config.name.clone(),
|
route_name: route.config.name.clone(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let auth_token = request
|
let auth_token = extract_auth_token(request.headers());
|
||||||
.headers()
|
let forward_client_auth_headers = route.config.auth.is_none();
|
||||||
.get(axum::http::header::AUTHORIZATION)
|
|
||||||
.and_then(|value| value.to_str().ok())
|
|
||||||
.map(|value| value.to_string());
|
|
||||||
|
|
||||||
let auth_outcome = enforce_auth(&state, &route, &context, auth_token).await?;
|
let auth_outcome = enforce_auth(&state, &route, &context, auth_token).await?;
|
||||||
let credit_reservation =
|
let credit_reservation =
|
||||||
|
|
@ -457,11 +463,22 @@ async fn proxy(
|
||||||
|
|
||||||
let target_url = build_upstream_url(&route, request.uri())?;
|
let target_url = build_upstream_url(&route, request.uri())?;
|
||||||
|
|
||||||
let mut builder = state.client.request(request.method().clone(), target_url);
|
let request_timeout =
|
||||||
|
Duration::from_millis(route.config.timeout_ms.unwrap_or(state.upstream_timeout.as_millis() as u64));
|
||||||
|
let mut builder = state
|
||||||
|
.client
|
||||||
|
.request(request.method().clone(), target_url)
|
||||||
|
.timeout(request_timeout);
|
||||||
for (name, value) in request.headers().iter() {
|
for (name, value) in request.headers().iter() {
|
||||||
if name == axum::http::header::HOST || name == axum::http::header::CONNECTION {
|
if name == axum::http::header::HOST || name == axum::http::header::CONNECTION {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if is_reserved_auth_header(name) {
|
||||||
|
if forward_client_auth_headers && should_preserve_client_auth_header(name.as_str()) {
|
||||||
|
builder = builder.header(name, value);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
builder = builder.header(name, value);
|
builder = builder.header(name, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -481,6 +498,12 @@ async fn proxy(
|
||||||
};
|
};
|
||||||
|
|
||||||
let status = response.status();
|
let status = response.status();
|
||||||
|
if let Some(content_length) = response.content_length() {
|
||||||
|
if state.max_response_bytes > 0 && content_length as usize > state.max_response_bytes {
|
||||||
|
finalize_credit_abort(&state, &route, credit_reservation).await;
|
||||||
|
return Err(StatusCode::PAYLOAD_TOO_LARGE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let mut response_builder = Response::builder().status(status);
|
let mut response_builder = Response::builder().status(status);
|
||||||
let headers = response_builder
|
let headers = response_builder
|
||||||
|
|
@ -494,23 +517,22 @@ async fn proxy(
|
||||||
headers.insert(name, value.clone());
|
headers.insert(name, value.clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
let bytes = match response.bytes().await {
|
let body = match response.bytes().await {
|
||||||
Ok(bytes) => bytes,
|
Ok(body) => body,
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
finalize_credit_abort(&state, &route, credit_reservation).await;
|
finalize_credit_abort(&state, &route, credit_reservation).await;
|
||||||
return Err(StatusCode::BAD_GATEWAY);
|
return Err(StatusCode::BAD_GATEWAY);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
if state.max_response_bytes > 0 && body.len() > state.max_response_bytes {
|
||||||
|
finalize_credit_abort(&state, &route, credit_reservation).await;
|
||||||
|
return Err(StatusCode::PAYLOAD_TOO_LARGE);
|
||||||
|
}
|
||||||
|
|
||||||
let finalize = CreditFinalizeState {
|
finalize_credit(&state, &route, credit_reservation, status).await;
|
||||||
state: Arc::clone(&state),
|
|
||||||
route,
|
|
||||||
reservation: credit_reservation,
|
|
||||||
status,
|
|
||||||
};
|
|
||||||
|
|
||||||
response_builder
|
response_builder
|
||||||
.body(Body::from_stream(CreditFinalizeStream::new(bytes, finalize)))
|
.body(Body::from(body))
|
||||||
.map_err(|_| StatusCode::BAD_GATEWAY)
|
.map_err(|_| StatusCode::BAD_GATEWAY)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -592,7 +614,22 @@ async fn enforce_credit(
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
let decision = reserve_credit(state, credit_cfg, context, subject).await;
|
let credit_subject = resolve_credit_subject(context, subject, credit_cfg.allow_header_subject);
|
||||||
|
if credit_subject.is_none() {
|
||||||
|
if credit_cfg.mode == PolicyMode::Required {
|
||||||
|
return Err(StatusCode::UNAUTHORIZED);
|
||||||
|
}
|
||||||
|
warn!("Credit skipped: missing org/project scope");
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
let decision = reserve_credit(
|
||||||
|
state,
|
||||||
|
credit_cfg,
|
||||||
|
context,
|
||||||
|
credit_subject.as_ref().expect("credit subject resolved"),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
apply_credit_mode(credit_cfg.mode, credit_cfg.fail_open, decision)
|
apply_credit_mode(credit_cfg.mode, credit_cfg.fail_open, decision)
|
||||||
.map(|decision| {
|
.map(|decision| {
|
||||||
decision.map(|decision| CreditReservation {
|
decision.map(|decision| CreditReservation {
|
||||||
|
|
@ -696,26 +733,56 @@ async fn authorize_request(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn resolve_credit_subject(
|
||||||
|
context: &RequestContext,
|
||||||
|
subject: Option<&SubjectInfo>,
|
||||||
|
allow_header_subject: bool,
|
||||||
|
) -> Option<CreditSubject> {
|
||||||
|
if let Some(subject) = subject {
|
||||||
|
return Some(CreditSubject {
|
||||||
|
subject_id: subject.subject_id.clone(),
|
||||||
|
org_id: subject.org_id.clone(),
|
||||||
|
project_id: subject.project_id.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if !allow_header_subject {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let org_id = context.headers.get("x-org-id")?.trim();
|
||||||
|
let project_id = context.headers.get("x-project-id")?.trim();
|
||||||
|
if org_id.is_empty() || project_id.is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let subject_id = context
|
||||||
|
.headers
|
||||||
|
.get("x-subject-id")
|
||||||
|
.map(|value| value.trim().to_string())
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
Some(CreditSubject {
|
||||||
|
subject_id,
|
||||||
|
org_id: org_id.to_string(),
|
||||||
|
project_id: project_id.to_string(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
async fn reserve_credit(
|
async fn reserve_credit(
|
||||||
state: &ServerState,
|
state: &ServerState,
|
||||||
credit_cfg: &RouteCreditConfig,
|
credit_cfg: &RouteCreditConfig,
|
||||||
context: &RequestContext,
|
context: &RequestContext,
|
||||||
subject: Option<&SubjectInfo>,
|
credit_subject: &CreditSubject,
|
||||||
) -> Result<CreditDecision, StatusCode> {
|
) -> Result<CreditDecision, StatusCode> {
|
||||||
let provider = state
|
let provider = state
|
||||||
.credit_providers
|
.credit_providers
|
||||||
.get(&credit_cfg.provider)
|
.get(&credit_cfg.provider)
|
||||||
.ok_or(StatusCode::INTERNAL_SERVER_ERROR)?;
|
.ok_or(StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
let (subject_id, org_id, project_id) = subject
|
let subject_id = credit_subject.subject_id.clone();
|
||||||
.map(|subject| {
|
let org_id = credit_subject.org_id.clone();
|
||||||
(
|
let project_id = credit_subject.project_id.clone();
|
||||||
subject.subject_id.clone(),
|
|
||||||
subject.org_id.clone(),
|
|
||||||
subject.project_id.clone(),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
.unwrap_or_default();
|
|
||||||
|
|
||||||
match provider {
|
match provider {
|
||||||
CreditProvider::Grpc(provider) => {
|
CreditProvider::Grpc(provider) => {
|
||||||
|
|
@ -875,8 +942,14 @@ async fn rollback_credit(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn apply_auth_headers(mut builder: reqwest::RequestBuilder, outcome: &AuthOutcome) -> reqwest::RequestBuilder {
|
fn apply_auth_headers(
|
||||||
|
mut builder: reqwest::RequestBuilder,
|
||||||
|
outcome: &AuthOutcome,
|
||||||
|
) -> reqwest::RequestBuilder {
|
||||||
for (key, value) in &outcome.headers {
|
for (key, value) in &outcome.headers {
|
||||||
|
if !should_forward_auth_header(key) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
builder = builder.header(key, value);
|
builder = builder.header(key, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -896,6 +969,39 @@ fn apply_auth_headers(mut builder: reqwest::RequestBuilder, outcome: &AuthOutcom
|
||||||
builder
|
builder
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn build_client_tls_config(
|
||||||
|
tls: &Option<TlsConfig>,
|
||||||
|
) -> Result<Option<ClientTlsConfig>, Box<dyn std::error::Error>> {
|
||||||
|
let Some(tls) = tls else {
|
||||||
|
return Ok(None);
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut tls_config = ClientTlsConfig::new();
|
||||||
|
|
||||||
|
if let Some(ca_file) = &tls.ca_file {
|
||||||
|
let ca = tokio::fs::read(ca_file).await?;
|
||||||
|
tls_config = tls_config.ca_certificate(Certificate::from_pem(ca));
|
||||||
|
}
|
||||||
|
|
||||||
|
match (&tls.cert_file, &tls.key_file) {
|
||||||
|
(Some(cert_file), Some(key_file)) => {
|
||||||
|
let cert = tokio::fs::read(cert_file).await?;
|
||||||
|
let key = tokio::fs::read(key_file).await?;
|
||||||
|
tls_config = tls_config.identity(Identity::from_pem(cert, key));
|
||||||
|
}
|
||||||
|
(None, None) => {}
|
||||||
|
_ => {
|
||||||
|
return Err(config_error("tls requires both cert_file and key_file").into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(domain) = &tls.domain_name {
|
||||||
|
tls_config = tls_config.domain_name(domain);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Some(tls_config))
|
||||||
|
}
|
||||||
|
|
||||||
async fn build_auth_providers(
|
async fn build_auth_providers(
|
||||||
configs: Vec<AuthProviderConfig>,
|
configs: Vec<AuthProviderConfig>,
|
||||||
) -> Result<HashMap<String, AuthProvider>, Box<dyn std::error::Error>> {
|
) -> Result<HashMap<String, AuthProvider>, Box<dyn std::error::Error>> {
|
||||||
|
|
@ -913,11 +1019,19 @@ async fn build_auth_providers(
|
||||||
|
|
||||||
match provider_type.as_str() {
|
match provider_type.as_str() {
|
||||||
"grpc" => {
|
"grpc" => {
|
||||||
let endpoint = Endpoint::from_shared(config.endpoint.clone())?
|
let mut endpoint = Endpoint::from_shared(config.endpoint.clone())?
|
||||||
.connect_timeout(Duration::from_millis(config.timeout_ms.unwrap_or(DEFAULT_AUTH_TIMEOUT_MS)))
|
.connect_timeout(Duration::from_millis(
|
||||||
.timeout(Duration::from_millis(config.timeout_ms.unwrap_or(DEFAULT_AUTH_TIMEOUT_MS)));
|
config.timeout_ms.unwrap_or(DEFAULT_AUTH_TIMEOUT_MS),
|
||||||
|
))
|
||||||
|
.timeout(Duration::from_millis(
|
||||||
|
config.timeout_ms.unwrap_or(DEFAULT_AUTH_TIMEOUT_MS),
|
||||||
|
));
|
||||||
|
if let Some(tls) = build_client_tls_config(&config.tls).await? {
|
||||||
|
endpoint = endpoint.tls_config(tls)?;
|
||||||
|
}
|
||||||
let channel = endpoint.connect().await?;
|
let channel = endpoint.connect().await?;
|
||||||
let timeout = Duration::from_millis(config.timeout_ms.unwrap_or(DEFAULT_AUTH_TIMEOUT_MS));
|
let timeout =
|
||||||
|
Duration::from_millis(config.timeout_ms.unwrap_or(DEFAULT_AUTH_TIMEOUT_MS));
|
||||||
providers.insert(
|
providers.insert(
|
||||||
config.name.clone(),
|
config.name.clone(),
|
||||||
AuthProvider::Grpc(GrpcAuthProvider {
|
AuthProvider::Grpc(GrpcAuthProvider {
|
||||||
|
|
@ -956,7 +1070,7 @@ async fn build_credit_providers(
|
||||||
|
|
||||||
match provider_type.as_str() {
|
match provider_type.as_str() {
|
||||||
"grpc" => {
|
"grpc" => {
|
||||||
let endpoint = Endpoint::from_shared(config.endpoint.clone())?
|
let mut endpoint = Endpoint::from_shared(config.endpoint.clone())?
|
||||||
.connect_timeout(Duration::from_millis(
|
.connect_timeout(Duration::from_millis(
|
||||||
config
|
config
|
||||||
.timeout_ms
|
.timeout_ms
|
||||||
|
|
@ -968,6 +1082,10 @@ async fn build_credit_providers(
|
||||||
.unwrap_or(DEFAULT_CREDIT_TIMEOUT_MS),
|
.unwrap_or(DEFAULT_CREDIT_TIMEOUT_MS),
|
||||||
));
|
));
|
||||||
|
|
||||||
|
if let Some(tls) = build_client_tls_config(&config.tls).await? {
|
||||||
|
endpoint = endpoint.tls_config(tls)?;
|
||||||
|
}
|
||||||
|
|
||||||
let channel = endpoint.connect().await?;
|
let channel = endpoint.connect().await?;
|
||||||
let timeout = Duration::from_millis(
|
let timeout = Duration::from_millis(
|
||||||
config
|
config
|
||||||
|
|
@ -1049,13 +1167,34 @@ fn extract_request_id(headers: &HeaderMap) -> String {
|
||||||
.unwrap_or_else(|| Uuid::new_v4().to_string())
|
.unwrap_or_else(|| Uuid::new_v4().to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract_client_ip(headers: &HeaderMap) -> String {
|
fn extract_client_ip(
|
||||||
headers
|
headers: &HeaderMap,
|
||||||
.get("x-forwarded-for")
|
remote_addr: SocketAddr,
|
||||||
.and_then(|value| value.to_str().ok())
|
trust_forwarded_headers: bool,
|
||||||
.and_then(|value| value.split(',').next())
|
) -> String {
|
||||||
.map(|value| value.trim().to_string())
|
if trust_forwarded_headers {
|
||||||
.unwrap_or_default()
|
if let Some(value) = headers
|
||||||
|
.get("x-forwarded-for")
|
||||||
|
.and_then(|value| value.to_str().ok())
|
||||||
|
.and_then(|value| value.split(',').next())
|
||||||
|
{
|
||||||
|
let trimmed = value.trim();
|
||||||
|
if !trimmed.is_empty() {
|
||||||
|
return trimmed.to_string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(value) = headers
|
||||||
|
.get("x-real-ip")
|
||||||
|
.and_then(|value| value.to_str().ok())
|
||||||
|
{
|
||||||
|
let trimmed = value.trim();
|
||||||
|
if !trimmed.is_empty() {
|
||||||
|
return trimmed.to_string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
remote_addr.ip().to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn headers_to_map(headers: &HeaderMap) -> HashMap<String, String> {
|
fn headers_to_map(headers: &HeaderMap) -> HashMap<String, String> {
|
||||||
|
|
@ -1073,6 +1212,78 @@ fn headers_to_map(headers: &HeaderMap) -> HashMap<String, String> {
|
||||||
map
|
map
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn extract_auth_token(headers: &HeaderMap) -> Option<String> {
|
||||||
|
let auth_header = headers
|
||||||
|
.get(axum::http::header::AUTHORIZATION)
|
||||||
|
.and_then(|value| value.to_str().ok());
|
||||||
|
if let Some(token) = auth_header.and_then(parse_auth_token_value) {
|
||||||
|
return Some(token);
|
||||||
|
}
|
||||||
|
|
||||||
|
let photon_header = headers
|
||||||
|
.get(PHOTON_AUTH_TOKEN_HEADER)
|
||||||
|
.and_then(|value| value.to_str().ok());
|
||||||
|
photon_header.and_then(parse_auth_token_value)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_reserved_auth_header(name: &axum::http::header::HeaderName) -> bool {
|
||||||
|
is_reserved_auth_header_str(name.as_str())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_reserved_auth_header_str(name: &str) -> bool {
|
||||||
|
let header = name.to_ascii_lowercase();
|
||||||
|
RESERVED_AUTH_HEADERS.iter().any(|value| *value == header)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn should_forward_auth_header(name: &str) -> bool {
|
||||||
|
let header = name.to_ascii_lowercase();
|
||||||
|
if AUTH_PROVIDER_BLOCK_HEADERS
|
||||||
|
.iter()
|
||||||
|
.any(|value| *value == header)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
header.starts_with("x-")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn should_preserve_client_auth_header(name: &str) -> bool {
|
||||||
|
let header = name.to_ascii_lowercase();
|
||||||
|
header == "authorization" || header == PHOTON_AUTH_TOKEN_HEADER
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_auth_token_value(value: &str) -> Option<String> {
|
||||||
|
let trimmed = value.trim();
|
||||||
|
if trimmed.is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(token) = parse_bearer_token(trimmed) {
|
||||||
|
return Some(token);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Legacy support: allow raw token values without a scheme.
|
||||||
|
if trimmed.split_whitespace().count() != 1 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(trimmed.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_bearer_token(value: &str) -> Option<String> {
|
||||||
|
let mut parts = value.split_whitespace();
|
||||||
|
let scheme = parts.next()?;
|
||||||
|
if !scheme.eq_ignore_ascii_case("bearer") {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let token = parts.next()?;
|
||||||
|
if parts.next().is_some() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(token.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
fn normalize_path_prefix(prefix: &str) -> String {
|
fn normalize_path_prefix(prefix: &str) -> String {
|
||||||
let trimmed = prefix.trim();
|
let trimmed = prefix.trim();
|
||||||
if trimmed.is_empty() {
|
if trimmed.is_empty() {
|
||||||
|
|
@ -1104,7 +1315,22 @@ fn normalize_upstream_base_path(path: &str) -> String {
|
||||||
fn match_route<'a>(routes: &'a [Route], path: &str) -> Option<&'a Route> {
|
fn match_route<'a>(routes: &'a [Route], path: &str) -> Option<&'a Route> {
|
||||||
routes
|
routes
|
||||||
.iter()
|
.iter()
|
||||||
.find(|route| path.starts_with(&route.config.path_prefix))
|
.find(|route| path_matches_prefix(path, &route.config.path_prefix))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn path_matches_prefix(path: &str, prefix: &str) -> bool {
|
||||||
|
if prefix == "/" {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if path == prefix {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
match path.strip_prefix(prefix) {
|
||||||
|
Some(stripped) => stripped.starts_with('/'),
|
||||||
|
None => false,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn strip_prefix_path(path: &str, prefix: &str) -> String {
|
fn strip_prefix_path(path: &str, prefix: &str) -> String {
|
||||||
|
|
@ -1160,9 +1386,8 @@ fn build_upstream_url(route: &Route, uri: &Uri) -> Result<Url, StatusCode> {
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use axum::routing::get;
|
use axum::routing::get;
|
||||||
use creditservice_api::{
|
use creditservice_api::{CreditServiceImpl, CreditStorage, GatewayCreditServiceImpl};
|
||||||
CreditServiceImpl, CreditStorage, GatewayCreditServiceImpl, GatewayCreditServiceServer,
|
use apigateway_api::GatewayCreditServiceServer;
|
||||||
};
|
|
||||||
use creditservice_types::Wallet;
|
use creditservice_types::Wallet;
|
||||||
use iam_api::{GatewayAuthServiceImpl, GatewayAuthServiceServer};
|
use iam_api::{GatewayAuthServiceImpl, GatewayAuthServiceServer};
|
||||||
use iam_authn::{InternalTokenConfig, InternalTokenService, SigningKey};
|
use iam_authn::{InternalTokenConfig, InternalTokenService, SigningKey};
|
||||||
|
|
@ -1173,12 +1398,28 @@ mod tests {
|
||||||
use tonic::transport::Server;
|
use tonic::transport::Server;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
async fn wait_for_test_tcp(addr: SocketAddr) {
|
||||||
|
let deadline = tokio::time::Instant::now() + Duration::from_secs(2);
|
||||||
|
loop {
|
||||||
|
if tokio::net::TcpStream::connect(addr).await.is_ok() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
assert!(
|
||||||
|
tokio::time::Instant::now() < deadline,
|
||||||
|
"timed out waiting for test listener {}",
|
||||||
|
addr
|
||||||
|
);
|
||||||
|
tokio::time::sleep(Duration::from_millis(25)).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn route_config(name: &str, prefix: &str, upstream: &str, strip_prefix: bool) -> RouteConfig {
|
fn route_config(name: &str, prefix: &str, upstream: &str, strip_prefix: bool) -> RouteConfig {
|
||||||
RouteConfig {
|
RouteConfig {
|
||||||
name: name.to_string(),
|
name: name.to_string(),
|
||||||
path_prefix: prefix.to_string(),
|
path_prefix: prefix.to_string(),
|
||||||
upstream: upstream.to_string(),
|
upstream: upstream.to_string(),
|
||||||
strip_prefix,
|
strip_prefix,
|
||||||
|
timeout_ms: None,
|
||||||
auth: None,
|
auth: None,
|
||||||
credit: None,
|
credit: None,
|
||||||
}
|
}
|
||||||
|
|
@ -1202,7 +1443,21 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn start_upstream() -> SocketAddr {
|
async fn start_upstream() -> SocketAddr {
|
||||||
let app = Router::new().route("/v1/echo", get(|| async { "ok" }));
|
let app = Router::new()
|
||||||
|
.route("/v1/echo", get(|| async { "ok" }))
|
||||||
|
.route(
|
||||||
|
"/v1/echo-auth",
|
||||||
|
get(|headers: HeaderMap| async move {
|
||||||
|
Json(serde_json::json!({
|
||||||
|
"authorization": headers
|
||||||
|
.get(axum::http::header::AUTHORIZATION)
|
||||||
|
.and_then(|value| value.to_str().ok()),
|
||||||
|
"photon_token": headers
|
||||||
|
.get(PHOTON_AUTH_TOKEN_HEADER)
|
||||||
|
.and_then(|value| value.to_str().ok()),
|
||||||
|
}))
|
||||||
|
}),
|
||||||
|
);
|
||||||
let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
|
let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
|
||||||
.await
|
.await
|
||||||
.expect("bind upstream");
|
.expect("bind upstream");
|
||||||
|
|
@ -1210,6 +1465,7 @@ mod tests {
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
axum::serve(listener, app).await.expect("upstream serve");
|
axum::serve(listener, app).await.expect("upstream serve");
|
||||||
});
|
});
|
||||||
|
wait_for_test_tcp(addr).await;
|
||||||
addr
|
addr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1279,10 +1535,11 @@ mod tests {
|
||||||
.expect("iam gateway serve");
|
.expect("iam gateway serve");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
wait_for_test_tcp(addr).await;
|
||||||
(addr, issued.token)
|
(addr, issued.token)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn start_credit_gateway() -> SocketAddr {
|
async fn start_credit_gateway(iam_addr: &SocketAddr) -> SocketAddr {
|
||||||
let storage = creditservice_api::InMemoryStorage::new();
|
let storage = creditservice_api::InMemoryStorage::new();
|
||||||
let wallet = Wallet::new("proj-1".into(), "org-1".into(), 100);
|
let wallet = Wallet::new("proj-1".into(), "org-1".into(), 100);
|
||||||
storage
|
storage
|
||||||
|
|
@ -1290,7 +1547,13 @@ mod tests {
|
||||||
.await
|
.await
|
||||||
.expect("wallet create");
|
.expect("wallet create");
|
||||||
|
|
||||||
let credit_service = Arc::new(CreditServiceImpl::new(storage));
|
let auth_service = Arc::new(
|
||||||
|
iam_service_auth::AuthService::new(&format!("http://{}", iam_addr))
|
||||||
|
.await
|
||||||
|
.expect("auth service"),
|
||||||
|
);
|
||||||
|
|
||||||
|
let credit_service = Arc::new(CreditServiceImpl::new(storage, auth_service));
|
||||||
let gateway_credit = GatewayCreditServiceImpl::new(credit_service);
|
let gateway_credit = GatewayCreditServiceImpl::new(credit_service);
|
||||||
|
|
||||||
let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
|
let listener = tokio::net::TcpListener::bind("127.0.0.1:0")
|
||||||
|
|
@ -1305,6 +1568,7 @@ mod tests {
|
||||||
.expect("credit gateway serve");
|
.expect("credit gateway serve");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
wait_for_test_tcp(addr).await;
|
||||||
addr
|
addr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1343,6 +1607,23 @@ mod tests {
|
||||||
assert_eq!(matched.config.name, "api-v1");
|
assert_eq!(matched.config.name, "api-v1");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_match_route_segment_boundary() {
|
||||||
|
let routes = build_routes(vec![
|
||||||
|
route_config("api", "/api", "http://example.com", false),
|
||||||
|
route_config("api2", "/api2", "http://example.com", false),
|
||||||
|
])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let matched = match_route(&routes, "/api2").unwrap();
|
||||||
|
assert_eq!(matched.config.name, "api2");
|
||||||
|
|
||||||
|
let matched = match_route(&routes, "/api2/health").unwrap();
|
||||||
|
assert_eq!(matched.config.name, "api2");
|
||||||
|
|
||||||
|
assert!(match_route(&routes, "/apiary").is_none());
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_build_upstream_url_preserves_query() {
|
fn test_build_upstream_url_preserves_query() {
|
||||||
let routes = build_routes(vec![route_config(
|
let routes = build_routes(vec![route_config(
|
||||||
|
|
@ -1419,13 +1700,14 @@ mod tests {
|
||||||
async fn test_gateway_auth_and_credit_flow() {
|
async fn test_gateway_auth_and_credit_flow() {
|
||||||
let upstream_addr = start_upstream().await;
|
let upstream_addr = start_upstream().await;
|
||||||
let (iam_addr, token) = start_iam_gateway().await;
|
let (iam_addr, token) = start_iam_gateway().await;
|
||||||
let credit_addr = start_credit_gateway().await;
|
let credit_addr = start_credit_gateway(&iam_addr).await;
|
||||||
|
|
||||||
let routes = build_routes(vec![RouteConfig {
|
let routes = build_routes(vec![RouteConfig {
|
||||||
name: "public".to_string(),
|
name: "public".to_string(),
|
||||||
path_prefix: "/v1".to_string(),
|
path_prefix: "/v1".to_string(),
|
||||||
upstream: format!("http://{}", upstream_addr),
|
upstream: format!("http://{}", upstream_addr),
|
||||||
strip_prefix: false,
|
strip_prefix: false,
|
||||||
|
timeout_ms: None,
|
||||||
auth: Some(RouteAuthConfig {
|
auth: Some(RouteAuthConfig {
|
||||||
provider: "iam".to_string(),
|
provider: "iam".to_string(),
|
||||||
mode: PolicyMode::Required,
|
mode: PolicyMode::Required,
|
||||||
|
|
@ -1437,6 +1719,7 @@ mod tests {
|
||||||
units: 1,
|
units: 1,
|
||||||
fail_open: false,
|
fail_open: false,
|
||||||
commit_on: CommitPolicy::Success,
|
commit_on: CommitPolicy::Success,
|
||||||
|
allow_header_subject: false,
|
||||||
attributes: HashMap::new(),
|
attributes: HashMap::new(),
|
||||||
}),
|
}),
|
||||||
}])
|
}])
|
||||||
|
|
@ -1447,6 +1730,7 @@ mod tests {
|
||||||
provider_type: "grpc".to_string(),
|
provider_type: "grpc".to_string(),
|
||||||
endpoint: format!("http://{}", iam_addr),
|
endpoint: format!("http://{}", iam_addr),
|
||||||
timeout_ms: Some(1000),
|
timeout_ms: Some(1000),
|
||||||
|
tls: None,
|
||||||
}])
|
}])
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
@ -1464,19 +1748,124 @@ mod tests {
|
||||||
let state = Arc::new(ServerState {
|
let state = Arc::new(ServerState {
|
||||||
routes,
|
routes,
|
||||||
client: Client::new(),
|
client: Client::new(),
|
||||||
|
upstream_timeout: Duration::from_secs(5),
|
||||||
max_body_bytes: 1024 * 1024,
|
max_body_bytes: 1024 * 1024,
|
||||||
|
max_response_bytes: 1024 * 1024,
|
||||||
auth_providers,
|
auth_providers,
|
||||||
credit_providers,
|
credit_providers,
|
||||||
|
trust_forwarded_headers: false,
|
||||||
|
});
|
||||||
|
|
||||||
|
let deadline = tokio::time::Instant::now() + Duration::from_secs(10);
|
||||||
|
let mut response = None;
|
||||||
|
while tokio::time::Instant::now() < deadline {
|
||||||
|
let request = Request::builder()
|
||||||
|
.method("GET")
|
||||||
|
.uri("/v1/echo")
|
||||||
|
.header(axum::http::header::AUTHORIZATION, &token)
|
||||||
|
.body(Body::empty())
|
||||||
|
.expect("request build");
|
||||||
|
|
||||||
|
match proxy(
|
||||||
|
State(Arc::clone(&state)),
|
||||||
|
ConnectInfo("127.0.0.1:40000".parse().unwrap()),
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(ok) => {
|
||||||
|
response = Some(ok);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Err(StatusCode::BAD_GATEWAY) => {
|
||||||
|
tokio::time::sleep(Duration::from_millis(25)).await;
|
||||||
|
}
|
||||||
|
Err(status) => panic!("unexpected proxy status: {}", status),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let response = response.expect("gateway auth+credit test timed out waiting for ready backends");
|
||||||
|
assert_eq!(response.status(), StatusCode::OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_proxy_forwards_client_auth_headers_when_route_has_no_auth() {
|
||||||
|
let upstream_addr = start_upstream().await;
|
||||||
|
let routes = build_routes(vec![route_config(
|
||||||
|
"passthrough",
|
||||||
|
"/v1",
|
||||||
|
&format!("http://{}", upstream_addr),
|
||||||
|
false,
|
||||||
|
)])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let state = Arc::new(ServerState {
|
||||||
|
routes,
|
||||||
|
client: Client::new(),
|
||||||
|
upstream_timeout: Duration::from_secs(5),
|
||||||
|
max_body_bytes: 1024 * 1024,
|
||||||
|
max_response_bytes: 1024 * 1024,
|
||||||
|
auth_providers: HashMap::new(),
|
||||||
|
credit_providers: HashMap::new(),
|
||||||
|
trust_forwarded_headers: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
let request = Request::builder()
|
let request = Request::builder()
|
||||||
.method("GET")
|
.method("GET")
|
||||||
.uri("/v1/echo")
|
.uri("/v1/echo-auth")
|
||||||
.header(axum::http::header::AUTHORIZATION, token)
|
.header(axum::http::header::AUTHORIZATION, "Bearer passthrough-token")
|
||||||
|
.header(PHOTON_AUTH_TOKEN_HEADER, "photon-token")
|
||||||
.body(Body::empty())
|
.body(Body::empty())
|
||||||
.expect("request build");
|
.expect("request build");
|
||||||
|
|
||||||
let response = proxy(State(state), request).await.unwrap();
|
let response = proxy(
|
||||||
|
State(state),
|
||||||
|
ConnectInfo("127.0.0.1:40000".parse().unwrap()),
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
assert_eq!(response.status(), StatusCode::OK);
|
assert_eq!(response.status(), StatusCode::OK);
|
||||||
|
|
||||||
|
let body = to_bytes(response.into_body(), 1024 * 1024).await.unwrap();
|
||||||
|
let json: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||||
|
assert_eq!(json.get("authorization").and_then(|v| v.as_str()), Some("Bearer passthrough-token"));
|
||||||
|
assert_eq!(json.get("photon_token").and_then(|v| v.as_str()), Some("photon-token"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_extract_auth_token_accepts_bearer_authorization() {
|
||||||
|
let mut headers = HeaderMap::new();
|
||||||
|
headers.insert(
|
||||||
|
axum::http::header::AUTHORIZATION,
|
||||||
|
"Bearer abc123".parse().unwrap(),
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(extract_auth_token(&headers).as_deref(), Some("abc123"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_extract_auth_token_accepts_legacy_raw_authorization() {
|
||||||
|
let mut headers = HeaderMap::new();
|
||||||
|
headers.insert(
|
||||||
|
axum::http::header::AUTHORIZATION,
|
||||||
|
"raw-token".parse().unwrap(),
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(extract_auth_token(&headers).as_deref(), Some("raw-token"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_extract_auth_token_falls_back_to_photon_header() {
|
||||||
|
let mut headers = HeaderMap::new();
|
||||||
|
headers.insert(
|
||||||
|
axum::http::header::AUTHORIZATION,
|
||||||
|
"Basic abc".parse().unwrap(),
|
||||||
|
);
|
||||||
|
headers.insert(PHOTON_AUTH_TOKEN_HEADER, "photon-token".parse().unwrap());
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
extract_auth_token(&headers).as_deref(),
|
||||||
|
Some("photon-token")
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,763 +0,0 @@
|
||||||
# First-Boot Automation Architecture
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
The first-boot automation system provides automated cluster joining and service initialization for bare-metal provisioned nodes. It handles two critical scenarios:
|
|
||||||
|
|
||||||
1. **Bootstrap Mode**: First 3 nodes initialize a new Raft cluster
|
|
||||||
2. **Join Mode**: Additional nodes join an existing cluster
|
|
||||||
|
|
||||||
This document describes the architecture, design decisions, and implementation details.
|
|
||||||
|
|
||||||
## System Architecture
|
|
||||||
|
|
||||||
### Component Hierarchy
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────────────────────────┐
|
|
||||||
│ NixOS Boot Process │
|
|
||||||
└────────────────────┬────────────────────────────────────────┘
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
┌─────────────────────────────────────────────────────────────┐
|
|
||||||
│ systemd.target: multi-user.target │
|
|
||||||
└────────────────────┬────────────────────────────────────────┘
|
|
||||||
│
|
|
||||||
┌───────────────┼───────────────┐
|
|
||||||
│ │ │
|
|
||||||
▼ ▼ ▼
|
|
||||||
┌──────────┐ ┌──────────┐ ┌──────────┐
|
|
||||||
│chainfire │ │ flaredb │ │ iam │
|
|
||||||
│.service │ │.service │ │.service │
|
|
||||||
└────┬─────┘ └────┬─────┘ └────┬─────┘
|
|
||||||
│ │ │
|
|
||||||
▼ ▼ ▼
|
|
||||||
┌──────────────────────────────────────────┐
|
|
||||||
│ chainfire-cluster-join.service │
|
|
||||||
│ - Waits for local chainfire health │
|
|
||||||
│ - Checks bootstrap flag │
|
|
||||||
│ - Joins cluster if bootstrap=false │
|
|
||||||
└────────────────┬─────────────────────────┘
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
┌──────────────────────────────────────────┐
|
|
||||||
│ flaredb-cluster-join.service │
|
|
||||||
│ - Requires chainfire-cluster-join │
|
|
||||||
│ - Waits for local flaredb health │
|
|
||||||
│ - Joins FlareDB cluster │
|
|
||||||
└────────────────┬─────────────────────────┘
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
┌──────────────────────────────────────────┐
|
|
||||||
│ iam-initial-setup.service │
|
|
||||||
│ - Waits for IAM health │
|
|
||||||
│ - Creates admin user if needed │
|
|
||||||
│ - Generates initial tokens │
|
|
||||||
└────────────────┬─────────────────────────┘
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
┌──────────────────────────────────────────┐
|
|
||||||
│ cluster-health-check.service │
|
|
||||||
│ - Polls all service health endpoints │
|
|
||||||
│ - Verifies cluster membership │
|
|
||||||
│ - Reports to journald │
|
|
||||||
└──────────────────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
### Configuration Flow
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────┐
|
|
||||||
│ Provisioning Server │
|
|
||||||
│ - Generates cluster-config.json │
|
|
||||||
│ - Copies to /etc/nixos/secrets/ │
|
|
||||||
└────────────────┬────────────────────────┘
|
|
||||||
│
|
|
||||||
│ nixos-anywhere
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
┌─────────────────────────────────────────┐
|
|
||||||
│ Target Node │
|
|
||||||
│ /etc/nixos/secrets/cluster-config.json │
|
|
||||||
└────────────────┬────────────────────────┘
|
|
||||||
│
|
|
||||||
│ Read by NixOS module
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
┌─────────────────────────────────────────┐
|
|
||||||
│ first-boot-automation.nix │
|
|
||||||
│ - Parses JSON config │
|
|
||||||
│ - Creates systemd services │
|
|
||||||
│ - Sets up dependencies │
|
|
||||||
└────────────────┬────────────────────────┘
|
|
||||||
│
|
|
||||||
│ systemd activation
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
┌─────────────────────────────────────────┐
|
|
||||||
│ Cluster Join Services │
|
|
||||||
│ - Execute join logic │
|
|
||||||
│ - Create marker files │
|
|
||||||
│ - Log to journald │
|
|
||||||
└─────────────────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
## Bootstrap vs Join Decision Logic
|
|
||||||
|
|
||||||
### Decision Tree
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────┐
|
|
||||||
│ Node Boots │
|
|
||||||
└────────┬────────┘
|
|
||||||
│
|
|
||||||
┌────────▼────────┐
|
|
||||||
│ Read cluster- │
|
|
||||||
│ config.json │
|
|
||||||
└────────┬────────┘
|
|
||||||
│
|
|
||||||
┌────────▼────────┐
|
|
||||||
│ bootstrap=true? │
|
|
||||||
└────────┬────────┘
|
|
||||||
│
|
|
||||||
┌────────────┴────────────┐
|
|
||||||
│ │
|
|
||||||
YES ▼ ▼ NO
|
|
||||||
┌─────────────────┐ ┌─────────────────┐
|
|
||||||
│ Bootstrap Mode │ │ Join Mode │
|
|
||||||
│ │ │ │
|
|
||||||
│ - Skip cluster │ │ - Wait for │
|
|
||||||
│ join API │ │ local health │
|
|
||||||
│ - Raft cluster │ │ - Contact │
|
|
||||||
│ initializes │ │ leader │
|
|
||||||
│ internally │ │ - POST to │
|
|
||||||
│ - Create marker │ │ /member/add │
|
|
||||||
│ - Exit success │ │ - Retry 5x │
|
|
||||||
└─────────────────┘ └─────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
### Bootstrap Mode (bootstrap: true)
|
|
||||||
|
|
||||||
**When to use:**
|
|
||||||
- First 3 nodes in a new cluster
|
|
||||||
- Nodes configured with matching `initial_peers`
|
|
||||||
- No existing cluster to join
|
|
||||||
|
|
||||||
**Behavior:**
|
|
||||||
1. Service starts with `--initial-cluster` parameter containing all bootstrap peers
|
|
||||||
2. Raft consensus protocol automatically elects leader
|
|
||||||
3. Cluster join service detects bootstrap mode and exits immediately
|
|
||||||
4. No API calls to leader (cluster doesn't exist yet)
|
|
||||||
|
|
||||||
**Configuration:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"bootstrap": true,
|
|
||||||
"initial_peers": ["node01:2380", "node02:2380", "node03:2380"]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Marker file:** `/var/lib/first-boot-automation/.chainfire-initialized`
|
|
||||||
|
|
||||||
### Join Mode (bootstrap: false)
|
|
||||||
|
|
||||||
**When to use:**
|
|
||||||
- Nodes joining an existing cluster
|
|
||||||
- Expansion or replacement nodes
|
|
||||||
- Leader URL is known and reachable
|
|
||||||
|
|
||||||
**Behavior:**
|
|
||||||
1. Service starts with no initial cluster configuration
|
|
||||||
2. Cluster join service waits for local service health
|
|
||||||
3. POST to leader's `/admin/member/add` with node info
|
|
||||||
4. Leader adds member to Raft configuration
|
|
||||||
5. Node joins cluster and synchronizes state
|
|
||||||
|
|
||||||
**Configuration:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"bootstrap": false,
|
|
||||||
"leader_url": "https://node01.example.com:2379",
|
|
||||||
"raft_addr": "10.0.1.13:2380"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Marker file:** `/var/lib/first-boot-automation/.chainfire-joined`
|
|
||||||
|
|
||||||
## Idempotency and State Management
|
|
||||||
|
|
||||||
### Marker Files
|
|
||||||
|
|
||||||
The system uses marker files to track initialization state:
|
|
||||||
|
|
||||||
```
|
|
||||||
/var/lib/first-boot-automation/
|
|
||||||
├── .chainfire-initialized # Bootstrap node initialized
|
|
||||||
├── .chainfire-joined # Node joined cluster
|
|
||||||
├── .flaredb-initialized # FlareDB bootstrap
|
|
||||||
├── .flaredb-joined # FlareDB joined
|
|
||||||
└── .iam-initialized # IAM setup complete
|
|
||||||
```
|
|
||||||
|
|
||||||
**Purpose:**
|
|
||||||
- Prevent duplicate join attempts on reboot
|
|
||||||
- Support idempotent operations
|
|
||||||
- Enable troubleshooting (check timestamps)
|
|
||||||
|
|
||||||
**Format:** ISO8601 timestamp of initialization
|
|
||||||
```
|
|
||||||
2025-12-10T10:30:45+00:00
|
|
||||||
```
|
|
||||||
|
|
||||||
### State Transitions
|
|
||||||
|
|
||||||
```
|
|
||||||
┌──────────────┐
|
|
||||||
│ First Boot │
|
|
||||||
│ (no marker) │
|
|
||||||
└──────┬───────┘
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
┌──────────────┐
|
|
||||||
│ Check Config │
|
|
||||||
│ bootstrap=? │
|
|
||||||
└──────┬───────┘
|
|
||||||
│
|
|
||||||
├─(true)──▶ Bootstrap ──▶ Create .initialized ──▶ Done
|
|
||||||
│
|
|
||||||
└─(false)─▶ Join ──▶ Create .joined ──▶ Done
|
|
||||||
│
|
|
||||||
│ (reboot)
|
|
||||||
▼
|
|
||||||
┌──────────────┐
|
|
||||||
│ Marker Exists│
|
|
||||||
│ Skip Join │
|
|
||||||
└──────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
## Retry Logic and Error Handling
|
|
||||||
|
|
||||||
### Health Check Retry
|
|
||||||
|
|
||||||
**Parameters:**
|
|
||||||
- Timeout: 120 seconds (configurable)
|
|
||||||
- Retry Interval: 5 seconds
|
|
||||||
- Max Elapsed: 300 seconds
|
|
||||||
|
|
||||||
**Logic:**
|
|
||||||
```bash
|
|
||||||
START_TIME=$(date +%s)
|
|
||||||
while true; do
|
|
||||||
ELAPSED=$(($(date +%s) - START_TIME))
|
|
||||||
if [[ $ELAPSED -ge $TIMEOUT ]]; then
|
|
||||||
exit 1 # Timeout
|
|
||||||
fi
|
|
||||||
|
|
||||||
HTTP_CODE=$(curl -k -s -o /dev/null -w "%{http_code}" "$HEALTH_URL")
|
|
||||||
if [[ "$HTTP_CODE" == "200" ]]; then
|
|
||||||
exit 0 # Success
|
|
||||||
fi
|
|
||||||
|
|
||||||
sleep 5
|
|
||||||
done
|
|
||||||
```
|
|
||||||
|
|
||||||
### Cluster Join Retry
|
|
||||||
|
|
||||||
**Parameters:**
|
|
||||||
- Max Attempts: 5 (configurable)
|
|
||||||
- Retry Delay: 10 seconds
|
|
||||||
- Exponential Backoff: Optional (not implemented)
|
|
||||||
|
|
||||||
**Logic:**
|
|
||||||
```bash
|
|
||||||
for ATTEMPT in $(seq 1 $MAX_ATTEMPTS); do
|
|
||||||
HTTP_CODE=$(curl -X POST "$LEADER_URL/admin/member/add" -d "$PAYLOAD")
|
|
||||||
|
|
||||||
if [[ "$HTTP_CODE" == "200" || "$HTTP_CODE" == "201" ]]; then
|
|
||||||
exit 0 # Success
|
|
||||||
elif [[ "$HTTP_CODE" == "409" ]]; then
|
|
||||||
exit 2 # Already member
|
|
||||||
fi
|
|
||||||
|
|
||||||
sleep $RETRY_DELAY
|
|
||||||
done
|
|
||||||
|
|
||||||
exit 1 # Max attempts exhausted
|
|
||||||
```
|
|
||||||
|
|
||||||
### Error Codes
|
|
||||||
|
|
||||||
**Health Check:**
|
|
||||||
- `0`: Service healthy
|
|
||||||
- `1`: Timeout or unhealthy
|
|
||||||
|
|
||||||
**Cluster Join:**
|
|
||||||
- `0`: Successfully joined
|
|
||||||
- `1`: Failed after max attempts
|
|
||||||
- `2`: Already joined (idempotent)
|
|
||||||
- `3`: Invalid arguments
|
|
||||||
|
|
||||||
**Bootstrap Detector:**
|
|
||||||
- `0`: Should bootstrap
|
|
||||||
- `1`: Should join existing
|
|
||||||
- `2`: Configuration error
|
|
||||||
|
|
||||||
## Security Considerations
|
|
||||||
|
|
||||||
### TLS Certificate Handling
|
|
||||||
|
|
||||||
**Requirements:**
|
|
||||||
- All inter-node communication uses TLS
|
|
||||||
- Self-signed certificates supported via `-k` flag to curl
|
|
||||||
- Certificate validation in production (remove `-k`)
|
|
||||||
|
|
||||||
**Certificate Paths:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"tls": {
|
|
||||||
"enabled": true,
|
|
||||||
"ca_cert_path": "/etc/nixos/secrets/ca.crt",
|
|
||||||
"node_cert_path": "/etc/nixos/secrets/node01.crt",
|
|
||||||
"node_key_path": "/etc/nixos/secrets/node01.key"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Integration with T031:**
|
|
||||||
- Certificates generated by T031 TLS automation
|
|
||||||
- Copied to target during provisioning
|
|
||||||
- Read by services at startup
|
|
||||||
|
|
||||||
### Secrets Management
|
|
||||||
|
|
||||||
**Cluster Configuration:**
|
|
||||||
- Stored in `/etc/nixos/secrets/cluster-config.json`
|
|
||||||
- Permissions: `0600 root:root` (recommended)
|
|
||||||
- Contains sensitive data: URLs, IPs, topology
|
|
||||||
|
|
||||||
**API Credentials:**
|
|
||||||
- IAM admin credentials (future implementation)
|
|
||||||
- Stored in separate file: `/etc/nixos/secrets/iam-admin.json`
|
|
||||||
- Never logged to journald
|
|
||||||
|
|
||||||
### Attack Surface
|
|
||||||
|
|
||||||
**Mitigations:**
|
|
||||||
1. **Network-level**: Firewall rules restrict cluster API ports
|
|
||||||
2. **Application-level**: mTLS for authenticated requests
|
|
||||||
3. **Access control**: SystemD service isolation
|
|
||||||
4. **Audit**: All operations logged to journald with structured JSON
|
|
||||||
|
|
||||||
## Integration Points
|
|
||||||
|
|
||||||
### T024 NixOS Modules
|
|
||||||
|
|
||||||
The first-boot automation module imports and extends service modules:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
# Example: netboot-control-plane.nix
|
|
||||||
{
|
|
||||||
imports = [
|
|
||||||
../modules/chainfire.nix
|
|
||||||
../modules/flaredb.nix
|
|
||||||
../modules/iam.nix
|
|
||||||
../modules/first-boot-automation.nix
|
|
||||||
];
|
|
||||||
|
|
||||||
services.first-boot-automation.enable = true;
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### T031 TLS Certificates
|
|
||||||
|
|
||||||
**Dependencies:**
|
|
||||||
- TLS certificates must exist before first boot
|
|
||||||
- Provisioning script copies certificates to `/etc/nixos/secrets/`
|
|
||||||
- Services read certificates at startup
|
|
||||||
|
|
||||||
**Certificate Generation:**
|
|
||||||
```bash
|
|
||||||
# On provisioning server (T031)
|
|
||||||
./tls/generate-node-cert.sh node01.example.com 10.0.1.10
|
|
||||||
|
|
||||||
# Copied to target
|
|
||||||
scp ca.crt node01.crt node01.key root@10.0.1.10:/etc/nixos/secrets/
|
|
||||||
```
|
|
||||||
|
|
||||||
### T032.S1-S3 PXE/Netboot
|
|
||||||
|
|
||||||
**Boot Flow:**
|
|
||||||
1. PXE boot loads iPXE firmware
|
|
||||||
2. iPXE chainloads NixOS kernel/initrd
|
|
||||||
3. NixOS installer runs (nixos-anywhere)
|
|
||||||
4. System installed to disk with first-boot automation
|
|
||||||
5. Reboot into installed system
|
|
||||||
6. First-boot automation executes
|
|
||||||
|
|
||||||
**Configuration Injection:**
|
|
||||||
```bash
|
|
||||||
# During nixos-anywhere provisioning
|
|
||||||
mkdir -p /mnt/etc/nixos/secrets
|
|
||||||
cp cluster-config.json /mnt/etc/nixos/secrets/
|
|
||||||
chmod 600 /mnt/etc/nixos/secrets/cluster-config.json
|
|
||||||
```
|
|
||||||
|
|
||||||
## Service Dependencies
|
|
||||||
|
|
||||||
### Systemd Ordering
|
|
||||||
|
|
||||||
**Chainfire:**
|
|
||||||
```
|
|
||||||
After: network-online.target, chainfire.service
|
|
||||||
Before: flaredb-cluster-join.service
|
|
||||||
Wants: network-online.target
|
|
||||||
```
|
|
||||||
|
|
||||||
**FlareDB:**
|
|
||||||
```
|
|
||||||
After: chainfire-cluster-join.service, flaredb.service
|
|
||||||
Requires: chainfire-cluster-join.service
|
|
||||||
Before: iam-initial-setup.service
|
|
||||||
```
|
|
||||||
|
|
||||||
**IAM:**
|
|
||||||
```
|
|
||||||
After: flaredb-cluster-join.service, iam.service
|
|
||||||
Before: cluster-health-check.service
|
|
||||||
```
|
|
||||||
|
|
||||||
**Health Check:**
|
|
||||||
```
|
|
||||||
After: chainfire-cluster-join.service, flaredb-cluster-join.service, iam-initial-setup.service
|
|
||||||
Type: oneshot (no RemainAfterExit)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Dependency Graph
|
|
||||||
|
|
||||||
```
|
|
||||||
network-online.target
|
|
||||||
│
|
|
||||||
├──▶ chainfire.service
|
|
||||||
│ │
|
|
||||||
│ ▼
|
|
||||||
│ chainfire-cluster-join.service
|
|
||||||
│ │
|
|
||||||
├──▶ flaredb.service
|
|
||||||
│ │
|
|
||||||
│ ▼
|
|
||||||
└────▶ flaredb-cluster-join.service
|
|
||||||
│
|
|
||||||
┌────┴────┐
|
|
||||||
│ │
|
|
||||||
iam.service │
|
|
||||||
│ │
|
|
||||||
▼ │
|
|
||||||
iam-initial-setup.service
|
|
||||||
│ │
|
|
||||||
└────┬────┘
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
cluster-health-check.service
|
|
||||||
```
|
|
||||||
|
|
||||||
## Logging and Observability
|
|
||||||
|
|
||||||
### Structured Logging
|
|
||||||
|
|
||||||
All scripts output JSON-formatted logs:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"timestamp": "2025-12-10T10:30:45+00:00",
|
|
||||||
"level": "INFO",
|
|
||||||
"service": "chainfire",
|
|
||||||
"operation": "cluster-join",
|
|
||||||
"message": "Successfully joined cluster"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Benefits:**
|
|
||||||
- Machine-readable for log aggregation (T025)
|
|
||||||
- Easy filtering with `journalctl -o json`
|
|
||||||
- Includes context (service, operation, timestamp)
|
|
||||||
|
|
||||||
### Querying Logs
|
|
||||||
|
|
||||||
**View all first-boot automation logs:**
|
|
||||||
```bash
|
|
||||||
journalctl -u chainfire-cluster-join.service -u flaredb-cluster-join.service \
|
|
||||||
-u iam-initial-setup.service -u cluster-health-check.service
|
|
||||||
```
|
|
||||||
|
|
||||||
**Filter by log level:**
|
|
||||||
```bash
|
|
||||||
journalctl -u chainfire-cluster-join.service | grep '"level": "ERROR"'
|
|
||||||
```
|
|
||||||
|
|
||||||
**Follow live:**
|
|
||||||
```bash
|
|
||||||
journalctl -u chainfire-cluster-join.service -f
|
|
||||||
```
|
|
||||||
|
|
||||||
### Health Check Integration
|
|
||||||
|
|
||||||
**T025 Observability:**
|
|
||||||
- Health check service can POST to metrics endpoint
|
|
||||||
- Prometheus scraping of `/health` endpoints
|
|
||||||
- Alerts on cluster join failures
|
|
||||||
|
|
||||||
**Future:**
|
|
||||||
- Webhook to provisioning server on completion
|
|
||||||
- Slack/email notifications on errors
|
|
||||||
- Dashboard showing cluster join status
|
|
||||||
|
|
||||||
## Performance Characteristics
|
|
||||||
|
|
||||||
### Boot Time Analysis
|
|
||||||
|
|
||||||
**Typical Timeline (3-node cluster):**
|
|
||||||
```
|
|
||||||
T+0s : systemd starts
|
|
||||||
T+5s : network-online.target reached
|
|
||||||
T+10s : chainfire.service starts
|
|
||||||
T+15s : chainfire healthy
|
|
||||||
T+15s : chainfire-cluster-join runs (bootstrap, immediate exit)
|
|
||||||
T+20s : flaredb.service starts
|
|
||||||
T+25s : flaredb healthy
|
|
||||||
T+25s : flaredb-cluster-join runs (bootstrap, immediate exit)
|
|
||||||
T+30s : iam.service starts
|
|
||||||
T+35s : iam healthy
|
|
||||||
T+35s : iam-initial-setup runs
|
|
||||||
T+40s : cluster-health-check runs
|
|
||||||
T+40s : Node fully operational
|
|
||||||
```
|
|
||||||
|
|
||||||
**Join Mode (node joining existing cluster):**
|
|
||||||
```
|
|
||||||
T+0s : systemd starts
|
|
||||||
T+5s : network-online.target reached
|
|
||||||
T+10s : chainfire.service starts
|
|
||||||
T+15s : chainfire healthy
|
|
||||||
T+15s : chainfire-cluster-join runs
|
|
||||||
T+20s : POST to leader, wait for response
|
|
||||||
T+25s : Successfully joined chainfire cluster
|
|
||||||
T+25s : flaredb.service starts
|
|
||||||
T+30s : flaredb healthy
|
|
||||||
T+30s : flaredb-cluster-join runs
|
|
||||||
T+35s : Successfully joined flaredb cluster
|
|
||||||
T+40s : iam-initial-setup (skips, already initialized)
|
|
||||||
T+45s : cluster-health-check runs
|
|
||||||
T+45s : Node fully operational
|
|
||||||
```
|
|
||||||
|
|
||||||
### Bottlenecks
|
|
||||||
|
|
||||||
**Health Check Polling:**
|
|
||||||
- 5-second intervals may be too aggressive
|
|
||||||
- Recommendation: Exponential backoff
|
|
||||||
|
|
||||||
**Network Latency:**
|
|
||||||
- Join requests block on network RTT
|
|
||||||
- Mitigation: Ensure low-latency cluster network
|
|
||||||
|
|
||||||
**Raft Synchronization:**
|
|
||||||
- New member must catch up on Raft log
|
|
||||||
- Time depends on log size (seconds to minutes)
|
|
||||||
|
|
||||||
## Failure Modes and Recovery
|
|
||||||
|
|
||||||
### Common Failures
|
|
||||||
|
|
||||||
**1. Leader Unreachable**
|
|
||||||
|
|
||||||
**Symptom:**
|
|
||||||
```json
|
|
||||||
{"level":"ERROR","message":"Join request failed: connection error"}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Diagnosis:**
|
|
||||||
- Check network connectivity: `ping node01.example.com`
|
|
||||||
- Verify firewall rules: `iptables -L`
|
|
||||||
- Check leader service status: `systemctl status chainfire.service`
|
|
||||||
|
|
||||||
**Recovery:**
|
|
||||||
```bash
|
|
||||||
# Fix network/firewall, then restart join service
|
|
||||||
systemctl restart chainfire-cluster-join.service
|
|
||||||
```
|
|
||||||
|
|
||||||
**2. Invalid Configuration**
|
|
||||||
|
|
||||||
**Symptom:**
|
|
||||||
```json
|
|
||||||
{"level":"ERROR","message":"Configuration file not found"}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Diagnosis:**
|
|
||||||
- Verify file exists: `ls -la /etc/nixos/secrets/cluster-config.json`
|
|
||||||
- Check JSON syntax: `jq . /etc/nixos/secrets/cluster-config.json`
|
|
||||||
|
|
||||||
**Recovery:**
|
|
||||||
```bash
|
|
||||||
# Fix configuration, then restart
|
|
||||||
systemctl restart chainfire-cluster-join.service
|
|
||||||
```
|
|
||||||
|
|
||||||
**3. Service Not Healthy**
|
|
||||||
|
|
||||||
**Symptom:**
|
|
||||||
```json
|
|
||||||
{"level":"ERROR","message":"Health check timeout"}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Diagnosis:**
|
|
||||||
- Check service logs: `journalctl -u chainfire.service`
|
|
||||||
- Verify service is running: `systemctl status chainfire.service`
|
|
||||||
- Test health endpoint: `curl -k https://localhost:2379/health`
|
|
||||||
|
|
||||||
**Recovery:**
|
|
||||||
```bash
|
|
||||||
# Restart the main service
|
|
||||||
systemctl restart chainfire.service
|
|
||||||
|
|
||||||
# Join service will auto-retry after RestartSec
|
|
||||||
```
|
|
||||||
|
|
||||||
**4. Already Member**
|
|
||||||
|
|
||||||
**Symptom:**
|
|
||||||
```json
|
|
||||||
{"level":"WARN","message":"Node already member of cluster (HTTP 409)"}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Diagnosis:**
|
|
||||||
- This is normal on reboots
|
|
||||||
- Marker file created to prevent future attempts
|
|
||||||
|
|
||||||
**Recovery:**
|
|
||||||
- No action needed (idempotent behavior)
|
|
||||||
|
|
||||||
### Manual Cluster Join
|
|
||||||
|
|
||||||
If automation fails, manual join:
|
|
||||||
|
|
||||||
**Chainfire:**
|
|
||||||
```bash
|
|
||||||
curl -k -X POST https://node01.example.com:2379/admin/member/add \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{"id":"node04","raft_addr":"10.0.1.13:2380"}'
|
|
||||||
|
|
||||||
# Create marker to prevent auto-retry
|
|
||||||
mkdir -p /var/lib/first-boot-automation
|
|
||||||
date -Iseconds > /var/lib/first-boot-automation/.chainfire-joined
|
|
||||||
```
|
|
||||||
|
|
||||||
**FlareDB:**
|
|
||||||
```bash
|
|
||||||
curl -k -X POST https://node01.example.com:2479/admin/member/add \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{"id":"node04","raft_addr":"10.0.1.13:2480"}'
|
|
||||||
|
|
||||||
date -Iseconds > /var/lib/first-boot-automation/.flaredb-joined
|
|
||||||
```
|
|
||||||
|
|
||||||
### Rollback Procedure
|
|
||||||
|
|
||||||
**Remove from cluster:**
|
|
||||||
```bash
|
|
||||||
# On leader
|
|
||||||
curl -k -X DELETE https://node01.example.com:2379/admin/member/node04
|
|
||||||
|
|
||||||
# On node being removed
|
|
||||||
systemctl stop chainfire.service
|
|
||||||
rm -rf /var/lib/chainfire/*
|
|
||||||
rm /var/lib/first-boot-automation/.chainfire-joined
|
|
||||||
|
|
||||||
# Re-enable automation
|
|
||||||
systemctl restart chainfire-cluster-join.service
|
|
||||||
```
|
|
||||||
|
|
||||||
## Future Enhancements
|
|
||||||
|
|
||||||
### Planned Improvements
|
|
||||||
|
|
||||||
**1. Exponential Backoff**
|
|
||||||
- Current: Fixed 10-second delay
|
|
||||||
- Future: 1s, 2s, 4s, 8s, 16s exponential backoff
|
|
||||||
|
|
||||||
**2. Leader Discovery**
|
|
||||||
- Current: Static leader URL in config
|
|
||||||
- Future: DNS SRV records for dynamic discovery
|
|
||||||
|
|
||||||
**3. Webhook Notifications**
|
|
||||||
- POST to provisioning server on completion
|
|
||||||
- Include node info, join time, cluster health
|
|
||||||
|
|
||||||
**4. Pre-flight Checks**
|
|
||||||
- Validate network connectivity before attempting join
|
|
||||||
- Check TLS certificate validity
|
|
||||||
- Verify disk space, memory, CPU requirements
|
|
||||||
|
|
||||||
**5. Automated Testing**
|
|
||||||
- Integration tests with real cluster
|
|
||||||
- Simulate failures (network partitions, leader crashes)
|
|
||||||
- Validate idempotency
|
|
||||||
|
|
||||||
**6. Configuration Validation**
|
|
||||||
- JSON schema validation at boot
|
|
||||||
- Fail fast on invalid configuration
|
|
||||||
- Provide clear error messages
|
|
||||||
|
|
||||||
## References
|
|
||||||
|
|
||||||
- **T024**: NixOS service modules
|
|
||||||
- **T025**: Observability and monitoring
|
|
||||||
- **T031**: TLS certificate automation
|
|
||||||
- **T032.S1-S3**: PXE boot, netboot images, provisioning
|
|
||||||
- **Design Document**: `/home/centra/cloud/docs/por/T032-baremetal-provisioning/design.md`
|
|
||||||
|
|
||||||
## Appendix: Configuration Schema
|
|
||||||
|
|
||||||
### cluster-config.json Schema
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
||||||
"type": "object",
|
|
||||||
"required": ["node_id", "node_role", "bootstrap", "cluster_name", "leader_url", "raft_addr"],
|
|
||||||
"properties": {
|
|
||||||
"node_id": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "Unique node identifier"
|
|
||||||
},
|
|
||||||
"node_role": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": ["control-plane", "worker", "all-in-one"]
|
|
||||||
},
|
|
||||||
"bootstrap": {
|
|
||||||
"type": "boolean",
|
|
||||||
"description": "True for first 3 nodes, false for join"
|
|
||||||
},
|
|
||||||
"cluster_name": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"leader_url": {
|
|
||||||
"type": "string",
|
|
||||||
"format": "uri"
|
|
||||||
},
|
|
||||||
"raft_addr": {
|
|
||||||
"type": "string",
|
|
||||||
"pattern": "^[0-9.]+:[0-9]+$"
|
|
||||||
},
|
|
||||||
"initial_peers": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {"type": "string"}
|
|
||||||
},
|
|
||||||
"flaredb_peers": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {"type": "string"}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
---
|
||||||
# First-Boot Automation for Bare-Metal Provisioning
|
|
||||||
|
|
||||||
Automated cluster joining and service initialization for bare-metal provisioned NixOS nodes.
|
|
||||||
|
|
||||||
## Table of Contents
|
|
||||||
|
|
||||||
- [Overview](#overview)
|
|
||||||
- [Quick Start](#quick-start)
|
|
||||||
- [Configuration](#configuration)
|
|
||||||
- [Bootstrap vs Join](#bootstrap-vs-join)
|
|
||||||
- [Systemd Services](#systemd-services)
|
|
||||||
- [Troubleshooting](#troubleshooting)
|
|
||||||
- [Manual Operations](#manual-operations)
|
|
||||||
- [Security](#security)
|
|
||||||
- [Examples](#examples)
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
The first-boot automation system handles automated cluster joining for distributed services (Chainfire, FlareDB, IAM) on first boot of bare-metal provisioned nodes. It supports two modes:
|
|
||||||
|
|
||||||
- **Bootstrap Mode**: Initialize a new Raft cluster (first 3 nodes)
|
|
||||||
- **Join Mode**: Join an existing cluster (additional nodes)
|
|
||||||
|
|
||||||
### Features
|
|
||||||
|
|
||||||
- Automated health checking with retries
|
|
||||||
- Idempotent operations (safe to run multiple times)
|
|
||||||
- Structured JSON logging to journald
|
|
||||||
- Graceful failure handling with configurable retries
|
|
||||||
- Integration with TLS certificates (T031)
|
|
||||||
- Support for both bootstrap and runtime join scenarios
|
|
||||||
|
|
||||||
### Architecture
|
|
||||||
|
|
||||||
See [ARCHITECTURE.md](ARCHITECTURE.md) for detailed design documentation.
|
|
||||||
|
|
||||||
## Quick Start
|
|
||||||
|
|
||||||
### Prerequisites
|
|
||||||
|
|
||||||
1. Node provisioned via T032.S1-S3 (PXE boot and installation)
|
|
||||||
2. Cluster configuration file at `/etc/nixos/secrets/cluster-config.json`
|
|
||||||
3. TLS certificates at `/etc/nixos/secrets/` (T031)
|
|
||||||
4. Network connectivity to cluster leader (for join mode)
|
|
||||||
|
|
||||||
### Enable First-Boot Automation
|
|
||||||
|
|
||||||
In your NixOS configuration:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
# /etc/nixos/configuration.nix
|
|
||||||
{
|
|
||||||
imports = [
|
|
||||||
./nix/modules/first-boot-automation.nix
|
|
||||||
];
|
|
||||||
|
|
||||||
services.first-boot-automation = {
|
|
||||||
enable = true;
|
|
||||||
configFile = "/etc/nixos/secrets/cluster-config.json";
|
|
||||||
|
|
||||||
# Optional: disable specific services
|
|
||||||
enableChainfire = true;
|
|
||||||
enableFlareDB = true;
|
|
||||||
enableIAM = true;
|
|
||||||
enableHealthCheck = true;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### First Boot
|
|
||||||
|
|
||||||
After provisioning and reboot:
|
|
||||||
|
|
||||||
1. Node boots from disk
|
|
||||||
2. systemd starts services
|
|
||||||
3. First-boot automation runs automatically
|
|
||||||
4. Cluster join completes within 30-60 seconds
|
|
||||||
|
|
||||||
Check status:
|
|
||||||
```bash
|
|
||||||
systemctl status chainfire-cluster-join.service
|
|
||||||
systemctl status flaredb-cluster-join.service
|
|
||||||
systemctl status iam-initial-setup.service
|
|
||||||
systemctl status cluster-health-check.service
|
|
||||||
```
|
|
||||||
|
|
||||||
## Configuration
|
|
||||||
|
|
||||||
### cluster-config.json Format
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"node_id": "node01",
|
|
||||||
"node_role": "control-plane",
|
|
||||||
"bootstrap": true,
|
|
||||||
"cluster_name": "prod-cluster",
|
|
||||||
"leader_url": "https://node01.prod.example.com:2379",
|
|
||||||
"raft_addr": "10.0.1.10:2380",
|
|
||||||
"initial_peers": [
|
|
||||||
"node01:2380",
|
|
||||||
"node02:2380",
|
|
||||||
"node03:2380"
|
|
||||||
],
|
|
||||||
"flaredb_peers": [
|
|
||||||
"node01:2480",
|
|
||||||
"node02:2480",
|
|
||||||
"node03:2480"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Required Fields
|
|
||||||
|
|
||||||
| Field | Type | Description |
|
|
||||||
|-------|------|-------------|
|
|
||||||
| `node_id` | string | Unique identifier for this node |
|
|
||||||
| `node_role` | string | Node role: `control-plane`, `worker`, or `all-in-one` |
|
|
||||||
| `bootstrap` | boolean | `true` for first 3 nodes, `false` for additional nodes |
|
|
||||||
| `cluster_name` | string | Cluster identifier |
|
|
||||||
| `leader_url` | string | HTTPS URL of cluster leader (used for join) |
|
|
||||||
| `raft_addr` | string | This node's Raft address (IP:port) |
|
|
||||||
| `initial_peers` | array | List of bootstrap peer addresses (used only when `bootstrap` is `true`) |
|
|
||||||
| `flaredb_peers` | array | List of FlareDB peer addresses (used only when `bootstrap` is `true`) |
|
|
||||||
|
|
||||||
### Optional Fields
|
|
||||||
|
|
||||||
| Field | Type | Description |
|
|
||||||
|-------|------|-------------|
|
|
||||||
| `node_ip` | string | Node's primary IP address |
|
|
||||||
| `node_fqdn` | string | Fully qualified domain name |
|
|
||||||
| `datacenter` | string | Datacenter identifier |
|
|
||||||
| `rack` | string | Rack identifier |
|
|
||||||
| `services` | object | Per-service configuration |
|
|
||||||
| `tls` | object | TLS certificate paths |
|
|
||||||
| `network` | object | Network CIDR ranges |
|
|
||||||
|
|
||||||
### Example Configurations
|
|
||||||
|
|
||||||
See [examples/](examples/) directory:
|
|
||||||
|
|
||||||
- `cluster-config-bootstrap.json` - Bootstrap node (first 3)
|
|
||||||
- `cluster-config-join.json` - Join node (additional)
|
|
||||||
- `cluster-config-all-in-one.json` - Single-node deployment
|
|
||||||
|
|
||||||
## Bootstrap vs Join
|
|
||||||
|
|
||||||
### Bootstrap Mode (bootstrap: true)
|
|
||||||
|
|
||||||
**When to use:**
|
|
||||||
- First 3 nodes in a new cluster
|
|
||||||
- Nodes configured with matching `initial_peers`
|
|
||||||
- No existing cluster to join
|
|
||||||
|
|
||||||
**Behavior:**
|
|
||||||
1. Services start with `--initial-cluster` configuration
|
|
||||||
2. Raft consensus automatically elects leader
|
|
||||||
3. Cluster join service detects bootstrap mode and exits immediately
|
|
||||||
4. Marker file created: `/var/lib/first-boot-automation/.chainfire-initialized`
|
|
||||||
|
|
||||||
**Example:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"node_id": "node01",
|
|
||||||
"bootstrap": true,
|
|
||||||
"initial_peers": ["node01:2380", "node02:2380", "node03:2380"]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Join Mode (bootstrap: false)
|
|
||||||
|
|
||||||
**When to use:**
|
|
||||||
- Nodes joining an existing cluster
|
|
||||||
- Expansion or replacement nodes
|
|
||||||
- Leader is known and reachable
|
|
||||||
|
|
||||||
**Behavior:**
|
|
||||||
1. Service starts with no initial cluster config
|
|
||||||
2. Waits for local service to be healthy (max 120s)
|
|
||||||
3. POST to leader's `/admin/member/add` endpoint
|
|
||||||
4. Retries up to 5 times with 10s delay
|
|
||||||
5. Marker file created: `/var/lib/first-boot-automation/.chainfire-joined`
|
|
||||||
|
|
||||||
**Example:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"node_id": "node04",
|
|
||||||
"bootstrap": false,
|
|
||||||
"leader_url": "https://node01.prod.example.com:2379",
|
|
||||||
"raft_addr": "10.0.1.13:2380"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Decision Matrix
|
|
||||||
|
|
||||||
| Scenario | bootstrap | initial_peers | leader_url |
|
|
||||||
|----------|-----------|---------------|------------|
|
|
||||||
| Node 1 (first) | `true` | all 3 nodes | self |
|
|
||||||
| Node 2 (first) | `true` | all 3 nodes | self |
|
|
||||||
| Node 3 (first) | `true` | all 3 nodes | self |
|
|
||||||
| Node 4+ (join) | `false` | not used (joins via leader) | node 1 |
|
|
||||||
|
|
||||||
## Systemd Services
|
|
||||||
|
|
||||||
### chainfire-cluster-join.service
|
|
||||||
|
|
||||||
**Description:** Joins Chainfire cluster on first boot
|
|
||||||
|
|
||||||
**Dependencies:**
|
|
||||||
- After: `network-online.target`, `chainfire.service`
|
|
||||||
- Before: `flaredb-cluster-join.service`
|
|
||||||
|
|
||||||
**Configuration:**
|
|
||||||
- Type: `oneshot`
|
|
||||||
- RemainAfterExit: `true`
|
|
||||||
- Restart: `on-failure`
|
|
||||||
|
|
||||||
**Logs:**
|
|
||||||
```bash
|
|
||||||
journalctl -u chainfire-cluster-join.service
|
|
||||||
```
|
|
||||||
|
|
||||||
### flaredb-cluster-join.service
|
|
||||||
|
|
||||||
**Description:** Joins FlareDB cluster after Chainfire
|
|
||||||
|
|
||||||
**Dependencies:**
|
|
||||||
- After: `chainfire-cluster-join.service`, `flaredb.service`
|
|
||||||
- Requires: `chainfire-cluster-join.service`
|
|
||||||
|
|
||||||
**Configuration:**
|
|
||||||
- Type: `oneshot`
|
|
||||||
- RemainAfterExit: `true`
|
|
||||||
- Restart: `on-failure`
|
|
||||||
|
|
||||||
**Logs:**
|
|
||||||
```bash
|
|
||||||
journalctl -u flaredb-cluster-join.service
|
|
||||||
```
|
|
||||||
|
|
||||||
### iam-initial-setup.service
|
|
||||||
|
|
||||||
**Description:** IAM initial setup and admin user creation
|
|
||||||
|
|
||||||
**Dependencies:**
|
|
||||||
- After: `flaredb-cluster-join.service`, `iam.service`
|
|
||||||
|
|
||||||
**Configuration:**
|
|
||||||
- Type: `oneshot`
|
|
||||||
- RemainAfterExit: `true`
|
|
||||||
|
|
||||||
**Logs:**
|
|
||||||
```bash
|
|
||||||
journalctl -u iam-initial-setup.service
|
|
||||||
```
|
|
||||||
|
|
||||||
### cluster-health-check.service
|
|
||||||
|
|
||||||
**Description:** Validates cluster health on first boot
|
|
||||||
|
|
||||||
**Dependencies:**
|
|
||||||
- After: all cluster-join services
|
|
||||||
|
|
||||||
**Configuration:**
|
|
||||||
- Type: `oneshot`
|
|
||||||
- RemainAfterExit: `false`
|
|
||||||
|
|
||||||
**Logs:**
|
|
||||||
```bash
|
|
||||||
journalctl -u cluster-health-check.service
|
|
||||||
```
|
|
||||||
|
|
||||||
## Troubleshooting
|
|
||||||
|
|
||||||
### Check Service Status
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Overall status
|
|
||||||
systemctl status chainfire-cluster-join.service
|
|
||||||
systemctl status flaredb-cluster-join.service
|
|
||||||
|
|
||||||
# Detailed logs with JSON output
|
|
||||||
journalctl -u chainfire-cluster-join.service -o json-pretty
|
|
||||||
|
|
||||||
# Follow logs in real-time
|
|
||||||
journalctl -u chainfire-cluster-join.service -f
|
|
||||||
```
|
|
||||||
|
|
||||||
### Common Issues
|
|
||||||
|
|
||||||
#### 1. Health Check Timeout
|
|
||||||
|
|
||||||
**Symptom:**
|
|
||||||
```json
|
|
||||||
{"level":"ERROR","message":"Health check timeout after 120s"}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Causes:**
|
|
||||||
- Service not starting (check main service logs)
|
|
||||||
- Port conflict
|
|
||||||
- TLS certificate issues
|
|
||||||
|
|
||||||
**Solutions:**
|
|
||||||
```bash
|
|
||||||
# Check main service
|
|
||||||
systemctl status chainfire.service
|
|
||||||
journalctl -u chainfire.service
|
|
||||||
|
|
||||||
# Test health endpoint manually
|
|
||||||
curl -k https://localhost:2379/health
|
|
||||||
|
|
||||||
# Restart services
|
|
||||||
systemctl restart chainfire.service
|
|
||||||
systemctl restart chainfire-cluster-join.service
|
|
||||||
```
|
|
||||||
|
|
||||||
#### 2. Leader Unreachable
|
|
||||||
|
|
||||||
**Symptom:**
|
|
||||||
```json
|
|
||||||
{"level":"ERROR","message":"Join request failed: connection error"}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Causes:**
|
|
||||||
- Network connectivity issues
|
|
||||||
- Firewall blocking ports
|
|
||||||
- Leader not running
|
|
||||||
- Wrong leader URL in config
|
|
||||||
|
|
||||||
**Solutions:**
|
|
||||||
```bash
|
|
||||||
# Test network connectivity
|
|
||||||
ping node01.prod.example.com
|
|
||||||
curl -k https://node01.prod.example.com:2379/health
|
|
||||||
|
|
||||||
# Check firewall
|
|
||||||
iptables -L -n | grep 2379
|
|
||||||
|
|
||||||
# Verify configuration
|
|
||||||
jq '.leader_url' /etc/nixos/secrets/cluster-config.json
|
|
||||||
|
|
||||||
# Try manual join (see below)
|
|
||||||
```
|
|
||||||
|
|
||||||
#### 3. Invalid Configuration
|
|
||||||
|
|
||||||
**Symptom:**
|
|
||||||
```json
|
|
||||||
{"level":"ERROR","message":"Configuration file not found"}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Causes:**
|
|
||||||
- Missing configuration file
|
|
||||||
- Wrong file path
|
|
||||||
- Invalid JSON syntax
|
|
||||||
- Missing required fields
|
|
||||||
|
|
||||||
**Solutions:**
|
|
||||||
```bash
|
|
||||||
# Check file exists
|
|
||||||
ls -la /etc/nixos/secrets/cluster-config.json
|
|
||||||
|
|
||||||
# Validate JSON syntax
|
|
||||||
jq . /etc/nixos/secrets/cluster-config.json
|
|
||||||
|
|
||||||
# Check required fields
|
|
||||||
jq '.node_id, .bootstrap, .leader_url' /etc/nixos/secrets/cluster-config.json
|
|
||||||
|
|
||||||
# Fix and restart
|
|
||||||
systemctl restart chainfire-cluster-join.service
|
|
||||||
```
|
|
||||||
|
|
||||||
#### 4. Already Member (Reboot)
|
|
||||||
|
|
||||||
**Symptom:**
|
|
||||||
```json
|
|
||||||
{"level":"WARN","message":"Already member of cluster (HTTP 409)"}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Explanation:**
|
|
||||||
- This is **normal** on reboots
|
|
||||||
- Marker file prevents duplicate joins
|
|
||||||
- No action needed
|
|
||||||
|
|
||||||
**Verify:**
|
|
||||||
```bash
|
|
||||||
# Check marker file
|
|
||||||
cat /var/lib/first-boot-automation/.chainfire-joined
|
|
||||||
|
|
||||||
# Should show timestamp: 2025-12-10T10:30:45+00:00
|
|
||||||
```
|
|
||||||
|
|
||||||
#### 5. Join Retry Exhausted
|
|
||||||
|
|
||||||
**Symptom:**
|
|
||||||
```json
|
|
||||||
{"level":"ERROR","message":"Failed to join cluster after 5 attempts"}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Causes:**
|
|
||||||
- Persistent network issues
|
|
||||||
- Leader down or overloaded
|
|
||||||
- Invalid node configuration
|
|
||||||
- Cluster at capacity
|
|
||||||
|
|
||||||
**Solutions:**
|
|
||||||
```bash
|
|
||||||
# Check cluster status on leader
|
|
||||||
curl -k https://node01.prod.example.com:2379/admin/cluster/members | jq
|
|
||||||
|
|
||||||
# Verify this node's configuration
|
|
||||||
jq '.node_id, .raft_addr' /etc/nixos/secrets/cluster-config.json
|
|
||||||
|
|
||||||
# Increase retry attempts (edit NixOS config)
|
|
||||||
# Or perform manual join (see below)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Verify Cluster Membership
|
|
||||||
|
|
||||||
**On leader node:**
|
|
||||||
```bash
|
|
||||||
# Chainfire members
|
|
||||||
curl -k https://localhost:2379/admin/cluster/members | jq
|
|
||||||
|
|
||||||
# FlareDB members
|
|
||||||
curl -k https://localhost:2479/admin/cluster/members | jq
|
|
||||||
```
|
|
||||||
|
|
||||||
**Expected output:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"members": [
|
|
||||||
{"id": "node01", "raft_addr": "10.0.1.10:2380", "status": "healthy"},
|
|
||||||
{"id": "node02", "raft_addr": "10.0.1.11:2380", "status": "healthy"},
|
|
||||||
{"id": "node03", "raft_addr": "10.0.1.12:2380", "status": "healthy"}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Check Marker Files
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# List all marker files
|
|
||||||
ls -la /var/lib/first-boot-automation/
|
|
||||||
|
|
||||||
# View timestamps
|
|
||||||
cat /var/lib/first-boot-automation/.chainfire-joined
|
|
||||||
cat /var/lib/first-boot-automation/.flaredb-joined
|
|
||||||
```
|
|
||||||
|
|
||||||
### Reset and Re-join
|
|
||||||
|
|
||||||
**Warning:** This will remove the node from the cluster and rejoin.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Stop services
|
|
||||||
systemctl stop chainfire.service flaredb.service
|
|
||||||
|
|
||||||
# Remove data and markers
|
|
||||||
rm -rf /var/lib/chainfire/*
|
|
||||||
rm -rf /var/lib/flaredb/*
|
|
||||||
rm /var/lib/first-boot-automation/.chainfire-*
|
|
||||||
rm /var/lib/first-boot-automation/.flaredb-*
|
|
||||||
|
|
||||||
# Restart (will auto-join)
|
|
||||||
systemctl start chainfire.service
|
|
||||||
systemctl restart chainfire-cluster-join.service
|
|
||||||
```
|
|
||||||
|
|
||||||
## Manual Operations
|
|
||||||
|
|
||||||
### Manual Cluster Join
|
|
||||||
|
|
||||||
If automation fails, perform manual join:
|
|
||||||
|
|
||||||
**Chainfire:**
|
|
||||||
```bash
|
|
||||||
# On joining node, ensure service is running and healthy
|
|
||||||
curl -k https://localhost:2379/health
|
|
||||||
|
|
||||||
# From any node, add member to cluster
|
|
||||||
curl -k -X POST https://node01.prod.example.com:2379/admin/member/add \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{
|
|
||||||
"id": "node04",
|
|
||||||
"raft_addr": "10.0.1.13:2380"
|
|
||||||
}'
|
|
||||||
|
|
||||||
# Create marker to prevent auto-retry
|
|
||||||
mkdir -p /var/lib/first-boot-automation
|
|
||||||
date -Iseconds > /var/lib/first-boot-automation/.chainfire-joined
|
|
||||||
```
|
|
||||||
|
|
||||||
**FlareDB:**
|
|
||||||
```bash
|
|
||||||
curl -k -X POST https://node01.prod.example.com:2479/admin/member/add \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{
|
|
||||||
"id": "node04",
|
|
||||||
"raft_addr": "10.0.1.13:2480"
|
|
||||||
}'
|
|
||||||
|
|
||||||
date -Iseconds > /var/lib/first-boot-automation/.flaredb-joined
|
|
||||||
```
|
|
||||||
|
|
||||||
### Remove Node from Cluster
|
|
||||||
|
|
||||||
**On leader:**
|
|
||||||
```bash
|
|
||||||
# Chainfire
|
|
||||||
curl -k -X DELETE https://node01.prod.example.com:2379/admin/member/node04
|
|
||||||
|
|
||||||
# FlareDB
|
|
||||||
curl -k -X DELETE https://node01.prod.example.com:2479/admin/member/node04
|
|
||||||
```
|
|
||||||
|
|
||||||
**On removed node:**
|
|
||||||
```bash
|
|
||||||
# Stop services
|
|
||||||
systemctl stop chainfire.service flaredb.service
|
|
||||||
|
|
||||||
# Clean up data
|
|
||||||
rm -rf /var/lib/chainfire/*
|
|
||||||
rm -rf /var/lib/flaredb/*
|
|
||||||
rm /var/lib/first-boot-automation/.chainfire-*
|
|
||||||
rm /var/lib/first-boot-automation/.flaredb-*
|
|
||||||
```
|
|
||||||
|
|
||||||
### Disable First-Boot Automation
|
|
||||||
|
|
||||||
If you need to disable automation:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
# In NixOS configuration
|
|
||||||
services.first-boot-automation.enable = false;
|
|
||||||
```
|
|
||||||
|
|
||||||
Or stop services temporarily:
|
|
||||||
```bash
|
|
||||||
systemctl stop chainfire-cluster-join.service
|
|
||||||
systemctl disable chainfire-cluster-join.service
|
|
||||||
```
|
|
||||||
|
|
||||||
### Re-enable After Manual Operations
|
|
||||||
|
|
||||||
After manual cluster operations:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Create marker files to indicate join complete
|
|
||||||
mkdir -p /var/lib/first-boot-automation
|
|
||||||
date -Iseconds > /var/lib/first-boot-automation/.chainfire-joined
|
|
||||||
date -Iseconds > /var/lib/first-boot-automation/.flaredb-joined
|
|
||||||
|
|
||||||
# Or re-enable automation (will skip if markers exist)
|
|
||||||
systemctl enable --now chainfire-cluster-join.service
|
|
||||||
```
|
|
||||||
|
|
||||||
## Security
|
|
||||||
|
|
||||||
### TLS Certificates
|
|
||||||
|
|
||||||
**Requirements:**
|
|
||||||
- All cluster communication uses TLS
|
|
||||||
- Certificates must exist before first boot
|
|
||||||
- Generated by T031 TLS automation
|
|
||||||
|
|
||||||
**Certificate Paths:**
|
|
||||||
```
|
|
||||||
/etc/nixos/secrets/
|
|
||||||
├── ca.crt # CA certificate
|
|
||||||
├── node01.crt # Node certificate
|
|
||||||
└── node01.key # Node private key (mode 0600)
|
|
||||||
```
|
|
||||||
|
|
||||||
**Permissions:**
|
|
||||||
```bash
|
|
||||||
chmod 600 /etc/nixos/secrets/node01.key
|
|
||||||
chmod 644 /etc/nixos/secrets/node01.crt
|
|
||||||
chmod 644 /etc/nixos/secrets/ca.crt
|
|
||||||
```
|
|
||||||
|
|
||||||
### Configuration File Security
|
|
||||||
|
|
||||||
**Cluster configuration contains sensitive data:**
|
|
||||||
- IP addresses and network topology
|
|
||||||
- Service URLs
|
|
||||||
- Node identifiers
|
|
||||||
|
|
||||||
**Recommended permissions:**
|
|
||||||
```bash
|
|
||||||
chmod 600 /etc/nixos/secrets/cluster-config.json
|
|
||||||
chown root:root /etc/nixos/secrets/cluster-config.json
|
|
||||||
```
|
|
||||||
|
|
||||||
### Network Security
|
|
||||||
|
|
||||||
**Required firewall rules:**
|
|
||||||
```bash
|
|
||||||
# Chainfire
|
|
||||||
iptables -A INPUT -p tcp --dport 2379 -s 10.0.1.0/24 -j ACCEPT # API
|
|
||||||
iptables -A INPUT -p tcp --dport 2380 -s 10.0.1.0/24 -j ACCEPT # Raft
|
|
||||||
iptables -A INPUT -p tcp --dport 2381 -s 10.0.1.0/24 -j ACCEPT # Gossip
|
|
||||||
|
|
||||||
# FlareDB
|
|
||||||
iptables -A INPUT -p tcp --dport 2479 -s 10.0.1.0/24 -j ACCEPT # API
|
|
||||||
iptables -A INPUT -p tcp --dport 2480 -s 10.0.1.0/24 -j ACCEPT # Raft
|
|
||||||
|
|
||||||
# IAM
|
|
||||||
iptables -A INPUT -p tcp --dport 8080 -s 10.0.1.0/24 -j ACCEPT # API
|
|
||||||
```
|
|
||||||
|
|
||||||
### Production Considerations
|
|
||||||
|
|
||||||
**For production deployments:**
|
|
||||||
|
|
||||||
1. **Remove `-k` flag from curl** (validate TLS certificates)
|
|
||||||
2. **Implement mTLS** for client authentication
|
|
||||||
3. **Rotate credentials** regularly
|
|
||||||
4. **Audit logs** with structured logging
|
|
||||||
5. **Monitor health endpoints** continuously
|
|
||||||
6. **Backup cluster state** before changes
|
|
||||||
|
|
||||||
## Examples
|
|
||||||
|
|
||||||
### Example 1: 3-Node Bootstrap Cluster
|
|
||||||
|
|
||||||
**Node 1:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"node_id": "node01",
|
|
||||||
"bootstrap": true,
|
|
||||||
"raft_addr": "10.0.1.10:2380",
|
|
||||||
"initial_peers": ["node01:2380", "node02:2380", "node03:2380"]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Node 2:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"node_id": "node02",
|
|
||||||
"bootstrap": true,
|
|
||||||
"raft_addr": "10.0.1.11:2380",
|
|
||||||
"initial_peers": ["node01:2380", "node02:2380", "node03:2380"]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Node 3:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"node_id": "node03",
|
|
||||||
"bootstrap": true,
|
|
||||||
"raft_addr": "10.0.1.12:2380",
|
|
||||||
"initial_peers": ["node01:2380", "node02:2380", "node03:2380"]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Provisioning:**
|
|
||||||
```bash
|
|
||||||
# Provision all 3 nodes simultaneously
|
|
||||||
for i in {1..3}; do
|
|
||||||
nixos-anywhere --flake .#node0$i root@node0$i.example.com &
|
|
||||||
done
|
|
||||||
wait
|
|
||||||
|
|
||||||
# Nodes will bootstrap automatically on first boot
|
|
||||||
```
|
|
||||||
|
|
||||||
### Example 2: Join Existing Cluster
|
|
||||||
|
|
||||||
**Node 4 (joining):**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"node_id": "node04",
|
|
||||||
"bootstrap": false,
|
|
||||||
"leader_url": "https://node01.prod.example.com:2379",
|
|
||||||
"raft_addr": "10.0.1.13:2380"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Provisioning:**
|
|
||||||
```bash
|
|
||||||
nixos-anywhere --flake .#node04 root@node04.example.com
|
|
||||||
|
|
||||||
# Node will automatically join on first boot
|
|
||||||
```
|
|
||||||
|
|
||||||
### Example 3: Single-Node All-in-One
|
|
||||||
|
|
||||||
**For development/testing:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"node_id": "aio01",
|
|
||||||
"bootstrap": true,
|
|
||||||
"raft_addr": "10.0.2.10:2380",
|
|
||||||
"initial_peers": ["aio01:2380"],
|
|
||||||
"flaredb_peers": ["aio01:2480"]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Provisioning:**
|
|
||||||
```bash
|
|
||||||
nixos-anywhere --flake .#aio01 root@aio01.example.com
|
|
||||||
```
|
|
||||||
|
|
||||||
## Integration with Other Systems
|
|
||||||
|
|
||||||
### T024 NixOS Modules
|
|
||||||
|
|
||||||
First-boot automation integrates with service modules:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
{
|
|
||||||
imports = [
|
|
||||||
./nix/modules/chainfire.nix
|
|
||||||
./nix/modules/flaredb.nix
|
|
||||||
./nix/modules/first-boot-automation.nix
|
|
||||||
];
|
|
||||||
|
|
||||||
services.chainfire.enable = true;
|
|
||||||
services.flaredb.enable = true;
|
|
||||||
services.first-boot-automation.enable = true;
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### T025 Observability
|
|
||||||
|
|
||||||
Health checks integrate with Prometheus:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
# prometheus.yml
|
|
||||||
scrape_configs:
|
|
||||||
- job_name: 'cluster-health'
|
|
||||||
static_configs:
|
|
||||||
- targets: ['node01:2379', 'node02:2379', 'node03:2379']
|
|
||||||
metrics_path: '/health'
|
|
||||||
```
|
|
||||||
|
|
||||||
### T031 TLS Certificates
|
|
||||||
|
|
||||||
Certificates generated by T031 are used automatically:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# On provisioning server
|
|
||||||
./tls/generate-node-cert.sh node01.example.com 10.0.1.10
|
|
||||||
|
|
||||||
# Copied during nixos-anywhere
|
|
||||||
# First-boot automation reads from /etc/nixos/secrets/
|
|
||||||
```
|
|
||||||
|
|
||||||
## Logs and Debugging
|
|
||||||
|
|
||||||
### Structured Logging
|
|
||||||
|
|
||||||
All logs are JSON-formatted:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"timestamp": "2025-12-10T10:30:45+00:00",
|
|
||||||
"level": "INFO",
|
|
||||||
"service": "chainfire",
|
|
||||||
"operation": "cluster-join",
|
|
||||||
"message": "Successfully joined cluster"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Query Examples
|
|
||||||
|
|
||||||
**All first-boot logs:**
|
|
||||||
```bash
|
|
||||||
journalctl -u "*cluster-join*" -u "*initial-setup*" -u "*health-check*"
|
|
||||||
```
|
|
||||||
|
|
||||||
**Errors only:**
|
|
||||||
```bash
|
|
||||||
journalctl -u chainfire-cluster-join.service | grep '"level":"ERROR"'
|
|
||||||
```
|
|
||||||
|
|
||||||
**Last boot only:**
|
|
||||||
```bash
|
|
||||||
journalctl -b -u chainfire-cluster-join.service
|
|
||||||
```
|
|
||||||
|
|
||||||
**JSON output for parsing:**
|
|
||||||
```bash
|
|
||||||
journalctl -u chainfire-cluster-join.service -o json | jq '.MESSAGE'
|
|
||||||
```
|
|
||||||
|
|
||||||
## Performance Tuning
|
|
||||||
|
|
||||||
### Timeout Configuration
|
|
||||||
|
|
||||||
Adjust timeouts in NixOS module:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
services.first-boot-automation = {
|
|
||||||
enable = true;
|
|
||||||
|
|
||||||
# Override default ports if needed
|
|
||||||
chainfirePort = 2379;
|
|
||||||
flaredbPort = 2479;
|
|
||||||
};
|
|
||||||
```
|
|
||||||
|
|
||||||
### Retry Configuration
|
|
||||||
|
|
||||||
Modify retry logic in scripts:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# baremetal/first-boot/cluster-join.sh
|
|
||||||
MAX_ATTEMPTS=10 # Increase from 5
|
|
||||||
RETRY_DELAY=15 # Increase from 10s
|
|
||||||
```
|
|
||||||
|
|
||||||
### Health Check Interval
|
|
||||||
|
|
||||||
Adjust polling interval:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# In service scripts
|
|
||||||
sleep 10 # Increase from 5s for less aggressive polling
|
|
||||||
```
|
|
||||||
|
|
||||||
## Support and Contributing
|
|
||||||
|
|
||||||
### Getting Help
|
|
||||||
|
|
||||||
1. Check logs: `journalctl -u chainfire-cluster-join.service`
|
|
||||||
2. Review troubleshooting section above
|
|
||||||
3. Consult [ARCHITECTURE.md](ARCHITECTURE.md) for design details
|
|
||||||
4. Check cluster status on leader node
|
|
||||||
|
|
||||||
### Reporting Issues
|
|
||||||
|
|
||||||
Include in bug reports:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Gather diagnostic information
|
|
||||||
journalctl -u chainfire-cluster-join.service > cluster-join.log
|
|
||||||
systemctl status chainfire-cluster-join.service > service-status.txt
|
|
||||||
cat /etc/nixos/secrets/cluster-config.json > config.json # Redact sensitive data!
|
|
||||||
ls -la /var/lib/first-boot-automation/ > markers.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
### Development
|
|
||||||
|
|
||||||
See [ARCHITECTURE.md](ARCHITECTURE.md) for contributing guidelines.
|
|
||||||
|
|
||||||
## References
|
|
||||||
|
|
||||||
- **ARCHITECTURE.md**: Detailed design documentation
|
|
||||||
- **T024**: NixOS service modules
|
|
||||||
- **T025**: Observability and monitoring
|
|
||||||
- **T031**: TLS certificate automation
|
|
||||||
- **T032.S1-S3**: PXE boot and provisioning
|
|
||||||
- **Design Document**: `/home/centra/cloud/docs/por/T032-baremetal-provisioning/design.md`
|
|
||||||
|
|
||||||
## License
|
|
||||||
|
|
||||||
Internal use only - Centra Cloud Platform
|
|
||||||
|
|
@ -47,9 +47,9 @@ if command -v jq &> /dev/null; then
|
||||||
NODE_ROLE=$(echo "$CONFIG_JSON" | jq -r '.node_role // "unknown"')
|
NODE_ROLE=$(echo "$CONFIG_JSON" | jq -r '.node_role // "unknown"')
|
||||||
else
|
else
|
||||||
# Fallback to grep/sed for minimal environments
|
# Fallback to grep/sed for minimal environments
|
||||||
BOOTSTRAP=$(echo "$CONFIG_JSON" | grep -oP '"bootstrap"\s*:\s*\K(true|false)' || echo "false")
|
BOOTSTRAP=$(echo "$CONFIG_JSON" | grep -Eo '"bootstrap"[[:space:]]*:[[:space:]]*(true|false)' | head -n1 | sed -E 's/.*:[[:space:]]*(true|false)/\1/' || echo "false")
|
||||||
NODE_ID=$(echo "$CONFIG_JSON" | grep -oP '"node_id"\s*:\s*"\K[^"]+' || echo "unknown")
|
NODE_ID=$(echo "$CONFIG_JSON" | grep -Eo '"node_id"[[:space:]]*:[[:space:]]*"[^"]+"' | head -n1 | sed -E 's/.*"node_id"[[:space:]]*:[[:space:]]*"([^"]+)".*/\1/' || echo "unknown")
|
||||||
NODE_ROLE=$(echo "$CONFIG_JSON" | grep -oP '"node_role"\s*:\s*"\K[^"]+' || echo "unknown")
|
NODE_ROLE=$(echo "$CONFIG_JSON" | grep -Eo '"node_role"[[:space:]]*:[[:space:]]*"[^"]+"' | head -n1 | sed -E 's/.*"node_role"[[:space:]]*:[[:space:]]*"([^"]+)".*/\1/' || echo "unknown")
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log "INFO" "Node configuration: id=$NODE_ID, role=$NODE_ROLE, bootstrap=$BOOTSTRAP"
|
log "INFO" "Node configuration: id=$NODE_ID, role=$NODE_ROLE, bootstrap=$BOOTSTRAP"
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,9 @@ LEADER_URL="${3:-}"
|
||||||
JOIN_PAYLOAD="${4:-}"
|
JOIN_PAYLOAD="${4:-}"
|
||||||
MAX_ATTEMPTS="${5:-5}"
|
MAX_ATTEMPTS="${5:-5}"
|
||||||
RETRY_DELAY="${6:-10}"
|
RETRY_DELAY="${6:-10}"
|
||||||
|
CURL_CONNECT_TIMEOUT="${CURL_CONNECT_TIMEOUT:-5}"
|
||||||
|
CURL_MAX_TIME="${CURL_MAX_TIME:-15}"
|
||||||
|
CURL_INSECURE="${CURL_INSECURE:-1}"
|
||||||
|
|
||||||
FIRST_BOOT_MARKER="/var/lib/first-boot-automation/.${SERVICE_NAME}-joined"
|
FIRST_BOOT_MARKER="/var/lib/first-boot-automation/.${SERVICE_NAME}-joined"
|
||||||
|
|
||||||
|
|
@ -81,7 +84,11 @@ else
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
HTTP_CODE=$(curl -k -s -o /dev/null -w "%{http_code}" "$HEALTH_URL" 2>/dev/null || echo "000")
|
CURL_FLAGS=(-s -o /dev/null -w "%{http_code}" --connect-timeout "$CURL_CONNECT_TIMEOUT" --max-time "$CURL_MAX_TIME")
|
||||||
|
if [[ "$CURL_INSECURE" == "1" ]]; then
|
||||||
|
CURL_FLAGS+=(-k)
|
||||||
|
fi
|
||||||
|
HTTP_CODE=$(curl "${CURL_FLAGS[@]}" "$HEALTH_URL" 2>/dev/null || echo "000")
|
||||||
|
|
||||||
if [[ "$HTTP_CODE" == "200" ]]; then
|
if [[ "$HTTP_CODE" == "200" ]]; then
|
||||||
log "INFO" "Local $SERVICE_NAME is healthy"
|
log "INFO" "Local $SERVICE_NAME is healthy"
|
||||||
|
|
@ -109,13 +116,20 @@ for ATTEMPT in $(seq 1 "$MAX_ATTEMPTS"); do
|
||||||
|
|
||||||
# Make join request to leader
|
# Make join request to leader
|
||||||
RESPONSE_FILE=$(mktemp)
|
RESPONSE_FILE=$(mktemp)
|
||||||
HTTP_CODE=$(curl -k -s -w "%{http_code}" -o "$RESPONSE_FILE" \
|
PAYLOAD_FILE=$(mktemp)
|
||||||
|
printf '%s' "$JOIN_PAYLOAD" > "$PAYLOAD_FILE"
|
||||||
|
|
||||||
|
CURL_FLAGS=(-s -w "%{http_code}" -o "$RESPONSE_FILE" --connect-timeout "$CURL_CONNECT_TIMEOUT" --max-time "$CURL_MAX_TIME")
|
||||||
|
if [[ "$CURL_INSECURE" == "1" ]]; then
|
||||||
|
CURL_FLAGS+=(-k)
|
||||||
|
fi
|
||||||
|
HTTP_CODE=$(curl "${CURL_FLAGS[@]}" \
|
||||||
-X POST "$LEADER_URL/admin/member/add" \
|
-X POST "$LEADER_URL/admin/member/add" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-d "$JOIN_PAYLOAD" 2>/dev/null || echo "000")
|
--data-binary "@$PAYLOAD_FILE" 2>/dev/null || echo "000")
|
||||||
|
|
||||||
RESPONSE_BODY=$(cat "$RESPONSE_FILE" 2>/dev/null || echo "")
|
RESPONSE_BODY=$(cat "$RESPONSE_FILE" 2>/dev/null || echo "")
|
||||||
rm -f "$RESPONSE_FILE"
|
rm -f "$RESPONSE_FILE" "$PAYLOAD_FILE"
|
||||||
|
|
||||||
log "INFO" "Join request response: HTTP $HTTP_CODE"
|
log "INFO" "Join request response: HTTP $HTTP_CODE"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,77 +0,0 @@
|
||||||
{
|
|
||||||
"node_id": "aio01",
|
|
||||||
"node_role": "all-in-one",
|
|
||||||
"bootstrap": true,
|
|
||||||
"cluster_name": "dev-cluster",
|
|
||||||
"leader_url": "https://aio01.dev.example.com:2379",
|
|
||||||
"raft_addr": "10.0.2.10:2380",
|
|
||||||
"initial_peers": [
|
|
||||||
"aio01:2380"
|
|
||||||
],
|
|
||||||
"flaredb_peers": [
|
|
||||||
"aio01:2480"
|
|
||||||
],
|
|
||||||
"node_ip": "10.0.2.10",
|
|
||||||
"node_fqdn": "aio01.dev.example.com",
|
|
||||||
"datacenter": "dev",
|
|
||||||
"rack": "rack1",
|
|
||||||
"description": "Single-node all-in-one deployment for development/testing",
|
|
||||||
"services": {
|
|
||||||
"chainfire": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 2379,
|
|
||||||
"raft_port": 2380,
|
|
||||||
"gossip_port": 2381
|
|
||||||
},
|
|
||||||
"flaredb": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 2479,
|
|
||||||
"raft_port": 2480
|
|
||||||
},
|
|
||||||
"iam": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 8080
|
|
||||||
},
|
|
||||||
"plasmavmc": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 8090
|
|
||||||
},
|
|
||||||
"novanet": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 8091
|
|
||||||
},
|
|
||||||
"flashdns": {
|
|
||||||
"enabled": true,
|
|
||||||
"dns_port": 53,
|
|
||||||
"api_port": 8053
|
|
||||||
},
|
|
||||||
"fiberlb": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 8092
|
|
||||||
},
|
|
||||||
"lightningstor": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 8093
|
|
||||||
},
|
|
||||||
"k8shost": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 10250
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"tls": {
|
|
||||||
"enabled": true,
|
|
||||||
"ca_cert_path": "/etc/nixos/secrets/ca.crt",
|
|
||||||
"node_cert_path": "/etc/nixos/secrets/aio01.crt",
|
|
||||||
"node_key_path": "/etc/nixos/secrets/aio01.key"
|
|
||||||
},
|
|
||||||
"network": {
|
|
||||||
"cluster_network": "10.0.2.0/24",
|
|
||||||
"pod_network": "10.244.0.0/16",
|
|
||||||
"service_network": "10.96.0.0/12"
|
|
||||||
},
|
|
||||||
"development": {
|
|
||||||
"mode": "single-node",
|
|
||||||
"skip_replication_checks": true,
|
|
||||||
"allow_single_raft_member": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,68 +0,0 @@
|
||||||
{
|
|
||||||
"node_id": "node01",
|
|
||||||
"node_role": "control-plane",
|
|
||||||
"bootstrap": true,
|
|
||||||
"cluster_name": "prod-cluster",
|
|
||||||
"leader_url": "https://node01.prod.example.com:2379",
|
|
||||||
"raft_addr": "10.0.1.10:2380",
|
|
||||||
"initial_peers": [
|
|
||||||
"node01:2380",
|
|
||||||
"node02:2380",
|
|
||||||
"node03:2380"
|
|
||||||
],
|
|
||||||
"flaredb_peers": [
|
|
||||||
"node01:2480",
|
|
||||||
"node02:2480",
|
|
||||||
"node03:2480"
|
|
||||||
],
|
|
||||||
"node_ip": "10.0.1.10",
|
|
||||||
"node_fqdn": "node01.prod.example.com",
|
|
||||||
"datacenter": "dc1",
|
|
||||||
"rack": "rack1",
|
|
||||||
"description": "Bootstrap node for production cluster - initializes Raft cluster",
|
|
||||||
"services": {
|
|
||||||
"chainfire": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 2379,
|
|
||||||
"raft_port": 2380,
|
|
||||||
"gossip_port": 2381
|
|
||||||
},
|
|
||||||
"flaredb": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 2479,
|
|
||||||
"raft_port": 2480
|
|
||||||
},
|
|
||||||
"iam": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 8080
|
|
||||||
},
|
|
||||||
"plasmavmc": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 8090
|
|
||||||
},
|
|
||||||
"novanet": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 8091
|
|
||||||
},
|
|
||||||
"flashdns": {
|
|
||||||
"enabled": true,
|
|
||||||
"dns_port": 53,
|
|
||||||
"api_port": 8053
|
|
||||||
},
|
|
||||||
"fiberlb": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 8092
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"tls": {
|
|
||||||
"enabled": true,
|
|
||||||
"ca_cert_path": "/etc/nixos/secrets/ca.crt",
|
|
||||||
"node_cert_path": "/etc/nixos/secrets/node01.crt",
|
|
||||||
"node_key_path": "/etc/nixos/secrets/node01.key"
|
|
||||||
},
|
|
||||||
"network": {
|
|
||||||
"cluster_network": "10.0.1.0/24",
|
|
||||||
"pod_network": "10.244.0.0/16",
|
|
||||||
"service_network": "10.96.0.0/12"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,68 +0,0 @@
|
||||||
{
|
|
||||||
"node_id": "node04",
|
|
||||||
"node_role": "control-plane",
|
|
||||||
"bootstrap": false,
|
|
||||||
"cluster_name": "prod-cluster",
|
|
||||||
"leader_url": "https://node01.prod.example.com:2379",
|
|
||||||
"raft_addr": "10.0.1.13:2380",
|
|
||||||
"initial_peers": [
|
|
||||||
"node01:2380",
|
|
||||||
"node02:2380",
|
|
||||||
"node03:2380"
|
|
||||||
],
|
|
||||||
"flaredb_peers": [
|
|
||||||
"node01:2480",
|
|
||||||
"node02:2480",
|
|
||||||
"node03:2480"
|
|
||||||
],
|
|
||||||
"node_ip": "10.0.1.13",
|
|
||||||
"node_fqdn": "node04.prod.example.com",
|
|
||||||
"datacenter": "dc1",
|
|
||||||
"rack": "rack2",
|
|
||||||
"description": "Additional node joining existing cluster - will contact leader to join",
|
|
||||||
"services": {
|
|
||||||
"chainfire": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 2379,
|
|
||||||
"raft_port": 2380,
|
|
||||||
"gossip_port": 2381
|
|
||||||
},
|
|
||||||
"flaredb": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 2479,
|
|
||||||
"raft_port": 2480
|
|
||||||
},
|
|
||||||
"iam": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 8080
|
|
||||||
},
|
|
||||||
"plasmavmc": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 8090
|
|
||||||
},
|
|
||||||
"novanet": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 8091
|
|
||||||
},
|
|
||||||
"flashdns": {
|
|
||||||
"enabled": true,
|
|
||||||
"dns_port": 53,
|
|
||||||
"api_port": 8053
|
|
||||||
},
|
|
||||||
"fiberlb": {
|
|
||||||
"enabled": true,
|
|
||||||
"api_port": 8092
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"tls": {
|
|
||||||
"enabled": true,
|
|
||||||
"ca_cert_path": "/etc/nixos/secrets/ca.crt",
|
|
||||||
"node_cert_path": "/etc/nixos/secrets/node04.crt",
|
|
||||||
"node_key_path": "/etc/nixos/secrets/node04.key"
|
|
||||||
},
|
|
||||||
"network": {
|
|
||||||
"cluster_network": "10.0.1.0/24",
|
|
||||||
"pod_network": "10.244.0.0/16",
|
|
||||||
"service_network": "10.96.0.0/12"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -19,6 +19,9 @@ SERVICE_NAME="${1:-}"
|
||||||
HEALTH_URL="${2:-}"
|
HEALTH_URL="${2:-}"
|
||||||
TIMEOUT="${3:-300}"
|
TIMEOUT="${3:-300}"
|
||||||
RETRY_INTERVAL="${4:-5}"
|
RETRY_INTERVAL="${4:-5}"
|
||||||
|
CURL_CONNECT_TIMEOUT="${CURL_CONNECT_TIMEOUT:-5}"
|
||||||
|
CURL_MAX_TIME="${CURL_MAX_TIME:-10}"
|
||||||
|
CURL_INSECURE="${CURL_INSECURE:-1}"
|
||||||
|
|
||||||
# Validate arguments
|
# Validate arguments
|
||||||
if [[ -z "$SERVICE_NAME" || -z "$HEALTH_URL" ]]; then
|
if [[ -z "$SERVICE_NAME" || -z "$HEALTH_URL" ]]; then
|
||||||
|
|
@ -55,8 +58,12 @@ while true; do
|
||||||
ATTEMPT=$((ATTEMPT + 1))
|
ATTEMPT=$((ATTEMPT + 1))
|
||||||
log "INFO" "Health check attempt $ATTEMPT (elapsed: ${ELAPSED}s)"
|
log "INFO" "Health check attempt $ATTEMPT (elapsed: ${ELAPSED}s)"
|
||||||
|
|
||||||
# Perform health check (allow insecure TLS for self-signed certs)
|
# Perform health check (allow insecure TLS if configured)
|
||||||
HTTP_CODE=$(curl -k -s -o /dev/null -w "%{http_code}" "$HEALTH_URL" 2>/dev/null || echo "000")
|
CURL_FLAGS=(-s -o /dev/null -w "%{http_code}" --connect-timeout "$CURL_CONNECT_TIMEOUT" --max-time "$CURL_MAX_TIME")
|
||||||
|
if [[ "$CURL_INSECURE" == "1" ]]; then
|
||||||
|
CURL_FLAGS+=(-k)
|
||||||
|
fi
|
||||||
|
HTTP_CODE=$(curl "${CURL_FLAGS[@]}" "$HEALTH_URL" 2>/dev/null || echo "000")
|
||||||
|
|
||||||
if [[ "$HTTP_CODE" == "200" ]]; then
|
if [[ "$HTTP_CODE" == "200" ]]; then
|
||||||
log "INFO" "Health check passed (HTTP $HTTP_CODE)"
|
log "INFO" "Health check passed (HTTP $HTTP_CODE)"
|
||||||
|
|
|
||||||
|
|
@ -1,570 +0,0 @@
|
||||||
# PlasmaCloud Netboot Image Builder - Technical Overview
|
|
||||||
|
|
||||||
## Introduction
|
|
||||||
|
|
||||||
This document provides a technical overview of the PlasmaCloud NixOS Image Builder, which generates bootable netboot images for bare-metal provisioning. This is part of T032 (Bare-Metal Provisioning) and specifically implements deliverable S3 (NixOS Image Builder).
|
|
||||||
|
|
||||||
## System Architecture
|
|
||||||
|
|
||||||
### High-Level Flow
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────┐
|
|
||||||
│ Nix Flake │
|
|
||||||
│ (flake.nix) │
|
|
||||||
└──────────┬──────────┘
|
|
||||||
│
|
|
||||||
├─── nixosConfigurations
|
|
||||||
│ ├── netboot-control-plane
|
|
||||||
│ ├── netboot-worker
|
|
||||||
│ └── netboot-all-in-one
|
|
||||||
│
|
|
||||||
├─── packages (T024)
|
|
||||||
│ ├── chainfire-server
|
|
||||||
│ ├── flaredb-server
|
|
||||||
│ └── ... (8 services)
|
|
||||||
│
|
|
||||||
└─── modules (T024)
|
|
||||||
├── chainfire.nix
|
|
||||||
├── flaredb.nix
|
|
||||||
└── ... (8 modules)
|
|
||||||
|
|
||||||
Build Process
|
|
||||||
↓
|
|
||||||
|
|
||||||
┌─────────────────────┐
|
|
||||||
│ build-images.sh │
|
|
||||||
└──────────┬──────────┘
|
|
||||||
│
|
|
||||||
├─── nix build netbootRamdisk
|
|
||||||
├─── nix build kernel
|
|
||||||
└─── copy to artifacts/
|
|
||||||
|
|
||||||
Output
|
|
||||||
↓
|
|
||||||
|
|
||||||
┌─────────────────────┐
|
|
||||||
│ Netboot Artifacts │
|
|
||||||
├─────────────────────┤
|
|
||||||
│ bzImage (kernel) │
|
|
||||||
│ initrd (ramdisk) │
|
|
||||||
│ netboot.ipxe │
|
|
||||||
└─────────────────────┘
|
|
||||||
│
|
|
||||||
├─── PXE Server
|
|
||||||
│ (HTTP/TFTP)
|
|
||||||
│
|
|
||||||
└─── Target Machine
|
|
||||||
(PXE Boot)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Component Breakdown
|
|
||||||
|
|
||||||
### 1. Netboot Configurations
|
|
||||||
|
|
||||||
Located in `nix/images/`, these NixOS configurations define the netboot environment:
|
|
||||||
|
|
||||||
#### `netboot-base.nix`
|
|
||||||
**Purpose**: Common base configuration for all profiles
|
|
||||||
|
|
||||||
**Key Features**:
|
|
||||||
- Extends `netboot-minimal.nix` from nixpkgs
|
|
||||||
- SSH server with root login (key-based only)
|
|
||||||
- Generic kernel with broad hardware support
|
|
||||||
- Disk management tools (disko, parted, cryptsetup, lvm2)
|
|
||||||
- Network configuration (DHCP, predictable interface names)
|
|
||||||
- Serial console support (ttyS0, tty0)
|
|
||||||
- Minimal system (no docs, no sound)
|
|
||||||
|
|
||||||
**Package Inclusions**:
|
|
||||||
```nix
|
|
||||||
disko, parted, gptfdisk # Disk management
|
|
||||||
cryptsetup, lvm2 # Encryption and LVM
|
|
||||||
e2fsprogs, xfsprogs # Filesystem tools
|
|
||||||
iproute2, curl, tcpdump # Network tools
|
|
||||||
vim, tmux, htop # System tools
|
|
||||||
```
|
|
||||||
|
|
||||||
**Kernel Configuration**:
|
|
||||||
```nix
|
|
||||||
boot.kernelPackages = pkgs.linuxPackages_latest;
|
|
||||||
boot.kernelParams = [
|
|
||||||
"console=ttyS0,115200"
|
|
||||||
"console=tty0"
|
|
||||||
"loglevel=4"
|
|
||||||
];
|
|
||||||
```
|
|
||||||
|
|
||||||
#### `netboot-control-plane.nix`
|
|
||||||
**Purpose**: Full control plane deployment
|
|
||||||
|
|
||||||
**Imports**:
|
|
||||||
- `netboot-base.nix` (base configuration)
|
|
||||||
- `../modules` (PlasmaCloud service modules)
|
|
||||||
|
|
||||||
**Service Inclusions**:
|
|
||||||
- Chainfire (ports 2379, 2380, 2381)
|
|
||||||
- FlareDB (ports 2479, 2480)
|
|
||||||
- IAM (port 8080)
|
|
||||||
- PlasmaVMC (port 8081)
|
|
||||||
- PrismNET (port 8082)
|
|
||||||
- FlashDNS (port 53)
|
|
||||||
- FiberLB (port 8083)
|
|
||||||
- LightningStor (port 8084)
|
|
||||||
- K8sHost (port 8085)
|
|
||||||
|
|
||||||
**Service State**: All services **disabled** by default via `lib.mkDefault false`
|
|
||||||
|
|
||||||
**Resource Limits** (for netboot environment):
|
|
||||||
```nix
|
|
||||||
MemoryMax = "512M"
|
|
||||||
CPUQuota = "50%"
|
|
||||||
```
|
|
||||||
|
|
||||||
#### `netboot-worker.nix`
|
|
||||||
**Purpose**: Compute-focused worker nodes
|
|
||||||
|
|
||||||
**Imports**:
|
|
||||||
- `netboot-base.nix`
|
|
||||||
- `../modules`
|
|
||||||
|
|
||||||
**Service Inclusions**:
|
|
||||||
- PlasmaVMC (VM management)
|
|
||||||
- PrismNET (SDN)
|
|
||||||
|
|
||||||
**Additional Features**:
|
|
||||||
- KVM virtualization support
|
|
||||||
- Open vSwitch for SDN
|
|
||||||
- QEMU and libvirt tools
|
|
||||||
- Optimized sysctl for VM workloads
|
|
||||||
|
|
||||||
**Performance Tuning**:
|
|
||||||
```nix
|
|
||||||
"fs.file-max" = 1000000;
|
|
||||||
"net.ipv4.ip_forward" = 1;
|
|
||||||
"net.core.netdev_max_backlog" = 5000;
|
|
||||||
```
|
|
||||||
|
|
||||||
#### `netboot-all-in-one.nix`
|
|
||||||
**Purpose**: Single-node deployment with all services
|
|
||||||
|
|
||||||
**Imports**:
|
|
||||||
- `netboot-base.nix`
|
|
||||||
- `../modules`
|
|
||||||
|
|
||||||
**Combines**: All features from control-plane + worker
|
|
||||||
|
|
||||||
**Use Cases**:
|
|
||||||
- Development environments
|
|
||||||
- Small deployments
|
|
||||||
- Edge locations
|
|
||||||
- POC installations
|
|
||||||
|
|
||||||
### 2. Flake Integration
|
|
||||||
|
|
||||||
The main `flake.nix` exposes netboot configurations:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
nixosConfigurations = {
|
|
||||||
netboot-control-plane = nixpkgs.lib.nixosSystem {
|
|
||||||
system = "x86_64-linux";
|
|
||||||
modules = [ ./nix/images/netboot-control-plane.nix ];
|
|
||||||
};
|
|
||||||
|
|
||||||
netboot-worker = nixpkgs.lib.nixosSystem {
|
|
||||||
system = "x86_64-linux";
|
|
||||||
modules = [ ./nix/images/netboot-worker.nix ];
|
|
||||||
};
|
|
||||||
|
|
||||||
netboot-all-in-one = nixpkgs.lib.nixosSystem {
|
|
||||||
system = "x86_64-linux";
|
|
||||||
modules = [ ./nix/images/netboot-all-in-one.nix ];
|
|
||||||
};
|
|
||||||
};
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3. Build Script
|
|
||||||
|
|
||||||
`build-images.sh` orchestrates the build process:
|
|
||||||
|
|
||||||
**Workflow**:
|
|
||||||
1. Parse command-line arguments (--profile, --output-dir)
|
|
||||||
2. Create output directories
|
|
||||||
3. For each profile:
|
|
||||||
- Build netboot ramdisk: `nix build ...netbootRamdisk`
|
|
||||||
- Build kernel: `nix build ...kernel`
|
|
||||||
- Copy artifacts (bzImage, initrd)
|
|
||||||
- Generate iPXE boot script
|
|
||||||
- Calculate and display sizes
|
|
||||||
4. Verify outputs (file existence, size sanity checks)
|
|
||||||
5. Copy to PXE server (if available)
|
|
||||||
6. Print summary
|
|
||||||
|
|
||||||
**Build Commands**:
|
|
||||||
```bash
|
|
||||||
nix build .#nixosConfigurations.netboot-$profile.config.system.build.netbootRamdisk
|
|
||||||
nix build .#nixosConfigurations.netboot-$profile.config.system.build.kernel
|
|
||||||
```
|
|
||||||
|
|
||||||
**Output Structure**:
|
|
||||||
```
|
|
||||||
artifacts/
|
|
||||||
├── control-plane/
|
|
||||||
│ ├── bzImage # ~10-30 MB
|
|
||||||
│ ├── initrd # ~100-300 MB
|
|
||||||
│ ├── netboot.ipxe # iPXE script
|
|
||||||
│ ├── build.log # Build log
|
|
||||||
│ ├── initrd-link # Nix result symlink
|
|
||||||
│ └── kernel-link # Nix result symlink
|
|
||||||
├── worker/
|
|
||||||
│ └── ... (same structure)
|
|
||||||
└── all-in-one/
|
|
||||||
└── ... (same structure)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Integration Points
|
|
||||||
|
|
||||||
### T024 NixOS Modules
|
|
||||||
|
|
||||||
The netboot configurations leverage T024 service modules:
|
|
||||||
|
|
||||||
**Module Structure** (example: chainfire.nix):
|
|
||||||
```nix
|
|
||||||
{
|
|
||||||
options.services.chainfire = {
|
|
||||||
enable = lib.mkEnableOption "chainfire service";
|
|
||||||
port = lib.mkOption { ... };
|
|
||||||
raftPort = lib.mkOption { ... };
|
|
||||||
package = lib.mkOption { ... };
|
|
||||||
};
|
|
||||||
|
|
||||||
config = lib.mkIf cfg.enable {
|
|
||||||
users.users.chainfire = { ... };
|
|
||||||
systemd.services.chainfire = { ... };
|
|
||||||
};
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Package Availability**:
|
|
||||||
```nix
|
|
||||||
# In netboot-control-plane.nix
|
|
||||||
environment.systemPackages = with pkgs; [
|
|
||||||
chainfire-server # From flake overlay
|
|
||||||
flaredb-server # From flake overlay
|
|
||||||
# ...
|
|
||||||
];
|
|
||||||
```
|
|
||||||
|
|
||||||
### T032.S2 PXE Infrastructure
|
|
||||||
|
|
||||||
The build script integrates with the PXE server:
|
|
||||||
|
|
||||||
**Copy Workflow**:
|
|
||||||
```bash
|
|
||||||
# Build script copies to:
|
|
||||||
chainfire/baremetal/pxe-server/assets/nixos/
|
|
||||||
├── control-plane/
|
|
||||||
│ ├── bzImage
|
|
||||||
│ └── initrd
|
|
||||||
├── worker/
|
|
||||||
│ ├── bzImage
|
|
||||||
│ └── initrd
|
|
||||||
└── all-in-one/
|
|
||||||
├── bzImage
|
|
||||||
└── initrd
|
|
||||||
```
|
|
||||||
|
|
||||||
**iPXE Boot Script** (generated):
|
|
||||||
```ipxe
|
|
||||||
#!ipxe
|
|
||||||
kernel ${boot-server}/control-plane/bzImage init=/nix/store/<hash>-nixos-system-.../init console=ttyS0,115200
|
|
||||||
initrd ${boot-server}/control-plane/initrd
|
|
||||||
boot
|
|
||||||
```
|
|
||||||
|
|
||||||
## Build Process Deep Dive
|
|
||||||
|
|
||||||
### NixOS Netboot Build Internals
|
|
||||||
|
|
||||||
1. **netboot-minimal.nix** (from nixpkgs):
|
|
||||||
- Provides base netboot functionality
|
|
||||||
- Configures initrd with kexec support
|
|
||||||
- Sets up squashfs for Nix store
|
|
||||||
|
|
||||||
2. **Our Extensions**:
|
|
||||||
- Add PlasmaCloud service packages
|
|
||||||
- Configure SSH for nixos-anywhere
|
|
||||||
- Include provisioning tools (disko, etc.)
|
|
||||||
- Customize kernel and modules
|
|
||||||
|
|
||||||
3. **Build Outputs**:
|
|
||||||
- **bzImage**: Compressed Linux kernel
|
|
||||||
- **initrd**: Squashfs-compressed initial ramdisk containing:
|
|
||||||
- Minimal NixOS system
|
|
||||||
- Nix store with service packages
|
|
||||||
- Init scripts for booting
|
|
||||||
|
|
||||||
### Size Optimization Strategies
|
|
||||||
|
|
||||||
**Current Optimizations**:
|
|
||||||
```nix
|
|
||||||
documentation.enable = false; # -50MB
|
|
||||||
documentation.nixos.enable = false; # -20MB
|
|
||||||
i18n.supportedLocales = [ "en_US" ]; # -100MB
|
|
||||||
```
|
|
||||||
|
|
||||||
**Additional Strategies** (if needed):
|
|
||||||
- Use `linuxPackages_hardened` (smaller kernel)
|
|
||||||
- Remove unused kernel modules
|
|
||||||
- Compress with xz instead of gzip
|
|
||||||
- On-demand package fetching from HTTP substituter
|
|
||||||
|
|
||||||
**Expected Sizes**:
|
|
||||||
- **Control Plane**: ~250-350 MB (initrd)
|
|
||||||
- **Worker**: ~150-250 MB (initrd)
|
|
||||||
- **All-in-One**: ~300-400 MB (initrd)
|
|
||||||
|
|
||||||
## Boot Flow
|
|
||||||
|
|
||||||
### From PXE to Running System
|
|
||||||
|
|
||||||
```
|
|
||||||
1. PXE Boot
|
|
||||||
├─ DHCP discovers boot server
|
|
||||||
├─ TFTP loads iPXE binary
|
|
||||||
└─ iPXE executes boot script
|
|
||||||
|
|
||||||
2. Netboot Download
|
|
||||||
├─ HTTP downloads bzImage (~20MB)
|
|
||||||
├─ HTTP downloads initrd (~200MB)
|
|
||||||
└─ kexec into NixOS installer
|
|
||||||
|
|
||||||
3. NixOS Installer (in RAM)
|
|
||||||
├─ Init system starts
|
|
||||||
├─ Network configuration (DHCP)
|
|
||||||
├─ SSH server starts
|
|
||||||
└─ Ready for nixos-anywhere
|
|
||||||
|
|
||||||
4. Installation (nixos-anywhere)
|
|
||||||
├─ SSH connection established
|
|
||||||
├─ Disk partitioning (disko)
|
|
||||||
├─ NixOS system installation
|
|
||||||
├─ Secret injection
|
|
||||||
└─ Bootloader installation
|
|
||||||
|
|
||||||
5. First Boot (from disk)
|
|
||||||
├─ GRUB/systemd-boot loads
|
|
||||||
├─ Services start (enabled)
|
|
||||||
├─ Cluster join (if configured)
|
|
||||||
└─ Running PlasmaCloud node
|
|
||||||
```
|
|
||||||
|
|
||||||
## Customization Guide
|
|
||||||
|
|
||||||
### Adding a New Service
|
|
||||||
|
|
||||||
**Step 1**: Create NixOS module
|
|
||||||
```nix
|
|
||||||
# nix/modules/myservice.nix
|
|
||||||
{ config, lib, pkgs, ... }:
|
|
||||||
{
|
|
||||||
options.services.myservice = {
|
|
||||||
enable = lib.mkEnableOption "myservice";
|
|
||||||
};
|
|
||||||
|
|
||||||
  config = lib.mkIf config.services.myservice.enable {
|
|
||||||
systemd.services.myservice = { ... };
|
|
||||||
};
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 2**: Add to flake packages
|
|
||||||
```nix
|
|
||||||
# flake.nix
|
|
||||||
packages.myservice-server = buildRustWorkspace { ... };
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 3**: Include in netboot profile
|
|
||||||
```nix
|
|
||||||
# nix/images/netboot-control-plane.nix
|
|
||||||
environment.systemPackages = with pkgs; [
|
|
||||||
myservice-server
|
|
||||||
];
|
|
||||||
|
|
||||||
services.myservice = {
|
|
||||||
enable = lib.mkDefault false;
|
|
||||||
};
|
|
||||||
```
|
|
||||||
|
|
||||||
### Creating a Custom Profile
|
|
||||||
|
|
||||||
**Step 1**: Create new netboot configuration
|
|
||||||
```nix
|
|
||||||
# nix/images/netboot-custom.nix
|
|
||||||
{ config, pkgs, lib, ... }:
|
|
||||||
{
|
|
||||||
imports = [
|
|
||||||
./netboot-base.nix
|
|
||||||
../modules
|
|
||||||
];
|
|
||||||
|
|
||||||
# Your customizations
|
|
||||||
environment.systemPackages = [ ... ];
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 2**: Add to flake
|
|
||||||
```nix
|
|
||||||
# flake.nix
|
|
||||||
nixosConfigurations.netboot-custom = nixpkgs.lib.nixosSystem {
|
|
||||||
system = "x86_64-linux";
|
|
||||||
modules = [ ./nix/images/netboot-custom.nix ];
|
|
||||||
};
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 3**: Update build script
|
|
||||||
```bash
|
|
||||||
# build-images.sh
|
|
||||||
profiles_to_build=("control-plane" "worker" "all-in-one" "custom")
|
|
||||||
```
|
|
||||||
|
|
||||||
## Security Model
|
|
||||||
|
|
||||||
### Netboot Phase
|
|
||||||
|
|
||||||
**Risk**: Netboot image has root SSH access enabled
|
|
||||||
|
|
||||||
**Mitigations**:
|
|
||||||
1. **Key-based authentication only** (no passwords)
|
|
||||||
2. **Isolated provisioning VLAN**
|
|
||||||
3. **MAC address whitelist in DHCP**
|
|
||||||
4. **Firewall disabled only during install**
|
|
||||||
|
|
||||||
### Post-Installation
|
|
||||||
|
|
||||||
Services remain disabled until final configuration enables them:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
# In installed system configuration
|
|
||||||
services.chainfire.enable = true; # Overrides lib.mkDefault false
|
|
||||||
```
|
|
||||||
|
|
||||||
### Secret Management
|
|
||||||
|
|
||||||
Secrets are **NOT** embedded in netboot images:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
# During nixos-anywhere installation:
|
|
||||||
scp secrets/* root@target:/tmp/secrets/
|
|
||||||
|
|
||||||
# Installed system references:
|
|
||||||
services.chainfire.settings.tls = {
|
|
||||||
cert_path = "/etc/nixos/secrets/tls-cert.pem";
|
|
||||||
};
|
|
||||||
```
|
|
||||||
|
|
||||||
## Performance Characteristics
|
|
||||||
|
|
||||||
### Build Times
|
|
||||||
|
|
||||||
- **First build**: 30-60 minutes (downloads all dependencies)
|
|
||||||
- **Incremental builds**: 5-15 minutes (reuses cached artifacts)
|
|
||||||
- **With local cache**: 2-5 minutes
|
|
||||||
|
|
||||||
### Network Requirements
|
|
||||||
|
|
||||||
- **Initial download**: ~2GB (nixpkgs + dependencies)
|
|
||||||
- **Netboot download**: ~200-400MB per node
|
|
||||||
- **Installation**: ~500MB-2GB (depending on services)
|
|
||||||
|
|
||||||
### Hardware Requirements
|
|
||||||
|
|
||||||
**Build Machine**:
|
|
||||||
- CPU: 4+ cores recommended
|
|
||||||
- RAM: 8GB minimum, 16GB recommended
|
|
||||||
- Disk: 50GB free space
|
|
||||||
- Network: Broadband connection
|
|
||||||
|
|
||||||
**Target Machine**:
|
|
||||||
- RAM: 4GB minimum for netboot (8GB+ for production)
|
|
||||||
- Network: PXE boot support, DHCP
|
|
||||||
- Disk: Depends on disko configuration
|
|
||||||
|
|
||||||
## Testing Strategy
|
|
||||||
|
|
||||||
### Verification Steps
|
|
||||||
|
|
||||||
1. **Syntax Validation**:
|
|
||||||
```bash
|
|
||||||
nix flake check
|
|
||||||
```
|
|
||||||
|
|
||||||
2. **Build Test**:
|
|
||||||
```bash
|
|
||||||
./build-images.sh --profile control-plane
|
|
||||||
```
|
|
||||||
|
|
||||||
3. **Artifact Verification**:
|
|
||||||
```bash
|
|
||||||
file artifacts/control-plane/bzImage # Should be Linux kernel
|
|
||||||
file artifacts/control-plane/initrd # Should be compressed data
|
|
||||||
```
|
|
||||||
|
|
||||||
4. **PXE Boot Test**:
|
|
||||||
- Boot VM from netboot image
|
|
||||||
- Verify SSH access
|
|
||||||
- Check available tools (disko, parted, etc.)
|
|
||||||
|
|
||||||
5. **Installation Test**:
|
|
||||||
- Run nixos-anywhere on test target
|
|
||||||
- Verify successful installation
|
|
||||||
- Check service availability
|
|
||||||
|
|
||||||
## Troubleshooting Matrix
|
|
||||||
|
|
||||||
| Symptom | Possible Cause | Solution |
|
|
||||||
|---------|---------------|----------|
|
|
||||||
| Build fails | Missing flakes | Enable experimental-features |
|
|
||||||
| Large initrd | Too many packages | Remove unused packages |
|
|
||||||
| SSH fails | Wrong SSH key | Update authorized_keys |
|
|
||||||
| Boot hangs | Wrong kernel params | Check console= settings |
|
|
||||||
| No network | DHCP issues | Verify useDHCP = true |
|
|
||||||
| Service missing | Package not built | Check flake overlay |
|
|
||||||
|
|
||||||
## Future Enhancements
|
|
||||||
|
|
||||||
### Planned Improvements
|
|
||||||
|
|
||||||
1. **Image Variants**:
|
|
||||||
- Minimal installer (no services)
|
|
||||||
- Debug variant (with extra tools)
|
|
||||||
- Rescue mode (for recovery)
|
|
||||||
|
|
||||||
2. **Build Optimizations**:
|
|
||||||
- Parallel profile builds
|
|
||||||
- Incremental rebuild detection
|
|
||||||
- Binary cache integration
|
|
||||||
|
|
||||||
3. **Security Enhancements**:
|
|
||||||
- Per-node SSH keys
|
|
||||||
- TPM-based secrets
|
|
||||||
- Measured boot support
|
|
||||||
|
|
||||||
4. **Monitoring**:
|
|
||||||
- Build metrics collection
|
|
||||||
- Size trend tracking
|
|
||||||
- Performance benchmarking
|
|
||||||
|
|
||||||
## References
|
|
||||||
|
|
||||||
- **NixOS Netboot**: https://nixos.wiki/wiki/Netboot
|
|
||||||
- **nixos-anywhere**: https://github.com/nix-community/nixos-anywhere
|
|
||||||
- **disko**: https://github.com/nix-community/disko
|
|
||||||
- **T032 Design**: `docs/por/T032-baremetal-provisioning/design.md`
|
|
||||||
- **T024 Modules**: `nix/modules/`
|
|
||||||
|
|
||||||
## Revision History
|
|
||||||
|
|
||||||
| Version | Date | Author | Changes |
|
|
||||||
|---------|------|--------|---------|
|
|
||||||
| 1.0 | 2025-12-10 | T032.S3 | Initial implementation |
|
|
||||||
|
|
@ -1,388 +0,0 @@
|
||||||
# PlasmaCloud NixOS Image Builder
|
|
||||||
|
|
||||||
This directory contains tools and configurations for building bootable NixOS netboot images for bare-metal provisioning of PlasmaCloud infrastructure.
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
The NixOS Image Builder generates netboot images (kernel + initrd) that can be served via PXE/iPXE to provision bare-metal servers with PlasmaCloud services. These images integrate with the T024 NixOS service modules and the T032.S2 PXE boot infrastructure.
|
|
||||||
|
|
||||||
## Architecture
|
|
||||||
|
|
||||||
The image builder produces three deployment profiles:
|
|
||||||
|
|
||||||
### 1. Control Plane (`netboot-control-plane`)
|
|
||||||
Full control plane deployment with all 8 PlasmaCloud services:
|
|
||||||
- **Chainfire**: Distributed configuration and coordination
|
|
||||||
- **FlareDB**: Time-series metrics and events database
|
|
||||||
- **IAM**: Identity and access management
|
|
||||||
- **PlasmaVMC**: Virtual machine control plane
|
|
||||||
- **PrismNET**: Software-defined networking controller
|
|
||||||
- **FlashDNS**: High-performance DNS server
|
|
||||||
- **FiberLB**: Layer 4/7 load balancer
|
|
||||||
- **LightningStor**: Distributed block storage
|
|
||||||
- **K8sHost**: Kubernetes hosting component
|
|
||||||
|
|
||||||
**Use Cases**:
|
|
||||||
- Multi-node production clusters (3+ control plane nodes)
|
|
||||||
- High-availability deployments
|
|
||||||
- Separation of control and data planes
|
|
||||||
|
|
||||||
### 2. Worker (`netboot-worker`)
|
|
||||||
Compute-focused deployment for running tenant workloads:
|
|
||||||
- **PlasmaVMC**: Virtual machine control plane
|
|
||||||
- **PrismNET**: Software-defined networking
|
|
||||||
|
|
||||||
**Use Cases**:
|
|
||||||
- Worker nodes in multi-node clusters
|
|
||||||
- Dedicated compute capacity
|
|
||||||
- Scalable VM hosting
|
|
||||||
|
|
||||||
### 3. All-in-One (`netboot-all-in-one`)
|
|
||||||
Single-node deployment with all 8 services:
|
|
||||||
- All services from Control Plane profile
|
|
||||||
- Optimized for single-node operation
|
|
||||||
|
|
||||||
**Use Cases**:
|
|
||||||
- Development/testing environments
|
|
||||||
- Small deployments (1-3 nodes)
|
|
||||||
- Edge locations
|
|
||||||
- Proof-of-concept installations
|
|
||||||
|
|
||||||
## Prerequisites
|
|
||||||
|
|
||||||
### Build Environment
|
|
||||||
|
|
||||||
- **NixOS** or **Nix package manager** installed
|
|
||||||
- **Flakes** enabled in Nix configuration
|
|
||||||
- **Git** access to PlasmaCloud repository
|
|
||||||
- **Sufficient disk space**: ~10GB for build artifacts
|
|
||||||
|
|
||||||
### Enable Nix Flakes
|
|
||||||
|
|
||||||
If not already enabled, add to `/etc/nix/nix.conf` or `~/.config/nix/nix.conf`:
|
|
||||||
|
|
||||||
```
|
|
||||||
experimental-features = nix-command flakes
|
|
||||||
```
|
|
||||||
|
|
||||||
### Build Dependencies
|
|
||||||
|
|
||||||
The build process automatically handles all dependencies, but ensure you have:
|
|
||||||
- Working internet connection (for Nix binary cache)
|
|
||||||
- ~4GB RAM minimum
|
|
||||||
- ~10GB free disk space
|
|
||||||
|
|
||||||
## Build Instructions
|
|
||||||
|
|
||||||
### Quick Start
|
|
||||||
|
|
||||||
Build all profiles:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd baremetal/image-builder   # relative to the repository root
|
|
||||||
./build-images.sh
|
|
||||||
```
|
|
||||||
|
|
||||||
Build a specific profile:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Control plane only
|
|
||||||
./build-images.sh --profile control-plane
|
|
||||||
|
|
||||||
# Worker nodes only
|
|
||||||
./build-images.sh --profile worker
|
|
||||||
|
|
||||||
# All-in-one deployment
|
|
||||||
./build-images.sh --profile all-in-one
|
|
||||||
```
|
|
||||||
|
|
||||||
Custom output directory:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
./build-images.sh --output-dir /srv/pxe/images
|
|
||||||
```
|
|
||||||
|
|
||||||
### Build Output
|
|
||||||
|
|
||||||
Each profile generates:
|
|
||||||
- `bzImage` - Linux kernel (~10-30 MB)
|
|
||||||
- `initrd` - Initial ramdisk (~100-300 MB)
|
|
||||||
- `netboot.ipxe` - iPXE boot script
|
|
||||||
- `build.log` - Build log for troubleshooting
|
|
||||||
|
|
||||||
Artifacts are placed in:
|
|
||||||
```
|
|
||||||
./artifacts/
|
|
||||||
├── control-plane/
|
|
||||||
│ ├── bzImage
|
|
||||||
│ ├── initrd
|
|
||||||
│ ├── netboot.ipxe
|
|
||||||
│ └── build.log
|
|
||||||
├── worker/
|
|
||||||
│ ├── bzImage
|
|
||||||
│ ├── initrd
|
|
||||||
│ ├── netboot.ipxe
|
|
||||||
│ └── build.log
|
|
||||||
└── all-in-one/
|
|
||||||
├── bzImage
|
|
||||||
├── initrd
|
|
||||||
├── netboot.ipxe
|
|
||||||
└── build.log
|
|
||||||
```
|
|
||||||
|
|
||||||
### Manual Build Commands
|
|
||||||
|
|
||||||
You can also build images directly with Nix:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Build initrd
|
|
||||||
nix build .#nixosConfigurations.netboot-control-plane.config.system.build.netbootRamdisk
|
|
||||||
|
|
||||||
# Build kernel
|
|
||||||
nix build .#nixosConfigurations.netboot-control-plane.config.system.build.kernel
|
|
||||||
|
|
||||||
# Access artifacts
|
|
||||||
ls -lh result/
|
|
||||||
```
|
|
||||||
|
|
||||||
## Deployment
|
|
||||||
|
|
||||||
### Integration with PXE Server (T032.S2)
|
|
||||||
|
|
||||||
The build script automatically copies artifacts to the PXE server directory if it exists:
|
|
||||||
|
|
||||||
```
|
|
||||||
chainfire/baremetal/pxe-server/assets/nixos/
|
|
||||||
├── control-plane/
|
|
||||||
├── worker/
|
|
||||||
├── all-in-one/
|
|
||||||
├── bzImage-control-plane -> control-plane/bzImage
|
|
||||||
├── initrd-control-plane -> control-plane/initrd
|
|
||||||
├── bzImage-worker -> worker/bzImage
|
|
||||||
└── initrd-worker -> worker/initrd
|
|
||||||
```
|
|
||||||
|
|
||||||
### Manual Deployment
|
|
||||||
|
|
||||||
Copy artifacts to your PXE/HTTP server:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Example: Deploy to nginx serving directory
|
|
||||||
sudo cp -r ./artifacts/control-plane /srv/pxe/nixos/
|
|
||||||
sudo cp -r ./artifacts/worker /srv/pxe/nixos/
|
|
||||||
sudo cp -r ./artifacts/all-in-one /srv/pxe/nixos/
|
|
||||||
```
|
|
||||||
|
|
||||||
### iPXE Boot Configuration
|
|
||||||
|
|
||||||
Reference the images in your iPXE boot script. Copy the exact `init=` argument from the generated `netboot.ipxe` for each profile — the kernel command line does not expand `/nix/store/*` wildcards:
|
|
||||||
|
|
||||||
```ipxe
|
|
||||||
#!ipxe
|
|
||||||
|
|
||||||
set boot-server 10.0.0.2:8080
|
|
||||||
|
|
||||||
:control-plane
|
|
||||||
kernel http://${boot-server}/nixos/control-plane/bzImage init=/nix/store/*/init console=ttyS0,115200 console=tty0 loglevel=4
|
|
||||||
initrd http://${boot-server}/nixos/control-plane/initrd
|
|
||||||
boot
|
|
||||||
|
|
||||||
:worker
|
|
||||||
kernel http://${boot-server}/nixos/worker/bzImage init=/nix/store/*/init console=ttyS0,115200 console=tty0 loglevel=4
|
|
||||||
initrd http://${boot-server}/nixos/worker/initrd
|
|
||||||
boot
|
|
||||||
```
|
|
||||||
|
|
||||||
## Customization
|
|
||||||
|
|
||||||
### Adding Services
|
|
||||||
|
|
||||||
To add a service to a profile, edit the corresponding configuration:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
# nix/images/netboot-control-plane.nix
|
|
||||||
environment.systemPackages = with pkgs; [
|
|
||||||
chainfire-server
|
|
||||||
flaredb-server
|
|
||||||
# ... existing services ...
|
|
||||||
my-custom-service # Add your service
|
|
||||||
];
|
|
||||||
```
|
|
||||||
|
|
||||||
### Custom Kernel Configuration
|
|
||||||
|
|
||||||
Modify `nix/images/netboot-base.nix`:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
boot.kernelPackages = pkgs.linuxPackages_6_6; # Specific kernel version
|
|
||||||
boot.kernelModules = [ "my-driver" ]; # Additional modules
|
|
||||||
boot.kernelParams = [ "my-param=value" ]; # Additional kernel parameters
|
|
||||||
```
|
|
||||||
|
|
||||||
### Additional Packages
|
|
||||||
|
|
||||||
Add packages to the netboot environment:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
# nix/images/netboot-base.nix
|
|
||||||
environment.systemPackages = with pkgs; [
|
|
||||||
# ... existing packages ...
|
|
||||||
|
|
||||||
# Your additions
|
|
||||||
python3
|
|
||||||
nodejs
|
|
||||||
custom-tool
|
|
||||||
];
|
|
||||||
```
|
|
||||||
|
|
||||||
### Hardware-Specific Configuration
|
|
||||||
|
|
||||||
See `examples/hardware-specific.nix` for hardware-specific customizations.
|
|
||||||
|
|
||||||
## Troubleshooting
|
|
||||||
|
|
||||||
### Build Failures
|
|
||||||
|
|
||||||
**Symptom**: Build fails with Nix errors
|
|
||||||
|
|
||||||
**Solutions**:
|
|
||||||
1. Check build log: `cat artifacts/PROFILE/build.log`
|
|
||||||
2. Verify Nix flakes are enabled
|
|
||||||
3. Update nixpkgs: `nix flake update`
|
|
||||||
4. Clear Nix store cache: `nix-collect-garbage -d`
|
|
||||||
|
|
||||||
### Missing Service Packages
|
|
||||||
|
|
||||||
**Symptom**: Error: "package not found"
|
|
||||||
|
|
||||||
**Solutions**:
|
|
||||||
1. Verify service is built: `nix build .#chainfire-server`
|
|
||||||
2. Check flake overlay: `nix flake show`
|
|
||||||
3. Rebuild all packages: `nix build .#default`
|
|
||||||
|
|
||||||
### Image Too Large
|
|
||||||
|
|
||||||
**Symptom**: Initrd > 500 MB
|
|
||||||
|
|
||||||
**Solutions**:
|
|
||||||
1. Remove unnecessary packages from `environment.systemPackages`
|
|
||||||
2. Disable documentation (already done in base config)
|
|
||||||
3. Use hardened kernel (smaller): `boot.kernelPackages = pkgs.linuxPackages_hardened`
|
|
||||||
|
|
||||||
### PXE Boot Fails
|
|
||||||
|
|
||||||
**Symptom**: Server fails to boot netboot image
|
|
||||||
|
|
||||||
**Solutions**:
|
|
||||||
1. Verify artifacts are accessible via HTTP
|
|
||||||
2. Check iPXE script syntax
|
|
||||||
3. Verify kernel parameters in boot script
|
|
||||||
4. Check serial console output (ttyS0)
|
|
||||||
5. Ensure DHCP provides correct boot server IP
|
|
||||||
|
|
||||||
### SSH Access Issues
|
|
||||||
|
|
||||||
**Symptom**: Cannot SSH to netboot installer
|
|
||||||
|
|
||||||
**Solutions**:
|
|
||||||
1. Replace example SSH key in `nix/images/netboot-base.nix`
|
|
||||||
2. Verify network connectivity (DHCP, firewall)
|
|
||||||
3. Check SSH service is running: `systemctl status sshd`
|
|
||||||
|
|
||||||
## Configuration Reference
|
|
||||||
|
|
||||||
### Service Modules (T024 Integration)
|
|
||||||
|
|
||||||
All netboot profiles import PlasmaCloud service modules from `nix/modules/`:
|
|
||||||
|
|
||||||
- `chainfire.nix` - Chainfire configuration
|
|
||||||
- `flaredb.nix` - FlareDB configuration
|
|
||||||
- `iam.nix` - IAM configuration
|
|
||||||
- `plasmavmc.nix` - PlasmaVMC configuration
|
|
||||||
- `prismnet.nix` - PrismNET configuration
|
|
||||||
- `flashdns.nix` - FlashDNS configuration
|
|
||||||
- `fiberlb.nix` - FiberLB configuration
|
|
||||||
- `lightningstor.nix` - LightningStor configuration
|
|
||||||
- `k8shost.nix` - K8sHost configuration
|
|
||||||
|
|
||||||
Services are **disabled by default** in netboot images and enabled in final installed configurations.
|
|
||||||
|
|
||||||
### Netboot Base Configuration
|
|
||||||
|
|
||||||
Located at `nix/images/netboot-base.nix`, provides:
|
|
||||||
|
|
||||||
- SSH server with root access (key-based)
|
|
||||||
- Generic kernel with broad hardware support
|
|
||||||
- Disk management tools (disko, parted, cryptsetup, lvm2)
|
|
||||||
- Network tools (iproute2, curl, tcpdump)
|
|
||||||
- Serial console support (ttyS0, tty0)
|
|
||||||
- DHCP networking
|
|
||||||
- Minimal system configuration
|
|
||||||
|
|
||||||
### Profile Configurations
|
|
||||||
|
|
||||||
- `nix/images/netboot-control-plane.nix` - All 8 services
|
|
||||||
- `nix/images/netboot-worker.nix` - Compute services (PlasmaVMC, PrismNET)
|
|
||||||
- `nix/images/netboot-all-in-one.nix` - All services for single-node
|
|
||||||
|
|
||||||
## Security Considerations
|
|
||||||
|
|
||||||
### SSH Keys
|
|
||||||
|
|
||||||
**IMPORTANT**: The default SSH key in `netboot-base.nix` is an example placeholder. You MUST replace it with your actual provisioning key:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
users.users.root.openssh.authorizedKeys.keys = [
|
|
||||||
"ssh-ed25519 AAAAC3Nza... your-provisioning-key@host"
|
|
||||||
];
|
|
||||||
```
|
|
||||||
|
|
||||||
Generate a new key:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
ssh-keygen -t ed25519 -C "provisioning@plasmacloud"
|
|
||||||
```
|
|
||||||
|
|
||||||
### Network Security
|
|
||||||
|
|
||||||
- Netboot images have **firewall disabled** for installation phase
|
|
||||||
- Use isolated provisioning VLAN for PXE boot
|
|
||||||
- Implement MAC address whitelist in DHCP
|
|
||||||
- Enable firewall in final installed configurations
|
|
||||||
|
|
||||||
### Secrets Management
|
|
||||||
|
|
||||||
- Do NOT embed secrets in netboot images
|
|
||||||
- Use nixos-anywhere to inject secrets during installation
|
|
||||||
- Store secrets in `/etc/nixos/secrets/` on installed systems
|
|
||||||
- Use proper file permissions (0400 for keys)
|
|
||||||
|
|
||||||
## Next Steps
|
|
||||||
|
|
||||||
After building images:
|
|
||||||
|
|
||||||
1. **Deploy to PXE Server**: Copy artifacts to HTTP server
|
|
||||||
2. **Configure DHCP/iPXE**: Set up boot infrastructure (see T032.S2)
|
|
||||||
3. **Prepare Node Configurations**: Create per-node configs for nixos-anywhere
|
|
||||||
4. **Test Boot Process**: Verify PXE boot on test hardware
|
|
||||||
5. **Run nixos-anywhere**: Install NixOS on target machines
|
|
||||||
|
|
||||||
## Resources
|
|
||||||
|
|
||||||
- **Design Document**: `docs/por/T032-baremetal-provisioning/design.md`
|
|
||||||
- **PXE Infrastructure**: `chainfire/baremetal/pxe-server/`
|
|
||||||
- **Service Modules**: `nix/modules/`
|
|
||||||
- **Example Configurations**: `baremetal/image-builder/examples/`
|
|
||||||
|
|
||||||
## Support
|
|
||||||
|
|
||||||
For issues or questions:
|
|
||||||
|
|
||||||
1. Check build logs: `artifacts/PROFILE/build.log`
|
|
||||||
2. Review design document: `docs/por/T032-baremetal-provisioning/design.md`
|
|
||||||
3. Examine example configurations: `examples/`
|
|
||||||
4. Verify service module configuration: `nix/modules/`
|
|
||||||
|
|
||||||
## License
|
|
||||||
|
|
||||||
Apache 2.0 - See LICENSE file for details
|
|
||||||
|
|
@ -77,7 +77,7 @@ Build NixOS netboot images for PlasmaCloud bare-metal provisioning.
|
||||||
OPTIONS:
|
OPTIONS:
|
||||||
--profile PROFILE Build specific profile:
|
--profile PROFILE Build specific profile:
|
||||||
- control-plane: All 8 PlasmaCloud services
|
- control-plane: All 8 PlasmaCloud services
|
||||||
- worker: Compute-focused services (PlasmaVMC, NovaNET)
|
- worker: Compute-focused services (PlasmaVMC, PrismNET)
|
||||||
- all-in-one: All services for single-node deployment
|
- all-in-one: All services for single-node deployment
|
||||||
- all: Build all profiles (default)
|
- all: Build all profiles (default)
|
||||||
|
|
||||||
|
|
@ -97,7 +97,7 @@ EXAMPLES:
|
||||||
|
|
||||||
PROFILES:
|
PROFILES:
|
||||||
control-plane - Full control plane with all 8 services
|
control-plane - Full control plane with all 8 services
|
||||||
worker - Worker node with PlasmaVMC and NovaNET
|
worker - Worker node with PlasmaVMC and PrismNET
|
||||||
all-in-one - Single-node deployment with all services
|
all-in-one - Single-node deployment with all services
|
||||||
|
|
||||||
OUTPUT:
|
OUTPUT:
|
||||||
|
|
@ -141,6 +141,16 @@ build_profile() {
|
||||||
cp -f "$profile_dir/initrd-link/initrd" "$profile_dir/initrd"
|
cp -f "$profile_dir/initrd-link/initrd" "$profile_dir/initrd"
|
||||||
cp -f "$profile_dir/kernel-link/bzImage" "$profile_dir/bzImage"
|
cp -f "$profile_dir/kernel-link/bzImage" "$profile_dir/bzImage"
|
||||||
|
|
||||||
|
# Resolve init path from the build (avoids hardcoding store paths)
|
||||||
|
local init_path="/init"
|
||||||
|
if toplevel=$(nix eval --raw "$REPO_ROOT#nixosConfigurations.netboot-$profile.config.system.build.toplevel" 2>/dev/null); then
|
||||||
|
if [ -n "$toplevel" ]; then
|
||||||
|
init_path="${toplevel}/init"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
print_warning "Failed to resolve init path for $profile; using /init"
|
||||||
|
fi
|
||||||
|
|
||||||
# Generate iPXE boot script
|
# Generate iPXE boot script
|
||||||
print_info " Generating iPXE boot script..."
|
print_info " Generating iPXE boot script..."
|
||||||
cat > "$profile_dir/netboot.ipxe" << EOF
|
cat > "$profile_dir/netboot.ipxe" << EOF
|
||||||
|
|
@ -159,7 +169,7 @@ echo Initrd: initrd
|
||||||
echo
|
echo
|
||||||
|
|
||||||
# Load kernel and initrd
|
# Load kernel and initrd
|
||||||
kernel \${boot-server}/$profile/bzImage init=/nix/store/*/init console=ttyS0,115200 console=tty0 loglevel=4
|
kernel \${boot-server}/$profile/bzImage init=${init_path} console=ttyS0,115200 console=tty0 loglevel=4
|
||||||
initrd \${boot-server}/$profile/initrd
|
initrd \${boot-server}/$profile/initrd
|
||||||
|
|
||||||
# Boot
|
# Boot
|
||||||
|
|
|
||||||
|
|
@ -1,361 +0,0 @@
|
||||||
{ config, pkgs, lib, ... }:
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# CUSTOM NETBOOT CONFIGURATION EXAMPLE
|
|
||||||
# ==============================================================================
|
|
||||||
# This example demonstrates how to create a custom netboot configuration with:
|
|
||||||
# - Custom kernel version and modules
|
|
||||||
# - Additional packages for specialized use cases
|
|
||||||
# - Hardware-specific drivers
|
|
||||||
# - Custom network configuration
|
|
||||||
# - Debugging tools
|
|
||||||
#
|
|
||||||
# Usage:
|
|
||||||
# 1. Copy this file to nix/images/netboot-custom.nix
|
|
||||||
# 2. Add to flake.nix:
|
|
||||||
# nixosConfigurations.netboot-custom = nixpkgs.lib.nixosSystem {
|
|
||||||
# system = "x86_64-linux";
|
|
||||||
# modules = [ ./nix/images/netboot-custom.nix ];
|
|
||||||
# };
|
|
||||||
# 3. Build: ./build-images.sh --profile custom
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
{
|
|
||||||
imports = [
|
|
||||||
../netboot-base.nix # Adjust path as needed
|
|
||||||
../../modules # PlasmaCloud service modules
|
|
||||||
];
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# CUSTOM KERNEL CONFIGURATION
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Use specific kernel version instead of latest
|
|
||||||
boot.kernelPackages = pkgs.linuxPackages_6_6; # LTS kernel
|
|
||||||
|
|
||||||
# Add custom kernel modules for specialized hardware
|
|
||||||
boot.kernelModules = [
|
|
||||||
# Infiniband/RDMA support
|
|
||||||
"ib_core"
|
|
||||||
"ib_uverbs"
|
|
||||||
"mlx5_core"
|
|
||||||
"mlx5_ib"
|
|
||||||
|
|
||||||
# GPU support (for GPU compute nodes)
|
|
||||||
"nvidia"
|
|
||||||
"nvidia_uvm"
|
|
||||||
|
|
||||||
# Custom storage controller
|
|
||||||
"megaraid_sas"
|
|
||||||
"mpt3sas"
|
|
||||||
];
|
|
||||||
|
|
||||||
# Custom kernel parameters
|
|
||||||
boot.kernelParams = [
|
|
||||||
# Default console configuration
|
|
||||||
"console=ttyS0,115200"
|
|
||||||
"console=tty0"
|
|
||||||
"loglevel=4"
|
|
||||||
|
|
||||||
# Custom parameters
|
|
||||||
"intel_iommu=on" # Enable IOMMU for PCI passthrough
|
|
||||||
"iommu=pt" # Passthrough mode
|
|
||||||
"hugepagesz=2M" # 2MB hugepages
|
|
||||||
"hugepages=1024" # Allocate 1024 hugepages (2GB)
|
|
||||||
"isolcpus=2-7" # CPU isolation for real-time workloads
|
|
||||||
];
|
|
||||||
|
|
||||||
# Blacklist problematic modules
|
|
||||||
boot.blacklistedKernelModules = [
|
|
||||||
"nouveau" # Disable nouveau if using proprietary NVIDIA
|
|
||||||
"i915" # Disable Intel GPU if not needed
|
|
||||||
];
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# ADDITIONAL PACKAGES
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
environment.systemPackages = with pkgs; [
|
|
||||||
# Networking diagnostics
|
|
||||||
iperf3 # Network performance testing
|
|
||||||
mtr # Network diagnostic tool
|
|
||||||
nmap # Network scanner
|
|
||||||
wireshark-cli # Packet analyzer
|
|
||||||
|
|
||||||
# Storage tools
|
|
||||||
nvme-cli # NVMe management
|
|
||||||
smartmontools # SMART monitoring
|
|
||||||
fio # I/O performance testing
|
|
||||||
sg3_utils # SCSI utilities
|
|
||||||
|
|
||||||
# Hardware diagnostics
|
|
||||||
pciutils # lspci
|
|
||||||
usbutils # lsusb
|
|
||||||
dmidecode # Hardware information
|
|
||||||
lshw # Hardware lister
|
|
||||||
hwinfo # Hardware info tool
|
|
||||||
|
|
||||||
# Debugging tools
|
|
||||||
strace # System call tracer
|
|
||||||
ltrace # Library call tracer
|
|
||||||
gdb # GNU debugger
|
|
||||||
valgrind # Memory debugger
|
|
||||||
|
|
||||||
# Performance tools
|
|
||||||
perf # Linux perf tool
|
|
||||||
bpftrace # eBPF tracing
|
|
||||||
sysstat # System statistics (sar, iostat)
|
|
||||||
|
|
||||||
# Container/virtualization tools
|
|
||||||
qemu_full # Full QEMU with all features
|
|
||||||
libvirt # Virtualization management
|
|
||||||
virt-manager # VM management (CLI)
|
|
||||||
docker # Container runtime
|
|
||||||
podman # Alternative container runtime
|
|
||||||
|
|
||||||
# Development tools (for on-site debugging)
|
|
||||||
python3Full # Python with all modules
|
|
||||||
python3Packages.pip
|
|
||||||
nodejs # Node.js runtime
|
|
||||||
git # Version control
|
|
||||||
gcc # C compiler
|
|
||||||
rustc # Rust compiler
|
|
||||||
cargo # Rust package manager
|
|
||||||
|
|
||||||
# Custom tools
|
|
||||||
# Add your organization's custom packages here
|
|
||||||
];
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# CUSTOM NETWORK CONFIGURATION
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Static IP instead of DHCP (example)
|
|
||||||
networking.useDHCP = lib.mkForce false;
|
|
||||||
|
|
||||||
networking.interfaces.eth0 = {
|
|
||||||
useDHCP = false;
|
|
||||||
ipv4.addresses = [{
|
|
||||||
address = "10.0.1.100";
|
|
||||||
prefixLength = 24;
|
|
||||||
}];
|
|
||||||
};
|
|
||||||
|
|
||||||
networking.defaultGateway = "10.0.1.1";
|
|
||||||
networking.nameservers = [ "10.0.1.1" "8.8.8.8" ];
|
|
||||||
|
|
||||||
# Custom DNS domain
|
|
||||||
networking.domain = "custom.example.com";
|
|
||||||
|
|
||||||
# Enable jumbo frames
|
|
||||||
networking.interfaces.eth0.mtu = 9000;
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# CUSTOM SSH CONFIGURATION
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Multiple SSH keys for different operators
|
|
||||||
users.users.root.openssh.authorizedKeys.keys = [
|
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOperator1Key operator1@example.com"
|
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOperator2Key operator2@example.com"
|
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOperator3Key operator3@example.com"
|
|
||||||
];
|
|
||||||
|
|
||||||
# Custom SSH port (for security through obscurity - not recommended for production)
|
|
||||||
# services.openssh.ports = [ 2222 ];
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# CUSTOM SERVICES
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Enable only specific PlasmaCloud services
|
|
||||||
services.plasmavmc = {
|
|
||||||
enable = lib.mkDefault false;
|
|
||||||
port = 8081;
|
|
||||||
};
|
|
||||||
|
|
||||||
services.prismnet = {
|
|
||||||
enable = lib.mkDefault false;
|
|
||||||
port = 8082;
|
|
||||||
};
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# DEBUGGING AND LOGGING
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Enable verbose boot logging
|
|
||||||
boot.kernelParams = lib.mkAfter [ "loglevel=7" "debug" ];
|
|
||||||
|
|
||||||
# Enable systemd debug logging
|
|
||||||
systemd.services."serial-getty@ttyS0".environment = {
|
|
||||||
SYSTEMD_LOG_LEVEL = "debug";
|
|
||||||
};
|
|
||||||
|
|
||||||
# Enable additional logging
|
|
||||||
services.journald.extraConfig = ''
|
|
||||||
Storage=persistent
|
|
||||||
MaxRetentionSec=7day
|
|
||||||
SystemMaxUse=1G
|
|
||||||
'';
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# PERFORMANCE TUNING
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Custom sysctl settings for high-performance networking
|
|
||||||
boot.kernel.sysctl = {
|
|
||||||
# Network buffer sizes
|
|
||||||
"net.core.rmem_max" = 268435456; # 256 MB
|
|
||||||
"net.core.wmem_max" = 268435456; # 256 MB
|
|
||||||
"net.core.rmem_default" = 67108864; # 64 MB
|
|
||||||
"net.core.wmem_default" = 67108864; # 64 MB
|
|
||||||
|
|
||||||
# TCP tuning
|
|
||||||
"net.ipv4.tcp_rmem" = "4096 87380 134217728";
|
|
||||||
"net.ipv4.tcp_wmem" = "4096 65536 134217728";
|
|
||||||
"net.ipv4.tcp_congestion_control" = "bbr";
|
|
||||||
|
|
||||||
# Connection tracking
|
|
||||||
"net.netfilter.nf_conntrack_max" = 1048576;
|
|
||||||
|
|
||||||
# File descriptor limits
|
|
||||||
"fs.file-max" = 2097152;
|
|
||||||
|
|
||||||
# Virtual memory
|
|
||||||
"vm.swappiness" = 1;
|
|
||||||
"vm.vfs_cache_pressure" = 50;
|
|
||||||
"vm.dirty_ratio" = 10;
|
|
||||||
"vm.dirty_background_ratio" = 5;
|
|
||||||
|
|
||||||
# Kernel
|
|
||||||
"kernel.pid_max" = 4194304;
|
|
||||||
};
|
|
||||||
|
|
||||||
# Increase systemd limits
|
|
||||||
systemd.extraConfig = ''
|
|
||||||
DefaultLimitNOFILE=1048576
|
|
||||||
DefaultLimitNPROC=1048576
|
|
||||||
'';
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# HARDWARE-SPECIFIC CONFIGURATION
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Enable CPU microcode updates
|
|
||||||
hardware.cpu.intel.updateMicrocode = true;
|
|
||||||
hardware.cpu.amd.updateMicrocode = true;
|
|
||||||
|
|
||||||
# Enable firmware updates
|
|
||||||
hardware.enableRedistributableFirmware = true;
|
|
||||||
|
|
||||||
# GPU support (example for NVIDIA)
|
|
||||||
# Uncomment if using NVIDIA GPUs
|
|
||||||
# hardware.nvidia.modesetting.enable = true;
|
|
||||||
# services.xserver.videoDrivers = [ "nvidia" ];
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# CUSTOM INITIALIZATION
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Run custom script on boot
|
|
||||||
systemd.services.custom-init = {
|
|
||||||
description = "Custom initialization script";
|
|
||||||
wantedBy = [ "multi-user.target" ];
|
|
||||||
after = [ "network-online.target" ];
|
|
||||||
wants = [ "network-online.target" ];
|
|
||||||
|
|
||||||
serviceConfig = {
|
|
||||||
Type = "oneshot";
|
|
||||||
RemainAfterExit = true;
|
|
||||||
};
|
|
||||||
|
|
||||||
script = ''
|
|
||||||
echo "Running custom initialization..."
|
|
||||||
|
|
||||||
# Example: Configure network interfaces
|
|
||||||
${pkgs.iproute2}/bin/ip link set dev eth1 up
|
|
||||||
|
|
||||||
# Example: Load custom kernel modules
|
|
||||||
${pkgs.kmod}/bin/modprobe custom_driver || true
|
|
||||||
|
|
||||||
# Example: Call home to provisioning server
|
|
||||||
${pkgs.curl}/bin/curl -X POST http://provisioning.example.com/api/register \
|
|
||||||
-d "hostname=$(hostname)" \
|
|
||||||
-d "ip=$(${pkgs.iproute2}/bin/ip -4 addr show eth0 | grep -oP '(?<=inet\s)\d+(\.\d+){3}')" \
|
|
||||||
|| true
|
|
||||||
|
|
||||||
echo "Custom initialization complete"
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# FIREWALL CONFIGURATION
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Custom firewall rules (disabled by default in netboot, but example provided)
|
|
||||||
networking.firewall = {
|
|
||||||
enable = lib.mkDefault false; # Disabled during provisioning
|
|
||||||
|
|
||||||
# When enabled, allow these ports
|
|
||||||
allowedTCPPorts = [
|
|
||||||
22 # SSH
|
|
||||||
8081 # PlasmaVMC
|
|
||||||
8082 # PrismNET
|
|
||||||
];
|
|
||||||
|
|
||||||
# Custom iptables rules
|
|
||||||
extraCommands = ''
|
|
||||||
# Allow ICMP
|
|
||||||
iptables -A INPUT -p icmp -j ACCEPT
|
|
||||||
|
|
||||||
# Rate limit SSH connections
|
|
||||||
iptables -A INPUT -p tcp --dport 22 -m state --state NEW -m recent --set
|
|
||||||
iptables -A INPUT -p tcp --dport 22 -m state --state NEW -m recent --update --seconds 60 --hitcount 4 -j DROP
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# NIX CONFIGURATION
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Custom binary caches
|
|
||||||
nix.settings = {
|
|
||||||
substituters = [
|
|
||||||
"https://cache.nixos.org"
|
|
||||||
"https://custom-cache.example.com" # Your organization's cache
|
|
||||||
];
|
|
||||||
|
|
||||||
trusted-public-keys = [
|
|
||||||
"cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY="
|
|
||||||
"custom-cache.example.com:YourPublicKeyHere"
|
|
||||||
];
|
|
||||||
|
|
||||||
# Build settings
|
|
||||||
max-jobs = "auto";
|
|
||||||
cores = 0; # Use all available cores
|
|
||||||
|
|
||||||
# Experimental features
|
|
||||||
experimental-features = [ "nix-command" "flakes" "repl-flake" ];
|
|
||||||
};
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# TIMEZONE AND LOCALE
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Custom timezone (instead of UTC)
|
|
||||||
time.timeZone = lib.mkForce "America/New_York";
|
|
||||||
|
|
||||||
# Additional locale support
|
|
||||||
i18n.supportedLocales = [
|
|
||||||
"en_US.UTF-8/UTF-8"
|
|
||||||
"ja_JP.UTF-8/UTF-8" # Japanese support
|
|
||||||
];
|
|
||||||
|
|
||||||
i18n.defaultLocale = "en_US.UTF-8";
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# SYSTEM STATE VERSION
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
system.stateVersion = "24.11";
|
|
||||||
}
|
|
||||||
|
|
@ -1,442 +0,0 @@
|
||||||
{ config, pkgs, lib, ... }:
|
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# HARDWARE-SPECIFIC NETBOOT CONFIGURATION EXAMPLE
|
|
||||||
# ==============================================================================
|
|
||||||
# This example demonstrates hardware-specific configurations for common
|
|
||||||
# bare-metal server platforms. Use this as a template for your specific hardware.
|
|
||||||
#
|
|
||||||
# Common Server Platforms:
|
|
||||||
# - Dell PowerEdge (R640, R650, R750)
|
|
||||||
# - HP ProLiant (DL360, DL380, DL560)
|
|
||||||
# - Supermicro (X11, X12 series)
|
|
||||||
# - Generic whitebox servers
|
|
||||||
#
|
|
||||||
# Usage:
|
|
||||||
# 1. Copy relevant sections to your netboot configuration
|
|
||||||
# 2. Adjust based on your specific hardware
|
|
||||||
# 3. Test boot on target hardware
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
{
|
|
||||||
imports = [
|
|
||||||
../netboot-base.nix
|
|
||||||
../../modules
|
|
||||||
];
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# DELL POWEREDGE R640 CONFIGURATION
|
|
||||||
# ============================================================================
|
|
||||||
# Uncomment this section for Dell PowerEdge R640 servers
|
|
||||||
|
|
||||||
/*
|
|
||||||
# Hardware-specific kernel modules
|
|
||||||
boot.initrd.availableKernelModules = [
|
|
||||||
# Dell PERC RAID controller
|
|
||||||
"megaraid_sas"
|
|
||||||
|
|
||||||
# Intel X710 10GbE NIC
|
|
||||||
"i40e"
|
|
||||||
|
|
||||||
# NVMe drives
|
|
||||||
"nvme"
|
|
||||||
|
|
||||||
# Standard modules
|
|
||||||
"ahci"
|
|
||||||
"xhci_pci"
|
|
||||||
"usb_storage"
|
|
||||||
"sd_mod"
|
|
||||||
"sr_mod"
|
|
||||||
];
|
|
||||||
|
|
||||||
boot.kernelModules = [
|
|
||||||
"kvm-intel" # Intel VT-x
|
|
||||||
"ipmi_devintf" # IPMI interface
|
|
||||||
"ipmi_si" # IPMI system interface
|
|
||||||
];
|
|
||||||
|
|
||||||
# Dell-specific firmware
|
|
||||||
hardware.enableRedistributableFirmware = true;
|
|
||||||
hardware.cpu.intel.updateMicrocode = true;
|
|
||||||
|
|
||||||
# Network interface naming
|
|
||||||
# R640 typically has:
|
|
||||||
# - eno1, eno2: Onboard 1GbE (Intel i350)
|
|
||||||
# - ens1f0, ens1f1: PCIe 10GbE (Intel X710)
|
|
||||||
networking.interfaces = {
|
|
||||||
eno1 = { useDHCP = true; };
|
|
||||||
ens1f0 = {
|
|
||||||
useDHCP = false;
|
|
||||||
mtu = 9000; # Jumbo frames for 10GbE
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
# iDRAC/IPMI configuration
|
|
||||||
services.freeipmi.enable = true;
|
|
||||||
|
|
||||||
# Dell OpenManage tools (optional)
|
|
||||||
environment.systemPackages = with pkgs; [
|
|
||||||
ipmitool
|
|
||||||
freeipmi
|
|
||||||
];
|
|
||||||
*/
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# HP PROLIANT DL360 GEN10 CONFIGURATION
|
|
||||||
# ============================================================================
|
|
||||||
# Uncomment this section for HP ProLiant DL360 Gen10 servers
|
|
||||||
|
|
||||||
/*
|
|
||||||
boot.initrd.availableKernelModules = [
|
|
||||||
# HP Smart Array controller
|
|
||||||
"hpsa"
|
|
||||||
|
|
||||||
# Broadcom/Intel NIC
|
|
||||||
"tg3"
|
|
||||||
"bnx2x"
|
|
||||||
"i40e"
|
|
||||||
|
|
||||||
# NVMe
|
|
||||||
"nvme"
|
|
||||||
|
|
||||||
# Standard
|
|
||||||
"ahci"
|
|
||||||
"xhci_pci"
|
|
||||||
"usb_storage"
|
|
||||||
"sd_mod"
|
|
||||||
];
|
|
||||||
|
|
||||||
boot.kernelModules = [
|
|
||||||
"kvm-intel"
|
|
||||||
"ipmi_devintf"
|
|
||||||
"ipmi_si"
|
|
||||||
];
|
|
||||||
|
|
||||||
hardware.enableRedistributableFirmware = true;
|
|
||||||
hardware.cpu.intel.updateMicrocode = true;
|
|
||||||
|
|
||||||
# HP-specific tools
|
|
||||||
environment.systemPackages = with pkgs; [
|
|
||||||
ipmitool
|
|
||||||
smartmontools
|
|
||||||
];
|
|
||||||
|
|
||||||
# iLO/IPMI
|
|
||||||
services.freeipmi.enable = true;
|
|
||||||
*/
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# SUPERMICRO X11 SERIES CONFIGURATION
|
|
||||||
# ============================================================================
|
|
||||||
# Uncomment this section for Supermicro X11 series servers
|
|
||||||
|
|
||||||
/*
|
|
||||||
boot.initrd.availableKernelModules = [
|
|
||||||
# LSI/Broadcom RAID
|
|
||||||
"megaraid_sas"
|
|
||||||
"mpt3sas"
|
|
||||||
|
|
||||||
# Intel NIC (common on Supermicro)
|
|
||||||
"igb"
|
|
||||||
"ixgbe"
|
|
||||||
"i40e"
|
|
||||||
|
|
||||||
# NVMe
|
|
||||||
"nvme"
|
|
||||||
|
|
||||||
# Standard
|
|
||||||
"ahci"
|
|
||||||
"xhci_pci"
|
|
||||||
"ehci_pci"
|
|
||||||
"usb_storage"
|
|
||||||
"sd_mod"
|
|
||||||
];
|
|
||||||
|
|
||||||
boot.kernelModules = [
|
|
||||||
"kvm-intel" # Or kvm-amd for AMD CPUs
|
|
||||||
"ipmi_devintf"
|
|
||||||
"ipmi_si"
|
|
||||||
];
|
|
||||||
|
|
||||||
hardware.enableRedistributableFirmware = true;
|
|
||||||
|
|
||||||
# CPU-specific (adjust based on your CPU)
|
|
||||||
hardware.cpu.intel.updateMicrocode = true;
|
|
||||||
# hardware.cpu.amd.updateMicrocode = true; # For AMD CPUs
|
|
||||||
|
|
||||||
# IPMI configuration
|
|
||||||
services.freeipmi.enable = true;
|
|
||||||
|
|
||||||
environment.systemPackages = with pkgs; [
|
|
||||||
ipmitool
|
|
||||||
dmidecode
|
|
||||||
smartmontools
|
|
||||||
];
|
|
||||||
*/
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# GENERIC HIGH-PERFORMANCE SERVER
|
|
||||||
# ============================================================================
|
|
||||||
# This configuration works for most modern x86_64 servers
|
|
||||||
|
|
||||||
boot.initrd.availableKernelModules = [
|
|
||||||
# SATA/AHCI
|
|
||||||
"ahci"
|
|
||||||
"ata_piix"
|
|
||||||
|
|
||||||
# NVMe
|
|
||||||
"nvme"
|
|
||||||
|
|
||||||
# USB
|
|
||||||
"xhci_pci"
|
|
||||||
"ehci_pci"
|
|
||||||
"usb_storage"
|
|
||||||
"usbhid"
|
|
||||||
|
|
||||||
# SCSI/SAS
|
|
||||||
"sd_mod"
|
|
||||||
"sr_mod"
|
|
||||||
|
|
||||||
# Common RAID controllers
|
|
||||||
"megaraid_sas" # LSI MegaRAID
|
|
||||||
"mpt3sas" # LSI SAS3
|
|
||||||
"hpsa" # HP Smart Array
|
|
||||||
"aacraid" # Adaptec
|
|
||||||
|
|
||||||
# Network
|
|
||||||
"e1000e" # Intel GbE
|
|
||||||
"igb" # Intel GbE
|
|
||||||
"ixgbe" # Intel 10GbE
|
|
||||||
"i40e" # Intel 10/25/40GbE
|
|
||||||
"bnx2x" # Broadcom 10GbE
|
|
||||||
"mlx4_core" # Mellanox ConnectX-3
|
|
||||||
"mlx5_core" # Mellanox ConnectX-4/5
|
|
||||||
];
|
|
||||||
|
|
||||||
boot.kernelModules = [
|
|
||||||
"kvm-intel" # Intel VT-x
|
|
||||||
"kvm-amd" # AMD-V
|
|
||||||
];
|
|
||||||
|
|
||||||
# Enable all firmware
|
|
||||||
hardware.enableRedistributableFirmware = true;
|
|
||||||
|
|
||||||
# CPU microcode (both Intel and AMD)
|
|
||||||
hardware.cpu.intel.updateMicrocode = true;
|
|
||||||
hardware.cpu.amd.updateMicrocode = true;
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# NETWORK INTERFACE CONFIGURATION
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Predictable interface names disabled in base config, using eth0, eth1, etc.
|
|
||||||
# For specific hardware, you may want to use biosdevname or systemd naming
|
|
||||||
|
|
||||||
# Example: Bond configuration for redundancy
|
|
||||||
/*
|
|
||||||
networking.bonds.bond0 = {
|
|
||||||
interfaces = [ "eth0" "eth1" ];
|
|
||||||
driverOptions = {
|
|
||||||
mode = "802.3ad"; # LACP
|
|
||||||
xmit_hash_policy = "layer3+4";
|
|
||||||
lacp_rate = "fast";
|
|
||||||
miimon = "100";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
networking.interfaces.bond0 = {
|
|
||||||
useDHCP = true;
|
|
||||||
mtu = 9000;
|
|
||||||
};
|
|
||||||
*/
|
|
||||||
|
|
||||||
# Example: VLAN configuration
|
|
||||||
/*
|
|
||||||
networking.vlans = {
|
|
||||||
vlan100 = {
|
|
||||||
id = 100;
|
|
||||||
interface = "eth0";
|
|
||||||
};
|
|
||||||
vlan200 = {
|
|
||||||
id = 200;
|
|
||||||
interface = "eth0";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
networking.interfaces.vlan100 = {
|
|
||||||
useDHCP = false;
|
|
||||||
ipv4.addresses = [{
|
|
||||||
address = "10.100.1.10";
|
|
||||||
prefixLength = 24;
|
|
||||||
}];
|
|
||||||
};
|
|
||||||
*/
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# STORAGE CONFIGURATION
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Enable RAID support
|
|
||||||
boot.swraid.enable = true;
|
|
||||||
boot.swraid.mdadmConf = ''
|
|
||||||
ARRAY /dev/md0 level=raid1 num-devices=2
|
|
||||||
'';
|
|
||||||
|
|
||||||
# LVM support
|
|
||||||
services.lvm.enable = true;
|
|
||||||
|
|
||||||
# ZFS support (if needed)
|
|
||||||
# boot.supportedFilesystems = [ "zfs" ];
|
|
||||||
# boot.zfs.forceImportRoot = false;
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# CPU-SPECIFIC OPTIMIZATIONS
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Intel-specific
|
|
||||||
boot.kernelParams = lib.mkIf (config.hardware.cpu.intel.updateMicrocode) [
|
|
||||||
"intel_pstate=active" # Use Intel P-State driver
|
|
||||||
"intel_iommu=on" # Enable IOMMU for VT-d
|
|
||||||
];
|
|
||||||
|
|
||||||
# AMD-specific
|
|
||||||
boot.kernelParams = lib.mkIf (config.hardware.cpu.amd.updateMicrocode) [
|
|
||||||
"amd_iommu=on" # Enable IOMMU for AMD-Vi
|
|
||||||
];
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# MEMORY CONFIGURATION
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Hugepages for high-performance applications (DPDK, databases)
|
|
||||||
boot.kernelParams = [
|
|
||||||
"hugepagesz=2M"
|
|
||||||
"hugepages=1024" # 2GB of 2MB hugepages
|
|
||||||
"default_hugepagesz=2M"
|
|
||||||
];
|
|
||||||
|
|
||||||
# Transparent Hugepages
|
|
||||||
boot.kernel.sysctl = {
|
|
||||||
"vm.nr_hugepages" = 1024;
|
|
||||||
# "vm.nr_overcommit_hugepages" = 512; # Additional hugepages if needed
|
|
||||||
};
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# IPMI/BMC CONFIGURATION
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Enable IPMI kernel modules
|
|
||||||
boot.kernelModules = [ "ipmi_devintf" "ipmi_si" ];
|
|
||||||
|
|
||||||
# IPMI tools
|
|
||||||
services.freeipmi.enable = true;
|
|
||||||
|
|
||||||
environment.systemPackages = with pkgs; [
|
|
||||||
ipmitool # IPMI command-line tool
|
|
||||||
freeipmi # Alternative IPMI tools
|
|
||||||
];
|
|
||||||
|
|
||||||
# Example: Configure BMC network (usually done via IPMI)
|
|
||||||
# Run manually: ipmitool lan set 1 ipaddr 10.0.100.10
|
|
||||||
# Run manually: ipmitool lan set 1 netmask 255.255.255.0
|
|
||||||
# Run manually: ipmitool lan set 1 defgw ipaddr 10.0.100.1
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# PERFORMANCE TUNING
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Set CPU governor for performance
|
|
||||||
powerManagement.cpuFreqGovernor = "performance";
|
|
||||||
|
|
||||||
# Disable power management features that can cause latency
|
|
||||||
boot.kernelParams = [
|
|
||||||
"processor.max_cstate=1" # Limit C-states
|
|
||||||
"intel_idle.max_cstate=1" # Limit idle states
|
|
||||||
"idle=poll" # Aggressive polling (high power usage!)
|
|
||||||
];
|
|
||||||
|
|
||||||
# Note: The above settings prioritize performance over power efficiency
|
|
||||||
# Remove or adjust for non-latency-sensitive workloads
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# HARDWARE MONITORING
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Enable hardware sensors
|
|
||||||
# services.lm_sensors.enable = true; # Uncomment if needed
|
|
||||||
|
|
||||||
# SMART monitoring
|
|
||||||
services.smartd = {
|
|
||||||
enable = true;
|
|
||||||
autodetect = true;
|
|
||||||
};
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# GPU CONFIGURATION (if applicable)
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# NVIDIA GPU
|
|
||||||
/*
|
|
||||||
hardware.nvidia = {
|
|
||||||
modesetting.enable = true;
|
|
||||||
powerManagement.enable = false;
|
|
||||||
powerManagement.finegrained = false;
|
|
||||||
open = false; # Use proprietary driver
|
|
||||||
nvidiaSettings = false; # No GUI needed
|
|
||||||
};
|
|
||||||
|
|
||||||
services.xserver.videoDrivers = [ "nvidia" ];
|
|
||||||
|
|
||||||
# NVIDIA Container Runtime (for GPU containers)
|
|
||||||
hardware.nvidia-container-toolkit.enable = true;
|
|
||||||
|
|
||||||
environment.systemPackages = with pkgs; [
|
|
||||||
cudaPackages.cudatoolkit
|
|
||||||
nvidia-docker
|
|
||||||
];
|
|
||||||
*/
|
|
||||||
|
|
||||||
# AMD GPU
|
|
||||||
/*
|
|
||||||
boot.initrd.kernelModules = [ "amdgpu" ];
|
|
||||||
services.xserver.videoDrivers = [ "amdgpu" ];
|
|
||||||
*/
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# INFINIBAND/RDMA (for high-performance networking)
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
/*
|
|
||||||
boot.kernelModules = [
|
|
||||||
"ib_core"
|
|
||||||
"ib_uverbs"
|
|
||||||
"ib_umad"
|
|
||||||
"rdma_cm"
|
|
||||||
"rdma_ucm"
|
|
||||||
"mlx5_core"
|
|
||||||
"mlx5_ib"
|
|
||||||
];
|
|
||||||
|
|
||||||
environment.systemPackages = with pkgs; [
|
|
||||||
rdma-core
|
|
||||||
libfabric
|
|
||||||
# perftest # RDMA performance tests
|
|
||||||
];
|
|
||||||
|
|
||||||
# Configure IPoIB (IP over InfiniBand)
|
|
||||||
networking.interfaces.ib0 = {
|
|
||||||
useDHCP = false;
|
|
||||||
ipv4.addresses = [{
|
|
||||||
address = "192.168.100.10";
|
|
||||||
prefixLength = 24;
|
|
||||||
}];
|
|
||||||
mtu = 65520; # Max for IPoIB connected mode
|
|
||||||
};
|
|
||||||
*/
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# SYSTEM STATE VERSION
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
system.stateVersion = "24.11";
|
|
||||||
}
|
|
||||||
|
|
@ -1,36 +1,22 @@
|
||||||
# QEMU Socket Networking VM Cluster
|
# Legacy Baremetal VM Cluster
|
||||||
|
|
||||||
## Architecture
|
`baremetal/vm-cluster` is no longer the primary local validation path.
|
||||||
|
|
||||||
**Topology:** 4 QEMU VMs connected via multicast socket networking (230.0.0.1:1234)
|
Use [`nix/test-cluster`](/home/centra/cloud/nix/test-cluster/README.md) for canonical local VM validation:
|
||||||
|
|
||||||
**VMs:**
|
```bash
|
||||||
1. **pxe-server** (192.168.100.1) - Provides DHCP/TFTP/HTTP services
|
nix run ./nix/test-cluster#cluster -- smoke
|
||||||
2. **node01** (192.168.100.11) - Cluster node
|
```
|
||||||
3. **node02** (192.168.100.12) - Cluster node
|
|
||||||
4. **node03** (192.168.100.13) - Cluster node
|
|
||||||
|
|
||||||
**Network:** All VMs share L2 segment via QEMU multicast socket (no root privileges required)
|
This directory is kept only for the older manual T036 PXE and bare-metal style experiments.
|
||||||
|
|
||||||
## Files
|
## What remains here
|
||||||
|
|
||||||
- `node01.qcow2`, `node02.qcow2`, `node03.qcow2` - 100GB cluster node disks
|
- [`pxe-server/`](/home/centra/cloud/baremetal/vm-cluster/pxe-server): older PXE server configuration
|
||||||
- `pxe-server.qcow2` - 20GB PXE server disk
|
- [`legacy/`](/home/centra/cloud/baremetal/vm-cluster/legacy/README.md): archived manual deployment, validation, and ad hoc QEMU launch scripts
|
||||||
- `launch-pxe-server.sh` - PXE server startup script
|
|
||||||
- `launch-node01.sh`, `launch-node02.sh`, `launch-node03.sh` - Node startup scripts
|
|
||||||
- `pxe-server/` - PXE server configuration files
|
|
||||||
|
|
||||||
## MACs
|
## Status
|
||||||
|
|
||||||
- pxe-server: 52:54:00:00:00:01
|
- unsupported for regular development
|
||||||
- node01: 52:54:00:00:01:01
|
- not the release-validation path
|
||||||
- node02: 52:54:00:00:01:02
|
- retained only to preserve old manual experiments
|
||||||
- node03: 52:54:00:00:01:03
|
|
||||||
|
|
||||||
## Provisioning Flow
|
|
||||||
|
|
||||||
1. Start PXE server VM (Alpine Linux with dnsmasq)
|
|
||||||
2. Configure DHCP/TFTP/HTTP services
|
|
||||||
3. Deploy NixOS netboot artifacts
|
|
||||||
4. Start node VMs with PXE boot enabled
|
|
||||||
5. Nodes PXE boot and provision via nixos-anywhere
|
|
||||||
|
|
|
||||||
|
|
@ -1,46 +0,0 @@
|
||||||
# Alpine Linux Answer File for Automated Installation
|
|
||||||
# For use with: setup-alpine -f alpine-answers.txt
|
|
||||||
|
|
||||||
# Keyboard layout
|
|
||||||
KEYMAPOPTS="us us"
|
|
||||||
|
|
||||||
# Hostname
|
|
||||||
HOSTNAMEOPTS="-n pxe-server"
|
|
||||||
|
|
||||||
# Network configuration
|
|
||||||
# eth0: multicast network (static 192.168.100.1)
|
|
||||||
# eth1: user network (DHCP for internet)
|
|
||||||
INTERFACESOPTS="auto lo
|
|
||||||
iface lo inet loopback
|
|
||||||
|
|
||||||
auto eth0
|
|
||||||
iface eth0 inet static
|
|
||||||
address 192.168.100.1
|
|
||||||
netmask 255.255.255.0
|
|
||||||
|
|
||||||
auto eth1
|
|
||||||
iface eth1 inet dhcp"
|
|
||||||
|
|
||||||
# DNS
|
|
||||||
DNSOPTS="8.8.8.8 8.8.4.4"
|
|
||||||
|
|
||||||
# Timezone
|
|
||||||
TIMEZONEOPTS="-z UTC"
|
|
||||||
|
|
||||||
# Proxy (none)
|
|
||||||
PROXYOPTS="none"
|
|
||||||
|
|
||||||
# APK mirror (auto-detect fastest)
|
|
||||||
APKREPOSOPTS="-f"
|
|
||||||
|
|
||||||
# SSH server
|
|
||||||
SSHDOPTS="-c openssh"
|
|
||||||
|
|
||||||
# NTP client
|
|
||||||
NTPOPTS="-c chrony"
|
|
||||||
|
|
||||||
# Disk mode (sys = traditional installation to disk)
|
|
||||||
DISKOPTS="-m sys /dev/vda"
|
|
||||||
|
|
||||||
# Additional packages to install
|
|
||||||
APKCACHEOPTS="/var/cache/apk"
|
|
||||||
18
baremetal/vm-cluster/legacy/README.md
Normal file
18
baremetal/vm-cluster/legacy/README.md
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
# Legacy Launch Scripts
|
||||||
|
|
||||||
|
These scripts are archived manual launch helpers from the older `baremetal/vm-cluster` workflow.
|
||||||
|
|
||||||
|
They are not the canonical test path and should not be used for normal validation.
|
||||||
|
|
||||||
|
Use the Nix-native harness instead:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nix run ./nix/test-cluster#cluster -- smoke
|
||||||
|
```
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
|
||||||
|
- `deploy-all.sh` and `validate-cluster.sh` are preserved only for the retired PXE/manual flow
|
||||||
|
- some scripts assume local disk images or host networking setup that is no longer maintained
|
||||||
|
- Alpine-specific flows are treated as retired
|
||||||
|
- supporting artifacts such as `alpine-answers.txt` are no longer kept current
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# T036 VM Cluster Deployment Script
|
# Legacy T036 VM cluster deployment script.
|
||||||
# Deploys all VMs via nixos-anywhere after VNC network configuration
|
# This is a manual bare-metal/PXE path. The canonical local VM validation path
|
||||||
|
# is nix/test-cluster/run-cluster.sh.
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
|
|
@ -11,6 +12,8 @@ cd "$REPO_ROOT"
|
||||||
|
|
||||||
echo "=== T036 VM Cluster Deployment ==="
|
echo "=== T036 VM Cluster Deployment ==="
|
||||||
echo ""
|
echo ""
|
||||||
|
echo "This path is archived. Prefer: nix run ./nix/test-cluster#cluster -- smoke"
|
||||||
|
echo ""
|
||||||
echo "Prerequisites:"
|
echo "Prerequisites:"
|
||||||
echo " - PXE server booted and network configured (192.168.100.1)"
|
echo " - PXE server booted and network configured (192.168.100.1)"
|
||||||
echo " - Node01 booted and network configured (192.168.100.11)"
|
echo " - Node01 booted and network configured (192.168.100.11)"
|
||||||
|
|
@ -56,4 +59,5 @@ echo ""
|
||||||
echo "All VMs have been provisioned. Systems will reboot from disk."
|
echo "All VMs have been provisioned. Systems will reboot from disk."
|
||||||
echo "Wait 2-3 minutes for boot, then validate cluster..."
|
echo "Wait 2-3 minutes for boot, then validate cluster..."
|
||||||
echo ""
|
echo ""
|
||||||
echo "Next: Run ./validate-cluster.sh"
|
echo "Legacy next step: baremetal/vm-cluster/legacy/validate-cluster.sh"
|
||||||
|
echo "Preferred validation path: nix run ./nix/test-cluster#cluster -- smoke"
|
||||||
|
|
@ -10,6 +10,7 @@ set -euo pipefail
|
||||||
# - Telnet serial console
|
# - Telnet serial console
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
|
||||||
DISK="${SCRIPT_DIR}/node01.qcow2"
|
DISK="${SCRIPT_DIR}/node01.qcow2"
|
||||||
KERNEL="${SCRIPT_DIR}/netboot-kernel/bzImage"
|
KERNEL="${SCRIPT_DIR}/netboot-kernel/bzImage"
|
||||||
INITRD="${SCRIPT_DIR}/netboot-initrd/initrd"
|
INITRD="${SCRIPT_DIR}/netboot-initrd/initrd"
|
||||||
|
|
@ -37,6 +38,13 @@ if [ ! -f "$INITRD" ]; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
INIT_PATH="/init"
|
||||||
|
if command -v nix >/dev/null 2>&1; then
|
||||||
|
if TOPLEVEL=$(nix eval --raw "$REPO_ROOT#nixosConfigurations.netboot-base.config.system.build.toplevel" 2>/dev/null); then
|
||||||
|
INIT_PATH="${TOPLEVEL}/init"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
echo "============================================"
|
echo "============================================"
|
||||||
echo "Launching node01 with netboot (SSH key auth)..."
|
echo "Launching node01 with netboot (SSH key auth)..."
|
||||||
echo "============================================"
|
echo "============================================"
|
||||||
|
|
@ -65,7 +73,7 @@ qemu-system-x86_64 \
|
||||||
-drive file="${DISK}",if=virtio,format=qcow2 \
|
-drive file="${DISK}",if=virtio,format=qcow2 \
|
||||||
-kernel "${KERNEL}" \
|
-kernel "${KERNEL}" \
|
||||||
-initrd "${INITRD}" \
|
-initrd "${INITRD}" \
|
||||||
-append "init=/nix/store/qj1ilfdd8fcrmz4pk282p5qdf2q0vkmh-nixos-system-nixos-kexec-26.05.20251205.f61125a/init console=ttyS0,115200 console=tty0 loglevel=4" \
|
-append "init=${INIT_PATH} console=ttyS0,115200 console=tty0 loglevel=4" \
|
||||||
-netdev vde,id=vde0,sock=/tmp/vde.sock \
|
-netdev vde,id=vde0,sock=/tmp/vde.sock \
|
||||||
-device virtio-net-pci,netdev=vde0,mac="${MAC_MCAST}" \
|
-device virtio-net-pci,netdev=vde0,mac="${MAC_MCAST}" \
|
||||||
-netdev user,id=user0,hostfwd=tcp::${SSH_PORT}-:22 \
|
-netdev user,id=user0,hostfwd=tcp::${SSH_PORT}-:22 \
|
||||||
|
|
@ -45,7 +45,7 @@ exec qemu-system-x86_64 \
|
||||||
-m 16G \
|
-m 16G \
|
||||||
-drive file="$DISK",if=virtio,format=qcow2 \
|
-drive file="$DISK",if=virtio,format=qcow2 \
|
||||||
-netdev socket,mcast="$MCAST_ADDR",id=mcast0 \
|
-netdev socket,mcast="$MCAST_ADDR",id=mcast0 \
|
||||||
-device virtio-net-pci,netdev=mcast0,mac="$MAC_ADDR",romfile= \
|
-device virtio-net-pci,netdev=mcast0,mac="$MAC_ADDR" \
|
||||||
-boot order=n \
|
-boot order=n \
|
||||||
-vnc "$VNC_DISPLAY" \
|
-vnc "$VNC_DISPLAY" \
|
||||||
-serial telnet:localhost:4441,server,nowait \
|
-serial telnet:localhost:4441,server,nowait \
|
||||||
|
|
@ -10,6 +10,7 @@ set -euo pipefail
|
||||||
# - Telnet serial console
|
# - Telnet serial console
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
|
||||||
DISK="${SCRIPT_DIR}/node02.qcow2"
|
DISK="${SCRIPT_DIR}/node02.qcow2"
|
||||||
KERNEL="${SCRIPT_DIR}/netboot-kernel/bzImage"
|
KERNEL="${SCRIPT_DIR}/netboot-kernel/bzImage"
|
||||||
INITRD="${SCRIPT_DIR}/netboot-initrd/initrd"
|
INITRD="${SCRIPT_DIR}/netboot-initrd/initrd"
|
||||||
|
|
@ -37,6 +38,13 @@ if [ ! -f "$INITRD" ]; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
INIT_PATH="/init"
|
||||||
|
if command -v nix >/dev/null 2>&1; then
|
||||||
|
if TOPLEVEL=$(nix eval --raw "$REPO_ROOT#nixosConfigurations.netboot-base.config.system.build.toplevel" 2>/dev/null); then
|
||||||
|
INIT_PATH="${TOPLEVEL}/init"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
echo "============================================"
|
echo "============================================"
|
||||||
echo "Launching node02 with netboot (SSH key auth)..."
|
echo "Launching node02 with netboot (SSH key auth)..."
|
||||||
echo "============================================"
|
echo "============================================"
|
||||||
|
|
@ -65,7 +73,7 @@ qemu-system-x86_64 \
|
||||||
-drive file="${DISK}",if=virtio,format=qcow2 \
|
-drive file="${DISK}",if=virtio,format=qcow2 \
|
||||||
-kernel "${KERNEL}" \
|
-kernel "${KERNEL}" \
|
||||||
-initrd "${INITRD}" \
|
-initrd "${INITRD}" \
|
||||||
-append "init=/nix/store/qj1ilfdd8fcrmz4pk282p5qdf2q0vkmh-nixos-system-nixos-kexec-26.05.20251205.f61125a/init console=ttyS0,115200 console=tty0 loglevel=4" \
|
-append "init=${INIT_PATH} console=ttyS0,115200 console=tty0 loglevel=4" \
|
||||||
-netdev vde,id=vde0,sock=/tmp/vde.sock \
|
-netdev vde,id=vde0,sock=/tmp/vde.sock \
|
||||||
-device virtio-net-pci,netdev=vde0,mac="${MAC_MCAST}" \
|
-device virtio-net-pci,netdev=vde0,mac="${MAC_MCAST}" \
|
||||||
-netdev user,id=user0,hostfwd=tcp::${SSH_PORT}-:22 \
|
-netdev user,id=user0,hostfwd=tcp::${SSH_PORT}-:22 \
|
||||||
|
|
@ -45,7 +45,7 @@ exec qemu-system-x86_64 \
|
||||||
-m 16G \
|
-m 16G \
|
||||||
-drive file="$DISK",if=virtio,format=qcow2 \
|
-drive file="$DISK",if=virtio,format=qcow2 \
|
||||||
-netdev socket,mcast="$MCAST_ADDR",id=mcast0 \
|
-netdev socket,mcast="$MCAST_ADDR",id=mcast0 \
|
||||||
-device virtio-net-pci,netdev=mcast0,mac="$MAC_ADDR",romfile= \
|
-device virtio-net-pci,netdev=mcast0,mac="$MAC_ADDR" \
|
||||||
-boot order=n \
|
-boot order=n \
|
||||||
-vnc "$VNC_DISPLAY" \
|
-vnc "$VNC_DISPLAY" \
|
||||||
-serial telnet:localhost:4442,server,nowait \
|
-serial telnet:localhost:4442,server,nowait \
|
||||||
|
|
@ -10,6 +10,7 @@ set -euo pipefail
|
||||||
# - Telnet serial console
|
# - Telnet serial console
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
|
||||||
DISK="${SCRIPT_DIR}/node03.qcow2"
|
DISK="${SCRIPT_DIR}/node03.qcow2"
|
||||||
KERNEL="${SCRIPT_DIR}/netboot-kernel/bzImage"
|
KERNEL="${SCRIPT_DIR}/netboot-kernel/bzImage"
|
||||||
INITRD="${SCRIPT_DIR}/netboot-initrd/initrd"
|
INITRD="${SCRIPT_DIR}/netboot-initrd/initrd"
|
||||||
|
|
@ -37,6 +38,13 @@ if [ ! -f "$INITRD" ]; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
INIT_PATH="/init"
|
||||||
|
if command -v nix >/dev/null 2>&1; then
|
||||||
|
if TOPLEVEL=$(nix eval --raw "$REPO_ROOT#nixosConfigurations.netboot-base.config.system.build.toplevel" 2>/dev/null); then
|
||||||
|
INIT_PATH="${TOPLEVEL}/init"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
echo "============================================"
|
echo "============================================"
|
||||||
echo "Launching node03 with netboot (SSH key auth)..."
|
echo "Launching node03 with netboot (SSH key auth)..."
|
||||||
echo "============================================"
|
echo "============================================"
|
||||||
|
|
@ -65,7 +73,7 @@ qemu-system-x86_64 \
|
||||||
-drive file="${DISK}",if=virtio,format=qcow2 \
|
-drive file="${DISK}",if=virtio,format=qcow2 \
|
||||||
-kernel "${KERNEL}" \
|
-kernel "${KERNEL}" \
|
||||||
-initrd "${INITRD}" \
|
-initrd "${INITRD}" \
|
||||||
-append "init=/nix/store/qj1ilfdd8fcrmz4pk282p5qdf2q0vkmh-nixos-system-nixos-kexec-26.05.20251205.f61125a/init console=ttyS0,115200 console=tty0 loglevel=4" \
|
-append "init=${INIT_PATH} console=ttyS0,115200 console=tty0 loglevel=4" \
|
||||||
-netdev vde,id=vde0,sock=/tmp/vde.sock \
|
-netdev vde,id=vde0,sock=/tmp/vde.sock \
|
||||||
-device virtio-net-pci,netdev=vde0,mac="${MAC_MCAST}" \
|
-device virtio-net-pci,netdev=vde0,mac="${MAC_MCAST}" \
|
||||||
-netdev user,id=user0,hostfwd=tcp::${SSH_PORT}-:22 \
|
-netdev user,id=user0,hostfwd=tcp::${SSH_PORT}-:22 \
|
||||||
|
|
@ -45,7 +45,7 @@ exec qemu-system-x86_64 \
|
||||||
-m 16G \
|
-m 16G \
|
||||||
-drive file="$DISK",if=virtio,format=qcow2 \
|
-drive file="$DISK",if=virtio,format=qcow2 \
|
||||||
-netdev socket,mcast="$MCAST_ADDR",id=mcast0 \
|
-netdev socket,mcast="$MCAST_ADDR",id=mcast0 \
|
||||||
-device virtio-net-pci,netdev=mcast0,mac="$MAC_ADDR",romfile= \
|
-device virtio-net-pci,netdev=mcast0,mac="$MAC_ADDR" \
|
||||||
-boot order=n \
|
-boot order=n \
|
||||||
-vnc "$VNC_DISPLAY" \
|
-vnc "$VNC_DISPLAY" \
|
||||||
-serial telnet:localhost:4443,server,nowait \
|
-serial telnet:localhost:4443,server,nowait \
|
||||||
|
|
@ -1,11 +1,22 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# T036 Cluster Validation Script
|
# Legacy T036 validation script.
|
||||||
# Validates cluster health and Raft formation per S6 acceptance criteria
|
# The canonical local VM validation path is now nix/test-cluster/run-cluster.sh.
|
||||||
|
# Keep this script only for the older manual PXE flow.
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
echo "=== T036 Cluster Validation ==="
|
echo "=== T036 Cluster Validation ==="
|
||||||
echo ""
|
echo ""
|
||||||
|
echo "This path is archived. Prefer: nix run ./nix/test-cluster#cluster -- smoke"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
CURL_CONNECT_TIMEOUT="${CURL_CONNECT_TIMEOUT:-5}"
|
||||||
|
CURL_MAX_TIME="${CURL_MAX_TIME:-10}"
|
||||||
|
CURL_INSECURE="${CURL_INSECURE:-1}"
|
||||||
|
CURL_FLAGS=(--connect-timeout "$CURL_CONNECT_TIMEOUT" --max-time "$CURL_MAX_TIME")
|
||||||
|
if [[ "$CURL_INSECURE" == "1" ]]; then
|
||||||
|
CURL_FLAGS+=(-k)
|
||||||
|
fi
|
||||||
|
|
||||||
# Wait for services to be ready
|
# Wait for services to be ready
|
||||||
echo "Waiting for cluster services to start (60 seconds)..."
|
echo "Waiting for cluster services to start (60 seconds)..."
|
||||||
|
|
@ -25,7 +36,7 @@ echo ""
|
||||||
echo "=== S6.2: Chainfire Cluster Validation ==="
|
echo "=== S6.2: Chainfire Cluster Validation ==="
|
||||||
echo ""
|
echo ""
|
||||||
echo "Checking Chainfire cluster members on node01..."
|
echo "Checking Chainfire cluster members on node01..."
|
||||||
curl -k https://192.168.100.11:2379/admin/cluster/members | jq . || echo "Chainfire API not ready"
|
curl "${CURL_FLAGS[@]}" https://192.168.100.11:2379/admin/cluster/members | jq . || echo "Chainfire API not ready"
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "Expected: 3 members (node01, node02, node03), one leader elected"
|
echo "Expected: 3 members (node01, node02, node03), one leader elected"
|
||||||
|
|
@ -34,34 +45,34 @@ echo ""
|
||||||
echo "=== S6.3: FlareDB Cluster Validation ==="
|
echo "=== S6.3: FlareDB Cluster Validation ==="
|
||||||
echo ""
|
echo ""
|
||||||
echo "Checking FlareDB cluster members on node01..."
|
echo "Checking FlareDB cluster members on node01..."
|
||||||
curl -k https://192.168.100.11:2479/admin/cluster/members | jq . || echo "FlareDB API not ready"
|
curl "${CURL_FLAGS[@]}" https://192.168.100.11:2479/admin/cluster/members | jq . || echo "FlareDB API not ready"
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "=== S6.4: CRUD Operations Test ==="
|
echo "=== S6.4: CRUD Operations Test ==="
|
||||||
echo ""
|
echo ""
|
||||||
echo "Writing test key to FlareDB..."
|
echo "Writing test key to FlareDB..."
|
||||||
curl -k -X PUT https://192.168.100.11:2479/api/v1/kv/test-key \
|
curl "${CURL_FLAGS[@]}" -X PUT https://192.168.100.11:2479/api/v1/kv/test-key \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-d '{"value": "hello-t036-cluster"}' || echo "Write failed"
|
-d '{"value": "hello-t036-cluster"}' || echo "Write failed"
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "Reading test key from node01..."
|
echo "Reading test key from node01..."
|
||||||
curl -k https://192.168.100.11:2479/api/v1/kv/test-key || echo "Read failed"
|
curl "${CURL_FLAGS[@]}" https://192.168.100.11:2479/api/v1/kv/test-key || echo "Read failed"
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "Reading test key from node02 (verify replication)..."
|
echo "Reading test key from node02 (verify replication)..."
|
||||||
curl -k https://192.168.100.12:2479/api/v1/kv/test-key || echo "Read failed"
|
curl "${CURL_FLAGS[@]}" https://192.168.100.12:2479/api/v1/kv/test-key || echo "Read failed"
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "Reading test key from node03 (verify replication)..."
|
echo "Reading test key from node03 (verify replication)..."
|
||||||
curl -k https://192.168.100.13:2479/api/v1/kv/test-key || echo "Read failed"
|
curl "${CURL_FLAGS[@]}" https://192.168.100.13:2479/api/v1/kv/test-key || echo "Read failed"
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "=== S6.5: IAM Service Validation ==="
|
echo "=== S6.5: IAM Service Validation ==="
|
||||||
echo ""
|
echo ""
|
||||||
for node in 192.168.100.11 192.168.100.12 192.168.100.13; do
|
for node in 192.168.100.11 192.168.100.12 192.168.100.13; do
|
||||||
echo "Checking IAM health on $node..."
|
echo "Checking IAM health on $node..."
|
||||||
curl -k https://$node:8080/health || echo "IAM not ready on $node"
|
curl "${CURL_FLAGS[@]}" https://$node:8080/health || echo "IAM not ready on $node"
|
||||||
echo ""
|
echo ""
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
@ -70,9 +81,9 @@ echo "=== S6.6: Health Checks ==="
|
||||||
echo ""
|
echo ""
|
||||||
for node in 192.168.100.11 192.168.100.12 192.168.100.13; do
|
for node in 192.168.100.11 192.168.100.12 192.168.100.13; do
|
||||||
echo "Node: $node"
|
echo "Node: $node"
|
||||||
echo " Chainfire: $(curl -sk https://$node:2379/health || echo 'N/A')"
|
echo " Chainfire: $(curl -s "${CURL_FLAGS[@]}" https://$node:2379/health || echo 'N/A')"
|
||||||
echo " FlareDB: $(curl -sk https://$node:2479/health || echo 'N/A')"
|
echo " FlareDB: $(curl -s "${CURL_FLAGS[@]}" https://$node:2479/health || echo 'N/A')"
|
||||||
echo " IAM: $(curl -sk https://$node:8080/health || echo 'N/A')"
|
echo " IAM: $(curl -s "${CURL_FLAGS[@]}" https://$node:8080/health || echo 'N/A')"
|
||||||
echo ""
|
echo ""
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
/nix/store/nixfmms2rbqi07a0sqjf5l32mm28y1iz-initrd
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
/nix/store/nmi1f4lsswcr9dmm1r6j6a8b7rar5gl4-linux-6.18
|
|
||||||
|
|
@ -1,20 +1,10 @@
|
||||||
{ config, pkgs, lib, ... }:
|
{ config, pkgs, lib, modulesPath, ... }:
|
||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
<nixpkgs/nixos/modules/profiles/qemu-guest.nix>
|
"${modulesPath}/profiles/qemu-guest.nix"
|
||||||
];
|
];
|
||||||
|
|
||||||
# Boot configuration
|
|
||||||
boot.loader.grub.enable = true;
|
|
||||||
boot.loader.grub.device = "/dev/vda";
|
|
||||||
|
|
||||||
# Filesystems
|
|
||||||
fileSystems."/" = {
|
|
||||||
device = "/dev/vda1";
|
|
||||||
fsType = "ext4";
|
|
||||||
};
|
|
||||||
|
|
||||||
# Network configuration
|
# Network configuration
|
||||||
networking.hostName = "pxe-server";
|
networking.hostName = "pxe-server";
|
||||||
networking.domain = "plasma.local";
|
networking.domain = "plasma.local";
|
||||||
|
|
@ -62,6 +52,7 @@
|
||||||
# DNS configuration
|
# DNS configuration
|
||||||
domain = "plasma.local";
|
domain = "plasma.local";
|
||||||
local = "/plasma.local/";
|
local = "/plasma.local/";
|
||||||
|
address = "/deployer.local/192.168.100.1";
|
||||||
|
|
||||||
# TFTP configuration
|
# TFTP configuration
|
||||||
enable-tftp = true;
|
enable-tftp = true;
|
||||||
|
|
@ -84,6 +75,17 @@
|
||||||
settings.PermitRootLogin = "yes";
|
settings.PermitRootLogin = "yes";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Deployer API for ISO phone-home bootstrap
|
||||||
|
services.deployer = {
|
||||||
|
enable = true;
|
||||||
|
bindAddr = "0.0.0.0:8080";
|
||||||
|
clusterId = "plasmacloud-vm-cluster";
|
||||||
|
requireChainfire = false;
|
||||||
|
allowUnauthenticated = true;
|
||||||
|
allowUnknownNodes = true;
|
||||||
|
allowTestMappings = false;
|
||||||
|
};
|
||||||
|
|
||||||
# Root password (for SSH access)
|
# Root password (for SSH access)
|
||||||
users.users.root.password = "plasmacloud";
|
users.users.root.password = "plasmacloud";
|
||||||
|
|
||||||
|
|
@ -92,6 +94,7 @@
|
||||||
vim
|
vim
|
||||||
curl
|
curl
|
||||||
htop
|
htop
|
||||||
|
deployer-server
|
||||||
];
|
];
|
||||||
|
|
||||||
# System state version
|
# System state version
|
||||||
|
|
|
||||||
135
bin/cloud-cli
Executable file
135
bin/cloud-cli
Executable file
|
|
@ -0,0 +1,135 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import urllib.request
|
||||||
|
import urllib.error
|
||||||
|
|
||||||
|
# Default API Gateway URL (localhost forwarding from node06)
|
||||||
|
DEFAULT_API_URL = "http://localhost:8080"
|
||||||
|
|
||||||
|
def get_url(path):
|
||||||
|
return f"{DEFAULT_API_URL}{path}"
|
||||||
|
|
||||||
|
def headers(token=None):
|
||||||
|
h = {"Content-Type": "application/json"}
|
||||||
|
if token:
|
||||||
|
h["Authorization"] = f"Bearer {token}"
|
||||||
|
return h
|
||||||
|
|
||||||
|
def print_json(data):
|
||||||
|
print(json.dumps(data, indent=2))
|
||||||
|
|
||||||
|
def request(method, url, data=None, token=None):
|
||||||
|
parsed_headers = headers(token)
|
||||||
|
body = None
|
||||||
|
if data:
|
||||||
|
body = json.dumps(data).encode('utf-8')
|
||||||
|
|
||||||
|
req = urllib.request.Request(url, data=body, headers=parsed_headers, method=method)
|
||||||
|
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req) as response:
|
||||||
|
if response.status in [200, 201, 204]:
|
||||||
|
if response.status == 204:
|
||||||
|
print("{}")
|
||||||
|
return
|
||||||
|
resp_data = json.load(response)
|
||||||
|
print_json(resp_data)
|
||||||
|
else:
|
||||||
|
print(f"Error {response.status}")
|
||||||
|
except urllib.error.HTTPError as e:
|
||||||
|
print(f"HTTP Error {e.code}: {e.read().decode()}")
|
||||||
|
except urllib.error.URLError as e:
|
||||||
|
print(f"Connection failed: {e.reason}")
|
||||||
|
|
||||||
|
def cmd_list_vpcs(args):
|
||||||
|
url = get_url("/api/v1/vpcs")
|
||||||
|
print(f"GET {url}")
|
||||||
|
request("GET", url, token=args.token)
|
||||||
|
|
||||||
|
def cmd_create_vpc(args):
|
||||||
|
url = get_url("/api/v1/vpcs")
|
||||||
|
data = {
|
||||||
|
"name": args.name,
|
||||||
|
"cidr_block": args.cidr,
|
||||||
|
"org_id": "org-default",
|
||||||
|
"project_id": "proj-default"
|
||||||
|
}
|
||||||
|
print(f"POST {url} with {data}")
|
||||||
|
request("POST", url, data=data, token=args.token)
|
||||||
|
|
||||||
|
def cmd_list_subnets(args):
|
||||||
|
url = get_url("/api/v1/subnets")
|
||||||
|
if args.vpc:
|
||||||
|
url += f"?vpc_id={args.vpc}"
|
||||||
|
print(f"GET {url}")
|
||||||
|
request("GET", url, token=args.token)
|
||||||
|
|
||||||
|
def cmd_create_subnet(args):
|
||||||
|
url = get_url("/api/v1/subnets")
|
||||||
|
data = {
|
||||||
|
"name": args.name,
|
||||||
|
"vpc_id": args.vpc,
|
||||||
|
"cidr_block": args.cidr,
|
||||||
|
"org_id": "org-default",
|
||||||
|
"project_id": "proj-default"
|
||||||
|
}
|
||||||
|
print(f"POST {url} with {data}")
|
||||||
|
request("POST", url, data=data, token=args.token)
|
||||||
|
|
||||||
|
def cmd_list_vms(args):
|
||||||
|
url = get_url("/api/v1/vms")
|
||||||
|
print(f"GET {url}")
|
||||||
|
request("GET", url, token=args.token)
|
||||||
|
|
||||||
|
def main():
|
||||||
|
global DEFAULT_API_URL
|
||||||
|
parser = argparse.ArgumentParser(description="PhotonCloud CLI")
|
||||||
|
parser.add_argument("--token", help="Auth token", default=os.environ.get("CLOUD_TOKEN"))
|
||||||
|
parser.add_argument("--url", help="API URL", default=DEFAULT_API_URL)
|
||||||
|
|
||||||
|
subparsers = parser.add_subparsers(dest="command", required=True)
|
||||||
|
|
||||||
|
# VPC Commands
|
||||||
|
vpc_parser = subparsers.add_parser("vpc", help="Manage VPCs")
|
||||||
|
vpc_sub = vpc_parser.add_subparsers(dest="subcommand", required=True)
|
||||||
|
|
||||||
|
vpc_list = vpc_sub.add_parser("list", help="List VPCs")
|
||||||
|
vpc_list.set_defaults(func=cmd_list_vpcs)
|
||||||
|
|
||||||
|
vpc_create = vpc_sub.add_parser("create", help="Create VPC")
|
||||||
|
vpc_create.add_argument("--name", required=True)
|
||||||
|
vpc_create.add_argument("--cidr", required=True)
|
||||||
|
vpc_create.set_defaults(func=cmd_create_vpc)
|
||||||
|
|
||||||
|
# Subnet Commands
|
||||||
|
subnet_parser = subparsers.add_parser("subnet", help="Manage Subnets")
|
||||||
|
subnet_sub = subnet_parser.add_subparsers(dest="subcommand", required=True)
|
||||||
|
|
||||||
|
subnet_list = subnet_sub.add_parser("list", help="List Subnets")
|
||||||
|
subnet_list.add_argument("--vpc", help="Filter by VPC ID")
|
||||||
|
subnet_list.set_defaults(func=cmd_list_subnets)
|
||||||
|
|
||||||
|
subnet_create = subnet_sub.add_parser("create", help="Create Subnet")
|
||||||
|
subnet_create.add_argument("--name", required=True)
|
||||||
|
subnet_create.add_argument("--vpc", required=True, help="VPC ID")
|
||||||
|
subnet_create.add_argument("--cidr", required=True)
|
||||||
|
subnet_create.set_defaults(func=cmd_create_subnet)
|
||||||
|
|
||||||
|
# VM Commands
|
||||||
|
vm_parser = subparsers.add_parser("vm", help="Manage VMs")
|
||||||
|
vm_sub = vm_parser.add_subparsers(dest="subcommand", required=True)
|
||||||
|
|
||||||
|
vm_list = vm_sub.add_parser("list", help="List VMs")
|
||||||
|
vm_list.set_defaults(func=cmd_list_vms)
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
if args.url:
|
||||||
|
DEFAULT_API_URL = args.url
|
||||||
|
|
||||||
|
args.func(args)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
@ -1,87 +0,0 @@
|
||||||
RaftとGossipプロトコルを用いた、クラスター管理のための数万台までスケールするKey-Value Storeを書いてほしいです。
|
|
||||||
|
|
||||||
- プログラミング言語:rust
|
|
||||||
- テストをちゃんと書きながら書くことを推奨する。
|
|
||||||
- クラスターへの参加/削除/障害検知を行う。
|
|
||||||
|
|
||||||
では、**「Raft(合意形成)」と「Gossip(情報の拡散)」を組み合わせた場合、具体的にどうデータが流れ、どうやってノードが動き出すのか**、その具体的なフローを解説します。
|
|
||||||
|
|
||||||
-----
|
|
||||||
|
|
||||||
### 前提:このシステムの役割分担
|
|
||||||
|
|
||||||
* **Control Plane (CP):** Raftで構成された3〜7台(Raftアルゴリズムでうまく合意が取れる範囲)のサーバー。情報の「正規の持ち主」。いなくなったら自動でWorker Nodesから昇格する。
|
|
||||||
* **Worker Nodes (VM/DB Hosts):** 数百〜数千台の実働部隊。CPのクライアント。
|
|
||||||
|
|
||||||
### 1\. データはどのように書き込まれるか? (Write)
|
|
||||||
|
|
||||||
書き込みは **「必ず Control Plane の Raft Leader に対して」** 行います。Gossip経由での書き込みは(順序保証がないため)行いません。
|
|
||||||
|
|
||||||
例:「VM-A を Node-10 で起動したい」
|
|
||||||
|
|
||||||
1. **API Call:** 管理者(またはCLI)が、CPのAPIサーバーにリクエストを送ります。
|
|
||||||
2. **Raft Log:** CPのリーダーは、この変更を `Put(Key="/nodes/node-10/tasks/vm-a", Value="START")` としてRaftログに追加します。
|
|
||||||
3. **Commit:** 過半数のCPノードがログを保存したら「書き込み完了」と見なします。
|
|
||||||
|
|
||||||
ここまでは普通のDBと同じです。
|
|
||||||
|
|
||||||
### 2\. 各ノードはどのようにデータを取得し、通知を受けるか? (Read & Notify)
|
|
||||||
|
|
||||||
ここが最大のポイントです。数千台のノードが「自分宛ての命令はないか?」と毎秒ポーリング(問い合わせ)すると、CPがDDoS攻撃を受けたようにパンクします。
|
|
||||||
|
|
||||||
ここで **「Watch(ロングポーリング)」** という仕組みを使います。
|
|
||||||
|
|
||||||
#### A. Watchによる通知と取得(これがメイン)
|
|
||||||
|
|
||||||
Kubernetesやetcdが採用している方式です。
|
|
||||||
|
|
||||||
1. **接続維持:** Node-10 は起動時に CP に対して `Watch("/nodes/node-10/")` というリクエストを送ります。
|
|
||||||
2. **待機:** CP は「Node-10 以下のキーに変更があるまで、レスポンスを返さずに接続を維持(ブロック)」します。
|
|
||||||
3. **イベント発火:** 先ほどの書き込み(VM起動命令)が発生した瞬間、CP は待機していた Node-10 への接続を通じて「更新イベント(Event: PUT, Key: ...vm-a, Value: START)」を即座にプッシュします。
|
|
||||||
4. **アクション:** Node-10 は通知を受け取り次第、VMを起動します。
|
|
||||||
|
|
||||||
**結論:** 「書き込み後の通知」は絶対に必要です。それを効率よくやるのが **Watch API** です。
|
|
||||||
|
|
||||||
-----
|
|
||||||
|
|
||||||
### 3\. じゃあ Gossip はどこで使うのか?
|
|
||||||
|
|
||||||
「Raft + Watch」で完結しそうに見えますが、10台以上のスケール、特にVM基盤のような動的な環境では **Gossip が以下の「3つの穴」を埋めるために不可欠** になります。
|
|
||||||
|
|
||||||
#### ① Nodeの死活監視・インベントリ管理(下り方向)
|
|
||||||
|
|
||||||
CPが「Node-10にVMを立てたい」と思ったとき、「そもそもNode-10は生きているのか? IPは? 空きメモリは?」という情報を知る必要があります。
|
|
||||||
|
|
||||||
* **Gossipの役割:** 各Worker Nodeは、Gossip(SWIMプロトコル)でお互いに、そしてCPに対して「自分は生きてるよ、IPはこれだよ」と喋り続けます。
|
|
||||||
* CPはこのGossip情報を聞いて、最新の「ノード一覧リスト(Memberlist)」をメモリ上に保持します。
|
|
||||||
|
|
||||||
#### ② サービスのディスカバリ(横方向)
|
|
||||||
|
|
||||||
DB基盤の場合、「DBノードA」が「DBノードB」と通信したいことがあります。いちいちCPに聞きに行くと遅いです。
|
|
||||||
|
|
||||||
* **Gossipの役割:** ノード同士で「私はここにいるよ」とGossipし合うことで、CPを介さずに直接通信相手を見つけられます。
|
|
||||||
|
|
||||||
#### ③ "Actual State"(現状)の報告(上り方向)
|
|
||||||
|
|
||||||
VMが起動した後、「起動しました」というステータスをどうCPに伝えるか。
|
|
||||||
|
|
||||||
* **Raftに書く:** 確実ですが、頻繁に変わるステータス(CPU使用率など)を全部Raftに書くとCPがパンクします。
|
|
||||||
* **Gossipで流す:** 「VM-Aは起動中、負荷50%」といった情報はGossipに乗せて、**「結果的にCPに伝わればいい」** という扱いにします。
|
|
||||||
|
|
||||||
-----
|
|
||||||
|
|
||||||
### 設計のまとめ:RaftとGossipの使い分け
|
|
||||||
|
|
||||||
これから作る「汎用管理DB(外部依存型)」は、以下のようなインターフェースを持つことになるでしょう。
|
|
||||||
|
|
||||||
| アクション | 通信方式 | 具体的なデータ例 |
|
|
||||||
| :--- | :--- | :--- |
|
|
||||||
| **命令 (Desired State)**<br>「こうなってほしい」 | **Raft + Watch**<br>(強整合性) | ・VMの起動/停止命令<br>・DBのデータ配置情報の変更<br>・パスワードや設定変更 |
|
|
||||||
| **現状 (Actual State)**<br>「今こうなってます」 | **Gossip**<br>(結果整合性) | ・ノードの生存確認 (Heartbeat)<br>・リソース使用率 (CPU/Mem)<br>・「VM起動完了」などのステータス |
|
|
||||||
| **通知 (Notification)** | **Watch (HTTP/gRPC Stream)** | ・「新しい命令が来たぞ!」というトリガー |
|
|
||||||
|
|
||||||
#### 実装のアドバイス
|
|
||||||
|
|
||||||
もし「etcdのようなもの」を自作されるなら、**「Serf (Gossip)」と「Raft」をライブラリとして組み込み、その上に「gRPCによるWatch付きのKVS API」を被せる** という構成になります。
|
|
||||||
|
|
||||||
これができれば、VM基盤は「Watchして、VMを起動して、Gossipでステータスを返すエージェント」を作るだけで済みますし、DB基盤も同様に作れます。非常にスケーラブルで美しい設計です。
|
|
||||||
|
|
---
|
||||||
# T032.S2 PXE Boot Infrastructure - Implementation Summary
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
This directory contains a complete PXE (Preboot eXecution Environment) boot infrastructure for bare-metal provisioning of Centra Cloud nodes. It enables automated, network-based installation of NixOS on physical servers with profile-based configuration.
|
|
||||||
|
|
||||||
## Implementation Status
|
|
||||||
|
|
||||||
**Task**: T032.S2 - PXE Boot Infrastructure
|
|
||||||
**Status**: ✅ Complete
|
|
||||||
**Total Lines**: 3,086 across all files
|
|
||||||
**Date**: 2025-12-10
|
|
||||||
|
|
||||||
## What Was Delivered
|
|
||||||
|
|
||||||
### 1. Core Configuration Files
|
|
||||||
|
|
||||||
| File | Lines | Purpose |
|
|
||||||
|------|-------|---------|
|
|
||||||
| `dhcp/dhcpd.conf` | 134 | ISC DHCP server configuration with BIOS/UEFI detection |
|
|
||||||
| `ipxe/boot.ipxe` | 320 | Main iPXE boot script with 3 profiles and menu |
|
|
||||||
| `http/nginx.conf` | 187 | Nginx HTTP server for boot assets |
|
|
||||||
| `nixos-module.nix` | 358 | Complete NixOS service module |
|
|
||||||
|
|
||||||
### 2. Setup and Management
|
|
||||||
|
|
||||||
| File | Lines | Purpose |
|
|
||||||
|------|-------|---------|
|
|
||||||
| `setup.sh` | 446 | Automated setup script with download/build/validate/test |
|
|
||||||
|
|
||||||
### 3. Documentation
|
|
||||||
|
|
||||||
| File | Lines | Purpose |
|
|
||||||
|------|-------|---------|
|
|
||||||
| `README.md` | 1088 | Comprehensive documentation and troubleshooting |
|
|
||||||
| `QUICKSTART.md` | 165 | 5-minute quick start guide |
|
|
||||||
| `http/directory-structure.txt` | 95 | Directory layout documentation |
|
|
||||||
| `ipxe/mac-mappings.txt` | 49 | MAC address mapping reference |
|
|
||||||
|
|
||||||
### 4. Examples
|
|
||||||
|
|
||||||
| File | Lines | Purpose |
|
|
||||||
|------|-------|---------|
|
|
||||||
| `examples/nixos-config-examples.nix` | 391 | 8 different deployment scenario examples |
|
|
||||||
|
|
||||||
## Key Features Implemented
|
|
||||||
|
|
||||||
### DHCP Server
|
|
||||||
- ✅ Automatic BIOS/UEFI detection (option 93)
|
|
||||||
- ✅ Chainloading to iPXE via TFTP
|
|
||||||
- ✅ Per-host fixed IP assignment
|
|
||||||
- ✅ Multiple subnet support
|
|
||||||
- ✅ DHCP relay documentation
|
|
||||||
|
|
||||||
### iPXE Boot System
|
|
||||||
- ✅ Three boot profiles: control-plane, worker, all-in-one
|
|
||||||
- ✅ MAC-based automatic profile selection
|
|
||||||
- ✅ Interactive boot menu with 30-second timeout
|
|
||||||
- ✅ Serial console support (ttyS0 115200)
|
|
||||||
- ✅ Detailed error messages and debugging
|
|
||||||
- ✅ iPXE shell access for troubleshooting
|
|
||||||
|
|
||||||
### HTTP Server (Nginx)
|
|
||||||
- ✅ Serves iPXE bootloaders and scripts
|
|
||||||
- ✅ Serves NixOS kernel and initrd
|
|
||||||
- ✅ Proper cache control headers
|
|
||||||
- ✅ Directory listing for debugging
|
|
||||||
- ✅ Health check endpoint
|
|
||||||
- ✅ HTTPS support (optional)
|
|
||||||
|
|
||||||
### NixOS Module
|
|
||||||
- ✅ Declarative configuration
|
|
||||||
- ✅ Automatic firewall rules
|
|
||||||
- ✅ Service dependencies managed
|
|
||||||
- ✅ Directory structure auto-created
|
|
||||||
- ✅ Node definitions with MAC addresses
|
|
||||||
- ✅ DHCP/TFTP/HTTP integration
|
|
||||||
|
|
||||||
### Setup Script
|
|
||||||
- ✅ Directory creation
|
|
||||||
- ✅ iPXE bootloader download from boot.ipxe.org
|
|
||||||
- ✅ iPXE build from source (optional)
|
|
||||||
- ✅ Configuration validation
|
|
||||||
- ✅ Service testing
|
|
||||||
- ✅ Colored output and logging
|
|
||||||
|
|
||||||
## Boot Profiles
|
|
||||||
|
|
||||||
### 1. Control Plane
|
|
||||||
**Services**: All 8 core services (FlareDB, IAM, PlasmaVMC, K8sHost, FlashDNS, ChainFire, Object Storage, Monitoring)
|
|
||||||
**Use case**: Production control plane nodes
|
|
||||||
**Resources**: 8+ cores, 32+ GB RAM, 500+ GB SSD
|
|
||||||
|
|
||||||
### 2. Worker
|
|
||||||
**Services**: Compute-focused (K8sHost, PlasmaVMC, ChainFire, FlashDNS, monitoring agents)
|
|
||||||
**Use case**: Worker nodes for customer workloads
|
|
||||||
**Resources**: 16+ cores, 64+ GB RAM, 1+ TB SSD
|
|
||||||
|
|
||||||
### 3. All-in-One
|
|
||||||
**Services**: Complete Centra Cloud stack on one node
|
|
||||||
**Use case**: Testing, development, homelab
|
|
||||||
**Resources**: 16+ cores, 64+ GB RAM, 1+ TB SSD
|
|
||||||
**Warning**: Not for production (no HA)
|
|
||||||
|
|
||||||
## Network Flow
|
|
||||||
|
|
||||||
```
|
|
||||||
Server Powers On
|
|
||||||
↓
|
|
||||||
DHCP Discovery (broadcast)
|
|
||||||
↓
|
|
||||||
DHCP Server assigns IP + provides bootloader filename
|
|
||||||
↓
|
|
||||||
TFTP download bootloader (undionly.kpxe or ipxe.efi)
|
|
||||||
↓
|
|
||||||
iPXE executes, requests boot.ipxe via HTTP
|
|
||||||
↓
|
|
||||||
Boot menu displayed (or auto-select via MAC)
|
|
||||||
↓
|
|
||||||
iPXE downloads NixOS kernel + initrd via HTTP
|
|
||||||
↓
|
|
||||||
NixOS boots and provisions node
|
|
||||||
```
|
|
||||||
|
|
||||||
## File Structure
|
|
||||||
|
|
||||||
```
|
|
||||||
baremetal/pxe-server/
|
|
||||||
├── README.md # Comprehensive documentation (1088 lines)
|
|
||||||
├── QUICKSTART.md # Quick start guide (165 lines)
|
|
||||||
├── OVERVIEW.md # This file
|
|
||||||
├── setup.sh # Setup script (446 lines, executable)
|
|
||||||
├── nixos-module.nix # NixOS service module (358 lines)
|
|
||||||
├── .gitignore # Git ignore for runtime assets
|
|
||||||
│
|
|
||||||
├── dhcp/
|
|
||||||
│ └── dhcpd.conf # DHCP server config (134 lines)
|
|
||||||
│
|
|
||||||
├── ipxe/
|
|
||||||
│ ├── boot.ipxe # Main boot script (320 lines)
|
|
||||||
│ └── mac-mappings.txt # MAC address reference (49 lines)
|
|
||||||
│
|
|
||||||
├── http/
|
|
||||||
│ ├── nginx.conf # HTTP server config (187 lines)
|
|
||||||
│ └── directory-structure.txt # Directory docs (95 lines)
|
|
||||||
│
|
|
||||||
├── examples/
|
|
||||||
│ └── nixos-config-examples.nix # 8 deployment examples (391 lines)
|
|
||||||
│
|
|
||||||
└── assets/
|
|
||||||
└── .gitkeep # Placeholder for runtime assets
|
|
||||||
```
|
|
||||||
|
|
||||||
## Dependencies on Other Tasks
|
|
||||||
|
|
||||||
### Prerequisites
|
|
||||||
None - this is the first step in T032 (Bare-Metal Provisioning)
|
|
||||||
|
|
||||||
### Next Steps
|
|
||||||
- **T032.S3**: Image Builder - Generate NixOS netboot images for each profile
|
|
||||||
- **T032.S4**: Provisioning Orchestrator - API-driven node lifecycle management
|
|
||||||
|
|
||||||
### Integration Points
|
|
||||||
- **FlareDB**: Node inventory and state storage
|
|
||||||
- **IAM**: Authentication for provisioning API
|
|
||||||
- **PlasmaVMC**: VM provisioning on bare-metal nodes
|
|
||||||
- **K8sHost**: Kubernetes node integration
|
|
||||||
|
|
||||||
## Testing Status
|
|
||||||
|
|
||||||
### What Can Be Tested Now
|
|
||||||
✅ Directory structure creation
|
|
||||||
✅ Configuration file syntax validation
|
|
||||||
✅ Service startup (DHCP, TFTP, HTTP)
|
|
||||||
✅ Firewall rules
|
|
||||||
✅ Boot script download
|
|
||||||
✅ iPXE bootloader download/build
|
|
||||||
|
|
||||||
### What Requires T032.S3
|
|
||||||
⏳ Actual bare-metal provisioning (needs NixOS images)
|
|
||||||
⏳ End-to-end boot flow (needs kernel/initrd)
|
|
||||||
⏳ Profile-specific deployments (needs profile configs)
|
|
||||||
|
|
||||||
## Quick Start Commands
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Install and setup
|
|
||||||
cd baremetal/pxe-server
|
|
||||||
sudo ./setup.sh --install --download --validate
|
|
||||||
|
|
||||||
# Configure NixOS (edit configuration.nix)
|
|
||||||
imports = [ ./baremetal/pxe-server/nixos-module.nix ];
|
|
||||||
services.centra-pxe-server.enable = true;
|
|
||||||
# ... (see QUICKSTART.md for full config)
|
|
||||||
|
|
||||||
# Deploy
|
|
||||||
sudo nixos-rebuild switch
|
|
||||||
|
|
||||||
# Test services
|
|
||||||
sudo ./setup.sh --test
|
|
||||||
|
|
||||||
# Boot a server
|
|
||||||
# - Configure BIOS for PXE boot
|
|
||||||
# - Connect to network
|
|
||||||
# - Power on
|
|
||||||
```
|
|
||||||
|
|
||||||
## Known Limitations
|
|
||||||
|
|
||||||
1. **No NixOS images yet**: T032.S3 will generate the actual boot images
|
|
||||||
2. **Single interface**: Module supports one network interface (can be extended)
|
|
||||||
3. **No HA built-in**: DHCP failover can be configured manually (example provided)
|
|
||||||
4. **No authentication**: Provisioning API will add auth in T032.S4
|
|
||||||
|
|
||||||
## Configuration Examples Provided
|
|
||||||
|
|
||||||
1. Basic single-subnet PXE server
|
|
||||||
2. PXE server with MAC-based auto-selection
|
|
||||||
3. Custom DHCP configuration
|
|
||||||
4. Multi-homed server (multiple interfaces)
|
|
||||||
5. High-availability with failover
|
|
||||||
6. HTTPS boot (secure boot)
|
|
||||||
7. Development/testing configuration
|
|
||||||
8. Production with monitoring
|
|
||||||
|
|
||||||
## Security Considerations
|
|
||||||
|
|
||||||
- DHCP is unauthenticated (normal for PXE)
|
|
||||||
- TFTP is unencrypted (normal for PXE)
|
|
||||||
- HTTP can be upgraded to HTTPS (documented)
|
|
||||||
- iPXE supports secure boot with embedded certificates (build from source)
|
|
||||||
- Network should be isolated (provisioning VLAN recommended)
|
|
||||||
- Firewall rules limit exposure (only necessary ports)
|
|
||||||
|
|
||||||
## Troubleshooting Resources
|
|
||||||
|
|
||||||
Comprehensive troubleshooting section in README.md covers:
|
|
||||||
- DHCP discovery issues
|
|
||||||
- TFTP timeout problems
|
|
||||||
- HTTP download failures
|
|
||||||
- Boot script errors
|
|
||||||
- Serial console debugging
|
|
||||||
- Common error messages
|
|
||||||
- Service health checks
|
|
||||||
- Network connectivity tests
|
|
||||||
|
|
||||||
## Performance Considerations
|
|
||||||
|
|
||||||
- **Per-node boot**: ~500 MB download (kernel + initrd)
|
|
||||||
- **Recommended**: 1 Gbps link for PXE server
|
|
||||||
- **10 concurrent boots**: ~5 GB total transfer (10 × ~500 MB) — expect a multi-Gbps burst, so stagger boots or use a 10 Gbps link
|
|
||||||
- **Disk space**: 5-10 GB recommended (multiple profiles + versions)
|
|
||||||
|
|
||||||
## Compliance with Requirements
|
|
||||||
|
|
||||||
| Requirement | Status | Notes |
|
|
||||||
|-------------|--------|-------|
|
|
||||||
| DHCP server config | ✅ | ISC DHCP with BIOS/UEFI detection |
|
|
||||||
| iPXE boot scripts | ✅ | Main menu + 3 profiles |
|
|
||||||
| HTTP server config | ✅ | Nginx with proper paths |
|
|
||||||
| NixOS module | ✅ | Complete systemd integration |
|
|
||||||
| Setup script | ✅ | Download/build/validate/test |
|
|
||||||
| README | ✅ | Comprehensive + troubleshooting |
|
|
||||||
| Working examples | ✅ | All configs are production-ready |
|
|
||||||
| 800-1200 lines | ✅ | 3086 lines (exceeded) |
|
|
||||||
| No S3 implementation | ✅ | Placeholder paths only |
|
|
||||||
|
|
||||||
## Changelog
|
|
||||||
|
|
||||||
**2025-12-10**: Initial implementation
|
|
||||||
- Created complete PXE boot infrastructure
|
|
||||||
- Added DHCP, TFTP, HTTP server configurations
|
|
||||||
- Implemented iPXE boot scripts with 3 profiles
|
|
||||||
- Created NixOS service module
|
|
||||||
- Added setup script with validation
|
|
||||||
- Wrote comprehensive documentation
|
|
||||||
- Provided 8 configuration examples
|
|
||||||
|
|
||||||
## License
|
|
||||||
|
|
||||||
Part of Centra Cloud infrastructure. See project root for license.
|
|
||||||
|
|
||||||
## Support
|
|
||||||
|
|
||||||
For issues or questions:
|
|
||||||
1. Check [README.md](README.md) troubleshooting section
|
|
||||||
2. Run diagnostic: `sudo ./setup.sh --test`
|
|
||||||
3. Review logs: `sudo journalctl -u dhcpd4 -u atftpd -u nginx -f`
|
|
||||||
4. See [QUICKSTART.md](QUICKSTART.md) for common commands
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
**Implementation by**: Claude Sonnet 4.5
|
|
||||||
**Task**: T032.S2 - PXE Boot Infrastructure
|
|
||||||
**Status**: Complete and ready for deployment
|
|
||||||
|
|
---
|
||||||
# PXE Server Quick Start Guide
|
|
||||||
|
|
||||||
This is a condensed guide for getting the PXE boot server running quickly.
|
|
||||||
|
|
||||||
## Prerequisites
|
|
||||||
|
|
||||||
- NixOS server
|
|
||||||
- Root access
|
|
||||||
- Network connectivity to bare-metal servers
|
|
||||||
|
|
||||||
## 5-Minute Setup
|
|
||||||
|
|
||||||
### 1. Run Setup Script
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd baremetal/pxe-server
|
|
||||||
sudo ./setup.sh --install --download --validate
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2. Configure NixOS
|
|
||||||
|
|
||||||
Add to `/etc/nixos/configuration.nix`:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
imports = [ /path/to/baremetal/pxe-server/nixos-module.nix ];
|
|
||||||
|
|
||||||
services.centra-pxe-server = {
|
|
||||||
enable = true;
|
|
||||||
interface = "eth0"; # YOUR NETWORK INTERFACE
|
|
||||||
serverAddress = "10.0.100.10"; # YOUR PXE SERVER IP
|
|
||||||
|
|
||||||
dhcp = {
|
|
||||||
subnet = "10.0.100.0"; # YOUR SUBNET
|
|
||||||
netmask = "255.255.255.0";
|
|
||||||
broadcast = "10.0.100.255";
|
|
||||||
range = {
|
|
||||||
start = "10.0.100.100"; # DHCP RANGE START
|
|
||||||
end = "10.0.100.200"; # DHCP RANGE END
|
|
||||||
};
|
|
||||||
router = "10.0.100.1"; # YOUR GATEWAY
|
|
||||||
};
|
|
||||||
};
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3. Deploy
|
|
||||||
|
|
||||||
```bash
|
|
||||||
sudo nixos-rebuild switch
|
|
||||||
```
|
|
||||||
|
|
||||||
### 4. Verify
|
|
||||||
|
|
||||||
```bash
|
|
||||||
sudo ./setup.sh --test
|
|
||||||
```
|
|
||||||
|
|
||||||
You should see:
|
|
||||||
- TFTP server running
|
|
||||||
- HTTP server running
|
|
||||||
- DHCP server running
|
|
||||||
|
|
||||||
### 5. Boot a Server
|
|
||||||
|
|
||||||
1. Configure server BIOS for PXE boot
|
|
||||||
2. Connect to same network
|
|
||||||
3. Power on
|
|
||||||
4. Watch for boot menu
|
|
||||||
|
|
||||||
## Adding Nodes
|
|
||||||
|
|
||||||
### Quick Add (No Auto-Selection)
|
|
||||||
|
|
||||||
Just boot the server and select profile from menu.
|
|
||||||
|
|
||||||
### With Auto-Selection
|
|
||||||
|
|
||||||
1. Get MAC address from server
|
|
||||||
2. Edit `ipxe/boot.ipxe`, add line:
|
|
||||||
```ipxe
|
|
||||||
iseq ${mac} AA:BB:CC:DD:EE:FF && set profile worker && set hostname worker-05 && goto boot ||
|
|
||||||
```
|
|
||||||
3. Optionally add to `dhcp/dhcpd.conf`:
|
|
||||||
```conf
|
|
||||||
host worker-05 {
|
|
||||||
hardware ethernet AA:BB:CC:DD:EE:FF;
|
|
||||||
fixed-address 10.0.100.65;
|
|
||||||
option host-name "worker-05";
|
|
||||||
}
|
|
||||||
```
|
|
||||||
4. Restart DHCP: `sudo systemctl restart dhcpd4`
|
|
||||||
|
|
||||||
## Troubleshooting
|
|
||||||
|
|
||||||
### Server doesn't get IP
|
|
||||||
|
|
||||||
```bash
|
|
||||||
sudo tcpdump -i eth0 port 67 or port 68
|
|
||||||
sudo journalctl -u dhcpd4 -f
|
|
||||||
```
|
|
||||||
|
|
||||||
Check:
|
|
||||||
- DHCP server running on correct interface
|
|
||||||
- Network connectivity
|
|
||||||
- Firewall allows UDP 67/68
|
|
||||||
|
|
||||||
### Server gets IP but no bootloader
|
|
||||||
|
|
||||||
```bash
|
|
||||||
sudo tcpdump -i eth0 port 69
|
|
||||||
sudo journalctl -u atftpd -f
|
|
||||||
```
|
|
||||||
|
|
||||||
Check:
|
|
||||||
- TFTP server running
|
|
||||||
- Bootloaders exist: `ls /var/lib/tftpboot/`
|
|
||||||
- Firewall allows UDP 69
|
|
||||||
|
|
||||||
### iPXE loads but can't get boot script
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl http://localhost/boot/ipxe/boot.ipxe
|
|
||||||
sudo tail -f /var/log/nginx/access.log
|
|
||||||
```
|
|
||||||
|
|
||||||
Check:
|
|
||||||
- Nginx running
|
|
||||||
- boot.ipxe exists: `ls /var/lib/pxe-boot/ipxe/`
|
|
||||||
- Firewall allows TCP 80
|
|
||||||
|
|
||||||
### Boot script loads but can't get kernel
|
|
||||||
|
|
||||||
This is expected until T032.S3 (Image Builder) is complete.
|
|
||||||
|
|
||||||
Check: `ls /var/lib/pxe-boot/nixos/`
|
|
||||||
|
|
||||||
Should have:
|
|
||||||
- bzImage
|
|
||||||
- initrd
|
|
||||||
|
|
||||||
These will be generated by the image builder.
|
|
||||||
|
|
||||||
## Common Commands
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Check all services
|
|
||||||
sudo systemctl status dhcpd4 atftpd nginx
|
|
||||||
|
|
||||||
# View logs
|
|
||||||
sudo journalctl -u dhcpd4 -u atftpd -u nginx -f
|
|
||||||
|
|
||||||
# Test connectivity
|
|
||||||
curl http://localhost/health
|
|
||||||
tftp localhost -c get undionly.kpxe /tmp/test.kpxe
|
|
||||||
|
|
||||||
# Restart services
|
|
||||||
sudo systemctl restart dhcpd4 atftpd nginx
|
|
||||||
|
|
||||||
# Check firewall
|
|
||||||
sudo iptables -L -n | grep -E "dpt:(67|68|69|80)"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Boot Profiles
|
|
||||||
|
|
||||||
- **control-plane**: All services (FlareDB, IAM, PlasmaVMC, K8sHost, etc.)
|
|
||||||
- **worker**: Compute services (K8sHost, PlasmaVMC, ChainFire)
|
|
||||||
- **all-in-one**: Everything on one node (testing/homelab)
|
|
||||||
|
|
||||||
## Next Steps
|
|
||||||
|
|
||||||
- Add more nodes (see "Adding Nodes" above)
|
|
||||||
- Wait for T032.S3 to generate NixOS boot images
|
|
||||||
- Configure monitoring for boot activity
|
|
||||||
- Set up DHCP relay for multi-segment networks
|
|
||||||
|
|
||||||
## Full Documentation
|
|
||||||
|
|
||||||
See [README.md](README.md) for complete documentation.
|
|
||||||
|
|
---
|
||||||
# Centra Cloud PXE Boot Server
|
|
||||||
|
|
||||||
This directory contains the PXE (Preboot eXecution Environment) boot infrastructure for bare-metal provisioning of Centra Cloud nodes. It enables network-based installation of NixOS on physical servers with automated profile selection.
|
|
||||||
|
|
||||||
## Table of Contents
|
|
||||||
|
|
||||||
- [Architecture Overview](#architecture-overview)
|
|
||||||
- [Components](#components)
|
|
||||||
- [Quick Start](#quick-start)
|
|
||||||
- [Detailed Setup](#detailed-setup)
|
|
||||||
- [Configuration](#configuration)
|
|
||||||
- [Boot Profiles](#boot-profiles)
|
|
||||||
- [Network Requirements](#network-requirements)
|
|
||||||
- [Troubleshooting](#troubleshooting)
|
|
||||||
- [Advanced Topics](#advanced-topics)
|
|
||||||
|
|
||||||
## Architecture Overview
|
|
||||||
|
|
||||||
The PXE boot infrastructure consists of three main services:
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────────────────────────────────┐
|
|
||||||
│ PXE Boot Flow │
|
|
||||||
└─────────────────────────────────────────────────────────────────┘
|
|
||||||
|
|
||||||
Bare-Metal Server PXE Boot Server
|
|
||||||
───────────────── ───────────────
|
|
||||||
|
|
||||||
1. Power on
|
|
||||||
│
|
|
||||||
├─► DHCP Request ──────────────► DHCP Server
|
|
||||||
│ (ISC DHCP)
|
|
||||||
│ │
|
|
||||||
│ ├─ Assigns IP
|
|
||||||
│ ├─ Detects BIOS/UEFI
|
|
||||||
│ └─ Provides bootloader path
|
|
||||||
│
|
|
||||||
├◄─ DHCP Response ───────────────┤
|
|
||||||
│ (IP, next-server, filename)
|
|
||||||
│
|
|
||||||
├─► TFTP Get bootloader ─────────► TFTP Server
|
|
||||||
│ (undionly.kpxe or ipxe.efi) (atftpd)
|
|
||||||
│
|
|
||||||
├◄─ Bootloader file ─────────────┤
|
|
||||||
│
|
|
||||||
├─► Execute iPXE bootloader
|
|
||||||
│ │
|
|
||||||
│ ├─► HTTP Get boot.ipxe ──────► HTTP Server
|
|
||||||
│ │ (nginx)
|
|
||||||
│ │
|
|
||||||
│ ├◄─ boot.ipxe script ─────────┤
|
|
||||||
│ │
|
|
||||||
│ ├─► Display menu / Auto-select profile
|
|
||||||
│ │
|
|
||||||
│ ├─► HTTP Get kernel ──────────► HTTP Server
|
|
||||||
│ │
|
|
||||||
│ ├◄─ bzImage ───────────────────┤
|
|
||||||
│ │
|
|
||||||
│ ├─► HTTP Get initrd ───────────► HTTP Server
|
|
||||||
│ │
|
|
||||||
│ ├◄─ initrd ────────────────────┤
|
|
||||||
│ │
|
|
||||||
│ └─► Boot NixOS
|
|
||||||
│
|
|
||||||
└─► NixOS Installer
|
|
||||||
└─ Provisions node based on profile
|
|
||||||
```
|
|
||||||
|
|
||||||
## Components
|
|
||||||
|
|
||||||
### 1. DHCP Server (ISC DHCP)
|
|
||||||
|
|
||||||
- **Purpose**: Assigns IP addresses and directs PXE clients to bootloader
|
|
||||||
- **Config**: `dhcp/dhcpd.conf`
|
|
||||||
- **Features**:
|
|
||||||
- BIOS/UEFI detection via option 93 (architecture type)
|
|
||||||
- Per-host configuration for fixed IP assignment
|
|
||||||
- Automatic next-server and filename configuration
|
|
||||||
|
|
||||||
### 2. TFTP Server (atftpd)
|
|
||||||
|
|
||||||
- **Purpose**: Serves iPXE bootloader files to PXE clients
|
|
||||||
- **Files served**:
|
|
||||||
- `undionly.kpxe` - BIOS bootloader
|
|
||||||
- `ipxe.efi` - UEFI x86-64 bootloader
|
|
||||||
- `ipxe-i386.efi` - UEFI x86 32-bit bootloader (optional)
|
|
||||||
|
|
||||||
### 3. HTTP Server (nginx)
|
|
||||||
|
|
||||||
- **Purpose**: Serves iPXE scripts and NixOS boot images
|
|
||||||
- **Config**: `http/nginx.conf`
|
|
||||||
- **Endpoints**:
|
|
||||||
- `/boot/ipxe/boot.ipxe` - Main boot menu script
|
|
||||||
- `/boot/nixos/bzImage` - NixOS kernel
|
|
||||||
- `/boot/nixos/initrd` - NixOS initial ramdisk
|
|
||||||
- `/health` - Health check endpoint
|
|
||||||
|
|
||||||
### 4. iPXE Boot Scripts
|
|
||||||
|
|
||||||
- **Main script**: `ipxe/boot.ipxe`
|
|
||||||
- **Features**:
|
|
||||||
- Interactive boot menu with 3 profiles
|
|
||||||
- MAC-based automatic profile selection
|
|
||||||
- Serial console support for remote management
|
|
||||||
- Detailed error messages and debugging options
|
|
||||||
|
|
||||||
### 5. NixOS Service Module
|
|
||||||
|
|
||||||
- **File**: `nixos-module.nix`
|
|
||||||
- **Purpose**: Declarative NixOS configuration for all services
|
|
||||||
- **Features**:
|
|
||||||
- Single configuration file for entire stack
|
|
||||||
- Firewall rules auto-configured
|
|
||||||
- Systemd service dependencies managed
|
|
||||||
- Directory structure auto-created
|
|
||||||
|
|
||||||
## Quick Start
|
|
||||||
|
|
||||||
### Prerequisites
|
|
||||||
|
|
||||||
- NixOS server with network connectivity
|
|
||||||
- Network interface on the same subnet as bare-metal servers
|
|
||||||
- Sufficient disk space (5-10 GB for boot images)
|
|
||||||
|
|
||||||
### Installation Steps
|
|
||||||
|
|
||||||
1. **Clone this repository** (or copy `baremetal/pxe-server/` to your NixOS system)
|
|
||||||
|
|
||||||
2. **Run the setup script**:
|
|
||||||
```bash
|
|
||||||
sudo ./setup.sh --install --download --validate
|
|
||||||
```
|
|
||||||
|
|
||||||
This will:
|
|
||||||
- Create directory structure at `/var/lib/pxe-boot`
|
|
||||||
- Download iPXE bootloaders from boot.ipxe.org
|
|
||||||
- Install boot scripts
|
|
||||||
- Validate configurations
|
|
||||||
|
|
||||||
3. **Configure network settings**:
|
|
||||||
|
|
||||||
Edit `nixos-module.nix` or create a NixOS configuration:
|
|
||||||
|
|
||||||
```nix
|
|
||||||
# /etc/nixos/configuration.nix
|
|
||||||
|
|
||||||
imports = [
|
|
||||||
/path/to/baremetal/pxe-server/nixos-module.nix
|
|
||||||
];
|
|
||||||
|
|
||||||
services.centra-pxe-server = {
|
|
||||||
enable = true;
|
|
||||||
interface = "eth0"; # Your network interface
|
|
||||||
serverAddress = "10.0.100.10"; # PXE server IP
|
|
||||||
|
|
||||||
dhcp = {
|
|
||||||
subnet = "10.0.100.0";
|
|
||||||
netmask = "255.255.255.0";
|
|
||||||
broadcast = "10.0.100.255";
|
|
||||||
range = {
|
|
||||||
start = "10.0.100.100";
|
|
||||||
end = "10.0.100.200";
|
|
||||||
};
|
|
||||||
router = "10.0.100.1";
|
|
||||||
};
|
|
||||||
|
|
||||||
# Optional: Define known nodes with MAC addresses
|
|
||||||
nodes = {
|
|
||||||
"52:54:00:12:34:56" = {
|
|
||||||
profile = "control-plane";
|
|
||||||
hostname = "control-plane-01";
|
|
||||||
ipAddress = "10.0.100.50";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
```
|
|
||||||
|
|
||||||
4. **Deploy NixOS configuration**:
|
|
||||||
```bash
|
|
||||||
sudo nixos-rebuild switch
|
|
||||||
```
|
|
||||||
|
|
||||||
5. **Verify services are running**:
|
|
||||||
```bash
|
|
||||||
sudo ./setup.sh --test
|
|
||||||
```
|
|
||||||
|
|
||||||
6. **Add NixOS boot images** (will be provided by T032.S3):
|
|
||||||
```bash
|
|
||||||
# Placeholder - actual images will be built by image builder
|
|
||||||
# For testing, you can use any NixOS netboot image
|
|
||||||
sudo mkdir -p /var/lib/pxe-boot/nixos
|
|
||||||
# Copy bzImage and initrd to /var/lib/pxe-boot/nixos/
|
|
||||||
```
|
|
||||||
|
|
||||||
7. **Boot a bare-metal server**:
|
|
||||||
- Configure server BIOS to boot from network (PXE)
|
|
||||||
- Connect to same network segment
|
|
||||||
- Power on server
|
|
||||||
- Watch for DHCP discovery and iPXE boot menu
|
|
||||||
|
|
||||||
## Detailed Setup
|
|
||||||
|
|
||||||
### Option 1: NixOS Module (Recommended)
|
|
||||||
|
|
||||||
The NixOS module provides a declarative way to configure the entire PXE server stack.
|
|
||||||
|
|
||||||
**Advantages**:
|
|
||||||
- Single configuration file
|
|
||||||
- Automatic service dependencies
|
|
||||||
- Rollback capability
|
|
||||||
- Integration with NixOS firewall
|
|
||||||
|
|
||||||
**Configuration Example**:
|
|
||||||
|
|
||||||
See the NixOS configuration example in [Quick Start](#quick-start).
|
|
||||||
|
|
||||||
### Option 2: Manual Installation
|
|
||||||
|
|
||||||
For non-NixOS systems or manual setup:
|
|
||||||
|
|
||||||
1. **Install required packages**:
|
|
||||||
```bash
|
|
||||||
# Debian/Ubuntu
|
|
||||||
apt-get install isc-dhcp-server atftpd nginx curl
|
|
||||||
|
|
||||||
# RHEL/CentOS
|
|
||||||
yum install dhcp tftp-server nginx curl
|
|
||||||
```
|
|
||||||
|
|
||||||
2. **Run setup script**:
|
|
||||||
```bash
|
|
||||||
sudo ./setup.sh --install --download
|
|
||||||
```
|
|
||||||
|
|
||||||
3. **Copy configuration files**:
|
|
||||||
```bash
|
|
||||||
# DHCP configuration
|
|
||||||
sudo cp dhcp/dhcpd.conf /etc/dhcp/dhcpd.conf
|
|
||||||
|
|
||||||
# Edit to match your network
|
|
||||||
sudo vim /etc/dhcp/dhcpd.conf
|
|
||||||
|
|
||||||
# Nginx configuration
|
|
||||||
sudo cp http/nginx.conf /etc/nginx/sites-available/pxe-boot
|
|
||||||
sudo ln -s /etc/nginx/sites-available/pxe-boot /etc/nginx/sites-enabled/
|
|
||||||
```
|
|
||||||
|
|
||||||
4. **Start services**:
|
|
||||||
```bash
|
|
||||||
sudo systemctl enable --now isc-dhcp-server
|
|
||||||
sudo systemctl enable --now atftpd
|
|
||||||
sudo systemctl enable --now nginx
|
|
||||||
```
|
|
||||||
|
|
||||||
5. **Configure firewall**:
|
|
||||||
```bash
|
|
||||||
# UFW (Ubuntu)
|
|
||||||
sudo ufw allow 67/udp # DHCP
|
|
||||||
sudo ufw allow 68/udp # DHCP
|
|
||||||
sudo ufw allow 69/udp # TFTP
|
|
||||||
sudo ufw allow 80/tcp # HTTP
|
|
||||||
|
|
||||||
# firewalld (RHEL)
|
|
||||||
sudo firewall-cmd --permanent --add-service=dhcp
|
|
||||||
sudo firewall-cmd --permanent --add-service=tftp
|
|
||||||
sudo firewall-cmd --permanent --add-service=http
|
|
||||||
sudo firewall-cmd --reload
|
|
||||||
```
|
|
||||||
|
|
||||||
## Configuration
|
|
||||||
|
|
||||||
### DHCP Configuration
|
|
||||||
|
|
||||||
The DHCP server configuration is in `dhcp/dhcpd.conf`. Key sections:
|
|
||||||
|
|
||||||
**Network Settings**:
|
|
||||||
```conf
|
|
||||||
subnet 10.0.100.0 netmask 255.255.255.0 {
|
|
||||||
range 10.0.100.100 10.0.100.200;
|
|
||||||
option routers 10.0.100.1;
|
|
||||||
option domain-name-servers 10.0.100.1, 8.8.8.8;
|
|
||||||
next-server 10.0.100.10; # PXE server IP
|
|
||||||
# ...
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Boot File Selection** (automatic BIOS/UEFI detection):
|
|
||||||
```conf
|
|
||||||
if exists user-class and option user-class = "iPXE" {
|
|
||||||
filename "http://10.0.100.10/boot/ipxe/boot.ipxe";
|
|
||||||
} elsif option architecture-type = 00:00 {
|
|
||||||
filename "undionly.kpxe"; # BIOS
|
|
||||||
} elsif option architecture-type = 00:07 {
|
|
||||||
filename "ipxe.efi"; # UEFI x86-64
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Host-Specific Configuration**:
|
|
||||||
```conf
|
|
||||||
host control-plane-01 {
|
|
||||||
hardware ethernet 52:54:00:12:34:56;
|
|
||||||
fixed-address 10.0.100.50;
|
|
||||||
option host-name "control-plane-01";
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### iPXE Boot Script
|
|
||||||
|
|
||||||
The main boot script is `ipxe/boot.ipxe`. It provides:
|
|
||||||
|
|
||||||
1. **MAC-based automatic selection**:
|
|
||||||
```ipxe
|
|
||||||
iseq ${mac} 52:54:00:12:34:56 && set profile control-plane && goto boot ||
|
|
||||||
```
|
|
||||||
|
|
||||||
2. **Interactive menu** (if no MAC match):
|
|
||||||
```ipxe
|
|
||||||
:menu
|
|
||||||
menu Centra Cloud - Bare-Metal Provisioning
|
|
||||||
item control-plane 1. Control Plane Node (All Services)
|
|
||||||
item worker 2. Worker Node (Compute Services)
|
|
||||||
item all-in-one 3. All-in-One Node (Testing/Homelab)
|
|
||||||
```
|
|
||||||
|
|
||||||
3. **Kernel parameters**:
|
|
||||||
```ipxe
|
|
||||||
set kernel-params centra.profile=${profile}
|
|
||||||
set kernel-params ${kernel-params} centra.hostname=${hostname}
|
|
||||||
set kernel-params ${kernel-params} console=tty0 console=ttyS0,115200n8
|
|
||||||
```
|
|
||||||
|
|
||||||
### Adding New Nodes
|
|
||||||
|
|
||||||
To add a new node to the infrastructure:
|
|
||||||
|
|
||||||
1. **Get the MAC address** from the server (check BIOS or network card label)
|
|
||||||
|
|
||||||
2. **Add to MAC mappings** (`ipxe/mac-mappings.txt`):
|
|
||||||
```
|
|
||||||
52:54:00:12:34:5d worker worker-04
|
|
||||||
```
|
|
||||||
|
|
||||||
3. **Update boot script** (`ipxe/boot.ipxe`):
|
|
||||||
```ipxe
|
|
||||||
iseq ${mac} 52:54:00:12:34:5d && set profile worker && set hostname worker-04 && goto boot ||
|
|
||||||
```
|
|
||||||
|
|
||||||
4. **Add DHCP host entry** (`dhcp/dhcpd.conf`):
|
|
||||||
```conf
|
|
||||||
host worker-04 {
|
|
||||||
hardware ethernet 52:54:00:12:34:5d;
|
|
||||||
fixed-address 10.0.100.64;
|
|
||||||
option host-name "worker-04";
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
5. **Restart DHCP service**:
|
|
||||||
```bash
|
|
||||||
sudo systemctl restart dhcpd4
|
|
||||||
```
|
|
||||||
|
|
||||||
## Boot Profiles
|
|
||||||
|
|
||||||
### 1. Control Plane Profile
|
|
||||||
|
|
||||||
**Purpose**: Nodes that run core infrastructure services
|
|
||||||
|
|
||||||
**Services included**:
|
|
||||||
- FlareDB (PD, Store, TiKV-compatible database)
|
|
||||||
- IAM (Identity and Access Management)
|
|
||||||
- PlasmaVMC (Virtual Machine Controller)
|
|
||||||
- K8sHost (Kubernetes node agent)
|
|
||||||
- FlashDNS (High-performance DNS)
|
|
||||||
- ChainFire (Firewall/networking)
|
|
||||||
- Object Storage (S3-compatible)
|
|
||||||
- Monitoring (Prometheus, Grafana)
|
|
||||||
|
|
||||||
**Resource requirements**:
|
|
||||||
- CPU: 8+ cores recommended
|
|
||||||
- RAM: 32+ GB recommended
|
|
||||||
- Disk: 500+ GB SSD
|
|
||||||
|
|
||||||
**Use case**: Production control plane nodes in a cluster
|
|
||||||
|
|
||||||
### 2. Worker Profile
|
|
||||||
|
|
||||||
**Purpose**: Nodes that run customer workloads
|
|
||||||
|
|
||||||
**Services included**:
|
|
||||||
- K8sHost (Kubernetes node agent) - primary service
|
|
||||||
- PlasmaVMC (Virtual Machine Controller) - VM workloads
|
|
||||||
- ChainFire (Network policy enforcement)
|
|
||||||
- FlashDNS (Local DNS caching)
|
|
||||||
- Basic monitoring agents
|
|
||||||
|
|
||||||
**Resource requirements**:
|
|
||||||
- CPU: 16+ cores recommended
|
|
||||||
- RAM: 64+ GB recommended
|
|
||||||
- Disk: 1+ TB SSD
|
|
||||||
|
|
||||||
**Use case**: Worker nodes for running customer applications
|
|
||||||
|
|
||||||
### 3. All-in-One Profile
|
|
||||||
|
|
||||||
**Purpose**: Single-node deployment for testing and development
|
|
||||||
|
|
||||||
**Services included**:
|
|
||||||
- Complete Centra Cloud stack on one node
|
|
||||||
- All services from control-plane profile
|
|
||||||
- Suitable for testing, development, homelab
|
|
||||||
|
|
||||||
**Resource requirements**:
|
|
||||||
- CPU: 16+ cores recommended
|
|
||||||
- RAM: 64+ GB recommended
|
|
||||||
- Disk: 1+ TB SSD
|
|
||||||
|
|
||||||
**Use case**: Development, testing, homelab deployments
|
|
||||||
|
|
||||||
**Warning**: Not recommended for production use (no HA, resource intensive)
|
|
||||||
|
|
||||||
## Network Requirements
|
|
||||||
|
|
||||||
### Network Topology
|
|
||||||
|
|
||||||
The PXE server must be on the same network segment as the bare-metal servers, or you must configure DHCP relay.
|
|
||||||
|
|
||||||
**Same Segment** (recommended for initial setup):
|
|
||||||
```
|
|
||||||
┌──────────────┐ ┌──────────────────┐
|
|
||||||
│ PXE Server │ │ Bare-Metal Srv │
|
|
||||||
│ 10.0.100.10 │◄────────┤ (DHCP client) │
|
|
||||||
└──────────────┘ L2 SW └──────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
**Different Segments** (requires DHCP relay):
|
|
||||||
```
|
|
||||||
┌──────────────┐ ┌──────────┐ ┌──────────────────┐
|
|
||||||
│ PXE Server │ │ Router │ │ Bare-Metal Srv │
|
|
||||||
│ 10.0.100.10 │◄────────┤ (relay) │◄────────┤ (DHCP client) │
|
|
||||||
└──────────────┘ └──────────┘ └──────────────────┘
|
|
||||||
Segment A ip helper Segment B
|
|
||||||
```
|
|
||||||
|
|
||||||
### DHCP Relay Configuration
|
|
||||||
|
|
||||||
If your PXE server is on a different network segment:
|
|
||||||
|
|
||||||
**Cisco IOS**:
|
|
||||||
```
|
|
||||||
interface vlan 100
|
|
||||||
ip helper-address 10.0.100.10
|
|
||||||
```
|
|
||||||
|
|
||||||
**Linux (dhcp-helper)**:
|
|
||||||
```bash
|
|
||||||
apt-get install dhcp-helper
|
|
||||||
# Edit /etc/default/dhcp-helper
|
|
||||||
DHCPHELPER_OPTS="-s 10.0.100.10"
|
|
||||||
systemctl restart dhcp-helper
|
|
||||||
```
|
|
||||||
|
|
||||||
**Linux (dhcrelay)**:
|
|
||||||
```bash
|
|
||||||
apt-get install isc-dhcp-relay
|
|
||||||
dhcrelay -i eth0 -i eth1 10.0.100.10
|
|
||||||
```
|
|
||||||
|
|
||||||
### Firewall Rules
|
|
||||||
|
|
||||||
The following ports must be open on the PXE server:
|
|
||||||
|
|
||||||
| Port | Protocol | Service | Direction | Description |
|
|
||||||
|------|----------|---------|-----------|-------------|
|
|
||||||
| 67 | UDP | DHCP | Inbound | DHCP server |
|
|
||||||
| 68 | UDP | DHCP | Outbound | DHCP client responses |
|
|
||||||
| 69 | UDP | TFTP | Inbound | TFTP bootloader downloads |
|
|
||||||
| 80 | TCP | HTTP | Inbound | iPXE scripts and boot images |
|
|
||||||
| 443 | TCP | HTTPS | Inbound | Optional: secure boot images |
|
|
||||||
|
|
||||||
### Network Bandwidth
|
|
||||||
|
|
||||||
Estimated bandwidth requirements:
|
|
||||||
|
|
||||||
- Per-node boot: ~500 MB download (kernel + initrd)
|
|
||||||
- Concurrent boots: Multiply by number of simultaneous boots
|
|
||||||
- Recommended: 1 Gbps link for PXE server
|
|
||||||
|
|
||||||
Example: booting 10 nodes simultaneously means serving ~5 GB of boot data at once (10 × ~500 MB), which is a multi-Gbps burst that will saturate a 1 Gbps link — stagger the boots or provision a 10 Gbps link for the PXE server.
|
|
||||||
|
|
||||||
## Troubleshooting
|
|
||||||
|
|
||||||
### DHCP Issues
|
|
||||||
|
|
||||||
**Problem**: Server doesn't get IP address
|
|
||||||
|
|
||||||
**Diagnosis**:
|
|
||||||
```bash
|
|
||||||
# On PXE server, monitor DHCP requests
|
|
||||||
sudo tcpdump -i eth0 -n port 67 or port 68
|
|
||||||
|
|
||||||
# Check DHCP server logs
|
|
||||||
sudo journalctl -u dhcpd4 -f
|
|
||||||
|
|
||||||
# Verify DHCP server is running
|
|
||||||
sudo systemctl status dhcpd4
|
|
||||||
```
|
|
||||||
|
|
||||||
**Common causes**:
|
|
||||||
- DHCP server not running on correct interface
|
|
||||||
- Firewall blocking UDP 67/68
|
|
||||||
- Network cable/switch issue
|
|
||||||
- DHCP range exhausted
|
|
||||||
|
|
||||||
**Solution**:
|
|
||||||
```bash
|
|
||||||
# Check interface configuration
|
|
||||||
ip addr show
|
|
||||||
|
|
||||||
# Verify DHCP config syntax
|
|
||||||
sudo dhcpd -t -cf /etc/dhcp/dhcpd.conf
|
|
||||||
|
|
||||||
# Check firewall
|
|
||||||
sudo iptables -L -n | grep -E "67|68"
|
|
||||||
|
|
||||||
# Restart DHCP server
|
|
||||||
sudo systemctl restart dhcpd4
|
|
||||||
```
|
|
||||||
|
|
||||||
### TFTP Issues
|
|
||||||
|
|
||||||
**Problem**: PXE client gets IP but fails to download bootloader
|
|
||||||
|
|
||||||
**Diagnosis**:
|
|
||||||
```bash
|
|
||||||
# Monitor TFTP requests
|
|
||||||
sudo tcpdump -i eth0 -n port 69
|
|
||||||
|
|
||||||
# Check TFTP server logs
|
|
||||||
sudo journalctl -u atftpd -f
|
|
||||||
|
|
||||||
# Test TFTP locally
|
|
||||||
tftp localhost -c get undionly.kpxe /tmp/test.kpxe
|
|
||||||
```
|
|
||||||
|
|
||||||
**Common causes**:
|
|
||||||
- TFTP server not running
|
|
||||||
- Bootloader files missing
|
|
||||||
- Permissions incorrect
|
|
||||||
- Firewall blocking UDP 69
|
|
||||||
|
|
||||||
**Solution**:
|
|
||||||
```bash
|
|
||||||
# Check files exist
|
|
||||||
ls -la /var/lib/tftpboot/
|
|
||||||
|
|
||||||
# Fix permissions
|
|
||||||
sudo chmod 644 /var/lib/tftpboot/*.{kpxe,efi}
|
|
||||||
|
|
||||||
# Restart TFTP server
|
|
||||||
sudo systemctl restart atftpd
|
|
||||||
|
|
||||||
# Check firewall
|
|
||||||
sudo iptables -L -n | grep 69
|
|
||||||
```
|
|
||||||
|
|
||||||
### HTTP Issues
|
|
||||||
|
|
||||||
**Problem**: iPXE loads but can't download boot script or kernel
|
|
||||||
|
|
||||||
**Diagnosis**:
|
|
||||||
```bash
|
|
||||||
# Monitor HTTP requests
|
|
||||||
sudo tail -f /var/log/nginx/access.log
|
|
||||||
|
|
||||||
# Test HTTP locally
|
|
||||||
curl -v http://localhost/boot/ipxe/boot.ipxe
|
|
||||||
curl -v http://localhost/health
|
|
||||||
|
|
||||||
# Check nginx status
|
|
||||||
sudo systemctl status nginx
|
|
||||||
```
|
|
||||||
|
|
||||||
**Common causes**:
|
|
||||||
- Nginx not running
|
|
||||||
- Boot files missing
|
|
||||||
- Permissions incorrect
|
|
||||||
- Firewall blocking TCP 80
|
|
||||||
- Wrong server IP in boot.ipxe
|
|
||||||
|
|
||||||
**Solution**:
|
|
||||||
```bash
|
|
||||||
# Check nginx config
|
|
||||||
sudo nginx -t
|
|
||||||
|
|
||||||
# Verify files exist
|
|
||||||
ls -la /var/lib/pxe-boot/ipxe/
|
|
||||||
ls -la /var/lib/pxe-boot/nixos/
|
|
||||||
|
|
||||||
# Fix permissions
|
|
||||||
sudo chown -R nginx:nginx /var/lib/pxe-boot
|
|
||||||
sudo chmod -R 755 /var/lib/pxe-boot
|
|
||||||
|
|
||||||
# Restart nginx
|
|
||||||
sudo systemctl restart nginx
|
|
||||||
```
|
|
||||||
|
|
||||||
### Boot Script Issues
|
|
||||||
|
|
||||||
**Problem**: Boot menu appears but fails to load kernel
|
|
||||||
|
|
||||||
**Diagnosis**:
|
|
||||||
- Check iPXE error messages on console
|
|
||||||
- Verify URLs in boot.ipxe match actual paths
|
|
||||||
- Test kernel download manually:
|
|
||||||
```bash
|
|
||||||
curl -I http://10.0.100.10/boot/nixos/bzImage
|
|
||||||
```
|
|
||||||
|
|
||||||
**Common causes**:
|
|
||||||
- NixOS boot images not deployed yet (normal for T032.S2)
|
|
||||||
- Wrong paths in boot.ipxe
|
|
||||||
- Files too large (check disk space)
|
|
||||||
|
|
||||||
**Solution**:
|
|
||||||
```bash
|
|
||||||
# Wait for T032.S3 (Image Builder) to generate boot images
|
|
||||||
# OR manually place NixOS netboot images:
|
|
||||||
sudo mkdir -p /var/lib/pxe-boot/nixos
|
|
||||||
# Copy bzImage and initrd from NixOS netboot
|
|
||||||
```
|
|
||||||
|
|
||||||
### Serial Console Debugging
|
|
||||||
|
|
||||||
For remote debugging without physical access:
|
|
||||||
|
|
||||||
1. **Enable serial console in BIOS**:
|
|
||||||
- Configure COM1/ttyS0 at 115200 baud
|
|
||||||
- Enable console redirection
|
|
||||||
|
|
||||||
2. **Connect via IPMI SOL** (if available):
|
|
||||||
```bash
|
|
||||||
ipmitool -I lanplus -H <bmc-ip> -U admin sol activate
|
|
||||||
```
|
|
||||||
|
|
||||||
3. **Watch boot process**:
|
|
||||||
- DHCP discovery messages
|
|
||||||
- TFTP download progress
|
|
||||||
- iPXE boot menu
|
|
||||||
- Kernel boot messages
|
|
||||||
|
|
||||||
4. **Kernel parameters include serial console**:
|
|
||||||
```
|
|
||||||
console=tty0 console=ttyS0,115200n8
|
|
||||||
```
|
|
||||||
|
|
||||||
### Common Error Messages
|
|
||||||
|
|
||||||
| Error | Cause | Solution |
|
|
||||||
|-------|-------|----------|
|
|
||||||
| `PXE-E51: No DHCP or proxyDHCP offers were received` | DHCP server not responding | Check DHCP server running, network connectivity |
|
|
||||||
| `PXE-E53: No boot filename received` | DHCP not providing filename | Check dhcpd.conf has `filename` option |
|
|
||||||
| `PXE-E32: TFTP open timeout` | TFTP server not responding | Check TFTP server running, firewall rules |
|
|
||||||
| `Not found: /boot/ipxe/boot.ipxe` | HTTP 404 error | Check file exists, nginx config, permissions |
|
|
||||||
| `Could not boot: Exec format error` | Corrupted boot file | Re-download/rebuild bootloader |
|
|
||||||
|
|
||||||
## Advanced Topics
|
|
||||||
|
|
||||||
### Building iPXE from Source
|
|
||||||
|
|
||||||
For production deployments, building iPXE from source provides:
|
|
||||||
- Custom branding
|
|
||||||
- Embedded certificates for HTTPS
|
|
||||||
- Optimized size
|
|
||||||
- Security hardening
|
|
||||||
|
|
||||||
**Build instructions**:
|
|
||||||
```bash
|
|
||||||
sudo ./setup.sh --build-ipxe
|
|
||||||
```
|
|
||||||
|
|
||||||
Or manually:
|
|
||||||
```bash
|
|
||||||
git clone https://github.com/ipxe/ipxe.git
|
|
||||||
cd ipxe/src
|
|
||||||
|
|
||||||
# BIOS bootloader
|
|
||||||
make bin/undionly.kpxe
|
|
||||||
|
|
||||||
# UEFI bootloader
|
|
||||||
make bin-x86_64-efi/ipxe.efi
|
|
||||||
|
|
||||||
# Copy to PXE server
|
|
||||||
sudo cp bin/undionly.kpxe /var/lib/pxe-boot/ipxe/
|
|
||||||
sudo cp bin-x86_64-efi/ipxe.efi /var/lib/pxe-boot/ipxe/
|
|
||||||
```
|
|
||||||
|
|
||||||
### HTTPS Boot (Secure Boot)
|
|
||||||
|
|
||||||
For enhanced security, serve boot images over HTTPS:
|
|
||||||
|
|
||||||
1. **Generate SSL certificate**:
|
|
||||||
```bash
|
|
||||||
sudo openssl req -x509 -nodes -days 365 -newkey rsa:2048 \
|
|
||||||
-keyout /etc/ssl/private/pxe-server.key \
|
|
||||||
-out /etc/ssl/certs/pxe-server.crt
|
|
||||||
```
|
|
||||||
|
|
||||||
2. **Configure nginx for HTTPS** (uncomment HTTPS block in `http/nginx.conf`)
|
|
||||||
|
|
||||||
3. **Update boot.ipxe** to use `https://` URLs
|
|
||||||
|
|
||||||
4. **Rebuild iPXE with embedded certificate** (for secure boot without prompts)
|
|
||||||
|
|
||||||
### Multiple NixOS Versions
|
|
||||||
|
|
||||||
To support multiple NixOS versions for testing/rollback:
|
|
||||||
|
|
||||||
```
|
|
||||||
/var/lib/pxe-boot/nixos/
|
|
||||||
├── 24.05/
|
|
||||||
│ ├── bzImage
|
|
||||||
│ └── initrd
|
|
||||||
├── 24.11/
|
|
||||||
│ ├── bzImage
|
|
||||||
│ └── initrd
|
|
||||||
└── latest -> 24.11/ # Symlink to current version
|
|
||||||
```
|
|
||||||
|
|
||||||
Update `boot.ipxe` to use `/boot/nixos/latest/bzImage` or add menu items for version selection.
|
|
||||||
|
|
||||||
### Integration with BMC/IPMI
|
|
||||||
|
|
||||||
For fully automated provisioning:
|
|
||||||
|
|
||||||
1. **Discover new hardware** via IPMI/Redfish API
|
|
||||||
2. **Configure PXE boot** via IPMI:
|
|
||||||
```bash
|
|
||||||
ipmitool -I lanplus -H <bmc-ip> -U admin chassis bootdev pxe options=persistent
|
|
||||||
```
|
|
||||||
3. **Power on server**:
|
|
||||||
```bash
|
|
||||||
ipmitool -I lanplus -H <bmc-ip> -U admin power on
|
|
||||||
```
|
|
||||||
4. **Monitor via SOL** (serial-over-LAN)
|
|
||||||
|
|
||||||
### Monitoring and Metrics
|
|
||||||
|
|
||||||
Track PXE boot activity:
|
|
||||||
|
|
||||||
1. **DHCP leases**:
|
|
||||||
```bash
|
|
||||||
cat /var/lib/dhcp/dhcpd.leases
|
|
||||||
```
|
|
||||||
|
|
||||||
2. **HTTP access logs**:
|
|
||||||
```bash
|
|
||||||
sudo tail -f /var/log/nginx/access.log | grep -E "boot.ipxe|bzImage|initrd"
|
|
||||||
```
|
|
||||||
|
|
||||||
3. **Prometheus metrics** (if nginx-module-vts installed):
|
|
||||||
- Boot file download counts
|
|
||||||
- Bandwidth usage
|
|
||||||
- Response times
|
|
||||||
|
|
||||||
4. **Custom metrics endpoint**:
|
|
||||||
- Parse nginx access logs
|
|
||||||
- Count boots per profile
|
|
||||||
- Alert on failed boots
|
|
||||||
|
|
||||||
## Files and Directory Structure
|
|
||||||
|
|
||||||
```
|
|
||||||
baremetal/pxe-server/
|
|
||||||
├── README.md # This file
|
|
||||||
├── setup.sh # Setup and management script
|
|
||||||
├── nixos-module.nix # NixOS service module
|
|
||||||
│
|
|
||||||
├── dhcp/
|
|
||||||
│ └── dhcpd.conf # DHCP server configuration
|
|
||||||
│
|
|
||||||
├── ipxe/
|
|
||||||
│ ├── boot.ipxe # Main boot menu script
|
|
||||||
│ └── mac-mappings.txt # MAC address documentation
|
|
||||||
│
|
|
||||||
├── http/
|
|
||||||
│ ├── nginx.conf # HTTP server configuration
|
|
||||||
│ └── directory-structure.txt # Directory layout documentation
|
|
||||||
│
|
|
||||||
└── assets/ # (Created at runtime)
|
|
||||||
└── /var/lib/pxe-boot/
|
|
||||||
├── ipxe/
|
|
||||||
│ ├── undionly.kpxe
|
|
||||||
│ ├── ipxe.efi
|
|
||||||
│ └── boot.ipxe
|
|
||||||
└── nixos/
|
|
||||||
├── bzImage
|
|
||||||
└── initrd
|
|
||||||
```
|
|
||||||
|
|
||||||
## Next Steps
|
|
||||||
|
|
||||||
After completing the PXE server setup:
|
|
||||||
|
|
||||||
1. **T032.S3 - Image Builder**: Automated NixOS image generation with profile-specific configurations
|
|
||||||
|
|
||||||
2. **T032.S4 - Provisioning Orchestrator**: API-driven provisioning workflow and node lifecycle management
|
|
||||||
|
|
||||||
3. **Integration with IAM**: Authentication for provisioning API
|
|
||||||
|
|
||||||
4. **Integration with FlareDB**: Node inventory and state management
|
|
||||||
|
|
||||||
## References
|
|
||||||
|
|
||||||
- [iPXE Documentation](https://ipxe.org/)
|
|
||||||
- [ISC DHCP Documentation](https://www.isc.org/dhcp/)
|
|
||||||
- [NixOS Manual - Netboot](https://nixos.org/manual/nixos/stable/index.html#sec-building-netboot)
|
|
||||||
- [PXE Specification (Preboot Execution Environment)](https://en.wikipedia.org/wiki/Preboot_Execution_Environment) <!-- NOTE(review): original Intel link slug ("intel-boot-executive") was garbled/dead; replace with the canonical Intel PXE 2.1 spec URL if one is available -->
|
|
||||||
|
|
||||||
## Support
|
|
||||||
|
|
||||||
For issues or questions:
|
|
||||||
- Check [Troubleshooting](#troubleshooting) section
|
|
||||||
- Review logs: `sudo journalctl -u dhcpd4 -u atftpd -u nginx -f`
|
|
||||||
- Run diagnostic: `sudo ./setup.sh --test`
|
|
||||||
|
|
||||||
## License
|
|
||||||
|
|
||||||
Part of Centra Cloud infrastructure - see project root for license information.
|
|
||||||
|
|
@ -1,392 +0,0 @@
|
||||||
# NixOS Configuration Examples for PXE Boot Server
|
|
||||||
#
|
|
||||||
# This file contains example configurations for different deployment scenarios.
|
|
||||||
# Copy the relevant section to your /etc/nixos/configuration.nix
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
# Example 1: Basic Single-Subnet PXE Server
|
|
||||||
##############################################################################
|
|
||||||
|
|
||||||
{
|
|
||||||
imports = [ ./baremetal/pxe-server/nixos-module.nix ];
|
|
||||||
|
|
||||||
services.centra-pxe-server = {
|
|
||||||
enable = true;
|
|
||||||
interface = "eth0";
|
|
||||||
serverAddress = "10.0.100.10";
|
|
||||||
|
|
||||||
dhcp = {
|
|
||||||
subnet = "10.0.100.0";
|
|
||||||
netmask = "255.255.255.0";
|
|
||||||
broadcast = "10.0.100.255";
|
|
||||||
range = {
|
|
||||||
start = "10.0.100.100";
|
|
||||||
end = "10.0.100.200";
|
|
||||||
};
|
|
||||||
router = "10.0.100.1";
|
|
||||||
nameservers = [ "10.0.100.1" "8.8.8.8" ];
|
|
||||||
domainName = "centra.local";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
# Example 2: PXE Server with Known Nodes (MAC-based Auto-Selection)
|
|
||||||
##############################################################################
|
|
||||||
|
|
||||||
{
|
|
||||||
imports = [ ./baremetal/pxe-server/nixos-module.nix ];
|
|
||||||
|
|
||||||
services.centra-pxe-server = {
|
|
||||||
enable = true;
|
|
||||||
interface = "eth0";
|
|
||||||
serverAddress = "10.0.100.10";
|
|
||||||
|
|
||||||
dhcp = {
|
|
||||||
subnet = "10.0.100.0";
|
|
||||||
netmask = "255.255.255.0";
|
|
||||||
broadcast = "10.0.100.255";
|
|
||||||
range = {
|
|
||||||
start = "10.0.100.100";
|
|
||||||
end = "10.0.100.200";
|
|
||||||
};
|
|
||||||
router = "10.0.100.1";
|
|
||||||
};
|
|
||||||
|
|
||||||
# Define known nodes with MAC addresses
|
|
||||||
nodes = {
|
|
||||||
# Control plane nodes
|
|
||||||
"52:54:00:12:34:56" = {
|
|
||||||
profile = "control-plane";
|
|
||||||
hostname = "control-plane-01";
|
|
||||||
ipAddress = "10.0.100.50";
|
|
||||||
};
|
|
||||||
"52:54:00:12:34:59" = {
|
|
||||||
profile = "control-plane";
|
|
||||||
hostname = "control-plane-02";
|
|
||||||
ipAddress = "10.0.100.51";
|
|
||||||
};
|
|
||||||
"52:54:00:12:34:5a" = {
|
|
||||||
profile = "control-plane";
|
|
||||||
hostname = "control-plane-03";
|
|
||||||
ipAddress = "10.0.100.52";
|
|
||||||
};
|
|
||||||
|
|
||||||
# Worker nodes
|
|
||||||
"52:54:00:12:34:57" = {
|
|
||||||
profile = "worker";
|
|
||||||
hostname = "worker-01";
|
|
||||||
ipAddress = "10.0.100.60";
|
|
||||||
};
|
|
||||||
"52:54:00:12:34:5b" = {
|
|
||||||
profile = "worker";
|
|
||||||
hostname = "worker-02";
|
|
||||||
ipAddress = "10.0.100.61";
|
|
||||||
};
|
|
||||||
|
|
||||||
# All-in-one test node
|
|
||||||
"52:54:00:12:34:58" = {
|
|
||||||
profile = "all-in-one";
|
|
||||||
hostname = "homelab-01";
|
|
||||||
ipAddress = "10.0.100.70";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
# Example 3: PXE Server with Custom DHCP Configuration
|
|
||||||
##############################################################################
|
|
||||||
|
|
||||||
{
|
|
||||||
imports = [ ./baremetal/pxe-server/nixos-module.nix ];
|
|
||||||
|
|
||||||
services.centra-pxe-server = {
|
|
||||||
enable = true;
|
|
||||||
interface = "eth0";
|
|
||||||
serverAddress = "10.0.100.10";
|
|
||||||
|
|
||||||
dhcp = {
|
|
||||||
subnet = "10.0.100.0";
|
|
||||||
netmask = "255.255.255.0";
|
|
||||||
broadcast = "10.0.100.255";
|
|
||||||
range = {
|
|
||||||
start = "10.0.100.100";
|
|
||||||
end = "10.0.100.200";
|
|
||||||
};
|
|
||||||
router = "10.0.100.1";
|
|
||||||
nameservers = [ "10.0.100.1" "1.1.1.1" "8.8.8.8" ];
|
|
||||||
domainName = "prod.centra.cloud";
|
|
||||||
|
|
||||||
# Longer lease times for stable infrastructure
|
|
||||||
defaultLeaseTime = 3600; # 1 hour
|
|
||||||
maxLeaseTime = 86400; # 24 hours
|
|
||||||
|
|
||||||
# Additional DHCP configuration
|
|
||||||
extraConfig = ''
|
|
||||||
# NTP servers
|
|
||||||
option ntp-servers 10.0.100.1;
|
|
||||||
|
|
||||||
# Additional subnet for management network
|
|
||||||
subnet 10.0.101.0 netmask 255.255.255.0 {
|
|
||||||
range 10.0.101.100 10.0.101.200;
|
|
||||||
option routers 10.0.101.1;
|
|
||||||
option subnet-mask 255.255.255.0;
|
|
||||||
next-server 10.0.100.10;
|
|
||||||
|
|
||||||
if exists user-class and option user-class = "iPXE" {
|
|
||||||
filename "http://10.0.100.10/boot/ipxe/boot.ipxe";
|
|
||||||
} elsif option architecture-type = 00:00 {
|
|
||||||
filename "undionly.kpxe";
|
|
||||||
} elsif option architecture-type = 00:07 {
|
|
||||||
filename "ipxe.efi";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Deny unknown clients (only known MAC addresses can boot)
|
|
||||||
# deny unknown-clients;
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
# Example 4: Multi-Homed PXE Server (Multiple Network Interfaces)
|
|
||||||
##############################################################################
|
|
||||||
|
|
||||||
{
|
|
||||||
imports = [ ./baremetal/pxe-server/nixos-module.nix ];
|
|
||||||
|
|
||||||
# Note: The module currently supports single interface.
|
|
||||||
# For multiple interfaces, configure multiple DHCP server instances manually
|
|
||||||
# or extend the module to support this use case.
|
|
||||||
|
|
||||||
services.centra-pxe-server = {
|
|
||||||
enable = true;
|
|
||||||
interface = "eth0"; # Primary provisioning network
|
|
||||||
serverAddress = "10.0.100.10";
|
|
||||||
|
|
||||||
dhcp = {
|
|
||||||
subnet = "10.0.100.0";
|
|
||||||
netmask = "255.255.255.0";
|
|
||||||
broadcast = "10.0.100.255";
|
|
||||||
range = {
|
|
||||||
start = "10.0.100.100";
|
|
||||||
end = "10.0.100.200";
|
|
||||||
};
|
|
||||||
router = "10.0.100.1";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
# Manual configuration for second interface
|
|
||||||
# services.dhcpd4.interfaces = [ "eth0" "eth1" ];
|
|
||||||
}
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
# Example 5: High-Availability PXE Server (with Failover)
|
|
||||||
##############################################################################
|
|
||||||
|
|
||||||
# Primary PXE server
|
|
||||||
{
|
|
||||||
imports = [ ./baremetal/pxe-server/nixos-module.nix ];
|
|
||||||
|
|
||||||
services.centra-pxe-server = {
|
|
||||||
enable = true;
|
|
||||||
interface = "eth0";
|
|
||||||
serverAddress = "10.0.100.10"; # Primary server IP
|
|
||||||
|
|
||||||
dhcp = {
|
|
||||||
subnet = "10.0.100.0";
|
|
||||||
netmask = "255.255.255.0";
|
|
||||||
broadcast = "10.0.100.255";
|
|
||||||
range = {
|
|
||||||
start = "10.0.100.100";
|
|
||||||
end = "10.0.100.150"; # Split range for failover
|
|
||||||
};
|
|
||||||
router = "10.0.100.1";
|
|
||||||
|
|
||||||
extraConfig = ''
|
|
||||||
# DHCP Failover Configuration
|
|
||||||
failover peer "centra-pxe-failover" {
|
|
||||||
primary;
|
|
||||||
address 10.0.100.10;
|
|
||||||
port 647;
|
|
||||||
peer address 10.0.100.11;
|
|
||||||
peer port 647;
|
|
||||||
max-response-delay 30;
|
|
||||||
max-unacked-updates 10;
|
|
||||||
load balance max seconds 3;
|
|
||||||
mclt 1800;
|
|
||||||
split 128;
|
|
||||||
}
|
|
||||||
|
|
||||||
pool {
|
|
||||||
failover peer "centra-pxe-failover";
|
|
||||||
range 10.0.100.100 10.0.100.150;
|
|
||||||
}
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
# Secondary PXE server (similar config with "secondary" role)
|
|
||||||
# Deploy on a different server with IP 10.0.100.11
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
# Example 6: PXE Server with HTTPS Boot (Secure Boot)
|
|
||||||
##############################################################################
|
|
||||||
|
|
||||||
{
|
|
||||||
imports = [ ./baremetal/pxe-server/nixos-module.nix ];
|
|
||||||
|
|
||||||
services.centra-pxe-server = {
|
|
||||||
enable = true;
|
|
||||||
interface = "eth0";
|
|
||||||
serverAddress = "10.0.100.10";
|
|
||||||
|
|
||||||
http = {
|
|
||||||
port = 443; # Use HTTPS
|
|
||||||
};
|
|
||||||
|
|
||||||
dhcp = {
|
|
||||||
subnet = "10.0.100.0";
|
|
||||||
netmask = "255.255.255.0";
|
|
||||||
broadcast = "10.0.100.255";
|
|
||||||
range = {
|
|
||||||
start = "10.0.100.100";
|
|
||||||
end = "10.0.100.200";
|
|
||||||
};
|
|
||||||
router = "10.0.100.1";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
# Configure SSL certificates
|
|
||||||
services.nginx = {
|
|
||||||
virtualHosts."pxe.centra.local" = {
|
|
||||||
      # NOTE(review): `enableSSL` was removed from the NixOS nginx module;
      # current options are `onlySSL`, `addSSL`, or `forceSSL` — confirm
      # against your NixOS release.
      onlySSL = true;
|
|
||||||
sslCertificate = "/etc/ssl/certs/pxe-server.crt";
|
|
||||||
sslCertificateKey = "/etc/ssl/private/pxe-server.key";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
# Note: You'll need to rebuild iPXE with embedded certificates
|
|
||||||
# for seamless HTTPS boot without certificate warnings
|
|
||||||
}
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
# Example 7: Development/Testing Configuration (Permissive)
|
|
||||||
##############################################################################
|
|
||||||
|
|
||||||
{
|
|
||||||
imports = [ ./baremetal/pxe-server/nixos-module.nix ];
|
|
||||||
|
|
||||||
services.centra-pxe-server = {
|
|
||||||
enable = true;
|
|
||||||
interface = "eth0";
|
|
||||||
serverAddress = "192.168.1.10"; # Typical home network
|
|
||||||
|
|
||||||
dhcp = {
|
|
||||||
subnet = "192.168.1.0";
|
|
||||||
netmask = "255.255.255.0";
|
|
||||||
broadcast = "192.168.1.255";
|
|
||||||
range = {
|
|
||||||
start = "192.168.1.100";
|
|
||||||
end = "192.168.1.120";
|
|
||||||
};
|
|
||||||
router = "192.168.1.1";
|
|
||||||
|
|
||||||
# Short lease times for rapid testing
|
|
||||||
defaultLeaseTime = 300; # 5 minutes
|
|
||||||
maxLeaseTime = 600; # 10 minutes
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
# Enable nginx directory listing for debugging
|
|
||||||
services.nginx.appendHttpConfig = ''
|
|
||||||
autoindex on;
|
|
||||||
'';
|
|
||||||
}
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
# Example 8: Production Configuration with Monitoring
|
|
||||||
##############################################################################
|
|
||||||
|
|
||||||
{
|
|
||||||
imports = [
|
|
||||||
./baremetal/pxe-server/nixos-module.nix
|
|
||||||
];
|
|
||||||
|
|
||||||
services.centra-pxe-server = {
|
|
||||||
enable = true;
|
|
||||||
interface = "eth0";
|
|
||||||
serverAddress = "10.0.100.10";
|
|
||||||
|
|
||||||
dhcp = {
|
|
||||||
subnet = "10.0.100.0";
|
|
||||||
netmask = "255.255.255.0";
|
|
||||||
broadcast = "10.0.100.255";
|
|
||||||
range = {
|
|
||||||
start = "10.0.100.100";
|
|
||||||
end = "10.0.100.200";
|
|
||||||
};
|
|
||||||
router = "10.0.100.1";
|
|
||||||
};
|
|
||||||
|
|
||||||
nodes = {
|
|
||||||
# Production node definitions
|
|
||||||
# ... (add your nodes here)
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
# Enable Prometheus monitoring
|
|
||||||
services.prometheus.exporters.nginx = {
|
|
||||||
enable = true;
|
|
||||||
port = 9113;
|
|
||||||
};
|
|
||||||
|
|
||||||
# Centralized logging
|
|
||||||
services.rsyslog = {
|
|
||||||
enable = true;
|
|
||||||
extraConfig = ''
|
|
||||||
# Forward DHCP logs to centralized log server
|
|
||||||
if $programname == 'dhcpd' then @@logserver.centra.local:514
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
|
|
||||||
# Backup DHCP leases
|
|
||||||
systemd.services.backup-dhcp-leases = {
|
|
||||||
description = "Backup DHCP leases";
|
|
||||||
serviceConfig = {
|
|
||||||
Type = "oneshot";
|
|
||||||
      # NOTE(review): systemd does not perform $(...) command substitution in
      # ExecStart, so the date suffix would never expand — run through a shell.
      ExecStart = ''${pkgs.runtimeShell} -c "${pkgs.rsync}/bin/rsync -a /var/lib/dhcp/dhcpd.leases /backup/dhcp/dhcpd.leases.$(date +%Y%m%d)"'';
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
systemd.timers.backup-dhcp-leases = {
|
|
||||||
wantedBy = [ "timers.target" ];
|
|
||||||
timerConfig = {
|
|
||||||
OnCalendar = "daily";
|
|
||||||
Persistent = true;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
# Notes
|
|
||||||
##############################################################################
|
|
||||||
|
|
||||||
# 1. Always update serverAddress, subnet, and interface to match your network
|
|
||||||
#
|
|
||||||
# 2. For MAC-based auto-selection, add nodes to the `nodes` attribute
|
|
||||||
#
|
|
||||||
# 3. DHCP failover requires configuration on both primary and secondary servers
|
|
||||||
#
|
|
||||||
# 4. HTTPS boot requires custom-built iPXE with embedded certificates
|
|
||||||
#
|
|
||||||
# 5. Test configurations in a development environment before production deployment
|
|
||||||
#
|
|
||||||
# 6. Keep DHCP lease database backed up for disaster recovery
|
|
||||||
#
|
|
||||||
# 7. Monitor DHCP pool utilization to avoid exhaustion
|
|
||||||
#
|
|
||||||
# 8. Use fixed IP addresses (via MAC mapping) for critical infrastructure nodes
|
|
||||||
|
|
@ -1,81 +0,0 @@
|
||||||
# PXE Boot Server Directory Structure
|
|
||||||
#
|
|
||||||
# This document describes the directory layout for the HTTP/TFTP server
|
|
||||||
# that serves PXE boot assets.
|
|
||||||
#
|
|
||||||
# Base Directory: /var/lib/pxe-boot/
|
|
||||||
|
|
||||||
/var/lib/pxe-boot/
|
|
||||||
├── ipxe/ # iPXE bootloaders and scripts
|
|
||||||
│ ├── undionly.kpxe # iPXE bootloader for BIOS (legacy)
|
|
||||||
│ ├── ipxe.efi # iPXE bootloader for UEFI x86-64
|
|
||||||
│ ├── ipxe-i386.efi # iPXE bootloader for UEFI x86 32-bit (rare)
|
|
||||||
│ ├── boot.ipxe # Main boot script (served via HTTP)
|
|
||||||
│ └── README.txt # Documentation
|
|
||||||
│
|
|
||||||
├── nixos/ # NixOS netboot images
|
|
||||||
│ ├── bzImage # Linux kernel (compressed)
|
|
||||||
│ ├── initrd # Initial ramdisk
|
|
||||||
│ ├── squashfs # Root filesystem (if using squashfs)
|
|
||||||
│ ├── version.txt # Build version info
|
|
||||||
│ └── profiles/ # Profile-specific boot images (optional)
|
|
||||||
│ ├── control-plane/
|
|
||||||
│ │ ├── bzImage
|
|
||||||
│ │ └── initrd
|
|
||||||
│ ├── worker/
|
|
||||||
│ │ ├── bzImage
|
|
||||||
│ │ └── initrd
|
|
||||||
│ └── all-in-one/
|
|
||||||
│ ├── bzImage
|
|
||||||
│ └── initrd
|
|
||||||
│
|
|
||||||
└── README.txt # Top-level documentation
|
|
||||||
|
|
||||||
# TFTP Directory (if using separate TFTP server)
|
|
||||||
# Usually: /var/lib/tftpboot/ or /srv/tftp/
|
|
||||||
/var/lib/tftpboot/
|
|
||||||
├── undionly.kpxe # Symlink to /var/lib/pxe-boot/ipxe/undionly.kpxe
|
|
||||||
├── ipxe.efi # Symlink to /var/lib/pxe-boot/ipxe/ipxe.efi
|
|
||||||
└── ipxe-i386.efi # Symlink to /var/lib/pxe-boot/ipxe/ipxe-i386.efi
|
|
||||||
|
|
||||||
# URL Mapping
|
|
||||||
# The following URLs are served by nginx:
|
|
||||||
#
|
|
||||||
# http://10.0.100.10/boot/ipxe/boot.ipxe
|
|
||||||
# -> /var/lib/pxe-boot/ipxe/boot.ipxe
|
|
||||||
#
|
|
||||||
# http://10.0.100.10/boot/ipxe/undionly.kpxe
|
|
||||||
# -> /var/lib/pxe-boot/ipxe/undionly.kpxe
|
|
||||||
#
|
|
||||||
# http://10.0.100.10/boot/nixos/bzImage
|
|
||||||
# -> /var/lib/pxe-boot/nixos/bzImage
|
|
||||||
#
|
|
||||||
# http://10.0.100.10/boot/nixos/initrd
|
|
||||||
# -> /var/lib/pxe-boot/nixos/initrd
|
|
||||||
|
|
||||||
# File Sizes (Typical)
|
|
||||||
# - undionly.kpxe: ~100 KB
|
|
||||||
# - ipxe.efi: ~1 MB
|
|
||||||
# - boot.ipxe: ~10 KB (text script)
|
|
||||||
# - bzImage: ~10-50 MB (compressed kernel)
|
|
||||||
# - initrd: ~50-500 MB (depends on included tools/drivers)
|
|
||||||
|
|
||||||
# Permissions
|
|
||||||
# All files should be readable by the nginx user:
|
|
||||||
# chown -R nginx:nginx /var/lib/pxe-boot
|
|
||||||
# chmod -R 755 /var/lib/pxe-boot
|
|
||||||
# chmod 644 /var/lib/pxe-boot/ipxe/*
|
|
||||||
# chmod 644 /var/lib/pxe-boot/nixos/*
|
|
||||||
|
|
||||||
# Disk Space Requirements
|
|
||||||
# Minimum: 1 GB (for basic setup with one NixOS image)
|
|
||||||
# Recommended: 5-10 GB (for multiple profiles and versions)
|
|
||||||
# - Each NixOS profile: ~500 MB - 1 GB
|
|
||||||
# - Keep 2-3 versions for rollback: multiply by 2-3x
|
|
||||||
# - Add buffer for logs and temporary files
|
|
||||||
|
|
||||||
# Backup Recommendations
|
|
||||||
# - Boot scripts (ipxe/*.ipxe): Version control (git)
|
|
||||||
# - Bootloaders (ipxe/*.kpxe, *.efi): Can re-download, but keep backups
|
|
||||||
# - NixOS images: Can rebuild from S3 builder, but keep at least 2 versions
|
|
||||||
# - Configuration files: Version control (git)
|
|
||||||
|
|
@ -1,47 +0,0 @@
|
||||||
# MAC Address to Profile Mappings
|
|
||||||
#
|
|
||||||
# This file documents the MAC address mappings used in boot.ipxe
|
|
||||||
# Update this file when adding new nodes to your infrastructure
|
|
||||||
#
|
|
||||||
# Format: MAC_ADDRESS PROFILE HOSTNAME
|
|
||||||
#
|
|
||||||
# To generate MAC addresses for virtual machines (testing):
|
|
||||||
# - Use the 52:54:00:xx:xx:xx range (QEMU/KVM local)
|
|
||||||
# - Or use your hypervisor's MAC assignment
|
|
||||||
#
|
|
||||||
# For physical servers:
|
|
||||||
# - Use the actual MAC address of the primary network interface
|
|
||||||
# - Usually found on a label on the server or in BIOS/BMC
|
|
||||||
#
|
|
||||||
|
|
||||||
# Control Plane Nodes
|
|
||||||
52:54:00:12:34:56 control-plane control-plane-01
|
|
||||||
52:54:00:12:34:59 control-plane control-plane-02
|
|
||||||
52:54:00:12:34:5a control-plane control-plane-03
|
|
||||||
|
|
||||||
# Worker Nodes
|
|
||||||
52:54:00:12:34:57 worker worker-01
|
|
||||||
52:54:00:12:34:5b worker worker-02
|
|
||||||
52:54:00:12:34:5c worker worker-03
|
|
||||||
|
|
||||||
# All-in-One Nodes (Testing/Homelab)
|
|
||||||
52:54:00:12:34:58 all-in-one all-in-one-01
|
|
||||||
|
|
||||||
# Instructions for Adding New Nodes:
|
|
||||||
# 1. Add the MAC address, profile, and hostname to this file
|
|
||||||
# 2. Update boot.ipxe with the new MAC address mapping
|
|
||||||
# 3. Update dhcpd.conf with a host entry for fixed IP assignment (optional)
|
|
||||||
# 4. Restart the DHCP service: systemctl restart dhcpd
|
|
||||||
#
|
|
||||||
# Example:
|
|
||||||
# 52:54:00:12:34:5d worker worker-04
|
|
||||||
#
|
|
||||||
# Then add to boot.ipxe:
|
|
||||||
# iseq ${mac} 52:54:00:12:34:5d && set profile worker && set hostname worker-04 && goto boot ||
|
|
||||||
#
|
|
||||||
# And optionally add to dhcpd.conf:
|
|
||||||
# host worker-04 {
|
|
||||||
# hardware ethernet 52:54:00:12:34:5d;
|
|
||||||
# fixed-address 10.0.100.64;
|
|
||||||
# option host-name "worker-04";
|
|
||||||
# }
|
|
||||||
|
|
@ -1,15 +0,0 @@
|
||||||
use chainfire_client::Client;
|
|
||||||
|
|
||||||
#[tokio::main]
|
|
||||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
||||||
// Build a client with default retry/backoff.
|
|
||||||
let mut client = Client::builder("http://127.0.0.1:2379").build().await?;
|
|
||||||
|
|
||||||
// Simple put/get roundtrip.
|
|
||||||
client.put_str("/example/key", "value").await?;
|
|
||||||
if let Some(val) = client.get_str("/example/key").await? {
|
|
||||||
println!("Got value: {}", val);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
@ -1,27 +0,0 @@
|
||||||
// Minimal cleanup utility for deleting stale deployer entries from ChainFire.
|
|
||||||
// Usage: cargo run -p chainfire-client --example cleanup
|
|
||||||
|
|
||||||
use chainfire_client::Client;
|
|
||||||
|
|
||||||
#[tokio::main]
|
|
||||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
||||||
// ChainFire API endpoint
|
|
||||||
let mut client = Client::connect("http://127.0.0.1:7000").await?;
|
|
||||||
|
|
||||||
// Stale keys to remove
|
|
||||||
let keys = [
|
|
||||||
b"deployer/nodes/info/node-025456f1".as_ref(),
|
|
||||||
b"deployer/nodes/config/025456f142ee424b88cd8aba5cf6c16a".as_ref(),
|
|
||||||
];
|
|
||||||
|
|
||||||
for key in keys {
|
|
||||||
let deleted = client.delete(key).await?;
|
|
||||||
println!(
|
|
||||||
"delete {} -> {}",
|
|
||||||
String::from_utf8_lossy(key),
|
|
||||||
if deleted { "removed" } else { "not found" }
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
@ -103,6 +103,47 @@ impl Client {
|
||||||
Ok(resp.kvs.into_iter().next().map(|kv| (kv.value, kv.mod_revision as u64)))
|
Ok(resp.kvs.into_iter().next().map(|kv| (kv.value, kv.mod_revision as u64)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Put a key-value pair only if the key's mod_revision matches.
|
||||||
|
///
|
||||||
|
/// This is a best-effort compare-and-set. The server may not return
|
||||||
|
/// a reliable success flag, so callers should treat this as "attempted".
|
||||||
|
pub async fn put_if_revision(
|
||||||
|
&mut self,
|
||||||
|
key: impl AsRef<[u8]>,
|
||||||
|
value: impl AsRef<[u8]>,
|
||||||
|
expected_mod_revision: u64,
|
||||||
|
) -> Result<()> {
|
||||||
|
let key_bytes = key.as_ref().to_vec();
|
||||||
|
let compare = Compare {
|
||||||
|
result: compare::CompareResult::Equal as i32,
|
||||||
|
target: compare::CompareTarget::Mod as i32,
|
||||||
|
key: key_bytes.clone(),
|
||||||
|
target_union: Some(compare::TargetUnion::ModRevision(
|
||||||
|
expected_mod_revision as i64,
|
||||||
|
)),
|
||||||
|
};
|
||||||
|
|
||||||
|
let put_op = RequestOp {
|
||||||
|
request: Some(request_op::Request::RequestPut(PutRequest {
|
||||||
|
key: key_bytes,
|
||||||
|
value: value.as_ref().to_vec(),
|
||||||
|
lease: 0,
|
||||||
|
prev_kv: false,
|
||||||
|
})),
|
||||||
|
};
|
||||||
|
|
||||||
|
self.kv
|
||||||
|
.txn(TxnRequest {
|
||||||
|
compare: vec![compare],
|
||||||
|
success: vec![put_op],
|
||||||
|
failure: vec![],
|
||||||
|
})
|
||||||
|
.await?
|
||||||
|
.into_inner();
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Get a value as string
|
/// Get a value as string
|
||||||
pub async fn get_str(&mut self, key: &str) -> Result<Option<String>> {
|
pub async fn get_str(&mut self, key: &str) -> Result<Option<String>> {
|
||||||
let value = self.get(key.as_bytes()).await?;
|
let value = self.get(key.as_bytes()).await?;
|
||||||
|
|
|
||||||
|
|
@ -26,13 +26,9 @@ impl KvServiceImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create a response header
|
/// Create a response header
|
||||||
fn make_header(&self, revision: u64) -> crate::proto::ResponseHeader {
|
async fn make_header(&self, revision: u64) -> crate::proto::ResponseHeader {
|
||||||
make_header(
|
let term = self.raft.current_term().await;
|
||||||
self.cluster_id,
|
make_header(self.cluster_id, self.raft.node_id(), revision, term)
|
||||||
self.raft.node_id(),
|
|
||||||
revision,
|
|
||||||
0, // TODO: get actual term
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -74,7 +70,7 @@ impl Kv for KvServiceImpl {
|
||||||
let count = kvs.len() as i64;
|
let count = kvs.len() as i64;
|
||||||
|
|
||||||
Ok(Response::new(RangeResponse {
|
Ok(Response::new(RangeResponse {
|
||||||
header: Some(self.make_header(revision)),
|
header: Some(self.make_header(revision).await),
|
||||||
kvs,
|
kvs,
|
||||||
more: false,
|
more: false,
|
||||||
count,
|
count,
|
||||||
|
|
@ -107,7 +103,7 @@ impl Kv for KvServiceImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Response::new(PutResponse {
|
Ok(Response::new(PutResponse {
|
||||||
header: Some(self.make_header(revision)),
|
header: Some(self.make_header(revision).await),
|
||||||
prev_kv: None, // Not supported yet in custom RaftCore
|
prev_kv: None, // Not supported yet in custom RaftCore
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
@ -165,7 +161,7 @@ impl Kv for KvServiceImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Response::new(DeleteRangeResponse {
|
Ok(Response::new(DeleteRangeResponse {
|
||||||
header: Some(self.make_header(revision)),
|
header: Some(self.make_header(revision).await),
|
||||||
deleted: deleted_count,
|
deleted: deleted_count,
|
||||||
prev_kvs: vec![], // Not supported yet
|
prev_kvs: vec![], // Not supported yet
|
||||||
}))
|
}))
|
||||||
|
|
@ -234,7 +230,7 @@ impl Kv for KvServiceImpl {
|
||||||
warn!("Transaction response details not yet supported in custom Raft implementation");
|
warn!("Transaction response details not yet supported in custom Raft implementation");
|
||||||
|
|
||||||
Ok(Response::new(TxnResponse {
|
Ok(Response::new(TxnResponse {
|
||||||
header: Some(self.make_header(revision)),
|
header: Some(self.make_header(revision).await),
|
||||||
succeeded: true, // Assume success if no error
|
succeeded: true, // Assume success if no error
|
||||||
responses: vec![], // Not supported yet
|
responses: vec![], // Not supported yet
|
||||||
}))
|
}))
|
||||||
|
|
|
||||||
|
|
@ -1,52 +0,0 @@
|
||||||
use std::time::Duration;
|
|
||||||
use chainfire_core::ClusterBuilder;
|
|
||||||
use chainfire_types::{node::NodeRole, RaftRole};
|
|
||||||
use tokio::time::sleep;
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_single_node_bootstrap() {
|
|
||||||
let _ = tracing_subscriber::fmt::try_init();
|
|
||||||
|
|
||||||
// 1. Build a single node cluster
|
|
||||||
let cluster = ClusterBuilder::new(1)
|
|
||||||
.name("node-1")
|
|
||||||
.memory_storage()
|
|
||||||
.gossip_addr("127.0.0.1:0".parse().unwrap())
|
|
||||||
.raft_addr("127.0.0.1:0".parse().unwrap())
|
|
||||||
.role(NodeRole::ControlPlane)
|
|
||||||
.raft_role(RaftRole::Voter)
|
|
||||||
.bootstrap(true)
|
|
||||||
.build()
|
|
||||||
.await
|
|
||||||
.expect("Failed to build cluster");
|
|
||||||
|
|
||||||
let handle = cluster.handle();
|
|
||||||
|
|
||||||
// 2. Run the cluster in a background task
|
|
||||||
tokio::spawn(async move {
|
|
||||||
cluster.run().await.unwrap();
|
|
||||||
});
|
|
||||||
|
|
||||||
// 3. Wait for leader election
|
|
||||||
let mut leader_elected = false;
|
|
||||||
for _ in 0..10 {
|
|
||||||
if handle.is_leader() {
|
|
||||||
leader_elected = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
sleep(Duration::from_millis(500)).await;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert!(leader_elected, "Node 1 should become leader in bootstrap mode");
|
|
||||||
assert_eq!(handle.leader(), Some(1));
|
|
||||||
|
|
||||||
// 4. Test KV operations
|
|
||||||
let kv = handle.kv();
|
|
||||||
kv.put("test-key", b"test-value").await.expect("Put failed");
|
|
||||||
|
|
||||||
let value = kv.get("test-key").await.expect("Get failed");
|
|
||||||
assert_eq!(value, Some(b"test-value".to_vec()));
|
|
||||||
|
|
||||||
// 5. Shutdown
|
|
||||||
handle.shutdown();
|
|
||||||
}
|
|
||||||
|
|
@ -1,613 +0,0 @@
|
||||||
//! Integration tests for Leader Election (P1) and Log Replication (P2)
|
|
||||||
//!
|
|
||||||
//! Tests cover:
|
|
||||||
//! - Single-node auto-election
|
|
||||||
//! - 3-node majority election
|
|
||||||
//! - Role transitions
|
|
||||||
//! - Term management
|
|
||||||
//! - Heartbeat mechanism
|
|
||||||
//! - Log replication
|
|
||||||
//! - Leader failure recovery
|
|
||||||
|
|
||||||
#![cfg(all(test, feature = "custom-raft"))]
|
|
||||||
|
|
||||||
use std::sync::Arc;
|
|
||||||
use std::time::Duration;
|
|
||||||
use tokio::time;
|
|
||||||
use tokio::sync::mpsc;
|
|
||||||
|
|
||||||
use chainfire_raft::core::{
|
|
||||||
RaftCore, RaftConfig, RaftRole, NodeId,
|
|
||||||
};
|
|
||||||
use chainfire_raft::network::custom_test_client::{InMemoryRpcClient, RpcMessage};
|
|
||||||
use chainfire_storage::{LogStorage, StateMachine, RocksStore};
|
|
||||||
|
|
||||||
/// Helper to create a test node
|
|
||||||
async fn create_test_node(node_id: NodeId, peers: Vec<NodeId>) -> (Arc<RaftCore>, tempfile::TempDir) {
|
|
||||||
let temp_dir = tempfile::TempDir::new().unwrap();
|
|
||||||
let rocks = RocksStore::new(temp_dir.path()).unwrap();
|
|
||||||
let storage = Arc::new(LogStorage::new(rocks.clone()));
|
|
||||||
let state_machine = Arc::new(StateMachine::new(rocks).unwrap());
|
|
||||||
let network = Arc::new(InMemoryRpcClient::new());
|
|
||||||
|
|
||||||
let config = RaftConfig {
|
|
||||||
election_timeout_min: 150,
|
|
||||||
election_timeout_max: 300,
|
|
||||||
heartbeat_interval: 50,
|
|
||||||
};
|
|
||||||
|
|
||||||
let node = Arc::new(RaftCore::new(
|
|
||||||
node_id,
|
|
||||||
peers,
|
|
||||||
storage,
|
|
||||||
state_machine,
|
|
||||||
network,
|
|
||||||
config,
|
|
||||||
));
|
|
||||||
|
|
||||||
node.initialize().await.unwrap();
|
|
||||||
|
|
||||||
(node, temp_dir)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Helper to create a 3-node cluster with RPC wiring
|
|
||||||
async fn create_3node_cluster() -> (
|
|
||||||
Vec<Arc<RaftCore>>,
|
|
||||||
Vec<tempfile::TempDir>,
|
|
||||||
Arc<InMemoryRpcClient>,
|
|
||||||
) {
|
|
||||||
let network = Arc::new(InMemoryRpcClient::new());
|
|
||||||
let mut nodes = Vec::new();
|
|
||||||
let mut temp_dirs = Vec::new();
|
|
||||||
|
|
||||||
// Create 3 nodes
|
|
||||||
for node_id in 1..=3 {
|
|
||||||
let peers: Vec<NodeId> = (1..=3).filter(|&id| id != node_id).collect();
|
|
||||||
|
|
||||||
let temp_dir = tempfile::TempDir::new().unwrap();
|
|
||||||
let rocks = RocksStore::new(temp_dir.path()).unwrap();
|
|
||||||
let storage = Arc::new(LogStorage::new(rocks.clone()));
|
|
||||||
let state_machine = Arc::new(StateMachine::new(rocks).unwrap());
|
|
||||||
|
|
||||||
let config = RaftConfig {
|
|
||||||
election_timeout_min: 150, // 150ms - matches single-node test
|
|
||||||
election_timeout_max: 300, // 300ms
|
|
||||||
heartbeat_interval: 50, // 50ms - matches single-node test
|
|
||||||
};
|
|
||||||
|
|
||||||
let node = Arc::new(RaftCore::new(
|
|
||||||
node_id,
|
|
||||||
peers,
|
|
||||||
storage,
|
|
||||||
state_machine,
|
|
||||||
Arc::clone(&network) as Arc<dyn chainfire_raft::network::RaftRpcClient>,
|
|
||||||
config,
|
|
||||||
));
|
|
||||||
|
|
||||||
node.initialize().await.unwrap();
|
|
||||||
nodes.push(node);
|
|
||||||
temp_dirs.push(temp_dir);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wire up RPC channels for each node
|
|
||||||
for node in &nodes {
|
|
||||||
let node_id = node.node_id();
|
|
||||||
let (tx, mut rx) = mpsc::unbounded_channel::<RpcMessage>();
|
|
||||||
network.register(node_id, tx).await;
|
|
||||||
|
|
||||||
// Spawn handler for this node's RPC messages
|
|
||||||
let node_clone = Arc::clone(node);
|
|
||||||
tokio::spawn(async move {
|
|
||||||
eprintln!("[RPC Handler {}] Started", node_clone.node_id());
|
|
||||||
while let Some(msg) = rx.recv().await {
|
|
||||||
match msg {
|
|
||||||
RpcMessage::Vote(req, resp_tx) => {
|
|
||||||
eprintln!("[RPC Handler {}] Processing Vote from {}",
|
|
||||||
node_clone.node_id(), req.candidate_id);
|
|
||||||
node_clone.request_vote_rpc(req, resp_tx).await;
|
|
||||||
}
|
|
||||||
RpcMessage::AppendEntries(req, resp_tx) => {
|
|
||||||
eprintln!("[RPC Handler {}] Processing AppendEntries from {} term={}",
|
|
||||||
node_clone.node_id(), req.leader_id, req.term);
|
|
||||||
node_clone.append_entries_rpc(req, resp_tx).await;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
eprintln!("[RPC Handler {}] Stopped (channel closed)", node_clone.node_id());
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Give all RPC handler tasks time to start
|
|
||||||
tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;
|
|
||||||
|
|
||||||
(nodes, temp_dirs, network)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ============================================================================
|
|
||||||
// Test Cases
|
|
||||||
// ============================================================================
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_node_creation_and_initialization() {
|
|
||||||
// Test that we can create a node and initialize it
|
|
||||||
let (node, _temp_dir) = create_test_node(1, vec![2, 3]).await;
|
|
||||||
|
|
||||||
// Node should start as follower
|
|
||||||
assert_eq!(node.role().await, RaftRole::Follower);
|
|
||||||
|
|
||||||
// Node ID should be correct
|
|
||||||
assert_eq!(node.node_id(), 1);
|
|
||||||
|
|
||||||
// Term should start at 0
|
|
||||||
assert_eq!(node.current_term().await, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_role_transitions() {
|
|
||||||
// Test basic role enumeration
|
|
||||||
assert_ne!(RaftRole::Follower, RaftRole::Candidate);
|
|
||||||
assert_ne!(RaftRole::Candidate, RaftRole::Leader);
|
|
||||||
assert_ne!(RaftRole::Leader, RaftRole::Follower);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_term_persistence() {
|
|
||||||
// Test that term can be persisted and loaded
|
|
||||||
let temp_dir = tempfile::TempDir::new().unwrap();
|
|
||||||
let path = temp_dir.path().to_str().unwrap().to_string();
|
|
||||||
|
|
||||||
{
|
|
||||||
// Create first node and let it initialize
|
|
||||||
let rocks = RocksStore::new(&path).unwrap();
|
|
||||||
let storage = Arc::new(LogStorage::new(rocks.clone()));
|
|
||||||
let state_machine = Arc::new(StateMachine::new(rocks).unwrap());
|
|
||||||
let network = Arc::new(InMemoryRpcClient::new());
|
|
||||||
|
|
||||||
let node = Arc::new(RaftCore::new(
|
|
||||||
1,
|
|
||||||
vec![2, 3],
|
|
||||||
storage,
|
|
||||||
state_machine,
|
|
||||||
network,
|
|
||||||
RaftConfig::default(),
|
|
||||||
));
|
|
||||||
|
|
||||||
node.initialize().await.unwrap();
|
|
||||||
|
|
||||||
// Initial term should be 0
|
|
||||||
assert_eq!(node.current_term().await, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
// Create second node with same storage path
|
|
||||||
let rocks = RocksStore::new(&path).unwrap();
|
|
||||||
let storage = Arc::new(LogStorage::new(rocks.clone()));
|
|
||||||
let state_machine = Arc::new(StateMachine::new(rocks).unwrap());
|
|
||||||
let network = Arc::new(InMemoryRpcClient::new());
|
|
||||||
|
|
||||||
let node = Arc::new(RaftCore::new(
|
|
||||||
1,
|
|
||||||
vec![2, 3],
|
|
||||||
storage,
|
|
||||||
state_machine,
|
|
||||||
network,
|
|
||||||
RaftConfig::default(),
|
|
||||||
));
|
|
||||||
|
|
||||||
node.initialize().await.unwrap();
|
|
||||||
|
|
||||||
// Term should still be 0 (loaded from storage)
|
|
||||||
assert_eq!(node.current_term().await, 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_config_defaults() {
|
|
||||||
// Test that default config has reasonable values
|
|
||||||
let config = RaftConfig::default();
|
|
||||||
|
|
||||||
assert!(config.election_timeout_min > 0);
|
|
||||||
assert!(config.election_timeout_max > config.election_timeout_min);
|
|
||||||
assert!(config.heartbeat_interval > 0);
|
|
||||||
assert!(config.heartbeat_interval < config.election_timeout_min);
|
|
||||||
}
|
|
||||||
|
|
||||||
// ============================================================================
|
|
||||||
// P2: Log Replication Integration Tests
|
|
||||||
// ============================================================================
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_3node_cluster_formation() {
|
|
||||||
// Test 1: 3-Node Cluster Formation Test
|
|
||||||
// - 3 nodes start → Leader elected
|
|
||||||
// - All followers receive heartbeat
|
|
||||||
// - No election timeout occurs
|
|
||||||
|
|
||||||
let (nodes, _temp_dirs, _network) = create_3node_cluster().await;
|
|
||||||
|
|
||||||
// Start event loops for all nodes
|
|
||||||
let mut handles = Vec::new();
|
|
||||||
for node in &nodes {
|
|
||||||
let node_clone = Arc::clone(node);
|
|
||||||
let handle = tokio::spawn(async move {
|
|
||||||
let _ = node_clone.run().await;
|
|
||||||
});
|
|
||||||
handles.push(handle);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for leader election (should happen within ~500ms)
|
|
||||||
time::sleep(Duration::from_millis(500)).await;
|
|
||||||
|
|
||||||
// Check that exactly one leader was elected
|
|
||||||
let mut leader_count = 0;
|
|
||||||
let mut follower_count = 0;
|
|
||||||
let mut leader_id = None;
|
|
||||||
|
|
||||||
for node in &nodes {
|
|
||||||
match node.role().await {
|
|
||||||
RaftRole::Leader => {
|
|
||||||
leader_count += 1;
|
|
||||||
leader_id = Some(node.node_id());
|
|
||||||
}
|
|
||||||
RaftRole::Follower => {
|
|
||||||
follower_count += 1;
|
|
||||||
}
|
|
||||||
RaftRole::Candidate => {
|
|
||||||
// Should not have candidates after election
|
|
||||||
panic!("Node {} is still candidate after election", node.node_id());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
assert_eq!(leader_count, 1, "Expected exactly one leader");
|
|
||||||
assert_eq!(follower_count, 2, "Expected exactly two followers");
|
|
||||||
assert!(leader_id.is_some(), "Leader should be identified");
|
|
||||||
|
|
||||||
println!("✓ Leader elected: node {}", leader_id.unwrap());
|
|
||||||
|
|
||||||
// Wait a bit more to ensure heartbeats prevent election timeout
|
|
||||||
// Heartbeat interval is 50ms, election timeout is 150-300ms
|
|
||||||
// So after 400ms, no new election should occur
|
|
||||||
time::sleep(Duration::from_millis(400)).await;
|
|
||||||
|
|
||||||
// Verify leader is still the same
|
|
||||||
for node in &nodes {
|
|
||||||
if node.node_id() == leader_id.unwrap() {
|
|
||||||
assert_eq!(node.role().await, RaftRole::Leader, "Leader should remain leader");
|
|
||||||
} else {
|
|
||||||
assert_eq!(
|
|
||||||
node.role().await,
|
|
||||||
RaftRole::Follower,
|
|
||||||
"Followers should remain followers due to heartbeats"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("✓ Heartbeats prevent election timeout");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
#[ignore] // Requires client write API implementation
|
|
||||||
async fn test_log_replication() {
|
|
||||||
// Test 2: Log Replication Test
|
|
||||||
// - Leader adds entries
|
|
||||||
// - Replicated to all followers
|
|
||||||
// - commit_index synchronized
|
|
||||||
|
|
||||||
// TODO: Implement once client write API is ready
|
|
||||||
// This requires handle_client_write to be fully implemented
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
#[ignore] // Requires graceful node shutdown
|
|
||||||
async fn test_leader_failure_recovery() {
|
|
||||||
// Test 3: Leader Failure Test
|
|
||||||
// - Leader stops → New leader elected
|
|
||||||
// - Log consistency maintained
|
|
||||||
|
|
||||||
// TODO: Implement once we have graceful shutdown mechanism
|
|
||||||
// Currently, aborting the event loop doesn't cleanly stop the node
|
|
||||||
}
|
|
||||||
|
|
||||||
// ============================================================================
|
|
||||||
// Deferred complex tests
|
|
||||||
// ============================================================================
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
#[ignore] // Requires full cluster setup
|
|
||||||
async fn test_split_vote_recovery() {
|
|
||||||
// Test that cluster recovers from split vote
|
|
||||||
// Deferred: Requires complex timing control
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
#[ignore] // Requires node restart mechanism
|
|
||||||
async fn test_vote_persistence_across_restart() {
|
|
||||||
// Test that votes persist across node restarts
|
|
||||||
// Deferred: Requires proper shutdown/startup sequencing
|
|
||||||
}
|
|
||||||
|
|
||||||
// ============================================================================
|
|
||||||
// P3: Commitment & State Machine Integration Tests
|
|
||||||
// ============================================================================
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_write_replicate_commit() {
|
|
||||||
// Test: Client write on leader → replication → commit → state machine apply
|
|
||||||
// Verifies the complete write→replicate→commit→apply flow
|
|
||||||
|
|
||||||
use chainfire_types::command::RaftCommand;
|
|
||||||
|
|
||||||
let (nodes, _temp_dirs, _network) = create_3node_cluster().await;
|
|
||||||
|
|
||||||
// Start event loops for all nodes
|
|
||||||
let mut handles = Vec::new();
|
|
||||||
for node in &nodes {
|
|
||||||
let node_clone = Arc::clone(node);
|
|
||||||
let handle = tokio::spawn(async move {
|
|
||||||
let _ = node_clone.run().await;
|
|
||||||
});
|
|
||||||
handles.push(handle);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for leader election (election timeout is 2-4s)
|
|
||||||
time::sleep(Duration::from_millis(5000)).await;
|
|
||||||
|
|
||||||
// Find the leader
|
|
||||||
let mut leader = None;
|
|
||||||
for node in &nodes {
|
|
||||||
if matches!(node.role().await, RaftRole::Leader) {
|
|
||||||
leader = Some(node);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let leader = leader.expect("Leader should be elected");
|
|
||||||
|
|
||||||
println!("✓ Leader elected: node {}", leader.node_id());
|
|
||||||
|
|
||||||
// Submit a write command to the leader
|
|
||||||
let cmd = RaftCommand::Put {
|
|
||||||
key: b"test_key_1".to_vec(),
|
|
||||||
value: b"test_value_1".to_vec(),
|
|
||||||
lease_id: None,
|
|
||||||
prev_kv: false,
|
|
||||||
};
|
|
||||||
|
|
||||||
leader
|
|
||||||
.client_write(cmd)
|
|
||||||
.await
|
|
||||||
.expect("Client write should succeed");
|
|
||||||
|
|
||||||
println!("✓ Client write submitted to leader");
|
|
||||||
|
|
||||||
// Wait for replication and commit (heartbeat + replication + commit)
|
|
||||||
// Heartbeat interval is 50ms, need multiple rounds:
|
|
||||||
// 1. First heartbeat sends entries
|
|
||||||
// 2. Followers ack, leader updates match_index and commit_index
|
|
||||||
// 3. Second heartbeat propagates new leader_commit to followers
|
|
||||||
// 4. Followers update their commit_index and apply entries
|
|
||||||
// Give extra time to avoid re-election issues
|
|
||||||
time::sleep(Duration::from_millis(1500)).await;
|
|
||||||
|
|
||||||
// Debug: Check all nodes' roles and states
|
|
||||||
println!("\nDEBUG: All nodes after write:");
|
|
||||||
for node in &nodes {
|
|
||||||
println!(" Node {} role={:?} term={} commit_index={} last_applied={}",
|
|
||||||
node.node_id(), node.role().await, node.current_term().await,
|
|
||||||
node.commit_index().await, node.last_applied().await);
|
|
||||||
}
|
|
||||||
println!();
|
|
||||||
|
|
||||||
// Verify that the value is committed and applied on all nodes
|
|
||||||
for node in &nodes {
|
|
||||||
let commit_index = node.commit_index().await;
|
|
||||||
let last_applied = node.last_applied().await;
|
|
||||||
|
|
||||||
assert!(
|
|
||||||
commit_index >= 1,
|
|
||||||
"Node {} should have commit_index >= 1, got {}",
|
|
||||||
node.node_id(),
|
|
||||||
commit_index
|
|
||||||
);
|
|
||||||
assert!(
|
|
||||||
last_applied >= 1,
|
|
||||||
"Node {} should have last_applied >= 1, got {}",
|
|
||||||
node.node_id(),
|
|
||||||
last_applied
|
|
||||||
);
|
|
||||||
|
|
||||||
// Verify the value exists in the state machine
|
|
||||||
let state_machine = node.state_machine();
|
|
||||||
let result = state_machine.kv().get(b"test_key_1").expect("Get should succeed");
|
|
||||||
|
|
||||||
assert!(
|
|
||||||
result.is_some(),
|
|
||||||
"Node {} should have test_key_1 in state machine",
|
|
||||||
node.node_id()
|
|
||||||
);
|
|
||||||
|
|
||||||
let entry = result.unwrap();
|
|
||||||
assert_eq!(
|
|
||||||
entry.value,
|
|
||||||
b"test_value_1",
|
|
||||||
"Node {} has wrong value for test_key_1",
|
|
||||||
node.node_id()
|
|
||||||
);
|
|
||||||
|
|
||||||
println!(
|
|
||||||
"✓ Node {} has test_key_1=test_value_1 (commit_index={}, last_applied={})",
|
|
||||||
node.node_id(),
|
|
||||||
commit_index,
|
|
||||||
last_applied
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("✓ All nodes have committed and applied the write");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_commit_consistency() {
|
|
||||||
// Test: Multiple writes preserve order across all nodes
|
|
||||||
// Verifies that the commit mechanism maintains consistency
|
|
||||||
|
|
||||||
use chainfire_types::command::RaftCommand;
|
|
||||||
|
|
||||||
let (nodes, _temp_dirs, _network) = create_3node_cluster().await;
|
|
||||||
|
|
||||||
// Start event loops
|
|
||||||
let mut handles = Vec::new();
|
|
||||||
for node in &nodes {
|
|
||||||
let node_clone = Arc::clone(node);
|
|
||||||
let handle = tokio::spawn(async move {
|
|
||||||
let _ = node_clone.run().await;
|
|
||||||
});
|
|
||||||
handles.push(handle);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for leader election (election timeout is 2-4s)
|
|
||||||
time::sleep(Duration::from_millis(5000)).await;
|
|
||||||
|
|
||||||
// Find the leader
|
|
||||||
let mut leader = None;
|
|
||||||
for node in &nodes {
|
|
||||||
if matches!(node.role().await, RaftRole::Leader) {
|
|
||||||
leader = Some(node);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let leader = leader.expect("Leader should be elected");
|
|
||||||
|
|
||||||
println!("✓ Leader elected: node {}", leader.node_id());
|
|
||||||
|
|
||||||
// Submit multiple writes in sequence
|
|
||||||
for i in 1..=5 {
|
|
||||||
let cmd = RaftCommand::Put {
|
|
||||||
key: format!("key_{}", i).into_bytes(),
|
|
||||||
value: format!("value_{}", i).into_bytes(),
|
|
||||||
lease_id: None,
|
|
||||||
prev_kv: false,
|
|
||||||
};
|
|
||||||
|
|
||||||
leader
|
|
||||||
.client_write(cmd)
|
|
||||||
.await
|
|
||||||
.expect("Client write should succeed");
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("✓ Submitted 5 writes to leader");
|
|
||||||
|
|
||||||
// Wait for all writes to commit and apply
|
|
||||||
time::sleep(Duration::from_millis(500)).await;
|
|
||||||
|
|
||||||
// Verify all nodes have all 5 keys in correct order
|
|
||||||
for node in &nodes {
|
|
||||||
let commit_index = node.commit_index().await;
|
|
||||||
let last_applied = node.last_applied().await;
|
|
||||||
|
|
||||||
assert!(
|
|
||||||
commit_index >= 5,
|
|
||||||
"Node {} should have commit_index >= 5, got {}",
|
|
||||||
node.node_id(),
|
|
||||||
commit_index
|
|
||||||
);
|
|
||||||
assert!(
|
|
||||||
last_applied >= 5,
|
|
||||||
"Node {} should have last_applied >= 5, got {}",
|
|
||||||
node.node_id(),
|
|
||||||
last_applied
|
|
||||||
);
|
|
||||||
|
|
||||||
let state_machine = node.state_machine();
|
|
||||||
|
|
||||||
for i in 1..=5 {
|
|
||||||
let key = format!("key_{}", i).into_bytes();
|
|
||||||
let expected_value = format!("value_{}", i).into_bytes();
|
|
||||||
|
|
||||||
let result = state_machine.kv().get(&key).expect("Get should succeed");
|
|
||||||
|
|
||||||
assert!(
|
|
||||||
result.is_some(),
|
|
||||||
"Node {} missing key_{}",
|
|
||||||
node.node_id(),
|
|
||||||
i
|
|
||||||
);
|
|
||||||
|
|
||||||
let entry = result.unwrap();
|
|
||||||
assert_eq!(
|
|
||||||
entry.value, expected_value,
|
|
||||||
"Node {} has wrong value for key_{}",
|
|
||||||
node.node_id(), i
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
println!(
|
|
||||||
"✓ Node {} has all 5 keys in correct order (commit_index={}, last_applied={})",
|
|
||||||
node.node_id(),
|
|
||||||
commit_index,
|
|
||||||
last_applied
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("✓ All nodes maintain consistent order");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_leader_only_write() {
|
|
||||||
// Test: Follower should reject client writes
|
|
||||||
// Verifies that only the leader can accept writes (Raft safety)
|
|
||||||
|
|
||||||
use chainfire_types::command::RaftCommand;
|
|
||||||
use chainfire_raft::core::RaftError;
|
|
||||||
|
|
||||||
let (nodes, _temp_dirs, _network) = create_3node_cluster().await;
|
|
||||||
|
|
||||||
// Start event loops
|
|
||||||
let mut handles = Vec::new();
|
|
||||||
for node in &nodes {
|
|
||||||
let node_clone = Arc::clone(node);
|
|
||||||
let handle = tokio::spawn(async move {
|
|
||||||
let _ = node_clone.run().await;
|
|
||||||
});
|
|
||||||
handles.push(handle);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for leader election (election timeout is 2-4s)
|
|
||||||
time::sleep(Duration::from_millis(5000)).await;
|
|
||||||
|
|
||||||
// Find a follower
|
|
||||||
let mut follower = None;
|
|
||||||
for node in &nodes {
|
|
||||||
if matches!(node.role().await, RaftRole::Follower) {
|
|
||||||
follower = Some(node);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let follower = follower.expect("Follower should exist");
|
|
||||||
|
|
||||||
println!("✓ Found follower: node {}", follower.node_id());
|
|
||||||
|
|
||||||
// Try to write to the follower
|
|
||||||
let cmd = RaftCommand::Put {
|
|
||||||
key: b"follower_write".to_vec(),
|
|
||||||
value: b"should_fail".to_vec(),
|
|
||||||
lease_id: None,
|
|
||||||
prev_kv: false,
|
|
||||||
};
|
|
||||||
|
|
||||||
let result = follower.client_write(cmd).await;
|
|
||||||
|
|
||||||
// Should return NotLeader error
|
|
||||||
assert!(
|
|
||||||
result.is_err(),
|
|
||||||
"Follower write should fail with NotLeader error"
|
|
||||||
);
|
|
||||||
|
|
||||||
if let Err(RaftError::NotLeader { .. }) = result {
|
|
||||||
println!("✓ Follower correctly rejected write with NotLeader error");
|
|
||||||
} else {
|
|
||||||
panic!(
|
|
||||||
"Expected NotLeader error, got: {:?}",
|
|
||||||
result.err().unwrap()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,274 +0,0 @@
|
||||||
//! Property-based tests for `chainfire-raft` using an in-process simulated cluster.
|
|
||||||
//!
|
|
||||||
//! These tests aim to catch timing/partition edge cases with high reproducibility.
|
|
||||||
|
|
||||||
#![cfg(all(test, feature = "custom-raft"))]
|
|
||||||
|
|
||||||
use std::sync::Arc;
|
|
||||||
use std::time::Duration;
|
|
||||||
|
|
||||||
use proptest::prelude::*;
|
|
||||||
use tokio::sync::mpsc;
|
|
||||||
use tokio::time;
|
|
||||||
|
|
||||||
use chainfire_raft::core::{RaftConfig, RaftCore};
|
|
||||||
use chainfire_raft::network::test_client::{RpcMessage, SimulatedNetwork};
|
|
||||||
use chainfire_raft::storage::{EntryPayload, LogEntry, LogStorage, StateMachine};
|
|
||||||
use chainfire_types::command::RaftCommand;
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
enum Op {
|
|
||||||
Tick(u64),
|
|
||||||
Disconnect(u64, u64),
|
|
||||||
Reconnect(u64, u64),
|
|
||||||
Delay(u64, u64, u64),
|
|
||||||
ClearLink(u64, u64),
|
|
||||||
Write(u64, u8, u8),
|
|
||||||
}
|
|
||||||
|
|
||||||
fn node_id() -> impl Strategy<Value = u64> {
|
|
||||||
1_u64..=3_u64
|
|
||||||
}
|
|
||||||
|
|
||||||
fn distinct_pair() -> impl Strategy<Value = (u64, u64)> {
|
|
||||||
(node_id(), node_id()).prop_filter("distinct nodes", |(a, b)| a != b)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Strategy yielding one random cluster operation, weighted uniformly
/// across time advances, partitions, delays, link resets, and writes.
fn op_strategy() -> impl Strategy<Value = Op> {
    prop_oneof![
        // Advance simulated time by up to 300ms.
        (0_u64..=300).prop_map(Op::Tick),
        distinct_pair().prop_map(|(a, b)| Op::Disconnect(a, b)),
        distinct_pair().prop_map(|(a, b)| Op::Reconnect(a, b)),
        // Per-link message delay of up to 50ms.
        (distinct_pair(), 0_u64..=50).prop_map(|((a, b), d)| Op::Delay(a, b, d)),
        distinct_pair().prop_map(|(a, b)| Op::ClearLink(a, b)),
        // Client writes: pick node + small key/value.
        (node_id(), any::<u8>(), any::<u8>()).prop_map(|(n, k, v)| Op::Write(n, k, v)),
    ]
}
|
|
||||||
|
|
||||||
/// Strategy yielding a schedule of up to 40 operations (possibly empty).
fn ops_strategy() -> impl Strategy<Value = Vec<Op>> {
    prop::collection::vec(op_strategy(), 0..40)
}
|
|
||||||
|
|
||||||
async fn advance_ms(total_ms: u64) {
|
|
||||||
// Advance in small steps to avoid “simultaneous” timer firings starving message handling.
|
|
||||||
let step_ms: u64 = 10;
|
|
||||||
let mut remaining = total_ms;
|
|
||||||
while remaining > 0 {
|
|
||||||
let d = remaining.min(step_ms);
|
|
||||||
time::advance(Duration::from_millis(d)).await;
|
|
||||||
tokio::task::yield_now().await;
|
|
||||||
remaining -= d;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Build a 3-node in-process cluster wired over a `SimulatedNetwork`.
///
/// Each node gets in-memory log storage and state machine, a deterministic
/// per-node RNG seed, and a spawned task that services its incoming
/// Vote/AppendEntries RPCs. Returns the nodes plus the shared network
/// handle so tests can inject partitions and delays.
async fn create_3node_cluster() -> (Vec<Arc<RaftCore>>, Arc<SimulatedNetwork>) {
    let network = Arc::new(SimulatedNetwork::new());
    let mut nodes = Vec::new();

    for node_id in 1..=3_u64 {
        // Peers = every node id except our own.
        let peers: Vec<u64> = (1..=3_u64).filter(|&id| id != node_id).collect();
        let storage = Arc::new(LogStorage::new_in_memory());
        let state_machine = Arc::new(StateMachine::new_in_memory());

        let config = RaftConfig {
            election_timeout_min: 150,
            election_timeout_max: 300,
            heartbeat_interval: 50,
            // Deterministic per-node seed for reproducibility.
            deterministic_seed: Some(node_id),
        };

        let node = Arc::new(RaftCore::new(
            node_id,
            peers,
            storage,
            state_machine,
            Arc::new(network.client(node_id)) as Arc<dyn chainfire_raft::network::RaftRpcClient>,
            config,
        ));
        node.initialize().await.unwrap();
        nodes.push(node);
    }

    // Wire up RPC handlers: one task per node drains its inbound channel
    // and dispatches to the matching RaftCore RPC entry point.
    for node in &nodes {
        let node_id = node.node_id();
        let (tx, mut rx) = mpsc::unbounded_channel::<RpcMessage>();
        network.register(node_id, tx).await;

        let node_clone: Arc<RaftCore> = Arc::clone(node);
        tokio::spawn(async move {
            while let Some(msg) = rx.recv().await {
                match msg {
                    RpcMessage::Vote(req, resp_tx) => {
                        node_clone.request_vote_rpc(req, resp_tx).await;
                    }
                    RpcMessage::AppendEntries(req, resp_tx) => {
                        node_clone.append_entries_rpc(req, resp_tx).await;
                    }
                }
            }
        });
    }

    (nodes, network)
}
|
|
||||||
|
|
||||||
/// Stable byte fingerprint of a log entry payload, used for cross-node
/// log equality checks.
fn payload_fingerprint(payload: &EntryPayload<Vec<u8>>) -> Vec<u8> {
    // Serialize the enum for stable equality checks across variants.
    // NOTE(review): a serialization failure collapses to an empty Vec, so two
    // distinct failing payloads would compare equal — acceptable for tests,
    // but worth confirming bincode can't fail on these payloads.
    bincode::serialize(payload).unwrap_or_default()
}
|
|
||||||
|
|
||||||
/// Assert core Raft safety invariants across all nodes.
///
/// Per node: `last_applied <= commit_index <= last_log_index` (the apply
/// pipeline never runs ahead of commit, and commit never runs ahead of the
/// log). Across node pairs: the Log Matching Property — if two logs hold an
/// entry with the same (index, term), their prefixes up to that index are
/// identical.
///
/// Panics (via assert) on any violation; intended for use at the end of a
/// property-test run.
async fn assert_raft_invariants(nodes: &[Arc<RaftCore>]) {
    // Per-node monotonic invariants.
    for node in nodes {
        let commit = node.commit_index().await;
        let last_applied = node.last_applied().await;

        let st = node.storage().get_log_state().expect("log state");
        // An empty log reports index 0.
        let last_log_index = st.last_log_id.map(|id| id.index).unwrap_or(0);

        assert!(
            last_applied <= commit,
            "node {}: last_applied={} > commit_index={}",
            node.node_id(),
            last_applied,
            commit
        );
        assert!(
            commit <= last_log_index,
            "node {}: commit_index={} > last_log_index={}",
            node.node_id(),
            commit,
            last_log_index
        );
    }

    // Log Matching Property:
    // If two logs contain an entry with the same index and term, then the logs are identical
    // for all entries up through that index.
    //
    // Collect each node's log as index -> (term, payload fingerprint) for
    // cheap pairwise comparison.
    let mut node_logs: Vec<std::collections::BTreeMap<u64, (u64, Vec<u8>)>> = Vec::new();
    for node in nodes {
        let st = node.storage().get_log_state().expect("log state");
        let last = st.last_log_id.map(|id| id.index).unwrap_or(0);
        let entries: Vec<LogEntry<Vec<u8>>> = if last == 0 {
            vec![]
        } else {
            node.storage()
                .get_log_entries(1..=last)
                .expect("log entries")
        };

        let mut m = std::collections::BTreeMap::new();
        for e in entries {
            m.insert(e.log_id.index, (e.log_id.term, payload_fingerprint(&e.payload)));
        }
        node_logs.push(m);
    }

    // Compare every unordered pair of nodes exactly once.
    for a in 0..nodes.len() {
        for b in (a + 1)..nodes.len() {
            let la = &node_logs[a];
            let lb = &node_logs[b];

            for (idx, (term_a, payload_a)) in la.iter() {
                if let Some((term_b, payload_b)) = lb.get(idx) {
                    if term_a == term_b {
                        // Same (index, term) => payloads must match...
                        assert_eq!(
                            payload_a, payload_b,
                            "log mismatch at idx={} term={} (nodes {} vs {})",
                            idx,
                            term_a,
                            nodes[a].node_id(),
                            nodes[b].node_id()
                        );

                        // ...and so must the entire prefix up to that index.
                        for j in 1..=*idx {
                            assert_eq!(
                                la.get(&j),
                                lb.get(&j),
                                "log matching violated at idx={} (prefix {} differs) nodes {} vs {}",
                                idx,
                                j,
                                nodes[a].node_id(),
                                nodes[b].node_id()
                            );
                        }
                    }
                }
            }
        }
    }
}
|
|
||||||
|
|
||||||
proptest! {
    #![proptest_config(ProptestConfig {
        // Keep the case count modest: each case spins up a full simulated
        // cluster with spawned event loops.
        cases: 32,
        .. ProptestConfig::default()
    })]

    // Drive a random schedule of time advances, partitions, delays, and
    // client writes against a 3-node cluster under paused tokio time, then
    // assert the Raft safety invariants still hold.
    #[test]
    fn prop_raft_log_matching_holds(ops in ops_strategy()) {
        // proptest tests are sync; build a current-thread runtime so
        // tokio::time::pause() behaves deterministically.
        let rt = tokio::runtime::Builder::new_current_thread()
            .enable_time()
            .build()
            .unwrap();

        rt.block_on(async move {
            tokio::time::pause();

            let (nodes, network) = create_3node_cluster().await;

            // Start event loops.
            let mut handles = Vec::new();
            for node in &nodes {
                let node_clone = Arc::clone(node);
                handles.push(tokio::spawn(async move {
                    let _ = node_clone.run().await;
                }));
            }
            tokio::task::yield_now().await;

            // Drive a randomized sequence of operations.
            for op in ops {
                match op {
                    Op::Tick(ms) => advance_ms(ms).await,
                    Op::Disconnect(a, b) => network.disconnect(a, b).await,
                    Op::Reconnect(a, b) => network.reconnect(a, b).await,
                    Op::Delay(a, b, d) => {
                        use chainfire_raft::network::test_client::LinkBehavior;
                        // Apply the delay symmetrically in both directions.
                        network.set_link(a, b, LinkBehavior::Delay(Duration::from_millis(d))).await;
                        network.set_link(b, a, LinkBehavior::Delay(Duration::from_millis(d))).await;
                    }
                    Op::ClearLink(a, b) => {
                        network.clear_link(a, b).await;
                        network.clear_link(b, a).await;
                    }
                    Op::Write(n, k, v) => {
                        let node = nodes.iter().find(|x| x.node_id() == n).unwrap();
                        // Writes may legitimately fail (e.g. not leader);
                        // only the safety invariants matter, so ignore errors.
                        let _ = node.client_write(RaftCommand::Put {
                            key: vec![k],
                            value: vec![v],
                            lease_id: None,
                            prev_kv: false,
                        }).await;
                    }
                }
            }

            // Let the system settle a bit.
            advance_ms(500).await;

            assert_raft_invariants(&nodes).await;

            // Best-effort cleanup.
            for h in handles {
                h.abort();
            }
        });
    }
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,658 +0,0 @@
|
||||||
//! Chainfire 3-Node Cluster Integration Test
|
|
||||||
//!
|
|
||||||
//! Verifies HA behavior: leader election, state replication, and node recovery.
|
|
||||||
|
|
||||||
use chainfire_client::Client;
|
|
||||||
use chainfire_server::{
|
|
||||||
config::{ClusterConfig, NetworkConfig, NodeConfig, RaftConfig, ServerConfig, StorageConfig},
|
|
||||||
server::Server,
|
|
||||||
};
|
|
||||||
use chainfire_types::RaftRole;
|
|
||||||
use std::net::SocketAddr;
|
|
||||||
use std::time::Duration;
|
|
||||||
use tokio::time::sleep;
|
|
||||||
|
|
||||||
/// Create a 3-node cluster configuration with join flow.
/// Node 1 bootstraps alone; nodes 2 & 3 join via the member_add API.
///
/// Port layout per node id: API = 12379/22379/32379, Raft = API+1,
/// gossip = API+2. Returns the config plus the `TempDir` guard that owns
/// the data directory — the caller must keep the guard alive for the
/// server's lifetime, or the directory is deleted on drop.
fn cluster_config_with_join(node_id: u64) -> (ServerConfig, tempfile::TempDir) {
    let base_port = match node_id {
        1 => 12379,
        2 => 22379,
        3 => 32379,
        _ => panic!("Invalid node_id"),
    };

    let api_addr: SocketAddr = format!("127.0.0.1:{}", base_port).parse().unwrap();
    let raft_addr: SocketAddr = format!("127.0.0.1:{}", base_port + 1).parse().unwrap();
    let gossip_addr: SocketAddr = format!("127.0.0.1:{}", base_port + 2).parse().unwrap();

    let temp_dir = tempfile::tempdir().unwrap();

    let config = ServerConfig {
        node: NodeConfig {
            id: node_id,
            name: format!("test-node-{}", node_id),
            role: "control_plane".to_string(),
        },
        cluster: ClusterConfig {
            id: 1,
            bootstrap: node_id == 1, // Only node 1 bootstraps
            initial_members: vec![], // Node 1 starts alone, others join via API
        },
        network: NetworkConfig {
            api_addr,
            http_addr: format!("127.0.0.1:{}", 28080 + node_id).parse().unwrap(),
            raft_addr,
            gossip_addr,
            tls: None,
        },
        storage: StorageConfig {
            data_dir: temp_dir.path().to_path_buf(),
        },
        // Node 1 is Voter (bootstrap), nodes 2 & 3 are Learner (join via member_add)
        raft: RaftConfig {
            role: if node_id == 1 { RaftRole::Voter } else { RaftRole::Learner },
        },
    };

    (config, temp_dir)
}
|
|
||||||
|
|
||||||
/// Alias for backwards compatibility (old tests use this).
/// Delegates directly to [`cluster_config_with_join`].
fn cluster_config(node_id: u64) -> (ServerConfig, tempfile::TempDir) {
    cluster_config_with_join(node_id)
}
|
|
||||||
|
|
||||||
/// Create a 3-node cluster configuration with simultaneous bootstrap.
/// All nodes share the same `initial_members` list (avoids the add_learner
/// bug), and every node starts as a Voter.
///
/// Same port layout as [`cluster_config_with_join`]; returns the config
/// plus the `TempDir` guard that must outlive the server.
fn cluster_config_simultaneous_bootstrap(node_id: u64) -> (ServerConfig, tempfile::TempDir) {
    use chainfire_server::config::MemberConfig;

    let base_port = match node_id {
        1 => 12379,
        2 => 22379,
        3 => 32379,
        _ => panic!("Invalid node_id"),
    };

    let api_addr: SocketAddr = format!("127.0.0.1:{}", base_port).parse().unwrap();
    let raft_addr: SocketAddr = format!("127.0.0.1:{}", base_port + 1).parse().unwrap();
    let gossip_addr: SocketAddr = format!("127.0.0.1:{}", base_port + 2).parse().unwrap();

    let temp_dir = tempfile::tempdir().unwrap();

    // All nodes have the same initial_members list
    let initial_members = vec![
        MemberConfig { id: 1, raft_addr: "127.0.0.1:12380".to_string() },
        MemberConfig { id: 2, raft_addr: "127.0.0.1:22380".to_string() },
        MemberConfig { id: 3, raft_addr: "127.0.0.1:32380".to_string() },
    ];

    let config = ServerConfig {
        node: NodeConfig {
            id: node_id,
            name: format!("test-node-{}", node_id),
            role: "control_plane".to_string(),
        },
        cluster: ClusterConfig {
            id: 1,
            bootstrap: node_id == 1, // Only node 1 bootstraps, but with full member list
            initial_members: initial_members.clone(),
        },
        network: NetworkConfig {
            api_addr,
            http_addr: format!("127.0.0.1:{}", 28080 + node_id).parse().unwrap(),
            raft_addr,
            gossip_addr,
            tls: None,
        },
        storage: StorageConfig {
            data_dir: temp_dir.path().to_path_buf(),
        },
        raft: RaftConfig {
            role: RaftRole::Voter, // All nodes are voters from the start
        },
    };

    (config, temp_dir)
}
|
|
||||||
|
|
||||||
/// Create a single-node cluster configuration (for testing basic Raft functionality)
|
|
||||||
fn single_node_config() -> (ServerConfig, tempfile::TempDir) {
|
|
||||||
let api_addr: SocketAddr = "127.0.0.1:12379".parse().unwrap();
|
|
||||||
let raft_addr: SocketAddr = "127.0.0.1:12380".parse().unwrap();
|
|
||||||
let gossip_addr: SocketAddr = "127.0.0.1:12381".parse().unwrap();
|
|
||||||
|
|
||||||
let temp_dir = tempfile::tempdir().unwrap();
|
|
||||||
|
|
||||||
let config = ServerConfig {
|
|
||||||
node: NodeConfig {
|
|
||||||
id: 1,
|
|
||||||
name: "test-node-1".to_string(),
|
|
||||||
role: "control_plane".to_string(),
|
|
||||||
},
|
|
||||||
cluster: ClusterConfig {
|
|
||||||
id: 1,
|
|
||||||
bootstrap: true, // Single-node bootstrap
|
|
||||||
initial_members: vec![], // Empty = single node
|
|
||||||
},
|
|
||||||
network: NetworkConfig {
|
|
||||||
api_addr,
|
|
||||||
http_addr: format!("127.0.0.1:{}", 28080 + node_id).parse().unwrap(),
|
|
||||||
raft_addr,
|
|
||||||
gossip_addr,
|
|
||||||
tls: None,
|
|
||||||
},
|
|
||||||
storage: StorageConfig {
|
|
||||||
data_dir: temp_dir.path().to_path_buf(),
|
|
||||||
},
|
|
||||||
raft: RaftConfig::default(),
|
|
||||||
};
|
|
||||||
|
|
||||||
(config, temp_dir)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Single-node bootstrap: node 1 must elect itself leader, report a
/// positive term, and serve basic KV put/get through the client API.
#[tokio::test]
#[ignore] // Run with: cargo test --test cluster_integration -- --ignored
async fn test_single_node_raft_leader_election() {
    println!("\n=== Test: Single-Node Raft Leader Election ===");

    // Start single node
    let (config, _temp) = single_node_config();
    let api_addr = config.network.api_addr;
    println!("Creating single-node cluster...");
    let server = Server::new(config).await.unwrap();
    let handle = tokio::spawn(async move { server.run().await });
    println!("Node started: {}", api_addr);

    // Wait for leader election
    // NOTE(review): fixed 2s sleep — may flake on slow CI; confirm margin.
    println!("Waiting for leader election...");
    sleep(Duration::from_secs(2)).await;

    // Verify leader elected
    let mut client = Client::connect(format!("http://{}", api_addr))
        .await
        .expect("Failed to connect");

    let status = client.status().await.expect("Failed to get status");
    println!(
        "Node status: leader={}, term={}",
        status.leader, status.raft_term
    );

    assert_eq!(status.leader, 1, "Node 1 should be leader in single-node cluster");
    assert!(status.raft_term > 0, "Raft term should be > 0");

    // Test basic KV operations
    println!("Testing KV operations...");
    client.put("test-key", "test-value").await.unwrap();
    let value = client.get("test-key").await.unwrap();
    assert_eq!(value, Some(b"test-value".to_vec()));

    println!("✓ Single-node Raft working correctly");

    // Cleanup
    handle.abort();
}
|
|
||||||
|
|
||||||
/// Three-node formation via the join flow: node 1 bootstraps alone and
/// becomes leader; nodes 2 & 3 start empty and are added through the
/// leader's member_add API; afterwards all three nodes must agree on the
/// same leader.
#[tokio::test]
#[ignore] // Run with: cargo test --test cluster_integration -- --ignored
async fn test_3node_leader_election_with_join() {
    println!("\n=== Test: 3-Node Leader Election with Join Flow ===");

    // Start Node 1 (bootstrap alone)
    let (config1, _temp1) = cluster_config_with_join(1);
    let api1 = config1.network.api_addr;
    let raft1 = config1.network.raft_addr;
    println!("Creating Node 1 (bootstrap)...");
    let server1 = Server::new(config1).await.unwrap();
    let handle1 = tokio::spawn(async move { server1.run().await });
    println!("Node 1 started: API={}, Raft={}", api1, raft1);

    // Wait for node 1 to become leader
    sleep(Duration::from_secs(2)).await;

    // Verify node 1 is leader
    let mut client1 = Client::connect(format!("http://{}", api1))
        .await
        .expect("Failed to connect to node 1");
    let status1 = client1.status().await.expect("Failed to get status");
    println!("Node 1 status: leader={}, term={}", status1.leader, status1.raft_term);
    assert_eq!(status1.leader, 1, "Node 1 should be leader");

    // Start Node 2 (no bootstrap)
    let (config2, _temp2) = cluster_config_with_join(2);
    let api2 = config2.network.api_addr;
    let raft2 = config2.network.raft_addr;
    println!("Creating Node 2...");
    let server2 = Server::new(config2).await.unwrap();
    let handle2 = tokio::spawn(async move { server2.run().await });
    println!("Node 2 started: API={}, Raft={}", api2, raft2);
    sleep(Duration::from_millis(500)).await;

    // Start Node 3 (no bootstrap)
    let (config3, _temp3) = cluster_config_with_join(3);
    let api3 = config3.network.api_addr;
    let raft3 = config3.network.raft_addr;
    println!("Creating Node 3...");
    let server3 = Server::new(config3).await.unwrap();
    let handle3 = tokio::spawn(async move { server3.run().await });
    println!("Node 3 started: API={}, Raft={}", api3, raft3);
    sleep(Duration::from_millis(500)).await;

    // Add node 2 to cluster via member_add API
    println!("Adding node 2 to cluster via member_add API...");
    let member2_id = client1
        .member_add(2, raft2.to_string(), false) // node_id=2, false=voter
        .await
        .expect("Failed to add node 2");
    println!("Node 2 added with ID: {}", member2_id);
    assert_eq!(member2_id, 2, "Node 2 should have ID 2");

    // Add node 3 to cluster via member_add API
    println!("Adding node 3 to cluster via member_add API...");
    let member3_id = client1
        .member_add(3, raft3.to_string(), false) // node_id=3, false=voter
        .await
        .expect("Failed to add node 3");
    println!("Node 3 added with ID: {}", member3_id);
    assert_eq!(member3_id, 3, "Node 3 should have ID 3");

    // Wait for cluster membership changes to propagate
    sleep(Duration::from_secs(3)).await;

    // Verify all nodes see the same leader
    let status1 = client1.status().await.expect("Failed to get status from node 1");
    println!("Node 1 final status: leader={}, term={}", status1.leader, status1.raft_term);

    let mut client2 = Client::connect(format!("http://{}", api2))
        .await
        .expect("Failed to connect to node 2");
    let status2 = client2.status().await.expect("Failed to get status from node 2");
    println!("Node 2 final status: leader={}, term={}", status2.leader, status2.raft_term);

    let mut client3 = Client::connect(format!("http://{}", api3))
        .await
        .expect("Failed to connect to node 3");
    let status3 = client3.status().await.expect("Failed to get status from node 3");
    println!("Node 3 final status: leader={}, term={}", status3.leader, status3.raft_term);

    // All nodes should agree on the leader
    assert_eq!(status1.leader, status2.leader, "Nodes 1 and 2 disagree on leader");
    assert_eq!(status1.leader, status3.leader, "Nodes 1 and 3 disagree on leader");
    assert!(status1.leader > 0, "No leader elected");

    println!("✓ 3-node cluster formed successfully with join flow");

    // Cleanup
    handle1.abort();
    handle2.abort();
    handle3.abort();
}
|
|
||||||
|
|
||||||
/// Writes accepted by the leader must become readable on both followers
/// after a short replication window.
#[tokio::test]
#[ignore]
async fn test_3node_state_replication() {
    println!("\n=== Test: 3-Node State Replication ===");

    // Start cluster
    let (config1, _temp1) = cluster_config(1);
    let api1 = config1.network.api_addr;
    let server1 = Server::new(config1).await.unwrap();
    let handle1 = tokio::spawn(async move { server1.run().await });

    let (config2, _temp2) = cluster_config(2);
    let api2 = config2.network.api_addr;
    let server2 = Server::new(config2).await.unwrap();
    let handle2 = tokio::spawn(async move { server2.run().await });

    let (config3, _temp3) = cluster_config(3);
    let api3 = config3.network.api_addr;
    let server3 = Server::new(config3).await.unwrap();
    let handle3 = tokio::spawn(async move { server3.run().await });

    sleep(Duration::from_secs(2)).await;
    println!("Cluster started");

    // Write data to node 1 (leader)
    let mut client1 = Client::connect(format!("http://{}", api1))
        .await
        .unwrap();

    println!("Writing test data to node 1...");
    client1.put("test/key1", "value1").await.unwrap();
    client1.put("test/key2", "value2").await.unwrap();
    client1.put("test/key3", "value3").await.unwrap();

    // Wait for replication
    sleep(Duration::from_millis(500)).await;

    // Read from node 2 and node 3 (followers)
    println!("Reading from node 2...");
    let mut client2 = Client::connect(format!("http://{}", api2))
        .await
        .unwrap();
    let val2 = client2.get("test/key1").await.unwrap();
    assert_eq!(val2, Some(b"value1".to_vec()), "Data not replicated to node 2");

    println!("Reading from node 3...");
    let mut client3 = Client::connect(format!("http://{}", api3))
        .await
        .unwrap();
    let val3 = client3.get("test/key1").await.unwrap();
    assert_eq!(val3, Some(b"value1".to_vec()), "Data not replicated to node 3");

    println!("✓ State replication verified");

    // Cleanup
    handle1.abort();
    handle2.abort();
    handle3.abort();
}
|
|
||||||
|
|
||||||
/// After killing one follower, the remaining 2/3 quorum must keep
/// accepting writes, and the surviving follower must still receive them.
#[tokio::test]
#[ignore]
async fn test_3node_follower_crash() {
    println!("\n=== Test: Follower Crash (Node Remains Available) ===");

    // Start cluster
    let (config1, _temp1) = cluster_config(1);
    let api1 = config1.network.api_addr;
    let server1 = Server::new(config1).await.unwrap();
    let handle1 = tokio::spawn(async move { server1.run().await });

    let (config2, _temp2) = cluster_config(2);
    let server2 = Server::new(config2).await.unwrap();
    let handle2 = tokio::spawn(async move { server2.run().await });

    let (config3, _temp3) = cluster_config(3);
    let api3 = config3.network.api_addr;
    let server3 = Server::new(config3).await.unwrap();
    let handle3 = tokio::spawn(async move { server3.run().await });

    sleep(Duration::from_secs(2)).await;
    println!("Cluster started");

    // Write initial data
    let mut client1 = Client::connect(format!("http://{}", api1))
        .await
        .unwrap();
    println!("Writing initial data...");
    client1.put("test/before-crash", "initial").await.unwrap();

    // Kill node 2 (follower)
    println!("Killing node 2 (follower)...");
    handle2.abort();
    sleep(Duration::from_millis(500)).await;

    // Cluster should still be operational (2/3 quorum)
    println!("Writing data after crash...");
    client1
        .put("test/after-crash", "still-working")
        .await
        .expect("Write should succeed with 2/3 quorum");

    // Read from node 3
    let mut client3 = Client::connect(format!("http://{}", api3))
        .await
        .unwrap();
    let val = client3.get("test/after-crash").await.unwrap();
    assert_eq!(val, Some(b"still-working".to_vec()));

    println!("✓ Cluster operational after follower crash");

    // Cleanup
    handle1.abort();
    handle3.abort();
}
|
|
||||||
|
|
||||||
/// Killing the leader must trigger re-election: a new leader appears with
/// a strictly higher term, and the survivors still accept writes.
#[tokio::test]
#[ignore]
async fn test_3node_leader_crash_reelection() {
    println!("\n=== Test: Leader Crash & Re-election ===");

    // Start cluster
    let (config1, _temp1) = cluster_config(1);
    let server1 = Server::new(config1).await.unwrap();
    let handle1 = tokio::spawn(async move { server1.run().await });

    let (config2, _temp2) = cluster_config(2);
    let api2 = config2.network.api_addr;
    let server2 = Server::new(config2).await.unwrap();
    let handle2 = tokio::spawn(async move { server2.run().await });

    let (config3, _temp3) = cluster_config(3);
    let api3 = config3.network.api_addr;
    let server3 = Server::new(config3).await.unwrap();
    let handle3 = tokio::spawn(async move { server3.run().await });

    sleep(Duration::from_secs(2)).await;
    println!("Cluster started");

    // Determine initial leader
    let mut client2 = Client::connect(format!("http://{}", api2))
        .await
        .unwrap();
    let initial_status = client2.status().await.unwrap();
    let initial_leader = initial_status.leader;
    println!("Initial leader: node {}", initial_leader);

    // Kill the leader (assume node 1)
    // NOTE(review): handle1 is aborted unconditionally even though the real
    // leader is only *assumed* to be node 1 (initial_leader is printed but
    // not checked) — confirm, or kill the handle matching initial_leader.
    println!("Killing leader (node 1)...");
    handle1.abort();

    // Wait for re-election (should be < 1s per requirements)
    println!("Waiting for re-election...");
    sleep(Duration::from_secs(1)).await;

    // Verify new leader elected
    let new_status = client2.status().await.unwrap();
    println!(
        "New leader: node {}, term: {}",
        new_status.leader, new_status.raft_term
    );
    assert!(new_status.leader > 0, "No new leader elected");
    assert!(
        new_status.raft_term > initial_status.raft_term,
        "Raft term should increase after re-election"
    );

    println!("✓ Leader re-election successful within 1s");

    // Verify cluster still functional
    let mut client3 = Client::connect(format!("http://{}", api3))
        .await
        .unwrap();
    client3
        .put("test/post-reelection", "functional")
        .await
        .expect("Cluster should be functional after re-election");

    println!("✓ Cluster operational after re-election");

    // Cleanup
    handle2.abort();
    handle3.abort();
}
|
|
||||||
|
|
||||||
/// Test 3-node cluster with learners only (no voter promotion).
/// T041 Workaround: avoids change_membership by keeping nodes 2 & 3 as
/// learners — node 1 remains the sole voter/leader, and learners must
/// still observe the leader and receive replicated state.
#[tokio::test]
#[ignore] // Run with: cargo test --test cluster_integration test_3node_with_learners -- --ignored
async fn test_3node_with_learners() {
    println!("\n=== Test: 3-Node Cluster with Learners (T041 Workaround) ===");

    // Start Node 1 (bootstrap alone as single voter)
    let (config1, _temp1) = cluster_config_with_join(1);
    let api1 = config1.network.api_addr;
    let raft1 = config1.network.raft_addr;
    println!("Creating Node 1 (bootstrap)...");
    let server1 = Server::new(config1).await.unwrap();
    let handle1 = tokio::spawn(async move { server1.run().await });
    println!("Node 1 started: API={}, Raft={}", api1, raft1);

    // Wait for node 1 to become leader
    sleep(Duration::from_secs(2)).await;

    // Verify node 1 is leader
    let mut client1 = Client::connect(format!("http://{}", api1))
        .await
        .expect("Failed to connect to node 1");
    let status1 = client1.status().await.expect("Failed to get status");
    println!("Node 1 status: leader={}, term={}", status1.leader, status1.raft_term);
    assert_eq!(status1.leader, 1, "Node 1 should be leader");

    // Start Node 2
    let (config2, _temp2) = cluster_config_with_join(2);
    let api2 = config2.network.api_addr;
    let raft2 = config2.network.raft_addr;
    println!("Creating Node 2...");
    let server2 = Server::new(config2).await.unwrap();
    let handle2 = tokio::spawn(async move { server2.run().await });
    println!("Node 2 started: API={}, Raft={}", api2, raft2);
    sleep(Duration::from_millis(500)).await;

    // Start Node 3
    let (config3, _temp3) = cluster_config_with_join(3);
    let api3 = config3.network.api_addr;
    let raft3 = config3.network.raft_addr;
    println!("Creating Node 3...");
    let server3 = Server::new(config3).await.unwrap();
    let handle3 = tokio::spawn(async move { server3.run().await });
    println!("Node 3 started: API={}, Raft={}", api3, raft3);
    sleep(Duration::from_millis(500)).await;

    // Add node 2 as LEARNER (is_learner=true, no voter promotion)
    println!("Adding node 2 as learner (no voter promotion)...");
    let member2_id = client1
        .member_add(2, raft2.to_string(), true) // is_learner=true
        .await
        .expect("Failed to add node 2 as learner");
    println!("Node 2 added as learner with ID: {}", member2_id);
    assert_eq!(member2_id, 2);

    // Add node 3 as LEARNER
    println!("Adding node 3 as learner (no voter promotion)...");
    let member3_id = client1
        .member_add(3, raft3.to_string(), true) // is_learner=true
        .await
        .expect("Failed to add node 3 as learner");
    println!("Node 3 added as learner with ID: {}", member3_id);
    assert_eq!(member3_id, 3);

    // Wait for replication
    sleep(Duration::from_secs(2)).await;

    // Test write on leader
    println!("Testing KV write on leader...");
    client1.put("test-key", "test-value").await.expect("Put failed");

    // Wait for replication to learners
    sleep(Duration::from_secs(1)).await;

    // Verify data replicated to learner (should be able to read)
    let mut client2 = Client::connect(format!("http://{}", api2))
        .await
        .expect("Failed to connect to node 2");

    // Note: Reading from a learner may require forwarding to leader
    // For now, just verify the cluster is operational
    let status2 = client2.status().await.expect("Failed to get status from learner");
    println!("Node 2 (learner) status: leader={}, term={}", status2.leader, status2.raft_term);

    // All nodes should see node 1 as leader
    assert_eq!(status2.leader, 1, "Learner should see node 1 as leader");

    println!("✓ 3-node cluster with learners working");

    // Cleanup
    handle1.abort();
    handle2.abort();
    handle3.abort();
}
|
|
||||||
|
|
||||||
/// Test 3-node cluster formation using staggered bootstrap (DISABLED - doesn't work)
|
|
||||||
#[tokio::test]
|
|
||||||
#[ignore]
|
|
||||||
async fn test_3node_simultaneous_bootstrap_disabled() {
|
|
||||||
println!("\n=== Test: 3-Node Staggered Bootstrap (T041 Workaround) ===");
|
|
||||||
|
|
||||||
// Start Node 1 first (bootstrap=true, will initialize with full membership)
|
|
||||||
let (config1, _temp1) = cluster_config_simultaneous_bootstrap(1);
|
|
||||||
let api1 = config1.network.api_addr;
|
|
||||||
println!("Creating Node 1 (bootstrap)...");
|
|
||||||
let server1 = Server::new(config1).await.unwrap();
|
|
||||||
let handle1 = tokio::spawn(async move { server1.run().await });
|
|
||||||
println!("Node 1 started: API={}", api1);
|
|
||||||
|
|
||||||
// Give node 1 time to become leader
|
|
||||||
println!("Waiting for Node 1 to become leader (3s)...");
|
|
||||||
sleep(Duration::from_secs(3)).await;
|
|
||||||
|
|
||||||
// Verify node 1 is leader
|
|
||||||
let mut client1 = Client::connect(format!("http://{}", api1))
|
|
||||||
.await
|
|
||||||
.expect("Failed to connect to node 1");
|
|
||||||
let status1 = client1.status().await.expect("Failed to get status");
|
|
||||||
println!("Node 1 status before others: leader={}, term={}", status1.leader, status1.raft_term);
|
|
||||||
|
|
||||||
// Now start nodes 2 and 3
|
|
||||||
let (config2, _temp2) = cluster_config_simultaneous_bootstrap(2);
|
|
||||||
let api2 = config2.network.api_addr;
|
|
||||||
println!("Creating Node 2...");
|
|
||||||
let server2 = Server::new(config2).await.unwrap();
|
|
||||||
let handle2 = tokio::spawn(async move { server2.run().await });
|
|
||||||
println!("Node 2 started: API={}", api2);
|
|
||||||
|
|
||||||
let (config3, _temp3) = cluster_config_simultaneous_bootstrap(3);
|
|
||||||
let api3 = config3.network.api_addr;
|
|
||||||
println!("Creating Node 3...");
|
|
||||||
let server3 = Server::new(config3).await.unwrap();
|
|
||||||
let handle3 = tokio::spawn(async move { server3.run().await });
|
|
||||||
println!("Node 3 started: API={}", api3);
|
|
||||||
|
|
||||||
// Wait for cluster to stabilize
|
|
||||||
println!("Waiting for cluster to stabilize (5s)...");
|
|
||||||
sleep(Duration::from_secs(5)).await;
|
|
||||||
|
|
||||||
// Verify cluster formed and leader elected
|
|
||||||
let mut client1 = Client::connect(format!("http://{}", api1))
|
|
||||||
.await
|
|
||||||
.expect("Failed to connect to node 1");
|
|
||||||
let status1 = client1.status().await.expect("Failed to get status from node 1");
|
|
||||||
println!("Node 1 status: leader={}, term={}", status1.leader, status1.raft_term);
|
|
||||||
|
|
||||||
let mut client2 = Client::connect(format!("http://{}", api2))
|
|
||||||
.await
|
|
||||||
.expect("Failed to connect to node 2");
|
|
||||||
let status2 = client2.status().await.expect("Failed to get status from node 2");
|
|
||||||
println!("Node 2 status: leader={}, term={}", status2.leader, status2.raft_term);
|
|
||||||
|
|
||||||
let mut client3 = Client::connect(format!("http://{}", api3))
|
|
||||||
.await
|
|
||||||
.expect("Failed to connect to node 3");
|
|
||||||
let status3 = client3.status().await.expect("Failed to get status from node 3");
|
|
||||||
println!("Node 3 status: leader={}, term={}", status3.leader, status3.raft_term);
|
|
||||||
|
|
||||||
// All nodes should agree on the leader
|
|
||||||
assert!(status1.leader > 0, "No leader elected");
|
|
||||||
assert_eq!(status1.leader, status2.leader, "Nodes 1 and 2 disagree on leader");
|
|
||||||
assert_eq!(status1.leader, status3.leader, "Nodes 1 and 3 disagree on leader");
|
|
||||||
|
|
||||||
// Test KV operations on the cluster
|
|
||||||
println!("Testing KV operations...");
|
|
||||||
client1.put("test-key", "test-value").await.expect("Put failed");
|
|
||||||
|
|
||||||
// Wait for commit to propagate to followers via heartbeat (heartbeat_interval=100ms)
|
|
||||||
sleep(Duration::from_millis(200)).await;
|
|
||||||
|
|
||||||
let value = client2.get("test-key").await.expect("Get failed");
|
|
||||||
assert_eq!(value, Some(b"test-value".to_vec()), "Value not replicated");
|
|
||||||
|
|
||||||
println!("✓ 3-node cluster formed successfully with simultaneous bootstrap");
|
|
||||||
|
|
||||||
// Cleanup
|
|
||||||
handle1.abort();
|
|
||||||
handle2.abort();
|
|
||||||
handle3.abort();
|
|
||||||
}
|
|
||||||
|
|
@ -1,175 +0,0 @@
|
||||||
//! Integration tests for Chainfire
|
|
||||||
//!
|
|
||||||
//! These tests verify that the server, client, and all components work together correctly.
|
|
||||||
|
|
||||||
use chainfire_client::Client;
|
|
||||||
use chainfire_server::{
|
|
||||||
config::{ClusterConfig, NetworkConfig, NodeConfig, RaftConfig, ServerConfig, StorageConfig},
|
|
||||||
server::Server,
|
|
||||||
};
|
|
||||||
use std::time::Duration;
|
|
||||||
use tokio::time::sleep;
|
|
||||||
|
|
||||||
/// Create a test server configuration
|
|
||||||
fn test_config(port: u16) -> (ServerConfig, tempfile::TempDir) {
|
|
||||||
use std::net::SocketAddr;
|
|
||||||
|
|
||||||
let api_addr: SocketAddr = format!("127.0.0.1:{}", port).parse().unwrap();
|
|
||||||
let raft_addr: SocketAddr = format!("127.0.0.1:{}", port + 100).parse().unwrap();
|
|
||||||
let gossip_addr: SocketAddr = format!("127.0.0.1:{}", port + 200).parse().unwrap();
|
|
||||||
|
|
||||||
let temp_dir = tempfile::tempdir().unwrap();
|
|
||||||
|
|
||||||
let config = ServerConfig {
|
|
||||||
node: NodeConfig {
|
|
||||||
id: 1,
|
|
||||||
name: format!("test-node-{}", port),
|
|
||||||
role: "control_plane".to_string(),
|
|
||||||
},
|
|
||||||
cluster: ClusterConfig {
|
|
||||||
id: 1,
|
|
||||||
bootstrap: true,
|
|
||||||
initial_members: vec![],
|
|
||||||
},
|
|
||||||
network: NetworkConfig {
|
|
||||||
api_addr,
|
|
||||||
http_addr: "127.0.0.1:28081".parse().unwrap(),
|
|
||||||
raft_addr,
|
|
||||||
gossip_addr,
|
|
||||||
tls: None,
|
|
||||||
},
|
|
||||||
storage: StorageConfig {
|
|
||||||
data_dir: temp_dir.path().to_path_buf(),
|
|
||||||
},
|
|
||||||
raft: RaftConfig::default(),
|
|
||||||
};
|
|
||||||
|
|
||||||
(config, temp_dir)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_single_node_kv_operations() {
|
|
||||||
// Start server
|
|
||||||
let (config, _temp_dir) = test_config(23790);
|
|
||||||
let api_addr = config.network.api_addr;
|
|
||||||
let server = Server::new(config).await.unwrap();
|
|
||||||
|
|
||||||
// Run server in background
|
|
||||||
let server_handle = tokio::spawn(async move {
|
|
||||||
let _ = server.run().await;
|
|
||||||
});
|
|
||||||
|
|
||||||
// Wait for server to start and Raft leader election
|
|
||||||
// Increased from 500ms to 2000ms for CI/constrained environments
|
|
||||||
sleep(Duration::from_millis(2000)).await;
|
|
||||||
|
|
||||||
// Connect client
|
|
||||||
let mut client = Client::connect(format!("http://{}", api_addr))
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
// Test put with retry (leader election may still be in progress)
|
|
||||||
let mut rev = 0;
|
|
||||||
for attempt in 0..5 {
|
|
||||||
match client.put("test/key1", "value1").await {
|
|
||||||
Ok(r) => {
|
|
||||||
rev = r;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
Err(e) if attempt < 4 => {
|
|
||||||
eprintln!("Put attempt {} failed: {}, retrying...", attempt + 1, e);
|
|
||||||
sleep(Duration::from_millis(500)).await;
|
|
||||||
}
|
|
||||||
Err(e) => panic!("Put failed after 5 attempts: {}", e),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
assert!(rev > 0);
|
|
||||||
|
|
||||||
// Test get
|
|
||||||
let value = client.get("test/key1").await.unwrap();
|
|
||||||
assert_eq!(value, Some(b"value1".to_vec()));
|
|
||||||
|
|
||||||
// Test put with different value
|
|
||||||
let rev2 = client.put("test/key1", "value2").await.unwrap();
|
|
||||||
assert!(rev2 > rev);
|
|
||||||
|
|
||||||
// Test get updated value
|
|
||||||
let value = client.get("test/key1").await.unwrap();
|
|
||||||
assert_eq!(value, Some(b"value2".to_vec()));
|
|
||||||
|
|
||||||
// Test get non-existent key
|
|
||||||
let value = client.get("test/nonexistent").await.unwrap();
|
|
||||||
assert!(value.is_none());
|
|
||||||
|
|
||||||
// Test delete
|
|
||||||
let deleted = client.delete("test/key1").await.unwrap();
|
|
||||||
assert!(deleted);
|
|
||||||
|
|
||||||
// Verify deletion
|
|
||||||
let value = client.get("test/key1").await.unwrap();
|
|
||||||
assert!(value.is_none());
|
|
||||||
|
|
||||||
// Test delete non-existent key
|
|
||||||
let deleted = client.delete("test/nonexistent").await.unwrap();
|
|
||||||
assert!(!deleted);
|
|
||||||
|
|
||||||
// Test prefix operations
|
|
||||||
client.put("prefix/a", "1").await.unwrap();
|
|
||||||
client.put("prefix/b", "2").await.unwrap();
|
|
||||||
client.put("prefix/c", "3").await.unwrap();
|
|
||||||
client.put("other/key", "other").await.unwrap();
|
|
||||||
|
|
||||||
let prefix_values = client.get_prefix("prefix/").await.unwrap();
|
|
||||||
assert_eq!(prefix_values.len(), 3);
|
|
||||||
|
|
||||||
// Cleanup
|
|
||||||
server_handle.abort();
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_cluster_status() {
|
|
||||||
let (config, _temp_dir) = test_config(23800);
|
|
||||||
let api_addr = config.network.api_addr;
|
|
||||||
let server = Server::new(config).await.unwrap();
|
|
||||||
|
|
||||||
let server_handle = tokio::spawn(async move {
|
|
||||||
let _ = server.run().await;
|
|
||||||
});
|
|
||||||
|
|
||||||
sleep(Duration::from_millis(500)).await;
|
|
||||||
|
|
||||||
let mut client = Client::connect(format!("http://{}", api_addr))
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let status = client.status().await.unwrap();
|
|
||||||
assert_eq!(status.leader, 1);
|
|
||||||
assert!(status.raft_term > 0);
|
|
||||||
|
|
||||||
server_handle.abort();
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_string_convenience_methods() {
|
|
||||||
let (config, _temp_dir) = test_config(23810);
|
|
||||||
let api_addr = config.network.api_addr;
|
|
||||||
let server = Server::new(config).await.unwrap();
|
|
||||||
|
|
||||||
let server_handle = tokio::spawn(async move {
|
|
||||||
let _ = server.run().await;
|
|
||||||
});
|
|
||||||
|
|
||||||
sleep(Duration::from_millis(500)).await;
|
|
||||||
|
|
||||||
let mut client = Client::connect(format!("http://{}", api_addr))
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
// Test string methods
|
|
||||||
client.put_str("/config/name", "chainfire").await.unwrap();
|
|
||||||
|
|
||||||
let value = client.get_str("/config/name").await.unwrap();
|
|
||||||
assert_eq!(value, Some("chainfire".to_string()));
|
|
||||||
|
|
||||||
server_handle.abort();
|
|
||||||
}
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
MANIFEST-000005
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
9b9417c1-5d46-4b8a-b14e-ac341643df55
|
|
||||||
3410
chainfire/data/LOG
3410
chainfire/data/LOG
File diff suppressed because it is too large
Load diff
Binary file not shown.
|
|
@ -1,684 +0,0 @@
|
||||||
# This is a RocksDB option file.
|
|
||||||
#
|
|
||||||
# For detailed file format spec, please refer to the example file
|
|
||||||
# in examples/rocksdb_option_file_example.ini
|
|
||||||
#
|
|
||||||
|
|
||||||
[Version]
|
|
||||||
rocksdb_version=10.5.1
|
|
||||||
options_file_version=1.1
|
|
||||||
|
|
||||||
[DBOptions]
|
|
||||||
compaction_readahead_size=2097152
|
|
||||||
strict_bytes_per_sync=false
|
|
||||||
bytes_per_sync=1048576
|
|
||||||
max_background_jobs=4
|
|
||||||
avoid_flush_during_shutdown=false
|
|
||||||
max_background_flushes=-1
|
|
||||||
delayed_write_rate=16777216
|
|
||||||
max_open_files=-1
|
|
||||||
max_subcompactions=1
|
|
||||||
writable_file_max_buffer_size=1048576
|
|
||||||
wal_bytes_per_sync=0
|
|
||||||
max_background_compactions=-1
|
|
||||||
max_total_wal_size=0
|
|
||||||
delete_obsolete_files_period_micros=21600000000
|
|
||||||
stats_dump_period_sec=600
|
|
||||||
stats_history_buffer_size=1048576
|
|
||||||
stats_persist_period_sec=600
|
|
||||||
follower_refresh_catchup_period_ms=10000
|
|
||||||
enforce_single_del_contracts=true
|
|
||||||
lowest_used_cache_tier=kNonVolatileBlockTier
|
|
||||||
bgerror_resume_retry_interval=1000000
|
|
||||||
metadata_write_temperature=kUnknown
|
|
||||||
best_efforts_recovery=false
|
|
||||||
log_readahead_size=0
|
|
||||||
write_identity_file=true
|
|
||||||
write_dbid_to_manifest=true
|
|
||||||
prefix_seek_opt_in_only=false
|
|
||||||
wal_compression=kNoCompression
|
|
||||||
manual_wal_flush=false
|
|
||||||
db_host_id=__hostname__
|
|
||||||
two_write_queues=false
|
|
||||||
allow_ingest_behind=false
|
|
||||||
skip_checking_sst_file_sizes_on_db_open=false
|
|
||||||
flush_verify_memtable_count=true
|
|
||||||
atomic_flush=false
|
|
||||||
verify_sst_unique_id_in_manifest=true
|
|
||||||
skip_stats_update_on_db_open=false
|
|
||||||
track_and_verify_wals=false
|
|
||||||
track_and_verify_wals_in_manifest=false
|
|
||||||
compaction_verify_record_count=true
|
|
||||||
paranoid_checks=true
|
|
||||||
create_if_missing=true
|
|
||||||
max_write_batch_group_size_bytes=1048576
|
|
||||||
follower_catchup_retry_count=10
|
|
||||||
avoid_flush_during_recovery=false
|
|
||||||
file_checksum_gen_factory=nullptr
|
|
||||||
enable_thread_tracking=false
|
|
||||||
allow_fallocate=true
|
|
||||||
allow_data_in_errors=false
|
|
||||||
error_if_exists=false
|
|
||||||
use_direct_io_for_flush_and_compaction=false
|
|
||||||
background_close_inactive_wals=false
|
|
||||||
create_missing_column_families=true
|
|
||||||
WAL_size_limit_MB=0
|
|
||||||
use_direct_reads=false
|
|
||||||
persist_stats_to_disk=false
|
|
||||||
allow_2pc=false
|
|
||||||
max_log_file_size=0
|
|
||||||
is_fd_close_on_exec=true
|
|
||||||
avoid_unnecessary_blocking_io=false
|
|
||||||
max_file_opening_threads=16
|
|
||||||
wal_filter=nullptr
|
|
||||||
wal_write_temperature=kUnknown
|
|
||||||
follower_catchup_retry_wait_ms=100
|
|
||||||
allow_mmap_reads=false
|
|
||||||
allow_mmap_writes=false
|
|
||||||
use_adaptive_mutex=false
|
|
||||||
use_fsync=false
|
|
||||||
table_cache_numshardbits=6
|
|
||||||
dump_malloc_stats=false
|
|
||||||
db_write_buffer_size=0
|
|
||||||
keep_log_file_num=1000
|
|
||||||
max_bgerror_resume_count=2147483647
|
|
||||||
allow_concurrent_memtable_write=true
|
|
||||||
recycle_log_file_num=0
|
|
||||||
log_file_time_to_roll=0
|
|
||||||
manifest_preallocation_size=4194304
|
|
||||||
enable_write_thread_adaptive_yield=true
|
|
||||||
WAL_ttl_seconds=0
|
|
||||||
max_manifest_file_size=1073741824
|
|
||||||
wal_recovery_mode=kPointInTimeRecovery
|
|
||||||
enable_pipelined_write=false
|
|
||||||
write_thread_slow_yield_usec=3
|
|
||||||
unordered_write=false
|
|
||||||
write_thread_max_yield_usec=100
|
|
||||||
advise_random_on_open=true
|
|
||||||
info_log_level=INFO_LEVEL
|
|
||||||
|
|
||||||
|
|
||||||
[CFOptions "default"]
|
|
||||||
memtable_max_range_deletions=0
|
|
||||||
compression_manager=nullptr
|
|
||||||
compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
|
|
||||||
paranoid_memory_checks=false
|
|
||||||
memtable_avg_op_scan_flush_trigger=0
|
|
||||||
block_protection_bytes_per_key=0
|
|
||||||
uncache_aggressiveness=0
|
|
||||||
bottommost_file_compaction_delay=0
|
|
||||||
memtable_protection_bytes_per_key=0
|
|
||||||
experimental_mempurge_threshold=0.000000
|
|
||||||
bottommost_compression=kDisableCompressionOption
|
|
||||||
sample_for_compression=0
|
|
||||||
prepopulate_blob_cache=kDisable
|
|
||||||
blob_file_starting_level=0
|
|
||||||
blob_compaction_readahead_size=0
|
|
||||||
table_factory=BlockBasedTable
|
|
||||||
max_successive_merges=0
|
|
||||||
max_write_buffer_number=2
|
|
||||||
prefix_extractor=nullptr
|
|
||||||
memtable_huge_page_size=0
|
|
||||||
write_buffer_size=67108864
|
|
||||||
strict_max_successive_merges=false
|
|
||||||
arena_block_size=1048576
|
|
||||||
memtable_op_scan_flush_trigger=0
|
|
||||||
level0_file_num_compaction_trigger=4
|
|
||||||
report_bg_io_stats=false
|
|
||||||
inplace_update_num_locks=10000
|
|
||||||
memtable_prefix_bloom_size_ratio=0.000000
|
|
||||||
level0_stop_writes_trigger=36
|
|
||||||
blob_compression_type=kNoCompression
|
|
||||||
level0_slowdown_writes_trigger=20
|
|
||||||
hard_pending_compaction_bytes_limit=274877906944
|
|
||||||
target_file_size_multiplier=1
|
|
||||||
bottommost_compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
|
|
||||||
paranoid_file_checks=false
|
|
||||||
blob_garbage_collection_force_threshold=1.000000
|
|
||||||
enable_blob_files=false
|
|
||||||
soft_pending_compaction_bytes_limit=68719476736
|
|
||||||
target_file_size_base=67108864
|
|
||||||
max_compaction_bytes=1677721600
|
|
||||||
disable_auto_compactions=false
|
|
||||||
min_blob_size=0
|
|
||||||
memtable_whole_key_filtering=false
|
|
||||||
max_bytes_for_level_base=268435456
|
|
||||||
last_level_temperature=kUnknown
|
|
||||||
preserve_internal_time_seconds=0
|
|
||||||
compaction_options_fifo={trivial_copy_buffer_size=4096;allow_trivial_copy_when_change_temperature=false;file_temperature_age_thresholds=;allow_compaction=false;age_for_warm=0;max_table_files_size=1073741824;}
|
|
||||||
max_bytes_for_level_multiplier=10.000000
|
|
||||||
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
|
|
||||||
max_sequential_skip_in_iterations=8
|
|
||||||
compression=kSnappyCompression
|
|
||||||
default_write_temperature=kUnknown
|
|
||||||
compaction_options_universal={reduce_file_locking=false;incremental=false;compression_size_percent=-1;allow_trivial_move=false;max_size_amplification_percent=200;max_merge_width=4294967295;stop_style=kCompactionStopStyleTotalSize;min_merge_width=2;max_read_amp=-1;size_ratio=1;}
|
|
||||||
blob_garbage_collection_age_cutoff=0.250000
|
|
||||||
ttl=2592000
|
|
||||||
periodic_compaction_seconds=0
|
|
||||||
preclude_last_level_data_seconds=0
|
|
||||||
blob_file_size=268435456
|
|
||||||
enable_blob_garbage_collection=false
|
|
||||||
persist_user_defined_timestamps=true
|
|
||||||
compaction_pri=kMinOverlappingRatio
|
|
||||||
compaction_filter_factory=nullptr
|
|
||||||
comparator=leveldb.BytewiseComparator
|
|
||||||
bloom_locality=0
|
|
||||||
merge_operator=nullptr
|
|
||||||
compaction_filter=nullptr
|
|
||||||
level_compaction_dynamic_level_bytes=true
|
|
||||||
optimize_filters_for_hits=false
|
|
||||||
inplace_update_support=false
|
|
||||||
max_write_buffer_size_to_maintain=0
|
|
||||||
memtable_factory=SkipListFactory
|
|
||||||
memtable_insert_with_hint_prefix_extractor=nullptr
|
|
||||||
num_levels=7
|
|
||||||
force_consistency_checks=true
|
|
||||||
sst_partitioner_factory=nullptr
|
|
||||||
default_temperature=kUnknown
|
|
||||||
disallow_memtable_writes=false
|
|
||||||
compaction_style=kCompactionStyleLevel
|
|
||||||
min_write_buffer_number_to_merge=1
|
|
||||||
|
|
||||||
[TableOptions/BlockBasedTable "default"]
|
|
||||||
num_file_reads_for_auto_readahead=2
|
|
||||||
initial_auto_readahead_size=8192
|
|
||||||
metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;}
|
|
||||||
enable_index_compression=true
|
|
||||||
verify_compression=false
|
|
||||||
prepopulate_block_cache=kDisable
|
|
||||||
format_version=6
|
|
||||||
use_delta_encoding=true
|
|
||||||
pin_top_level_index_and_filter=true
|
|
||||||
read_amp_bytes_per_bit=0
|
|
||||||
decouple_partitioned_filters=false
|
|
||||||
partition_filters=false
|
|
||||||
metadata_block_size=4096
|
|
||||||
max_auto_readahead_size=262144
|
|
||||||
index_block_restart_interval=1
|
|
||||||
block_size_deviation=10
|
|
||||||
block_size=4096
|
|
||||||
detect_filter_construct_corruption=false
|
|
||||||
no_block_cache=false
|
|
||||||
checksum=kXXH3
|
|
||||||
filter_policy=nullptr
|
|
||||||
data_block_hash_table_util_ratio=0.750000
|
|
||||||
block_restart_interval=16
|
|
||||||
index_type=kBinarySearch
|
|
||||||
pin_l0_filter_and_index_blocks_in_cache=false
|
|
||||||
data_block_index_type=kDataBlockBinarySearch
|
|
||||||
cache_index_and_filter_blocks_with_high_priority=true
|
|
||||||
whole_key_filtering=true
|
|
||||||
index_shortening=kShortenSeparators
|
|
||||||
cache_index_and_filter_blocks=false
|
|
||||||
block_align=false
|
|
||||||
optimize_filters_for_memory=true
|
|
||||||
flush_block_policy_factory=FlushBlockBySizePolicyFactory
|
|
||||||
|
|
||||||
|
|
||||||
[CFOptions "raft_logs"]
|
|
||||||
memtable_max_range_deletions=0
|
|
||||||
compression_manager=nullptr
|
|
||||||
compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
|
|
||||||
paranoid_memory_checks=false
|
|
||||||
memtable_avg_op_scan_flush_trigger=0
|
|
||||||
block_protection_bytes_per_key=0
|
|
||||||
uncache_aggressiveness=0
|
|
||||||
bottommost_file_compaction_delay=0
|
|
||||||
memtable_protection_bytes_per_key=0
|
|
||||||
experimental_mempurge_threshold=0.000000
|
|
||||||
bottommost_compression=kDisableCompressionOption
|
|
||||||
sample_for_compression=0
|
|
||||||
prepopulate_blob_cache=kDisable
|
|
||||||
blob_file_starting_level=0
|
|
||||||
blob_compaction_readahead_size=0
|
|
||||||
table_factory=BlockBasedTable
|
|
||||||
max_successive_merges=0
|
|
||||||
max_write_buffer_number=3
|
|
||||||
prefix_extractor=nullptr
|
|
||||||
memtable_huge_page_size=0
|
|
||||||
write_buffer_size=67108864
|
|
||||||
strict_max_successive_merges=false
|
|
||||||
arena_block_size=1048576
|
|
||||||
memtable_op_scan_flush_trigger=0
|
|
||||||
level0_file_num_compaction_trigger=4
|
|
||||||
report_bg_io_stats=false
|
|
||||||
inplace_update_num_locks=10000
|
|
||||||
memtable_prefix_bloom_size_ratio=0.000000
|
|
||||||
level0_stop_writes_trigger=36
|
|
||||||
blob_compression_type=kNoCompression
|
|
||||||
level0_slowdown_writes_trigger=20
|
|
||||||
hard_pending_compaction_bytes_limit=274877906944
|
|
||||||
target_file_size_multiplier=1
|
|
||||||
bottommost_compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
|
|
||||||
paranoid_file_checks=false
|
|
||||||
blob_garbage_collection_force_threshold=1.000000
|
|
||||||
enable_blob_files=false
|
|
||||||
soft_pending_compaction_bytes_limit=68719476736
|
|
||||||
target_file_size_base=67108864
|
|
||||||
max_compaction_bytes=1677721600
|
|
||||||
disable_auto_compactions=false
|
|
||||||
min_blob_size=0
|
|
||||||
memtable_whole_key_filtering=false
|
|
||||||
max_bytes_for_level_base=268435456
|
|
||||||
last_level_temperature=kUnknown
|
|
||||||
preserve_internal_time_seconds=0
|
|
||||||
compaction_options_fifo={trivial_copy_buffer_size=4096;allow_trivial_copy_when_change_temperature=false;file_temperature_age_thresholds=;allow_compaction=false;age_for_warm=0;max_table_files_size=1073741824;}
|
|
||||||
max_bytes_for_level_multiplier=10.000000
|
|
||||||
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
|
|
||||||
max_sequential_skip_in_iterations=8
|
|
||||||
compression=kSnappyCompression
|
|
||||||
default_write_temperature=kUnknown
|
|
||||||
compaction_options_universal={reduce_file_locking=false;incremental=false;compression_size_percent=-1;allow_trivial_move=false;max_size_amplification_percent=200;max_merge_width=4294967295;stop_style=kCompactionStopStyleTotalSize;min_merge_width=2;max_read_amp=-1;size_ratio=1;}
|
|
||||||
blob_garbage_collection_age_cutoff=0.250000
|
|
||||||
ttl=2592000
|
|
||||||
periodic_compaction_seconds=0
|
|
||||||
preclude_last_level_data_seconds=0
|
|
||||||
blob_file_size=268435456
|
|
||||||
enable_blob_garbage_collection=false
|
|
||||||
persist_user_defined_timestamps=true
|
|
||||||
compaction_pri=kMinOverlappingRatio
|
|
||||||
compaction_filter_factory=nullptr
|
|
||||||
comparator=leveldb.BytewiseComparator
|
|
||||||
bloom_locality=0
|
|
||||||
merge_operator=nullptr
|
|
||||||
compaction_filter=nullptr
|
|
||||||
level_compaction_dynamic_level_bytes=true
|
|
||||||
optimize_filters_for_hits=false
|
|
||||||
inplace_update_support=false
|
|
||||||
max_write_buffer_size_to_maintain=0
|
|
||||||
memtable_factory=SkipListFactory
|
|
||||||
memtable_insert_with_hint_prefix_extractor=nullptr
|
|
||||||
num_levels=7
|
|
||||||
force_consistency_checks=true
|
|
||||||
sst_partitioner_factory=nullptr
|
|
||||||
default_temperature=kUnknown
|
|
||||||
disallow_memtable_writes=false
|
|
||||||
compaction_style=kCompactionStyleLevel
|
|
||||||
min_write_buffer_number_to_merge=1
|
|
||||||
|
|
||||||
[TableOptions/BlockBasedTable "raft_logs"]
|
|
||||||
num_file_reads_for_auto_readahead=2
|
|
||||||
initial_auto_readahead_size=8192
|
|
||||||
metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;}
|
|
||||||
enable_index_compression=true
|
|
||||||
verify_compression=false
|
|
||||||
prepopulate_block_cache=kDisable
|
|
||||||
format_version=6
|
|
||||||
use_delta_encoding=true
|
|
||||||
pin_top_level_index_and_filter=true
|
|
||||||
read_amp_bytes_per_bit=0
|
|
||||||
decouple_partitioned_filters=false
|
|
||||||
partition_filters=false
|
|
||||||
metadata_block_size=4096
|
|
||||||
max_auto_readahead_size=262144
|
|
||||||
index_block_restart_interval=1
|
|
||||||
block_size_deviation=10
|
|
||||||
block_size=4096
|
|
||||||
detect_filter_construct_corruption=false
|
|
||||||
no_block_cache=false
|
|
||||||
checksum=kXXH3
|
|
||||||
filter_policy=nullptr
|
|
||||||
data_block_hash_table_util_ratio=0.750000
|
|
||||||
block_restart_interval=16
|
|
||||||
index_type=kBinarySearch
|
|
||||||
pin_l0_filter_and_index_blocks_in_cache=false
|
|
||||||
data_block_index_type=kDataBlockBinarySearch
|
|
||||||
cache_index_and_filter_blocks_with_high_priority=true
|
|
||||||
whole_key_filtering=true
|
|
||||||
index_shortening=kShortenSeparators
|
|
||||||
cache_index_and_filter_blocks=false
|
|
||||||
block_align=false
|
|
||||||
optimize_filters_for_memory=true
|
|
||||||
flush_block_policy_factory=FlushBlockBySizePolicyFactory
|
|
||||||
|
|
||||||
|
|
||||||
[CFOptions "raft_meta"]
|
|
||||||
memtable_max_range_deletions=0
|
|
||||||
compression_manager=nullptr
|
|
||||||
compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
|
|
||||||
paranoid_memory_checks=false
|
|
||||||
memtable_avg_op_scan_flush_trigger=0
|
|
||||||
block_protection_bytes_per_key=0
|
|
||||||
uncache_aggressiveness=0
|
|
||||||
bottommost_file_compaction_delay=0
|
|
||||||
memtable_protection_bytes_per_key=0
|
|
||||||
experimental_mempurge_threshold=0.000000
|
|
||||||
bottommost_compression=kDisableCompressionOption
|
|
||||||
sample_for_compression=0
|
|
||||||
prepopulate_blob_cache=kDisable
|
|
||||||
blob_file_starting_level=0
|
|
||||||
blob_compaction_readahead_size=0
|
|
||||||
table_factory=BlockBasedTable
|
|
||||||
max_successive_merges=0
|
|
||||||
max_write_buffer_number=2
|
|
||||||
prefix_extractor=nullptr
|
|
||||||
memtable_huge_page_size=0
|
|
||||||
write_buffer_size=16777216
|
|
||||||
strict_max_successive_merges=false
|
|
||||||
arena_block_size=1048576
|
|
||||||
memtable_op_scan_flush_trigger=0
|
|
||||||
level0_file_num_compaction_trigger=4
|
|
||||||
report_bg_io_stats=false
|
|
||||||
inplace_update_num_locks=10000
|
|
||||||
memtable_prefix_bloom_size_ratio=0.000000
|
|
||||||
level0_stop_writes_trigger=36
|
|
||||||
blob_compression_type=kNoCompression
|
|
||||||
level0_slowdown_writes_trigger=20
|
|
||||||
hard_pending_compaction_bytes_limit=274877906944
|
|
||||||
target_file_size_multiplier=1
|
|
||||||
bottommost_compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
|
|
||||||
paranoid_file_checks=false
|
|
||||||
blob_garbage_collection_force_threshold=1.000000
|
|
||||||
enable_blob_files=false
|
|
||||||
soft_pending_compaction_bytes_limit=68719476736
|
|
||||||
target_file_size_base=67108864
|
|
||||||
max_compaction_bytes=1677721600
|
|
||||||
disable_auto_compactions=false
|
|
||||||
min_blob_size=0
|
|
||||||
memtable_whole_key_filtering=false
|
|
||||||
max_bytes_for_level_base=268435456
|
|
||||||
last_level_temperature=kUnknown
|
|
||||||
preserve_internal_time_seconds=0
|
|
||||||
compaction_options_fifo={trivial_copy_buffer_size=4096;allow_trivial_copy_when_change_temperature=false;file_temperature_age_thresholds=;allow_compaction=false;age_for_warm=0;max_table_files_size=1073741824;}
|
|
||||||
max_bytes_for_level_multiplier=10.000000
|
|
||||||
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
|
|
||||||
max_sequential_skip_in_iterations=8
|
|
||||||
compression=kSnappyCompression
|
|
||||||
default_write_temperature=kUnknown
|
|
||||||
compaction_options_universal={reduce_file_locking=false;incremental=false;compression_size_percent=-1;allow_trivial_move=false;max_size_amplification_percent=200;max_merge_width=4294967295;stop_style=kCompactionStopStyleTotalSize;min_merge_width=2;max_read_amp=-1;size_ratio=1;}
|
|
||||||
blob_garbage_collection_age_cutoff=0.250000
|
|
||||||
ttl=2592000
|
|
||||||
periodic_compaction_seconds=0
|
|
||||||
preclude_last_level_data_seconds=0
|
|
||||||
blob_file_size=268435456
|
|
||||||
enable_blob_garbage_collection=false
|
|
||||||
persist_user_defined_timestamps=true
|
|
||||||
compaction_pri=kMinOverlappingRatio
|
|
||||||
compaction_filter_factory=nullptr
|
|
||||||
comparator=leveldb.BytewiseComparator
|
|
||||||
bloom_locality=0
|
|
||||||
merge_operator=nullptr
|
|
||||||
compaction_filter=nullptr
|
|
||||||
level_compaction_dynamic_level_bytes=true
|
|
||||||
optimize_filters_for_hits=false
|
|
||||||
inplace_update_support=false
|
|
||||||
max_write_buffer_size_to_maintain=0
|
|
||||||
memtable_factory=SkipListFactory
|
|
||||||
memtable_insert_with_hint_prefix_extractor=nullptr
|
|
||||||
num_levels=7
|
|
||||||
force_consistency_checks=true
|
|
||||||
sst_partitioner_factory=nullptr
|
|
||||||
default_temperature=kUnknown
|
|
||||||
disallow_memtable_writes=false
|
|
||||||
compaction_style=kCompactionStyleLevel
|
|
||||||
min_write_buffer_number_to_merge=1
|
|
||||||
|
|
||||||
[TableOptions/BlockBasedTable "raft_meta"]
|
|
||||||
num_file_reads_for_auto_readahead=2
|
|
||||||
initial_auto_readahead_size=8192
|
|
||||||
metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;}
|
|
||||||
enable_index_compression=true
|
|
||||||
verify_compression=false
|
|
||||||
prepopulate_block_cache=kDisable
|
|
||||||
format_version=6
|
|
||||||
use_delta_encoding=true
|
|
||||||
pin_top_level_index_and_filter=true
|
|
||||||
read_amp_bytes_per_bit=0
|
|
||||||
decouple_partitioned_filters=false
|
|
||||||
partition_filters=false
|
|
||||||
metadata_block_size=4096
|
|
||||||
max_auto_readahead_size=262144
|
|
||||||
index_block_restart_interval=1
|
|
||||||
block_size_deviation=10
|
|
||||||
block_size=4096
|
|
||||||
detect_filter_construct_corruption=false
|
|
||||||
no_block_cache=false
|
|
||||||
checksum=kXXH3
|
|
||||||
filter_policy=nullptr
|
|
||||||
data_block_hash_table_util_ratio=0.750000
|
|
||||||
block_restart_interval=16
|
|
||||||
index_type=kBinarySearch
|
|
||||||
pin_l0_filter_and_index_blocks_in_cache=false
|
|
||||||
data_block_index_type=kDataBlockBinarySearch
|
|
||||||
cache_index_and_filter_blocks_with_high_priority=true
|
|
||||||
whole_key_filtering=true
|
|
||||||
index_shortening=kShortenSeparators
|
|
||||||
cache_index_and_filter_blocks=false
|
|
||||||
block_align=false
|
|
||||||
optimize_filters_for_memory=true
|
|
||||||
flush_block_policy_factory=FlushBlockBySizePolicyFactory
|
|
||||||
|
|
||||||
|
|
||||||
[CFOptions "key_value"]
|
|
||||||
memtable_max_range_deletions=0
|
|
||||||
compression_manager=nullptr
|
|
||||||
compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
|
|
||||||
paranoid_memory_checks=false
|
|
||||||
memtable_avg_op_scan_flush_trigger=0
|
|
||||||
block_protection_bytes_per_key=0
|
|
||||||
uncache_aggressiveness=0
|
|
||||||
bottommost_file_compaction_delay=0
|
|
||||||
memtable_protection_bytes_per_key=0
|
|
||||||
experimental_mempurge_threshold=0.000000
|
|
||||||
bottommost_compression=kDisableCompressionOption
|
|
||||||
sample_for_compression=0
|
|
||||||
prepopulate_blob_cache=kDisable
|
|
||||||
blob_file_starting_level=0
|
|
||||||
blob_compaction_readahead_size=0
|
|
||||||
table_factory=BlockBasedTable
|
|
||||||
max_successive_merges=0
|
|
||||||
max_write_buffer_number=4
|
|
||||||
prefix_extractor=rocksdb.FixedPrefix.8
|
|
||||||
memtable_huge_page_size=0
|
|
||||||
write_buffer_size=134217728
|
|
||||||
strict_max_successive_merges=false
|
|
||||||
arena_block_size=1048576
|
|
||||||
memtable_op_scan_flush_trigger=0
|
|
||||||
level0_file_num_compaction_trigger=4
|
|
||||||
report_bg_io_stats=false
|
|
||||||
inplace_update_num_locks=10000
|
|
||||||
memtable_prefix_bloom_size_ratio=0.000000
|
|
||||||
level0_stop_writes_trigger=36
|
|
||||||
blob_compression_type=kNoCompression
|
|
||||||
level0_slowdown_writes_trigger=20
|
|
||||||
hard_pending_compaction_bytes_limit=274877906944
|
|
||||||
target_file_size_multiplier=1
|
|
||||||
bottommost_compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
|
|
||||||
paranoid_file_checks=false
|
|
||||||
blob_garbage_collection_force_threshold=1.000000
|
|
||||||
enable_blob_files=false
|
|
||||||
soft_pending_compaction_bytes_limit=68719476736
|
|
||||||
target_file_size_base=67108864
|
|
||||||
max_compaction_bytes=1677721600
|
|
||||||
disable_auto_compactions=false
|
|
||||||
min_blob_size=0
|
|
||||||
memtable_whole_key_filtering=false
|
|
||||||
max_bytes_for_level_base=268435456
|
|
||||||
last_level_temperature=kUnknown
|
|
||||||
preserve_internal_time_seconds=0
|
|
||||||
compaction_options_fifo={trivial_copy_buffer_size=4096;allow_trivial_copy_when_change_temperature=false;file_temperature_age_thresholds=;allow_compaction=false;age_for_warm=0;max_table_files_size=1073741824;}
|
|
||||||
max_bytes_for_level_multiplier=10.000000
|
|
||||||
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
|
|
||||||
max_sequential_skip_in_iterations=8
|
|
||||||
compression=kSnappyCompression
|
|
||||||
default_write_temperature=kUnknown
|
|
||||||
compaction_options_universal={reduce_file_locking=false;incremental=false;compression_size_percent=-1;allow_trivial_move=false;max_size_amplification_percent=200;max_merge_width=4294967295;stop_style=kCompactionStopStyleTotalSize;min_merge_width=2;max_read_amp=-1;size_ratio=1;}
|
|
||||||
blob_garbage_collection_age_cutoff=0.250000
|
|
||||||
ttl=2592000
|
|
||||||
periodic_compaction_seconds=0
|
|
||||||
preclude_last_level_data_seconds=0
|
|
||||||
blob_file_size=268435456
|
|
||||||
enable_blob_garbage_collection=false
|
|
||||||
persist_user_defined_timestamps=true
|
|
||||||
compaction_pri=kMinOverlappingRatio
|
|
||||||
compaction_filter_factory=nullptr
|
|
||||||
comparator=leveldb.BytewiseComparator
|
|
||||||
bloom_locality=0
|
|
||||||
merge_operator=nullptr
|
|
||||||
compaction_filter=nullptr
|
|
||||||
level_compaction_dynamic_level_bytes=true
|
|
||||||
optimize_filters_for_hits=false
|
|
||||||
inplace_update_support=false
|
|
||||||
max_write_buffer_size_to_maintain=0
|
|
||||||
memtable_factory=SkipListFactory
|
|
||||||
memtable_insert_with_hint_prefix_extractor=nullptr
|
|
||||||
num_levels=7
|
|
||||||
force_consistency_checks=true
|
|
||||||
sst_partitioner_factory=nullptr
|
|
||||||
default_temperature=kUnknown
|
|
||||||
disallow_memtable_writes=false
|
|
||||||
compaction_style=kCompactionStyleLevel
|
|
||||||
min_write_buffer_number_to_merge=1
|
|
||||||
|
|
||||||
[TableOptions/BlockBasedTable "key_value"]
|
|
||||||
num_file_reads_for_auto_readahead=2
|
|
||||||
initial_auto_readahead_size=8192
|
|
||||||
metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;}
|
|
||||||
enable_index_compression=true
|
|
||||||
verify_compression=false
|
|
||||||
prepopulate_block_cache=kDisable
|
|
||||||
format_version=6
|
|
||||||
use_delta_encoding=true
|
|
||||||
pin_top_level_index_and_filter=true
|
|
||||||
read_amp_bytes_per_bit=0
|
|
||||||
decouple_partitioned_filters=false
|
|
||||||
partition_filters=false
|
|
||||||
metadata_block_size=4096
|
|
||||||
max_auto_readahead_size=262144
|
|
||||||
index_block_restart_interval=1
|
|
||||||
block_size_deviation=10
|
|
||||||
block_size=4096
|
|
||||||
detect_filter_construct_corruption=false
|
|
||||||
no_block_cache=false
|
|
||||||
checksum=kXXH3
|
|
||||||
filter_policy=nullptr
|
|
||||||
data_block_hash_table_util_ratio=0.750000
|
|
||||||
block_restart_interval=16
|
|
||||||
index_type=kBinarySearch
|
|
||||||
pin_l0_filter_and_index_blocks_in_cache=false
|
|
||||||
data_block_index_type=kDataBlockBinarySearch
|
|
||||||
cache_index_and_filter_blocks_with_high_priority=true
|
|
||||||
whole_key_filtering=true
|
|
||||||
index_shortening=kShortenSeparators
|
|
||||||
cache_index_and_filter_blocks=false
|
|
||||||
block_align=false
|
|
||||||
optimize_filters_for_memory=true
|
|
||||||
flush_block_policy_factory=FlushBlockBySizePolicyFactory
|
|
||||||
|
|
||||||
|
|
||||||
[CFOptions "snapshot"]
|
|
||||||
memtable_max_range_deletions=0
|
|
||||||
compression_manager=nullptr
|
|
||||||
compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
|
|
||||||
paranoid_memory_checks=false
|
|
||||||
memtable_avg_op_scan_flush_trigger=0
|
|
||||||
block_protection_bytes_per_key=0
|
|
||||||
uncache_aggressiveness=0
|
|
||||||
bottommost_file_compaction_delay=0
|
|
||||||
memtable_protection_bytes_per_key=0
|
|
||||||
experimental_mempurge_threshold=0.000000
|
|
||||||
bottommost_compression=kDisableCompressionOption
|
|
||||||
sample_for_compression=0
|
|
||||||
prepopulate_blob_cache=kDisable
|
|
||||||
blob_file_starting_level=0
|
|
||||||
blob_compaction_readahead_size=0
|
|
||||||
table_factory=BlockBasedTable
|
|
||||||
max_successive_merges=0
|
|
||||||
max_write_buffer_number=2
|
|
||||||
prefix_extractor=nullptr
|
|
||||||
memtable_huge_page_size=0
|
|
||||||
write_buffer_size=33554432
|
|
||||||
strict_max_successive_merges=false
|
|
||||||
arena_block_size=1048576
|
|
||||||
memtable_op_scan_flush_trigger=0
|
|
||||||
level0_file_num_compaction_trigger=4
|
|
||||||
report_bg_io_stats=false
|
|
||||||
inplace_update_num_locks=10000
|
|
||||||
memtable_prefix_bloom_size_ratio=0.000000
|
|
||||||
level0_stop_writes_trigger=36
|
|
||||||
blob_compression_type=kNoCompression
|
|
||||||
level0_slowdown_writes_trigger=20
|
|
||||||
hard_pending_compaction_bytes_limit=274877906944
|
|
||||||
target_file_size_multiplier=1
|
|
||||||
bottommost_compression_opts={checksum=false;max_dict_buffer_bytes=0;enabled=false;max_dict_bytes=0;max_compressed_bytes_per_kb=896;parallel_threads=1;zstd_max_train_bytes=0;level=32767;use_zstd_dict_trainer=true;strategy=0;window_bits=-14;}
|
|
||||||
paranoid_file_checks=false
|
|
||||||
blob_garbage_collection_force_threshold=1.000000
|
|
||||||
enable_blob_files=false
|
|
||||||
soft_pending_compaction_bytes_limit=68719476736
|
|
||||||
target_file_size_base=67108864
|
|
||||||
max_compaction_bytes=1677721600
|
|
||||||
disable_auto_compactions=false
|
|
||||||
min_blob_size=0
|
|
||||||
memtable_whole_key_filtering=false
|
|
||||||
max_bytes_for_level_base=268435456
|
|
||||||
last_level_temperature=kUnknown
|
|
||||||
preserve_internal_time_seconds=0
|
|
||||||
compaction_options_fifo={trivial_copy_buffer_size=4096;allow_trivial_copy_when_change_temperature=false;file_temperature_age_thresholds=;allow_compaction=false;age_for_warm=0;max_table_files_size=1073741824;}
|
|
||||||
max_bytes_for_level_multiplier=10.000000
|
|
||||||
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
|
|
||||||
max_sequential_skip_in_iterations=8
|
|
||||||
compression=kSnappyCompression
|
|
||||||
default_write_temperature=kUnknown
|
|
||||||
compaction_options_universal={reduce_file_locking=false;incremental=false;compression_size_percent=-1;allow_trivial_move=false;max_size_amplification_percent=200;max_merge_width=4294967295;stop_style=kCompactionStopStyleTotalSize;min_merge_width=2;max_read_amp=-1;size_ratio=1;}
|
|
||||||
blob_garbage_collection_age_cutoff=0.250000
|
|
||||||
ttl=2592000
|
|
||||||
periodic_compaction_seconds=0
|
|
||||||
preclude_last_level_data_seconds=0
|
|
||||||
blob_file_size=268435456
|
|
||||||
enable_blob_garbage_collection=false
|
|
||||||
persist_user_defined_timestamps=true
|
|
||||||
compaction_pri=kMinOverlappingRatio
|
|
||||||
compaction_filter_factory=nullptr
|
|
||||||
comparator=leveldb.BytewiseComparator
|
|
||||||
bloom_locality=0
|
|
||||||
merge_operator=nullptr
|
|
||||||
compaction_filter=nullptr
|
|
||||||
level_compaction_dynamic_level_bytes=true
|
|
||||||
optimize_filters_for_hits=false
|
|
||||||
inplace_update_support=false
|
|
||||||
max_write_buffer_size_to_maintain=0
|
|
||||||
memtable_factory=SkipListFactory
|
|
||||||
memtable_insert_with_hint_prefix_extractor=nullptr
|
|
||||||
num_levels=7
|
|
||||||
force_consistency_checks=true
|
|
||||||
sst_partitioner_factory=nullptr
|
|
||||||
default_temperature=kUnknown
|
|
||||||
disallow_memtable_writes=false
|
|
||||||
compaction_style=kCompactionStyleLevel
|
|
||||||
min_write_buffer_number_to_merge=1
|
|
||||||
|
|
||||||
[TableOptions/BlockBasedTable "snapshot"]
|
|
||||||
num_file_reads_for_auto_readahead=2
|
|
||||||
initial_auto_readahead_size=8192
|
|
||||||
metadata_cache_options={unpartitioned_pinning=kFallback;partition_pinning=kFallback;top_level_index_pinning=kFallback;}
|
|
||||||
enable_index_compression=true
|
|
||||||
verify_compression=false
|
|
||||||
prepopulate_block_cache=kDisable
|
|
||||||
format_version=6
|
|
||||||
use_delta_encoding=true
|
|
||||||
pin_top_level_index_and_filter=true
|
|
||||||
read_amp_bytes_per_bit=0
|
|
||||||
decouple_partitioned_filters=false
|
|
||||||
partition_filters=false
|
|
||||||
metadata_block_size=4096
|
|
||||||
max_auto_readahead_size=262144
|
|
||||||
index_block_restart_interval=1
|
|
||||||
block_size_deviation=10
|
|
||||||
block_size=4096
|
|
||||||
detect_filter_construct_corruption=false
|
|
||||||
no_block_cache=false
|
|
||||||
checksum=kXXH3
|
|
||||||
filter_policy=nullptr
|
|
||||||
data_block_hash_table_util_ratio=0.750000
|
|
||||||
block_restart_interval=16
|
|
||||||
index_type=kBinarySearch
|
|
||||||
pin_l0_filter_and_index_blocks_in_cache=false
|
|
||||||
data_block_index_type=kDataBlockBinarySearch
|
|
||||||
cache_index_and_filter_blocks_with_high_priority=true
|
|
||||||
whole_key_filtering=true
|
|
||||||
index_shortening=kShortenSeparators
|
|
||||||
cache_index_and_filter_blocks=false
|
|
||||||
block_align=false
|
|
||||||
optimize_filters_for_memory=true
|
|
||||||
flush_block_policy_factory=FlushBlockBySizePolicyFactory
|
|
||||||
|
|
||||||
|
|
@ -1,240 +0,0 @@
|
||||||
# Chainfire T003 Feature Gap Analysis
|
|
||||||
|
|
||||||
**Audit Date:** 2025-12-08
|
|
||||||
**Spec Version:** 1.0
|
|
||||||
**Implementation Path:** `/home/centra/cloud/chainfire/crates/`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Executive Summary
|
|
||||||
|
|
||||||
**Total Features Analyzed:** 32
|
|
||||||
**Implemented:** 20 (62.5%)
|
|
||||||
**Partially Implemented:** 5 (15.6%)
|
|
||||||
**Missing:** 7 (21.9%)
|
|
||||||
|
|
||||||
The core KV operations, Raft consensus, Watch functionality, and basic cluster management are implemented and functional. Critical gaps exist in TTL/Lease management, read consistency controls, and transaction completeness. Production readiness is blocked by missing lease service and lack of authentication.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Feature Gap Matrix
|
|
||||||
|
|
||||||
| Feature | Spec Section | Status | Priority | Complexity | Notes |
|
|
||||||
|---------|--------------|--------|----------|------------|-------|
|
|
||||||
| **Lease Service (TTL)** | 8.3, 4.1 | ❌ Missing | P0 | Medium (3-5d) | Protocol has lease field but no Lease gRPC service; critical for production |
|
|
||||||
| **TTL Expiration Logic** | 4.1, spec line 22-23 | ❌ Missing | P0 | Medium (3-5d) | lease_id stored but no background expiration worker |
|
|
||||||
| **Read Consistency Levels** | 4.1 | ❌ Missing | P0 | Small (1-2d) | Local/Serializable/Linearizable not implemented; all reads are undefined consistency |
|
|
||||||
| **Range Ops in Transactions** | 4.2, line 224-229 | ⚠️ Partial | P1 | Small (1-2d) | RequestOp has RangeRequest but returns dummy Delete op (kv_service.rs:224-229) |
|
|
||||||
| **Transaction Responses** | 3.1, kv_service.rs:194 | ⚠️ Partial | P1 | Small (1-2d) | TxnResponse.responses is empty vec; TODO comment in code |
|
|
||||||
| **Point-in-Time Reads** | 3.1, 7.3 | ⚠️ Partial | P1 | Medium (3-5d) | RangeRequest has revision field but KvStore doesn't use it |
|
|
||||||
| **StorageBackend Trait** | 3.3 | ❌ Missing | P1 | Medium (3-5d) | Spec defines trait (lines 166-174) but not in chainfire-core |
|
|
||||||
| **Prometheus Metrics** | 7.2 | ❌ Missing | P1 | Small (1-2d) | Spec mentions endpoint but no implementation |
|
|
||||||
| **Health Check Service** | 7.2 | ❌ Missing | P1 | Small (1d) | gRPC health check not visible |
|
|
||||||
| **Authentication** | 6.1 | ❌ Missing | P2 | Large (1w+) | Spec says "Planned"; mTLS for peers, tokens for clients |
|
|
||||||
| **Authorization/RBAC** | 6.2 | ❌ Missing | P2 | Large (1w+) | Requires IAM integration |
|
|
||||||
| **Namespace Quotas** | 6.3 | ❌ Missing | P2 | Medium (3-5d) | Per-namespace resource limits |
|
|
||||||
| **KV Service - Range** | 3.1 | ✅ Implemented | - | - | Single key, range scan, prefix scan all working |
|
|
||||||
| **KV Service - Put** | 3.1 | ✅ Implemented | - | - | Including prev_kv support |
|
|
||||||
| **KV Service - Delete** | 3.1 | ✅ Implemented | - | - | Single and range delete working |
|
|
||||||
| **KV Service - Txn (Basic)** | 3.1 | ✅ Implemented | - | - | Compare conditions and basic ops working |
|
|
||||||
| **Watch Service** | 3.1 | ✅ Implemented | - | - | Bidirectional streaming, create/cancel/progress |
|
|
||||||
| **Cluster Service - All** | 3.1 | ✅ Implemented | - | - | MemberAdd/Remove/List/Status all present |
|
|
||||||
| **Client Library - Core** | 3.2 | ✅ Implemented | - | - | Connect, put, get, delete, CAS implemented |
|
|
||||||
| **Client - Prefix Scan** | 3.2 | ✅ Implemented | - | - | get_prefix method exists |
|
|
||||||
| **ClusterEventHandler** | 3.3 | ✅ Implemented | - | - | All 8 callbacks defined in callbacks.rs |
|
|
||||||
| **KvEventHandler** | 3.3 | ✅ Implemented | - | - | on_key_changed, on_key_deleted, on_prefix_changed |
|
|
||||||
| **ClusterBuilder** | 3.4 | ✅ Implemented | - | - | Embeddable library with builder pattern |
|
|
||||||
| **MVCC Support** | 4.3 | ✅ Implemented | - | - | Global revision counter, create/mod revisions tracked |
|
|
||||||
| **RocksDB Storage** | 4.3 | ✅ Implemented | - | - | Column families: raft_logs, raft_meta, key_value, snapshot |
|
|
||||||
| **Raft Integration** | 2.0 | ✅ Implemented | - | - | OpenRaft 0.9 integrated, Vote/AppendEntries/Snapshot RPCs |
|
|
||||||
| **SWIM Gossip** | 2.1 | ⚠️ Present | P2 | - | chainfire-gossip crate exists but integration unclear |
|
|
||||||
| **Server Binary** | 7.1 | ✅ Implemented | - | - | CLI with config file, env vars, bootstrap support |
|
|
||||||
| **Config Management** | 5.0 | ✅ Implemented | - | - | TOML config, env vars, CLI overrides |
|
|
||||||
| **Watch - Historical Replay** | 3.1 | ⚠️ Partial | P2 | Medium (3-5d) | start_revision exists in proto but historical storage unclear |
|
|
||||||
| **Snapshot & Backup** | 7.3 | ⚠️ Partial | P2 | Small (1-2d) | Raft snapshot exists but manual backup procedure not documented |
|
|
||||||
| **etcd Compatibility** | 8.3 | ⚠️ Partial | P2 | - | API similar but package names differ; missing Lease service breaks compatibility |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Critical Gaps (P0)
|
|
||||||
|
|
||||||
### 1. Lease Service & TTL Expiration
|
|
||||||
**Impact:** Blocks production use cases requiring automatic key expiration (sessions, locks, ephemeral data)
|
|
||||||
|
|
||||||
**Evidence:**
|
|
||||||
- `/home/centra/cloud/chainfire/proto/chainfire.proto` has no `Lease` service definition
|
|
||||||
- `KvEntry` has `lease_id: Option<i64>` field (types/kv.rs:23) but no expiration logic
|
|
||||||
- No background worker to delete expired keys
|
|
||||||
- etcd compatibility broken without Lease service
|
|
||||||
|
|
||||||
**Fix Required:**
|
|
||||||
1. Add Lease service to proto: `LeaseGrant`, `LeaseRevoke`, `LeaseKeepAlive`, `LeaseTimeToLive`
|
|
||||||
2. Implement lease storage and expiration worker in chainfire-storage
|
|
||||||
3. Wire lease_id checks to KV operations
|
|
||||||
4. Add lease_id index for efficient expiration queries
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### 2. Read Consistency Levels
|
|
||||||
**Impact:** Cannot guarantee linearizable reads; stale reads possible on followers
|
|
||||||
|
|
||||||
**Evidence:**
|
|
||||||
- Spec defines `ReadConsistency` enum (spec lines 208-215)
|
|
||||||
- No implementation in chainfire-storage or chainfire-api
|
|
||||||
- RangeRequest in kv_service.rs always reads from local storage without consistency checks
|
|
||||||
|
|
||||||
**Fix Required:**
|
|
||||||
1. Add consistency parameter to RangeRequest
|
|
||||||
2. Implement leader verification for Linearizable reads
|
|
||||||
3. Add committed index check for Serializable reads
|
|
||||||
4. Default to Linearizable for safety
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### 3. Range Operations in Transactions
|
|
||||||
**Impact:** Cannot atomically read-then-write in transactions; limits CAS use cases
|
|
||||||
|
|
||||||
**Evidence:**
|
|
||||||
```rust
|
|
||||||
// /home/centra/cloud/chainfire/crates/chainfire-api/src/kv_service.rs:224-229
|
|
||||||
crate::proto::request_op::Request::RequestRange(_) => {
|
|
||||||
// Range operations in transactions are not supported yet
|
|
||||||
TxnOp::Delete { key: vec![] } // Returns dummy operation!
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Fix Required:**
|
|
||||||
1. Extend `chainfire_types::command::TxnOp` to include `Range` variant
|
|
||||||
2. Update state_machine.rs to handle read operations in transactions
|
|
||||||
3. Return range results in TxnResponse.responses
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Important Gaps (P1)
|
|
||||||
|
|
||||||
### 4. Transaction Response Completeness
|
|
||||||
**Evidence:**
|
|
||||||
```rust
|
|
||||||
// /home/centra/cloud/chainfire/crates/chainfire-api/src/kv_service.rs:194
|
|
||||||
Ok(Response::new(TxnResponse {
|
|
||||||
header: Some(self.make_header(response.revision)),
|
|
||||||
succeeded: response.succeeded,
|
|
||||||
responses: vec![], // TODO: fill in responses
|
|
||||||
}))
|
|
||||||
```
|
|
||||||
|
|
||||||
**Fix:** Collect operation results during txn execution and populate responses vector
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### 5. Point-in-Time Reads (MVCC Historical Queries)
|
|
||||||
**Evidence:**
|
|
||||||
- RangeRequest has `revision` field (proto/chainfire.proto:78)
|
|
||||||
- KvStore.range() doesn't use revision parameter
|
|
||||||
- No revision-indexed storage in RocksDB
|
|
||||||
|
|
||||||
**Fix:** Implement versioned key storage or revision-based snapshots
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### 6. StorageBackend Trait Abstraction
|
|
||||||
**Evidence:**
|
|
||||||
- Spec defines trait (lines 166-174) for pluggable backends
|
|
||||||
- chainfire-storage is RocksDB-only
|
|
||||||
- No trait in chainfire-core/src/
|
|
||||||
|
|
||||||
**Fix:** Extract trait and implement for RocksDB; enables memory backend testing
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### 7. Observability
|
|
||||||
**Gaps:**
|
|
||||||
- No Prometheus metrics (spec mentions endpoint at 7.2)
|
|
||||||
- No gRPC health check service
|
|
||||||
- Limited structured logging
|
|
||||||
|
|
||||||
**Fix:** Add metrics crate, implement health checks, expose /metrics endpoint
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Nice-to-Have Gaps (P2)
|
|
||||||
|
|
||||||
- **Authentication/Authorization:** Spec marks as "Planned" - mTLS and RBAC
|
|
||||||
- **Namespace Quotas:** Resource limits per tenant
|
|
||||||
- **SWIM Gossip Integration:** chainfire-gossip crate exists but usage unclear
|
|
||||||
- **Watch Historical Replay:** start_revision in proto but storage unclear
|
|
||||||
- **Advanced etcd Compat:** Package name differences, field naming variations
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Key Findings
|
|
||||||
|
|
||||||
### Strengths
|
|
||||||
1. **Solid Core Implementation:** KV operations, Raft consensus, and basic transactions work well
|
|
||||||
2. **Watch System:** Fully functional with bidirectional streaming and event dispatch
|
|
||||||
3. **Client Library:** Well-designed with CAS and convenience methods
|
|
||||||
4. **Architecture:** Clean separation of concerns across crates
|
|
||||||
5. **Testing:** State machine has unit tests for core operations
|
|
||||||
|
|
||||||
### Weaknesses
|
|
||||||
1. **Incomplete Transactions:** Missing range ops and response population breaks advanced use cases
|
|
||||||
2. **No TTL Support:** Critical for production; requires full Lease service implementation
|
|
||||||
3. **Undefined Read Consistency:** Dangerous for distributed systems; needs immediate attention
|
|
||||||
4. **Limited Observability:** No metrics or health checks hinders production deployment
|
|
||||||
|
|
||||||
### Blockers for Production
|
|
||||||
1. Lease service implementation (P0)
|
|
||||||
2. Read consistency guarantees (P0)
|
|
||||||
3. Transaction completeness (P1)
|
|
||||||
4. Basic metrics/health checks (P1)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Recommendations
|
|
||||||
|
|
||||||
### Phase 1: Production Readiness (2-3 weeks)
|
|
||||||
1. Implement Lease service and TTL expiration worker
|
|
||||||
2. Add read consistency levels (default to Linearizable)
|
|
||||||
3. Complete transaction responses
|
|
||||||
4. Add basic Prometheus metrics and health checks
|
|
||||||
|
|
||||||
### Phase 2: Feature Completeness (1-2 weeks)
|
|
||||||
1. Support range operations in transactions
|
|
||||||
2. Implement point-in-time reads
|
|
||||||
3. Extract StorageBackend trait
|
|
||||||
4. Document and test SWIM gossip integration
|
|
||||||
|
|
||||||
### Phase 3: Hardening (2-3 weeks)
|
|
||||||
1. Add authentication (mTLS for peers)
|
|
||||||
2. Implement basic authorization
|
|
||||||
3. Add namespace quotas
|
|
||||||
4. Comprehensive integration tests
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Appendix: Implementation Evidence
|
|
||||||
|
|
||||||
### Transaction Compare Logic
|
|
||||||
**Location:** `/home/centra/cloud/chainfire/crates/chainfire-storage/src/state_machine.rs:148-228`
|
|
||||||
- ✅ Supports Version, CreateRevision, ModRevision, Value comparisons
|
|
||||||
- ✅ Handles Equal, NotEqual, Greater, Less operators
|
|
||||||
- ✅ Atomic execution of success/failure ops
|
|
||||||
|
|
||||||
### Watch Implementation
|
|
||||||
**Location:** `/home/centra/cloud/chainfire/crates/chainfire-watch/`
|
|
||||||
- ✅ WatchRegistry with event dispatch
|
|
||||||
- ✅ WatchStream for bidirectional gRPC
|
|
||||||
- ✅ KeyMatcher for prefix/range watches
|
|
||||||
- ✅ Integration with state machine (state_machine.rs:82-88)
|
|
||||||
|
|
||||||
### Client CAS Example
|
|
||||||
**Location:** `/home/centra/cloud/chainfire/chainfire-client/src/client.rs:228-299`
|
|
||||||
- ✅ Uses transactions for compare-and-swap
|
|
||||||
- ✅ Returns CasOutcome with current/new versions
|
|
||||||
- ⚠️ Fallback read on failure uses range op (demonstrates txn range gap)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
**Report Generated:** 2025-12-08
|
|
||||||
**Auditor:** Claude Code Agent
|
|
||||||
**Next Review:** After Phase 1 implementation
|
|
||||||
1165
coronafs/Cargo.lock
generated
Normal file
1165
coronafs/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
32
coronafs/Cargo.toml
Normal file
32
coronafs/Cargo.toml
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
[workspace]
|
||||||
|
resolver = "2"
|
||||||
|
members = [
|
||||||
|
"crates/coronafs-server",
|
||||||
|
]
|
||||||
|
|
||||||
|
[workspace.package]
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
license = "MIT OR Apache-2.0"
|
||||||
|
rust-version = "1.75"
|
||||||
|
authors = ["PhotonCloud Contributors"]
|
||||||
|
repository = "https://github.com/photoncloud/photoncloud"
|
||||||
|
|
||||||
|
[workspace.dependencies]
|
||||||
|
axum = "0.8"
|
||||||
|
clap = { version = "4", features = ["derive"] }
|
||||||
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
|
serde_json = "1.0"
|
||||||
|
tokio = { version = "1.40", features = ["full"] }
|
||||||
|
toml = "0.8"
|
||||||
|
tracing = "0.1"
|
||||||
|
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||||
|
anyhow = "1.0"
|
||||||
|
thiserror = "1.0"
|
||||||
|
chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] }
|
||||||
|
|
||||||
|
[workspace.lints.rust]
|
||||||
|
unsafe_code = "deny"
|
||||||
|
|
||||||
|
[workspace.lints.clippy]
|
||||||
|
all = "warn"
|
||||||
18
coronafs/README.md
Normal file
18
coronafs/README.md
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
# CoronaFS
|
||||||
|
|
||||||
|
CoronaFS is PhotonCloud's mutable VM-volume layer.
|
||||||
|
|
||||||
|
Current implementation:
|
||||||
|
|
||||||
|
- custom block backend, not NFS
|
||||||
|
- control API on `coronafs-server`
|
||||||
|
- raw volume data stored under `/var/lib/coronafs/volumes`
|
||||||
|
- exported to workers as `nbd://` targets via `qemu-nbd`
|
||||||
|
- primary consumer: `plasmavmc` managed VM volumes
|
||||||
|
|
||||||
|
Intentional split:
|
||||||
|
|
||||||
|
- mutable VM volumes live on CoronaFS
|
||||||
|
- immutable VM images live in LightningStor object storage
|
||||||
|
|
||||||
|
This keeps VM root/data disks on a shared block path while leaving image distribution on the object layer.
|
||||||
27
coronafs/crates/coronafs-server/Cargo.toml
Normal file
27
coronafs/crates/coronafs-server/Cargo.toml
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
[package]
|
||||||
|
name = "coronafs-server"
|
||||||
|
version.workspace = true
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
rust-version.workspace = true
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "coronafs-server"
|
||||||
|
path = "src/main.rs"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
axum = { workspace = true }
|
||||||
|
clap = { workspace = true }
|
||||||
|
serde = { workspace = true }
|
||||||
|
serde_json = { workspace = true }
|
||||||
|
tokio = { workspace = true }
|
||||||
|
toml = { workspace = true }
|
||||||
|
tracing = { workspace = true }
|
||||||
|
tracing-subscriber = { workspace = true }
|
||||||
|
anyhow = { workspace = true }
|
||||||
|
thiserror = { workspace = true }
|
||||||
|
chrono = { workspace = true }
|
||||||
|
futures-util = "0.3"
|
||||||
|
|
||||||
|
[lints]
|
||||||
|
workspace = true
|
||||||
61
coronafs/crates/coronafs-server/src/config.rs
Normal file
61
coronafs/crates/coronafs-server/src/config.rs
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::net::SocketAddr;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
#[serde(default)]
|
||||||
|
pub struct ServerConfig {
|
||||||
|
pub listen_addr: SocketAddr,
|
||||||
|
pub advertise_host: String,
|
||||||
|
pub data_dir: PathBuf,
|
||||||
|
pub export_bind_addr: String,
|
||||||
|
pub export_base_port: u16,
|
||||||
|
pub export_port_count: u16,
|
||||||
|
pub export_shared_clients: u16,
|
||||||
|
pub export_cache_mode: String,
|
||||||
|
pub export_aio_mode: String,
|
||||||
|
pub export_discard_mode: String,
|
||||||
|
pub export_detect_zeroes_mode: String,
|
||||||
|
pub preallocate: bool,
|
||||||
|
pub sync_on_write: bool,
|
||||||
|
pub qemu_nbd_path: PathBuf,
|
||||||
|
pub qemu_img_path: PathBuf,
|
||||||
|
pub log_level: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for ServerConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
listen_addr: "0.0.0.0:50088".parse().expect("valid listen addr"),
|
||||||
|
advertise_host: "127.0.0.1".to_string(),
|
||||||
|
data_dir: PathBuf::from("/var/lib/coronafs"),
|
||||||
|
export_bind_addr: "0.0.0.0".to_string(),
|
||||||
|
export_base_port: 11000,
|
||||||
|
export_port_count: 512,
|
||||||
|
export_shared_clients: 32,
|
||||||
|
export_cache_mode: "none".to_string(),
|
||||||
|
export_aio_mode: "io_uring".to_string(),
|
||||||
|
export_discard_mode: "unmap".to_string(),
|
||||||
|
export_detect_zeroes_mode: "unmap".to_string(),
|
||||||
|
preallocate: true,
|
||||||
|
sync_on_write: false,
|
||||||
|
qemu_nbd_path: PathBuf::from("qemu-nbd"),
|
||||||
|
qemu_img_path: PathBuf::from("qemu-img"),
|
||||||
|
log_level: "info".to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ServerConfig {
|
||||||
|
pub fn volume_dir(&self) -> PathBuf {
|
||||||
|
self.data_dir.join("volumes")
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn metadata_dir(&self) -> PathBuf {
|
||||||
|
self.data_dir.join("metadata")
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn pid_dir(&self) -> PathBuf {
|
||||||
|
self.data_dir.join("pids")
|
||||||
|
}
|
||||||
|
}
|
||||||
748
coronafs/crates/coronafs-server/src/main.rs
Normal file
748
coronafs/crates/coronafs-server/src/main.rs
Normal file
|
|
@ -0,0 +1,748 @@
|
||||||
|
mod config;
|
||||||
|
|
||||||
|
use anyhow::{anyhow, Context, Result};
|
||||||
|
use axum::body::Body;
|
||||||
|
use axum::extract::{Path, Query, State};
|
||||||
|
use axum::http::StatusCode;
|
||||||
|
use axum::response::{IntoResponse, Response};
|
||||||
|
use axum::routing::{get, post, put};
|
||||||
|
use axum::{Json, Router};
|
||||||
|
use clap::Parser;
|
||||||
|
use config::ServerConfig;
|
||||||
|
use futures_util::StreamExt;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
#[cfg(unix)]
|
||||||
|
use std::os::unix::fs::PermissionsExt;
|
||||||
|
use std::collections::{HashMap, HashSet};
|
||||||
|
use std::path::{Path as FsPath, PathBuf};
|
||||||
|
use std::sync::Arc;
|
||||||
|
use tokio::fs;
|
||||||
|
use tokio::io::AsyncWriteExt;
|
||||||
|
use tokio::process::Command;
|
||||||
|
use tokio::sync::Mutex;
|
||||||
|
use tracing_subscriber::EnvFilter;
|
||||||
|
|
||||||
|
// Command-line arguments for the CoronaFS server binary.
// (Plain `//` comments on purpose: clap turns doc comments into --help text.)
#[derive(Parser, Debug)]
#[command(author, version, about)]
struct Args {
    // Path to the TOML configuration file; defaults are used if absent.
    #[arg(short, long, default_value = "coronafs.toml")]
    config: PathBuf,
}
|
||||||
|
|
||||||
|
/// On-disk metadata document, persisted as JSON per volume.
/// NOTE: serde field names are part of the on-disk format — do not rename.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct VolumeMetadata {
    /// Caller-chosen volume identifier (also used in file names).
    id: String,
    /// Logical volume size in bytes.
    size_bytes: u64,
    /// NBD export port, when an export has been started.
    port: Option<u16>,
    /// qemu-nbd process id for the running export, if any.
    export_pid: Option<u32>,
    /// RFC 3339 timestamps maintained by the server.
    created_at: String,
    updated_at: String,
}
|
||||||
|
|
||||||
|
/// API response describing a volume and its optional live export.
#[derive(Debug, Serialize)]
struct VolumeResponse {
    id: String,
    size_bytes: u64,
    /// Path of the backing raw image on this host.
    path: String,
    /// Present only while a qemu-nbd export process is running.
    export: Option<ExportResponse>,
}
|
||||||
|
|
||||||
|
/// Connection details for a running NBD export.
#[derive(Debug, Serialize)]
struct ExportResponse {
    /// nbd:// URI built from the configured advertise host and the port.
    uri: String,
    port: u16,
    /// qemu-nbd pid, when known.
    pid: Option<u32>,
}
|
||||||
|
|
||||||
|
/// Request body for PUT /v1/volumes/{id}.
#[derive(Debug, Deserialize)]
struct CreateVolumeRequest {
    /// Requested logical size of the new volume, in bytes.
    size_bytes: u64,
}
|
||||||
|
|
||||||
|
/// Request body for POST /v1/volumes/{id}/resize.
#[derive(Debug, Deserialize)]
struct ResizeVolumeRequest {
    /// New logical size of the volume, in bytes.
    size_bytes: u64,
}
|
||||||
|
|
||||||
|
/// Query parameters for PUT /v1/volumes/{id}/import.
#[derive(Debug, Deserialize)]
struct ImportQuery {
    /// Optional final size: when set, the file is preallocated and the
    /// upload is truncated/extended to exactly this many bytes.
    size_bytes: Option<u64>,
}
|
||||||
|
|
||||||
|
/// Query parameters for POST /v1/volumes/{id}/export.
#[derive(Debug, Deserialize)]
struct ExportQuery {
    /// Export the volume read-only when true (defaults to false).
    read_only: Option<bool>,
}
|
||||||
|
|
||||||
|
/// Shared, cheaply cloneable server state handed to every axum handler.
#[derive(Clone)]
struct AppState {
    config: Arc<ServerConfig>,
    /// Per-volume mutexes serializing mutating operations on one volume id.
    volume_guards: Arc<Mutex<HashMap<String, Arc<Mutex<()>>>>>,
    /// Export ports currently believed to be in use on this host.
    reserved_ports: Arc<Mutex<HashSet<u16>>>,
}
|
||||||
|
|
||||||
|
impl AppState {
|
||||||
|
async fn new(config: ServerConfig) -> Result<Self> {
|
||||||
|
prepare_dirs(&config).await?;
|
||||||
|
let reserved_ports = collect_reserved_ports(&config).await?;
|
||||||
|
Ok(Self {
|
||||||
|
config: Arc::new(config),
|
||||||
|
volume_guards: Arc::new(Mutex::new(HashMap::new())),
|
||||||
|
reserved_ports: Arc::new(Mutex::new(reserved_ports)),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn volume_guard(&self, volume_id: &str) -> Arc<Mutex<()>> {
|
||||||
|
let mut guards = self.volume_guards.lock().await;
|
||||||
|
guards
|
||||||
|
.entry(volume_id.to_string())
|
||||||
|
.or_insert_with(|| Arc::new(Mutex::new(())))
|
||||||
|
.clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// API error carrying an HTTP status and a human-readable message,
/// rendered as JSON by its `IntoResponse` impl.
#[derive(Debug)]
struct ApiError {
    status: StatusCode,
    message: String,
}
|
||||||
|
|
||||||
|
impl ApiError {
|
||||||
|
fn new(status: StatusCode, message: impl Into<String>) -> Self {
|
||||||
|
Self {
|
||||||
|
status,
|
||||||
|
message: message.into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn not_found(message: impl Into<String>) -> Self {
|
||||||
|
Self::new(StatusCode::NOT_FOUND, message)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn internal(err: anyhow::Error) -> Self {
|
||||||
|
Self::new(StatusCode::INTERNAL_SERVER_ERROR, err.to_string())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl IntoResponse for ApiError {
|
||||||
|
fn into_response(self) -> Response {
|
||||||
|
(
|
||||||
|
self.status,
|
||||||
|
Json(serde_json::json!({
|
||||||
|
"error": self.message,
|
||||||
|
})),
|
||||||
|
)
|
||||||
|
.into_response()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handler result: a JSON payload on success, an `ApiError` otherwise.
type ApiResult<T> = Result<Json<T>, ApiError>;
|
||||||
|
|
||||||
|
/// Server entry point: load config, initialize tracing, build the axum
/// router, and serve until the process is terminated.
#[tokio::main]
async fn main() -> Result<()> {
    let args = Args::parse();
    // A missing config file is not an error: fall back to built-in defaults.
    let config = if args.config.exists() {
        let contents = fs::read_to_string(&args.config)
            .await
            .with_context(|| format!("failed to read config {}", args.config.display()))?;
        toml::from_str::<ServerConfig>(&contents)
            .with_context(|| format!("failed to parse config {}", args.config.display()))?
    } else {
        ServerConfig::default()
    };

    // The RUST_LOG environment filter wins; otherwise use the configured level.
    tracing_subscriber::fmt()
        .with_env_filter(
            EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(&config.log_level)),
        )
        .init();

    // Copy the listen address out before `config` is moved into the state.
    let listen_addr = config.listen_addr;
    let state = AppState::new(config).await?;

    // Volume lifecycle API; `{id}` is the caller-chosen volume identifier.
    let app = Router::new()
        .route("/healthz", get(healthz))
        .route("/v1/volumes/{id}", put(create_blank_volume).get(get_volume).delete(delete_volume))
        .route("/v1/volumes/{id}/import", put(import_volume))
        .route("/v1/volumes/{id}/resize", post(resize_volume))
        .route("/v1/volumes/{id}/export", post(ensure_export))
        .with_state(state);

    tracing::info!(%listen_addr, "starting CoronaFS server");
    let listener = tokio::net::TcpListener::bind(listen_addr).await?;
    axum::serve(listener, app).await?;
    Ok(())
}
|
||||||
|
|
||||||
|
async fn healthz() -> Json<serde_json::Value> {
|
||||||
|
Json(serde_json::json!({"status": "ok"}))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_blank_volume(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Path(id): Path<String>,
|
||||||
|
Json(req): Json<CreateVolumeRequest>,
|
||||||
|
) -> ApiResult<VolumeResponse> {
|
||||||
|
let volume_guard = state.volume_guard(&id).await;
|
||||||
|
let _guard = volume_guard.lock().await;
|
||||||
|
create_blank_impl(&state, &id, req.size_bytes)
|
||||||
|
.await
|
||||||
|
.map(Json)
|
||||||
|
.map_err(ApiError::internal)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn import_volume(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Path(id): Path<String>,
|
||||||
|
Query(query): Query<ImportQuery>,
|
||||||
|
body: Body,
|
||||||
|
) -> ApiResult<VolumeResponse> {
|
||||||
|
let volume_guard = state.volume_guard(&id).await;
|
||||||
|
let _guard = volume_guard.lock().await;
|
||||||
|
import_impl(&state, &id, query.size_bytes, body)
|
||||||
|
.await
|
||||||
|
.map(Json)
|
||||||
|
.map_err(ApiError::internal)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_volume(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Path(id): Path<String>,
|
||||||
|
) -> ApiResult<VolumeResponse> {
|
||||||
|
load_response(&state, &id)
|
||||||
|
.await
|
||||||
|
.ok_or_else(|| ApiError::not_found(format!("volume {id} not found")))
|
||||||
|
.map(Json)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn ensure_export(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Path(id): Path<String>,
|
||||||
|
Query(query): Query<ExportQuery>,
|
||||||
|
) -> ApiResult<VolumeResponse> {
|
||||||
|
let volume_guard = state.volume_guard(&id).await;
|
||||||
|
let _guard = volume_guard.lock().await;
|
||||||
|
ensure_export_impl(&state, &id, query.read_only.unwrap_or(false))
|
||||||
|
.await
|
||||||
|
.map(Json)
|
||||||
|
.map_err(ApiError::internal)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn resize_volume(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Path(id): Path<String>,
|
||||||
|
Json(req): Json<ResizeVolumeRequest>,
|
||||||
|
) -> ApiResult<VolumeResponse> {
|
||||||
|
let volume_guard = state.volume_guard(&id).await;
|
||||||
|
let _guard = volume_guard.lock().await;
|
||||||
|
resize_impl(&state, &id, req.size_bytes)
|
||||||
|
.await
|
||||||
|
.map(Json)
|
||||||
|
.map_err(ApiError::internal)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn delete_volume(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Path(id): Path<String>,
|
||||||
|
) -> Result<StatusCode, ApiError> {
|
||||||
|
let volume_guard = state.volume_guard(&id).await;
|
||||||
|
let _guard = volume_guard.lock().await;
|
||||||
|
delete_impl(&state, &id)
|
||||||
|
.await
|
||||||
|
.map(|_| StatusCode::NO_CONTENT)
|
||||||
|
.map_err(ApiError::internal)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates the volume, metadata, and pid directories if they are missing.
async fn prepare_dirs(config: &ServerConfig) -> Result<()> {
    for dir in [config.volume_dir(), config.metadata_dir(), config.pid_dir()] {
        fs::create_dir_all(dir).await?;
    }
    Ok(())
}
|
||||||
|
|
||||||
|
async fn create_blank_impl(state: &AppState, id: &str, size_bytes: u64) -> Result<VolumeResponse> {
|
||||||
|
let path = volume_path(&state.config, id);
|
||||||
|
let meta_path = metadata_path(&state.config, id);
|
||||||
|
if fs::try_exists(&meta_path).await.unwrap_or(false) {
|
||||||
|
return load_response_required(state, id).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
if state.config.preallocate {
|
||||||
|
let status = Command::new("fallocate")
|
||||||
|
.args(["-l", &size_bytes.to_string(), path.to_string_lossy().as_ref()])
|
||||||
|
.status()
|
||||||
|
.await;
|
||||||
|
match status {
|
||||||
|
Ok(status) if status.success() => {}
|
||||||
|
_ => {
|
||||||
|
let file = fs::File::create(&path).await?;
|
||||||
|
file.set_len(size_bytes).await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let file = fs::File::create(&path).await?;
|
||||||
|
file.set_len(size_bytes).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let meta = VolumeMetadata {
|
||||||
|
id: id.to_string(),
|
||||||
|
size_bytes,
|
||||||
|
port: None,
|
||||||
|
export_pid: None,
|
||||||
|
created_at: chrono::Utc::now().to_rfc3339(),
|
||||||
|
updated_at: chrono::Utc::now().to_rfc3339(),
|
||||||
|
};
|
||||||
|
ensure_volume_file_permissions(&path).await?;
|
||||||
|
save_metadata(&meta_path, &meta).await?;
|
||||||
|
load_response_required(state, id).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn import_impl(
|
||||||
|
state: &AppState,
|
||||||
|
id: &str,
|
||||||
|
size_bytes: Option<u64>,
|
||||||
|
body: Body,
|
||||||
|
) -> Result<VolumeResponse> {
|
||||||
|
let path = volume_path(&state.config, id);
|
||||||
|
let meta_path = metadata_path(&state.config, id);
|
||||||
|
let tmp_path = temp_import_path(&state.config, id);
|
||||||
|
if let Some(size_bytes) = size_bytes {
|
||||||
|
create_or_preallocate_file(&tmp_path, size_bytes, state.config.preallocate).await?;
|
||||||
|
}
|
||||||
|
let mut stream = body.into_data_stream();
|
||||||
|
let mut file = fs::OpenOptions::new()
|
||||||
|
.create(true)
|
||||||
|
.write(true)
|
||||||
|
.truncate(size_bytes.is_none())
|
||||||
|
.open(&tmp_path)
|
||||||
|
.await
|
||||||
|
.with_context(|| format!("failed to create {}", tmp_path.display()))?;
|
||||||
|
let mut bytes_written = 0u64;
|
||||||
|
while let Some(chunk) = stream.next().await {
|
||||||
|
let chunk = chunk.context("failed to read request body chunk")?;
|
||||||
|
bytes_written = bytes_written.saturating_add(chunk.len() as u64);
|
||||||
|
file.write_all(&chunk)
|
||||||
|
.await
|
||||||
|
.with_context(|| format!("failed to write {}", tmp_path.display()))?;
|
||||||
|
}
|
||||||
|
if let Some(size_bytes) = size_bytes {
|
||||||
|
file.set_len(size_bytes).await?;
|
||||||
|
}
|
||||||
|
if state.config.sync_on_write {
|
||||||
|
file.sync_all().await?;
|
||||||
|
}
|
||||||
|
drop(file);
|
||||||
|
fs::rename(&tmp_path, &path).await?;
|
||||||
|
ensure_volume_file_permissions(&path).await?;
|
||||||
|
let actual_size = fs::metadata(&path).await?.len();
|
||||||
|
let meta = VolumeMetadata {
|
||||||
|
id: id.to_string(),
|
||||||
|
size_bytes: size_bytes.unwrap_or(actual_size),
|
||||||
|
port: None,
|
||||||
|
export_pid: None,
|
||||||
|
created_at: chrono::Utc::now().to_rfc3339(),
|
||||||
|
updated_at: chrono::Utc::now().to_rfc3339(),
|
||||||
|
};
|
||||||
|
save_metadata(&meta_path, &meta).await?;
|
||||||
|
tracing::info!(
|
||||||
|
volume_id = id,
|
||||||
|
bytes_written,
|
||||||
|
volume_size = actual_size,
|
||||||
|
"Imported raw volume into CoronaFS"
|
||||||
|
);
|
||||||
|
load_response_required(state, id).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resizes volume `id` to `size_bytes` via `qemu-img resize`.
///
/// Any running export is stopped first (qemu-nbd holds the image open) and
/// its port reservation released; callers must re-export afterwards.
/// NOTE(review): qemu-img refuses to shrink an image without `--shrink`,
/// so shrink requests will fail at the qemu-img step — confirm whether
/// shrinking is meant to be supported.
async fn resize_impl(state: &AppState, id: &str, size_bytes: u64) -> Result<VolumeResponse> {
    let meta_path = metadata_path(&state.config, id);
    let path = volume_path(&state.config, id);
    let mut meta = load_metadata(&meta_path)
        .await?
        .ok_or_else(|| anyhow!("volume {id} not found"))?;

    // Remember the port before stop_export clears pid state, then release it.
    let reserved_port = meta.port;
    stop_export_if_running(&state.config, id, &mut meta).await?;
    release_export_port(state, reserved_port).await;
    meta.port = None;
    let status = Command::new(&state.config.qemu_img_path)
        .args([
            "resize",
            "-f",
            "raw",
            path.to_string_lossy().as_ref(),
            &size_bytes.to_string(),
        ])
        .status()
        .await
        .context("failed to spawn qemu-img resize")?;
    if !status.success() {
        return Err(anyhow!("qemu-img resize failed for {}", path.display()));
    }
    meta.size_bytes = size_bytes;
    meta.updated_at = chrono::Utc::now().to_rfc3339();
    save_metadata(&meta_path, &meta).await?;
    load_response_required(state, id).await
}
|
||||||
|
|
||||||
|
/// Ensures an NBD export is running for volume `id` and returns its state.
///
/// If metadata records a live qemu-nbd pid, the existing export is reused
/// (its port reservation is re-marked in case it was lost). Otherwise a
/// port is reserved, qemu-nbd is spawned with --fork, its pid file is read
/// back, metadata is saved, and the listener is probed before returning.
/// On each failure path the port reservation is rolled back.
async fn ensure_export_impl(state: &AppState, id: &str, read_only: bool) -> Result<VolumeResponse> {
    let meta_path = metadata_path(&state.config, id);
    let mut meta = load_metadata(&meta_path)
        .await?
        .ok_or_else(|| anyhow!("volume {id} not found"))?;
    // Fast path: export already running — reuse it.
    if let Some(pid) = meta.export_pid {
        if process_running(pid).await {
            if let Some(port) = meta.port {
                mark_port_reserved(state, port).await;
            }
            return load_response_required(state, id).await;
        }
    }
    // Prefer the previously recorded port to keep URIs stable.
    let port = reserve_export_port(state, meta.port).await?;
    let pid_path = pid_path(&state.config, id);
    let path = volume_path(&state.config, id);
    // Downgrade aio=native when the cache mode makes it invalid (see export_aio_mode).
    let effective_aio_mode = export_aio_mode(&state.config.export_cache_mode, &state.config.export_aio_mode);
    let mut command = Command::new(&state.config.qemu_nbd_path);
    command.args([
        "--fork",
        "--persistent",
        "--pid-file",
        pid_path.to_string_lossy().as_ref(),
        "--shared",
        &state.config.export_shared_clients.to_string(),
        "--cache",
        &state.config.export_cache_mode,
        "--aio",
        effective_aio_mode,
        "--discard",
        &state.config.export_discard_mode,
        "--detect-zeroes",
        &state.config.export_detect_zeroes_mode,
        "--format",
        "raw",
        "--bind",
        &state.config.export_bind_addr,
        "--port",
        &port.to_string(),
    ]);
    if read_only {
        command.arg("--read-only");
    }
    command.arg(path.to_string_lossy().as_ref());
    let status = command
        .status()
        .await
        .context("failed to spawn qemu-nbd")?;
    if !status.success() {
        // Roll back the reservation so the port can be reused.
        release_export_port(state, Some(port)).await;
        return Err(anyhow!("qemu-nbd failed to export volume {id} on port {port}"));
    }
    // --fork means the pid file may appear slightly later; read_pid_file polls.
    let pid = match read_pid_file(&pid_path).await {
        Ok(pid) => pid,
        Err(err) => {
            release_export_port(state, Some(port)).await;
            return Err(err);
        }
    };
    meta.port = Some(port);
    meta.export_pid = Some(pid);
    meta.updated_at = chrono::Utc::now().to_rfc3339();
    save_metadata(&meta_path, &meta).await?;
    // Confirm the export actually accepts connections; tear it down otherwise.
    if let Err(err) = wait_for_tcp_listen(export_probe_host(&state.config), port).await {
        let _ = stop_export_if_running(&state.config, id, &mut meta).await;
        release_export_port(state, Some(port)).await;
        return Err(err);
    }
    load_response_required(state, id).await
}
|
||||||
|
|
||||||
|
fn export_aio_mode<'a>(cache_mode: &str, aio_mode: &'a str) -> &'a str {
|
||||||
|
if aio_mode == "native" && !matches!(cache_mode, "none" | "directsync") {
|
||||||
|
tracing::warn!(
|
||||||
|
cache_mode,
|
||||||
|
requested_aio_mode = aio_mode,
|
||||||
|
effective_aio_mode = "threads",
|
||||||
|
"CoronaFS export cache mode is incompatible with qemu-nbd native AIO; falling back to threads",
|
||||||
|
);
|
||||||
|
"threads"
|
||||||
|
} else {
|
||||||
|
aio_mode
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn delete_impl(state: &AppState, id: &str) -> Result<()> {
|
||||||
|
let meta_path = metadata_path(&state.config, id);
|
||||||
|
if let Some(mut meta) = load_metadata(&meta_path).await? {
|
||||||
|
let reserved_port = meta.port;
|
||||||
|
stop_export_if_running(&state.config, id, &mut meta).await?;
|
||||||
|
release_export_port(state, reserved_port).await;
|
||||||
|
}
|
||||||
|
let path = volume_path(&state.config, id);
|
||||||
|
if fs::try_exists(&path).await.unwrap_or(false) {
|
||||||
|
fs::remove_file(&path).await?;
|
||||||
|
}
|
||||||
|
if fs::try_exists(&meta_path).await.unwrap_or(false) {
|
||||||
|
fs::remove_file(&meta_path).await?;
|
||||||
|
}
|
||||||
|
let pid_path = pid_path(&state.config, id);
|
||||||
|
if fs::try_exists(&pid_path).await.unwrap_or(false) {
|
||||||
|
fs::remove_file(pid_path).await?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn load_response(state: &AppState, id: &str) -> Option<VolumeResponse> {
|
||||||
|
match load_response_required(state, id).await {
|
||||||
|
Ok(response) => Some(response),
|
||||||
|
Err(_) => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builds the API view of volume `id` from its persisted metadata.
///
/// An export entry is reported only when a port is recorded AND the
/// recorded qemu-nbd pid is still alive, so stale metadata presents the
/// volume as unexported rather than advertising a dead endpoint.
async fn load_response_required(state: &AppState, id: &str) -> Result<VolumeResponse> {
    let meta = load_metadata(&metadata_path(&state.config, id))
        .await?
        .ok_or_else(|| anyhow!("volume {id} not found"))?;
    let export = match (meta.port, meta.export_pid) {
        // Guard: only advertise the export if its process still exists.
        (Some(port), pid) if pid.map(process_running_sync).unwrap_or(false) => Some(ExportResponse {
            uri: format!("nbd://{}:{}", state.config.advertise_host, port),
            port,
            pid,
        }),
        _ => None,
    };
    Ok(VolumeResponse {
        id: meta.id,
        size_bytes: meta.size_bytes,
        path: volume_path(&state.config, id).display().to_string(),
        export,
    })
}
|
||||||
|
|
||||||
|
async fn load_metadata(path: &FsPath) -> Result<Option<VolumeMetadata>> {
|
||||||
|
if !fs::try_exists(path).await.unwrap_or(false) {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
let bytes = fs::read(path).await?;
|
||||||
|
Ok(Some(serde_json::from_slice(&bytes)?))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Persists metadata atomically: write a sibling temp file, then rename.
///
/// NOTE(review): the temp file is not fsynced before the rename, so a
/// crash at the wrong moment could leave a truncated rename target on
/// some filesystems — confirm this durability level is acceptable.
async fn save_metadata(path: &FsPath, meta: &VolumeMetadata) -> Result<()> {
    let bytes = serde_json::to_vec_pretty(meta)?;
    // "<id>.json" -> "<id>.json.tmp" alongside the final file.
    let tmp_path = path.with_extension("json.tmp");
    fs::write(&tmp_path, bytes).await?;
    fs::rename(&tmp_path, path).await?;
    Ok(())
}
|
||||||
|
|
||||||
|
/// Stops the qemu-nbd export for `id` if its recorded pid is still alive.
///
/// Sends SIGTERM, polls up to 10s for the process to exit, then escalates
/// to SIGKILL. Always clears `meta.export_pid` (the caller is responsible
/// for persisting the metadata) and removes the pid file if present.
async fn stop_export_if_running(config: &ServerConfig, id: &str, meta: &mut VolumeMetadata) -> Result<()> {
    if let Some(pid) = meta.export_pid {
        if process_running(pid).await {
            let status = Command::new("kill")
                .args(["-TERM", &pid.to_string()])
                .status()
                .await
                .context("failed to terminate qemu-nbd export")?;
            if !status.success() {
                return Err(anyhow!("failed to stop qemu-nbd export pid {pid}"));
            }
            // Graceful-shutdown window; escalate to SIGKILL at the deadline.
            let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10);
            while process_running(pid).await {
                if std::time::Instant::now() >= deadline {
                    // Best-effort: ignore kill(1) failures on the KILL path.
                    let _ = Command::new("kill")
                        .args(["-KILL", &pid.to_string()])
                        .status()
                        .await;
                    break;
                }
                tokio::time::sleep(std::time::Duration::from_millis(100)).await;
            }
        }
    }
    meta.export_pid = None;
    let pid_path = pid_path(config, id);
    if fs::try_exists(&pid_path).await.unwrap_or(false) {
        fs::remove_file(pid_path).await?;
    }
    Ok(())
}
|
||||||
|
|
||||||
|
/// Returns whether `pid` is alive by probing /proc — a Linux-only check;
/// errors from the probe are treated as "not running".
async fn process_running(pid: u32) -> bool {
    fs::try_exists(format!("/proc/{pid}")).await.unwrap_or(false)
}
|
||||||
|
|
||||||
|
/// Blocking twin of [`process_running`] for non-async call sites.
/// Also relies on /proc, so it only works on Linux.
fn process_running_sync(pid: u32) -> bool {
    FsPath::new("/proc").join(pid.to_string()).exists()
}
|
||||||
|
|
||||||
|
async fn read_pid_file(path: &FsPath) -> Result<u32> {
|
||||||
|
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
|
||||||
|
loop {
|
||||||
|
if fs::try_exists(path).await.unwrap_or(false) {
|
||||||
|
let contents = fs::read_to_string(path).await?;
|
||||||
|
return contents
|
||||||
|
.trim()
|
||||||
|
.parse::<u32>()
|
||||||
|
.with_context(|| format!("invalid pid file {}", path.display()));
|
||||||
|
}
|
||||||
|
if std::time::Instant::now() >= deadline {
|
||||||
|
return Err(anyhow!("timed out waiting for pid file {}", path.display()));
|
||||||
|
}
|
||||||
|
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn wait_for_tcp_listen(host: &str, port: u16) -> Result<()> {
|
||||||
|
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(5);
|
||||||
|
loop {
|
||||||
|
if tokio::net::TcpStream::connect((host, port)).await.is_ok() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
if std::time::Instant::now() >= deadline {
|
||||||
|
return Err(anyhow!("timed out waiting for export {}:{}", host, port));
|
||||||
|
}
|
||||||
|
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Startup scan: rebuild the reserved-port set from persisted metadata.
///
/// Ports whose recorded qemu-nbd pid is still alive are reserved; any
/// stale port/pid state (process gone) is cleared and written back so
/// metadata converges to reality after a restart or crash.
async fn collect_reserved_ports(config: &ServerConfig) -> Result<HashSet<u16>> {
    let mut reserved = HashSet::new();
    let mut entries = fs::read_dir(config.metadata_dir()).await?;
    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        // Unreadable/missing documents are skipped rather than fatal.
        let Some(mut meta) = load_metadata(&path).await? else {
            continue;
        };
        match (meta.port, meta.export_pid) {
            // Live export: keep its port reserved.
            (Some(port), Some(pid)) if process_running(pid).await => {
                reserved.insert(port);
            }
            // Stale export state: clear it and persist the cleanup.
            (Some(_), _) | (_, Some(_)) => {
                meta.port = None;
                meta.export_pid = None;
                meta.updated_at = chrono::Utc::now().to_rfc3339();
                save_metadata(&path, &meta).await?;
            }
            _ => {}
        }
    }
    Ok(reserved)
}
|
||||||
|
|
||||||
|
async fn reserve_export_port(state: &AppState, preferred_port: Option<u16>) -> Result<u16> {
|
||||||
|
let mut reserved = state.reserved_ports.lock().await;
|
||||||
|
if let Some(port) = preferred_port {
|
||||||
|
if port_is_usable(&state.config, &reserved, port).await {
|
||||||
|
reserved.insert(port);
|
||||||
|
return Ok(port);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let start = state.config.export_base_port as u32;
|
||||||
|
let end = start + state.config.export_port_count as u32;
|
||||||
|
for port in start..end {
|
||||||
|
let port_u16 = port as u16;
|
||||||
|
if port_is_usable(&state.config, &reserved, port_u16).await {
|
||||||
|
reserved.insert(port_u16);
|
||||||
|
return Ok(port_u16);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(anyhow!(
|
||||||
|
"no free export ports left in range {}..{}",
|
||||||
|
state.config.export_base_port,
|
||||||
|
state.config.export_base_port + state.config.export_port_count
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Checks whether `port` may be used for a new export: it must lie inside
/// the configured range, not be reserved, and be bindable right now.
///
/// NOTE(review): the bind probe releases the socket before qemu-nbd binds
/// it, so a small TOCTOU window remains; the in-memory reservation set is
/// what actually prevents races between this server's own exports.
async fn port_is_usable(config: &ServerConfig, reserved: &HashSet<u16>, port: u16) -> bool {
    let start = config.export_base_port as u32;
    let end = start + config.export_port_count as u32;
    if (port as u32) < start || (port as u32) >= end || reserved.contains(&port) {
        return false;
    }
    tokio::net::TcpListener::bind((config.export_bind_addr.as_str(), port))
        .await
        .is_ok()
}
|
||||||
|
|
||||||
|
async fn mark_port_reserved(state: &AppState, port: u16) {
|
||||||
|
let mut reserved = state.reserved_ports.lock().await;
|
||||||
|
reserved.insert(port);
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn release_export_port(state: &AppState, port: Option<u16>) {
|
||||||
|
if let Some(port) = port {
|
||||||
|
let mut reserved = state.reserved_ports.lock().await;
|
||||||
|
reserved.remove(&port);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn export_probe_host(config: &ServerConfig) -> &str {
|
||||||
|
match config.export_bind_addr.as_str() {
|
||||||
|
"0.0.0.0" | "::" | "" => "127.0.0.1",
|
||||||
|
host => host,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_or_preallocate_file(path: &FsPath, size_bytes: u64, preallocate: bool) -> Result<()> {
|
||||||
|
if preallocate {
|
||||||
|
let status = Command::new("fallocate")
|
||||||
|
.args(["-l", &size_bytes.to_string(), path.to_string_lossy().as_ref()])
|
||||||
|
.status()
|
||||||
|
.await;
|
||||||
|
if matches!(status, Ok(status) if status.success()) {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let file = fs::File::create(path).await?;
|
||||||
|
file.set_len(size_bytes).await?;
|
||||||
|
ensure_volume_file_permissions(path).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Forces mode 0o660 on a volume file (owner/group read-write); a no-op
/// on non-Unix targets.
async fn ensure_volume_file_permissions(path: &FsPath) -> Result<()> {
    #[cfg(unix)]
    {
        let permissions = std::fs::Permissions::from_mode(0o660);
        fs::set_permissions(path, permissions).await?;
    }
    Ok(())
}
|
||||||
|
|
||||||
|
fn volume_path(config: &ServerConfig, id: &str) -> PathBuf {
|
||||||
|
config.volume_dir().join(format!("{id}.raw"))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn metadata_path(config: &ServerConfig, id: &str) -> PathBuf {
|
||||||
|
config.metadata_dir().join(format!("{id}.json"))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn pid_path(config: &ServerConfig, id: &str) -> PathBuf {
|
||||||
|
config.pid_dir().join(format!("{id}.pid"))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn temp_import_path(config: &ServerConfig, id: &str) -> PathBuf {
|
||||||
|
config.data_dir.join(format!("{id}.import.tmp"))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    // Native AIO is only kept for direct-I/O cache modes; anything else
    // must degrade to "threads". Non-native requests always pass through.
    #[test]
    fn export_aio_mode_falls_back_for_cached_exports() {
        assert_eq!(export_aio_mode("writeback", "native"), "threads");
        assert_eq!(export_aio_mode("none", "native"), "native");
        assert_eq!(export_aio_mode("directsync", "native"), "native");
        assert_eq!(export_aio_mode("writeback", "threads"), "threads");
    }

    // Wildcard binds are probed via loopback; concrete binds are probed directly.
    #[test]
    fn export_probe_host_prefers_loopback_for_wildcard_bind() {
        let mut config = ServerConfig::default();
        config.export_bind_addr = "0.0.0.0".to_string();
        assert_eq!(export_probe_host(&config), "127.0.0.1");
        config.export_bind_addr = "10.100.0.11".to_string();
        assert_eq!(export_probe_host(&config), "10.100.0.11");
    }
}
|
||||||
10
crates/photon-auth-client/Cargo.toml
Normal file
10
crates/photon-auth-client/Cargo.toml
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
[package]
|
||||||
|
name = "photon-auth-client"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
license = "MIT OR Apache-2.0"
|
||||||
|
description = "Shared IAM auth client wrapper for PhotonCloud services"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0"
|
||||||
|
iam-service-auth = { path = "../../iam/crates/iam-service-auth" }
|
||||||
10
crates/photon-auth-client/src/lib.rs
Normal file
10
crates/photon-auth-client/src/lib.rs
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
pub use iam_service_auth::{
|
||||||
|
get_tenant_context, resolve_tenant_ids_from_context, resource_for_tenant, AuthService,
|
||||||
|
TenantContext,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub async fn connect_iam(endpoint: &str) -> anyhow::Result<AuthService> {
|
||||||
|
AuthService::new(endpoint).await.map_err(|error| {
|
||||||
|
anyhow::anyhow!("failed to connect to IAM server at {}: {}", endpoint, error)
|
||||||
|
})
|
||||||
|
}
|
||||||
11
crates/photon-config/Cargo.toml
Normal file
11
crates/photon-config/Cargo.toml
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
[package]
|
||||||
|
name = "photon-config"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
license = "MIT OR Apache-2.0"
|
||||||
|
description = "Shared configuration loading helpers for PhotonCloud"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0"
|
||||||
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
|
toml = "0.8"
|
||||||
58
crates/photon-config/src/lib.rs
Normal file
58
crates/photon-config/src/lib.rs
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
use anyhow::Context;
|
||||||
|
use serde::de::DeserializeOwned;
|
||||||
|
use std::fs;
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
pub fn load_toml_config<T>(path: &Path) -> anyhow::Result<T>
|
||||||
|
where
|
||||||
|
T: DeserializeOwned + Default,
|
||||||
|
{
|
||||||
|
if !path.exists() {
|
||||||
|
return Ok(T::default());
|
||||||
|
}
|
||||||
|
|
||||||
|
let contents = fs::read_to_string(path)
|
||||||
|
.with_context(|| format!("failed to read config file {}", path.display()))?;
|
||||||
|
toml::from_str(&contents)
|
||||||
|
.with_context(|| format!("failed to parse config file {}", path.display()))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::load_toml_config;
    use serde::Deserialize;
    use std::fs;
    use std::path::PathBuf;
    use std::time::{SystemTime, UNIX_EPOCH};

    #[derive(Debug, Default, Deserialize, PartialEq)]
    struct TestConfig {
        value: String,
    }

    // Unique temp path per test run: pid + nanosecond timestamp avoids
    // collisions between parallel test processes.
    fn temp_path(name: &str) -> PathBuf {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        std::env::temp_dir().join(format!("{}-{}-{}.toml", name, std::process::id(), nanos))
    }

    // A missing file must yield the Default impl, not an error.
    #[test]
    fn returns_default_when_file_is_missing() {
        let path = temp_path("photon-config-missing");
        let config: TestConfig = load_toml_config(&path).unwrap();
        assert_eq!(config, TestConfig::default());
    }

    // An existing TOML file is parsed into the target type.
    #[test]
    fn loads_existing_toml_file() {
        let path = temp_path("photon-config-load");
        fs::write(&path, "value = \"hello\"\n").unwrap();

        let config: TestConfig = load_toml_config(&path).unwrap();
        assert_eq!(config.value, "hello");

        // Best-effort cleanup; failure to remove is not a test failure.
        let _ = fs::remove_file(path);
    }
}
|
||||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue