Establish canonical validation lanes
This commit is contained in:
parent
b8ebd24d4e
commit
11cd8be2f7
34 changed files with 2578 additions and 314 deletions
37
.github/workflows/kvm-publishable.yml
vendored
Normal file
37
.github/workflows/kvm-publishable.yml
vendored
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
name: KVM Publishable Validation
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
publishable-kvm-suite:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
timeout-minutes: 360
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- uses: DeterminateSystems/nix-installer-action@v11
|
||||||
|
|
||||||
|
- uses: DeterminateSystems/magic-nix-cache-action@v8
|
||||||
|
|
||||||
|
- name: Probe KVM Environment
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
echo "hostname=$(hostname)"
|
||||||
|
uname -a
|
||||||
|
id
|
||||||
|
test -e /dev/kvm
|
||||||
|
ls -l /dev/kvm
|
||||||
|
if [[ -f /sys/module/kvm_intel/parameters/nested ]]; then
|
||||||
|
echo "kvm_intel_nested=$(cat /sys/module/kvm_intel/parameters/nested)"
|
||||||
|
fi
|
||||||
|
if [[ -f /sys/module/kvm_amd/parameters/nested ]]; then
|
||||||
|
echo "kvm_amd_nested=$(cat /sys/module/kvm_amd/parameters/nested)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Run Publishable KVM Suite
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
chmod +x ./nix/test-cluster/run-publishable-kvm-suite.sh
|
||||||
|
./nix/test-cluster/run-publishable-kvm-suite.sh "$RUNNER_TEMP/publishable-kvm-suite"
|
||||||
28
.github/workflows/nix.yml
vendored
28
.github/workflows/nix.yml
vendored
|
|
@ -96,6 +96,23 @@ jobs:
|
||||||
run: |
|
run: |
|
||||||
nix run ./nix/ci#gate-ci -- --shared-crate ${{ matrix.crate }} --tier 0 --no-logs
|
nix run ./nix/ci#gate-ci -- --shared-crate ${{ matrix.crate }} --tier 0 --no-logs
|
||||||
|
|
||||||
|
portable-regressions:
|
||||||
|
needs: filter
|
||||||
|
if: ${{ needs.filter.outputs.any_changed == 'true' || needs.filter.outputs.global_changed == 'true' || needs.filter.outputs.shared_crates_changed == 'true' }}
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
name: portable regressions
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- uses: DeterminateSystems/nix-installer-action@v11
|
||||||
|
- uses: DeterminateSystems/magic-nix-cache-action@v8
|
||||||
|
|
||||||
|
- name: Run portable canonical profile regressions
|
||||||
|
run: |
|
||||||
|
nix build \
|
||||||
|
.#checks.x86_64-linux.canonical-profile-eval-guards \
|
||||||
|
.#checks.x86_64-linux.portable-control-plane-regressions \
|
||||||
|
--accept-flake-config
|
||||||
|
|
||||||
# Build server packages (tier 1+)
|
# Build server packages (tier 1+)
|
||||||
build:
|
build:
|
||||||
needs: [filter, gate]
|
needs: [filter, gate]
|
||||||
|
|
@ -116,7 +133,7 @@ jobs:
|
||||||
|
|
||||||
# Summary job for PR status checks
|
# Summary job for PR status checks
|
||||||
ci-status:
|
ci-status:
|
||||||
needs: [filter, gate, shared-crates-gate]
|
needs: [filter, gate, shared-crates-gate, portable-regressions]
|
||||||
if: always()
|
if: always()
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
|
|
@ -128,11 +145,18 @@ jobs:
|
||||||
if [[ "${{ needs.shared-crates-gate.result }}" == "failure" ]]; then
|
if [[ "${{ needs.shared-crates-gate.result }}" == "failure" ]]; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
if [[ "${{ needs.filter.outputs.any_changed }}" == "true" || "${{ needs.filter.outputs.global_changed }}" == "true" ]]; then
|
if [[ "${{ needs.portable-regressions.result }}" == "failure" ]]; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if [[ "${{ needs.filter.outputs.any_changed }}" == "true" || "${{ needs.filter.outputs.global_changed }}" == "true" || "${{ needs.filter.outputs.shared_crates_changed }}" == "true" ]]; then
|
||||||
if [[ "${{ needs.gate.result }}" == "skipped" ]]; then
|
if [[ "${{ needs.gate.result }}" == "skipped" ]]; then
|
||||||
echo "Gate was skipped despite changes. This is unexpected."
|
echo "Gate was skipped despite changes. This is unexpected."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
if [[ "${{ needs.portable-regressions.result }}" == "skipped" ]]; then
|
||||||
|
echo "Portable regressions were skipped despite changes. This is unexpected."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
if [[ "${{ needs.filter.outputs.shared_crates_changed }}" == "true" ]]; then
|
if [[ "${{ needs.filter.outputs.shared_crates_changed }}" == "true" ]]; then
|
||||||
if [[ "${{ needs.shared-crates-gate.result }}" == "skipped" ]]; then
|
if [[ "${{ needs.shared-crates-gate.result }}" == "skipped" ]]; then
|
||||||
|
|
|
||||||
91
README.md
91
README.md
|
|
@ -2,7 +2,8 @@
|
||||||
|
|
||||||
UltraCloud is a Nix-first cloud platform workspace that assembles a small control plane, network services, VM hosting, shared storage, object storage, and gateway services into one reproducible repository.
|
UltraCloud is a Nix-first cloud platform workspace that assembles a small control plane, network services, VM hosting, shared storage, object storage, and gateway services into one reproducible repository.
|
||||||
|
|
||||||
The canonical local proof path is the six-node VM cluster under [`nix/test-cluster`](/home/centra/cloud/nix/test-cluster/README.md). It builds all guest images on the host, boots them as hardware-like QEMU nodes, and validates real multi-node behavior.
|
The fastest public entrypoint is the one-command single-node quickstart. The canonical multi-node integration proof remains the six-node VM cluster under [`nix/test-cluster`](nix/test-cluster/README.md), which builds all guest images on the host, boots them as hardware-like QEMU nodes, and validates real multi-node behavior.
|
||||||
|
The canonical bare-metal bootstrap proof is the ISO-on-QEMU path under [`nix/test-cluster`](nix/test-cluster/README.md), which drives phone-home, Disko install, reboot, and desired-system convergence for one control-plane node and one worker-equivalent node.
|
||||||
|
|
||||||
## Components
|
## Components
|
||||||
|
|
||||||
|
|
@ -15,38 +16,102 @@ The canonical local proof path is the six-node VM cluster under [`nix/test-clust
|
||||||
- `plasmavmc`: VM control plane and worker agents
|
- `plasmavmc`: VM control plane and worker agents
|
||||||
- `coronafs`: shared filesystem for mutable VM volumes
|
- `coronafs`: shared filesystem for mutable VM volumes
|
||||||
- `lightningstor`: object storage and VM image backing
|
- `lightningstor`: object storage and VM image backing
|
||||||
- `k8shost`: Kubernetes-style hosting control plane
|
- `k8shost`: Kubernetes-style hosting control plane for tenant pods and services
|
||||||
- `apigateway`: external API and proxy surface
|
- `apigateway`: external API and proxy surface
|
||||||
- `nightlight`: metrics ingestion and query service
|
- `nightlight`: metrics ingestion and query service
|
||||||
- `creditservice`: minimal reference quota/credit service
|
- `creditservice`: minimal reference quota/credit service
|
||||||
- `deployer`: bootstrap and phone-home deployment service
|
- `deployer`: bootstrap and phone-home deployment service that owns install plans and desired-system intent
|
||||||
- `fleet-scheduler`: non-Kubernetes service scheduler for bare-metal cluster services
|
- `fleet-scheduler`: non-Kubernetes service scheduler for bare-metal cluster services
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
|
Single-node quickstart:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nix run .#single-node-quickstart
|
||||||
|
```
|
||||||
|
|
||||||
|
This app builds the minimal VM stack, boots a QEMU VM, waits for `chainfire`, `flaredb`, `iam`, `prismnet`, and `plasmavmc`, checks their health endpoints, and verifies the in-guest VM runtime prerequisites. For an interactive session, keep the VM running:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ULTRACLOUD_QUICKSTART_KEEP_VM=1 nix run .#single-node-quickstart
|
||||||
|
```
|
||||||
|
|
||||||
|
The legacy name `.#all-in-one-quickstart` is kept as an alias.
|
||||||
|
|
||||||
|
Portable local proof on hosts without `/dev/kvm`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nix build .#checks.x86_64-linux.canonical-profile-eval-guards
|
||||||
|
nix build .#checks.x86_64-linux.portable-control-plane-regressions
|
||||||
|
```
|
||||||
|
|
||||||
|
This TCG-safe lane keeps canonical profile drift, the core `chainfire` / `deployer` control-plane path, the `deployer -> nix-agent` boundary, and the `fleet-scheduler -> node-agent` boundary under regression coverage without requiring nested virtualization.
|
||||||
|
|
||||||
|
Publishable nested-KVM suite:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
nix develop
|
nix develop
|
||||||
nix run ./nix/test-cluster#cluster -- fresh-smoke
|
nix run ./nix/test-cluster#cluster -- fresh-smoke
|
||||||
|
nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp
|
||||||
|
nix run ./nix/test-cluster#cluster -- fresh-matrix
|
||||||
|
./nix/test-cluster/run-publishable-kvm-suite.sh ./work/publishable-kvm-suite
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Project-done release proof now requires both halves of the public validation surface to be green:
|
||||||
|
|
||||||
|
- `baremetal-iso` and `baremetal-iso-e2e` for the canonical `deployer -> installer -> nix-agent` bare-metal bootstrap path
|
||||||
|
- the KVM publishable suite (`fresh-smoke`, `fresh-demo-vm-webapp`, `fresh-matrix`) for the nested-KVM multi-node VM-hosting path
|
||||||
|
|
||||||
|
Canonical bare-metal bootstrap proof:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nix run ./nix/test-cluster#cluster -- baremetal-iso
|
||||||
|
nix build .#checks.x86_64-linux.baremetal-iso-e2e
|
||||||
|
```
|
||||||
|
|
||||||
|
## Canonical Profiles
|
||||||
|
|
||||||
|
UltraCloud now fixes the public support surface to three canonical profiles:
|
||||||
|
|
||||||
|
| Profile | Primary Nix outputs | Required components | Optional components |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| `single-node dev` | `nix run .#single-node-quickstart`, `nixosConfigurations.single-node-quickstart`, companion install image `nixosConfigurations.netboot-all-in-one` | `chainfire`, `flaredb`, `iam`, `plasmavmc`, `prismnet` | `lightningstor`, `coronafs`, `flashdns`, `fiberlb`, `apigateway`, `nightlight`, `creditservice`, `k8shost`, `deployer` |
|
||||||
|
| `3-node HA control plane` | `nixosConfigurations.node01`, `node02`, `node03`, `netboot-control-plane` | `chainfire`, `flaredb`, `iam`, `nix-agent` on every control-plane node, plus `deployer` on the bootstrap node | `fleet-scheduler`, `node-agent`, `prismnet`, `flashdns`, `fiberlb`, `plasmavmc`, `lightningstor`, `coronafs`, `k8shost`, `apigateway`, `nightlight`, `creditservice` |
|
||||||
|
| `bare-metal bootstrap` | `nixosConfigurations.ultracloud-iso`, `nixosConfigurations.baremetal-qemu-control-plane`, `nixosConfigurations.baremetal-qemu-worker`, `checks.x86_64-linux.baremetal-iso-e2e` | `deployer`, `first-boot-automation`, `install-target`, `nix-agent` | `netboot-control-plane`, `netboot-worker`, and `netboot-all-in-one` as experimental helper images, plus `node-agent`, `fleet-scheduler`, and higher-level storage or edge services after bootstrap |
|
||||||
|
|
||||||
|
`netboot-base` is an internal helper image, not a public profile. `netboot-control-plane`, `netboot-worker`, and `netboot-all-in-one` remain experimental helper images until they implement the same phone-home and install semantics as the ISO path. Older launch flows under `baremetal/vm-cluster` are `legacy/manual`, not canonical.
|
||||||
|
|
||||||
|
## Responsibility Boundaries
|
||||||
|
|
||||||
|
- `k8shost` owns Kubernetes-style pod and service APIs for tenant workloads, then translates them into `prismnet`, `flashdns`, and `fiberlb` objects. It does not place host-native cluster daemons.
|
||||||
|
- `fleet-scheduler` owns placement and failover of host-native service instances from declarative cluster state. It consumes `node-agent` heartbeats and writes instance placement, but it does not expose tenant-facing Kubernetes semantics.
|
||||||
|
- `deployer` owns machine enrollment, `/api/v1/phone-home`, install plans, cluster metadata, and desired-system references. It decides what a node should become, but it does not execute the host-local switch.
|
||||||
|
- `nix-agent` owns host-local NixOS convergence only. It reads desired-system state from `deployer` or `chainfire`, activates the target closure, and rolls back on failed health checks.
|
||||||
|
- `node-agent` owns host-local runtime execution only. It reports heartbeats and applies scheduled service-instance state, but it does not install the base OS or rewrite desired-system targets.
|
||||||
|
|
||||||
## Main Entrypoints
|
## Main Entrypoints
|
||||||
|
|
||||||
- workspace flake: [flake.nix](/home/centra/cloud/flake.nix)
|
- workspace flake: [flake.nix](flake.nix)
|
||||||
- VM validation harness: [nix/test-cluster/README.md](/home/centra/cloud/nix/test-cluster/README.md)
|
- single-node quickstart smoke: [`nix run .#single-node-quickstart`](docs/testing.md)
|
||||||
- shared volume notes: [coronafs/README.md](/home/centra/cloud/coronafs/README.md)
|
- portable local proof: [`nix build .#checks.x86_64-linux.portable-control-plane-regressions`](docs/testing.md)
|
||||||
- minimal quota-service rationale: [creditservice/README.md](/home/centra/cloud/creditservice/README.md)
|
- canonical bare-metal bootstrap smoke: [`nix run ./nix/test-cluster#cluster -- baremetal-iso`](docs/testing.md)
|
||||||
- archived manual VM launch scripts: [baremetal/vm-cluster/README.md](/home/centra/cloud/baremetal/vm-cluster/README.md)
|
- canonical profile guards: [`nix build .#checks.x86_64-linux.canonical-profile-eval-guards`](docs/testing.md), [`nix build .#checks.x86_64-linux.canonical-profile-build-guards`](docs/testing.md)
|
||||||
|
- VM validation harness: [nix/test-cluster/README.md](nix/test-cluster/README.md)
|
||||||
|
- shared volume notes: [coronafs/README.md](coronafs/README.md)
|
||||||
|
- minimal quota-service rationale: [creditservice/README.md](creditservice/README.md)
|
||||||
|
- legacy/manual VM launch scripts: [baremetal/vm-cluster/README.md](baremetal/vm-cluster/README.md)
|
||||||
|
|
||||||
## Repository Guide
|
## Repository Guide
|
||||||
|
|
||||||
- [docs/README.md](/home/centra/cloud/docs/README.md): documentation entrypoint
|
- [docs/README.md](docs/README.md): documentation entrypoint
|
||||||
- [docs/testing.md](/home/centra/cloud/docs/testing.md): validation path summary
|
- [docs/testing.md](docs/testing.md): validation path summary
|
||||||
- [docs/component-matrix.md](/home/centra/cloud/docs/component-matrix.md): supported multi-component compositions
|
- [docs/component-matrix.md](docs/component-matrix.md): canonical profiles and optional bundles
|
||||||
- [docs/storage-benchmarks.md](/home/centra/cloud/docs/storage-benchmarks.md): latest CoronaFS and LightningStor lab numbers
|
- [docs/storage-benchmarks.md](docs/storage-benchmarks.md): latest CoronaFS and LightningStor lab numbers
|
||||||
- `plans/`: design notes and exploration documents
|
- `plans/`: design notes and exploration documents
|
||||||
|
|
||||||
## Scope
|
## Scope
|
||||||
|
|
||||||
UltraCloud is centered on reproducible infrastructure behavior rather than polished end-user product surfaces. Some services, such as `creditservice`, are intentionally minimal reference implementations that prove integration points rather than full products.
|
UltraCloud is centered on reproducible infrastructure behavior rather than polished end-user product surfaces. Some services, such as `creditservice`, are intentionally minimal reference implementations that prove integration points rather than full products.
|
||||||
|
|
||||||
Host-level NixOS rollout validation is also expected to stay reproducible: the `deployer-vm-smoke` VM test now proves that `nix-agent` can activate a prebuilt target system closure directly, without recompiling the stack inside the guest.
|
Host-level NixOS rollout validation is also expected to stay reproducible: `baremetal-iso-e2e` is now the full install-path proof, `canonical-profile-eval-guards` and `canonical-profile-build-guards` fail fast when supported outputs drift, and `portable-control-plane-regressions` is the non-KVM developer lane that keeps the main control-plane and rollout boundaries green on TCG-only hosts before the publishable nested-KVM suite is rerun.
|
||||||
|
|
|
||||||
|
|
@ -4,16 +4,16 @@ This directory is the public documentation entrypoint for UltraCloud.
|
||||||
|
|
||||||
## Read First
|
## Read First
|
||||||
|
|
||||||
- [../README.md](/home/centra/cloud/README.md)
|
- [../README.md](../README.md)
|
||||||
- [testing.md](/home/centra/cloud/docs/testing.md)
|
- [testing.md](testing.md)
|
||||||
- [component-matrix.md](/home/centra/cloud/docs/component-matrix.md)
|
- [component-matrix.md](component-matrix.md)
|
||||||
- [storage-benchmarks.md](/home/centra/cloud/docs/storage-benchmarks.md)
|
- [storage-benchmarks.md](storage-benchmarks.md)
|
||||||
|
|
||||||
## Key References
|
## Key References
|
||||||
|
|
||||||
- VM validation harness: [../nix/test-cluster/README.md](/home/centra/cloud/nix/test-cluster/README.md)
|
- VM validation harness: [../nix/test-cluster/README.md](../nix/test-cluster/README.md)
|
||||||
- CoronaFS storage role: [../coronafs/README.md](/home/centra/cloud/coronafs/README.md)
|
- CoronaFS storage role: [../coronafs/README.md](../coronafs/README.md)
|
||||||
- CreditService scope note: [../creditservice/README.md](/home/centra/cloud/creditservice/README.md)
|
- CreditService scope note: [../creditservice/README.md](../creditservice/README.md)
|
||||||
|
|
||||||
## Design Notes
|
## Design Notes
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,54 +1,56 @@
|
||||||
# Component Matrix
|
# Component Matrix
|
||||||
|
|
||||||
UltraCloud is intended to validate meaningful service combinations, not only a single all-on deployment.
|
UltraCloud now fixes the public support surface to three canonical profiles. This page defines the required and optional component bundles for each profile and keeps everything else explicitly outside the core contract.
|
||||||
This page summarizes the compositions that are exercised by the VM-cluster harness today.
|
|
||||||
|
|
||||||
## Validated Control Plane
|
## Canonical Profiles
|
||||||
|
|
||||||
- `chainfire + flaredb + iam`
|
### `single-node dev`
|
||||||
|
|
||||||
## Validated Network Provider Layer
|
- Required components: `chainfire`, `flaredb`, `iam`, `plasmavmc`, `prismnet`
|
||||||
|
- Optional components: `lightningstor`, `coronafs`, `flashdns`, `fiberlb`, `apigateway`, `nightlight`, `creditservice`, `k8shost`, `deployer`
|
||||||
|
- Primary Nix outputs: `nix run .#single-node-quickstart`, `nixosConfigurations.single-node-quickstart`, and companion install image `nixosConfigurations.netboot-all-in-one`
|
||||||
|
- Optional component toggles: `ultracloud.quickstart.enableLightningStor`, `enableCoronafs`, `enableFlashDNS`, `enableFiberLB`, `enableApiGateway`, `enableNightlight`, `enableCreditService`, `enableK8sHost`
|
||||||
|
- Primary use: one-command local bring-up, API development, and one-box VM experimentation without the HA control-plane overhead
|
||||||
|
|
||||||
- `prismnet`
|
### `3-node HA control plane`
|
||||||
- `prismnet + flashdns`
|
|
||||||
- `prismnet + fiberlb`
|
|
||||||
- `prismnet + flashdns + fiberlb`
|
|
||||||
|
|
||||||
These combinations justify the existence of the network services as composable providers rather than hidden internal subsystems.
|
- Required components: `chainfire`, `flaredb`, `iam`, `nix-agent` on every control-plane node, plus `deployer` on the bootstrap node
|
||||||
|
- Optional components: `fleet-scheduler`, `node-agent`, `prismnet`, `flashdns`, `fiberlb`, `plasmavmc`, `lightningstor`, `coronafs`, `k8shost`, `apigateway`, `nightlight`, `creditservice`
|
||||||
|
- Primary Nix outputs: `nixosConfigurations.node01`, `node02`, `node03`, `netboot-control-plane`
|
||||||
|
- Primary use: stable replicated control plane that can later accept worker, storage, and edge bundles without redefining the bootstrap path
|
||||||
|
|
||||||
## Validated VM Hosting Layer
|
### `bare-metal bootstrap`
|
||||||
|
|
||||||
- `plasmavmc + prismnet`
|
- Required components: `deployer`, `first-boot-automation`, `install-target`, `nix-agent`
|
||||||
- `plasmavmc + lightningstor`
|
- Optional components: `netboot-control-plane`, `netboot-worker`, and `netboot-all-in-one` as experimental helper images, plus `node-agent`, `fleet-scheduler`, and higher-level storage or edge services after the first successful rollout
|
||||||
- `plasmavmc + coronafs`
|
- Primary Nix outputs: `nixosConfigurations.ultracloud-iso`, `nixosConfigurations.baremetal-qemu-control-plane`, `nixosConfigurations.baremetal-qemu-worker`, `checks.x86_64-linux.baremetal-iso-e2e`
|
||||||
- `plasmavmc + coronafs + lightningstor`
|
- Primary use: boot the installer ISO, phone home to `deployer`, fetch the flake bundle, run Disko, reboot, and converge QEMU-emulated or real machines into either the single-node or HA profile
|
||||||
- `plasmavmc + prismnet + coronafs + lightningstor`
|
|
||||||
|
|
||||||
This split keeps mutable VM volumes on CoronaFS and immutable VM images on LightningStor object storage.
|
## Optional Composition Bundles
|
||||||
|
|
||||||
## Validated Kubernetes-Style Hosting Layer
|
The optional bundles below remain important, but they are layered on top of the canonical profiles rather than treated as separate top-level products:
|
||||||
|
|
||||||
- `k8shost + prismnet`
|
- control-plane core: `chainfire + flaredb + iam`
|
||||||
- `k8shost + flashdns`
|
- network provider bundle: `prismnet + flashdns + fiberlb`
|
||||||
- `k8shost + fiberlb`
|
- VM hosting bundle: `plasmavmc + prismnet + coronafs + lightningstor`
|
||||||
- `k8shost + prismnet + flashdns + fiberlb`
|
- Kubernetes-style hosting bundle: `k8shost + prismnet + flashdns + fiberlb`
|
||||||
|
- edge and tenant bundle: `apigateway + iam + nightlight + creditservice`
|
||||||
|
- native rollout bundle: `deployer + chainfire + nix-agent + fleet-scheduler + node-agent`
|
||||||
|
|
||||||
## Validated Edge And Tenant Services
|
`fresh-matrix` is the publishable composition proof because it rebuilds the host-side VM images before validating these bundles on the VM cluster.
|
||||||
|
|
||||||
- `apigateway + iam + prismnet`
|
## Responsibility Boundaries
|
||||||
- `nightlight + apigateway`
|
|
||||||
- `nightlight`
|
|
||||||
- `creditservice + iam + apigateway`
|
|
||||||
- `creditservice + iam`
|
|
||||||
- `deployer + iam + chainfire`
|
|
||||||
|
|
||||||
## Validation Direction
|
- `k8shost`: tenant workload API surface. It manages pod, deployment, and service semantics, then delegates network publication to `prismnet`, `flashdns`, and `fiberlb`.
|
||||||
|
- `fleet-scheduler`: bare-metal service placement surface. It schedules host-native service instances from declarative cluster state and `node-agent` heartbeats, without exposing Kubernetes APIs.
|
||||||
|
- `deployer`: enrollment and rollout authority. It serves `/api/v1/phone-home`, stores install plans and desired-system references, and seeds cluster metadata.
|
||||||
|
- `nix-agent`: host OS reconciler. It turns `deployer` desired-system references into `switch-to-configuration` actions plus rollback and health-check handling.
|
||||||
|
- `node-agent`: host runtime reconciler. It applies scheduled service-instance state, keeps runtime heartbeats fresh, and reports host-local execution status back to the scheduler.
|
||||||
|
|
||||||
The VM cluster harness now exposes:
|
The intended layering is `deployer -> nix-agent` for machine image or NixOS generation changes, and `deployer -> fleet-scheduler -> node-agent` for host-native service placement changes. `k8shost` stays separate because it is the tenant workload control plane, not the native service scheduler.
|
||||||
|
|
||||||
```bash
|
## Non-Canonical Paths
|
||||||
nix run ./nix/test-cluster#cluster -- matrix
|
|
||||||
nix run ./nix/test-cluster#cluster -- fresh-matrix
|
|
||||||
```
|
|
||||||
|
|
||||||
`fresh-matrix` is the publishable path because it rebuilds the host-side VM images before validating the composed service scenarios, including PrismNet-backed PlasmaVMC guests.
|
- `baremetal/vm-cluster` remains `legacy/manual`
|
||||||
|
- `netboot-control-plane`, `netboot-worker`, `netboot-all-in-one`, `netboot-base`, and `pxe-server` are internal or experimental helpers, not supported profiles by themselves
|
||||||
|
- ad hoc shell-driven cluster bring-up is for debugging only and should not be presented as the canonical public path
|
||||||
|
|
|
||||||
111
docs/testing.md
111
docs/testing.md
|
|
@ -1,37 +1,113 @@
|
||||||
# Testing
|
# Testing
|
||||||
|
|
||||||
UltraCloud treats VM-first validation as the canonical local proof path.
|
UltraCloud treats VM-first validation as the canonical local proof path and keeps the public support contract limited to three profiles.
|
||||||
|
|
||||||
## Canonical Validation
|
## Canonical Profiles
|
||||||
|
|
||||||
|
| Profile | Primary outputs | Required components | Optional components |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| `single-node dev` | `nix run .#single-node-quickstart`, `nixosConfigurations.single-node-quickstart`, companion install image `nixosConfigurations.netboot-all-in-one` | `chainfire`, `flaredb`, `iam`, `plasmavmc`, `prismnet` | `lightningstor`, `coronafs`, `flashdns`, `fiberlb`, `apigateway`, `nightlight`, `creditservice`, `k8shost`, `deployer` |
|
||||||
|
| `3-node HA control plane` | `nixosConfigurations.node01`, `node02`, `node03`, `netboot-control-plane` | `chainfire`, `flaredb`, `iam`, `nix-agent` on every control-plane node, plus `deployer` on the bootstrap node | `fleet-scheduler`, `node-agent`, `prismnet`, `flashdns`, `fiberlb`, `plasmavmc`, `lightningstor`, `coronafs`, `k8shost`, `apigateway`, `nightlight`, `creditservice` |
|
||||||
|
| `bare-metal bootstrap` | `nixosConfigurations.ultracloud-iso`, `nixosConfigurations.baremetal-qemu-control-plane`, `nixosConfigurations.baremetal-qemu-worker`, `checks.x86_64-linux.baremetal-iso-e2e` | `deployer`, `first-boot-automation`, `install-target`, `nix-agent` | `netboot-control-plane`, `netboot-worker`, and `netboot-all-in-one` as experimental helper images, plus `node-agent`, `fleet-scheduler`, and higher-level storage or edge services after bootstrap |
|
||||||
|
|
||||||
|
## Quickstart Smoke
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
nix run ./nix/test-cluster#cluster -- fresh-smoke
|
nix flake show . --all-systems | rg -n "single|all-in-one|quickstart"
|
||||||
|
nix eval --no-eval-cache .#nixosConfigurations.single-node-quickstart.config.system.build.toplevel.drvPath --raw
|
||||||
|
nix run .#single-node-quickstart
|
||||||
```
|
```
|
||||||
|
|
||||||
This flow:
|
`single-node-quickstart` is the supported one-box entrypoint. It boots the minimal VM stack under QEMU, waits for `chainfire`, `flaredb`, `iam`, `prismnet`, and `plasmavmc`, and verifies their health from inside the guest. The launcher uses the generated NixOS VM runner, so it can fall back to TCG when `/dev/kvm` is absent.
|
||||||
|
|
||||||
- builds all six VM images on the host
|
For debugging, keep the VM alive after the smoke passes:
|
||||||
- boots the cluster in dependency order
|
|
||||||
- validates control-plane, worker, gateway, storage, and fault-injection behavior
|
```bash
|
||||||
- proves that `deployer` seeds scheduler-managed native services directly from declarative Nix cluster state
|
ULTRACLOUD_QUICKSTART_KEEP_VM=1 nix run .#single-node-quickstart
|
||||||
|
```
|
||||||
|
|
||||||
|
## Canonical Bare-Metal Proof
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nix eval --no-eval-cache .#nixosConfigurations.baremetal-qemu-control-plane.config.system.build.toplevel.drvPath --raw
|
||||||
|
nix eval --no-eval-cache .#nixosConfigurations.baremetal-qemu-worker.config.system.build.toplevel.drvPath --raw
|
||||||
|
nix run ./nix/test-cluster#cluster -- baremetal-iso
|
||||||
|
nix build .#checks.x86_64-linux.baremetal-iso-e2e
|
||||||
|
```
|
||||||
|
|
||||||
|
`baremetal-iso` is the canonical install path for QEMU-as-bare-metal validation. It boots `nixosConfigurations.ultracloud-iso`, waits for `/api/v1/phone-home`, downloads the flake bundle from `deployer`, runs Disko, reboots, confirms the first post-install boot markers, and waits for `nix-agent` to report the desired system as `active` for both `baremetal-qemu-control-plane` and `baremetal-qemu-worker`. `baremetal-iso-e2e` runs the same flow under `flake check`.
|
||||||
|
|
||||||
|
## Regression Guards
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nix build .#checks.x86_64-linux.canonical-profile-eval-guards
|
||||||
|
nix build .#checks.x86_64-linux.canonical-profile-build-guards
|
||||||
|
```
|
||||||
|
|
||||||
|
These two checks are the fast fail-first drift gates for the supported surface:
|
||||||
|
|
||||||
|
- `canonical-profile-eval-guards`: forces evaluation of every canonical profile output, including `netboot-worker` and `netboot-all-in-one`, so broken attrs fail before any long-running harness work starts.
|
||||||
|
- `canonical-profile-build-guards`: realizes the canonical VM, ISO, control-plane, and helper-image outputs so build-time drift is caught even when a cluster harness is not running.
|
||||||
|
|
||||||
|
## Portable Local Proof
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nix build .#checks.x86_64-linux.canonical-profile-eval-guards
|
||||||
|
nix build .#checks.x86_64-linux.portable-control-plane-regressions
|
||||||
|
```
|
||||||
|
|
||||||
|
Use this lane on Linux hosts that do not expose `/dev/kvm`:
|
||||||
|
|
||||||
|
- `portable-control-plane-regressions`: TCG-safe aggregate check that keeps the canonical profile eval guard, `deployer-bootstrap-e2e`, `host-lifecycle-e2e`, `deployer-vm-smoke`, and `fleet-scheduler-e2e` green together.
|
||||||
|
- It intentionally does not boot the six-node nested-KVM VM suite, so it is a developer regression path, not the publishable multi-node proof.
|
||||||
|
- CI runs `canonical-profile-eval-guards` and `portable-control-plane-regressions` on every relevant change from `.github/workflows/nix.yml`.
|
||||||
|
|
||||||
## Publishable Checks
|
## Publishable Checks
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
nix run .#single-node-quickstart
|
||||||
|
nix run ./nix/test-cluster#cluster -- baremetal-iso
|
||||||
nix run ./nix/test-cluster#cluster -- fresh-smoke
|
nix run ./nix/test-cluster#cluster -- fresh-smoke
|
||||||
nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp
|
nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp
|
||||||
nix run ./nix/test-cluster#cluster -- fresh-matrix
|
nix run ./nix/test-cluster#cluster -- fresh-matrix
|
||||||
nix run ./nix/test-cluster#cluster -- fresh-bench-storage
|
./nix/test-cluster/run-publishable-kvm-suite.sh ./work/publishable-kvm-suite
|
||||||
|
nix build .#checks.x86_64-linux.baremetal-iso-e2e
|
||||||
nix build .#checks.x86_64-linux.deployer-vm-smoke
|
nix build .#checks.x86_64-linux.deployer-vm-smoke
|
||||||
```
|
```
|
||||||
|
|
||||||
Use these commands as the release-facing local proof set:
|
Use these commands as the release-facing local proof set:
|
||||||
|
|
||||||
- `fresh-smoke`: whole-cluster readiness, core behavior, and fault injection
|
- `single-node-quickstart`: productized one-command quickstart gate for the minimal VM platform profile
|
||||||
- `fresh-demo-vm-webapp`: focused VM demo showing a web app inside the guest with FlareDB-backed state and LightningStor object snapshots surviving restart and migration
|
- `baremetal-iso`: canonical bare-metal bootstrap gate covering pre-install boot, phone-home, flake bundle fetch, Disko install, reboot, post-install boot, and desired-system activation on one control-plane node plus one worker-equivalent node
|
||||||
- `fresh-matrix`: composed service scenarios such as `prismnet + flashdns + fiberlb` and PrismNet-backed VM hosting bundles with `plasmavmc + coronafs + lightningstor`
|
- `fresh-smoke`: base VM-cluster gate for the canonical multi-node topology, including readiness, core behavior, and fault injection
|
||||||
- `fresh-bench-storage`: CoronaFS local-vs-shared-volume throughput, cross-worker volume visibility, and LightningStor large/small-object throughput capture
|
- `fresh-demo-vm-webapp`: optional VM-hosting bundle proof for `plasmavmc + prismnet` with state persisted through `lightningstor`
|
||||||
- `deployer-vm-smoke`: prebuilt NixOS system closure handoff into `nix-agent`, proving host rollout can activate a host-built target without guest-side compilation
|
- `fresh-matrix`: optional composition proof for provider bundles such as `prismnet + flashdns + fiberlb` and `plasmavmc + coronafs + lightningstor`
|
||||||
|
- `run-publishable-kvm-suite.sh`: reproducible wrapper that captures the KVM environment and runs the full publishable nested-KVM trio in a single command
|
||||||
|
- `baremetal-iso-e2e`: flake-check wrapper around the same canonical ISO harness
|
||||||
|
- `deployer-vm-smoke`: lightweight regression proving that `nix-agent` can activate a host-built target closure without guest-side compilation
|
||||||
|
|
||||||
|
## Responsibility Coverage
|
||||||
|
|
||||||
|
- `baremetal-iso` and `baremetal-iso-e2e` are the canonical proof for `deployer -> installer -> nix-agent`. They cover phone-home, install-plan materialization, Disko, reboot, and desired-system activation.
|
||||||
|
- `deployer-vm-smoke` is the smallest regression for the same `deployer -> nix-agent` boundary. It proves that a node can receive a prebuilt target closure and activate it without guest-side compilation.
|
||||||
|
- `portable-control-plane-regressions` keeps the main non-KVM-safe boundaries under continuous coverage by composing `deployer-bootstrap-e2e`, `host-lifecycle-e2e`, `deployer-vm-smoke`, and `fleet-scheduler-e2e` behind the canonical profile eval guard.
|
||||||
|
- `fresh-smoke` and `fresh-matrix` are the canonical proof for `deployer -> fleet-scheduler -> node-agent`. They cover native service placement, heartbeats, failover, and runtime reconciliation.
|
||||||
|
- `fresh-smoke` also covers `k8shost` separately from `fleet-scheduler`: `k8shost` exposes tenant pod and service semantics, while `fleet-scheduler` handles bare-metal host services.
|
||||||
|
|
||||||
|
The three `fresh-*` VM-cluster commands are the publishable nested-KVM suite. They require a Linux host with `/dev/kvm` and nested virtualization, and the harness stops at preflight by design when that device is absent. `single-node-quickstart`, `baremetal-iso`, `baremetal-iso-e2e`, `deployer-vm-smoke`, and `portable-control-plane-regressions` can run on TCG-only hosts, but they are slower without host KVM.
|
||||||
|
|
||||||
|
Release-facing completion now requires both of these to be green on the same branch:
|
||||||
|
|
||||||
|
- the canonical bare-metal proof: `nix run ./nix/test-cluster#cluster -- baremetal-iso` plus `nix build .#checks.x86_64-linux.baremetal-iso-e2e`
|
||||||
|
- the publishable nested-KVM suite: `fresh-smoke`, `fresh-demo-vm-webapp`, and `fresh-matrix`, preferably through `./nix/test-cluster/run-publishable-kvm-suite.sh`
|
||||||
|
|
||||||
|
## Extended Measurements
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nix run ./nix/test-cluster#cluster -- fresh-bench-storage
|
||||||
|
```
|
||||||
|
|
||||||
|
`fresh-bench-storage` remains useful for storage regression tracking, but it is a benchmark path, not part of the minimal canonical publish gate.
|
||||||
|
|
||||||
## Operational Commands
|
## Operational Commands
|
||||||
|
|
||||||
|
|
@ -53,8 +129,11 @@ nix run ./nix/test-cluster#cluster -- clean
|
||||||
|
|
||||||
- package unit tests are useful but not sufficient
|
- package unit tests are useful but not sufficient
|
||||||
- host-built VM clusters are the main integration signal
|
- host-built VM clusters are the main integration signal
|
||||||
|
- bootstrap and rollout paths must stay evaluable independently of the larger VM-hosting feature set
|
||||||
- distributed storage and virtualization paths must be checked under failure, not only at steady state
|
- distributed storage and virtualization paths must be checked under failure, not only at steady state
|
||||||
|
|
||||||
## Legacy Note
|
## Legacy And Experimental Paths
|
||||||
|
|
||||||
Older manual launch scripts under `baremetal/vm-cluster` are archived only for historical reference. They are not the release-validation path.
|
- `baremetal/vm-cluster` manual launch scripts are `legacy/manual`, not canonical validation
|
||||||
|
- direct `nix develop ./nix/test-cluster -c ./nix/test-cluster/run-cluster.sh ...` usage is a debugging path, not the publishable entrypoint
|
||||||
|
- `netboot-control-plane`, `netboot-worker`, `netboot-all-in-one`, `netboot-base`, `pxe-server`, and other helper images are internal or experimental building blocks, not supported profiles by themselves
|
||||||
|
|
|
||||||
487
flake.nix
487
flake.nix
|
|
@ -963,6 +963,185 @@
|
||||||
self.packages.${system}.vmClusterDeployerState
|
self.packages.${system}.vmClusterDeployerState
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
single-node-quickstart-vm =
|
||||||
|
self.nixosConfigurations.single-node-quickstart.config.system.build.vm;
|
||||||
|
|
||||||
|
single-node-quickstart = pkgs.writeShellApplication {
|
||||||
|
name = "single-node-quickstart";
|
||||||
|
runtimeInputs = with pkgs; [
|
||||||
|
coreutils
|
||||||
|
findutils
|
||||||
|
netcat
|
||||||
|
openssh
|
||||||
|
procps
|
||||||
|
sshpass
|
||||||
|
];
|
||||||
|
text = ''
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
STATE_DIR="''${ULTRACLOUD_QUICKSTART_STATE_DIR:-$HOME/.ultracloud-single-node-quickstart}"
|
||||||
|
RUN_DIR="$STATE_DIR/run"
|
||||||
|
DISK_IMAGE="$STATE_DIR/quickstart.qcow2"
|
||||||
|
PID_FILE="$STATE_DIR/qemu.pid"
|
||||||
|
SERIAL_LOG="$STATE_DIR/serial.log"
|
||||||
|
SSH_PORT="''${ULTRACLOUD_QUICKSTART_SSH_PORT:-22220}"
|
||||||
|
KEEP_VM="''${ULTRACLOUD_QUICKSTART_KEEP_VM:-0}"
|
||||||
|
REUSE_DISK="''${ULTRACLOUD_QUICKSTART_REUSE_DISK:-0}"
|
||||||
|
VM_PATH="${self.packages.${system}.single-node-quickstart-vm}"
|
||||||
|
RUN_VM="$(find "$VM_PATH/bin" -maxdepth 1 -name 'run-*-vm' | head -n1)"
|
||||||
|
|
||||||
|
log() {
|
||||||
|
printf '[single-node-quickstart] %s\n' "$*"
|
||||||
|
}
|
||||||
|
|
||||||
|
dump_serial() {
|
||||||
|
if [ -f "$SERIAL_LOG" ]; then
|
||||||
|
log "serial log tail:"
|
||||||
|
tail -n 120 "$SERIAL_LOG" >&2 || true
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
if [ -f "$PID_FILE" ]; then
|
||||||
|
pid="$(cat "$PID_FILE")"
|
||||||
|
if kill -0 "$pid" >/dev/null 2>&1; then
|
||||||
|
kill "$pid" >/dev/null 2>&1 || true
|
||||||
|
for _ in $(seq 1 30); do
|
||||||
|
if ! kill -0 "$pid" >/dev/null 2>&1; then
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
rm -f "$PID_FILE"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
on_exit() {
|
||||||
|
status="$?"
|
||||||
|
if [ "$status" -ne 0 ]; then
|
||||||
|
dump_serial
|
||||||
|
fi
|
||||||
|
if [ "$KEEP_VM" != "1" ]; then
|
||||||
|
cleanup
|
||||||
|
fi
|
||||||
|
exit "$status"
|
||||||
|
}
|
||||||
|
|
||||||
|
wait_for_ssh() {
|
||||||
|
local deadline=$((SECONDS + 240))
|
||||||
|
while true; do
|
||||||
|
if sshpass -p ultracloud ssh \
|
||||||
|
-F /dev/null \
|
||||||
|
-o StrictHostKeyChecking=no \
|
||||||
|
-o UserKnownHostsFile=/dev/null \
|
||||||
|
-o LogLevel=ERROR \
|
||||||
|
-o ConnectTimeout=5 \
|
||||||
|
-o ConnectionAttempts=1 \
|
||||||
|
-p "$SSH_PORT" \
|
||||||
|
root@127.0.0.1 true >/dev/null 2>&1; then
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
if [ "$SECONDS" -ge "$deadline" ]; then
|
||||||
|
log "timed out waiting for SSH on port $SSH_PORT"
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
wait_for_unit_active() {
|
||||||
|
local unit="$1"
|
||||||
|
local deadline=$((SECONDS + 240))
|
||||||
|
while true; do
|
||||||
|
if ssh_cmd systemctl is-active "$unit" >/dev/null 2>&1; then
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
if [ "$SECONDS" -ge "$deadline" ]; then
|
||||||
|
log "timed out waiting for $unit"
|
||||||
|
ssh_cmd systemctl status "$unit" --no-pager || true
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
ssh_cmd() {
|
||||||
|
sshpass -p ultracloud ssh \
|
||||||
|
-F /dev/null \
|
||||||
|
-o StrictHostKeyChecking=no \
|
||||||
|
-o UserKnownHostsFile=/dev/null \
|
||||||
|
-o LogLevel=ERROR \
|
||||||
|
-o ConnectTimeout=5 \
|
||||||
|
-o ConnectionAttempts=1 \
|
||||||
|
-p "$SSH_PORT" \
|
||||||
|
root@127.0.0.1 -- "$@"
|
||||||
|
}
|
||||||
|
|
||||||
|
ssh_shell() {
|
||||||
|
local script="$1"
|
||||||
|
local quoted
|
||||||
|
printf -v quoted '%q' "$script"
|
||||||
|
sshpass -p ultracloud ssh \
|
||||||
|
-F /dev/null \
|
||||||
|
-o StrictHostKeyChecking=no \
|
||||||
|
-o UserKnownHostsFile=/dev/null \
|
||||||
|
-o LogLevel=ERROR \
|
||||||
|
-o ConnectTimeout=5 \
|
||||||
|
-o ConnectionAttempts=1 \
|
||||||
|
-p "$SSH_PORT" \
|
||||||
|
root@127.0.0.1 "bash -lc $quoted"
|
||||||
|
}
|
||||||
|
|
||||||
|
trap on_exit EXIT
|
||||||
|
|
||||||
|
[ -n "$RUN_VM" ] || {
|
||||||
|
log "failed to locate run-*-vm under $VM_PATH/bin"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
mkdir -p "$STATE_DIR"
|
||||||
|
rm -rf "$RUN_DIR"
|
||||||
|
mkdir -p "$RUN_DIR"
|
||||||
|
rm -f "$SERIAL_LOG"
|
||||||
|
if [ "$REUSE_DISK" != "1" ]; then
|
||||||
|
rm -f "$DISK_IMAGE"
|
||||||
|
fi
|
||||||
|
|
||||||
|
cleanup
|
||||||
|
|
||||||
|
log "launching single-node quickstart VM"
|
||||||
|
nohup env \
|
||||||
|
USE_TMPDIR=1 \
|
||||||
|
TMPDIR="$RUN_DIR" \
|
||||||
|
NIX_DISK_IMAGE="$DISK_IMAGE" \
|
||||||
|
QEMU_NET_OPTS="hostfwd=tcp:127.0.0.1:$SSH_PORT-:22" \
|
||||||
|
"$RUN_VM" >"$SERIAL_LOG" 2>&1 &
|
||||||
|
echo "$!" > "$PID_FILE"
|
||||||
|
|
||||||
|
log "waiting for guest SSH"
|
||||||
|
wait_for_ssh
|
||||||
|
|
||||||
|
log "waiting for in-guest readiness gate"
|
||||||
|
wait_for_unit_active ultracloud-single-node-quickstart-ready.service
|
||||||
|
|
||||||
|
log "verifying required services"
|
||||||
|
ssh_cmd systemctl is-active chainfire flaredb iam prismnet plasmavmc >/dev/null
|
||||||
|
|
||||||
|
log "verifying service health endpoints and VM runtime prerequisites"
|
||||||
|
ssh_shell 'curl -fsS http://127.0.0.1:8081/health >/dev/null && curl -fsS http://127.0.0.1:8082/health >/dev/null && curl -fsS http://127.0.0.1:8083/health >/dev/null && curl -fsS http://127.0.0.1:8087/health >/dev/null && curl -fsS http://127.0.0.1:8084/health >/dev/null && test -x /run/current-system/sw/bin/qemu-system-x86_64 && test -x /run/current-system/sw/bin/qemu-img && test -c /dev/net/tun'
|
||||||
|
|
||||||
|
log "single-node quickstart smoke passed"
|
||||||
|
|
||||||
|
if [ "$KEEP_VM" = "1" ]; then
|
||||||
|
trap - EXIT
|
||||||
|
log "VM left running"
|
||||||
|
log "ssh: sshpass -p ultracloud ssh -p $SSH_PORT root@127.0.0.1"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
'';
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
# ======================================================================
|
# ======================================================================
|
||||||
|
|
@ -1044,9 +1223,75 @@
|
||||||
fleet-scheduler = flake-utils.lib.mkApp {
|
fleet-scheduler = flake-utils.lib.mkApp {
|
||||||
drv = self.packages.${system}.fleet-scheduler;
|
drv = self.packages.${system}.fleet-scheduler;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
single-node-quickstart = flake-utils.lib.mkApp {
|
||||||
|
drv = self.packages.${system}.single-node-quickstart;
|
||||||
|
};
|
||||||
|
|
||||||
|
all-in-one-quickstart = flake-utils.lib.mkApp {
|
||||||
|
drv = self.packages.${system}.single-node-quickstart;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
checks = {
|
checks =
|
||||||
|
let
|
||||||
|
stripKvmRequiredSystemFeature = drv:
|
||||||
|
drv.overrideTestDerivation (old: {
|
||||||
|
requiredSystemFeatures =
|
||||||
|
builtins.filter (feature: feature != "kvm") (old.requiredSystemFeatures or [ ]);
|
||||||
|
});
|
||||||
|
|
||||||
|
canonicalProfileEvalData = {
|
||||||
|
single-node-quickstart = {
|
||||||
|
hostName = self.nixosConfigurations.single-node-quickstart.config.networking.hostName;
|
||||||
|
stateVersion =
|
||||||
|
self.nixosConfigurations.single-node-quickstart.config.system.stateVersion;
|
||||||
|
};
|
||||||
|
node01 = {
|
||||||
|
hostName = self.nixosConfigurations.node01.config.networking.hostName;
|
||||||
|
stateVersion = self.nixosConfigurations.node01.config.system.stateVersion;
|
||||||
|
};
|
||||||
|
node02 = {
|
||||||
|
hostName = self.nixosConfigurations.node02.config.networking.hostName;
|
||||||
|
stateVersion = self.nixosConfigurations.node02.config.system.stateVersion;
|
||||||
|
};
|
||||||
|
node03 = {
|
||||||
|
hostName = self.nixosConfigurations.node03.config.networking.hostName;
|
||||||
|
stateVersion = self.nixosConfigurations.node03.config.system.stateVersion;
|
||||||
|
};
|
||||||
|
netboot-control-plane = {
|
||||||
|
hostName = self.nixosConfigurations.netboot-control-plane.config.networking.hostName;
|
||||||
|
stateVersion =
|
||||||
|
self.nixosConfigurations.netboot-control-plane.config.system.stateVersion;
|
||||||
|
};
|
||||||
|
netboot-worker = {
|
||||||
|
hostName = self.nixosConfigurations.netboot-worker.config.networking.hostName;
|
||||||
|
stateVersion =
|
||||||
|
self.nixosConfigurations.netboot-worker.config.system.stateVersion;
|
||||||
|
};
|
||||||
|
netboot-all-in-one = {
|
||||||
|
hostName = self.nixosConfigurations.netboot-all-in-one.config.networking.hostName;
|
||||||
|
stateVersion =
|
||||||
|
self.nixosConfigurations.netboot-all-in-one.config.system.stateVersion;
|
||||||
|
};
|
||||||
|
ultracloud-iso = {
|
||||||
|
hostName = self.nixosConfigurations.ultracloud-iso.config.networking.hostName;
|
||||||
|
imageFileName = self.nixosConfigurations.ultracloud-iso.config.image.fileName;
|
||||||
|
};
|
||||||
|
baremetal-qemu-control-plane = {
|
||||||
|
hostName =
|
||||||
|
self.nixosConfigurations.baremetal-qemu-control-plane.config.networking.hostName;
|
||||||
|
stateVersion =
|
||||||
|
self.nixosConfigurations.baremetal-qemu-control-plane.config.system.stateVersion;
|
||||||
|
};
|
||||||
|
baremetal-qemu-worker = {
|
||||||
|
hostName = self.nixosConfigurations.baremetal-qemu-worker.config.networking.hostName;
|
||||||
|
stateVersion =
|
||||||
|
self.nixosConfigurations.baremetal-qemu-worker.config.system.stateVersion;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
in
|
||||||
|
{
|
||||||
workspace-source-roots-audit = pkgs.runCommand "workspace-source-roots-audit"
|
workspace-source-roots-audit = pkgs.runCommand "workspace-source-roots-audit"
|
||||||
{
|
{
|
||||||
nativeBuildInputs = [ pkgs.python3 ];
|
nativeBuildInputs = [ pkgs.python3 ];
|
||||||
|
|
@ -1169,6 +1414,76 @@
|
||||||
touch "$out"
|
touch "$out"
|
||||||
'';
|
'';
|
||||||
|
|
||||||
|
canonical-profile-eval-guards = pkgs.writeText "canonical-profile-eval-guards.json"
|
||||||
|
(builtins.toJSON canonicalProfileEvalData);
|
||||||
|
|
||||||
|
canonical-profile-build-guards = pkgs.linkFarm "canonical-profile-build-guards" [
|
||||||
|
{
|
||||||
|
name = "single-node-quickstart-vm";
|
||||||
|
path = self.packages.${system}.single-node-quickstart-vm;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "node01-toplevel";
|
||||||
|
path = self.nixosConfigurations.node01.config.system.build.toplevel;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "node02-toplevel";
|
||||||
|
path = self.nixosConfigurations.node02.config.system.build.toplevel;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "node03-toplevel";
|
||||||
|
path = self.nixosConfigurations.node03.config.system.build.toplevel;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "netboot-control-plane-toplevel";
|
||||||
|
path = self.nixosConfigurations.netboot-control-plane.config.system.build.toplevel;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "netboot-worker-toplevel";
|
||||||
|
path = self.nixosConfigurations.netboot-worker.config.system.build.toplevel;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "netboot-all-in-one-toplevel";
|
||||||
|
path = self.nixosConfigurations.netboot-all-in-one.config.system.build.toplevel;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "ultracloud-iso-image";
|
||||||
|
path = self.nixosConfigurations.ultracloud-iso.config.system.build.isoImage;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "baremetal-qemu-control-plane-toplevel";
|
||||||
|
path = self.nixosConfigurations.baremetal-qemu-control-plane.config.system.build.toplevel;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "baremetal-qemu-worker-toplevel";
|
||||||
|
path = self.nixosConfigurations.baremetal-qemu-worker.config.system.build.toplevel;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
portable-control-plane-regressions =
|
||||||
|
pkgs.linkFarm "portable-control-plane-regressions" [
|
||||||
|
{
|
||||||
|
name = "canonical-profile-eval-guards";
|
||||||
|
path = self.checks.${system}.canonical-profile-eval-guards;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "deployer-bootstrap-e2e";
|
||||||
|
path = self.checks.${system}.deployer-bootstrap-e2e;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "host-lifecycle-e2e";
|
||||||
|
path = self.checks.${system}.host-lifecycle-e2e;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "deployer-vm-smoke";
|
||||||
|
path = self.checks.${system}.deployer-vm-smoke;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "fleet-scheduler-e2e";
|
||||||
|
path = self.checks.${system}.fleet-scheduler-e2e;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
first-boot-topology-vm-smoke = pkgs.testers.runNixOSTest (
|
first-boot-topology-vm-smoke = pkgs.testers.runNixOSTest (
|
||||||
import ./nix/tests/first-boot-topology-vm-smoke.nix {
|
import ./nix/tests/first-boot-topology-vm-smoke.nix {
|
||||||
inherit pkgs;
|
inherit pkgs;
|
||||||
|
|
@ -1177,15 +1492,15 @@
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
deployer-vm-smoke = pkgs.testers.runNixOSTest (
|
deployer-vm-smoke = stripKvmRequiredSystemFeature (pkgs.testers.runNixOSTest (
|
||||||
import ./nix/tests/deployer-vm-smoke.nix {
|
import ./nix/tests/deployer-vm-smoke.nix {
|
||||||
inherit pkgs;
|
inherit pkgs;
|
||||||
ultracloudPackages = self.packages.${system};
|
ultracloudPackages = self.packages.${system};
|
||||||
smokeTargetToplevel = self.packages.${system}.vmSmokeBundledTargetToplevel;
|
smokeTargetToplevel = self.packages.${system}.vmSmokeBundledTargetToplevel;
|
||||||
}
|
}
|
||||||
);
|
));
|
||||||
|
|
||||||
deployer-vm-rollback = pkgs.testers.runNixOSTest (
|
deployer-vm-rollback = stripKvmRequiredSystemFeature (pkgs.testers.runNixOSTest (
|
||||||
import ./nix/tests/deployer-vm-smoke.nix {
|
import ./nix/tests/deployer-vm-smoke.nix {
|
||||||
inherit pkgs;
|
inherit pkgs;
|
||||||
ultracloudPackages = self.packages.${system};
|
ultracloudPackages = self.packages.${system};
|
||||||
|
|
@ -1198,7 +1513,83 @@
|
||||||
expectCurrentSystemMatchesTarget = false;
|
expectCurrentSystemMatchesTarget = false;
|
||||||
expectMarkerPresent = false;
|
expectMarkerPresent = false;
|
||||||
}
|
}
|
||||||
);
|
));
|
||||||
|
|
||||||
|
baremetal-iso-e2e = pkgs.runCommand "baremetal-iso-e2e"
|
||||||
|
{
|
||||||
|
nativeBuildInputs = with pkgs; [
|
||||||
|
bash
|
||||||
|
coreutils
|
||||||
|
curl
|
||||||
|
findutils
|
||||||
|
gawk
|
||||||
|
gnugrep
|
||||||
|
gnused
|
||||||
|
iproute2
|
||||||
|
jq
|
||||||
|
nix
|
||||||
|
openssh
|
||||||
|
procps
|
||||||
|
python3
|
||||||
|
qemu
|
||||||
|
];
|
||||||
|
preferLocalBuild = true;
|
||||||
|
allowSubstitutes = false;
|
||||||
|
ULTRACLOUD_BAREMETAL_ISO_IMAGE =
|
||||||
|
"${self.nixosConfigurations.ultracloud-iso.config.system.build.isoImage}";
|
||||||
|
ULTRACLOUD_BAREMETAL_FLAKE_BUNDLE =
|
||||||
|
"${self.packages.${system}.ultracloudFlakeBundle}";
|
||||||
|
ULTRACLOUD_BAREMETAL_CONTROL_TARGET =
|
||||||
|
"${self.nixosConfigurations.baremetal-qemu-control-plane.config.system.build.toplevel}";
|
||||||
|
ULTRACLOUD_BAREMETAL_WORKER_TARGET =
|
||||||
|
"${self.nixosConfigurations.baremetal-qemu-worker.config.system.build.toplevel}";
|
||||||
|
ULTRACLOUD_BAREMETAL_CONTROL_DISKO_SCRIPT =
|
||||||
|
"${self.nixosConfigurations.baremetal-qemu-control-plane.config.system.build.formatMount}";
|
||||||
|
ULTRACLOUD_BAREMETAL_WORKER_DISKO_SCRIPT =
|
||||||
|
"${self.nixosConfigurations.baremetal-qemu-worker.config.system.build.formatMount}";
|
||||||
|
ULTRACLOUD_BAREMETAL_CACHE_REGISTRATION = "${pkgs.closureInfo {
|
||||||
|
rootPaths = [
|
||||||
|
self.nixosConfigurations.baremetal-qemu-control-plane.config.system.build.toplevel
|
||||||
|
self.nixosConfigurations.baremetal-qemu-worker.config.system.build.toplevel
|
||||||
|
self.nixosConfigurations.baremetal-qemu-control-plane.config.system.build.formatMount
|
||||||
|
self.nixosConfigurations.baremetal-qemu-worker.config.system.build.formatMount
|
||||||
|
];
|
||||||
|
}}";
|
||||||
|
ULTRACLOUD_CHAINFIRE_SERVER_BIN =
|
||||||
|
"${self.packages.${system}.chainfire-server}/bin/chainfire";
|
||||||
|
ULTRACLOUD_DEPLOYER_SERVER_BIN =
|
||||||
|
"${self.packages.${system}.deployer-server}/bin/deployer-server";
|
||||||
|
ULTRACLOUD_DEPLOYER_CTL_BIN =
|
||||||
|
"${self.packages.${system}.deployer-ctl}/bin/deployer-ctl";
|
||||||
|
ULTRACLOUD_OVMF_CODE = "${pkgs.OVMF.fd}/FV/OVMF_CODE.fd";
|
||||||
|
ULTRACLOUD_OVMF_VARS = "${pkgs.OVMF.fd}/FV/OVMF_VARS.fd";
|
||||||
|
ULTRACLOUD_QEMU_BIN = "${pkgs.qemu}/bin/qemu-system-x86_64";
|
||||||
|
ULTRACLOUD_QEMU_IMG_BIN = "${pkgs.qemu}/bin/qemu-img";
|
||||||
|
ULTRACLOUD_REPO_ROOT = "${self}";
|
||||||
|
NIX_CONFIG = "experimental-features = nix-command flakes";
|
||||||
|
} ''
|
||||||
|
export HOME="$TMPDIR/home"
|
||||||
|
mkdir -p "$HOME"
|
||||||
|
export NIX_CONFIG="$NIX_CONFIG"
|
||||||
|
export PATH="${pkgs.lib.makeBinPath [
|
||||||
|
pkgs.bash
|
||||||
|
pkgs.coreutils
|
||||||
|
pkgs.curl
|
||||||
|
pkgs.findutils
|
||||||
|
pkgs.gawk
|
||||||
|
pkgs.gnugrep
|
||||||
|
pkgs.gnused
|
||||||
|
pkgs.iproute2
|
||||||
|
pkgs.jq
|
||||||
|
pkgs.nix
|
||||||
|
pkgs.openssh
|
||||||
|
pkgs.procps
|
||||||
|
pkgs.python3
|
||||||
|
pkgs.qemu
|
||||||
|
]}"
|
||||||
|
bash ${./nix/test-cluster/verify-baremetal-iso.sh}
|
||||||
|
touch "$out"
|
||||||
|
'';
|
||||||
|
|
||||||
fiberlb-native-bgp-vm-smoke = pkgs.testers.runNixOSTest (
|
fiberlb-native-bgp-vm-smoke = pkgs.testers.runNixOSTest (
|
||||||
import ./nix/tests/fiberlb-native-bgp-vm-smoke.nix {
|
import ./nix/tests/fiberlb-native-bgp-vm-smoke.nix {
|
||||||
|
|
@ -1363,6 +1754,9 @@
|
||||||
nixosConfigurations =
|
nixosConfigurations =
|
||||||
let
|
let
|
||||||
vmClusterLib = import ./nix/nodes/vm-cluster/lib.nix { lib = nixpkgs.lib; };
|
vmClusterLib = import ./nix/nodes/vm-cluster/lib.nix { lib = nixpkgs.lib; };
|
||||||
|
overlayModule = {
|
||||||
|
nixpkgs.overlays = [ self.overlays.default ];
|
||||||
|
};
|
||||||
mkVmClusterSystem = nodeName:
|
mkVmClusterSystem = nodeName:
|
||||||
nixpkgs.lib.nixosSystem {
|
nixpkgs.lib.nixosSystem {
|
||||||
system = "x86_64-linux";
|
system = "x86_64-linux";
|
||||||
|
|
@ -1382,25 +1776,74 @@
|
||||||
# Control Plane netboot image (all 8 services)
|
# Control Plane netboot image (all 8 services)
|
||||||
netboot-control-plane = nixpkgs.lib.nixosSystem {
|
netboot-control-plane = nixpkgs.lib.nixosSystem {
|
||||||
system = "x86_64-linux";
|
system = "x86_64-linux";
|
||||||
modules = [ ./nix/images/netboot-control-plane.nix ];
|
modules = [
|
||||||
|
./nix/images/netboot-control-plane.nix
|
||||||
|
overlayModule
|
||||||
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
# Worker netboot image (compute-focused services)
|
# Worker netboot image (compute-focused services)
|
||||||
netboot-worker = nixpkgs.lib.nixosSystem {
|
netboot-worker = nixpkgs.lib.nixosSystem {
|
||||||
system = "x86_64-linux";
|
system = "x86_64-linux";
|
||||||
modules = [ ./nix/images/netboot-worker.nix ];
|
modules = [
|
||||||
|
./nix/images/netboot-worker.nix
|
||||||
|
overlayModule
|
||||||
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
# All-in-One netboot image (single-node deployment)
|
# All-in-One netboot image (single-node deployment)
|
||||||
netboot-all-in-one = nixpkgs.lib.nixosSystem {
|
netboot-all-in-one = nixpkgs.lib.nixosSystem {
|
||||||
system = "x86_64-linux";
|
system = "x86_64-linux";
|
||||||
modules = [ ./nix/images/netboot-all-in-one.nix ];
|
modules = [
|
||||||
|
./nix/images/netboot-all-in-one.nix
|
||||||
|
overlayModule
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
|
# QEMU-first single-node quickstart for one-command local bring-up.
|
||||||
|
single-node-quickstart = nixpkgs.lib.nixosSystem {
|
||||||
|
system = "x86_64-linux";
|
||||||
|
modules = [
|
||||||
|
./nix/single-node/qemu-vm.nix
|
||||||
|
./nix/single-node/base.nix
|
||||||
|
self.nixosModules.default
|
||||||
|
overlayModule
|
||||||
|
{
|
||||||
|
ultracloud.quickstart.enable = true;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
|
# Canonical bare-metal ISO install targets used by the QEMU proof path.
|
||||||
|
baremetal-qemu-control-plane = nixpkgs.lib.nixosSystem {
|
||||||
|
system = "x86_64-linux";
|
||||||
|
modules = [
|
||||||
|
disko.nixosModules.disko
|
||||||
|
./nix/nodes/baremetal-qemu/control-plane/configuration.nix
|
||||||
|
./nix/nodes/baremetal-qemu/control-plane/disko.nix
|
||||||
|
self.nixosModules.default
|
||||||
|
overlayModule
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
|
baremetal-qemu-worker = nixpkgs.lib.nixosSystem {
|
||||||
|
system = "x86_64-linux";
|
||||||
|
modules = [
|
||||||
|
disko.nixosModules.disko
|
||||||
|
./nix/nodes/baremetal-qemu/worker/configuration.nix
|
||||||
|
./nix/nodes/baremetal-qemu/worker/disko.nix
|
||||||
|
self.nixosModules.default
|
||||||
|
overlayModule
|
||||||
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
# Base netboot image (minimal, for VM testing and provisioning)
|
# Base netboot image (minimal, for VM testing and provisioning)
|
||||||
netboot-base = nixpkgs.lib.nixosSystem {
|
netboot-base = nixpkgs.lib.nixosSystem {
|
||||||
system = "x86_64-linux";
|
system = "x86_64-linux";
|
||||||
modules = [ ./nix/images/netboot-base.nix ];
|
modules = [
|
||||||
|
./nix/images/netboot-base.nix
|
||||||
|
overlayModule
|
||||||
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
# Offline-friendly target used by deployer VM smoke tests.
|
# Offline-friendly target used by deployer VM smoke tests.
|
||||||
|
|
@ -1412,6 +1855,20 @@
|
||||||
# UltraCloud ISO (T061.S5 - bootable ISO with cluster-config embedding)
|
# UltraCloud ISO (T061.S5 - bootable ISO with cluster-config embedding)
|
||||||
ultracloud-iso = nixpkgs.lib.nixosSystem {
|
ultracloud-iso = nixpkgs.lib.nixosSystem {
|
||||||
system = "x86_64-linux";
|
system = "x86_64-linux";
|
||||||
|
specialArgs = {
|
||||||
|
ultracloudBaremetalFormatMountPaths = {
|
||||||
|
baremetal-qemu-control-plane =
|
||||||
|
self.nixosConfigurations."baremetal-qemu-control-plane".config.system.build.formatMount;
|
||||||
|
baremetal-qemu-worker =
|
||||||
|
self.nixosConfigurations."baremetal-qemu-worker".config.system.build.formatMount;
|
||||||
|
};
|
||||||
|
ultracloudBaremetalSystemPaths = {
|
||||||
|
baremetal-qemu-control-plane =
|
||||||
|
self.nixosConfigurations."baremetal-qemu-control-plane".config.system.build.toplevel;
|
||||||
|
baremetal-qemu-worker =
|
||||||
|
self.nixosConfigurations."baremetal-qemu-worker".config.system.build.toplevel;
|
||||||
|
};
|
||||||
|
};
|
||||||
modules = [
|
modules = [
|
||||||
./nix/iso/ultracloud-iso.nix
|
./nix/iso/ultracloud-iso.nix
|
||||||
self.nixosModules.default
|
self.nixosModules.default
|
||||||
|
|
@ -1455,13 +1912,13 @@
|
||||||
apigateway-server = self.packages.${final.system}.apigateway-server;
|
apigateway-server = self.packages.${final.system}.apigateway-server;
|
||||||
k8shost-server = self.packages.${final.system}.k8shost-server;
|
k8shost-server = self.packages.${final.system}.k8shost-server;
|
||||||
deployer-workspace = self.packages.${final.system}.deployer-workspace;
|
deployer-workspace = self.packages.${final.system}.deployer-workspace;
|
||||||
deployer-server = self.packages.${final.system}.deployer-workspace;
|
deployer-server = self.packages.${final.system}.deployer-server;
|
||||||
deployer-ctl = self.packages.${final.system}.deployer-workspace;
|
deployer-ctl = self.packages.${final.system}.deployer-ctl;
|
||||||
ultracloud-reconciler = self.packages.${final.system}.deployer-workspace;
|
ultracloud-reconciler = self.packages.${final.system}.ultracloud-reconciler;
|
||||||
ultracloudFlakeBundle = self.packages.${final.system}.ultracloudFlakeBundle;
|
ultracloudFlakeBundle = self.packages.${final.system}.ultracloudFlakeBundle;
|
||||||
nix-agent = self.packages.${final.system}.deployer-workspace;
|
nix-agent = self.packages.${final.system}.nix-agent;
|
||||||
node-agent = self.packages.${final.system}.deployer-workspace;
|
node-agent = self.packages.${final.system}.node-agent;
|
||||||
fleet-scheduler = self.packages.${final.system}.deployer-workspace;
|
fleet-scheduler = self.packages.${final.system}.fleet-scheduler;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -379,6 +379,9 @@
|
||||||
${gate}/bin/ultracloud-gate --tier 0 --no-logs
|
${gate}/bin/ultracloud-gate --tier 0 --no-logs
|
||||||
touch $out/ok
|
touch $out/ok
|
||||||
'';
|
'';
|
||||||
|
checks.canonical-profile-eval-guards = ultracloud.checks.${system}.canonical-profile-eval-guards;
|
||||||
|
checks.portable-control-plane-regressions =
|
||||||
|
ultracloud.checks.${system}.portable-control-plane-regressions;
|
||||||
checks.deployer-vm-smoke = ultracloud.checks.${system}.deployer-vm-smoke;
|
checks.deployer-vm-smoke = ultracloud.checks.${system}.deployer-vm-smoke;
|
||||||
checks.deployer-vm-rollback = ultracloud.checks.${system}.deployer-vm-rollback;
|
checks.deployer-vm-rollback = ultracloud.checks.${system}.deployer-vm-rollback;
|
||||||
checks.deployer-bootstrap-e2e = ultracloud.checks.${system}.deployer-bootstrap-e2e;
|
checks.deployer-bootstrap-e2e = ultracloud.checks.${system}.deployer-bootstrap-e2e;
|
||||||
|
|
|
||||||
|
|
@ -3,220 +3,113 @@
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
./netboot-base.nix
|
./netboot-base.nix
|
||||||
../modules # Import UltraCloud service modules
|
../modules
|
||||||
];
|
];
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# ALL-IN-ONE PROFILE
|
# SINGLE-NODE / ALL-IN-ONE INSTALL IMAGE
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# This profile includes all 8 UltraCloud services for a single-node deployment:
|
# This netboot image is the bare-metal companion to the QEMU-first
|
||||||
# - Chainfire: Distributed configuration and coordination
|
# `single-node-quickstart` profile. It keeps only the minimum VM stack in the
|
||||||
# - FlareDB: Time-series metrics and events database
|
# image by default and leaves DNS, load-balancing, storage, API, metrics, and
|
||||||
# - IAM: Identity and access management
|
# Kubernetes layers as explicit add-ons in the final installed system.
|
||||||
# - PlasmaVMC: Virtual machine control plane
|
|
||||||
# - PrismNET: Software-defined networking controller
|
|
||||||
# - FlashDNS: High-performance DNS server
|
|
||||||
# - FiberLB: Layer 4/7 load balancer
|
|
||||||
# - LightningStor: Distributed block storage
|
|
||||||
# - K8sHost: Kubernetes hosting component
|
|
||||||
#
|
#
|
||||||
# This profile is optimized for:
|
# Included by default:
|
||||||
# - Development/testing environments
|
# - Chainfire: local coordination and placement metadata
|
||||||
# - Small deployments (1-3 nodes)
|
# - FlareDB: metadata/event storage
|
||||||
# - Edge locations with limited infrastructure
|
# - IAM: local identity plane for the dev profile
|
||||||
# - Proof-of-concept installations
|
# - PrismNET: VM networking control plane
|
||||||
|
# - PlasmaVMC: VM control plane
|
||||||
#
|
#
|
||||||
# Services are DISABLED by default in the netboot image.
|
# Optional after install:
|
||||||
# They will be enabled in the final installed system configuration.
|
# - LightningStor, CoronaFS
|
||||||
|
# - FlashDNS, FiberLB
|
||||||
|
# - API Gateway, Nightlight, CreditService
|
||||||
|
# - K8sHost
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# SERVICE PACKAGE AVAILABILITY
|
|
||||||
# ============================================================================
|
|
||||||
# Make all service packages available in the netboot image
|
|
||||||
environment.systemPackages = with pkgs; [
|
environment.systemPackages = with pkgs; [
|
||||||
# Core services
|
|
||||||
chainfire-server
|
chainfire-server
|
||||||
flaredb-server
|
flaredb-server
|
||||||
iam-server
|
iam-server
|
||||||
|
|
||||||
# Compute and networking
|
|
||||||
plasmavmc-server
|
|
||||||
prismnet-server
|
prismnet-server
|
||||||
|
plasmavmc-server
|
||||||
# Network services
|
qemu
|
||||||
flashdns-server
|
libvirt
|
||||||
fiberlb-server
|
bridge-utils
|
||||||
|
openvswitch
|
||||||
# Storage
|
curl
|
||||||
lightningstor-server
|
jq
|
||||||
|
|
||||||
# Container orchestration
|
|
||||||
k8shost-server
|
|
||||||
|
|
||||||
# Additional tools for all-in-one deployment
|
|
||||||
qemu # For running VMs
|
|
||||||
libvirt # Virtualization management
|
|
||||||
bridge-utils # Network bridge configuration
|
|
||||||
openvswitch # Software-defined networking
|
|
||||||
];
|
];
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# CHAINFIRE CONFIGURATION (DISABLED)
|
|
||||||
# ============================================================================
|
|
||||||
services.chainfire = {
|
services.chainfire = {
|
||||||
enable = lib.mkDefault false;
|
enable = lib.mkDefault false;
|
||||||
port = 2379;
|
port = 2379;
|
||||||
raftPort = 2380;
|
raftPort = 2380;
|
||||||
gossipPort = 2381;
|
gossipPort = 2381;
|
||||||
|
httpPort = 8081;
|
||||||
};
|
};
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# FLAREDB CONFIGURATION (DISABLED)
|
|
||||||
# ============================================================================
|
|
||||||
services.flaredb = {
|
services.flaredb = {
|
||||||
enable = lib.mkDefault false;
|
enable = lib.mkDefault false;
|
||||||
port = 2479;
|
port = 2479;
|
||||||
raftPort = 2480;
|
raftPort = 2480;
|
||||||
|
httpPort = 8082;
|
||||||
};
|
};
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# IAM CONFIGURATION (DISABLED)
|
|
||||||
# ============================================================================
|
|
||||||
services.iam = {
|
services.iam = {
|
||||||
enable = lib.mkDefault false;
|
enable = lib.mkDefault false;
|
||||||
port = 8080;
|
port = 50080;
|
||||||
|
httpPort = 8083;
|
||||||
};
|
};
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# PLASMAVMC CONFIGURATION (DISABLED)
|
|
||||||
# ============================================================================
|
|
||||||
services.plasmavmc = {
|
|
||||||
enable = lib.mkDefault false;
|
|
||||||
port = 8081;
|
|
||||||
};
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# PRISMNET CONFIGURATION (DISABLED)
|
|
||||||
# ============================================================================
|
|
||||||
services.prismnet = {
|
services.prismnet = {
|
||||||
enable = lib.mkDefault false;
|
enable = lib.mkDefault false;
|
||||||
port = 8082;
|
port = 50081;
|
||||||
|
httpPort = 8087;
|
||||||
};
|
};
|
||||||
|
|
||||||
# ============================================================================
|
services.plasmavmc = {
|
||||||
# FLASHDNS CONFIGURATION (DISABLED)
|
|
||||||
# ============================================================================
|
|
||||||
services.flashdns = {
|
|
||||||
enable = lib.mkDefault false;
|
enable = lib.mkDefault false;
|
||||||
port = 53;
|
port = 50082;
|
||||||
|
httpPort = 8084;
|
||||||
};
|
};
|
||||||
|
|
||||||
# ============================================================================
|
boot.kernelModules = [ "kvm-intel" "kvm-amd" "tun" ];
|
||||||
# FIBERLB CONFIGURATION (DISABLED)
|
|
||||||
# ============================================================================
|
|
||||||
services.fiberlb = {
|
|
||||||
enable = lib.mkDefault false;
|
|
||||||
port = 8083;
|
|
||||||
};
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# LIGHTNINGSTOR CONFIGURATION (DISABLED)
|
|
||||||
# ============================================================================
|
|
||||||
services.lightningstor = {
|
|
||||||
enable = lib.mkDefault false;
|
|
||||||
port = 8084;
|
|
||||||
};
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# K8SHOST CONFIGURATION (DISABLED)
|
|
||||||
# ============================================================================
|
|
||||||
services.k8shost = {
|
|
||||||
enable = lib.mkDefault false;
|
|
||||||
port = 8085;
|
|
||||||
};
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# VIRTUALIZATION SUPPORT
|
|
||||||
# ============================================================================
|
|
||||||
# Enable KVM virtualization
|
|
||||||
boot.kernelModules = [ "kvm-intel" "kvm-amd" ];
|
|
||||||
|
|
||||||
# Enable nested virtualization
|
|
||||||
boot.extraModprobeConfig = ''
|
boot.extraModprobeConfig = ''
|
||||||
options kvm_intel nested=1
|
options kvm_intel nested=1
|
||||||
options kvm_amd nested=1
|
options kvm_amd nested=1
|
||||||
'';
|
'';
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# NETWORKING CONFIGURATION
|
|
||||||
# ============================================================================
|
|
||||||
# Enable Open vSwitch for SDN
|
|
||||||
networking.vswitches = lib.mkDefault {};
|
networking.vswitches = lib.mkDefault {};
|
||||||
|
|
||||||
# Open firewall ports for all services
|
|
||||||
networking.firewall.allowedTCPPorts = [
|
networking.firewall.allowedTCPPorts = [
|
||||||
# Chainfire
|
22
|
||||||
2379 # API
|
2379
|
||||||
2380 # Raft
|
2380
|
||||||
2381 # Gossip
|
2381
|
||||||
|
2479
|
||||||
# FlareDB
|
2480
|
||||||
2479 # API
|
50080
|
||||||
2480 # Raft
|
50081
|
||||||
|
50082
|
||||||
# IAM
|
|
||||||
8080
|
|
||||||
|
|
||||||
# PlasmaVMC
|
|
||||||
8081
|
8081
|
||||||
|
|
||||||
# PrismNET
|
|
||||||
8082
|
8082
|
||||||
|
|
||||||
# FlashDNS
|
|
||||||
53
|
|
||||||
|
|
||||||
# FiberLB
|
|
||||||
8083
|
8083
|
||||||
|
|
||||||
# LightningStor
|
|
||||||
8084
|
8084
|
||||||
|
8087
|
||||||
# K8sHost
|
16509
|
||||||
8085
|
5900
|
||||||
|
|
||||||
# QEMU/LibVirt
|
|
||||||
16509 # libvirtd
|
|
||||||
5900 # VNC (for VM console access)
|
|
||||||
];
|
];
|
||||||
|
|
||||||
networking.firewall.allowedUDPPorts = [
|
networking.firewall.allowedUDPPorts = [
|
||||||
# FlashDNS
|
|
||||||
53
|
|
||||||
|
|
||||||
# Chainfire gossip
|
|
||||||
2381
|
2381
|
||||||
|
|
||||||
# VXLAN for overlay networking
|
|
||||||
4789
|
4789
|
||||||
];
|
];
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# STORAGE CONFIGURATION
|
|
||||||
# ============================================================================
|
|
||||||
# Enable LVM for flexible storage management
|
|
||||||
services.lvm.enable = true;
|
services.lvm.enable = true;
|
||||||
|
|
||||||
# Enable ZFS if needed
|
|
||||||
boot.supportedFilesystems = [ "ext4" "xfs" "btrfs" "zfs" ];
|
boot.supportedFilesystems = [ "ext4" "xfs" "btrfs" "zfs" ];
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# RESOURCE LIMITS (BALANCED FOR ALL-IN-ONE)
|
|
||||||
# ============================================================================
|
|
||||||
# Balance resources between services on a single node
|
|
||||||
# These are minimal limits for netboot; adjust in final config based on hardware
|
|
||||||
|
|
||||||
systemd.services.chainfire.serviceConfig = lib.mkIf config.services.chainfire.enable {
|
systemd.services.chainfire.serviceConfig = lib.mkIf config.services.chainfire.enable {
|
||||||
MemoryMax = "1G";
|
MemoryMax = "1G";
|
||||||
CPUQuota = "100%";
|
CPUQuota = "100%";
|
||||||
|
|
@ -242,26 +135,13 @@
|
||||||
CPUQuota = "50%";
|
CPUQuota = "50%";
|
||||||
};
|
};
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# PERFORMANCE TUNING
|
|
||||||
# ============================================================================
|
|
||||||
# Optimize for mixed workload (services + VMs)
|
|
||||||
boot.kernel.sysctl = {
|
boot.kernel.sysctl = {
|
||||||
# Increase max number of open files
|
|
||||||
"fs.file-max" = 1000000;
|
"fs.file-max" = 1000000;
|
||||||
|
"net.core.netdev_max_backlog" = 5000;
|
||||||
# Increase network buffer sizes
|
|
||||||
"net.core.rmem_max" = 134217728;
|
"net.core.rmem_max" = 134217728;
|
||||||
"net.core.wmem_max" = 134217728;
|
"net.core.wmem_max" = 134217728;
|
||||||
|
|
||||||
# Enable IP forwarding for VM networking
|
|
||||||
"net.ipv4.ip_forward" = 1;
|
"net.ipv4.ip_forward" = 1;
|
||||||
"net.ipv6.conf.all.forwarding" = 1;
|
"net.ipv6.conf.all.forwarding" = 1;
|
||||||
|
|
||||||
# Optimize for high-performance networking
|
|
||||||
"net.core.netdev_max_backlog" = 5000;
|
|
||||||
|
|
||||||
# Swappiness for server workloads
|
|
||||||
"vm.swappiness" = 10;
|
"vm.swappiness" = 10;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,15 @@
|
||||||
# Minimal ISO with DHCP + Phone Home to Deployer + Auto-Install
|
# Minimal ISO with DHCP + Phone Home to Deployer + Auto-Install
|
||||||
# For VM cluster deployment: boots, phones home, partitions disk, installs NixOS
|
# For VM cluster deployment: boots, phones home, partitions disk, installs NixOS
|
||||||
|
|
||||||
{ config, lib, pkgs, modulesPath, ... }:
|
{
|
||||||
|
config,
|
||||||
|
lib,
|
||||||
|
pkgs,
|
||||||
|
modulesPath,
|
||||||
|
ultracloudBaremetalFormatMountPaths ? { },
|
||||||
|
ultracloudBaremetalSystemPaths ? { },
|
||||||
|
...
|
||||||
|
}:
|
||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
|
|
@ -58,16 +66,34 @@
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dmi_value() {
|
||||||
|
local path="$1"
|
||||||
|
if [ -r "$path" ]; then
|
||||||
|
tr -d '\n' <"$path" 2>/dev/null || true
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
resolve_deployer_url() {
|
||||||
|
local explicit_url="''${DEPLOYER_URL:-}"
|
||||||
|
if [ -z "$explicit_url" ]; then
|
||||||
|
explicit_url="$(cmdline_value ultracloud.deployer_url || true)"
|
||||||
|
fi
|
||||||
|
if [ -n "$explicit_url" ]; then
|
||||||
|
echo "$explicit_url"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
if ${pkgs.curl}/bin/curl -fsS --connect-timeout 2 --max-time 5 \
|
||||||
|
http://10.0.2.2:8088/health >/dev/null 2>&1; then
|
||||||
|
echo "http://10.0.2.2:8088"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
echo "http://192.168.100.1:8080"
|
||||||
|
}
|
||||||
|
|
||||||
mkdir -p /etc/ultracloud
|
mkdir -p /etc/ultracloud
|
||||||
|
|
||||||
# Discover Deployer via environment, kernel cmdline, or fallback.
|
# Discover Deployer via environment, kernel cmdline, or fallback.
|
||||||
DEPLOYER_URL="''${DEPLOYER_URL:-}"
|
DEPLOYER_URL="$(resolve_deployer_url)"
|
||||||
if [ -z "$DEPLOYER_URL" ]; then
|
|
||||||
DEPLOYER_URL="$(cmdline_value ultracloud.deployer_url || true)"
|
|
||||||
fi
|
|
||||||
if [ -z "$DEPLOYER_URL" ]; then
|
|
||||||
DEPLOYER_URL="http://192.168.100.1:8080"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Get machine identity
|
# Get machine identity
|
||||||
MACHINE_ID=$(cat /etc/machine-id)
|
MACHINE_ID=$(cat /etc/machine-id)
|
||||||
|
|
@ -113,7 +139,24 @@
|
||||||
if [ -z "$NODE_IP" ]; then
|
if [ -z "$NODE_IP" ]; then
|
||||||
NODE_IP=$(hostname -I 2>/dev/null | ${pkgs.gawk}/bin/awk '{print $1}')
|
NODE_IP=$(hostname -I 2>/dev/null | ${pkgs.gawk}/bin/awk '{print $1}')
|
||||||
fi
|
fi
|
||||||
NODE_HOSTNAME=$(hostname)
|
REQUESTED_NODE_ID="''${ULTRACLOUD_NODE_ID:-}"
|
||||||
|
if [ -z "$REQUESTED_NODE_ID" ]; then
|
||||||
|
REQUESTED_NODE_ID="$(cmdline_value ultracloud.node_id || true)"
|
||||||
|
fi
|
||||||
|
if [ -z "$REQUESTED_NODE_ID" ]; then
|
||||||
|
REQUESTED_NODE_ID="$(dmi_value /sys/class/dmi/id/product_serial)"
|
||||||
|
fi
|
||||||
|
if [ -z "$REQUESTED_NODE_ID" ]; then
|
||||||
|
REQUESTED_NODE_ID="$(hostname)"
|
||||||
|
fi
|
||||||
|
REQUESTED_HOSTNAME="''${ULTRACLOUD_HOSTNAME:-}"
|
||||||
|
if [ -z "$REQUESTED_HOSTNAME" ]; then
|
||||||
|
REQUESTED_HOSTNAME="$(cmdline_value ultracloud.hostname || true)"
|
||||||
|
fi
|
||||||
|
if [ -z "$REQUESTED_HOSTNAME" ]; then
|
||||||
|
REQUESTED_HOSTNAME="$REQUESTED_NODE_ID"
|
||||||
|
fi
|
||||||
|
echo "ULTRACLOUD_MARKER pre-install.boot.$REQUESTED_NODE_ID"
|
||||||
CPU_MODEL=$(${pkgs.gawk}/bin/awk -F: '/model name/ {gsub(/^[ \t]+/, "", $2); print $2; exit}' /proc/cpuinfo 2>/dev/null || true)
|
CPU_MODEL=$(${pkgs.gawk}/bin/awk -F: '/model name/ {gsub(/^[ \t]+/, "", $2); print $2; exit}' /proc/cpuinfo 2>/dev/null || true)
|
||||||
CPU_CORES=$(${pkgs.gawk}/bin/awk '/^cpu cores/ {print $4; exit}' /proc/cpuinfo 2>/dev/null || true)
|
CPU_CORES=$(${pkgs.gawk}/bin/awk '/^cpu cores/ {print $4; exit}' /proc/cpuinfo 2>/dev/null || true)
|
||||||
CPU_THREADS=$(${pkgs.coreutils}/bin/nproc --all 2>/dev/null || true)
|
CPU_THREADS=$(${pkgs.coreutils}/bin/nproc --all 2>/dev/null || true)
|
||||||
|
|
@ -172,8 +215,8 @@
|
||||||
')
|
')
|
||||||
REQUEST_JSON=$(${pkgs.jq}/bin/jq -n \
|
REQUEST_JSON=$(${pkgs.jq}/bin/jq -n \
|
||||||
--arg machine_id "$MACHINE_ID" \
|
--arg machine_id "$MACHINE_ID" \
|
||||||
--arg node_id "$NODE_HOSTNAME" \
|
--arg node_id "$REQUESTED_NODE_ID" \
|
||||||
--arg hostname "$NODE_HOSTNAME" \
|
--arg hostname "$REQUESTED_HOSTNAME" \
|
||||||
--arg ip "$NODE_IP" \
|
--arg ip "$NODE_IP" \
|
||||||
--argjson hardware_facts "$HARDWARE_FACTS" '
|
--argjson hardware_facts "$HARDWARE_FACTS" '
|
||||||
{
|
{
|
||||||
|
|
@ -253,6 +296,7 @@
|
||||||
|
|
||||||
# Signal success
|
# Signal success
|
||||||
NODE_ID=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.node_config.assignment.node_id // "unknown"')
|
NODE_ID=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.node_config.assignment.node_id // "unknown"')
|
||||||
|
echo "ULTRACLOUD_MARKER pre-install.phone-home.complete.$NODE_ID"
|
||||||
echo "✓ Bootstrap complete: $NODE_ID"
|
echo "✓ Bootstrap complete: $NODE_ID"
|
||||||
exit 0
|
exit 0
|
||||||
else
|
else
|
||||||
|
|
@ -282,6 +326,7 @@
|
||||||
|
|
||||||
script = ''
|
script = ''
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
export PATH="${pkgs.nix}/bin:${config.system.build.nixos-install}/bin:$PATH"
|
||||||
|
|
||||||
cmdline_value() {
|
cmdline_value() {
|
||||||
local key="$1"
|
local key="$1"
|
||||||
|
|
@ -297,6 +342,40 @@
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
resolve_deployer_url() {
|
||||||
|
local explicit_url="''${DEPLOYER_URL:-}"
|
||||||
|
if [ -z "$explicit_url" ]; then
|
||||||
|
explicit_url="$(cmdline_value ultracloud.deployer_url || true)"
|
||||||
|
fi
|
||||||
|
if [ -n "$explicit_url" ]; then
|
||||||
|
echo "$explicit_url"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
if ${pkgs.curl}/bin/curl -fsS --connect-timeout 2 --max-time 5 \
|
||||||
|
http://10.0.2.2:8088/health >/dev/null 2>&1; then
|
||||||
|
echo "http://10.0.2.2:8088"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
echo "http://192.168.100.1:8080"
|
||||||
|
}
|
||||||
|
|
||||||
|
resolve_binary_cache_url() {
|
||||||
|
local explicit_url="''${ULTRACLOUD_BINARY_CACHE_URL:-}"
|
||||||
|
if [ -z "$explicit_url" ]; then
|
||||||
|
explicit_url="$(cmdline_value ultracloud.binary_cache_url || true)"
|
||||||
|
fi
|
||||||
|
if [ -n "$explicit_url" ]; then
|
||||||
|
echo "$explicit_url"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
if ${pkgs.curl}/bin/curl -fsS --connect-timeout 2 --max-time 5 \
|
||||||
|
http://10.0.2.2:8090/nix-cache-info >/dev/null 2>&1; then
|
||||||
|
echo "http://10.0.2.2:8090"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
if [ ! -s /etc/ultracloud/node-config.json ]; then
|
if [ ! -s /etc/ultracloud/node-config.json ]; then
|
||||||
echo "ERROR: node-config.json missing (bootstrap not complete?)"
|
echo "ERROR: node-config.json missing (bootstrap not complete?)"
|
||||||
exit 1
|
exit 1
|
||||||
|
|
@ -305,16 +384,17 @@
|
||||||
NODE_ID=$(${pkgs.jq}/bin/jq -r '.assignment.hostname // .assignment.node_id // empty' /etc/ultracloud/node-config.json)
|
NODE_ID=$(${pkgs.jq}/bin/jq -r '.assignment.hostname // .assignment.node_id // empty' /etc/ultracloud/node-config.json)
|
||||||
NODE_IP=$(${pkgs.jq}/bin/jq -r '.assignment.ip // empty' /etc/ultracloud/node-config.json)
|
NODE_IP=$(${pkgs.jq}/bin/jq -r '.assignment.ip // empty' /etc/ultracloud/node-config.json)
|
||||||
NIXOS_CONFIGURATION=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.nixos_configuration // .assignment.hostname // empty' /etc/ultracloud/node-config.json)
|
NIXOS_CONFIGURATION=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.nixos_configuration // .assignment.hostname // empty' /etc/ultracloud/node-config.json)
|
||||||
DISKO_PATH=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.disko_config_path // empty' /etc/ultracloud/node-config.json)
|
DISKO_SCRIPT_PATH=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.disko_script_path // empty' /etc/ultracloud/node-config.json)
|
||||||
|
if [ -z "$DISKO_SCRIPT_PATH" ] && [ -r /etc/ultracloud/disko-script-paths.json ]; then
|
||||||
|
DISKO_SCRIPT_PATH=$(${pkgs.jq}/bin/jq -r --arg cfg "$NIXOS_CONFIGURATION" '.[$cfg] // empty' /etc/ultracloud/disko-script-paths.json)
|
||||||
|
fi
|
||||||
|
TARGET_SYSTEM_PATH=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.target_system_path // empty' /etc/ultracloud/node-config.json)
|
||||||
|
if [ -z "$TARGET_SYSTEM_PATH" ] && [ -r /etc/ultracloud/system-paths.json ]; then
|
||||||
|
TARGET_SYSTEM_PATH=$(${pkgs.jq}/bin/jq -r --arg cfg "$NIXOS_CONFIGURATION" '.[$cfg] // empty' /etc/ultracloud/system-paths.json)
|
||||||
|
fi
|
||||||
TARGET_DISK=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.target_disk // empty' /etc/ultracloud/node-config.json)
|
TARGET_DISK=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.target_disk // empty' /etc/ultracloud/node-config.json)
|
||||||
TARGET_DISK_BY_ID=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.target_disk_by_id // empty' /etc/ultracloud/node-config.json)
|
TARGET_DISK_BY_ID=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.target_disk_by_id // empty' /etc/ultracloud/node-config.json)
|
||||||
DEPLOYER_URL="''${DEPLOYER_URL:-}"
|
DEPLOYER_URL="$(resolve_deployer_url)"
|
||||||
if [ -z "$DEPLOYER_URL" ]; then
|
|
||||||
DEPLOYER_URL="$(cmdline_value ultracloud.deployer_url || true)"
|
|
||||||
fi
|
|
||||||
if [ -z "$DEPLOYER_URL" ]; then
|
|
||||||
DEPLOYER_URL="http://192.168.100.1:8080"
|
|
||||||
fi
|
|
||||||
SRC_ROOT="/opt/ultracloud-src"
|
SRC_ROOT="/opt/ultracloud-src"
|
||||||
|
|
||||||
if [ -z "$NODE_ID" ] || [ -z "$NODE_IP" ]; then
|
if [ -z "$NODE_ID" ] || [ -z "$NODE_IP" ]; then
|
||||||
|
|
@ -362,6 +442,7 @@
|
||||||
"$DEPLOYER_URL/api/v1/bootstrap/flake-bundle" \
|
"$DEPLOYER_URL/api/v1/bootstrap/flake-bundle" \
|
||||||
-o "$BUNDLE_PATH"; then
|
-o "$BUNDLE_PATH"; then
|
||||||
echo "Downloaded bootstrap flake bundle from deployer"
|
echo "Downloaded bootstrap flake bundle from deployer"
|
||||||
|
echo "ULTRACLOUD_MARKER install.bundle-downloaded.$NODE_ID"
|
||||||
rm -rf "$SRC_ROOT"
|
rm -rf "$SRC_ROOT"
|
||||||
mkdir -p "$SRC_ROOT"
|
mkdir -p "$SRC_ROOT"
|
||||||
${pkgs.gzip}/bin/gzip -dc "$BUNDLE_PATH" | ${pkgs.gnutar}/bin/tar -xf - -C "$SRC_ROOT"
|
${pkgs.gzip}/bin/gzip -dc "$BUNDLE_PATH" | ${pkgs.gnutar}/bin/tar -xf - -C "$SRC_ROOT"
|
||||||
|
|
@ -369,24 +450,12 @@
|
||||||
echo "No deployer flake bundle available; using embedded source tree"
|
echo "No deployer flake bundle available; using embedded source tree"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -z "$DISKO_PATH" ]; then
|
echo "ULTRACLOUD_MARKER install.start.$NODE_ID"
|
||||||
CANDIDATE_DISKO="nix/nodes/vm-cluster/$NODE_ID/disko.nix"
|
DISPLAY_TARGET_DISK="$TARGET_DISK"
|
||||||
if [ -f "$SRC_ROOT/$CANDIDATE_DISKO" ]; then
|
if [ -n "$TARGET_DISK_BY_ID" ]; then
|
||||||
DISKO_PATH="$CANDIDATE_DISKO"
|
DISPLAY_TARGET_DISK="$TARGET_DISK_BY_ID"
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
|
echo "UltraCloud install starting for $NODE_ID (ip=$NODE_IP, nixos_configuration=$NIXOS_CONFIGURATION, target_disk=$DISPLAY_TARGET_DISK)"
|
||||||
if [ -z "$DISKO_PATH" ]; then
|
|
||||||
echo "ERROR: node-config.json missing install_plan.disko_config_path and no default Disko path exists for $NODE_ID"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ! -f "$SRC_ROOT/$DISKO_PATH" ]; then
|
|
||||||
echo "ERROR: Disko config not found: $SRC_ROOT/$DISKO_PATH"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "UltraCloud install starting for $NODE_ID (ip=$NODE_IP, nixos_configuration=$NIXOS_CONFIGURATION, disko_path=$DISKO_PATH)"
|
|
||||||
|
|
||||||
# Resolve installation target disk.
|
# Resolve installation target disk.
|
||||||
if [ -n "$TARGET_DISK_BY_ID" ]; then
|
if [ -n "$TARGET_DISK_BY_ID" ]; then
|
||||||
|
|
@ -423,50 +492,99 @@
|
||||||
umount /mnt || true
|
umount /mnt || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Validating NixOS configuration output..."
|
NIX_CONFIG_LINES=(
|
||||||
nix eval --raw "$SRC_ROOT#nixosConfigurations.$NIXOS_CONFIGURATION.config.system.build.toplevel.drvPath" >/dev/null
|
"experimental-features = nix-command flakes"
|
||||||
|
)
|
||||||
EFFECTIVE_DISKO_PATH="$SRC_ROOT/$DISKO_PATH"
|
BINARY_CACHE_URL="$(resolve_binary_cache_url || true)"
|
||||||
if [ -n "$DISK" ]; then
|
NIXPKGS_PATH="$SRC_ROOT/.bundle-inputs/nixpkgs"
|
||||||
cat > /run/ultracloud/disko-wrapper.nix <<EOF
|
if [ ! -e "$NIXPKGS_PATH" ]; then
|
||||||
{ ... }:
|
NIXPKGS_PATH="${pkgs.path}"
|
||||||
{
|
|
||||||
imports = [
|
|
||||||
"$SRC_ROOT/nix/modules/install-target.nix"
|
|
||||||
"$SRC_ROOT/$DISKO_PATH"
|
|
||||||
];
|
|
||||||
|
|
||||||
ultracloud.install.diskDevice = "$DISK";
|
|
||||||
}
|
|
||||||
EOF
|
|
||||||
EFFECTIVE_DISKO_PATH="/run/ultracloud/disko-wrapper.nix"
|
|
||||||
fi
|
fi
|
||||||
|
if [ -n "$BINARY_CACHE_URL" ]; then
|
||||||
|
echo "Using bootstrap binary cache: $BINARY_CACHE_URL"
|
||||||
|
NIX_CONFIG_LINES+=("substituters = $BINARY_CACHE_URL")
|
||||||
|
NIX_CONFIG_LINES+=("trusted-substituters = $BINARY_CACHE_URL")
|
||||||
|
NIX_CONFIG_LINES+=("require-sigs = false")
|
||||||
|
fi
|
||||||
|
export NIX_CONFIG="$(printf '%s\n' "''${NIX_CONFIG_LINES[@]}")"
|
||||||
|
export NIX_PATH="nixpkgs=$NIXPKGS_PATH"
|
||||||
|
|
||||||
|
echo "Preparing NixOS installation for configuration $NIXOS_CONFIGURATION"
|
||||||
|
|
||||||
echo "Running disko to partition $DISK..."
|
echo "Running disko to partition $DISK..."
|
||||||
export NIX_CONFIG="experimental-features = nix-command flakes"
|
if [ -n "$DISKO_SCRIPT_PATH" ]; then
|
||||||
nix run github:nix-community/disko -- --mode disko "$EFFECTIVE_DISKO_PATH"
|
echo "Realising pre-built Disko script: $DISKO_SCRIPT_PATH"
|
||||||
|
${pkgs.nix}/bin/nix-store --realise "$DISKO_SCRIPT_PATH" >/dev/null
|
||||||
|
"$DISKO_SCRIPT_PATH/bin/disko-format-mount"
|
||||||
|
else
|
||||||
|
${pkgs.disko}/bin/disko \
|
||||||
|
--mode destroy,format,mount \
|
||||||
|
--yes-wipe-all-disks \
|
||||||
|
--root-mountpoint /mnt \
|
||||||
|
--flake "$SRC_ROOT#$NIXOS_CONFIGURATION"
|
||||||
|
fi
|
||||||
|
echo "ULTRACLOUD_MARKER install.disko.complete.$NODE_ID"
|
||||||
|
|
||||||
echo "Running nixos-install..."
|
echo "Running nixos-install..."
|
||||||
nixos-install --flake "$SRC_ROOT#$NIXOS_CONFIGURATION" --no-root-passwd
|
if [ -n "$TARGET_SYSTEM_PATH" ]; then
|
||||||
|
echo "Realising pre-built target system: $TARGET_SYSTEM_PATH"
|
||||||
|
${pkgs.nix}/bin/nix-store --realise "$TARGET_SYSTEM_PATH" >/dev/null
|
||||||
|
${config.system.build.nixos-install}/bin/nixos-install \
|
||||||
|
--system "$TARGET_SYSTEM_PATH" \
|
||||||
|
--no-root-passwd \
|
||||||
|
--no-channel-copy
|
||||||
|
else
|
||||||
|
${config.system.build.nixos-install}/bin/nixos-install \
|
||||||
|
--flake "$SRC_ROOT#$NIXOS_CONFIGURATION" \
|
||||||
|
--no-root-passwd \
|
||||||
|
--no-channel-copy
|
||||||
|
fi
|
||||||
|
echo "ULTRACLOUD_MARKER install.nixos-install.complete.$NODE_ID"
|
||||||
|
|
||||||
|
mkdir -p /mnt/etc/ssh /mnt/etc/ultracloud /mnt/root/.ssh /mnt/var/lib
|
||||||
|
cp -f /etc/ultracloud/node-config.json /mnt/etc/ultracloud/node-config.json
|
||||||
|
cp -f /root/.ssh/authorized_keys /mnt/root/.ssh/authorized_keys
|
||||||
|
shopt -s nullglob
|
||||||
|
for host_key in /etc/ssh/ssh_host_*; do
|
||||||
|
cp -f "$host_key" /mnt/etc/ssh/"$(basename "$host_key")"
|
||||||
|
done
|
||||||
|
shopt -u nullglob
|
||||||
|
chmod 700 /mnt/root/.ssh
|
||||||
|
chmod 600 /mnt/root/.ssh/authorized_keys
|
||||||
|
chmod 600 /mnt/etc/ssh/ssh_host_*_key 2>/dev/null || true
|
||||||
|
chmod 644 /mnt/etc/ssh/ssh_host_*_key.pub 2>/dev/null || true
|
||||||
|
|
||||||
|
rm -rf /mnt/var/lib/photon-src
|
||||||
|
cp -a "$SRC_ROOT" /mnt/var/lib/photon-src
|
||||||
|
|
||||||
sync
|
sync
|
||||||
|
echo "ULTRACLOUD_MARKER reboot.$NODE_ID"
|
||||||
|
echo "Allowing the harness to observe the reboot marker before shutting down..."
|
||||||
|
sleep 15
|
||||||
echo "✓ Install complete; rebooting..."
|
echo "✓ Install complete; rebooting..."
|
||||||
${pkgs.systemd}/bin/systemctl reboot
|
${pkgs.systemd}/bin/systemctl reboot
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# Packages for bootstrap + install
|
# Packages for bootstrap + install
|
||||||
|
environment.etc."ultracloud/disko-script-paths.json".text =
|
||||||
|
builtins.toJSON ultracloudBaremetalFormatMountPaths;
|
||||||
|
environment.etc."ultracloud/system-paths.json".text =
|
||||||
|
builtins.toJSON ultracloudBaremetalSystemPaths;
|
||||||
|
|
||||||
environment.systemPackages = with pkgs; [
|
environment.systemPackages = with pkgs; [
|
||||||
curl
|
curl
|
||||||
jq
|
jq
|
||||||
vim
|
vim
|
||||||
htop
|
htop
|
||||||
|
nix
|
||||||
gawk
|
gawk
|
||||||
gnugrep
|
gnugrep
|
||||||
util-linux
|
util-linux
|
||||||
parted
|
parted
|
||||||
dosfstools
|
dosfstools
|
||||||
e2fsprogs
|
e2fsprogs
|
||||||
|
disko
|
||||||
gnutar
|
gnutar
|
||||||
gzip
|
gzip
|
||||||
];
|
];
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@
|
||||||
./lightningstor.nix
|
./lightningstor.nix
|
||||||
./k8shost.nix
|
./k8shost.nix
|
||||||
./nightlight.nix
|
./nightlight.nix
|
||||||
|
./apigateway.nix
|
||||||
./deployer.nix
|
./deployer.nix
|
||||||
./nix-agent.nix
|
./nix-agent.nix
|
||||||
./node-agent.nix
|
./node-agent.nix
|
||||||
|
|
|
||||||
87
nix/nodes/baremetal-qemu/common.nix
Normal file
87
nix/nodes/baremetal-qemu/common.nix
Normal file
|
|
@ -0,0 +1,87 @@
|
||||||
|
{ lib, pkgs, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
boot.kernelParams = [ "console=ttyS0,115200n8" ];
|
||||||
|
boot.initrd.availableKernelModules = [
|
||||||
|
"ahci"
|
||||||
|
"sr_mod"
|
||||||
|
"virtio_blk"
|
||||||
|
"virtio_net"
|
||||||
|
"virtio_pci"
|
||||||
|
"virtio_scsi"
|
||||||
|
"xhci_pci"
|
||||||
|
];
|
||||||
|
|
||||||
|
networking.firewall.enable = false;
|
||||||
|
networking.useDHCP = lib.mkForce false;
|
||||||
|
networking.dhcpcd.enable = lib.mkForce false;
|
||||||
|
networking.usePredictableInterfaceNames = false;
|
||||||
|
|
||||||
|
systemd.network = {
|
||||||
|
enable = true;
|
||||||
|
wait-online.enable = true;
|
||||||
|
networks."10-eth0" = {
|
||||||
|
matchConfig.Name = "eth0";
|
||||||
|
networkConfig.DHCP = "yes";
|
||||||
|
linkConfig.RequiredForOnline = "routable";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
services.openssh = {
|
||||||
|
enable = true;
|
||||||
|
settings = {
|
||||||
|
PermitRootLogin = "prohibit-password";
|
||||||
|
PasswordAuthentication = false;
|
||||||
|
KbdInteractiveAuthentication = false;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
users.users.root.openssh.authorizedKeys.keys = [ ];
|
||||||
|
|
||||||
|
nix.registry = lib.mkForce { };
|
||||||
|
nix.nixPath = lib.mkForce [ ];
|
||||||
|
nix.channel.enable = false;
|
||||||
|
nix.settings = {
|
||||||
|
experimental-features = [
|
||||||
|
"nix-command"
|
||||||
|
"flakes"
|
||||||
|
];
|
||||||
|
flake-registry = "";
|
||||||
|
};
|
||||||
|
nixpkgs.flake = {
|
||||||
|
source = lib.mkForce null;
|
||||||
|
setFlakeRegistry = lib.mkForce false;
|
||||||
|
setNixPath = lib.mkForce false;
|
||||||
|
};
|
||||||
|
|
||||||
|
documentation.enable = false;
|
||||||
|
documentation.nixos.enable = false;
|
||||||
|
documentation.man.enable = false;
|
||||||
|
documentation.info.enable = false;
|
||||||
|
documentation.doc.enable = false;
|
||||||
|
|
||||||
|
environment.systemPackages = with pkgs; [
|
||||||
|
curl
|
||||||
|
jq
|
||||||
|
];
|
||||||
|
|
||||||
|
systemd.services.ultracloud-baremetal-postinstall-marker = {
|
||||||
|
description = "Emit a canonical post-install marker for bare-metal QEMU smoke";
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
after = [ "network-online.target" ];
|
||||||
|
wants = [ "network-online.target" ];
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "oneshot";
|
||||||
|
RemainAfterExit = true;
|
||||||
|
StandardOutput = "journal+console";
|
||||||
|
StandardError = "journal+console";
|
||||||
|
};
|
||||||
|
script = ''
|
||||||
|
hostname="$(tr -d '\n' </etc/hostname)"
|
||||||
|
role="$(cat /etc/ultracloud-role)"
|
||||||
|
echo "ULTRACLOUD_MARKER post-install.boot.$hostname.$role"
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
|
system.stateVersion = "24.11";
|
||||||
|
}
|
||||||
45
nix/nodes/baremetal-qemu/control-plane/configuration.nix
Normal file
45
nix/nodes/baremetal-qemu/control-plane/configuration.nix
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
{
|
||||||
|
pkgs,
|
||||||
|
...
|
||||||
|
}:
|
||||||
|
|
||||||
|
{
|
||||||
|
imports = [ ../common.nix ];
|
||||||
|
|
||||||
|
networking.hostName = "iso-control-plane-01";
|
||||||
|
|
||||||
|
boot.loader.grub = {
|
||||||
|
enable = true;
|
||||||
|
devices = [ "/dev/vda" ];
|
||||||
|
efiSupport = true;
|
||||||
|
efiInstallAsRemovable = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
environment.etc."ultracloud-role".text = "control-plane\n";
|
||||||
|
environment.etc."ultracloud-role-control-plane".text = "control-plane\n";
|
||||||
|
environment.etc."ultracloud-canonical-install-path".text = "iso\n";
|
||||||
|
|
||||||
|
services.chainfire = {
|
||||||
|
enable = true;
|
||||||
|
nodeId = "iso-control-plane-01";
|
||||||
|
initialPeers = [ "iso-control-plane-01=127.0.0.1:2380" ];
|
||||||
|
package = pkgs.chainfire-server;
|
||||||
|
};
|
||||||
|
|
||||||
|
services.nix-agent = {
|
||||||
|
enable = true;
|
||||||
|
chainfireEndpoint = "http://10.0.2.2:2379";
|
||||||
|
clusterId = "baremetal-iso-canonical";
|
||||||
|
nodeId = "iso-control-plane-01";
|
||||||
|
flakeRoot = "/var/lib/photon-src";
|
||||||
|
intervalSecs = 15;
|
||||||
|
apply = true;
|
||||||
|
healthCheckCommand = [
|
||||||
|
"test"
|
||||||
|
"-f"
|
||||||
|
"/etc/ultracloud-role-control-plane"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
|
systemd.services.nix-agent.environment.RUST_LOG = "info";
|
||||||
|
}
|
||||||
5
nix/nodes/baremetal-qemu/control-plane/disko.nix
Normal file
5
nix/nodes/baremetal-qemu/control-plane/disko.nix
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
{ ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
imports = [ ../../vm-cluster/common-disko.nix ];
|
||||||
|
}
|
||||||
35
nix/nodes/baremetal-qemu/worker/configuration.nix
Normal file
35
nix/nodes/baremetal-qemu/worker/configuration.nix
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
{ ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
imports = [ ../common.nix ];
|
||||||
|
|
||||||
|
networking.hostName = "iso-worker-01";
|
||||||
|
|
||||||
|
boot.loader.grub = {
|
||||||
|
enable = true;
|
||||||
|
devices = [ "/dev/vda" ];
|
||||||
|
efiSupport = true;
|
||||||
|
efiInstallAsRemovable = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
environment.etc."ultracloud-role".text = "worker\n";
|
||||||
|
environment.etc."ultracloud-role-worker".text = "worker\n";
|
||||||
|
environment.etc."ultracloud-canonical-install-path".text = "iso\n";
|
||||||
|
|
||||||
|
services.nix-agent = {
|
||||||
|
enable = true;
|
||||||
|
chainfireEndpoint = "http://10.0.2.2:2379";
|
||||||
|
clusterId = "baremetal-iso-canonical";
|
||||||
|
nodeId = "iso-worker-01";
|
||||||
|
flakeRoot = "/var/lib/photon-src";
|
||||||
|
intervalSecs = 15;
|
||||||
|
apply = true;
|
||||||
|
healthCheckCommand = [
|
||||||
|
"test"
|
||||||
|
"-f"
|
||||||
|
"/etc/ultracloud-role-worker"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
|
systemd.services.nix-agent.environment.RUST_LOG = "info";
|
||||||
|
}
|
||||||
5
nix/nodes/baremetal-qemu/worker/disko.nix
Normal file
5
nix/nodes/baremetal-qemu/worker/disko.nix
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
{ ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
imports = [ ../../vm-cluster/common-disko.nix ];
|
||||||
|
}
|
||||||
|
|
@ -1,13 +1,28 @@
|
||||||
{ config, ... }:
|
{ config, ... }:
|
||||||
|
|
||||||
|
let
|
||||||
|
installDisk =
|
||||||
|
if config ? ultracloud
|
||||||
|
&& config.ultracloud ? install
|
||||||
|
&& config.ultracloud.install ? diskDevice
|
||||||
|
&& config.ultracloud.install.diskDevice != null then
|
||||||
|
config.ultracloud.install.diskDevice
|
||||||
|
else
|
||||||
|
"/dev/vda";
|
||||||
|
in
|
||||||
|
|
||||||
{
|
{
|
||||||
disko.devices = {
|
disko.devices = {
|
||||||
disk.main = {
|
disk.main = {
|
||||||
type = "disk";
|
type = "disk";
|
||||||
device = config.ultracloud.install.diskDevice or "/dev/vda";
|
device = installDisk;
|
||||||
content = {
|
content = {
|
||||||
type = "gpt";
|
type = "gpt";
|
||||||
partitions = {
|
partitions = {
|
||||||
|
BIOS = {
|
||||||
|
size = "1M";
|
||||||
|
type = "EF02";
|
||||||
|
};
|
||||||
ESP = {
|
ESP = {
|
||||||
size = "512M";
|
size = "512M";
|
||||||
type = "EF00";
|
type = "EF00";
|
||||||
|
|
|
||||||
360
nix/single-node/base.nix
Normal file
360
nix/single-node/base.nix
Normal file
|
|
@ -0,0 +1,360 @@
|
||||||
|
{ config, lib, pkgs, ... }:
|
||||||
|
|
||||||
|
let
|
||||||
|
cfg = config.ultracloud.quickstart;
|
||||||
|
|
||||||
|
localChainfire = "127.0.0.1:${toString config.services.chainfire.port}";
|
||||||
|
localChainfireHttp = "http://127.0.0.1:${toString config.services.chainfire.httpPort}";
|
||||||
|
localFlaredb = "127.0.0.1:${toString config.services.flaredb.port}";
|
||||||
|
localIamGrpc = "127.0.0.1:${toString config.services.iam.port}";
|
||||||
|
localIamHttp = "http://127.0.0.1:${toString config.services.iam.httpPort}";
|
||||||
|
localPrismnetGrpc = "127.0.0.1:${toString config.services.prismnet.port}";
|
||||||
|
localPrismnetHttp = "http://127.0.0.1:${toString config.services.prismnet.httpPort}";
|
||||||
|
localPlasmavmcHttp = "http://127.0.0.1:${toString config.services.plasmavmc.httpPort}";
|
||||||
|
localLightningstorGrpc = "127.0.0.1:${toString config.services.lightningstor.port}";
|
||||||
|
localCreditserviceGrpc = "127.0.0.1:${toString config.services.creditservice.grpcPort}";
|
||||||
|
localFiberlbHttp = "http://127.0.0.1:${toString config.services.fiberlb.port}";
|
||||||
|
localFlashdnsHttp = "http://127.0.0.1:${toString config.services.flashdns.port}";
|
||||||
|
localCoronafsHttp = "http://127.0.0.1:${toString config.services.coronafs.port}";
|
||||||
|
|
||||||
|
requiredPackages = with pkgs; [
|
||||||
|
chainfire-server
|
||||||
|
flaredb-server
|
||||||
|
iam-server
|
||||||
|
prismnet-server
|
||||||
|
plasmavmc-server
|
||||||
|
curl
|
||||||
|
iproute2
|
||||||
|
jq
|
||||||
|
qemu
|
||||||
|
];
|
||||||
|
|
||||||
|
optionalPackages =
|
||||||
|
(lib.optionals cfg.enableLightningStor [ pkgs.lightningstor-server ])
|
||||||
|
++ (lib.optionals cfg.enableCoronafs [ pkgs.coronafs-server ])
|
||||||
|
++ (lib.optionals cfg.enableFlashDNS [ pkgs.flashdns-server ])
|
||||||
|
++ (lib.optionals cfg.enableFiberLB [ pkgs.fiberlb-server ])
|
||||||
|
++ (lib.optionals cfg.enableApiGateway [ pkgs.apigateway-server ])
|
||||||
|
++ (lib.optionals cfg.enableNightlight [ pkgs.nightlight-server ])
|
||||||
|
++ (lib.optionals cfg.enableCreditService [ pkgs.creditservice-server ])
|
||||||
|
++ (lib.optionals cfg.enableK8sHost [ pkgs.k8shost-server ]);
|
||||||
|
|
||||||
|
requiredUnits = [
|
||||||
|
"chainfire.service"
|
||||||
|
"flaredb.service"
|
||||||
|
"iam.service"
|
||||||
|
"prismnet.service"
|
||||||
|
"plasmavmc.service"
|
||||||
|
];
|
||||||
|
|
||||||
|
optionalUnits =
|
||||||
|
(lib.optionals cfg.enableLightningStor [ "lightningstor.service" ])
|
||||||
|
++ (lib.optionals cfg.enableCoronafs [ "coronafs.service" ])
|
||||||
|
++ (lib.optionals cfg.enableFlashDNS [ "flashdns.service" ])
|
||||||
|
++ (lib.optionals cfg.enableFiberLB [ "fiberlb.service" ])
|
||||||
|
++ (lib.optionals cfg.enableApiGateway [ "apigateway.service" ])
|
||||||
|
++ (lib.optionals cfg.enableNightlight [ "nightlight.service" ])
|
||||||
|
++ (lib.optionals cfg.enableCreditService [ "creditservice.service" ])
|
||||||
|
++ (lib.optionals cfg.enableK8sHost [ "k8shost.service" ]);
|
||||||
|
|
||||||
|
readyUnitArgs = lib.escapeShellArgs (requiredUnits ++ optionalUnits);
|
||||||
|
|
||||||
|
healthChecks = [
|
||||||
|
{ name = "chainfire"; url = "http://127.0.0.1:8081/health"; }
|
||||||
|
{ name = "flaredb"; url = "http://127.0.0.1:8082/health"; }
|
||||||
|
{ name = "iam"; url = "http://127.0.0.1:8083/health"; }
|
||||||
|
{ name = "prismnet"; url = "http://127.0.0.1:8087/health"; }
|
||||||
|
{ name = "plasmavmc"; url = localPlasmavmcHttp + "/health"; }
|
||||||
|
]
|
||||||
|
++ lib.optionals cfg.enableCoronafs [
|
||||||
|
{ name = "coronafs"; url = localCoronafsHttp + "/healthz"; }
|
||||||
|
]
|
||||||
|
++ lib.optionals cfg.enableApiGateway [
|
||||||
|
{ name = "apigateway"; url = "http://127.0.0.1:8080/health"; }
|
||||||
|
]
|
||||||
|
++ lib.optionals cfg.enableNightlight [
|
||||||
|
{ name = "nightlight"; url = "http://127.0.0.1:9101/healthz"; }
|
||||||
|
]
|
||||||
|
++ lib.optionals cfg.enableCreditService [
|
||||||
|
{ name = "creditservice"; url = "http://127.0.0.1:3011/health"; }
|
||||||
|
]
|
||||||
|
++ lib.optionals cfg.enableK8sHost [
|
||||||
|
{ name = "k8shost"; url = "http://127.0.0.1:8085/health"; }
|
||||||
|
];
|
||||||
|
|
||||||
|
healthCheckScript = lib.concatMapStringsSep "\n" (check: ''
|
||||||
|
wait_for_health ${lib.escapeShellArg check.name} ${lib.escapeShellArg check.url}
|
||||||
|
'') healthChecks;
|
||||||
|
in
|
||||||
|
{
|
||||||
|
options.ultracloud.quickstart = {
|
||||||
|
enable = lib.mkEnableOption "UltraCloud single-node quickstart profile";
|
||||||
|
|
||||||
|
hostName = lib.mkOption {
|
||||||
|
type = lib.types.str;
|
||||||
|
default = "single-node-quickstart";
|
||||||
|
description = "Hostname used by the single-node quickstart profile.";
|
||||||
|
};
|
||||||
|
|
||||||
|
nodeId = lib.mkOption {
|
||||||
|
type = lib.types.str;
|
||||||
|
default = "single01";
|
||||||
|
description = "Node identifier for the single-node quickstart profile.";
|
||||||
|
};
|
||||||
|
|
||||||
|
enableLightningStor = lib.mkEnableOption "LightningStor object/image storage add-on for the quickstart";
|
||||||
|
enableCoronafs = lib.mkEnableOption "CoronaFS shared-volume add-on for the quickstart";
|
||||||
|
enableFlashDNS = lib.mkEnableOption "FlashDNS add-on for the quickstart";
|
||||||
|
enableFiberLB = lib.mkEnableOption "FiberLB add-on for the quickstart";
|
||||||
|
enableApiGateway = lib.mkEnableOption "API gateway add-on for the quickstart";
|
||||||
|
enableNightlight = lib.mkEnableOption "Nightlight metrics add-on for the quickstart";
|
||||||
|
enableCreditService = lib.mkEnableOption "CreditService reference add-on for the quickstart";
|
||||||
|
enableK8sHost = lib.mkEnableOption "K8sHost add-on for the quickstart";
|
||||||
|
};
|
||||||
|
|
||||||
|
config = lib.mkIf cfg.enable {
|
||||||
|
assertions = [
|
||||||
|
{
|
||||||
|
assertion = !cfg.enableK8sHost || (cfg.enableFiberLB && cfg.enableFlashDNS);
|
||||||
|
message = "ultracloud.quickstart.enableK8sHost requires ultracloud.quickstart.enableFiberLB and ultracloud.quickstart.enableFlashDNS.";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
networking.hostName = lib.mkDefault cfg.hostName;
|
||||||
|
networking.useDHCP = lib.mkDefault true;
|
||||||
|
networking.vswitches = lib.mkDefault {};
|
||||||
|
|
||||||
|
networking.firewall.allowedTCPPorts = [
|
||||||
|
22
|
||||||
|
2379
|
||||||
|
2380
|
||||||
|
2381
|
||||||
|
2479
|
||||||
|
2480
|
||||||
|
50080
|
||||||
|
50081
|
||||||
|
50082
|
||||||
|
8081
|
||||||
|
8082
|
||||||
|
8083
|
||||||
|
8084
|
||||||
|
8087
|
||||||
|
]
|
||||||
|
++ (lib.optionals cfg.enableLightningStor [ 50086 50090 9000 ])
|
||||||
|
++ (lib.optionals cfg.enableCoronafs [ 50088 ])
|
||||||
|
++ (lib.optionals cfg.enableFlashDNS [ 50084 ])
|
||||||
|
++ (lib.optionals cfg.enableFiberLB [ 50085 ])
|
||||||
|
++ (lib.optionals cfg.enableApiGateway [ 8080 ])
|
||||||
|
++ (lib.optionals cfg.enableNightlight [ 9091 9101 ])
|
||||||
|
++ (lib.optionals cfg.enableCreditService [ 3010 3011 ])
|
||||||
|
++ (lib.optionals cfg.enableK8sHost [ 50087 8085 ]);
|
||||||
|
|
||||||
|
networking.firewall.allowedUDPPorts =
|
||||||
|
[ 2381 4789 ]
|
||||||
|
++ (lib.optionals cfg.enableFlashDNS [ 5353 ]);
|
||||||
|
|
||||||
|
boot.kernelModules = [ "kvm-intel" "kvm-amd" "tun" ];
|
||||||
|
boot.extraModprobeConfig = ''
|
||||||
|
options kvm_intel nested=1
|
||||||
|
options kvm_amd nested=1
|
||||||
|
'';
|
||||||
|
boot.kernel.sysctl = {
|
||||||
|
"fs.file-max" = 1000000;
|
||||||
|
"net.core.netdev_max_backlog" = 5000;
|
||||||
|
"net.core.rmem_max" = 134217728;
|
||||||
|
"net.core.wmem_max" = 134217728;
|
||||||
|
"net.ipv4.ip_forward" = 1;
|
||||||
|
"net.ipv6.conf.all.forwarding" = 1;
|
||||||
|
"vm.swappiness" = 10;
|
||||||
|
};
|
||||||
|
|
||||||
|
services.chainfire = {
|
||||||
|
enable = true;
|
||||||
|
nodeId = cfg.nodeId;
|
||||||
|
initialPeers = [ "${cfg.nodeId}=127.0.0.1:2380" ];
|
||||||
|
};
|
||||||
|
|
||||||
|
services.flaredb = {
|
||||||
|
enable = true;
|
||||||
|
nodeId = cfg.nodeId;
|
||||||
|
initialPeers = [ "${cfg.nodeId}=127.0.0.1:2479" ];
|
||||||
|
pdAddr = localChainfire;
|
||||||
|
};
|
||||||
|
|
||||||
|
services.iam = {
|
||||||
|
enable = true;
|
||||||
|
chainfireAddr = localChainfire;
|
||||||
|
flaredbAddr = localFlaredb;
|
||||||
|
allowRandomSigningKey = true;
|
||||||
|
allowUnauthenticatedAdmin = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
services.prismnet = {
|
||||||
|
enable = true;
|
||||||
|
iamAddr = localIamGrpc;
|
||||||
|
chainfireAddr = localChainfire;
|
||||||
|
flaredbAddr = localFlaredb;
|
||||||
|
};
|
||||||
|
|
||||||
|
services.plasmavmc = {
|
||||||
|
enable = true;
|
||||||
|
mode = "all-in-one";
|
||||||
|
prismnetAddr = localPrismnetGrpc;
|
||||||
|
iamAddr = localIamGrpc;
|
||||||
|
chainfireAddr = localChainfire;
|
||||||
|
flaredbAddr = localFlaredb;
|
||||||
|
lightningstorAddr = if cfg.enableLightningStor then localLightningstorGrpc else null;
|
||||||
|
coronafsControllerEndpoint = if cfg.enableCoronafs then localCoronafsHttp else null;
|
||||||
|
coronafsNodeEndpoint = if cfg.enableCoronafs then localCoronafsHttp else null;
|
||||||
|
};
|
||||||
|
|
||||||
|
services.lightningstor = lib.mkIf cfg.enableLightningStor {
|
||||||
|
enable = true;
|
||||||
|
mode = "all-in-one";
|
||||||
|
objectStorageBackend = "local_fs";
|
||||||
|
iamAddr = localIamGrpc;
|
||||||
|
chainfireAddr = localChainfire;
|
||||||
|
flaredbAddr = localFlaredb;
|
||||||
|
};
|
||||||
|
|
||||||
|
services.coronafs = lib.mkIf cfg.enableCoronafs {
|
||||||
|
enable = true;
|
||||||
|
metadataBackend = "chainfire";
|
||||||
|
chainfireApiUrl = localChainfireHttp;
|
||||||
|
advertiseHost = "127.0.0.1";
|
||||||
|
};
|
||||||
|
|
||||||
|
services.flashdns = lib.mkIf cfg.enableFlashDNS {
|
||||||
|
enable = true;
|
||||||
|
iamAddr = localIamGrpc;
|
||||||
|
chainfireAddr = localChainfire;
|
||||||
|
flaredbAddr = localFlaredb;
|
||||||
|
};
|
||||||
|
|
||||||
|
services.fiberlb = lib.mkIf cfg.enableFiberLB {
|
||||||
|
enable = true;
|
||||||
|
iamAddr = localIamGrpc;
|
||||||
|
chainfireAddr = localChainfire;
|
||||||
|
flaredbAddr = localFlaredb;
|
||||||
|
};
|
||||||
|
|
||||||
|
services.creditservice = lib.mkIf cfg.enableCreditService {
|
||||||
|
enable = true;
|
||||||
|
iamAddr = localIamGrpc;
|
||||||
|
chainfireAddr = localChainfire;
|
||||||
|
flaredbAddr = localFlaredb;
|
||||||
|
};
|
||||||
|
|
||||||
|
services.nightlight = lib.mkIf cfg.enableNightlight {
|
||||||
|
enable = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
services.apigateway = lib.mkIf cfg.enableApiGateway {
|
||||||
|
enable = true;
|
||||||
|
authProviders = [
|
||||||
|
{
|
||||||
|
name = "iam";
|
||||||
|
providerType = "grpc";
|
||||||
|
endpoint = localIamHttp;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
creditProviders = lib.optionals cfg.enableCreditService [
|
||||||
|
{
|
||||||
|
name = "creditservice";
|
||||||
|
providerType = "grpc";
|
||||||
|
endpoint = "http://${localCreditserviceGrpc}";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
routes =
|
||||||
|
[
|
||||||
|
{
|
||||||
|
name = "iam-rest";
|
||||||
|
pathPrefix = "/iam";
|
||||||
|
upstream = localIamHttp;
|
||||||
|
stripPrefix = true;
|
||||||
|
auth = {
|
||||||
|
provider = "iam";
|
||||||
|
mode = "required";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
]
|
||||||
|
++ lib.optionals cfg.enableCreditService [
|
||||||
|
{
|
||||||
|
name = "credit-rest";
|
||||||
|
pathPrefix = "/credit";
|
||||||
|
upstream = "http://127.0.0.1:${toString config.services.creditservice.httpPort}";
|
||||||
|
stripPrefix = true;
|
||||||
|
auth = {
|
||||||
|
provider = "iam";
|
||||||
|
mode = "required";
|
||||||
|
};
|
||||||
|
credit = {
|
||||||
|
provider = "creditservice";
|
||||||
|
mode = "optional";
|
||||||
|
units = 1;
|
||||||
|
commitOn = "success";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
|
services.k8shost = lib.mkIf cfg.enableK8sHost {
|
||||||
|
enable = true;
|
||||||
|
iamAddr = localIamHttp;
|
||||||
|
chainfireAddr = "http://${localChainfire}";
|
||||||
|
prismnetAddr = localPrismnetHttp;
|
||||||
|
flaredbPdAddr = localChainfire;
|
||||||
|
flaredbDirectAddr = localFlaredb;
|
||||||
|
fiberlbAddr = localFiberlbHttp;
|
||||||
|
flashdnsAddr = localFlashdnsHttp;
|
||||||
|
creditserviceAddr =
|
||||||
|
if cfg.enableCreditService
|
||||||
|
then "http://${localCreditserviceGrpc}"
|
||||||
|
else null;
|
||||||
|
};
|
||||||
|
|
||||||
|
environment.systemPackages = requiredPackages ++ optionalPackages;
|
||||||
|
|
||||||
|
systemd.services.ultracloud-single-node-quickstart-ready = {
|
||||||
|
description = "Verify UltraCloud single-node quickstart readiness";
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
after = requiredUnits ++ optionalUnits;
|
||||||
|
wants = requiredUnits ++ optionalUnits;
|
||||||
|
path = [ pkgs.coreutils pkgs.curl pkgs.qemu pkgs.systemd ];
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "oneshot";
|
||||||
|
RemainAfterExit = true;
|
||||||
|
};
|
||||||
|
script = ''
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
wait_for_health() {
|
||||||
|
local name="$1"
|
||||||
|
local url="$2"
|
||||||
|
local deadline=$((SECONDS + 180))
|
||||||
|
while true; do
|
||||||
|
if curl -fsS "$url" >/dev/null 2>&1; then
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
if [ "$SECONDS" -ge "$deadline" ]; then
|
||||||
|
echo "timed out waiting for $name at $url" >&2
|
||||||
|
return 1
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
systemctl is-active ${readyUnitArgs}
|
||||||
|
${healthCheckScript}
|
||||||
|
test -x ${pkgs.qemu}/bin/qemu-system-x86_64
|
||||||
|
test -x ${pkgs.qemu}/bin/qemu-img
|
||||||
|
test -c /dev/net/tun
|
||||||
|
if [ -e /dev/kvm ]; then
|
||||||
|
test -r /dev/kvm
|
||||||
|
fi
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
|
system.stateVersion = "24.11";
|
||||||
|
};
|
||||||
|
}
|
||||||
24
nix/single-node/qemu-vm.nix
Normal file
24
nix/single-node/qemu-vm.nix
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
{ modulesPath, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
imports = [ (modulesPath + "/virtualisation/qemu-vm.nix") ];
|
||||||
|
|
||||||
|
virtualisation = {
|
||||||
|
graphics = false;
|
||||||
|
cores = 2;
|
||||||
|
memorySize = 3072;
|
||||||
|
diskSize = 16384;
|
||||||
|
};
|
||||||
|
|
||||||
|
services.openssh = {
|
||||||
|
enable = true;
|
||||||
|
settings = {
|
||||||
|
KbdInteractiveAuthentication = false;
|
||||||
|
PasswordAuthentication = true;
|
||||||
|
PermitRootLogin = "yes";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
users.mutableUsers = false;
|
||||||
|
users.users.root.hashedPassword = "$6$iu4O1PEqq77wLMfh$T4bP3V9v8RoPgwqgBr2taKEgVNcb42HaTUy.VMjjsFtWTvnai3rqvy8AQbELKWdB1Qzfb7wkUOSK1wnmSZph/.";
|
||||||
|
}
|
||||||
|
|
@ -3,6 +3,10 @@
|
||||||
`nix/test-cluster` is the canonical local validation path for UltraCloud.
|
`nix/test-cluster` is the canonical local validation path for UltraCloud.
|
||||||
It boots six QEMU VMs, treats them as hardware-like nodes, and validates representative control-plane, worker, and gateway behavior over SSH and service endpoints.
|
It boots six QEMU VMs, treats them as hardware-like nodes, and validates representative control-plane, worker, and gateway behavior over SSH and service endpoints.
|
||||||
All VM images are built on the host in a single Nix invocation and then booted as prebuilt artifacts. The guests do not compile the stack locally.
|
All VM images are built on the host in a single Nix invocation and then booted as prebuilt artifacts. The guests do not compile the stack locally.
|
||||||
|
The same harness also owns the canonical bare-metal bootstrap proof: a raw-QEMU ISO flow that phones home to `deployer`, runs Disko, reboots, and waits for `nix-agent` desired-system convergence on one control-plane node and one worker-equivalent node.
|
||||||
|
|
||||||
|
When `/dev/kvm` is absent, the portable fallback is not another harness subcommand. Use the root-flake non-KVM lane instead: `nix build .#checks.x86_64-linux.portable-control-plane-regressions`.
|
||||||
|
When `/dev/kvm` and nested virtualization are available, the reproducible publishable lane is `./nix/test-cluster/run-publishable-kvm-suite.sh`, which records environment metadata and then runs `fresh-smoke`, `fresh-demo-vm-webapp`, and `fresh-matrix` in order.
|
||||||
|
|
||||||
## What it validates
|
## What it validates
|
||||||
|
|
||||||
|
|
@ -15,6 +19,7 @@ All VM images are built on the host in a single Nix invocation and then booted a
|
||||||
- host-forwarded access to the API gateway and NightLight HTTP surfaces
|
- host-forwarded access to the API gateway and NightLight HTTP surfaces
|
||||||
- cross-node data replication smoke tests for `chainfire` and `flaredb`
|
- cross-node data replication smoke tests for `chainfire` and `flaredb`
|
||||||
- deployer-seeded native runtime scheduling from declarative Nix service definitions, including drain/failover recovery
|
- deployer-seeded native runtime scheduling from declarative Nix service definitions, including drain/failover recovery
|
||||||
|
- ISO-based bare-metal bootstrap from `nixosConfigurations.ultracloud-iso` through phone-home, flake bundle fetch, Disko install, reboot, and desired-system activation
|
||||||
|
|
||||||
## Validation layers
|
## Validation layers
|
||||||
|
|
||||||
|
|
@ -45,6 +50,7 @@ nix run ./nix/test-cluster#cluster -- build
|
||||||
nix run ./nix/test-cluster#cluster -- start
|
nix run ./nix/test-cluster#cluster -- start
|
||||||
nix run ./nix/test-cluster#cluster -- smoke
|
nix run ./nix/test-cluster#cluster -- smoke
|
||||||
nix run ./nix/test-cluster#cluster -- fresh-smoke
|
nix run ./nix/test-cluster#cluster -- fresh-smoke
|
||||||
|
nix run ./nix/test-cluster#cluster -- baremetal-iso
|
||||||
nix run ./nix/test-cluster#cluster -- demo-vm-webapp
|
nix run ./nix/test-cluster#cluster -- demo-vm-webapp
|
||||||
nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp
|
nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp
|
||||||
nix run ./nix/test-cluster#cluster -- serve-vm-webapp
|
nix run ./nix/test-cluster#cluster -- serve-vm-webapp
|
||||||
|
|
@ -63,6 +69,12 @@ make cluster-smoke
|
||||||
|
|
||||||
Preferred entrypoint for publishable verification: `nix run ./nix/test-cluster#cluster -- fresh-smoke`
|
Preferred entrypoint for publishable verification: `nix run ./nix/test-cluster#cluster -- fresh-smoke`
|
||||||
|
|
||||||
|
Preferred entrypoint for publishable bare-metal bootstrap verification: `nix run ./nix/test-cluster#cluster -- baremetal-iso`
|
||||||
|
|
||||||
|
Preferred entrypoint for portable local verification on TCG-only hosts: `nix build .#checks.x86_64-linux.portable-control-plane-regressions`
|
||||||
|
|
||||||
|
Preferred entrypoint for reproducible KVM-suite reruns: `./nix/test-cluster/run-publishable-kvm-suite.sh <log-dir>`
|
||||||
|
|
||||||
`make cluster-smoke` is a convenience wrapper for the same clean host-build VM validation flow.
|
`make cluster-smoke` is a convenience wrapper for the same clean host-build VM validation flow.
|
||||||
|
|
||||||
`nix run ./nix/test-cluster#cluster -- demo-vm-webapp` creates a PrismNet-attached VM, boots a tiny web app inside the guest, stores its counter in FlareDB, writes JSON snapshots to LightningStor object storage, and then proves that the state survives guest restart plus cross-worker migration. The attached data volume is still used by the guest for its local bootstrap config.
|
`nix run ./nix/test-cluster#cluster -- demo-vm-webapp` creates a PrismNet-attached VM, boots a tiny web app inside the guest, stores its counter in FlareDB, writes JSON snapshots to LightningStor object storage, and then proves that the state survives guest restart plus cross-worker migration. The attached data volume is still used by the guest for its local bootstrap config.
|
||||||
|
|
@ -101,4 +113,4 @@ Logs for each VM are written to `<state-dir>/<node>/vm.log`.
|
||||||
|
|
||||||
## Scope note
|
## Scope note
|
||||||
|
|
||||||
This harness is intentionally VM-first. Older ad hoc launch scripts under `baremetal/vm-cluster` are legacy/manual paths and should not be treated as the primary local validation entrypoint.
|
This harness is intentionally VM-first, but the canonical bare-metal install proof also lives here so the docs, harness, and `flake check` all exercise the same ISO route. Older ad hoc launch scripts under `baremetal/vm-cluster` are legacy/manual paths, and the `netboot-*` images remain experimental helpers rather than the supported bootstrap entrypoint.
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,8 @@
|
||||||
bash
|
bash
|
||||||
coreutils
|
coreutils
|
||||||
curl
|
curl
|
||||||
|
ultracloud.packages.${system}.chainfire-server
|
||||||
|
ultracloud.packages.${system}.deployer-server
|
||||||
ultracloud.packages.${system}.deployer-ctl
|
ultracloud.packages.${system}.deployer-ctl
|
||||||
findutils
|
findutils
|
||||||
gawk
|
gawk
|
||||||
|
|
@ -60,7 +62,9 @@
|
||||||
gnugrep
|
gnugrep
|
||||||
iproute2
|
iproute2
|
||||||
jq
|
jq
|
||||||
|
nix
|
||||||
openssh
|
openssh
|
||||||
|
python3
|
||||||
procps
|
procps
|
||||||
clusterPython
|
clusterPython
|
||||||
qemu
|
qemu
|
||||||
|
|
|
||||||
|
|
@ -8369,6 +8369,10 @@ fresh_smoke_requested() {
|
||||||
smoke_requested "$@"
|
smoke_requested "$@"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
baremetal_iso_requested() {
|
||||||
|
bash "${REPO_ROOT}/nix/test-cluster/verify-baremetal-iso.sh" "$@"
|
||||||
|
}
|
||||||
|
|
||||||
storage_smoke_requested() {
|
storage_smoke_requested() {
|
||||||
BUILD_PROFILE="storage"
|
BUILD_PROFILE="storage"
|
||||||
start_requested "${STORAGE_NODES[@]}"
|
start_requested "${STORAGE_NODES[@]}"
|
||||||
|
|
@ -8686,6 +8690,7 @@ Commands:
|
||||||
validate Run the cluster smoke validation
|
validate Run the cluster smoke validation
|
||||||
smoke start + validate
|
smoke start + validate
|
||||||
fresh-smoke clean local runtime state, rebuild on the host, start, and validate
|
fresh-smoke clean local runtime state, rebuild on the host, start, and validate
|
||||||
|
baremetal-iso verify the canonical ISO bootstrap path from phone-home through desired-system convergence
|
||||||
storage-smoke start the storage lab (node01-05) and validate CoronaFS/LightningStor/PlasmaVMC
|
storage-smoke start the storage lab (node01-05) and validate CoronaFS/LightningStor/PlasmaVMC
|
||||||
fresh-storage-smoke clean local runtime state, rebuild node01-05 on the host, start, and validate the storage lab
|
fresh-storage-smoke clean local runtime state, rebuild node01-05 on the host, start, and validate the storage lab
|
||||||
demo-vm-webapp start the cluster and run the VM web app demo backed by FlareDB and LightningStor
|
demo-vm-webapp start the cluster and run the VM web app demo backed by FlareDB and LightningStor
|
||||||
|
|
@ -8716,6 +8721,7 @@ Commands:
|
||||||
Examples:
|
Examples:
|
||||||
$0 smoke
|
$0 smoke
|
||||||
$0 fresh-smoke
|
$0 fresh-smoke
|
||||||
|
$0 baremetal-iso
|
||||||
$0 storage-smoke
|
$0 storage-smoke
|
||||||
$0 fresh-storage-smoke
|
$0 fresh-storage-smoke
|
||||||
$0 demo-vm-webapp
|
$0 demo-vm-webapp
|
||||||
|
|
@ -8756,6 +8762,7 @@ main() {
|
||||||
validate) validate_cluster ;;
|
validate) validate_cluster ;;
|
||||||
smoke) smoke_requested "$@" ;;
|
smoke) smoke_requested "$@" ;;
|
||||||
fresh-smoke) fresh_smoke_requested "$@" ;;
|
fresh-smoke) fresh_smoke_requested "$@" ;;
|
||||||
|
baremetal-iso) baremetal_iso_requested "$@" ;;
|
||||||
storage-smoke) storage_smoke_requested ;;
|
storage-smoke) storage_smoke_requested ;;
|
||||||
fresh-storage-smoke) fresh_storage_smoke_requested ;;
|
fresh-storage-smoke) fresh_storage_smoke_requested ;;
|
||||||
demo-vm-webapp) demo_vm_webapp_requested "$@" ;;
|
demo-vm-webapp) demo_vm_webapp_requested "$@" ;;
|
||||||
|
|
|
||||||
87
nix/test-cluster/run-publishable-kvm-suite.sh
Executable file
87
nix/test-cluster/run-publishable-kvm-suite.sh
Executable file
|
|
@ -0,0 +1,87 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
|
||||||
|
LOG_DIR="${1:-${ULTRACLOUD_KVM_PUBLISHABLE_LOG_DIR:-${REPO_ROOT}/work/publishable-kvm-suite}}"
|
||||||
|
|
||||||
|
mkdir -p "${LOG_DIR}"
|
||||||
|
|
||||||
|
log() {
|
||||||
|
printf '[publishable-kvm-suite] %s\n' "$*"
|
||||||
|
}
|
||||||
|
|
||||||
|
capture_environment() {
|
||||||
|
{
|
||||||
|
printf 'started_at=%s\n' "$(date -Is)"
|
||||||
|
printf 'hostname=%s\n' "$(hostname)"
|
||||||
|
printf 'kernel=%s\n' "$(uname -a)"
|
||||||
|
printf 'pwd=%s\n' "$(pwd)"
|
||||||
|
printf 'user=%s\n' "$(id -un)"
|
||||||
|
printf 'uid=%s\n' "$(id -u)"
|
||||||
|
printf 'gid=%s\n' "$(id -g)"
|
||||||
|
printf 'branch=%s\n' "$(git -C "${REPO_ROOT}" branch --show-current)"
|
||||||
|
printf 'commit=%s\n' "$(git -C "${REPO_ROOT}" rev-parse HEAD)"
|
||||||
|
printf 'nix_version=%s\n' "$(nix --version)"
|
||||||
|
printf 'kvm_present=%s\n' "$([[ -e /dev/kvm ]] && echo yes || echo no)"
|
||||||
|
if [[ -e /dev/kvm ]]; then
|
||||||
|
printf 'kvm_stat=%s\n' "$(stat -c '%A %U %G %t:%T' /dev/kvm)"
|
||||||
|
fi
|
||||||
|
if [[ -f /sys/module/kvm_intel/parameters/nested ]]; then
|
||||||
|
printf 'kvm_intel_nested=%s\n' "$(cat /sys/module/kvm_intel/parameters/nested)"
|
||||||
|
fi
|
||||||
|
if [[ -f /sys/module/kvm_amd/parameters/nested ]]; then
|
||||||
|
printf 'kvm_amd_nested=%s\n' "$(cat /sys/module/kvm_amd/parameters/nested)"
|
||||||
|
fi
|
||||||
|
} >"${LOG_DIR}/environment.txt"
|
||||||
|
}
|
||||||
|
|
||||||
|
run_case() {
|
||||||
|
local name="$1"
|
||||||
|
shift
|
||||||
|
local logfile="${LOG_DIR}/${name}.log"
|
||||||
|
local metafile="${LOG_DIR}/${name}.meta"
|
||||||
|
local started_at ended_at rc
|
||||||
|
|
||||||
|
started_at="$(date -Is)"
|
||||||
|
printf 'command=%s\n' "$*" >"${metafile}"
|
||||||
|
printf 'started_at=%s\n' "${started_at}" >>"${metafile}"
|
||||||
|
|
||||||
|
log "running ${name}: $*"
|
||||||
|
set +e
|
||||||
|
(
|
||||||
|
cd "${REPO_ROOT}"
|
||||||
|
"$@"
|
||||||
|
) 2>&1 | tee "${logfile}"
|
||||||
|
rc=${PIPESTATUS[0]}
|
||||||
|
set -e
|
||||||
|
|
||||||
|
ended_at="$(date -Is)"
|
||||||
|
printf 'ended_at=%s\n' "${ended_at}" >>"${metafile}"
|
||||||
|
printf 'exit_code=%s\n' "${rc}" >>"${metafile}"
|
||||||
|
|
||||||
|
if (( rc != 0 )); then
|
||||||
|
log "${name} failed; see ${logfile}"
|
||||||
|
return "${rc}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
log "${name} passed"
|
||||||
|
}
|
||||||
|
|
||||||
|
main() {
|
||||||
|
capture_environment
|
||||||
|
|
||||||
|
[[ -e /dev/kvm ]] || {
|
||||||
|
log "/dev/kvm is missing"
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
run_case fresh-smoke nix run ./nix/test-cluster#cluster -- fresh-smoke
|
||||||
|
run_case fresh-demo-vm-webapp nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp
|
||||||
|
run_case fresh-matrix nix run ./nix/test-cluster#cluster -- fresh-matrix
|
||||||
|
|
||||||
|
printf 'finished_at=%s\n' "$(date -Is)" >>"${LOG_DIR}/environment.txt"
|
||||||
|
log "publishable KVM suite passed; logs in ${LOG_DIR}"
|
||||||
|
}
|
||||||
|
|
||||||
|
main "$@"
|
||||||
824
nix/test-cluster/verify-baremetal-iso.sh
Normal file
824
nix/test-cluster/verify-baremetal-iso.sh
Normal file
|
|
@ -0,0 +1,824 @@
|
||||||
|
#!/usr/bin/env bash
# End-to-end verification of the canonical ISO install path: boots two QEMU
# VMs (control-plane + worker) from the installer ISO against host-side
# Chainfire/Deployer services and a host-local Nix binary cache, then checks
# that the installed systems come back healthy and converge.
set -euo pipefail

# Repository root; overridable for out-of-tree invocation.
ROOT="${ULTRACLOUD_REPO_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)}"

CLUSTER_ID="baremetal-iso-canonical"
CHAINFIRE_ENDPOINT="http://127.0.0.1:2379"
# NOTE(review): DEPLOYER_ENDPOINT is not referenced elsewhere in this chunk;
# confirm it is still needed.
DEPLOYER_ENDPOINT="http://127.0.0.1:8088"
BINARY_CACHE_ENDPOINT="http://127.0.0.1:8090"
BOOTSTRAP_TOKEN="baremetal-iso-bootstrap-token"
CONTROL_NODE_ID="iso-control-plane-01"
WORKER_NODE_ID="iso-worker-01"
# Host-side forwarded SSH ports for the two guest VMs.
CONTROL_SSH_PORT="22231"
WORKER_SSH_PORT="22232"
# First DHCP lease handed out by QEMU user networking for each VM.
CONTROL_DHCP_START="10.0.2.15"
WORKER_DHCP_START="10.0.2.16"
CONTROL_DISK_GIB="18G"
WORKER_DISK_GIB="18G"
|
||||||
|
|
||||||
|
# Emit one tagged progress line to stdout.
log() {
  local message="$*"
  printf '%s %s\n' '[baremetal-iso-e2e]' "$message"
}
|
||||||
|
|
||||||
|
# Emit a machine-greppable milestone line consumed by the log/journal waiters.
marker() {
  local payload="$*"
  printf '%s %s\n' 'ULTRACLOUD_MARKER' "$payload"
}
|
||||||
|
|
||||||
|
# Print a tagged error on stderr and abort the script with status 1.
die() {
  printf '[baremetal-iso-e2e] ERROR: %s\n' "$*" >&2
  exit 1
}
|
||||||
|
|
||||||
|
# Abort early when a required external tool is not on PATH.
require_cmd() {
  local tool="$1"
  if ! command -v "$tool" >/dev/null 2>&1; then
    die "required command not found: $tool"
  fi
}
|
||||||
|
|
||||||
|
# Print the store path for a flake attribute. The env var named by $1 acts as
# an override; otherwise the attribute is built with `nix build`.
resolve_store_path() {
  local override_var="$1"
  local flake_attr="$2"
  local override="${!override_var:-}"
  if [[ -n "$override" ]]; then
    printf '%s\n' "$override"
  else
    nix build "$ROOT#$flake_attr" --no-link --print-out-paths
  fi
}
|
||||||
|
|
||||||
|
# Locate an executable, in priority order: env-var override ($1), PATH lookup
# of $2, then a `nix build` of flake attribute $3 (printing <out>/bin/<name>).
resolve_binary() {
  local override_var="$1"
  local bin_name="$2"
  local flake_attr="$3"
  local override="${!override_var:-}"

  if [[ -n "$override" ]]; then
    printf '%s\n' "$override"
    return 0
  fi

  local found
  if found="$(command -v "$bin_name" 2>/dev/null)"; then
    printf '%s\n' "$found"
    return 0
  fi

  local store_path
  store_path="$(nix build "$ROOT#$flake_attr" --no-link --print-out-paths)"
  printf '%s/bin/%s\n' "$store_path" "$bin_name"
}
|
||||||
|
|
||||||
|
# Resolve $1 to a concrete ISO file: accept it directly if it is a file,
# otherwise look for the first *.iso under $1/iso (the nixos isoImage output
# layout); abort if neither yields a file.
resolve_iso_image() {
  local candidate="$1"

  # Direct file reference.
  if [[ -f "$candidate" ]]; then
    printf '%s\n' "$candidate"
    return 0
  fi

  # Store-path output: pick the first ISO in <candidate>/iso.
  local iso_dir="$candidate/iso"
  if [[ -d "$iso_dir" ]]; then
    local found
    found="$(find "$iso_dir" -maxdepth 1 -type f -name '*.iso' | head -n 1)"
    if [[ -n "$found" ]]; then
      printf '%s\n' "$found"
      return 0
    fi
  fi

  die "unable to resolve a bootable ISO file from $candidate"
}
|
||||||
|
|
||||||
|
# Resolve an OVMF firmware file: env-var override ($1) wins, otherwise build
# nixpkgs#OVMF.fd and append the relative path ($2, e.g. FV/OVMF_CODE.fd).
resolve_ovmf_firmware() {
  local override_var="$1"
  local relative_path="$2"
  local override="${!override_var:-}"
  if [[ -n "$override" ]]; then
    printf '%s\n' "$override"
    return 0
  fi

  local ovmf_out
  ovmf_out="$(nix build nixpkgs#OVMF.fd --no-link --print-out-paths)"
  printf '%s/%s\n' "$ovmf_out" "$relative_path"
}
|
||||||
|
|
||||||
|
# Poll $1 with curl until it answers 2xx or $2 seconds elapse; 1 on timeout.
wait_for_http() {
  local url="$1" timeout_secs="$2"
  local deadline=$(( SECONDS + timeout_secs ))
  while (( SECONDS < deadline )); do
    curl -fsS "$url" >/dev/null 2>&1 && return 0
    sleep 1
  done
  return 1
}
|
||||||
|
|
||||||
|
# Poll a host-side log file until it matches the extended regex $3, logging a
# confirmation on success; returns 1 after $4 seconds without a match.
wait_for_log_marker() {
  local label="$1" log_file="$2" needle="$3" timeout_secs="$4"
  local deadline=$(( SECONDS + timeout_secs ))
  while (( SECONDS < deadline )); do
    if [[ -f "$log_file" ]]; then
      if grep -Eq "$needle" "$log_file"; then
        log "${label}: observed ${needle}"
        return 0
      fi
    fi
    sleep 2
  done
  return 1
}
|
||||||
|
|
||||||
|
# Run ssh against the guest forwarded on 127.0.0.1:$1 as root, with a
# fully non-interactive, host-key-agnostic option set and the suite's
# throwaway key ($SSH_KEY). Remaining args are passed through to ssh.
ssh_base() {
  local port="$1"
  shift
  local ssh_opts=(
    -F /dev/null
    -i "$SSH_KEY"
    -o BatchMode=yes
    -o ConnectTimeout=5
    -o ConnectionAttempts=1
    -o StrictHostKeyChecking=no
    -o UserKnownHostsFile=/dev/null
    -o LogLevel=ERROR
    -p "$port"
  )
  ssh "${ssh_opts[@]}" root@127.0.0.1 "$@"
}
|
||||||
|
|
||||||
|
# Poll until `true` can be executed over SSH on the forwarded port; returns 1
# once $3 seconds pass without a successful connection.
wait_for_ssh() {
  local label="$1" port="$2" timeout_secs="$3"
  local deadline=$(( SECONDS + timeout_secs ))
  while (( SECONDS < deadline )); do
    if ssh_base "$port" true >/dev/null 2>&1; then
      log "${label}: SSH is reachable on port ${port}"
      return 0
    fi
    sleep 2
  done
  return 1
}
|
||||||
|
|
||||||
|
# Run a shell snippet on the remote node under a login bash. %q-quoting keeps
# the snippet intact across the local ssh argument boundary.
ssh_shell() {
  local port="$1" script="$2"
  local escaped
  printf -v escaped '%q' "$script"
  ssh_base "$port" "bash -lc ${escaped}"
}
|
||||||
|
|
||||||
|
# Print the resolved store path of the remote node's active system profile.
current_system_path() {
  local ssh_port="$1"
  ssh_shell "$ssh_port" 'readlink -f /run/current-system'
}
|
||||||
|
|
||||||
|
# Print the remote kernel's boot_id; this value changes on every reboot.
remote_boot_id() {
  local ssh_port="$1"
  ssh_shell "$ssh_port" 'cat /proc/sys/kernel/random/boot_id'
}
|
||||||
|
|
||||||
|
# Return 0 iff the current boot's journal on the remote node contains the
# literal string $2. Args after the first two are systemd unit names used to
# narrow the journal query. The command is accumulated with printf -v and %q
# so unit names and the needle survive the extra `bash -lc` layer that
# ssh_shell adds.
remote_journal_has_marker() {
  local port="$1"
  local needle="$2"
  shift 2

  local remote_cmd="journalctl -b -o cat --no-pager"
  local unit
  for unit in "$@"; do
    printf -v remote_cmd '%s -u %q' "$remote_cmd" "$unit"
  done
  printf -v remote_cmd '%s | grep -Fq %q' "$remote_cmd" "$needle"

  ssh_shell "$port" "$remote_cmd"
}
|
||||||
|
|
||||||
|
# Poll the remote journal (optionally scoped to the trailing unit-name args)
# until the literal marker $3 appears; returns 1 after $4 seconds.
wait_for_remote_journal_marker() {
  local label="$1" port="$2" needle="$3" timeout_secs="$4"
  shift 4

  local deadline=$(( SECONDS + timeout_secs ))
  while (( SECONDS < deadline )); do
    if remote_journal_has_marker "$port" "$needle" "$@" >/dev/null 2>&1; then
      log "${label}: observed ${needle} via remote journal"
      return 0
    fi
    sleep 2
  done
  return 1
}
|
||||||
|
|
||||||
|
# Wait until the remote node reports a non-empty boot_id different from $3,
# i.e. the reboot actually happened; returns 1 after $4 seconds.
wait_for_reboot_transition() {
  local label="$1" port="$2" previous_boot_id="$3" timeout_secs="$4"
  local deadline=$(( SECONDS + timeout_secs ))
  local observed

  while (( SECONDS < deadline )); do
    if observed="$(remote_boot_id "$port" 2>/dev/null)" \
      && [[ -n "$observed" && "$observed" != "$previous_boot_id" ]]; then
      log "${label}: reboot completed with boot_id=${observed}"
      return 0
    fi
    sleep 2
  done
  return 1
}
|
||||||
|
|
||||||
|
# Print the deployer's observed-system status for a node; prints "missing"
# when the inspect call fails or the field is absent. Always returns 0 so it
# is safe to call inside `set -e` poll loops.
observed_status() {
  local node_id="$1"
  local payload
  if ! payload="$(
    "$DEPLOYER_CTL_BIN" \
      --chainfire-endpoint "$CHAINFIRE_ENDPOINT" \
      --cluster-id "$CLUSTER_ID" \
      --cluster-namespace ultracloud \
      --deployer-namespace deployer \
      node inspect \
      --node-id "$node_id" \
      --include-observed-system \
      --format json 2>/dev/null
  )"; then
    printf 'missing\n'
    return 0
  fi

  # `//` supplies the fallback when the key is null or missing.
  jq -r '.observed_system.status // "missing"' <<<"$payload"
}
|
||||||
|
|
||||||
|
# Poll the deployer until the node's observed-system status is "active";
# returns 1 after $2 seconds without reaching it.
wait_for_observed_active() {
  local node_id="$1" timeout_secs="$2"
  local deadline=$(( SECONDS + timeout_secs ))
  while (( SECONDS < deadline )); do
    case "$(observed_status "$node_id")" in
      active)
        log "${node_id}: observed-system reached active"
        return 0
        ;;
    esac
    sleep 5
  done
  return 1
}
|
||||||
|
|
||||||
|
# Abort if anything is already listening on TCP port $1. The ss filter
# restricts output to that source port, so the grep only needs to detect a
# non-empty match.
assert_port_free() {
  local port="$1"
  if ss -ltn "( sport = :$port )" | grep -Fq ":$port"; then
    die "port $port is already in use"
  fi
}
|
||||||
|
|
||||||
|
# Write Chainfire and Deployer configs into $TMP_DIR, start both services in
# the background (PIDs kept in CHAINFIRE_PID / DEPLOYER_PID for cleanup), and
# block until each answers its /health endpoint.
# NOTE(review): config bodies reconstructed from an extraction-garbled diff;
# TOML is indentation-insensitive, so layout differences are harmless.
start_host_services() {
  cat >"$TMP_DIR/chainfire.toml" <<EOF
[node]
id = 1
name = "baremetal-iso-chainfire"
role = "control_plane"

[storage]
data_dir = "$TMP_DIR/chainfire-data"

[network]
api_addr = "0.0.0.0:2379"
http_addr = "0.0.0.0:8081"
raft_addr = "0.0.0.0:2380"
gossip_addr = "0.0.0.0:2381"

[cluster]
id = 1
initial_members = []
bootstrap = true

[raft]
role = "voter"
EOF

  cat >"$TMP_DIR/deployer.toml" <<EOF
bind_addr = "0.0.0.0:8088"
cluster_id = "${CLUSTER_ID}"
cluster_namespace = "ultracloud"
heartbeat_timeout_secs = 300
local_state_path = "$TMP_DIR/deployer-state"
bootstrap_flake_bundle_path = "$FLAKE_BUNDLE"
bootstrap_token = "${BOOTSTRAP_TOKEN}"
require_chainfire = true
allow_unknown_nodes = false
allow_unauthenticated = true
allow_test_mappings = false
tls_self_signed = false

[chainfire]
endpoints = ["${CHAINFIRE_ENDPOINT}"]
namespace = "deployer"
EOF

  log "Starting host-side Chainfire"
  # NO_COLOR/CLICOLOR/RUST_LOG_STYLE keep the log files free of ANSI escapes.
  NO_COLOR=1 CLICOLOR=0 RUST_LOG_STYLE=never \
    "$CHAINFIRE_BIN" --config "$TMP_DIR/chainfire.toml" >"$CHAINFIRE_LOG" 2>&1 &
  CHAINFIRE_PID="$!"

  wait_for_http "http://127.0.0.1:8081/health" 120 \
    || die "host Chainfire did not become healthy"

  log "Starting host-side Deployer"
  NO_COLOR=1 CLICOLOR=0 RUST_LOG_STYLE=never \
    "$DEPLOYER_SERVER_BIN" --config "$TMP_DIR/deployer.toml" >"$DEPLOYER_LOG" 2>&1 &
  DEPLOYER_PID="$!"

  wait_for_http "http://127.0.0.1:8088/health" 120 \
    || die "host Deployer did not become healthy"
}
|
||||||
|
|
||||||
|
# Populate $NIX_CACHE_DIR as a flat-file Nix binary cache: nix-cache-info,
# one uncompressed NAR per store path, and a matching .narinfo. The seeded
# closure is the union of both target systems and both disko scripts.
seed_binary_cache() {
  local path
  local nar_rel
  local nar_path
  local store_base
  local store_hash
  local nar_hash
  local nar_size
  local refs
  local deriver

  mkdir -p "$NIX_CACHE_DIR/nar"
  cat >"$NIX_CACHE_DIR/nix-cache-info" <<'EOF'
StoreDir: /nix/store
WantMassQuery: 1
Priority: 30
EOF

  log "Seeding host-local Nix binary cache"
  # Optional: pre-load store-path registration info (valid-path metadata)
  # exported alongside the closure.
  if [[ -n "${ULTRACLOUD_BAREMETAL_CACHE_REGISTRATION:-}" && -f "${ULTRACLOUD_BAREMETAL_CACHE_REGISTRATION}/registration" ]]; then
    nix-store --load-db <"${ULTRACLOUD_BAREMETAL_CACHE_REGISTRATION}/registration"
  fi
  while IFS= read -r path; do
    [[ -n "$path" ]] || continue

    # narinfo files are named by the store hash (the part before the first -).
    store_base="$(basename "$path")"
    store_hash="${store_base%%-*}"
    nar_rel="nar/${store_base}.nar"
    nar_path="$NIX_CACHE_DIR/$nar_rel"

    # NARs are content-addressed by store path, so an existing dump is reusable.
    if [[ ! -f "$nar_path" ]]; then
      nix-store --dump "$path" >"$nar_path"
    fi

    nar_size="$(stat -c%s "$nar_path")"
    nar_hash="$(nix hash file --type sha256 --base32 "$nar_path")"
    refs="$(nix-store --query --references "$path" | xargs -r -n1 basename | tr '\n' ' ' | sed 's/ $//')"
    deriver="$(nix-store --query --deriver "$path" 2>/dev/null || true)"
    deriver="$(basename "$deriver" 2>/dev/null || true)"

    # With Compression: none, the served file IS the NAR, so FileHash/FileSize
    # legitimately equal NarHash/NarSize.
    {
      echo "StorePath: $path"
      echo "URL: $nar_rel"
      echo "Compression: none"
      echo "FileHash: sha256:$nar_hash"
      echo "FileSize: $nar_size"
      echo "NarHash: sha256:$nar_hash"
      echo "NarSize: $nar_size"
      echo "References: $refs"
      if [[ -n "$deriver" && "$deriver" != "unknown-deriver" ]]; then
        echo "Deriver: $deriver"
      fi
    } >"$NIX_CACHE_DIR/${store_hash}.narinfo"
  done < <(
    nix-store --query --requisites \
      "$CONTROL_TARGET_SYSTEM" \
      "$WORKER_TARGET_SYSTEM" \
      "$CONTROL_DISKO_SCRIPT" \
      "$WORKER_DISKO_SCRIPT" \
      | sort -u
  )
}
|
||||||
|
|
||||||
|
# Seed the cache directory, then serve it over plain HTTP on port 8090 so the
# installer VMs can substitute from the host. The server PID is kept in
# NIX_CACHE_PID for cleanup.
start_binary_cache() {
  seed_binary_cache

  log "Starting host-local Nix binary cache"
  python3 -m http.server 8090 --bind 0.0.0.0 --directory "$NIX_CACHE_DIR" \
    >"$NIX_CACHE_LOG" 2>&1 &
  NIX_CACHE_PID="$!"

  wait_for_http "${BINARY_CACHE_ENDPOINT}/nix-cache-info" 120 \
    || die "host-local Nix binary cache did not become reachable"
}
|
||||||
|
|
||||||
|
# Render the desired cluster state (node classes, pools, nodes, enrollment
# rules) to YAML and apply it through deployer-ctl with --prune.
# NOTE(review): the heredoc's YAML indentation was reconstructed from an
# extraction-garbled diff; verify the nesting (esp. which keys sit under
# `labels:` vs. at node level, and `state:` vs. `desired_system:`) against
# the original file before relying on this rendering.
apply_cluster_state() {
  cat >"$TMP_DIR/cluster-state.yaml" <<EOF
cluster:
  cluster_id: ${CLUSTER_ID}
  environment: qemu

node_classes:
  - name: iso-control-plane
    description: Canonical ISO-installed QEMU control-plane target
    roles:
      - control-plane
    labels:
      tier: control-plane
      canonical_install_path: iso
    install_plan:
      nixos_configuration: baremetal-qemu-control-plane
      disko_config_path: nix/nodes/baremetal-qemu/control-plane/disko.nix
      disko_script_path: ${CONTROL_DISKO_SCRIPT}
      target_disk: /dev/vda
  - name: iso-worker
    description: Canonical ISO-installed QEMU worker target
    roles:
      - worker
    labels:
      tier: worker
      canonical_install_path: iso
    install_plan:
      nixos_configuration: baremetal-qemu-worker
      disko_config_path: nix/nodes/baremetal-qemu/worker/disko.nix
      disko_script_path: ${WORKER_DISKO_SCRIPT}
      target_disk: /dev/vda

pools:
  - name: control
    description: ISO bare-metal control-plane pool
    node_class: iso-control-plane
    labels:
      pool.ultracloud.io/name: control
  - name: workers
    description: ISO bare-metal worker pool
    node_class: iso-worker
    labels:
      pool.ultracloud.io/name: workers

nodes:
  - node_id: ${CONTROL_NODE_ID}
    hostname: ${CONTROL_NODE_ID}
    ip: ${CONTROL_DHCP_START}
    roles:
      - control-plane
    labels:
      canonical_install_path: iso
    pool: control
    node_class: iso-control-plane
    install_plan:
      nixos_configuration: baremetal-qemu-control-plane
      disko_config_path: nix/nodes/baremetal-qemu/control-plane/disko.nix
      disko_script_path: ${CONTROL_DISKO_SCRIPT}
      target_disk: /dev/vda
    desired_system:
      nixos_configuration: baremetal-qemu-control-plane
      target_system: ${CONTROL_TARGET_SYSTEM}
      health_check_command:
        - test
        - -f
        - /etc/ultracloud-role-control-plane
      rollback_on_failure: true
    state: pending
  - node_id: ${WORKER_NODE_ID}
    hostname: ${WORKER_NODE_ID}
    ip: ${WORKER_DHCP_START}
    roles:
      - worker
    labels:
      canonical_install_path: iso
    pool: workers
    node_class: iso-worker
    install_plan:
      nixos_configuration: baremetal-qemu-worker
      disko_config_path: nix/nodes/baremetal-qemu/worker/disko.nix
      disko_script_path: ${WORKER_DISKO_SCRIPT}
      target_disk: /dev/vda
    desired_system:
      nixos_configuration: baremetal-qemu-worker
      target_system: ${WORKER_TARGET_SYSTEM}
      health_check_command:
        - test
        - -f
        - /etc/ultracloud-role-worker
      rollback_on_failure: true
    state: pending

enrollment_rules:
  - name: iso-control-plane
    priority: 200
    match_hostname_prefix: iso-control-plane
    pool: control
    node_class: iso-control-plane
    labels:
      canonical_install_path: iso
    ssh_authorized_keys:
      - ${SSH_PUBKEY}
  - name: iso-worker
    priority: 190
    match_hostname_prefix: iso-worker
    pool: workers
    node_class: iso-worker
    labels:
      canonical_install_path: iso
    ssh_authorized_keys:
      - ${SSH_PUBKEY}
EOF

  "$DEPLOYER_CTL_BIN" \
    --chainfire-endpoint "$CHAINFIRE_ENDPOINT" \
    --cluster-id "$CLUSTER_ID" \
    --cluster-namespace ultracloud \
    --deployer-namespace deployer \
    apply --config "$TMP_DIR/cluster-state.yaml" --prune
}
|
||||||
|
|
||||||
|
# Create a fresh qcow2 disk and per-VM copy of the OVMF vars, then boot the
# installer ISO in the background. -no-reboot makes QEMU exit when the
# installer triggers its reboot, which wait_for_pid_exit uses as the hand-off
# signal; the QEMU PID is recorded in <log_path>.pid and serial output goes
# to <log_path>.
# Args: label node_id ssh_port dhcp_start mac disk_size disk_path log_path
launch_iso_vm() {
  local label="$1"
  local node_id="$2"
  local ssh_port="$3"
  local dhcp_start="$4"
  local mac="$5"
  local disk_size="$6"
  local disk_path="$7"
  local log_path="$8"
  local ovmf_vars_path="${disk_path}.ovmf-vars.fd"

  "$QEMU_IMG_BIN" create -f qcow2 "$disk_path" "$disk_size" >/dev/null
  # Each VM gets its own writable vars flash so EFI boot entries persist
  # into the post-install relaunch.
  rm -f "$ovmf_vars_path"
  cp "$OVMF_VARS_TEMPLATE" "$ovmf_vars_path"
  chmod u+w "$ovmf_vars_path"

  nohup "$QEMU_BIN" \
    -name "$label" \
    -machine accel=tcg \
    -cpu max \
    -smp 2 \
    -m 2048 \
    -nographic \
    -no-reboot \
    -boot order=dc,once=d,menu=off \
    -drive if=pflash,format=raw,readonly=on,file="$OVMF_CODE_FD" \
    -drive if=pflash,format=raw,file="$ovmf_vars_path" \
    -drive file="$disk_path",if=virtio,format=qcow2 \
    -cdrom "$ISO_IMAGE" \
    -netdev user,id=user0,hostfwd=tcp:127.0.0.1:${ssh_port}-:22,dhcpstart=${dhcp_start} \
    -device virtio-net-pci,netdev=user0,mac="${mac}" \
    -smbios type=1,product=UltraCloudQEMUBaremetal,serial="${node_id}" \
    >"$log_path" 2>&1 &
  echo "$!" >"${log_path}.pid"
}
|
||||||
|
|
||||||
|
# Relaunch a VM from its installed qcow2 disk: no ISO attached, reboots
# allowed, and the OVMF vars written during the installer run are reused so
# the EFI boot entries survive. Serial output is appended to the existing log
# and the new QEMU PID overwrites <log_path>.pid.
# Args: label ssh_port dhcp_start mac disk_path log_path
launch_installed_vm() {
  local label="$1"
  local ssh_port="$2"
  local dhcp_start="$3"
  local mac="$4"
  local disk_path="$5"
  local log_path="$6"
  local ovmf_vars_path="${disk_path}.ovmf-vars.fd"

  [[ -f "$ovmf_vars_path" ]] || die "missing OVMF vars file for relaunch: $ovmf_vars_path"

  nohup "$QEMU_BIN" \
    -name "$label" \
    -machine accel=tcg \
    -cpu max \
    -smp 2 \
    -m 2048 \
    -nographic \
    -drive if=pflash,format=raw,readonly=on,file="$OVMF_CODE_FD" \
    -drive if=pflash,format=raw,file="$ovmf_vars_path" \
    -drive file="$disk_path",if=virtio,format=qcow2 \
    -netdev user,id=user0,hostfwd=tcp:127.0.0.1:${ssh_port}-:22,dhcpstart=${dhcp_start} \
    -device virtio-net-pci,netdev=user0,mac="${mac}" \
    >>"$log_path" 2>&1 &
  echo "$!" >"${log_path}.pid"
}
|
||||||
|
|
||||||
|
# Wait for the process recorded in pid file $2 to exit (used to detect the
# installer VM terminating via -no-reboot); returns 1 after $3 seconds.
wait_for_pid_exit() {
  local label="$1" pid_file="$2" timeout_secs="$3"
  local deadline=$(( SECONDS + timeout_secs ))

  [[ -f "$pid_file" ]] || die "${label} is missing pid file $pid_file"
  local pid
  pid="$(cat "$pid_file")"

  while (( SECONDS < deadline )); do
    # kill -0 only probes for existence; failure means the process is gone.
    if ! kill -0 "$pid" >/dev/null 2>&1; then
      log "${label}: QEMU exited after installer-triggered reboot"
      return 0
    fi
    sleep 2
  done
  return 1
}
|
||||||
|
|
||||||
|
# Drive one node through the full installer lifecycle, asserting each stage:
# phone-home registration, installer-boot markers, bundle download, disko,
# nixos-install, reboot into the installed disk, post-install markers, and
# finally convergence to the expected system closure.
# Args: node_id ssh_port disk_path log_path expected_role expected_system
#       dhcp_start mac
verify_node() {
  local node_id="$1"
  local ssh_port="$2"
  local disk_path="$3"
  local log_path="$4"
  local expected_role="$5"
  local expected_system="$6"
  local dhcp_start="$7"
  local mac="$8"

  # Phase 1: installer environment boots and registers with the Deployer.
  wait_for_log_marker "$node_id" "$TMP_DIR/deployer.log" "Node registered successfully.*node_id=${node_id}" 900 \
    || die "${node_id} never completed /api/v1/phone-home registration"
  wait_for_ssh "$node_id" "$ssh_port" 900 \
    || die "${node_id} never exposed SSH during the installer boot"
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER pre-install.boot.${node_id}" 120 \
    ultracloud-bootstrap.service ultracloud-install.service \
    || die "${node_id} never recorded the pre-install boot marker"
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER pre-install.phone-home.complete.${node_id}" 120 \
    ultracloud-bootstrap.service ultracloud-install.service \
    || die "${node_id} never recorded the phone-home completion marker"
  marker "pre-install.${node_id}"

  # Phase 2: installation proper (bundle download, disko, nixos-install).
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER install.bundle-downloaded.${node_id}" 1200 \
    ultracloud-install.service \
    || die "${node_id} never downloaded the flake bundle"
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER install.disko.complete.${node_id}" 2400 \
    ultracloud-install.service \
    || die "${node_id} never completed disko"
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER install.nixos-install.complete.${node_id}" 3600 \
    ultracloud-install.service \
    || die "${node_id} never finished nixos-install"
  marker "install.${node_id}"

  # Phase 3: the installer reboots; the VM was launched with -no-reboot, so
  # QEMU exits, and the VM is relaunched from the now-populated disk.
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER reboot.${node_id}" 3600 \
    ultracloud-install.service \
    || die "${node_id} never emitted reboot marker"
  marker "reboot.${node_id}"

  wait_for_pid_exit "$node_id" "${log_path}.pid" 300 \
    || die "${node_id} installer VM did not exit after the reboot marker"
  launch_installed_vm \
    "ultracloud-baremetal-${node_id}-installed" \
    "$ssh_port" \
    "$dhcp_start" \
    "$mac" \
    "$disk_path" \
    "$log_path"
  wait_for_ssh "$node_id" "$ssh_port" 1800 \
    || die "${node_id} did not come back over SSH after reboot"
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER post-install.boot.${node_id}.${expected_role}" 1800 \
    ultracloud-baremetal-postinstall-marker.service \
    || die "${node_id} never emitted post-install marker"
  marker "post-install.${node_id}"

  # Phase 4: spot-check the installed system's on-disk state and services.
  ssh_shell "$ssh_port" 'test -f /etc/ultracloud/node-config.json'
  ssh_shell "$ssh_port" 'test -d /var/lib/photon-src/.bundle-inputs/nixpkgs'
  ssh_shell "$ssh_port" 'systemctl is-active nix-agent.service >/dev/null'
  ssh_shell "$ssh_port" "grep -Fx '${expected_role}' /etc/ultracloud-role"
  if [[ "$expected_role" == "control-plane" ]]; then
    ssh_shell "$ssh_port" 'systemctl is-active chainfire.service >/dev/null'
  fi

  # Phase 5: control-plane view agrees and the active closure matches.
  wait_for_observed_active "$node_id" 1200 \
    || die "${node_id} never reached observed-system active"
  [[ "$(current_system_path "$ssh_port")" == "$expected_system" ]] \
    || die "${node_id} current system does not match expected target"
  marker "desired-system-active.${node_id}"
}
|
||||||
|
|
||||||
|
# EXIT trap: tear down VMs and host services, dump log tails when the run
# failed, and remove the state dir unless the caller asked to keep it.
# Re-exits with the original status.
cleanup() {
  local status="$?"
  set +e

  # Stop both QEMU processes (installer or post-install relaunch) if running.
  for pid_file in "$CONTROL_LOG.pid" "$WORKER_LOG.pid"; do
    if [[ -f "$pid_file" ]]; then
      pid="$(cat "$pid_file")"
      kill "$pid" 2>/dev/null || true
      wait "$pid" 2>/dev/null || true
    fi
  done

  # Host services started by this script; each may or may not have launched.
  if [[ -n "${DEPLOYER_PID:-}" ]]; then
    kill "$DEPLOYER_PID" 2>/dev/null || true
    wait "$DEPLOYER_PID" 2>/dev/null || true
  fi
  if [[ -n "${CHAINFIRE_PID:-}" ]]; then
    kill "$CHAINFIRE_PID" 2>/dev/null || true
    wait "$CHAINFIRE_PID" 2>/dev/null || true
  fi
  if [[ -n "${NIX_CACHE_PID:-}" ]]; then
    kill "$NIX_CACHE_PID" 2>/dev/null || true
    wait "$NIX_CACHE_PID" 2>/dev/null || true
  fi

  # On failure, surface the most useful diagnostics directly in the run log.
  if (( status != 0 )); then
    log "control-plane serial log tail:"
    tail -n 120 "$CONTROL_LOG" 2>/dev/null || true
    log "worker serial log tail:"
    tail -n 120 "$WORKER_LOG" 2>/dev/null || true
    log "deployer log tail:"
    tail -n 120 "$DEPLOYER_LOG" 2>/dev/null || true
    log "chainfire log tail:"
    tail -n 120 "$CHAINFIRE_LOG" 2>/dev/null || true
    log "binary cache log tail:"
    tail -n 120 "$NIX_CACHE_LOG" 2>/dev/null || true
  fi

  if [[ "${KEEP_STATE_DIR:-0}" != "1" ]]; then
    rm -rf "$TMP_DIR"
  fi
  exit "$status"
}
|
||||||
|
|
||||||
|
# Orchestrates the run: check tooling, resolve all build artifacts, prepare
# state/log paths, start the binary cache and host services, apply desired
# cluster state, then install and verify the control-plane VM followed by
# the worker VM.
main() {
  require_cmd curl
  require_cmd jq
  require_cmd nix
  require_cmd python3
  require_cmd qemu-img
  require_cmd qemu-system-x86_64
  require_cmd ssh
  require_cmd ssh-keygen
  require_cmd ss

  # Resolve every artifact up front; each ULTRACLOUD_* env var short-circuits
  # the corresponding `nix build`.
  ISO_IMAGE="$(resolve_iso_image "$(resolve_store_path ULTRACLOUD_BAREMETAL_ISO_IMAGE 'nixosConfigurations.ultracloud-iso.config.system.build.isoImage')")"
  FLAKE_BUNDLE="$(resolve_store_path ULTRACLOUD_BAREMETAL_FLAKE_BUNDLE 'packages.x86_64-linux.ultracloudFlakeBundle')"
  CONTROL_TARGET_SYSTEM="$(resolve_store_path ULTRACLOUD_BAREMETAL_CONTROL_TARGET 'nixosConfigurations.baremetal-qemu-control-plane.config.system.build.toplevel')"
  WORKER_TARGET_SYSTEM="$(resolve_store_path ULTRACLOUD_BAREMETAL_WORKER_TARGET 'nixosConfigurations.baremetal-qemu-worker.config.system.build.toplevel')"
  CONTROL_DISKO_SCRIPT="$(resolve_store_path ULTRACLOUD_BAREMETAL_CONTROL_DISKO_SCRIPT 'nixosConfigurations.baremetal-qemu-control-plane.config.system.build.formatMount')"
  WORKER_DISKO_SCRIPT="$(resolve_store_path ULTRACLOUD_BAREMETAL_WORKER_DISKO_SCRIPT 'nixosConfigurations.baremetal-qemu-worker.config.system.build.formatMount')"
  CHAINFIRE_BIN="$(resolve_binary ULTRACLOUD_CHAINFIRE_SERVER_BIN chainfire 'packages.x86_64-linux.chainfire-server')"
  DEPLOYER_SERVER_BIN="$(resolve_binary ULTRACLOUD_DEPLOYER_SERVER_BIN deployer-server 'packages.x86_64-linux.deployer-server')"
  DEPLOYER_CTL_BIN="$(resolve_binary ULTRACLOUD_DEPLOYER_CTL_BIN deployer-ctl 'packages.x86_64-linux.deployer-ctl')"
  OVMF_CODE_FD="$(resolve_ovmf_firmware ULTRACLOUD_OVMF_CODE 'FV/OVMF_CODE.fd')"
  OVMF_VARS_TEMPLATE="$(resolve_ovmf_firmware ULTRACLOUD_OVMF_VARS 'FV/OVMF_VARS.fd')"
  QEMU_BIN="${ULTRACLOUD_QEMU_BIN:-$(command -v qemu-system-x86_64)}"
  QEMU_IMG_BIN="${ULTRACLOUD_QEMU_IMG_BIN:-$(command -v qemu-img)}"

  # State dir: reuse a caller-provided dir (preserving only nix-cache between
  # runs) or create a throwaway one.
  if [[ -n "${ULTRACLOUD_BAREMETAL_STATE_DIR:-}" ]]; then
    TMP_DIR="$ULTRACLOUD_BAREMETAL_STATE_DIR"
    KEEP_STATE_DIR=1
    mkdir -p "$TMP_DIR"
    find "$TMP_DIR" -mindepth 1 -maxdepth 1 \
      ! -name nix-cache \
      -exec rm -rf {} +
  else
    TMP_DIR="$(mktemp -d -t ultracloud-baremetal-iso.XXXXXX)"
    KEEP_STATE_DIR=0
  fi
  NIX_CACHE_DIR="$TMP_DIR/nix-cache"
  CONTROL_LOG="$TMP_DIR/control-plane.serial.log"
  WORKER_LOG="$TMP_DIR/worker.serial.log"
  DEPLOYER_LOG="$TMP_DIR/deployer.log"
  CHAINFIRE_LOG="$TMP_DIR/chainfire.log"
  NIX_CACHE_LOG="$TMP_DIR/nix-cache.log"
  # Installed after log paths exist so cleanup's tails are meaningful.
  trap cleanup EXIT

  # Throwaway SSH keypair; the public key reaches the nodes via the
  # enrollment rules applied below.
  SSH_KEY="$TMP_DIR/id_ed25519"
  ssh-keygen -q -t ed25519 -N "" -f "$SSH_KEY" >/dev/null
  SSH_PUBKEY="$(tr -d '\n' <"$SSH_KEY.pub")"

  # Fail fast if any fixed host port is already taken.
  assert_port_free 2379
  assert_port_free 8081
  assert_port_free 8088
  assert_port_free 8090
  assert_port_free "$CONTROL_SSH_PORT"
  assert_port_free "$WORKER_SSH_PORT"

  start_binary_cache
  start_host_services
  apply_cluster_state

  launch_iso_vm \
    "ultracloud-baremetal-control-plane" \
    "$CONTROL_NODE_ID" \
    "$CONTROL_SSH_PORT" \
    "$CONTROL_DHCP_START" \
    "52:54:00:11:22:31" \
    "$CONTROL_DISK_GIB" \
    "$TMP_DIR/control-plane.qcow2" \
    "$CONTROL_LOG"

  verify_node \
    "$CONTROL_NODE_ID" \
    "$CONTROL_SSH_PORT" \
    "$TMP_DIR/control-plane.qcow2" \
    "$CONTROL_LOG" \
    "control-plane" \
    "$CONTROL_TARGET_SYSTEM" \
    "$CONTROL_DHCP_START" \
    "52:54:00:11:22:31"

  launch_iso_vm \
    "ultracloud-baremetal-worker" \
    "$WORKER_NODE_ID" \
    "$WORKER_SSH_PORT" \
    "$WORKER_DHCP_START" \
    "52:54:00:11:22:32" \
    "$WORKER_DISK_GIB" \
    "$TMP_DIR/worker.qcow2" \
    "$WORKER_LOG"

  verify_node \
    "$WORKER_NODE_ID" \
    "$WORKER_SSH_PORT" \
    "$TMP_DIR/worker.qcow2" \
    "$WORKER_LOG" \
    "worker" \
    "$WORKER_TARGET_SYSTEM" \
    "$WORKER_DHCP_START" \
    "52:54:00:11:22:32"

  log "Canonical ISO bare-metal QEMU verification succeeded"
}

main "$@"
|
||||||
4
plans/baselines/logs/nix-build-deployer-vm-smoke.meta
Normal file
4
plans/baselines/logs/nix-build-deployer-vm-smoke.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
command=nix build .#checks.x86_64-linux.deployer-vm-smoke
|
||||||
|
start=2026-04-04T16:44:34+09:00
|
||||||
|
end=2026-04-04T16:50:40+09:00
|
||||||
|
status=1
|
||||||
4
plans/baselines/logs/nix-eval-netboot-all-in-one.meta
Normal file
4
plans/baselines/logs/nix-eval-netboot-all-in-one.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
command=nix eval --raw .#nixosConfigurations.netboot-all-in-one.config.system.build.toplevel.drvPath
|
||||||
|
start=2026-04-04T16:43:54+09:00
|
||||||
|
end=2026-04-04T16:43:56+09:00
|
||||||
|
status=1
|
||||||
4
plans/baselines/logs/nix-eval-netboot-control-plane.meta
Normal file
4
plans/baselines/logs/nix-eval-netboot-control-plane.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
command=nix eval --raw .#nixosConfigurations.netboot-control-plane.config.system.build.toplevel.drvPath
|
||||||
|
start=2026-04-04T16:43:54+09:00
|
||||||
|
end=2026-04-04T16:44:01+09:00
|
||||||
|
status=0
|
||||||
4
plans/baselines/logs/nix-eval-netboot-worker.meta
Normal file
4
plans/baselines/logs/nix-eval-netboot-worker.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
command=nix eval --raw .#nixosConfigurations.netboot-worker.config.system.build.toplevel.drvPath
|
||||||
|
start=2026-04-04T16:43:54+09:00
|
||||||
|
end=2026-04-04T16:43:56+09:00
|
||||||
|
status=1
|
||||||
4
plans/baselines/logs/nix-eval-node01.meta
Normal file
4
plans/baselines/logs/nix-eval-node01.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
command=nix eval --raw .#nixosConfigurations.node01.config.system.build.toplevel.drvPath
|
||||||
|
start=2026-04-04T16:43:45+09:00
|
||||||
|
end=2026-04-04T16:43:49+09:00
|
||||||
|
status=0
|
||||||
4
plans/baselines/logs/nix-eval-ultracloud-iso.meta
Normal file
4
plans/baselines/logs/nix-eval-ultracloud-iso.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
command=nix eval --raw .#nixosConfigurations.ultracloud-iso.config.system.build.toplevel.drvPath
|
||||||
|
start=2026-04-04T16:43:34+09:00
|
||||||
|
end=2026-04-04T16:43:41+09:00
|
||||||
|
status=0
|
||||||
4
plans/baselines/logs/nix-run-fresh-demo-vm-webapp.meta
Normal file
4
plans/baselines/logs/nix-run-fresh-demo-vm-webapp.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
command=nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp
|
||||||
|
start=2026-04-04T16:48:18+09:00
|
||||||
|
end=2026-04-04T16:48:23+09:00
|
||||||
|
status=1
|
||||||
4
plans/baselines/logs/nix-run-fresh-matrix.meta
Normal file
4
plans/baselines/logs/nix-run-fresh-matrix.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
command=nix run ./nix/test-cluster#cluster -- fresh-matrix
|
||||||
|
start=2026-04-04T16:48:26+09:00
|
||||||
|
end=2026-04-04T16:48:29+09:00
|
||||||
|
status=1
|
||||||
4
plans/baselines/logs/nix-run-fresh-smoke.meta
Normal file
4
plans/baselines/logs/nix-run-fresh-smoke.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
command=nix run ./nix/test-cluster#cluster -- fresh-smoke
|
||||||
|
start=2026-04-04T16:46:41+09:00
|
||||||
|
end=2026-04-04T16:48:14+09:00
|
||||||
|
status=1
|
||||||
52
plans/baselines/main-baseline-2026-04-04.md
Normal file
52
plans/baselines/main-baseline-2026-04-04.md
Normal file
|
|
@ -0,0 +1,52 @@
|
||||||
|
# UltraCloud Baseline 2026-04-04
|
||||||
|
|
||||||
|
Branch: `task/f5c70db0-baseline-profiles` from `origin/main`
|
||||||
|
|
||||||
|
This file records the required smoke/build/eval commands requested by task `f5c70db0-0106-4200-bf99-0c5105116367` before profile-definition changes.
|
||||||
|
|
||||||
|
## Branch Setup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git fetch origin && git switch -c task/f5c70db0-baseline-profiles origin/main
|
||||||
|
```
|
||||||
|
|
||||||
|
Result: success. The working branch now tracks `origin/main`.
|
||||||
|
|
||||||
|
## Environment Notes
|
||||||
|
|
||||||
|
- Host kernel: `Linux cn-ubuntu-xgpu 6.17.0-14-generic`
|
||||||
|
- Nix: `2.33.3`
|
||||||
|
- `/dev/kvm`: absent in this environment
|
||||||
|
- Nix builder features observed during `deployer-vm-smoke`: `{benchmark, big-parallel, nixos-test, uid-range}`
|
||||||
|
- Raw command logs are stored under `plans/baselines/logs/`
|
||||||
|
|
||||||
|
## Baseline Command Results
|
||||||
|
|
||||||
|
| Command | Start | End | Status | Result summary |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| `nix run ./nix/test-cluster#cluster -- fresh-smoke` | `2026-04-04T16:46:41+09:00` | `2026-04-04T16:48:14+09:00` | `1` | built the cluster runner closure, then failed preflight with `/dev/kvm is not present; nested-KVM VM validation requires hardware virtualization` |
|
||||||
|
| `nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp` | `2026-04-04T16:48:18+09:00` | `2026-04-04T16:48:23+09:00` | `1` | failed preflight with `/dev/kvm is not present; nested-KVM VM validation requires hardware virtualization` |
|
||||||
|
| `nix run ./nix/test-cluster#cluster -- fresh-matrix` | `2026-04-04T16:48:26+09:00` | `2026-04-04T16:48:29+09:00` | `1` | failed preflight with `/dev/kvm is not present; nested-KVM VM validation requires hardware virtualization` |
|
||||||
|
| `nix build .#checks.x86_64-linux.deployer-vm-smoke` | `2026-04-04T16:44:34+09:00` | `2026-04-04T16:50:40+09:00` | `1` | built most of the test closure, then failed because the current builder does not advertise the required `kvm` system feature |
|
||||||
|
|
||||||
|
## Baseline `nix eval` Results
|
||||||
|
|
||||||
|
| Output | Start | End | Status | Result |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| `ultracloud-iso` | `2026-04-04T16:43:34+09:00` | `2026-04-04T16:43:41+09:00` | `0` | `/nix/store/j60isp8ai10vkgdncvi3wcjdgxqwjzpy-nixos-system-nixos-26.05.20251208.addf7cf.drv` |
|
||||||
|
| `node01` | `2026-04-04T16:43:45+09:00` | `2026-04-04T16:43:49+09:00` | `0` | `/nix/store/94g1xyv25s09hyyi924sp5bxb0y8kir9-nixos-system-node01-26.05.20251208.addf7cf.drv` |
|
||||||
|
| `netboot-control-plane` | `2026-04-04T16:43:54+09:00` | `2026-04-04T16:44:01+09:00` | `0` | `/nix/store/afknxzr1mhrlrzrkp8mj9q1fwwahdld3-nixos-system-nixos-kexec-26.05.20251208.addf7cf.drv` |
|
||||||
|
| `netboot-worker` | `2026-04-04T16:43:54+09:00` | `2026-04-04T16:43:56+09:00` | `1` | `undefined variable 'plasmavmc-server'` at `nix/images/netboot-worker.nix:28:5` |
|
||||||
|
| `netboot-all-in-one` | `2026-04-04T16:43:54+09:00` | `2026-04-04T16:43:56+09:00` | `1` | `undefined variable 'chainfire-server'` at `nix/images/netboot-all-in-one.nix:39:5` |
|
||||||
|
|
||||||
|
## Post-Baseline Repair
|
||||||
|
|
||||||
|
After recording the baseline, `flake.nix` was adjusted so the netboot image configurations receive the UltraCloud overlay during evaluation. That keeps the baseline intact while making the named canonical-profile outputs evaluable.
|
||||||
|
|
||||||
|
Post-fix spot check:
|
||||||
|
|
||||||
|
- `ultracloud-iso`: `/nix/store/j60isp8ai10vkgdncvi3wcjdgxqwjzpy-nixos-system-nixos-26.05.20251208.addf7cf.drv`
|
||||||
|
- `node01`: `/nix/store/di87n45m5v30n8gccbs8pic2j8wbwgvr-nixos-system-node01-26.05.20251208.addf7cf.drv`
|
||||||
|
- `netboot-control-plane`: `/nix/store/afknxzr1mhrlrzrkp8mj9q1fwwahdld3-nixos-system-nixos-kexec-26.05.20251208.addf7cf.drv`
|
||||||
|
- `netboot-worker`: `/nix/store/6x51ss2ql1n4nhi8ad0avhvzk4n6arcr-nixos-system-nixos-kexec-26.05.20251208.addf7cf.drv`
|
||||||
|
- `netboot-all-in-one`: `/nix/store/2l57rda3pnd1hivjicfmp53zpimxn00n-nixos-system-nixos-kexec-26.05.20251208.addf7cf.drv`
|
||||||
Loading…
Add table
Reference in a new issue