From 11cd8be2f749cffe3d97287e6f587d3855574505 Mon Sep 17 00:00:00 2001
From: centra
Date: Sun, 5 Apr 2026 03:47:21 +0900
Subject: [PATCH] Establish canonical validation lanes
---
.github/workflows/kvm-publishable.yml | 37 +
.github/workflows/nix.yml | 28 +-
README.md | 91 +-
docs/README.md | 14 +-
docs/component-matrix.md | 74 +-
docs/testing.md | 111 ++-
flake.nix | 487 ++++++++++-
nix/ci/flake.nix | 3 +
nix/images/netboot-all-in-one.nix | 214 +----
nix/iso/ultracloud-iso.nix | 230 +++--
nix/modules/default.nix | 1 +
nix/nodes/baremetal-qemu/common.nix | 87 ++
.../control-plane/configuration.nix | 45 +
.../baremetal-qemu/control-plane/disko.nix | 5 +
.../baremetal-qemu/worker/configuration.nix | 35 +
nix/nodes/baremetal-qemu/worker/disko.nix | 5 +
nix/nodes/vm-cluster/common-disko.nix | 17 +-
nix/single-node/base.nix | 360 ++++++++
nix/single-node/qemu-vm.nix | 24 +
nix/test-cluster/README.md | 14 +-
nix/test-cluster/flake.nix | 4 +
nix/test-cluster/run-cluster.sh | 7 +
nix/test-cluster/run-publishable-kvm-suite.sh | 87 ++
nix/test-cluster/verify-baremetal-iso.sh | 824 ++++++++++++++++++
.../logs/nix-build-deployer-vm-smoke.meta | 4 +
.../logs/nix-eval-netboot-all-in-one.meta | 4 +
.../logs/nix-eval-netboot-control-plane.meta | 4 +
.../logs/nix-eval-netboot-worker.meta | 4 +
plans/baselines/logs/nix-eval-node01.meta | 4 +
.../logs/nix-eval-ultracloud-iso.meta | 4 +
.../logs/nix-run-fresh-demo-vm-webapp.meta | 4 +
.../baselines/logs/nix-run-fresh-matrix.meta | 4 +
plans/baselines/logs/nix-run-fresh-smoke.meta | 4 +
plans/baselines/main-baseline-2026-04-04.md | 52 ++
34 files changed, 2578 insertions(+), 314 deletions(-)
create mode 100644 .github/workflows/kvm-publishable.yml
create mode 100644 nix/nodes/baremetal-qemu/common.nix
create mode 100644 nix/nodes/baremetal-qemu/control-plane/configuration.nix
create mode 100644 nix/nodes/baremetal-qemu/control-plane/disko.nix
create mode 100644 nix/nodes/baremetal-qemu/worker/configuration.nix
create mode 100644 nix/nodes/baremetal-qemu/worker/disko.nix
create mode 100644 nix/single-node/base.nix
create mode 100644 nix/single-node/qemu-vm.nix
create mode 100755 nix/test-cluster/run-publishable-kvm-suite.sh
create mode 100644 nix/test-cluster/verify-baremetal-iso.sh
create mode 100644 plans/baselines/logs/nix-build-deployer-vm-smoke.meta
create mode 100644 plans/baselines/logs/nix-eval-netboot-all-in-one.meta
create mode 100644 plans/baselines/logs/nix-eval-netboot-control-plane.meta
create mode 100644 plans/baselines/logs/nix-eval-netboot-worker.meta
create mode 100644 plans/baselines/logs/nix-eval-node01.meta
create mode 100644 plans/baselines/logs/nix-eval-ultracloud-iso.meta
create mode 100644 plans/baselines/logs/nix-run-fresh-demo-vm-webapp.meta
create mode 100644 plans/baselines/logs/nix-run-fresh-matrix.meta
create mode 100644 plans/baselines/logs/nix-run-fresh-smoke.meta
create mode 100644 plans/baselines/main-baseline-2026-04-04.md
diff --git a/.github/workflows/kvm-publishable.yml b/.github/workflows/kvm-publishable.yml
new file mode 100644
index 0000000..b85f1b2
--- /dev/null
+++ b/.github/workflows/kvm-publishable.yml
@@ -0,0 +1,37 @@
+name: KVM Publishable Validation
+
+on:
+ workflow_dispatch:
+
+jobs:
+ publishable-kvm-suite:
+ runs-on: ubuntu-latest
+ timeout-minutes: 360
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: DeterminateSystems/nix-installer-action@v11
+
+ - uses: DeterminateSystems/magic-nix-cache-action@v8
+
+ - name: Probe KVM Environment
+ run: |
+ set -euo pipefail
+ echo "hostname=$(hostname)"
+ uname -a
+ id
+ test -e /dev/kvm
+ ls -l /dev/kvm
+ if [[ -f /sys/module/kvm_intel/parameters/nested ]]; then
+ echo "kvm_intel_nested=$(cat /sys/module/kvm_intel/parameters/nested)"
+ fi
+ if [[ -f /sys/module/kvm_amd/parameters/nested ]]; then
+ echo "kvm_amd_nested=$(cat /sys/module/kvm_amd/parameters/nested)"
+ fi
+
+ - name: Run Publishable KVM Suite
+ run: |
+ set -euo pipefail
+ chmod +x ./nix/test-cluster/run-publishable-kvm-suite.sh
+ ./nix/test-cluster/run-publishable-kvm-suite.sh "$RUNNER_TEMP/publishable-kvm-suite"
diff --git a/.github/workflows/nix.yml b/.github/workflows/nix.yml
index 3b96a4d..7ef2f93 100644
--- a/.github/workflows/nix.yml
+++ b/.github/workflows/nix.yml
@@ -96,6 +96,23 @@ jobs:
run: |
nix run ./nix/ci#gate-ci -- --shared-crate ${{ matrix.crate }} --tier 0 --no-logs
+ portable-regressions:
+ needs: filter
+ if: ${{ needs.filter.outputs.any_changed == 'true' || needs.filter.outputs.global_changed == 'true' || needs.filter.outputs.shared_crates_changed == 'true' }}
+ runs-on: ubuntu-latest
+ name: portable regressions
+ steps:
+ - uses: actions/checkout@v4
+ - uses: DeterminateSystems/nix-installer-action@v11
+ - uses: DeterminateSystems/magic-nix-cache-action@v8
+
+ - name: Run portable canonical profile regressions
+ run: |
+ nix build \
+ .#checks.x86_64-linux.canonical-profile-eval-guards \
+ .#checks.x86_64-linux.portable-control-plane-regressions \
+ --accept-flake-config
+
# Build server packages (tier 1+)
build:
needs: [filter, gate]
@@ -116,7 +133,7 @@ jobs:
# Summary job for PR status checks
ci-status:
- needs: [filter, gate, shared-crates-gate]
+ needs: [filter, gate, shared-crates-gate, portable-regressions]
if: always()
runs-on: ubuntu-latest
steps:
@@ -128,11 +145,18 @@ jobs:
if [[ "${{ needs.shared-crates-gate.result }}" == "failure" ]]; then
exit 1
fi
- if [[ "${{ needs.filter.outputs.any_changed }}" == "true" || "${{ needs.filter.outputs.global_changed }}" == "true" ]]; then
+ if [[ "${{ needs.portable-regressions.result }}" == "failure" ]]; then
+ exit 1
+ fi
+ if [[ "${{ needs.filter.outputs.any_changed }}" == "true" || "${{ needs.filter.outputs.global_changed }}" == "true" || "${{ needs.filter.outputs.shared_crates_changed }}" == "true" ]]; then
if [[ "${{ needs.gate.result }}" == "skipped" ]]; then
echo "Gate was skipped despite changes. This is unexpected."
exit 1
fi
+ if [[ "${{ needs.portable-regressions.result }}" == "skipped" ]]; then
+ echo "Portable regressions were skipped despite changes. This is unexpected."
+ exit 1
+ fi
fi
if [[ "${{ needs.filter.outputs.shared_crates_changed }}" == "true" ]]; then
if [[ "${{ needs.shared-crates-gate.result }}" == "skipped" ]]; then
diff --git a/README.md b/README.md
index 1a2f63a..daef69e 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,8 @@
UltraCloud is a Nix-first cloud platform workspace that assembles a small control plane, network services, VM hosting, shared storage, object storage, and gateway services into one reproducible repository.
-The canonical local proof path is the six-node VM cluster under [`nix/test-cluster`](/home/centra/cloud/nix/test-cluster/README.md). It builds all guest images on the host, boots them as hardware-like QEMU nodes, and validates real multi-node behavior.
+The fastest public entrypoint is the one-command single-node quickstart. The canonical multi-node integration proof remains the six-node VM cluster under [`nix/test-cluster`](nix/test-cluster/README.md), which builds all guest images on the host, boots them as hardware-like QEMU nodes, and validates real multi-node behavior.
+The canonical bare-metal bootstrap proof is the ISO-on-QEMU path under [`nix/test-cluster`](nix/test-cluster/README.md), which drives phone-home, Disko install, reboot, and desired-system convergence for one control-plane node and one worker-equivalent node.
## Components
@@ -15,38 +16,102 @@ The canonical local proof path is the six-node VM cluster under [`nix/test-clust
- `plasmavmc`: VM control plane and worker agents
- `coronafs`: shared filesystem for mutable VM volumes
- `lightningstor`: object storage and VM image backing
-- `k8shost`: Kubernetes-style hosting control plane
+- `k8shost`: Kubernetes-style hosting control plane for tenant pods and services
- `apigateway`: external API and proxy surface
- `nightlight`: metrics ingestion and query service
- `creditservice`: minimal reference quota/credit service
-- `deployer`: bootstrap and phone-home deployment service
+- `deployer`: bootstrap and phone-home deployment service that owns install plans and desired-system intent
- `fleet-scheduler`: non-Kubernetes service scheduler for bare-metal cluster services
## Quick Start
+Single-node quickstart:
+
+```bash
+nix run .#single-node-quickstart
+```
+
+This app builds the minimal VM stack, boots a QEMU VM, waits for `chainfire`, `flaredb`, `iam`, `prismnet`, and `plasmavmc`, checks their health endpoints, and verifies the in-guest VM runtime prerequisites. For an interactive session, keep the VM running:
+
+```bash
+ULTRACLOUD_QUICKSTART_KEEP_VM=1 nix run .#single-node-quickstart
+```
+
+The legacy name `.#all-in-one-quickstart` is kept as an alias.
+
+Portable local proof on hosts without `/dev/kvm`:
+
+```bash
+nix build .#checks.x86_64-linux.canonical-profile-eval-guards
+nix build .#checks.x86_64-linux.portable-control-plane-regressions
+```
+
+This TCG-safe lane guards against canonical-profile drift and keeps the core `chainfire` / `deployer` control-plane path, the `deployer -> nix-agent` boundary, and the `fleet-scheduler -> node-agent` boundary under regression coverage without requiring nested virtualization.
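+
+To see exactly which sub-checks this lane pins, list the aggregate's output; in this patch it is a symlink farm with one entry per sub-check (assumes a Nix new enough for `--print-out-paths`):
+
+```bash
+# List the sub-checks bundled into the portable aggregate check:
+ls -l "$(nix build .#checks.x86_64-linux.portable-control-plane-regressions --print-out-paths)"
+```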
+
+Publishable nested-KVM suite:
+
```bash
nix develop
nix run ./nix/test-cluster#cluster -- fresh-smoke
+nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp
+nix run ./nix/test-cluster#cluster -- fresh-matrix
+./nix/test-cluster/run-publishable-kvm-suite.sh ./work/publishable-kvm-suite
```
+Project-done release proof now requires both halves of the public validation surface to be green:
+
+- `baremetal-iso` and `baremetal-iso-e2e` for the canonical `deployer -> installer -> nix-agent` bare-metal bootstrap path
+- the KVM publishable suite (`fresh-smoke`, `fresh-demo-vm-webapp`, `fresh-matrix`) for the nested-KVM multi-node VM-hosting path
+
+Canonical bare-metal bootstrap proof:
+
+```bash
+nix run ./nix/test-cluster#cluster -- baremetal-iso
+nix build .#checks.x86_64-linux.baremetal-iso-e2e
+```
+
+## Canonical Profiles
+
+UltraCloud now fixes the public support surface to three canonical profiles:
+
+| Profile | Primary Nix outputs | Required components | Optional components |
+| --- | --- | --- | --- |
+| `single-node dev` | `nix run .#single-node-quickstart`, `nixosConfigurations.single-node-quickstart`, companion install image `nixosConfigurations.netboot-all-in-one` | `chainfire`, `flaredb`, `iam`, `plasmavmc`, `prismnet` | `lightningstor`, `coronafs`, `flashdns`, `fiberlb`, `apigateway`, `nightlight`, `creditservice`, `k8shost`, `deployer` |
+| `3-node HA control plane` | `nixosConfigurations.node01`, `node02`, `node03`, `netboot-control-plane` | `chainfire`, `flaredb`, `iam`, `nix-agent` on every control-plane node, plus `deployer` on the bootstrap node | `fleet-scheduler`, `node-agent`, `prismnet`, `flashdns`, `fiberlb`, `plasmavmc`, `lightningstor`, `coronafs`, `k8shost`, `apigateway`, `nightlight`, `creditservice` |
+| `bare-metal bootstrap` | `nixosConfigurations.ultracloud-iso`, `nixosConfigurations.baremetal-qemu-control-plane`, `nixosConfigurations.baremetal-qemu-worker`, `checks.x86_64-linux.baremetal-iso-e2e` | `deployer`, `first-boot-automation`, `install-target`, `nix-agent` | `netboot-control-plane`, `netboot-worker`, and `netboot-all-in-one` as experimental helper images, plus `node-agent`, `fleet-scheduler`, and higher-level storage or edge services after bootstrap |
+
+`netboot-base` is an internal helper image, not a public profile. `netboot-control-plane`, `netboot-worker`, and `netboot-all-in-one` remain experimental helper images until they implement the same phone-home and install semantics as the ISO path. Older launch flows under `baremetal/vm-cluster` are `legacy/manual`, not canonical.
+
+## Responsibility Boundaries
+
+- `k8shost` owns Kubernetes-style pod and service APIs for tenant workloads, then translates them into `prismnet`, `flashdns`, and `fiberlb` objects. It does not place host-native cluster daemons.
+- `fleet-scheduler` owns placement and failover of host-native service instances from declarative cluster state. It consumes `node-agent` heartbeats and writes instance placement, but it does not expose tenant-facing Kubernetes semantics.
+- `deployer` owns machine enrollment, `/api/v1/phone-home`, install plans, cluster metadata, and desired-system references. It decides what a node should become, but it does not execute the host-local switch.
+- `nix-agent` owns host-local NixOS convergence only. It reads desired-system state from `deployer` or `chainfire`, activates the target closure, and rolls back on failed health checks.
+- `node-agent` owns host-local runtime execution only. It reports heartbeats and applies scheduled service-instance state, but it does not install the base OS or rewrite desired-system targets.
+
## Main Entrypoints
-- workspace flake: [flake.nix](/home/centra/cloud/flake.nix)
-- VM validation harness: [nix/test-cluster/README.md](/home/centra/cloud/nix/test-cluster/README.md)
-- shared volume notes: [coronafs/README.md](/home/centra/cloud/coronafs/README.md)
-- minimal quota-service rationale: [creditservice/README.md](/home/centra/cloud/creditservice/README.md)
-- archived manual VM launch scripts: [baremetal/vm-cluster/README.md](/home/centra/cloud/baremetal/vm-cluster/README.md)
+- workspace flake: [flake.nix](flake.nix)
+- single-node quickstart smoke: [`nix run .#single-node-quickstart`](docs/testing.md)
+- portable local proof: [`nix build .#checks.x86_64-linux.portable-control-plane-regressions`](docs/testing.md)
+- canonical bare-metal bootstrap smoke: [`nix run ./nix/test-cluster#cluster -- baremetal-iso`](docs/testing.md)
+- canonical profile guards: [`nix build .#checks.x86_64-linux.canonical-profile-eval-guards`](docs/testing.md), [`nix build .#checks.x86_64-linux.canonical-profile-build-guards`](docs/testing.md)
+- VM validation harness: [nix/test-cluster/README.md](nix/test-cluster/README.md)
+- shared volume notes: [coronafs/README.md](coronafs/README.md)
+- minimal quota-service rationale: [creditservice/README.md](creditservice/README.md)
+- legacy/manual VM launch scripts: [baremetal/vm-cluster/README.md](baremetal/vm-cluster/README.md)
## Repository Guide
-- [docs/README.md](/home/centra/cloud/docs/README.md): documentation entrypoint
-- [docs/testing.md](/home/centra/cloud/docs/testing.md): validation path summary
-- [docs/component-matrix.md](/home/centra/cloud/docs/component-matrix.md): supported multi-component compositions
-- [docs/storage-benchmarks.md](/home/centra/cloud/docs/storage-benchmarks.md): latest CoronaFS and LightningStor lab numbers
+- [docs/README.md](docs/README.md): documentation entrypoint
+- [docs/testing.md](docs/testing.md): validation path summary
+- [docs/component-matrix.md](docs/component-matrix.md): canonical profiles and optional bundles
+- [docs/storage-benchmarks.md](docs/storage-benchmarks.md): latest CoronaFS and LightningStor lab numbers
- `plans/`: design notes and exploration documents
## Scope
UltraCloud is centered on reproducible infrastructure behavior rather than polished end-user product surfaces. Some services, such as `creditservice`, are intentionally minimal reference implementations that prove integration points rather than full products.
-Host-level NixOS rollout validation is also expected to stay reproducible: the `deployer-vm-smoke` VM test now proves that `nix-agent` can activate a prebuilt target system closure directly, without recompiling the stack inside the guest.
+Host-level NixOS rollout validation is also expected to stay reproducible: `baremetal-iso-e2e` is now the full install-path proof, and `canonical-profile-eval-guards` plus `canonical-profile-build-guards` fail fast when supported outputs drift. `portable-control-plane-regressions` is the companion non-KVM developer lane that keeps the main control-plane and rollout boundaries green on TCG-only hosts before the publishable nested-KVM suite is rerun.
diff --git a/docs/README.md b/docs/README.md
index cca3840..ded7b31 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -4,16 +4,16 @@ This directory is the public documentation entrypoint for UltraCloud.
## Read First
-- [../README.md](/home/centra/cloud/README.md)
-- [testing.md](/home/centra/cloud/docs/testing.md)
-- [component-matrix.md](/home/centra/cloud/docs/component-matrix.md)
-- [storage-benchmarks.md](/home/centra/cloud/docs/storage-benchmarks.md)
+- [../README.md](../README.md)
+- [testing.md](testing.md)
+- [component-matrix.md](component-matrix.md)
+- [storage-benchmarks.md](storage-benchmarks.md)
## Key References
-- VM validation harness: [../nix/test-cluster/README.md](/home/centra/cloud/nix/test-cluster/README.md)
-- CoronaFS storage role: [../coronafs/README.md](/home/centra/cloud/coronafs/README.md)
-- CreditService scope note: [../creditservice/README.md](/home/centra/cloud/creditservice/README.md)
+- VM validation harness: [../nix/test-cluster/README.md](../nix/test-cluster/README.md)
+- CoronaFS storage role: [../coronafs/README.md](../coronafs/README.md)
+- CreditService scope note: [../creditservice/README.md](../creditservice/README.md)
## Design Notes
diff --git a/docs/component-matrix.md b/docs/component-matrix.md
index ef48b89..aa8be99 100644
--- a/docs/component-matrix.md
+++ b/docs/component-matrix.md
@@ -1,54 +1,56 @@
# Component Matrix
-UltraCloud is intended to validate meaningful service combinations, not only a single all-on deployment.
-This page summarizes the compositions that are exercised by the VM-cluster harness today.
+UltraCloud now fixes the public support surface to three canonical profiles. This page defines the required and optional component bundles for each profile and keeps everything else explicitly outside the core contract.
-## Validated Control Plane
+## Canonical Profiles
-- `chainfire + flaredb + iam`
+### `single-node dev`
-## Validated Network Provider Layer
+- Required components: `chainfire`, `flaredb`, `iam`, `plasmavmc`, `prismnet`
+- Optional components: `lightningstor`, `coronafs`, `flashdns`, `fiberlb`, `apigateway`, `nightlight`, `creditservice`, `k8shost`, `deployer`
+- Primary Nix outputs: `nix run .#single-node-quickstart`, `nixosConfigurations.single-node-quickstart`, and companion install image `nixosConfigurations.netboot-all-in-one`
+- Optional component toggles: `ultracloud.quickstart.enableLightningStor`, `enableCoronafs`, `enableFlashDNS`, `enableFiberLB`, `enableApiGateway`, `enableNightlight`, `enableCreditService`, `enableK8sHost`
+- Primary use: one-command local bring-up, API development, and one-box VM experimentation without the HA control-plane overhead
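+
+One way to confirm a toggle is wired into the quickstart profile without booting anything; the full option path is assumed from the toggle names listed above:
+
+```bash
+# Evaluate a single optional-component toggle on the quickstart configuration:
+nix eval .#nixosConfigurations.single-node-quickstart.config.ultracloud.quickstart.enableLightningStor
+```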
-- `prismnet`
-- `prismnet + flashdns`
-- `prismnet + fiberlb`
-- `prismnet + flashdns + fiberlb`
+### `3-node HA control plane`
-These combinations justify the existence of the network services as composable providers rather than hidden internal subsystems.
+- Required components: `chainfire`, `flaredb`, `iam`, `nix-agent` on every control-plane node, plus `deployer` on the bootstrap node
+- Optional components: `fleet-scheduler`, `node-agent`, `prismnet`, `flashdns`, `fiberlb`, `plasmavmc`, `lightningstor`, `coronafs`, `k8shost`, `apigateway`, `nightlight`, `creditservice`
+- Primary Nix outputs: `nixosConfigurations.node01`, `node02`, `node03`, `netboot-control-plane`
+- Primary use: stable replicated control plane that can later accept worker, storage, and edge bundles without redefining the bootstrap path
-## Validated VM Hosting Layer
+### `bare-metal bootstrap`
-- `plasmavmc + prismnet`
-- `plasmavmc + lightningstor`
-- `plasmavmc + coronafs`
-- `plasmavmc + coronafs + lightningstor`
-- `plasmavmc + prismnet + coronafs + lightningstor`
+- Required components: `deployer`, `first-boot-automation`, `install-target`, `nix-agent`
+- Optional components: `netboot-control-plane`, `netboot-worker`, and `netboot-all-in-one` as experimental helper images, plus `node-agent`, `fleet-scheduler`, and higher-level storage or edge services after the first successful rollout
+- Primary Nix outputs: `nixosConfigurations.ultracloud-iso`, `nixosConfigurations.baremetal-qemu-control-plane`, `nixosConfigurations.baremetal-qemu-worker`, `checks.x86_64-linux.baremetal-iso-e2e`
+- Primary use: boot the installer ISO, phone home to `deployer`, fetch the flake bundle, run Disko, reboot, and converge QEMU-emulated or real machines into either the single-node or HA profile
-This split keeps mutable VM volumes on CoronaFS and immutable VM images on LightningStor object storage.
+## Optional Composition Bundles
-## Validated Kubernetes-Style Hosting Layer
+The optional bundles below remain important, but they are layered on top of the canonical profiles rather than treated as separate top-level products:
-- `k8shost + prismnet`
-- `k8shost + flashdns`
-- `k8shost + fiberlb`
-- `k8shost + prismnet + flashdns + fiberlb`
+- control-plane core: `chainfire + flaredb + iam`
+- network provider bundle: `prismnet + flashdns + fiberlb`
+- VM hosting bundle: `plasmavmc + prismnet + coronafs + lightningstor`
+- Kubernetes-style hosting bundle: `k8shost + prismnet + flashdns + fiberlb`
+- edge and tenant bundle: `apigateway + iam + nightlight + creditservice`
+- native rollout bundle: `deployer + chainfire + nix-agent + fleet-scheduler + node-agent`
-## Validated Edge And Tenant Services
+`fresh-matrix` is the publishable composition proof because it rebuilds the host-side VM images before validating these bundles on the VM cluster.
-- `apigateway + iam + prismnet`
-- `nightlight + apigateway`
-- `nightlight`
-- `creditservice + iam + apigateway`
-- `creditservice + iam`
-- `deployer + iam + chainfire`
+## Responsibility Boundaries
-## Validation Direction
+- `k8shost`: tenant workload API surface. It manages pod, deployment, and service semantics, then delegates network publication to `prismnet`, `flashdns`, and `fiberlb`.
+- `fleet-scheduler`: bare-metal service placement surface. It schedules host-native service instances from declarative cluster state and `node-agent` heartbeats, without exposing Kubernetes APIs.
+- `deployer`: enrollment and rollout authority. It serves `/api/v1/phone-home`, stores install plans and desired-system references, and seeds cluster metadata.
+- `nix-agent`: host OS reconciler. It turns `deployer` desired-system references into `switch-to-configuration` actions plus rollback and health-check handling.
+- `node-agent`: host runtime reconciler. It applies scheduled service-instance state, keeps runtime heartbeats fresh, and reports host-local execution status back to the scheduler.
-The VM cluster harness now exposes:
+The intended layering is `deployer -> nix-agent` for machine image or NixOS generation changes, and `deployer -> fleet-scheduler -> node-agent` for host-native service placement changes. `k8shost` stays separate because it is the tenant workload control plane, not the native service scheduler.
-```bash
-nix run ./nix/test-cluster#cluster -- matrix
-nix run ./nix/test-cluster#cluster -- fresh-matrix
-```
+## Non-Canonical Paths
-`fresh-matrix` is the publishable path because it rebuilds the host-side VM images before validating the composed service scenarios, including PrismNet-backed PlasmaVMC guests.
+- `baremetal/vm-cluster` remains `legacy/manual`
+- `netboot-control-plane`, `netboot-worker`, `netboot-all-in-one`, `netboot-base`, and `pxe-server` are internal or experimental helpers, not supported profiles by themselves
+- ad hoc shell-driven cluster bring-up is for debugging only and should not be presented as the canonical public path
diff --git a/docs/testing.md b/docs/testing.md
index fb73ad4..975d4cf 100644
--- a/docs/testing.md
+++ b/docs/testing.md
@@ -1,37 +1,113 @@
# Testing
-UltraCloud treats VM-first validation as the canonical local proof path.
+UltraCloud treats VM-first validation as the canonical local proof path and keeps the public support contract limited to three profiles.
-## Canonical Validation
+## Canonical Profiles
+
+| Profile | Primary outputs | Required components | Optional components |
+| --- | --- | --- | --- |
+| `single-node dev` | `nix run .#single-node-quickstart`, `nixosConfigurations.single-node-quickstart`, companion install image `nixosConfigurations.netboot-all-in-one` | `chainfire`, `flaredb`, `iam`, `plasmavmc`, `prismnet` | `lightningstor`, `coronafs`, `flashdns`, `fiberlb`, `apigateway`, `nightlight`, `creditservice`, `k8shost`, `deployer` |
+| `3-node HA control plane` | `nixosConfigurations.node01`, `node02`, `node03`, `netboot-control-plane` | `chainfire`, `flaredb`, `iam`, `nix-agent` on every control-plane node, plus `deployer` on the bootstrap node | `fleet-scheduler`, `node-agent`, `prismnet`, `flashdns`, `fiberlb`, `plasmavmc`, `lightningstor`, `coronafs`, `k8shost`, `apigateway`, `nightlight`, `creditservice` |
+| `bare-metal bootstrap` | `nixosConfigurations.ultracloud-iso`, `nixosConfigurations.baremetal-qemu-control-plane`, `nixosConfigurations.baremetal-qemu-worker`, `checks.x86_64-linux.baremetal-iso-e2e` | `deployer`, `first-boot-automation`, `install-target`, `nix-agent` | `netboot-control-plane`, `netboot-worker`, and `netboot-all-in-one` as experimental helper images, plus `node-agent`, `fleet-scheduler`, and higher-level storage or edge services after bootstrap |
+
+## Quickstart Smoke
```bash
-nix run ./nix/test-cluster#cluster -- fresh-smoke
+nix flake show . --all-systems | rg -n "single|all-in-one|quickstart"
+nix eval --no-eval-cache .#nixosConfigurations.single-node-quickstart.config.system.build.toplevel.drvPath --raw
+nix run .#single-node-quickstart
```
-This flow:
+`single-node-quickstart` is the supported one-box entrypoint. It boots the minimal VM stack under QEMU, waits for `chainfire`, `flaredb`, `iam`, `prismnet`, and `plasmavmc`, and verifies their health from inside the guest. The launcher uses the generated NixOS VM runner, so it can fall back to TCG when `/dev/kvm` is absent.
-- builds all six VM images on the host
-- boots the cluster in dependency order
-- validates control-plane, worker, gateway, storage, and fault-injection behavior
-- proves that `deployer` seeds scheduler-managed native services directly from declarative Nix cluster state
+For debugging, keep the VM alive after the smoke passes:
+
+```bash
+ULTRACLOUD_QUICKSTART_KEEP_VM=1 nix run .#single-node-quickstart
+```
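+
+The launcher also reads a few optional environment knobs (names taken from the launcher script in `flake.nix`; the values below are only illustrative):
+
+```bash
+# Custom state directory and SSH forward port, reusing the previous disk image:
+ULTRACLOUD_QUICKSTART_STATE_DIR="$PWD/quickstart-state" \
+ULTRACLOUD_QUICKSTART_SSH_PORT=22221 \
+ULTRACLOUD_QUICKSTART_REUSE_DISK=1 \
+nix run .#single-node-quickstart
+```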
+
+## Canonical Bare-Metal Proof
+
+```bash
+nix eval --no-eval-cache .#nixosConfigurations.baremetal-qemu-control-plane.config.system.build.toplevel.drvPath --raw
+nix eval --no-eval-cache .#nixosConfigurations.baremetal-qemu-worker.config.system.build.toplevel.drvPath --raw
+nix run ./nix/test-cluster#cluster -- baremetal-iso
+nix build .#checks.x86_64-linux.baremetal-iso-e2e
+```
+
+`baremetal-iso` is the canonical install path for QEMU-as-bare-metal validation. It boots `nixosConfigurations.ultracloud-iso`, waits for `/api/v1/phone-home`, downloads the flake bundle from `deployer`, runs Disko, reboots, confirms the first post-install boot markers, and waits for `nix-agent` to report the desired system as `active` for both `baremetal-qemu-control-plane` and `baremetal-qemu-worker`. `baremetal-iso-e2e` runs the same flow under `flake check`.
+
+## Regression Guards
+
+```bash
+nix build .#checks.x86_64-linux.canonical-profile-eval-guards
+nix build .#checks.x86_64-linux.canonical-profile-build-guards
+```
+
+These two checks are the fast fail-first drift gates for the supported surface:
+
+- `canonical-profile-eval-guards`: forces evaluation of every canonical profile output, including `netboot-worker` and `netboot-all-in-one`, so broken attrs fail before any long-running harness work starts.
+- `canonical-profile-build-guards`: realizes the canonical VM, ISO, control-plane, and helper-image outputs so build-time drift is caught even when a cluster harness is not running.
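+
+Because the eval guard is realized as a JSON snapshot of the evaluated profile data, drift is visible directly in its output; this sketch assumes `jq` on PATH:
+
+```bash
+# Dump the evaluated canonical-profile snapshot behind the eval guard:
+nix build .#checks.x86_64-linux.canonical-profile-eval-guards --print-out-paths | xargs cat | jq .
+```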
+
+## Portable Local Proof
+
+```bash
+nix build .#checks.x86_64-linux.canonical-profile-eval-guards
+nix build .#checks.x86_64-linux.portable-control-plane-regressions
+```
+
+Use this lane on Linux hosts that do not expose `/dev/kvm`:
+
+- `portable-control-plane-regressions`: TCG-safe aggregate check that keeps the canonical profile eval guard, `deployer-bootstrap-e2e`, `host-lifecycle-e2e`, `deployer-vm-smoke`, and `fleet-scheduler-e2e` green together.
+- It intentionally does not boot the six-node nested-KVM VM suite, so it is a developer regression path, not the publishable multi-node proof.
+- CI runs `canonical-profile-eval-guards` and `portable-control-plane-regressions` on every relevant change via the `portable-regressions` job in `.github/workflows/nix.yml`.
## Publishable Checks
```bash
+nix run .#single-node-quickstart
+nix run ./nix/test-cluster#cluster -- baremetal-iso
nix run ./nix/test-cluster#cluster -- fresh-smoke
nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp
nix run ./nix/test-cluster#cluster -- fresh-matrix
-nix run ./nix/test-cluster#cluster -- fresh-bench-storage
+./nix/test-cluster/run-publishable-kvm-suite.sh ./work/publishable-kvm-suite
+nix build .#checks.x86_64-linux.baremetal-iso-e2e
nix build .#checks.x86_64-linux.deployer-vm-smoke
```
Use these commands as the release-facing local proof set:
-- `fresh-smoke`: whole-cluster readiness, core behavior, and fault injection
-- `fresh-demo-vm-webapp`: focused VM demo showing a web app inside the guest with FlareDB-backed state and LightningStor object snapshots surviving restart and migration
-- `fresh-matrix`: composed service scenarios such as `prismnet + flashdns + fiberlb` and PrismNet-backed VM hosting bundles with `plasmavmc + coronafs + lightningstor`
-- `fresh-bench-storage`: CoronaFS local-vs-shared-volume throughput, cross-worker volume visibility, and LightningStor large/small-object throughput capture
-- `deployer-vm-smoke`: prebuilt NixOS system closure handoff into `nix-agent`, proving host rollout can activate a host-built target without guest-side compilation
+- `single-node-quickstart`: productized one-command quickstart gate for the minimal VM platform profile
+- `baremetal-iso`: canonical bare-metal bootstrap gate covering pre-install boot, phone-home, flake bundle fetch, Disko install, reboot, post-install boot, and desired-system activation on one control-plane node plus one worker-equivalent node
+- `fresh-smoke`: base VM-cluster gate for the canonical multi-node topology, including readiness, core behavior, and fault injection
+- `fresh-demo-vm-webapp`: optional VM-hosting bundle proof for `plasmavmc + prismnet` with state persisted through `lightningstor`
+- `fresh-matrix`: optional composition proof for provider bundles such as `prismnet + flashdns + fiberlb` and `plasmavmc + coronafs + lightningstor`
+- `run-publishable-kvm-suite.sh`: reproducible wrapper that captures the KVM environment and runs the full publishable nested-KVM trio in a single command
+- `baremetal-iso-e2e`: flake-check wrapper around the same canonical ISO harness
+- `deployer-vm-smoke`: lightweight regression proving that `nix-agent` can activate a host-built target closure without guest-side compilation
+
+## Responsibility Coverage
+
+- `baremetal-iso` and `baremetal-iso-e2e` are the canonical proof for `deployer -> installer -> nix-agent`. They cover phone-home, install-plan materialization, Disko, reboot, and desired-system activation.
+- `deployer-vm-smoke` is the smallest regression for the same `deployer -> nix-agent` boundary. It proves that a node can receive a prebuilt target closure and activate it without guest-side compilation.
+- `portable-control-plane-regressions` keeps the main TCG-safe rollout and control-plane boundaries under continuous coverage by composing `deployer-bootstrap-e2e`, `host-lifecycle-e2e`, `deployer-vm-smoke`, and `fleet-scheduler-e2e` behind the canonical profile eval guard.
+- `fresh-smoke` and `fresh-matrix` are the canonical proof for `deployer -> fleet-scheduler -> node-agent`. They cover native service placement, heartbeats, failover, and runtime reconciliation.
+- `fresh-smoke` also covers `k8shost` separately from `fleet-scheduler`: `k8shost` exposes tenant pod and service semantics, while `fleet-scheduler` handles bare-metal host services.
+
+The three `fresh-*` VM-cluster commands are the publishable nested-KVM suite. They require a Linux host with `/dev/kvm` and nested virtualization, and the harness stops at preflight by design when that device is absent. `single-node-quickstart`, `baremetal-iso`, `baremetal-iso-e2e`, `deployer-vm-smoke`, and `portable-control-plane-regressions` can run on TCG-only hosts, but they are slower without host KVM.
+
+Release-facing completion now requires both of these to be green on the same branch:
+
+- the canonical bare-metal proof: `nix run ./nix/test-cluster#cluster -- baremetal-iso` plus `nix build .#checks.x86_64-linux.baremetal-iso-e2e`
+- the publishable nested-KVM suite: `fresh-smoke`, `fresh-demo-vm-webapp`, and `fresh-matrix`, preferably through `./nix/test-cluster/run-publishable-kvm-suite.sh`
+
+## Extended Measurements
+
+```bash
+nix run ./nix/test-cluster#cluster -- fresh-bench-storage
+```
+
+`fresh-bench-storage` remains useful for storage regression tracking, but it is a benchmark path, not part of the minimal canonical publish gate.
## Operational Commands
@@ -53,8 +129,11 @@ nix run ./nix/test-cluster#cluster -- clean
- package unit tests are useful but not sufficient
- host-built VM clusters are the main integration signal
+- bootstrap and rollout paths must stay evaluable independently of the larger VM-hosting feature set
- distributed storage and virtualization paths must be checked under failure, not only at steady state
-## Legacy Note
+## Legacy And Experimental Paths
-Older manual launch scripts under `baremetal/vm-cluster` are archived only for historical reference. They are not the release-validation path.
+- `baremetal/vm-cluster` manual launch scripts are `legacy/manual`, not canonical validation
+- direct `nix develop ./nix/test-cluster -c ./nix/test-cluster/run-cluster.sh ...` usage is a debugging path, not the publishable entrypoint
+- `netboot-control-plane`, `netboot-worker`, `netboot-all-in-one`, `netboot-base`, `pxe-server`, and other helper images are internal or experimental building blocks, not supported profiles by themselves
diff --git a/flake.nix b/flake.nix
index 9166f57..0292c63 100644
--- a/flake.nix
+++ b/flake.nix
@@ -963,6 +963,185 @@
self.packages.${system}.vmClusterDeployerState
];
};
+
+ single-node-quickstart-vm =
+ self.nixosConfigurations.single-node-quickstart.config.system.build.vm;
+
+ single-node-quickstart = pkgs.writeShellApplication {
+ name = "single-node-quickstart";
+ runtimeInputs = with pkgs; [
+ coreutils
+ findutils
+ netcat
+ openssh
+ procps
+ sshpass
+ ];
+ text = ''
+ set -euo pipefail
+
+ STATE_DIR="''${ULTRACLOUD_QUICKSTART_STATE_DIR:-$HOME/.ultracloud-single-node-quickstart}"
+ RUN_DIR="$STATE_DIR/run"
+ DISK_IMAGE="$STATE_DIR/quickstart.qcow2"
+ PID_FILE="$STATE_DIR/qemu.pid"
+ SERIAL_LOG="$STATE_DIR/serial.log"
+ SSH_PORT="''${ULTRACLOUD_QUICKSTART_SSH_PORT:-22220}"
+ KEEP_VM="''${ULTRACLOUD_QUICKSTART_KEEP_VM:-0}"
+ REUSE_DISK="''${ULTRACLOUD_QUICKSTART_REUSE_DISK:-0}"
+ VM_PATH="${self.packages.${system}.single-node-quickstart-vm}"
+ RUN_VM="$(find "$VM_PATH/bin" -maxdepth 1 -name 'run-*-vm' | head -n1)"
+
+ log() {
+ printf '[single-node-quickstart] %s\n' "$*"
+ }
+
+ dump_serial() {
+ if [ -f "$SERIAL_LOG" ]; then
+ log "serial log tail:"
+ tail -n 120 "$SERIAL_LOG" >&2 || true
+ fi
+ }
+
+ cleanup() {
+ if [ -f "$PID_FILE" ]; then
+ pid="$(cat "$PID_FILE")"
+ if kill -0 "$pid" >/dev/null 2>&1; then
+ kill "$pid" >/dev/null 2>&1 || true
+ for _ in $(seq 1 30); do
+ if ! kill -0 "$pid" >/dev/null 2>&1; then
+ break
+ fi
+ sleep 1
+ done
+ fi
+ rm -f "$PID_FILE"
+ fi
+ }
+
+ on_exit() {
+ status="$?"
+ if [ "$status" -ne 0 ]; then
+ dump_serial
+ fi
+ if [ "$KEEP_VM" != "1" ]; then
+ cleanup
+ fi
+ exit "$status"
+ }
+
+ wait_for_ssh() {
+ local deadline=$((SECONDS + 240))
+ while true; do
+ if sshpass -p ultracloud ssh \
+ -F /dev/null \
+ -o StrictHostKeyChecking=no \
+ -o UserKnownHostsFile=/dev/null \
+ -o LogLevel=ERROR \
+ -o ConnectTimeout=5 \
+ -o ConnectionAttempts=1 \
+ -p "$SSH_PORT" \
+ root@127.0.0.1 true >/dev/null 2>&1; then
+ return 0
+ fi
+ if [ "$SECONDS" -ge "$deadline" ]; then
+ log "timed out waiting for SSH on port $SSH_PORT"
+ return 1
+ fi
+ sleep 1
+ done
+ }
+
+ wait_for_unit_active() {
+ local unit="$1"
+ local deadline=$((SECONDS + 240))
+ while true; do
+ if ssh_cmd systemctl is-active "$unit" >/dev/null 2>&1; then
+ return 0
+ fi
+ if [ "$SECONDS" -ge "$deadline" ]; then
+ log "timed out waiting for $unit"
+ ssh_cmd systemctl status "$unit" --no-pager || true
+ return 1
+ fi
+ sleep 1
+ done
+ }
+
+ ssh_cmd() {
+ sshpass -p ultracloud ssh \
+ -F /dev/null \
+ -o StrictHostKeyChecking=no \
+ -o UserKnownHostsFile=/dev/null \
+ -o LogLevel=ERROR \
+ -o ConnectTimeout=5 \
+ -o ConnectionAttempts=1 \
+ -p "$SSH_PORT" \
+ root@127.0.0.1 -- "$@"
+ }
+
+ ssh_shell() {
+ local script="$1"
+ local quoted
+ printf -v quoted '%q' "$script"
+ sshpass -p ultracloud ssh \
+ -F /dev/null \
+ -o StrictHostKeyChecking=no \
+ -o UserKnownHostsFile=/dev/null \
+ -o LogLevel=ERROR \
+ -o ConnectTimeout=5 \
+ -o ConnectionAttempts=1 \
+ -p "$SSH_PORT" \
+ root@127.0.0.1 "bash -lc $quoted"
+ }
+
+ trap on_exit EXIT
+
+ [ -n "$RUN_VM" ] || {
+ log "failed to locate run-*-vm under $VM_PATH/bin"
+ exit 1
+ }
+
+ mkdir -p "$STATE_DIR"
+ rm -rf "$RUN_DIR"
+ mkdir -p "$RUN_DIR"
+ rm -f "$SERIAL_LOG"
+ if [ "$REUSE_DISK" != "1" ]; then
+ rm -f "$DISK_IMAGE"
+ fi
+
+ cleanup
+
+ log "launching single-node quickstart VM"
+ nohup env \
+ USE_TMPDIR=1 \
+ TMPDIR="$RUN_DIR" \
+ NIX_DISK_IMAGE="$DISK_IMAGE" \
+ QEMU_NET_OPTS="hostfwd=tcp:127.0.0.1:$SSH_PORT-:22" \
+ "$RUN_VM" >"$SERIAL_LOG" 2>&1 &
+ echo "$!" > "$PID_FILE"
+
+ log "waiting for guest SSH"
+ wait_for_ssh
+
+ log "waiting for in-guest readiness gate"
+ wait_for_unit_active ultracloud-single-node-quickstart-ready.service
+
+ log "verifying required services"
+ ssh_cmd systemctl is-active chainfire flaredb iam prismnet plasmavmc >/dev/null
+
+ log "verifying service health endpoints and VM runtime prerequisites"
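+          # Assumed port-to-service mapping, matching the httpPort defaults
+          # introduced in this patch: 8081 chainfire, 8082 flaredb, 8083 iam,
+          # 8087 prismnet, 8084 plasmavmc.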
+ ssh_shell 'curl -fsS http://127.0.0.1:8081/health >/dev/null && curl -fsS http://127.0.0.1:8082/health >/dev/null && curl -fsS http://127.0.0.1:8083/health >/dev/null && curl -fsS http://127.0.0.1:8087/health >/dev/null && curl -fsS http://127.0.0.1:8084/health >/dev/null && test -x /run/current-system/sw/bin/qemu-system-x86_64 && test -x /run/current-system/sw/bin/qemu-img && test -c /dev/net/tun'
+
+ log "single-node quickstart smoke passed"
+
+ if [ "$KEEP_VM" = "1" ]; then
+ trap - EXIT
+ log "VM left running"
+ log "ssh: sshpass -p ultracloud ssh -p $SSH_PORT root@127.0.0.1"
+ exit 0
+ fi
+ '';
+ };
};
# ======================================================================
@@ -1044,9 +1223,75 @@
fleet-scheduler = flake-utils.lib.mkApp {
drv = self.packages.${system}.fleet-scheduler;
};
+
+ single-node-quickstart = flake-utils.lib.mkApp {
+ drv = self.packages.${system}.single-node-quickstart;
+ };
+
+ all-in-one-quickstart = flake-utils.lib.mkApp {
+ drv = self.packages.${system}.single-node-quickstart;
+ };
};
- checks = {
+ checks =
+ let
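+        # Drop the "kvm" entry from requiredSystemFeatures so these NixOS
+        # tests can also be scheduled on TCG-only builders; they run the same
+        # test script, just more slowly without hardware acceleration.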
+ stripKvmRequiredSystemFeature = drv:
+ drv.overrideTestDerivation (old: {
+ requiredSystemFeatures =
+ builtins.filter (feature: feature != "kvm") (old.requiredSystemFeatures or [ ]);
+ });
+
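+      # Evaluation-time snapshot of every canonical profile output; the eval
+      # guard below serializes it to JSON so a broken attr fails during
+      # evaluation instead of mid-harness.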
+ canonicalProfileEvalData = {
+ single-node-quickstart = {
+ hostName = self.nixosConfigurations.single-node-quickstart.config.networking.hostName;
+ stateVersion =
+ self.nixosConfigurations.single-node-quickstart.config.system.stateVersion;
+ };
+ node01 = {
+ hostName = self.nixosConfigurations.node01.config.networking.hostName;
+ stateVersion = self.nixosConfigurations.node01.config.system.stateVersion;
+ };
+ node02 = {
+ hostName = self.nixosConfigurations.node02.config.networking.hostName;
+ stateVersion = self.nixosConfigurations.node02.config.system.stateVersion;
+ };
+ node03 = {
+ hostName = self.nixosConfigurations.node03.config.networking.hostName;
+ stateVersion = self.nixosConfigurations.node03.config.system.stateVersion;
+ };
+ netboot-control-plane = {
+ hostName = self.nixosConfigurations.netboot-control-plane.config.networking.hostName;
+ stateVersion =
+ self.nixosConfigurations.netboot-control-plane.config.system.stateVersion;
+ };
+ netboot-worker = {
+ hostName = self.nixosConfigurations.netboot-worker.config.networking.hostName;
+ stateVersion =
+ self.nixosConfigurations.netboot-worker.config.system.stateVersion;
+ };
+ netboot-all-in-one = {
+ hostName = self.nixosConfigurations.netboot-all-in-one.config.networking.hostName;
+ stateVersion =
+ self.nixosConfigurations.netboot-all-in-one.config.system.stateVersion;
+ };
+ ultracloud-iso = {
+ hostName = self.nixosConfigurations.ultracloud-iso.config.networking.hostName;
+ imageFileName = self.nixosConfigurations.ultracloud-iso.config.image.fileName;
+ };
+ baremetal-qemu-control-plane = {
+ hostName =
+ self.nixosConfigurations.baremetal-qemu-control-plane.config.networking.hostName;
+ stateVersion =
+ self.nixosConfigurations.baremetal-qemu-control-plane.config.system.stateVersion;
+ };
+ baremetal-qemu-worker = {
+ hostName = self.nixosConfigurations.baremetal-qemu-worker.config.networking.hostName;
+ stateVersion =
+ self.nixosConfigurations.baremetal-qemu-worker.config.system.stateVersion;
+ };
+ };
+ in
+ {
workspace-source-roots-audit = pkgs.runCommand "workspace-source-roots-audit"
{
nativeBuildInputs = [ pkgs.python3 ];
@@ -1169,6 +1414,76 @@
touch "$out"
'';
+ canonical-profile-eval-guards = pkgs.writeText "canonical-profile-eval-guards.json"
+ (builtins.toJSON canonicalProfileEvalData);
+
+ canonical-profile-build-guards = pkgs.linkFarm "canonical-profile-build-guards" [
+ {
+ name = "single-node-quickstart-vm";
+ path = self.packages.${system}.single-node-quickstart-vm;
+ }
+ {
+ name = "node01-toplevel";
+ path = self.nixosConfigurations.node01.config.system.build.toplevel;
+ }
+ {
+ name = "node02-toplevel";
+ path = self.nixosConfigurations.node02.config.system.build.toplevel;
+ }
+ {
+ name = "node03-toplevel";
+ path = self.nixosConfigurations.node03.config.system.build.toplevel;
+ }
+ {
+ name = "netboot-control-plane-toplevel";
+ path = self.nixosConfigurations.netboot-control-plane.config.system.build.toplevel;
+ }
+ {
+ name = "netboot-worker-toplevel";
+ path = self.nixosConfigurations.netboot-worker.config.system.build.toplevel;
+ }
+ {
+ name = "netboot-all-in-one-toplevel";
+ path = self.nixosConfigurations.netboot-all-in-one.config.system.build.toplevel;
+ }
+ {
+ name = "ultracloud-iso-image";
+ path = self.nixosConfigurations.ultracloud-iso.config.system.build.isoImage;
+ }
+ {
+ name = "baremetal-qemu-control-plane-toplevel";
+ path = self.nixosConfigurations.baremetal-qemu-control-plane.config.system.build.toplevel;
+ }
+ {
+ name = "baremetal-qemu-worker-toplevel";
+ path = self.nixosConfigurations.baremetal-qemu-worker.config.system.build.toplevel;
+ }
+ ];
+
+ portable-control-plane-regressions =
+ pkgs.linkFarm "portable-control-plane-regressions" [
+ {
+ name = "canonical-profile-eval-guards";
+ path = self.checks.${system}.canonical-profile-eval-guards;
+ }
+ {
+ name = "deployer-bootstrap-e2e";
+ path = self.checks.${system}.deployer-bootstrap-e2e;
+ }
+ {
+ name = "host-lifecycle-e2e";
+ path = self.checks.${system}.host-lifecycle-e2e;
+ }
+ {
+ name = "deployer-vm-smoke";
+ path = self.checks.${system}.deployer-vm-smoke;
+ }
+ {
+ name = "fleet-scheduler-e2e";
+ path = self.checks.${system}.fleet-scheduler-e2e;
+ }
+ ];
+
first-boot-topology-vm-smoke = pkgs.testers.runNixOSTest (
import ./nix/tests/first-boot-topology-vm-smoke.nix {
inherit pkgs;
@@ -1177,15 +1492,15 @@
}
);
- deployer-vm-smoke = pkgs.testers.runNixOSTest (
+ deployer-vm-smoke = stripKvmRequiredSystemFeature (pkgs.testers.runNixOSTest (
import ./nix/tests/deployer-vm-smoke.nix {
inherit pkgs;
ultracloudPackages = self.packages.${system};
smokeTargetToplevel = self.packages.${system}.vmSmokeBundledTargetToplevel;
}
- );
+ ));
- deployer-vm-rollback = pkgs.testers.runNixOSTest (
+ deployer-vm-rollback = stripKvmRequiredSystemFeature (pkgs.testers.runNixOSTest (
import ./nix/tests/deployer-vm-smoke.nix {
inherit pkgs;
ultracloudPackages = self.packages.${system};
@@ -1198,7 +1513,83 @@
expectCurrentSystemMatchesTarget = false;
expectMarkerPresent = false;
}
- );
+ ));
+
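+      # Flake-check wrapper around the canonical ISO harness. Every input is a
+      # pinned store path so verify-baremetal-iso.sh can resolve them inside
+      # the build sandbox.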
+ baremetal-iso-e2e = pkgs.runCommand "baremetal-iso-e2e"
+ {
+ nativeBuildInputs = with pkgs; [
+ bash
+ coreutils
+ curl
+ findutils
+ gawk
+ gnugrep
+ gnused
+ iproute2
+ jq
+ nix
+ openssh
+ procps
+ python3
+ qemu
+ ];
+ preferLocalBuild = true;
+ allowSubstitutes = false;
+ ULTRACLOUD_BAREMETAL_ISO_IMAGE =
+ "${self.nixosConfigurations.ultracloud-iso.config.system.build.isoImage}";
+ ULTRACLOUD_BAREMETAL_FLAKE_BUNDLE =
+ "${self.packages.${system}.ultracloudFlakeBundle}";
+ ULTRACLOUD_BAREMETAL_CONTROL_TARGET =
+ "${self.nixosConfigurations.baremetal-qemu-control-plane.config.system.build.toplevel}";
+ ULTRACLOUD_BAREMETAL_WORKER_TARGET =
+ "${self.nixosConfigurations.baremetal-qemu-worker.config.system.build.toplevel}";
+ ULTRACLOUD_BAREMETAL_CONTROL_DISKO_SCRIPT =
+ "${self.nixosConfigurations.baremetal-qemu-control-plane.config.system.build.formatMount}";
+ ULTRACLOUD_BAREMETAL_WORKER_DISKO_SCRIPT =
+ "${self.nixosConfigurations.baremetal-qemu-worker.config.system.build.formatMount}";
+ ULTRACLOUD_BAREMETAL_CACHE_REGISTRATION = "${pkgs.closureInfo {
+ rootPaths = [
+ self.nixosConfigurations.baremetal-qemu-control-plane.config.system.build.toplevel
+ self.nixosConfigurations.baremetal-qemu-worker.config.system.build.toplevel
+ self.nixosConfigurations.baremetal-qemu-control-plane.config.system.build.formatMount
+ self.nixosConfigurations.baremetal-qemu-worker.config.system.build.formatMount
+ ];
+ }}";
+ ULTRACLOUD_CHAINFIRE_SERVER_BIN =
+ "${self.packages.${system}.chainfire-server}/bin/chainfire";
+ ULTRACLOUD_DEPLOYER_SERVER_BIN =
+ "${self.packages.${system}.deployer-server}/bin/deployer-server";
+ ULTRACLOUD_DEPLOYER_CTL_BIN =
+ "${self.packages.${system}.deployer-ctl}/bin/deployer-ctl";
+ ULTRACLOUD_OVMF_CODE = "${pkgs.OVMF.fd}/FV/OVMF_CODE.fd";
+ ULTRACLOUD_OVMF_VARS = "${pkgs.OVMF.fd}/FV/OVMF_VARS.fd";
+ ULTRACLOUD_QEMU_BIN = "${pkgs.qemu}/bin/qemu-system-x86_64";
+ ULTRACLOUD_QEMU_IMG_BIN = "${pkgs.qemu}/bin/qemu-img";
+ ULTRACLOUD_REPO_ROOT = "${self}";
+ NIX_CONFIG = "experimental-features = nix-command flakes";
+ } ''
+ export HOME="$TMPDIR/home"
+ mkdir -p "$HOME"
+ export NIX_CONFIG="$NIX_CONFIG"
+ export PATH="${pkgs.lib.makeBinPath [
+ pkgs.bash
+ pkgs.coreutils
+ pkgs.curl
+ pkgs.findutils
+ pkgs.gawk
+ pkgs.gnugrep
+ pkgs.gnused
+ pkgs.iproute2
+ pkgs.jq
+ pkgs.nix
+ pkgs.openssh
+ pkgs.procps
+ pkgs.python3
+ pkgs.qemu
+ ]}"
+ bash ${./nix/test-cluster/verify-baremetal-iso.sh}
+ touch "$out"
+ '';
fiberlb-native-bgp-vm-smoke = pkgs.testers.runNixOSTest (
import ./nix/tests/fiberlb-native-bgp-vm-smoke.nix {
@@ -1363,6 +1754,9 @@
nixosConfigurations =
let
vmClusterLib = import ./nix/nodes/vm-cluster/lib.nix { lib = nixpkgs.lib; };
+ overlayModule = {
+ nixpkgs.overlays = [ self.overlays.default ];
+ };
mkVmClusterSystem = nodeName:
nixpkgs.lib.nixosSystem {
system = "x86_64-linux";
@@ -1382,25 +1776,74 @@
# Control Plane netboot image (all 8 services)
netboot-control-plane = nixpkgs.lib.nixosSystem {
system = "x86_64-linux";
- modules = [ ./nix/images/netboot-control-plane.nix ];
+ modules = [
+ ./nix/images/netboot-control-plane.nix
+ overlayModule
+ ];
};
# Worker netboot image (compute-focused services)
netboot-worker = nixpkgs.lib.nixosSystem {
system = "x86_64-linux";
- modules = [ ./nix/images/netboot-worker.nix ];
+ modules = [
+ ./nix/images/netboot-worker.nix
+ overlayModule
+ ];
};
# All-in-One netboot image (single-node deployment)
netboot-all-in-one = nixpkgs.lib.nixosSystem {
system = "x86_64-linux";
- modules = [ ./nix/images/netboot-all-in-one.nix ];
+ modules = [
+ ./nix/images/netboot-all-in-one.nix
+ overlayModule
+ ];
+ };
+
+ # QEMU-first single-node quickstart for one-command local bring-up.
+ single-node-quickstart = nixpkgs.lib.nixosSystem {
+ system = "x86_64-linux";
+ modules = [
+ ./nix/single-node/qemu-vm.nix
+ ./nix/single-node/base.nix
+ self.nixosModules.default
+ overlayModule
+ {
+ ultracloud.quickstart.enable = true;
+ }
+ ];
+ };
+
+ # Canonical bare-metal ISO install targets used by the QEMU proof path.
+ baremetal-qemu-control-plane = nixpkgs.lib.nixosSystem {
+ system = "x86_64-linux";
+ modules = [
+ disko.nixosModules.disko
+ ./nix/nodes/baremetal-qemu/control-plane/configuration.nix
+ ./nix/nodes/baremetal-qemu/control-plane/disko.nix
+ self.nixosModules.default
+ overlayModule
+ ];
+ };
+
+ baremetal-qemu-worker = nixpkgs.lib.nixosSystem {
+ system = "x86_64-linux";
+ modules = [
+ disko.nixosModules.disko
+ ./nix/nodes/baremetal-qemu/worker/configuration.nix
+ ./nix/nodes/baremetal-qemu/worker/disko.nix
+ self.nixosModules.default
+ overlayModule
+ ];
};
# Base netboot image (minimal, for VM testing and provisioning)
netboot-base = nixpkgs.lib.nixosSystem {
system = "x86_64-linux";
- modules = [ ./nix/images/netboot-base.nix ];
+ modules = [
+ ./nix/images/netboot-base.nix
+ overlayModule
+ ];
};
# Offline-friendly target used by deployer VM smoke tests.
@@ -1412,6 +1855,20 @@
# UltraCloud ISO (T061.S5 - bootable ISO with cluster-config embedding)
ultracloud-iso = nixpkgs.lib.nixosSystem {
system = "x86_64-linux";
+ specialArgs = {
+ ultracloudBaremetalFormatMountPaths = {
+ baremetal-qemu-control-plane =
+ self.nixosConfigurations."baremetal-qemu-control-plane".config.system.build.formatMount;
+ baremetal-qemu-worker =
+ self.nixosConfigurations."baremetal-qemu-worker".config.system.build.formatMount;
+ };
+ ultracloudBaremetalSystemPaths = {
+ baremetal-qemu-control-plane =
+ self.nixosConfigurations."baremetal-qemu-control-plane".config.system.build.toplevel;
+ baremetal-qemu-worker =
+ self.nixosConfigurations."baremetal-qemu-worker".config.system.build.toplevel;
+ };
+ };
modules = [
./nix/iso/ultracloud-iso.nix
self.nixosModules.default
@@ -1455,13 +1912,13 @@
apigateway-server = self.packages.${final.system}.apigateway-server;
k8shost-server = self.packages.${final.system}.k8shost-server;
deployer-workspace = self.packages.${final.system}.deployer-workspace;
- deployer-server = self.packages.${final.system}.deployer-workspace;
- deployer-ctl = self.packages.${final.system}.deployer-workspace;
- ultracloud-reconciler = self.packages.${final.system}.deployer-workspace;
+ deployer-server = self.packages.${final.system}.deployer-server;
+ deployer-ctl = self.packages.${final.system}.deployer-ctl;
+ ultracloud-reconciler = self.packages.${final.system}.ultracloud-reconciler;
ultracloudFlakeBundle = self.packages.${final.system}.ultracloudFlakeBundle;
- nix-agent = self.packages.${final.system}.deployer-workspace;
- node-agent = self.packages.${final.system}.deployer-workspace;
- fleet-scheduler = self.packages.${final.system}.deployer-workspace;
+ nix-agent = self.packages.${final.system}.nix-agent;
+ node-agent = self.packages.${final.system}.node-agent;
+ fleet-scheduler = self.packages.${final.system}.fleet-scheduler;
};
};
}
diff --git a/nix/ci/flake.nix b/nix/ci/flake.nix
index d797f50..0ba7557 100644
--- a/nix/ci/flake.nix
+++ b/nix/ci/flake.nix
@@ -379,6 +379,9 @@
${gate}/bin/ultracloud-gate --tier 0 --no-logs
touch $out/ok
'';
+ checks.canonical-profile-eval-guards = ultracloud.checks.${system}.canonical-profile-eval-guards;
+ checks.portable-control-plane-regressions =
+ ultracloud.checks.${system}.portable-control-plane-regressions;
checks.deployer-vm-smoke = ultracloud.checks.${system}.deployer-vm-smoke;
checks.deployer-vm-rollback = ultracloud.checks.${system}.deployer-vm-rollback;
checks.deployer-bootstrap-e2e = ultracloud.checks.${system}.deployer-bootstrap-e2e;
diff --git a/nix/images/netboot-all-in-one.nix b/nix/images/netboot-all-in-one.nix
index b829e3d..919af2a 100644
--- a/nix/images/netboot-all-in-one.nix
+++ b/nix/images/netboot-all-in-one.nix
@@ -3,220 +3,113 @@
{
imports = [
./netboot-base.nix
- ../modules # Import UltraCloud service modules
+ ../modules
];
# ============================================================================
- # ALL-IN-ONE PROFILE
+ # SINGLE-NODE / ALL-IN-ONE INSTALL IMAGE
# ============================================================================
- # This profile includes all 8 UltraCloud services for a single-node deployment:
- # - Chainfire: Distributed configuration and coordination
- # - FlareDB: Time-series metrics and events database
- # - IAM: Identity and access management
- # - PlasmaVMC: Virtual machine control plane
- # - PrismNET: Software-defined networking controller
- # - FlashDNS: High-performance DNS server
- # - FiberLB: Layer 4/7 load balancer
- # - LightningStor: Distributed block storage
- # - K8sHost: Kubernetes hosting component
+ # This netboot image is the bare-metal companion to the QEMU-first
+ # `single-node-quickstart` profile. It keeps only the minimum VM stack in the
+ # image by default and leaves DNS, load-balancing, storage, API, metrics, and
+ # Kubernetes layers as explicit add-ons in the final installed system.
#
- # This profile is optimized for:
- # - Development/testing environments
- # - Small deployments (1-3 nodes)
- # - Edge locations with limited infrastructure
- # - Proof-of-concept installations
+ # Included by default:
+ # - Chainfire: local coordination and placement metadata
+ # - FlareDB: metadata/event storage
+ # - IAM: local identity plane for the dev profile
+ # - PrismNET: VM networking control plane
+ # - PlasmaVMC: VM control plane
#
- # Services are DISABLED by default in the netboot image.
- # They will be enabled in the final installed system configuration.
+ # Optional after install:
+ # - LightningStor, CoronaFS
+ # - FlashDNS, FiberLB
+ # - API Gateway, Nightlight, CreditService
+ # - K8sHost
# ============================================================================
- # ============================================================================
- # SERVICE PACKAGE AVAILABILITY
- # ============================================================================
- # Make all service packages available in the netboot image
environment.systemPackages = with pkgs; [
- # Core services
chainfire-server
flaredb-server
iam-server
-
- # Compute and networking
- plasmavmc-server
prismnet-server
-
- # Network services
- flashdns-server
- fiberlb-server
-
- # Storage
- lightningstor-server
-
- # Container orchestration
- k8shost-server
-
- # Additional tools for all-in-one deployment
- qemu # For running VMs
- libvirt # Virtualization management
- bridge-utils # Network bridge configuration
- openvswitch # Software-defined networking
+ plasmavmc-server
+ qemu
+ libvirt
+ bridge-utils
+ openvswitch
+ curl
+ jq
];
- # ============================================================================
- # CHAINFIRE CONFIGURATION (DISABLED)
- # ============================================================================
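+  # Each service keeps its wire port and additionally exposes an HTTP health
+  # endpoint (httpPort); these 808x endpoints match the ports probed by the
+  # single-node quickstart.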
services.chainfire = {
enable = lib.mkDefault false;
port = 2379;
raftPort = 2380;
gossipPort = 2381;
+ httpPort = 8081;
};
- # ============================================================================
- # FLAREDB CONFIGURATION (DISABLED)
- # ============================================================================
services.flaredb = {
enable = lib.mkDefault false;
port = 2479;
raftPort = 2480;
+ httpPort = 8082;
};
- # ============================================================================
- # IAM CONFIGURATION (DISABLED)
- # ============================================================================
services.iam = {
enable = lib.mkDefault false;
- port = 8080;
+ port = 50080;
+ httpPort = 8083;
};
- # ============================================================================
- # PLASMAVMC CONFIGURATION (DISABLED)
- # ============================================================================
- services.plasmavmc = {
- enable = lib.mkDefault false;
- port = 8081;
- };
-
- # ============================================================================
- # PRISMNET CONFIGURATION (DISABLED)
- # ============================================================================
services.prismnet = {
enable = lib.mkDefault false;
- port = 8082;
+ port = 50081;
+ httpPort = 8087;
};
- # ============================================================================
- # FLASHDNS CONFIGURATION (DISABLED)
- # ============================================================================
- services.flashdns = {
+ services.plasmavmc = {
enable = lib.mkDefault false;
- port = 53;
+ port = 50082;
+ httpPort = 8084;
};
- # ============================================================================
- # FIBERLB CONFIGURATION (DISABLED)
- # ============================================================================
- services.fiberlb = {
- enable = lib.mkDefault false;
- port = 8083;
- };
-
- # ============================================================================
- # LIGHTNINGSTOR CONFIGURATION (DISABLED)
- # ============================================================================
- services.lightningstor = {
- enable = lib.mkDefault false;
- port = 8084;
- };
-
- # ============================================================================
- # K8SHOST CONFIGURATION (DISABLED)
- # ============================================================================
- services.k8shost = {
- enable = lib.mkDefault false;
- port = 8085;
- };
-
- # ============================================================================
- # VIRTUALIZATION SUPPORT
- # ============================================================================
- # Enable KVM virtualization
- boot.kernelModules = [ "kvm-intel" "kvm-amd" ];
-
- # Enable nested virtualization
+ boot.kernelModules = [ "kvm-intel" "kvm-amd" "tun" ];
boot.extraModprobeConfig = ''
options kvm_intel nested=1
options kvm_amd nested=1
'';
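+ # Nested virtualization can be verified at runtime with
+ #   cat /sys/module/kvm_intel/parameters/nested   (kvm_amd on AMD hosts)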
- # ============================================================================
- # NETWORKING CONFIGURATION
- # ============================================================================
- # Enable Open vSwitch for SDN
networking.vswitches = lib.mkDefault {};
- # Open firewall ports for all services
networking.firewall.allowedTCPPorts = [
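+ # 22 SSH; 2379/2380/2381 Chainfire API/Raft/gossip; 2479/2480 FlareDB
+ # API/Raft; 50080 IAM, 50081 PrismNET, 50082 PlasmaVMC; 8081-8084 and
+ # 8087 per-service HTTP ports; 16509 libvirtd; 5900 VNC console access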
- # Chainfire
- 2379 # API
- 2380 # Raft
- 2381 # Gossip
-
- # FlareDB
- 2479 # API
- 2480 # Raft
-
- # IAM
- 8080
-
- # PlasmaVMC
+ 22
+ 2379
+ 2380
+ 2381
+ 2479
+ 2480
+ 50080
+ 50081
+ 50082
8081
-
- # PrismNET
8082
-
- # FlashDNS
- 53
-
- # FiberLB
8083
-
- # LightningStor
8084
-
- # K8sHost
- 8085
-
- # QEMU/LibVirt
- 16509 # libvirtd
- 5900 # VNC (for VM console access)
+ 8087
+ 16509
+ 5900
];
networking.firewall.allowedUDPPorts = [
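+ # 2381 Chainfire gossip; 4789 VXLAN for overlay networking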
- # FlashDNS
- 53
-
- # Chainfire gossip
2381
-
- # VXLAN for overlay networking
4789
];
- # ============================================================================
- # STORAGE CONFIGURATION
- # ============================================================================
- # Enable LVM for flexible storage management
services.lvm.enable = true;
-
- # Enable ZFS if needed
boot.supportedFilesystems = [ "ext4" "xfs" "btrfs" "zfs" ];
- # ============================================================================
- # RESOURCE LIMITS (BALANCED FOR ALL-IN-ONE)
- # ============================================================================
- # Balance resources between services on a single node
- # These are minimal limits for netboot; adjust in final config based on hardware
-
systemd.services.chainfire.serviceConfig = lib.mkIf config.services.chainfire.enable {
MemoryMax = "1G";
CPUQuota = "100%";
@@ -242,26 +135,13 @@
CPUQuota = "50%";
};
- # ============================================================================
- # PERFORMANCE TUNING
- # ============================================================================
- # Optimize for mixed workload (services + VMs)
boot.kernel.sysctl = {
- # Increase max number of open files
"fs.file-max" = 1000000;
-
- # Increase network buffer sizes
+ "net.core.netdev_max_backlog" = 5000;
"net.core.rmem_max" = 134217728;
"net.core.wmem_max" = 134217728;
-
- # Enable IP forwarding for VM networking
"net.ipv4.ip_forward" = 1;
"net.ipv6.conf.all.forwarding" = 1;
-
- # Optimize for high-performance networking
- "net.core.netdev_max_backlog" = 5000;
-
- # Swappiness for server workloads
"vm.swappiness" = 10;
};
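+ # Spot-check applied values after boot, e.g. `sysctl net.ipv4.ip_forward`.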
}
diff --git a/nix/iso/ultracloud-iso.nix b/nix/iso/ultracloud-iso.nix
index 0c35c1f..33397a3 100644
--- a/nix/iso/ultracloud-iso.nix
+++ b/nix/iso/ultracloud-iso.nix
@@ -2,7 +2,15 @@
# Minimal ISO with DHCP + Phone Home to Deployer + Auto-Install
# For VM cluster deployment: boots, phones home, partitions disk, installs NixOS
-{ config, lib, pkgs, modulesPath, ... }:
+{
+ config,
+ lib,
+ pkgs,
+ modulesPath,
+ ultracloudBaremetalFormatMountPaths ? { },
+ ultracloudBaremetalSystemPaths ? { },
+ ...
+}:
{
imports = [
@@ -58,16 +66,34 @@
return 1
}
+ dmi_value() {
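+ # Print the file's contents minus newlines; empty when unreadable.
+ # Example: dmi_value /sys/class/dmi/id/product_serial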
+ local path="$1"
+ if [ -r "$path" ]; then
+ tr -d '\n' <"$path" 2>/dev/null || true
+ fi
+ }
+
+ resolve_deployer_url() {
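+ # Precedence: $DEPLOYER_URL env var, ultracloud.deployer_url kernel arg,
+ # the QEMU user-net host (10.0.2.2:8088) when its /health answers, then
+ # the static 192.168.100.1:8080 default.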
+ local explicit_url="''${DEPLOYER_URL:-}"
+ if [ -z "$explicit_url" ]; then
+ explicit_url="$(cmdline_value ultracloud.deployer_url || true)"
+ fi
+ if [ -n "$explicit_url" ]; then
+ echo "$explicit_url"
+ return 0
+ fi
+ if ${pkgs.curl}/bin/curl -fsS --connect-timeout 2 --max-time 5 \
+ http://10.0.2.2:8088/health >/dev/null 2>&1; then
+ echo "http://10.0.2.2:8088"
+ return 0
+ fi
+ echo "http://192.168.100.1:8080"
+ }
+
mkdir -p /etc/ultracloud
- # Discover Deployer via environment, kernel cmdline, or fallback.
+ # Discover Deployer via environment, kernel cmdline, QEMU host probe, or static fallback.
- DEPLOYER_URL="''${DEPLOYER_URL:-}"
- if [ -z "$DEPLOYER_URL" ]; then
- DEPLOYER_URL="$(cmdline_value ultracloud.deployer_url || true)"
- fi
- if [ -z "$DEPLOYER_URL" ]; then
- DEPLOYER_URL="http://192.168.100.1:8080"
- fi
+ DEPLOYER_URL="$(resolve_deployer_url)"
# Get machine identity
MACHINE_ID=$(cat /etc/machine-id)
@@ -113,7 +139,24 @@
if [ -z "$NODE_IP" ]; then
NODE_IP=$(hostname -I 2>/dev/null | ${pkgs.gawk}/bin/awk '{print $1}')
fi
- NODE_HOSTNAME=$(hostname)
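+ # Node identity precedence: env var, kernel cmdline, DMI product serial,
+ # and finally the live hostname.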
+ REQUESTED_NODE_ID="''${ULTRACLOUD_NODE_ID:-}"
+ if [ -z "$REQUESTED_NODE_ID" ]; then
+ REQUESTED_NODE_ID="$(cmdline_value ultracloud.node_id || true)"
+ fi
+ if [ -z "$REQUESTED_NODE_ID" ]; then
+ REQUESTED_NODE_ID="$(dmi_value /sys/class/dmi/id/product_serial)"
+ fi
+ if [ -z "$REQUESTED_NODE_ID" ]; then
+ REQUESTED_NODE_ID="$(hostname)"
+ fi
+ REQUESTED_HOSTNAME="''${ULTRACLOUD_HOSTNAME:-}"
+ if [ -z "$REQUESTED_HOSTNAME" ]; then
+ REQUESTED_HOSTNAME="$(cmdline_value ultracloud.hostname || true)"
+ fi
+ if [ -z "$REQUESTED_HOSTNAME" ]; then
+ REQUESTED_HOSTNAME="$REQUESTED_NODE_ID"
+ fi
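+ # Stable, grep-able progress marker intended for external log watchers.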
+ echo "ULTRACLOUD_MARKER pre-install.boot.$REQUESTED_NODE_ID"
CPU_MODEL=$(${pkgs.gawk}/bin/awk -F: '/model name/ {gsub(/^[ \t]+/, "", $2); print $2; exit}' /proc/cpuinfo 2>/dev/null || true)
CPU_CORES=$(${pkgs.gawk}/bin/awk '/^cpu cores/ {print $4; exit}' /proc/cpuinfo 2>/dev/null || true)
CPU_THREADS=$(${pkgs.coreutils}/bin/nproc --all 2>/dev/null || true)
@@ -172,8 +215,8 @@
')
REQUEST_JSON=$(${pkgs.jq}/bin/jq -n \
--arg machine_id "$MACHINE_ID" \
- --arg node_id "$NODE_HOSTNAME" \
- --arg hostname "$NODE_HOSTNAME" \
+ --arg node_id "$REQUESTED_NODE_ID" \
+ --arg hostname "$REQUESTED_HOSTNAME" \
--arg ip "$NODE_IP" \
--argjson hardware_facts "$HARDWARE_FACTS" '
{
@@ -253,6 +296,7 @@
# Signal success
NODE_ID=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.node_config.assignment.node_id // "unknown"')
+ echo "ULTRACLOUD_MARKER pre-install.phone-home.complete.$NODE_ID"
echo "✓ Bootstrap complete: $NODE_ID"
exit 0
else
@@ -282,6 +326,7 @@
script = ''
set -euo pipefail
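+ # Make nix and nixos-install resolvable inside this systemd unit.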
+ export PATH="${pkgs.nix}/bin:${config.system.build.nixos-install}/bin:$PATH"
cmdline_value() {
local key="$1"
@@ -297,6 +342,40 @@
return 1
}
+ resolve_deployer_url() {
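+ # Mirrors the bootstrap unit's helper; duplicated because each systemd
+ # script runs standalone.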
+ local explicit_url="''${DEPLOYER_URL:-}"
+ if [ -z "$explicit_url" ]; then
+ explicit_url="$(cmdline_value ultracloud.deployer_url || true)"
+ fi
+ if [ -n "$explicit_url" ]; then
+ echo "$explicit_url"
+ return 0
+ fi
+ if ${pkgs.curl}/bin/curl -fsS --connect-timeout 2 --max-time 5 \
+ http://10.0.2.2:8088/health >/dev/null 2>&1; then
+ echo "http://10.0.2.2:8088"
+ return 0
+ fi
+ echo "http://192.168.100.1:8080"
+ }
+
+ resolve_binary_cache_url() {
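+ # Same precedence as resolve_deployer_url, but returns non-zero when no
+ # binary cache is configured or reachable.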
+ local explicit_url="''${ULTRACLOUD_BINARY_CACHE_URL:-}"
+ if [ -z "$explicit_url" ]; then
+ explicit_url="$(cmdline_value ultracloud.binary_cache_url || true)"
+ fi
+ if [ -n "$explicit_url" ]; then
+ echo "$explicit_url"
+ return 0
+ fi
+ if ${pkgs.curl}/bin/curl -fsS --connect-timeout 2 --max-time 5 \
+ http://10.0.2.2:8090/nix-cache-info >/dev/null 2>&1; then
+ echo "http://10.0.2.2:8090"
+ return 0
+ fi
+ return 1
+ }
+
if [ ! -s /etc/ultracloud/node-config.json ]; then
echo "ERROR: node-config.json missing (bootstrap not complete?)"
exit 1
@@ -305,16 +384,17 @@
NODE_ID=$(${pkgs.jq}/bin/jq -r '.assignment.hostname // .assignment.node_id // empty' /etc/ultracloud/node-config.json)
NODE_IP=$(${pkgs.jq}/bin/jq -r '.assignment.ip // empty' /etc/ultracloud/node-config.json)
NIXOS_CONFIGURATION=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.nixos_configuration // .assignment.hostname // empty' /etc/ultracloud/node-config.json)
- DISKO_PATH=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.disko_config_path // empty' /etc/ultracloud/node-config.json)
+ DISKO_SCRIPT_PATH=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.disko_script_path // empty' /etc/ultracloud/node-config.json)
+ if [ -z "$DISKO_SCRIPT_PATH" ] && [ -r /etc/ultracloud/disko-script-paths.json ]; then
+ DISKO_SCRIPT_PATH=$(${pkgs.jq}/bin/jq -r --arg cfg "$NIXOS_CONFIGURATION" '.[$cfg] // empty' /etc/ultracloud/disko-script-paths.json)
+ fi
+ TARGET_SYSTEM_PATH=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.target_system_path // empty' /etc/ultracloud/node-config.json)
+ if [ -z "$TARGET_SYSTEM_PATH" ] && [ -r /etc/ultracloud/system-paths.json ]; then
+ TARGET_SYSTEM_PATH=$(${pkgs.jq}/bin/jq -r --arg cfg "$NIXOS_CONFIGURATION" '.[$cfg] // empty' /etc/ultracloud/system-paths.json)
+ fi
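+ # Both sidecar files (disko-script-paths.json, system-paths.json) are
+ # assumed to be flat JSON maps from configuration name to a prebuilt
+ # store path, e.g. {"<config>": "/nix/store/…"}.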
TARGET_DISK=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.target_disk // empty' /etc/ultracloud/node-config.json)
TARGET_DISK_BY_ID=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.target_disk_by_id // empty' /etc/ultracloud/node-config.json)
- DEPLOYER_URL="''${DEPLOYER_URL:-}"
- if [ -z "$DEPLOYER_URL" ]; then
- DEPLOYER_URL="$(cmdline_value ultracloud.deployer_url || true)"
- fi
- if [ -z "$DEPLOYER_URL" ]; then
- DEPLOYER_URL="http://192.168.100.1:8080"
- fi
+ DEPLOYER_URL="$(resolve_deployer_url)"
SRC_ROOT="/opt/ultracloud-src"
if [ -z "$NODE_ID" ] || [ -z "$NODE_IP" ]; then
@@ -362,6 +442,7 @@
"$DEPLOYER_URL/api/v1/bootstrap/flake-bundle" \
-o "$BUNDLE_PATH"; then
echo "Downloaded bootstrap flake bundle from deployer"
+ echo "ULTRACLOUD_MARKER install.bundle-downloaded.$NODE_ID"
rm -rf "$SRC_ROOT"
mkdir -p "$SRC_ROOT"
${pkgs.gzip}/bin/gzip -dc "$BUNDLE_PATH" | ${pkgs.gnutar}/bin/tar -xf - -C "$SRC_ROOT"
@@ -369,24 +450,12 @@
echo "No deployer flake bundle available; using embedded source tree"
fi
- if [ -z "$DISKO_PATH" ]; then
- CANDIDATE_DISKO="nix/nodes/vm-cluster/$NODE_ID/disko.nix"
- if [ -f "$SRC_ROOT/$CANDIDATE_DISKO" ]; then
- DISKO_PATH="$CANDIDATE_DISKO"
- fi
+ echo "ULTRACLOUD_MARKER install.start.$NODE_ID"
+ DISPLAY_TARGET_DISK="$TARGET_DISK"
+ if [ -n "$TARGET_DISK_BY_ID" ]; then
+ DISPLAY_TARGET_DISK="$TARGET_DISK_BY_ID"
fi
-
- if [ -z "$DISKO_PATH" ]; then
- echo "ERROR: node-config.json missing install_plan.disko_config_path and no default Disko path exists for $NODE_ID"
- exit 1
- fi
-
- if [ ! -f "$SRC_ROOT/$DISKO_PATH" ]; then
- echo "ERROR: Disko config not found: $SRC_ROOT/$DISKO_PATH"
- exit 1
- fi
-
- echo "UltraCloud install starting for $NODE_ID (ip=$NODE_IP, nixos_configuration=$NIXOS_CONFIGURATION, disko_path=$DISKO_PATH)"
+ echo "UltraCloud install starting for $NODE_ID (ip=$NODE_IP, nixos_configuration=$NIXOS_CONFIGURATION, target_disk=$DISPLAY_TARGET_DISK)"
# Resolve installation target disk.
if [ -n "$TARGET_DISK_BY_ID" ]; then
@@ -423,50 +492,99 @@
umount /mnt || true
fi
- echo "Validating NixOS configuration output..."
- nix eval --raw "$SRC_ROOT#nixosConfigurations.$NIXOS_CONFIGURATION.config.system.build.toplevel.drvPath" >/dev/null
-
- EFFECTIVE_DISKO_PATH="$SRC_ROOT/$DISKO_PATH"
- if [ -n "$DISK" ]; then
- cat > /run/ultracloud/disko-wrapper.nix <