Compare commits
13 commits
task/0fe10
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| d87fe3f4c5 | |||
| 3cf0cd49b9 | |||
| 955214f393 | |||
| c527d50a9e | |||
| f931f892e3 | |||
| c1c610d2db | |||
| 26a306da1c | |||
| 8bb926d66f | |||
| a581c9f3b9 | |||
| b8ef9b64ad | |||
| 45e77a70ed | |||
| bf208ca0ff | |||
| 11cd8be2f7 |
14 changed files with 207 additions and 3 deletions
116
.github/workflows/kvm-publishable-selfhosted.yml
vendored
Normal file
116
.github/workflows/kvm-publishable-selfhosted.yml
vendored
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
name: KVM Publishable Validation
|
||||
|
||||
on:
|
||||
push:
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
publishable-kvm-suite:
|
||||
runs-on:
|
||||
- nix-host
|
||||
- cn-nixos-mouse-runner
|
||||
timeout-minutes: 360
|
||||
|
||||
steps:
|
||||
- name: Ensure Nix Is Available
|
||||
run: |
|
||||
# Make sure a working `nix` is on PATH for this step, bootstrapping a
# single-user install when the runner does not already provide one, then
# enable the nix-command and flakes experimental features for this user.
set -euo pipefail
export PATH="/run/current-system/sw/bin:/nix/var/nix/profiles/default/bin:$HOME/.nix-profile/bin:$PATH"

# Source the multi-user profile script when present, otherwise the per-user
# one. Doing nothing when neither exists is intentional: the caller decides
# what to do if `nix` is still missing afterwards.
load_nix_profile() {
  if [[ -f /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh ]]; then
    . /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh
  elif [[ -f "$HOME/.nix-profile/etc/profile.d/nix.sh" ]]; then
    . "$HOME/.nix-profile/etc/profile.d/nix.sh"
  fi
}

load_nix_profile

if ! command -v nix >/dev/null 2>&1; then
  # The bootstrap path needs xz and curl; fail with a clear message instead
  # of a confusing error halfway through the installer pipeline.
  if ! command -v xz >/dev/null 2>&1; then
    echo "Nix is not on PATH and xz is unavailable for bootstrap" >&2
    exit 1
  fi
  if ! command -v curl >/dev/null 2>&1; then
    echo "Nix is not on PATH and curl is unavailable for bootstrap" >&2
    exit 1
  fi
  # --fail stops an HTTP error page from being piped into sh; --proto and
  # --tlsv1.2 keep redirects from downgrading the transport.
  curl --proto '=https' --tlsv1.2 -fsSL https://nixos.org/nix/install | sh -s -- --no-daemon
  load_nix_profile
fi

mkdir -p "$HOME/.config/nix"
printf '%s\n' 'experimental-features = nix-command flakes' > "$HOME/.config/nix/nix.conf"
nix --version
|
||||
|
||||
- name: Checkout Repository
|
||||
env:
|
||||
REPO_URL: https://git.centraworks.net/centra/photoncloud-monorepo
|
||||
REPO_ACTOR: ${{ github.actor }}
|
||||
REPO_TOKEN: ${{ github.token }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
export PATH="/run/current-system/sw/bin:/usr/bin:/bin:$PATH"
|
||||
|
||||
#######################################
# Pick the scratch directory with the most free space for the checkout.
# Candidates are tried in the order /var/tmp, /tmp, $HOME; on a tie the
# earlier candidate wins because the comparison is strict.
# Outputs:  the chosen directory on stdout; a diagnostic on stderr on failure
# Returns:  0 when a directory was selected, 1 when no candidate is usable
#######################################
choose_checkout_root() {
  local candidate avail best="" best_avail=-1
  for candidate in /var/tmp /tmp "${HOME:-}"; do
    [[ -n "$candidate" ]] || continue
    mkdir -p "$candidate" 2>/dev/null || continue
    # Field 4 of the data row in POSIX-format df output is the space
    # available, in 1024-byte units because of -k.
    avail="$(df -Pk "$candidate" 2>/dev/null | awk 'NR==2 { print $4 }')" || continue
    # Only plain integers may reach the arithmetic comparison below; an
    # empty or placeholder value would otherwise be a syntax error there.
    [[ "$avail" =~ ^[0-9]+$ ]] || continue
    if (( avail > best_avail )); then
      best="$candidate"
      best_avail="$avail"
    fi
  done
  # Fail loudly instead of printing an empty path: the caller appends a
  # template to this value, so "" would place the checkout under "/".
  if [[ -z "$best" ]]; then
    printf '%s\n' 'no usable checkout root among /var/tmp, /tmp and $HOME' >&2
    return 1
  fi
  printf '%s\n' "$best"
}
|
||||
|
||||
checkout_root="$(choose_checkout_root)"
|
||||
repo_root="$(mktemp -d "${checkout_root}/ultracloud-kvm-checkout.XXXXXX")"
|
||||
auth="$(printf '%s' "${REPO_ACTOR}:${REPO_TOKEN}" | base64 | tr -d '\n')"
|
||||
|
||||
git init "$repo_root"
|
||||
cd "$repo_root"
|
||||
git remote add origin "$REPO_URL"
|
||||
git -c http.extraHeader="AUTHORIZATION: basic ${auth}" fetch --depth=1 origin "${GITHUB_SHA}"
|
||||
git checkout --detach FETCH_HEAD
|
||||
git config --global --add safe.directory "$repo_root"
|
||||
|
||||
{
|
||||
printf 'REPO_ROOT=%s\n' "$repo_root"
|
||||
printf 'CHECKOUT_ROOT=%s\n' "$checkout_root"
|
||||
} >> "$GITHUB_ENV"
|
||||
|
||||
- name: Probe KVM Environment
|
||||
run: |
|
||||
set -euo pipefail
|
||||
export PATH="/run/current-system/sw/bin:/usr/bin:/bin:$PATH"
|
||||
if [[ -f /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh ]]; then
|
||||
. /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh
|
||||
elif [[ -f "$HOME/.nix-profile/etc/profile.d/nix.sh" ]]; then
|
||||
. "$HOME/.nix-profile/etc/profile.d/nix.sh"
|
||||
fi
|
||||
echo "hostname=$(uname -n)"
|
||||
uname -a
|
||||
id
|
||||
test -e /dev/kvm
|
||||
ls -l /dev/kvm
|
||||
if [[ -f /sys/module/kvm_intel/parameters/nested ]]; then
|
||||
echo "kvm_intel_nested=$(cat /sys/module/kvm_intel/parameters/nested)"
|
||||
fi
|
||||
if [[ -f /sys/module/kvm_amd/parameters/nested ]]; then
|
||||
echo "kvm_amd_nested=$(cat /sys/module/kvm_amd/parameters/nested)"
|
||||
fi
|
||||
echo "runner_temp=${RUNNER_TEMP}"
|
||||
echo "repo_root=${REPO_ROOT}"
|
||||
echo "checkout_root=${CHECKOUT_ROOT}"
|
||||
df -h / /tmp /var/tmp "$RUNNER_TEMP" || true
|
||||
df -h "$REPO_ROOT" || true
|
||||
df -h /nix || true
|
||||
|
||||
- name: Run Publishable KVM Suite
|
||||
run: |
|
||||
set -euo pipefail
|
||||
export PATH="/run/current-system/sw/bin:/usr/bin:/bin:$PATH"
|
||||
if [[ -f /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh ]]; then
|
||||
. /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh
|
||||
elif [[ -f "$HOME/.nix-profile/etc/profile.d/nix.sh" ]]; then
|
||||
. "$HOME/.nix-profile/etc/profile.d/nix.sh"
|
||||
fi
|
||||
cd "$REPO_ROOT"
|
||||
bash ./nix/test-cluster/run-publishable-kvm-suite.sh "$RUNNER_TEMP/publishable-kvm-suite"
|
||||
|
|
@ -97,7 +97,7 @@ nix run ./nix/test-cluster#cluster -- chainfire-live-membership-proof
|
|||
./nix/test-cluster/run-publishable-kvm-suite.sh ./work/publishable-kvm-suite
|
||||
```
|
||||
|
||||
The checked-in entrypoint for the publishable nested-KVM suite is the local wrapper `./nix/test-cluster/run-publishable-kvm-suite.sh`. Runner-specific workflow wiring from `task/f5c70db0-baseline-profiles` is intentionally not part of this re-aggregated baseline.
|
||||
The checked-in local entrypoint for the publishable nested-KVM suite is `./nix/test-cluster/run-publishable-kvm-suite.sh`. The repository-owned remote entrypoint is [`.github/workflows/kvm-publishable-selfhosted.yml`](.github/workflows/kvm-publishable-selfhosted.yml), which runs the same wrapper on Forgejo runners labeled `nix-host` and `cn-nixos-mouse-runner`.
|
||||
For the full supported-surface proof on a local AMD/KVM host, use `./nix/test-cluster/run-supported-surface-final-proof.sh ./work/final-proofs/latest`; it keeps builders local, builds `single-node-trial-vm`, runs `single-node-quickstart`, and captures the publishable KVM suite logs in one place.
|
||||
`nix run ./nix/test-cluster#cluster -- durability-proof` is the canonical ChainFire, FlareDB, and Deployer backup/restore lane. It persists artifacts under `./work/durability-proof/latest`, proves logical backup/restore for ChainFire keys and FlareDB SQL rows, uses the canonical Deployer admin pre-register request itself as the backup artifact, verifies that the pre-registered node survives a `deployer.service` restart, replays the same request idempotently, and injects CoronaFS plus LightningStor failures against the same live KVM cluster.
|
||||
`nix run ./nix/test-cluster#cluster -- rollout-soak` is the longer-running control-plane and rollout companion lane. It rebuilds from clean local KVM runtime state, persists artifacts under `./work/rollout-soak/latest`, validates exactly one planned `draining` maintenance cycle and one fail-stop worker-loss cycle on the two native-runtime workers, holds each degraded state for the configured soak window, then restarts `deployer`, `fleet-scheduler`, `node-agent`, `chainfire`, and `flaredb` before revalidating the cluster. The soak root also carries explicit scope markers so the supported boundary is encoded in the proof artifacts rather than only in docs. The steady-state KVM nodes do not run `nix-agent.service`, so the soak lane records explicit `nix-agent` scope markers instead of pretending a live-cluster `nix-agent` restart happened.
|
||||
|
|
|
|||
|
|
@ -176,7 +176,7 @@ Use these commands as the release-facing local proof set:
|
|||
|
||||
`single-node-trial-vm` and `single-node-quickstart` are the standalone VM-platform story. They keep the minimal KVM-backed surface separate from the rollout stack.
|
||||
|
||||
The checked-in entrypoint for the publishable KVM proof is the local wrapper `./nix/test-cluster/run-publishable-kvm-suite.sh`. Runner-specific workflow wiring from `task/f5c70db0-baseline-profiles` is intentionally excluded from this baseline branch.
|
||||
The checked-in local entrypoint for the publishable KVM proof is `./nix/test-cluster/run-publishable-kvm-suite.sh`. The repository-owned remote entrypoint is [`.github/workflows/kvm-publishable-selfhosted.yml`](../.github/workflows/kvm-publishable-selfhosted.yml), which runs the same wrapper on Forgejo runners labeled `nix-host` and `cn-nixos-mouse-runner`.
|
||||
The 2026-04-10 local AMD/KVM proof snapshot is recorded under `./work/final-proofs/32f64c10-1b74-4d8a-8d7d-b2cc6bf6b4f0-final` for `supported-surface-guard`, `single-node-trial-vm`, and `single-node-quickstart`, under `./work/publishable-kvm-suite` for the passing `fresh-smoke`, `fresh-demo-vm-webapp`, `fresh-matrix`, and wrapper environment capture, and under `./work/rollout-soak/20260410T164549+0900` for the longer-running rollout/control-plane soak.
|
||||
The 2026-04-10 exact bare-metal check-runner proof is recorded under `./work/baremetal-iso-e2e/0de75570-dabd-471b-95fe-5898c54e2e8c`; its outer `environment.txt` records `execution_model=materialized-check-runner`, while `state/environment.txt` records `vm_accelerator_mode=kvm`.
|
||||
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ When `/dev/kvm` and nested virtualization are available, the reproducible publis
|
|||
`./nix/test-cluster/run-core-control-plane-ops-proof.sh` is the focused operator lifecycle proof for `chainfire`, `flaredb`, and `iam`. It records the published ChainFire live-membership API boundary, the FlareDB additive-first migration and destructive-DDL boundary, and the standalone IAM bootstrap hardening plus signing-key, credential, and mTLS rotation proof under `./work/core-control-plane-ops-proof`.
|
||||
`./nix/test-cluster/work-root-budget.sh` is the checked helper for local disk budget reporting, stronger local enforcement, and safer cleanup guidance under `./work`.
|
||||
The dated 2026-04-10 artifact root for the focused control-plane proof is `./work/core-control-plane-ops-proof/20260410T172148+09:00`.
|
||||
Runner-specific workflow wiring from `task/f5c70db0-baseline-profiles` is intentionally excluded from this re-aggregated baseline; the checked-in artifact here is the local wrapper.
|
||||
The repository-owned remote entrypoint for the same publishable KVM proof is [`.github/workflows/kvm-publishable-selfhosted.yml`](../../.github/workflows/kvm-publishable-selfhosted.yml). It runs the local wrapper on Forgejo runners labeled `nix-host` and `cn-nixos-mouse-runner`.
|
||||
|
||||
## What it validates
|
||||
|
||||
|
|
|
|||
4
plans/baselines/logs/nix-build-deployer-vm-smoke.meta
Normal file
4
plans/baselines/logs/nix-build-deployer-vm-smoke.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
command=nix build .#checks.x86_64-linux.deployer-vm-smoke
|
||||
start=2026-04-04T16:44:34+09:00
|
||||
end=2026-04-04T16:50:40+09:00
|
||||
status=1
|
||||
4
plans/baselines/logs/nix-eval-netboot-all-in-one.meta
Normal file
4
plans/baselines/logs/nix-eval-netboot-all-in-one.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
command=nix eval --raw .#nixosConfigurations.netboot-all-in-one.config.system.build.toplevel.drvPath
|
||||
start=2026-04-04T16:43:54+09:00
|
||||
end=2026-04-04T16:43:56+09:00
|
||||
status=1
|
||||
4
plans/baselines/logs/nix-eval-netboot-control-plane.meta
Normal file
4
plans/baselines/logs/nix-eval-netboot-control-plane.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
command=nix eval --raw .#nixosConfigurations.netboot-control-plane.config.system.build.toplevel.drvPath
|
||||
start=2026-04-04T16:43:54+09:00
|
||||
end=2026-04-04T16:44:01+09:00
|
||||
status=0
|
||||
4
plans/baselines/logs/nix-eval-netboot-worker.meta
Normal file
4
plans/baselines/logs/nix-eval-netboot-worker.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
command=nix eval --raw .#nixosConfigurations.netboot-worker.config.system.build.toplevel.drvPath
|
||||
start=2026-04-04T16:43:54+09:00
|
||||
end=2026-04-04T16:43:56+09:00
|
||||
status=1
|
||||
4
plans/baselines/logs/nix-eval-node01.meta
Normal file
4
plans/baselines/logs/nix-eval-node01.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
command=nix eval --raw .#nixosConfigurations.node01.config.system.build.toplevel.drvPath
|
||||
start=2026-04-04T16:43:45+09:00
|
||||
end=2026-04-04T16:43:49+09:00
|
||||
status=0
|
||||
4
plans/baselines/logs/nix-eval-ultracloud-iso.meta
Normal file
4
plans/baselines/logs/nix-eval-ultracloud-iso.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
command=nix eval --raw .#nixosConfigurations.ultracloud-iso.config.system.build.toplevel.drvPath
|
||||
start=2026-04-04T16:43:34+09:00
|
||||
end=2026-04-04T16:43:41+09:00
|
||||
status=0
|
||||
4
plans/baselines/logs/nix-run-fresh-demo-vm-webapp.meta
Normal file
4
plans/baselines/logs/nix-run-fresh-demo-vm-webapp.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
command=nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp
|
||||
start=2026-04-04T16:48:18+09:00
|
||||
end=2026-04-04T16:48:23+09:00
|
||||
status=1
|
||||
4
plans/baselines/logs/nix-run-fresh-matrix.meta
Normal file
4
plans/baselines/logs/nix-run-fresh-matrix.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
command=nix run ./nix/test-cluster#cluster -- fresh-matrix
|
||||
start=2026-04-04T16:48:26+09:00
|
||||
end=2026-04-04T16:48:29+09:00
|
||||
status=1
|
||||
4
plans/baselines/logs/nix-run-fresh-smoke.meta
Normal file
4
plans/baselines/logs/nix-run-fresh-smoke.meta
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
command=nix run ./nix/test-cluster#cluster -- fresh-smoke
|
||||
start=2026-04-04T16:46:41+09:00
|
||||
end=2026-04-04T16:48:14+09:00
|
||||
status=1
|
||||
52
plans/baselines/main-baseline-2026-04-04.md
Normal file
52
plans/baselines/main-baseline-2026-04-04.md
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# UltraCloud Baseline 2026-04-04
|
||||
|
||||
Branch: `task/f5c70db0-baseline-profiles` from `origin/main`
|
||||
|
||||
This file records the results of the smoke/build/eval commands required by task `f5c70db0-0106-4200-bf99-0c5105116367`, captured before any profile-definition changes.
|
||||
|
||||
## Branch Setup
|
||||
|
||||
```bash
|
||||
git fetch origin && git switch -c task/f5c70db0-baseline-profiles origin/main
|
||||
```
|
||||
|
||||
Result: success. The working branch now tracks `origin/main`.
|
||||
|
||||
## Environment Notes
|
||||
|
||||
- Host kernel: `Linux cn-ubuntu-xgpu 6.17.0-14-generic`
|
||||
- Nix: `2.33.3`
|
||||
- `/dev/kvm`: absent in this environment
|
||||
- Nix builder features observed during `deployer-vm-smoke`: `{benchmark, big-parallel, nixos-test, uid-range}`
|
||||
- Raw command logs are stored under `plans/baselines/logs/`
|
||||
|
||||
## Baseline Command Results
|
||||
|
||||
| Command | Start | End | Status | Result summary |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| `nix run ./nix/test-cluster#cluster -- fresh-smoke` | `2026-04-04T16:46:41+09:00` | `2026-04-04T16:48:14+09:00` | `1` | built the cluster runner closure, then failed preflight with `/dev/kvm is not present; nested-KVM VM validation requires hardware virtualization` |
|
||||
| `nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp` | `2026-04-04T16:48:18+09:00` | `2026-04-04T16:48:23+09:00` | `1` | failed preflight with `/dev/kvm is not present; nested-KVM VM validation requires hardware virtualization` |
|
||||
| `nix run ./nix/test-cluster#cluster -- fresh-matrix` | `2026-04-04T16:48:26+09:00` | `2026-04-04T16:48:29+09:00` | `1` | failed preflight with `/dev/kvm is not present; nested-KVM VM validation requires hardware virtualization` |
|
||||
| `nix build .#checks.x86_64-linux.deployer-vm-smoke` | `2026-04-04T16:44:34+09:00` | `2026-04-04T16:50:40+09:00` | `1` | built most of the test closure, then failed because the current builder does not advertise the required `kvm` system feature |
|
||||
|
||||
## Baseline `nix eval` Results
|
||||
|
||||
| Output | Start | End | Status | Result |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| `ultracloud-iso` | `2026-04-04T16:43:34+09:00` | `2026-04-04T16:43:41+09:00` | `0` | `/nix/store/j60isp8ai10vkgdncvi3wcjdgxqwjzpy-nixos-system-nixos-26.05.20251208.addf7cf.drv` |
|
||||
| `node01` | `2026-04-04T16:43:45+09:00` | `2026-04-04T16:43:49+09:00` | `0` | `/nix/store/94g1xyv25s09hyyi924sp5bxb0y8kir9-nixos-system-node01-26.05.20251208.addf7cf.drv` |
|
||||
| `netboot-control-plane` | `2026-04-04T16:43:54+09:00` | `2026-04-04T16:44:01+09:00` | `0` | `/nix/store/afknxzr1mhrlrzrkp8mj9q1fwwahdld3-nixos-system-nixos-kexec-26.05.20251208.addf7cf.drv` |
|
||||
| `netboot-worker` | `2026-04-04T16:43:54+09:00` | `2026-04-04T16:43:56+09:00` | `1` | `undefined variable 'plasmavmc-server'` at `nix/images/netboot-worker.nix:28:5` |
|
||||
| `netboot-all-in-one` | `2026-04-04T16:43:54+09:00` | `2026-04-04T16:43:56+09:00` | `1` | `undefined variable 'chainfire-server'` at `nix/images/netboot-all-in-one.nix:39:5` |
|
||||
|
||||
## Post-Baseline Repair
|
||||
|
||||
After recording the baseline, `flake.nix` was adjusted so the netboot image configurations receive the UltraCloud overlay during evaluation. That keeps the baseline intact while making the named canonical-profile outputs evaluable.
|
||||
|
||||
Post-fix spot check:
|
||||
|
||||
- `ultracloud-iso`: `/nix/store/j60isp8ai10vkgdncvi3wcjdgxqwjzpy-nixos-system-nixos-26.05.20251208.addf7cf.drv`
|
||||
- `node01`: `/nix/store/di87n45m5v30n8gccbs8pic2j8wbwgvr-nixos-system-node01-26.05.20251208.addf7cf.drv`
|
||||
- `netboot-control-plane`: `/nix/store/afknxzr1mhrlrzrkp8mj9q1fwwahdld3-nixos-system-nixos-kexec-26.05.20251208.addf7cf.drv`
|
||||
- `netboot-worker`: `/nix/store/6x51ss2ql1n4nhi8ad0avhvzk4n6arcr-nixos-system-nixos-kexec-26.05.20251208.addf7cf.drv`
|
||||
- `netboot-all-in-one`: `/nix/store/2l57rda3pnd1hivjicfmp53zpimxn00n-nixos-system-nixos-kexec-26.05.20251208.addf7cf.drv`
|
||||
Loading…
Add table
Reference in a new issue