From c1d4178a52df7e2e1192d3eecd68c961dceaf024 Mon Sep 17 00:00:00 2001 From: centra Date: Fri, 10 Apr 2026 19:28:44 +0900 Subject: [PATCH] Establish baseline product surface and proof lanes --- .github/workflows/nix.yml | 28 +- .gitignore | 3 + README.md | 210 +- TODO.md | 411 ++++ apigateway/README.md | 17 + .../crates/apigateway-server/src/main.rs | 153 +- chainfire/Cargo.lock | 11 - .../baremetal/pxe-server/assets/.gitkeep | 2 +- chainfire/baremetal/pxe-server/ipxe/boot.ipxe | 4 +- chainfire/chainfire-client/src/client.rs | 51 +- chainfire/chainfire-client/src/watch.rs | 7 +- chainfire/crates/chainfire-api/build.rs | 5 +- .../chainfire-api/src/cluster_service.rs | 145 +- .../chainfire-api/src/internal_service.rs | 41 +- .../crates/chainfire-api/src/kv_service.rs | 40 +- .../crates/chainfire-api/src/lease_service.rs | 3 +- chainfire/crates/chainfire-api/src/lib.rs | 14 +- .../crates/chainfire-api/src/raft_client.rs | 21 +- .../crates/chainfire-api/src/watch_service.rs | 15 +- chainfire/crates/chainfire-core/Cargo.toml | 27 +- .../crates/chainfire-core/src/builder.rs | 238 --- .../crates/chainfire-core/src/callbacks.rs | 103 - .../crates/chainfire-core/src/cluster.rs | 313 --- chainfire/crates/chainfire-core/src/config.rs | 162 -- chainfire/crates/chainfire-core/src/events.rs | 198 -- chainfire/crates/chainfire-core/src/kvs.rs | 290 --- chainfire/crates/chainfire-core/src/lib.rs | 60 +- chainfire/crates/chainfire-core/src/traits.rs | 60 - .../crates/chainfire-gossip/src/broadcast.rs | 6 +- .../crates/chainfire-gossip/src/identity.rs | 8 +- .../crates/chainfire-gossip/src/membership.rs | 10 +- chainfire/crates/chainfire-raft/src/core.rs | 396 ++-- chainfire/crates/chainfire-raft/src/lib.rs | 5 +- .../crates/chainfire-raft/src/network.rs | 22 +- .../chainfire-server/benches/kv_bench.rs | 21 +- chainfire/crates/chainfire-server/src/main.rs | 23 +- chainfire/crates/chainfire-server/src/node.rs | 18 +- chainfire/crates/chainfire-server/src/rest.rs | 127 +- .../crates/chainfire-server/src/server.rs | 21 +- .../benches/storage_bench.rs | 8 +- .../crates/chainfire-storage/src/kv_store.rs | 4 +- .../chainfire-storage/src/lease_store.rs | 22 +- chainfire/crates/chainfire-storage/src/lib.rs | 2 +- .../chainfire-storage/src/log_storage.rs | 21 +- .../crates/chainfire-storage/src/snapshot.rs | 14 +- .../chainfire-storage/src/state_machine.rs | 8 +- .../crates/chainfire-storage/src/store.rs | 5 +- .../crates/chainfire-types/src/command.rs | 8 +- chainfire/crates/chainfire-types/src/kv.rs | 4 +- chainfire/crates/chainfire-types/src/node.rs | 4 +- .../crates/chainfire-watch/src/registry.rs | 15 +- chainfire/proto/chainfire.proto | 90 +- chainfire/proto/internal.proto | 25 - creditservice/README.md | 37 +- .../crates/creditservice-api/src/billing.rs | 40 +- .../creditservice-api/src/credit_service.rs | 39 +- .../creditservice-api/src/flaredb_storage.rs | 16 +- .../crates/creditservice-api/src/lib.rs | 16 +- .../creditservice-api/src/nightlight.rs | 10 +- .../creditservice-api/src/sql_storage.rs | 48 +- .../crates/creditservice-api/src/storage.rs | 10 +- .../crates/creditservice-server/src/config.rs | 2 +- .../crates/creditservice-server/src/main.rs | 15 +- .../crates/creditservice-types/src/lib.rs | 16 +- .../creditservice-types/src/reservation.rs | 4 +- .../crates/creditservice-types/src/wallet.rs | 4 +- creditservice/creditservice-client/src/lib.rs | 5 +- deployer/crates/deployer-ctl/src/chainfire.rs | 2 +- deployer/crates/deployer-ctl/src/main.rs | 4 +- deployer/crates/fleet-scheduler/src/main.rs | 4 
+- deployer/crates/nix-agent/src/main.rs | 2 +- deployer/crates/node-agent/src/agent.rs | 2 +- deployer/crates/node-agent/src/main.rs | 2 +- deployer/crates/node-agent/src/process.rs | 192 +- .../crates/ultracloud-reconciler/src/hosts.rs | 2 +- docs/README.md | 48 +- docs/component-matrix.md | 116 +- docs/control-plane-ops.md | 77 + docs/edge-trial-surface.md | 83 + docs/hardware-bringup.md | 135 ++ docs/provider-vm-reality.md | 37 + docs/rollout-bundle.md | 103 + docs/testing.md | 247 ++- fiberlb/Cargo.lock | 111 + fiberlb/crates/fiberlb-server/Cargo.toml | 1 + .../fiberlb-server/proto/api/attribute.proto | 4 +- .../crates/fiberlb-server/src/dataplane.rs | 335 ++- .../crates/fiberlb-server/src/healthcheck.rs | 5 +- .../crates/fiberlb-server/src/l7_dataplane.rs | 283 ++- .../src/services/certificate.rs | 40 +- fiberlb/crates/fiberlb-server/src/tls.rs | 15 +- .../crates/fiberlb-types/src/certificate.rs | 4 +- fiberlb/crates/fiberlb-types/src/listener.rs | 2 +- flake.nix | 993 ++++++++- flaredb/crates/flaredb-client/src/client.rs | 13 +- .../crates/flaredb-proto/src/chainfire.proto | 36 +- flaredb/crates/flaredb-raft/src/network.rs | 11 +- .../flaredb-raft/src/persistent_storage.rs | 52 +- flaredb/crates/flaredb-raft/src/raft_node.rs | 11 +- flaredb/crates/flaredb-raft/src/storage.rs | 52 +- flaredb/crates/flaredb-raft/src/types.rs | 4 +- .../flaredb-server/benches/storage_bench.rs | 6 +- .../crates/flaredb-server/src/config/mod.rs | 5 +- .../crates/flaredb-server/src/heartbeat.rs | 3 +- .../crates/flaredb-server/src/raft_service.rs | 5 +- flaredb/crates/flaredb-server/src/rest.rs | 169 +- flaredb/crates/flaredb-sql/src/error.rs | 5 +- flaredb/crates/flaredb-sql/src/metadata.rs | 36 +- flaredb/crates/flaredb-sql/src/types.rs | 5 +- .../flaredb-storage/src/rocks_engine.rs | 52 +- iam/crates/iam-api/src/credential_service.rs | 71 + iam/crates/iam-api/src/lib.rs | 6 +- iam/crates/iam-authn/src/jwt.rs | 4 +- iam/crates/iam-authn/src/mtls.rs | 41 + iam/crates/iam-authz/src/evaluator.rs | 5 +- iam/crates/iam-server/src/main.rs | 2 +- iam/crates/iam-store/src/backend.rs | 101 +- iam/crates/iam-store/src/org_store.rs | 36 +- iam/crates/iam-store/src/project_store.rs | 40 +- iam/crates/iam-types/src/tenant.rs | 6 +- k8shost/Cargo.toml | 7 + k8shost/README.md | 20 + k8shost/crates/k8shost-cni/src/main.rs | 42 +- .../crates/k8shost-controllers/src/main.rs | 73 +- k8shost/crates/k8shost-csi/src/main.rs | 38 +- .../crates/k8shost-server/src/services/pod.rs | 138 +- k8shost/crates/k8shost-server/src/storage.rs | 4 +- .../src/backends/erasure_coded.rs | 74 +- .../src/backends/replicated.rs | 52 +- .../src/chunk/mod.rs | 21 +- .../src/erasure/mod.rs | 6 +- .../src/node/client.rs | 28 +- .../src/node/mock.rs | 4 +- .../src/node/registry.rs | 12 +- .../lightningstor-distributed/src/repair.rs | 1 - .../crates/lightningstor-node/src/main.rs | 12 +- .../crates/lightningstor-node/src/service.rs | 44 +- .../crates/lightningstor-node/src/storage.rs | 17 +- .../src/bucket_service.rs | 283 ++- .../crates/lightningstor-server/src/lib.rs | 2 +- .../src/object_service.rs | 393 +++- .../lightningstor-server/src/s3/auth.rs | 2 +- .../lightningstor-server/src/s3/router.rs | 54 + .../crates/lightningstor-server/src/tenant.rs | 8 +- .../lightningstor-storage/src/backend.rs | 49 +- .../lightningstor-storage/src/local_fs.rs | 145 +- .../crates/lightningstor-types/src/bucket.rs | 14 +- .../crates/lightningstor-types/src/object.rs | 1 - nightlight/README.md | 19 + nix-nos/flake.nix | 4 +- nix-nos/modules/default.nix | 9 +- 
nix/ci/flake.nix | 7 + nix/images/netboot-all-in-one.nix | 249 +-- nix/iso/ultracloud-iso.nix | 244 ++- nix/modules/creditservice.nix | 8 +- nix/modules/default.nix | 1 + nix/modules/deployer.nix | 10 +- nix/modules/fleet-scheduler.nix | 12 +- nix/modules/k8shost.nix | 62 +- nix/modules/lightningstor.nix | 23 +- nix/modules/nix-agent.nix | 19 +- nix/modules/node-agent.nix | 16 +- nix/modules/plasmavmc.nix | 41 +- nix/modules/ultracloud-cluster.nix | 10 +- nix/modules/ultracloud-resources.nix | 2 +- nix/nodes/baremetal-qemu/common.nix | 87 + .../control-plane/configuration.nix | 46 + .../baremetal-qemu/control-plane/disko.nix | 5 + .../baremetal-qemu/worker/configuration.nix | 36 + nix/nodes/baremetal-qemu/worker/disko.nix | 5 + nix/nodes/vm-cluster/common-disko.nix | 17 +- nix/single-node/base.nix | 421 ++++ nix/single-node/qemu-vm.nix | 24 + nix/single-node/surface.nix | 240 +++ nix/test-cluster/README.md | 84 +- nix/test-cluster/common.nix | 2 +- nix/test-cluster/flake.nix | 43 +- nix/test-cluster/hardware-smoke.sh | 615 ++++++ nix/test-cluster/node01.nix | 38 + nix/test-cluster/node06.nix | 2 +- nix/test-cluster/run-baremetal-iso-e2e.sh | 199 ++ nix/test-cluster/run-cluster.sh | 1881 ++++++++++++++++- .../run-core-control-plane-ops-proof.sh | 124 ++ nix/test-cluster/run-local-baseline.sh | 198 ++ nix/test-cluster/run-publishable-kvm-suite.sh | 231 ++ .../run-supported-surface-final-proof.sh | 196 ++ nix/test-cluster/verify-baremetal-iso.sh | 1098 ++++++++++ nix/test-cluster/vm-guest-image.nix | 4 +- nix/test-cluster/work-root-budget.sh | 238 +++ .../verify-fleet-scheduler-e2e-stable.sh | 284 +++ .../main-reaggregation-2026-04-06.md | 43 + plasmavmc/Cargo.lock | 1 - plasmavmc/Cargo.toml | 8 +- .../crates/plasmavmc-firecracker/src/lib.rs | 25 +- plasmavmc/crates/plasmavmc-kvm/src/lib.rs | 8 +- plasmavmc/crates/plasmavmc-server/Cargo.toml | 1 - plasmavmc/crates/plasmavmc-server/src/main.rs | 34 +- plasmavmc/crates/plasmavmc-server/src/rest.rs | 72 +- .../crates/plasmavmc-server/src/vm_service.rs | 49 +- .../crates/prismnet-server/src/ovn/client.rs | 126 +- .../src/services/security_group.rs | 16 +- 201 files changed, 12545 insertions(+), 3643 deletions(-) create mode 100644 TODO.md create mode 100644 apigateway/README.md delete mode 100644 chainfire/crates/chainfire-core/src/builder.rs delete mode 100644 chainfire/crates/chainfire-core/src/callbacks.rs delete mode 100644 chainfire/crates/chainfire-core/src/cluster.rs delete mode 100644 chainfire/crates/chainfire-core/src/config.rs delete mode 100644 chainfire/crates/chainfire-core/src/events.rs delete mode 100644 chainfire/crates/chainfire-core/src/kvs.rs delete mode 100644 chainfire/crates/chainfire-core/src/traits.rs create mode 100644 docs/control-plane-ops.md create mode 100644 docs/edge-trial-surface.md create mode 100644 docs/hardware-bringup.md create mode 100644 docs/provider-vm-reality.md create mode 100644 docs/rollout-bundle.md create mode 100644 k8shost/README.md create mode 100644 nightlight/README.md create mode 100644 nix/nodes/baremetal-qemu/common.nix create mode 100644 nix/nodes/baremetal-qemu/control-plane/configuration.nix create mode 100644 nix/nodes/baremetal-qemu/control-plane/disko.nix create mode 100644 nix/nodes/baremetal-qemu/worker/configuration.nix create mode 100644 nix/nodes/baremetal-qemu/worker/disko.nix create mode 100644 nix/single-node/base.nix create mode 100644 nix/single-node/qemu-vm.nix create mode 100644 nix/single-node/surface.nix create mode 100755 nix/test-cluster/hardware-smoke.sh create mode 
100755 nix/test-cluster/run-baremetal-iso-e2e.sh create mode 100755 nix/test-cluster/run-core-control-plane-ops-proof.sh create mode 100755 nix/test-cluster/run-local-baseline.sh create mode 100755 nix/test-cluster/run-publishable-kvm-suite.sh create mode 100755 nix/test-cluster/run-supported-surface-final-proof.sh create mode 100644 nix/test-cluster/verify-baremetal-iso.sh create mode 100755 nix/test-cluster/work-root-budget.sh create mode 100644 nix/tests/verify-fleet-scheduler-e2e-stable.sh create mode 100644 plans/baselines/main-reaggregation-2026-04-06.md diff --git a/.github/workflows/nix.yml b/.github/workflows/nix.yml index 3b96a4d..7ef2f93 100644 --- a/.github/workflows/nix.yml +++ b/.github/workflows/nix.yml @@ -96,6 +96,23 @@ jobs: run: | nix run ./nix/ci#gate-ci -- --shared-crate ${{ matrix.crate }} --tier 0 --no-logs + portable-regressions: + needs: filter + if: ${{ needs.filter.outputs.any_changed == 'true' || needs.filter.outputs.global_changed == 'true' || needs.filter.outputs.shared_crates_changed == 'true' }} + runs-on: ubuntu-latest + name: portable regressions + steps: + - uses: actions/checkout@v4 + - uses: DeterminateSystems/nix-installer-action@v11 + - uses: DeterminateSystems/magic-nix-cache-action@v8 + + - name: Run portable canonical profile regressions + run: | + nix build \ + .#checks.x86_64-linux.canonical-profile-eval-guards \ + .#checks.x86_64-linux.portable-control-plane-regressions \ + --accept-flake-config + # Build server packages (tier 1+) build: needs: [filter, gate] @@ -116,7 +133,7 @@ jobs: # Summary job for PR status checks ci-status: - needs: [filter, gate, shared-crates-gate] + needs: [filter, gate, shared-crates-gate, portable-regressions] if: always() runs-on: ubuntu-latest steps: @@ -128,11 +145,18 @@ jobs: if [[ "${{ needs.shared-crates-gate.result }}" == "failure" ]]; then exit 1 fi - if [[ "${{ needs.filter.outputs.any_changed }}" == "true" || "${{ needs.filter.outputs.global_changed }}" == "true" ]]; then + if [[ "${{ needs.portable-regressions.result }}" == "failure" ]]; then + exit 1 + fi + if [[ "${{ needs.filter.outputs.any_changed }}" == "true" || "${{ needs.filter.outputs.global_changed }}" == "true" || "${{ needs.filter.outputs.shared_crates_changed }}" == "true" ]]; then if [[ "${{ needs.gate.result }}" == "skipped" ]]; then echo "Gate was skipped despite changes. This is unexpected." exit 1 fi + if [[ "${{ needs.portable-regressions.result }}" == "skipped" ]]; then + echo "Portable regressions were skipped despite changes. This is unexpected." + exit 1 + fi fi if [[ "${{ needs.filter.outputs.shared_crates_changed }}" == "true" ]]; then if [[ "${{ needs.shared-crates-gate.result }}" == "skipped" ]]; then diff --git a/.gitignore b/.gitignore index fa11ae8..38cb842 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ .code/ .codex/ .claude.json +.agent-r/ +agent-r.config.toml .ralphrc .sisyphus/ @@ -39,6 +41,7 @@ Thumbs.db # Logs *.log +nohup.out quanta/test_output_renamed.log plasmavmc/kvm_test_output.log diff --git a/README.md b/README.md index 1a2f63a..646aeb6 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,8 @@ UltraCloud is a Nix-first cloud platform workspace that assembles a small control plane, network services, VM hosting, shared storage, object storage, and gateway services into one reproducible repository. -The canonical local proof path is the six-node VM cluster under [`nix/test-cluster`](/home/centra/cloud/nix/test-cluster/README.md). 
It builds all guest images on the host, boots them as hardware-like QEMU nodes, and validates real multi-node behavior. +The fastest public entrypoint is the one-command single-node quickstart. The `3-node HA control plane` profile lives in `nixosConfigurations.node01`, `nixosConfigurations.node02`, and `nixosConfigurations.node03`; the six-node VM cluster under [`nix/test-cluster`](nix/test-cluster/README.md) is the publishable harness that extends that HA baseline with worker and optional service bundles on host-built QEMU guests. +The canonical bare-metal bootstrap proof is the ISO-on-QEMU path under [`nix/test-cluster`](nix/test-cluster/README.md), which drives phone-home, Disko install, reboot, and desired-system convergence for one control-plane node and one worker-equivalent node. ## Components @@ -15,38 +16,217 @@ The canonical local proof path is the six-node VM cluster under [`nix/test-clust - `plasmavmc`: VM control plane and worker agents - `coronafs`: shared filesystem for mutable VM volumes - `lightningstor`: object storage and VM image backing -- `k8shost`: Kubernetes-style hosting control plane +- `k8shost`: Kubernetes-style hosting control plane for tenant pods and services - `apigateway`: external API and proxy surface - `nightlight`: metrics ingestion and query service -- `creditservice`: minimal reference quota/credit service -- `deployer`: bootstrap and phone-home deployment service +- `creditservice`: quota, reservation, and admission-control service +- `deployer`: bootstrap and phone-home deployment service that owns install plans and desired-system intent - `fleet-scheduler`: non-Kubernetes service scheduler for bare-metal cluster services +## Core API Notes + +- `chainfire` ships a fixed-membership cluster API on the supported surface. Public cluster management is `MemberList` plus `Status`, and the internal Raft transport surface is `Vote` plus `AppendEntries`. `chainfire-core` is workspace-internal only; the old embeddable builder and distributed-KV scaffold are not part of the supported product contract. +- `flaredb` ships SQL on both gRPC and REST. The supported REST SQL surface is `POST /api/v1/sql` for statement execution and `GET /api/v1/tables` for table discovery, alongside the existing KV and scan endpoints. +- `plasmavmc` ships a KVM-only public VM backend contract. The supported create and recovery surface is the KVM path exercised in `single-node-quickstart`, `fresh-smoke`, and `fresh-matrix`; Firecracker and mvisor remain archived non-product backends outside the supported surface until they have real tenant-network coverage. +- `lightningstor` keeps its optional gRPC surface live: bucket versioning, bucket policy, bucket tagging, and explicit object version listing are part of the supported contract for the canonical optional bundle. +- `fiberlb` backend `Https` health checks currently do not verify backend TLS certificates. Supported scope is limited to TCP reachability plus HTTP status for the backend endpoint until CA-aware verification is wired through config, server code, and the canonical harness. +- `k8shost` keeps `WatchPods` on the supported surface as a bounded snapshot stream for the current matching pod set. The published contract is the tenant workload API, not a separate long-lived controller event bus. +- `k8shost` is fixed as an API/control-plane product surface; runtime dataplane helpers stay archived non-product until they have their own published contract and proof. 
+- `k8shost-cni`, `k8shost-controllers`, `lightningstor-csi`, `nixosConfigurations.netboot-worker`, and the older scripts under `baremetal/vm-cluster` are archived internal scaffolds or `legacy/manual` debugging paths outside the supported surface. + +## Core Control Plane Operations + +The control-plane operator contract is fixed in [docs/control-plane-ops.md](docs/control-plane-ops.md). + +- ChainFire dynamic membership, replace-node, and scale-out are unsupported on the supported surface; the supported operator path is fixed-membership restore or whole-cluster replacement backed by the `durability-proof` backup/restore baseline. +- FlareDB online migration and schema evolution must start from the durability-proof backup/restore baseline and stay additive-first until a later destructive cleanup window. FlareDB destructive DDL and fully automated online migration remain outside the supported product contract for this release. +- IAM bootstrap hardening requires an explicit admin token, an explicit signing key, and a 32-byte IAM_CRED_MASTER_KEY. Signing-key rotation, credential overlap-and-revoke rotation, and mTLS overlap-and-cutover rotation are part of the supported operator contract; multi-node IAM failover remains outside the supported product contract. The standalone proof is `./nix/test-cluster/run-core-control-plane-ops-proof.sh`. + +## Edge And Trial Surface + +The edge-bundle and trial-surface contract is fixed in [docs/edge-trial-surface.md](docs/edge-trial-surface.md). + +- APIGateway is supported as stateless replicated instances behind an external L4 or VIP layer; live in-process reload is not part of the product contract. +- NightLight is supported as a single-node WAL/snapshot service; replicated HA metrics storage is not part of the product contract. +- CreditService export and backend migration are supported as offline export/import or backend-native snapshot workflows, not live mixed-writer migration. +- OCI/Docker artifact is intentionally not the public trial surface. +- Use `./nix/test-cluster/work-root-budget.sh status` for disk budget, GC, and cleanup guidance, `./nix/test-cluster/work-root-budget.sh enforce` for a stronger local budget gate, and `./nix/test-cluster/work-root-budget.sh prune-proof-logs 2` for safer dated-proof cleanup. + ## Quick Start +Single-node quickstart: + +```bash +nix run .#single-node-quickstart +``` + +This app is also the automated smoke check for the smallest realistic trial surface. It builds the minimal VM stack, boots a QEMU VM, waits for `chainfire`, `flaredb`, `iam`, `prismnet`, and `plasmavmc`, checks their health endpoints, and verifies the in-guest VM runtime prerequisites. For an interactive session, keep the VM running: + +```bash +ULTRACLOUD_QUICKSTART_KEEP_VM=1 nix run .#single-node-quickstart +``` + +Buildable trial artifact: + +```bash +nix build .#single-node-trial-vm +nix run .#single-node-trial +``` + +`single-node-trial-vm` is the lightest supported artifact for local use: a host-built NixOS VM appliance for the VM-platform core. OCI/Docker artifact is intentionally not the public trial surface here, because the supported scope needs a guest kernel plus host KVM, `/dev/net/tun`, and OVS/libvirt semantics. A privileged container would be host-coupled and would not prove the same contract. + +The legacy name `.#all-in-one-quickstart` is kept as an alias, and `.#single-node-trial` is a friendlier alias for the same smoke launcher. 
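If you want to keep a log of the smoke run, the quickstart can be wrapped in a small script. This is a minimal sketch, not a checked-in helper; it only assumes the `./work` convention used throughout this README and the `single-node quickstart smoke passed` success marker quoted later in this document.

```bash
#!/usr/bin/env bash
# Capture one quickstart run under ./work and assert the documented success marker.
set -euo pipefail
mkdir -p ./work/quickstart-logs
log="./work/quickstart-logs/$(date +%Y%m%dT%H%M%S).log"
nix run .#single-node-quickstart 2>&1 | tee "$log"
grep -q 'single-node quickstart smoke passed' "$log" && echo "quickstart OK: $log"
```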
+ +Portable local proof on hosts without `/dev/kvm`: + +```bash +nix build .#checks.x86_64-linux.canonical-profile-eval-guards +nix build .#checks.x86_64-linux.portable-control-plane-regressions +``` + +This TCG-safe lane keeps canonical profile drift, the core `chainfire` / `deployer` control-plane path, the `deployer -> nix-agent` boundary, and the `fleet-scheduler -> node-agent` boundary under regression coverage without requiring nested virtualization. + +Publishable nested-KVM suite: + ```bash nix develop nix run ./nix/test-cluster#cluster -- fresh-smoke +nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp +nix run ./nix/test-cluster#cluster -- fresh-matrix +./nix/test-cluster/run-publishable-kvm-suite.sh ./work/publishable-kvm-suite ``` +The checked-in entrypoint for the publishable nested-KVM suite is the local wrapper `./nix/test-cluster/run-publishable-kvm-suite.sh`. Runner-specific workflow wiring from `task/f5c70db0-baseline-profiles` is intentionally not part of this re-aggregated baseline. +For the full supported-surface proof on a local AMD/KVM host, use `./nix/test-cluster/run-supported-surface-final-proof.sh ./work/final-proofs/latest`; it keeps builders local, builds `single-node-trial-vm`, runs `single-node-quickstart`, and captures the publishable KVM suite logs in one place. +`nix run ./nix/test-cluster#cluster -- durability-proof` is the canonical chainfire flaredb deployer backup/restore lane. It persists artifacts under `./work/durability-proof/latest`, proves logical backup/restore for ChainFire keys and FlareDB SQL rows, uses the canonical Deployer admin pre-register request itself as the backup artifact, verifies that the pre-registered node survives a `deployer.service` restart, replays the same request idempotently, and injects CoronaFS plus LightningStor failures against the same live KVM cluster. +`nix run ./nix/test-cluster#cluster -- rollout-soak` is the longer-running control-plane and rollout companion lane. It rebuilds from clean local KVM runtime state, persists artifacts under `./work/rollout-soak/latest`, validates exactly one planned `draining` maintenance cycle and one fail-stop worker-loss cycle on the two native-runtime workers, holds each degraded state for the configured soak window, then restarts `deployer`, `fleet-scheduler`, `node-agent`, `chainfire`, and `flaredb` before revalidating the cluster. The soak root also carries explicit scope markers so the supported boundary is encoded in the proof artifacts rather than only in docs. The steady-state KVM nodes do not run `nix-agent.service`, so the soak lane records explicit `nix-agent` scope markers instead of pretending a live-cluster `nix-agent` restart happened. +`nix run ./nix/test-cluster#cluster -- provider-vm-reality-proof` is the focused local-KVM reality lane for the provider and VM-hosting bundles. It stores artifacts under `./work/provider-vm-reality-proof/latest`, captures authoritative FlashDNS answers, FiberLB backend drain and restore evidence, and PlasmaVMC KVM shared-storage migration plus post-migration restart state. +The 2026-04-10 local AMD/KVM proof logs are in `./work/final-proofs/32f64c10-1b74-4d8a-8d7d-b2cc6bf6b4f0-final` for `supported-surface-guard`, `single-node-trial-vm`, and `single-node-quickstart`, and in `./work/publishable-kvm-suite` for the final passing `fresh-smoke`, `fresh-demo-vm-webapp`, and `fresh-matrix` run through `./nix/test-cluster/run-publishable-kvm-suite.sh`. 
+The exact bare-metal check-runner proof from `2026-04-10` is in `./work/baremetal-iso-e2e/0de75570-dabd-471b-95fe-5898c54e2e8c`; its outer `environment.txt` records `execution_model=materialized-check-runner`, and `state/environment.txt` records `vm_accelerator_mode=kvm`. +The 2026-04-10 durability and failure-injection proof logs are in `./work/durability-proof/20260410T120618+0900`; `result.json` records `success=true`, `deployer_restore_mode="admin pre-register request replay with pre/post-restart list verification"`, and the artifact set includes `chainfire-backup-response.json`, `flaredb-restored.json`, `deployer-post-restart-list.json`, `coronafs-node04-local-state.json`, and `lightningstor-head-during-node05-outage.json`. +The 2026-04-10 longer-running rollout and control-plane soak is in `./work/rollout-soak/20260410T164549+0900`; `result.json` records `success=true`, `fleet_supported_native_runtime_nodes=2`, `validated_maintenance_cycles=1`, `validated_power_loss_cycles=1`, and `soak_hold_secs=30`, while the artifact set includes `maintenance-held.json`, `power-loss-held.json`, `deployer-post-restart-nodes.json`, `chainfire-post-restart-put.json`, `flaredb-post-restart.json`, `scope-fixed-contract.json`, `deployer-scope-fixed.txt`, `fleet-scheduler-scope-fixed.txt`, and the `node01-nix-agent-scope.txt` / `node04-nix-agent-scope.txt` boundary markers. +The 2026-04-10 provider and VM-hosting reality proof logs are in `./work/provider-vm-reality-proof/20260410T135827+0900`; `result.json` records `success=true`, and the artifact set includes `network-provider/fiberlb-drain-summary.txt`, `network-provider/flashdns-service-authoritative-answer.txt`, `vm-hosting/migration-summary.json`, and `vm-hosting/root-volume-after-post-migration-restart.json`. +Physical-node bring-up now has a canonical preflight wrapper as well: `nix run ./nix/test-cluster#hardware-smoke -- preflight`. It writes `kernel-params.txt`, expected markers, failure markers, and a machine-readable blocked or ready state under `./work/hardware-smoke/latest`, and the same entrypoint can later be rerun as `run` or `capture` when USB or BMC/Redfish transport is actually present. + +Within that suite, `fresh-matrix` is the public provider-bundle proof: it exercises PrismNet VPC/subnet/port flows plus security-group ACL add/remove, FlashDNS record publication, and FiberLB TCP plus TLS-terminated `Https` / `TerminatedHttps` listeners in one tenant-scoped composition run. The published FiberLB L4 algorithms are kept honest with targeted server unit tests in-tree. `provider-vm-reality-proof` is the artifact-producing companion lane for the same bundle and for the VM-hosting path. +PrismNet real OVS/OVN dataplane validation remains outside the supported local KVM surface. FiberLB native BGP or BFD peer interop plus hardware VIP ownership also remain outside the supported local KVM surface. PlasmaVMC real-hardware migration or storage handoff remains a later hardware proof; the current local-KVM proof fixes the release surface to KVM shared-storage migration on the worker pair. 
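After a soak run, the summary fields called out above can be read directly from the persisted `result.json`. A minimal sketch, assuming `jq` is on the PATH and that the run used the default `./work/rollout-soak/latest` artifact root:

```bash
# Spot-check the rollout-soak summary fields documented above.
run_root=./work/rollout-soak/latest
jq -r '"success=\(.success) maintenance_cycles=\(.validated_maintenance_cycles) power_loss_cycles=\(.validated_power_loss_cycles) soak_hold_secs=\(.soak_hold_secs)"' \
  "$run_root/result.json"
```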
+ +Project-done release proof now requires both halves of the public validation surface to be green: + +- `baremetal-iso` and `baremetal-iso-e2e` for the canonical `deployer -> installer -> nix-agent` bare-metal bootstrap path +- the KVM publishable suite (`fresh-smoke`, `fresh-demo-vm-webapp`, `fresh-matrix`) for the nested-KVM multi-node VM-hosting path + +Canonical bare-metal bootstrap proof: + +```bash +nix run ./nix/test-cluster#cluster -- baremetal-iso +nix build .#checks.x86_64-linux.baremetal-iso-e2e +./result/bin/baremetal-iso-e2e ./work/baremetal-iso-e2e/latest +``` + +`baremetal-iso-e2e` now materializes the exact local-KVM proof runner instead of trying to boot QEMU inside a sandboxed `nixbld` build. That older build-time execution model degraded to `TCG`; the built runner keeps the canonical attr name but executes the same `verify-baremetal-iso.sh` harness as the direct QEMU proof, with host KVM and persistent logs under `./work`. + +The QEMU ISO proof is a stand-in for the real install route, not a separate workflow. Build `nixosConfigurations.ultracloud-iso`, boot it under KVM locally or write the same ISO to USB or BMC virtual media on hardware, and pass the same bootstrap inputs that the installer consumes in the harness: `ultracloud.deployer_url=`, `ultracloud.bootstrap_token=` for authenticated bootstrap or a lab-only `deployer` configured with `allow_unauthenticated=true`, optional `ultracloud.ca_cert_url=`, optional `ultracloud.binary_cache_url=`, and optional `ultracloud.node_id=` / `ultracloud.hostname=` overrides when DMI serials or DHCP names are not the desired identity. + +The networking contract is the same in QEMU and on hardware: the live ISO needs DHCP or equivalent L3 reachability to `deployer` before Disko starts, and it needs reachability to the optional binary cache if you want it to pull prebuilt closures instead of compiling locally. The local QEMU proof relies on the `10.0.2.2` fallback addresses from user-mode NAT; real hardware should set `ultracloud.deployer_url` and, when used, `ultracloud.binary_cache_url` to routable control-plane endpoints. USB media and BMC virtual media are only transport differences for the same ISO and kernel parameters. For the local proof keep `./work` or `ULTRACLOUD_WORK_ROOT` on a large disk; the checked-in wrappers force local builders and derive Nix parallelism from the host CPU count unless you override it explicitly. + +Canonical hardware preflight and handoff for the same path: + +```bash +nix run ./nix/test-cluster#hardware-smoke -- preflight +nix run ./nix/test-cluster#hardware-smoke -- run +nix run ./nix/test-cluster#hardware-smoke -- capture +``` + +That wrapper keeps the QEMU proof and the physical-node proof on one contract by writing the exact kernel parameters, expected `ULTRACLOUD_MARKER` sequence, failure markers, and artifact root under `./work/hardware-smoke/latest`. + +Canonical hardware handoff for that path: + +1. Build `nixosConfigurations.ultracloud-iso` plus the target role configs (`baremetal-qemu-control-plane`, `baremetal-qemu-worker`, or their hardware-specific successors) and expose `deployer` plus an optional HTTP Nix cache on addresses the installer can reach. +2. Publish cluster state so that the reusable node class owns the install contract: `install_plan.nixos_configuration`, `install_plan.disko_config_path`, and preferably `install_plan.target_disk_by_id`. Node entries should only bind identity, pool, and any desired-system override that truly differs per host. 
When you expose a binary cache, prefer setting `desired_system.target_system` to the prebuilt class-owned closure as well so post-install convergence does not rebuild a dirty local variant on each node. +3. Boot the same ISO through USB or BMC virtual media and pass `ultracloud.deployer_url=...`, `ultracloud.bootstrap_token=...`, and, when used, `ultracloud.binary_cache_url=...` on the kernel command line. +4. Watch the canonical marker sequence from the installer journal: `pre-install.boot`, `pre-install.phone-home.complete`, `install.bundle-downloaded`, `install.disko.complete`, `install.nixos-install.complete`, `reboot`, `post-install.boot`. +5. Treat `nix-agent` reporting the desired system as `active` as the final convergence gate. The QEMU harness proves the same sequence, only with virtio disks and host-local endpoints standing in for the real chassis. + +The checked-in QEMU proof now mirrors the disk-selection contract that hardware should use. Its node classes install by stable `/dev/disk/by-id/virtio-uc-control-root` and `/dev/disk/by-id/virtio-uc-worker-root` selectors, backed by explicit QEMU disk serials, while the ISO resolves the prebuilt Disko script and target system from the install profile name embedded into the ISO. Hardware should keep the same class/profile structure and swap only the disk selector, routable URLs, and physical media transport. + +## Canonical Profiles + +UltraCloud now fixes the public support surface to three canonical profiles: + +| Profile | Canonical entrypoints | Required components | Optional components | +| --- | --- | --- | --- | +| `single-node dev` | `nix run .#single-node-quickstart`, `nix run .#single-node-trial`, `nix build .#single-node-trial-vm`, `nixosConfigurations.single-node-quickstart`, companion install image `nixosConfigurations.netboot-all-in-one` | `chainfire`, `flaredb`, `iam`, `plasmavmc`, `prismnet` | `lightningstor`, `coronafs`, `flashdns`, `fiberlb`, `apigateway`, `nightlight`, `creditservice`, `k8shost` | +| `3-node HA control plane` | `nixosConfigurations.node01`, `nixosConfigurations.node02`, `nixosConfigurations.node03`, companion install image `nixosConfigurations.netboot-control-plane` | `chainfire`, `flaredb`, `iam`, `nix-agent` on every control-plane node, plus `deployer` on the bootstrap node | `fleet-scheduler`, `node-agent`, `prismnet`, `flashdns`, `fiberlb`, `plasmavmc`, `lightningstor`, `coronafs`, `k8shost`, `apigateway`, `nightlight`, `creditservice` | +| `bare-metal bootstrap` | `nix run ./nix/test-cluster#cluster -- baremetal-iso`, `nixosConfigurations.ultracloud-iso`, `nixosConfigurations.baremetal-qemu-control-plane`, `nixosConfigurations.baremetal-qemu-worker`, `checks.x86_64-linux.baremetal-iso-e2e` | `deployer`, `first-boot-automation`, `install-target`, `nix-agent` | `node-agent`, `fleet-scheduler`, and higher-level storage or edge services after bootstrap | + +`nixosConfigurations.netboot-all-in-one` and `nixosConfigurations.netboot-control-plane` are canonical companion images for the supported `single-node dev` and `3-node HA control plane` profiles. `packages.single-node-trial-vm` is the low-friction trial artifact for the minimal VM-platform core. `nixosConfigurations.netboot-worker`, `netboot-base`, `pxe-server`, `vm-smoke-target`, and older launch flows under `baremetal/vm-cluster` are archived helpers or `legacy/manual` debugging paths outside the canonical profiles and their guard set. 
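For the `bare-metal bootstrap` profile above, the bootstrap inputs described in the hardware handoff translate to a kernel command line along these lines. Every value in the sketch is a placeholder, not a real endpoint or token; only the `ultracloud.*` parameter names come from this README.

```bash
# Illustrative kernel command line for booting the ISO against a routable deployer.
# All hosts, ports, tokens, and names below are placeholders; substitute your own.
KERNEL_ARGS="ultracloud.deployer_url=http://deployer.example:8080 \
ultracloud.bootstrap_token=REPLACE_WITH_BOOTSTRAP_TOKEN \
ultracloud.binary_cache_url=http://cache.example:8080 \
ultracloud.node_id=worker-01 ultracloud.hostname=worker-01"
echo "$KERNEL_ARGS"
```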
+ +## Cluster Authoring + +`ultracloud.cluster` backed by `nix/lib/cluster-schema.nix` is the only supported cluster authoring source. It is the canonical place to define nodes, reusable deployer classes and pools, rollout objects, service placement intent, and the generated per-node bootstrap metadata consumed by `deployer`, `fleet-scheduler`, `nix-agent`, and `node-agent`. + +`nix-nos` is limited to legacy compatibility and low-level network primitives such as interfaces, VLANs, BGP, and static routing. It is not the canonical source for cluster topology, rollout intent, scheduler state, or bootstrap inventory. + +## Responsibility Boundaries + +- `plasmavmc` owns tenant VM lifecycle plus KVM worker registration. It can run against explicit remote IAM, PrismNet, and FlareDB endpoints, but it does not own machine enrollment, desired-system rollout, or host-native service placement. +- `k8shost` owns Kubernetes-style pod and service APIs for tenant workloads, then translates them into `prismnet`, `flashdns`, and `fiberlb` objects. It does not place host-native cluster daemons, and its runtime dataplane helpers remain archived non-product. +- `fleet-scheduler` owns placement and failover of host-native service instances from declarative cluster state derived from `ultracloud.cluster`. It consumes `node-agent` heartbeats and writes instance placement, but it does not expose tenant-facing Kubernetes semantics. +- `deployer` owns machine enrollment, `/api/v1/phone-home`, install plans, cluster metadata, and desired-system references. The supported declarative input for that state is the JSON generated from `ultracloud.cluster`; it decides what a node should become, but it does not execute the host-local switch. +- `nix-agent` owns host-local NixOS convergence only. It reads desired-system state from `deployer` or `chainfire`, activates the target closure, and rolls back on failed health checks. +- `node-agent` owns host-local runtime execution only. It reports heartbeats and applies scheduled service-instance state, but it does not install the base OS or rewrite desired-system targets. + +The single-node quickstart deliberately stops below that rollout stack: it ships only the VM-platform core plus optional add-ons, not `deployer`, `nix-agent`, `node-agent`, or `fleet-scheduler`. + +## Standalone Stories + +- `single-node-trial-vm` and `single-node-quickstart` are the standalone VM-platform story. They keep the minimal KVM-backed VM surface light and intentionally exclude `deployer`, `nix-agent`, `fleet-scheduler`, and `node-agent`. +- `deployer-vm-smoke`, `portable-control-plane-regressions`, and `baremetal-iso` are the standalone rollout-stack story. They validate `deployer -> nix-agent` and `deployer -> fleet-scheduler -> node-agent` without requiring the full VM-hosting bundle. + +## Rollout Bundle Operations + +The rollout-bundle operator contract is fixed in [docs/rollout-bundle.md](docs/rollout-bundle.md). As of 2026-04-10 the supported `deployer` recovery model is scope-fixed to one active writer plus optional cold-standby restore that reuses the same ChainFire namespace, credentials, bootstrap bundle, and local state backup; automatic ChainFire-backed multi-instance failover is outside the supported product contract for this release.
+ +The same operator doc also fixes the `nix-agent` health-check and rollback contract, the `node-agent` logs/secrets/volume/upgrade contract, and the `fleet-scheduler` supported upper limit: `fleet-scheduler` is scope-fixed to the two native-runtime worker lab with one planned drain cycle, one fail-stop worker-loss cycle, and 30-second held degraded states in `rollout-soak`. The canonical proofs are `nix build .#checks.x86_64-linux.deployer-vm-rollback`, `nix build .#checks.x86_64-linux.fleet-scheduler-e2e`, `nix build .#checks.x86_64-linux.portable-control-plane-regressions`, `nix run ./nix/test-cluster#cluster -- fresh-smoke`, `nix run ./nix/test-cluster#cluster -- rollout-soak`, and `nix run ./nix/test-cluster#cluster -- durability-proof`. + ## Main Entrypoints -- workspace flake: [flake.nix](/home/centra/cloud/flake.nix) -- VM validation harness: [nix/test-cluster/README.md](/home/centra/cloud/nix/test-cluster/README.md) -- shared volume notes: [coronafs/README.md](/home/centra/cloud/coronafs/README.md) -- minimal quota-service rationale: [creditservice/README.md](/home/centra/cloud/creditservice/README.md) -- archived manual VM launch scripts: [baremetal/vm-cluster/README.md](/home/centra/cloud/baremetal/vm-cluster/README.md) +- workspace flake: [flake.nix](flake.nix) +- single-node quickstart smoke: [`nix run .#single-node-quickstart`](docs/testing.md) +- single-node trial artifact: [`nix build .#single-node-trial-vm`](docs/testing.md), [`nix run .#single-node-trial`](docs/testing.md) +- smallest rollback proof for `deployer -> nix-agent`: [`nix build .#checks.x86_64-linux.deployer-vm-rollback`](docs/rollout-bundle.md) +- `3-node HA control plane` configs: `nixosConfigurations.node01`, `nixosConfigurations.node02`, `nixosConfigurations.node03`, companion image `nixosConfigurations.netboot-control-plane` +- portable local proof: [`nix build .#checks.x86_64-linux.portable-control-plane-regressions`](docs/testing.md) +- longer-running control-plane and rollout soak: [`nix run ./nix/test-cluster#cluster -- rollout-soak`](docs/testing.md) +- canonical bare-metal bootstrap smoke: [`nix run ./nix/test-cluster#cluster -- baremetal-iso`](docs/testing.md) +- canonical bare-metal exact proof runner: [`nix build .#checks.x86_64-linux.baremetal-iso-e2e`](docs/testing.md) then `./result/bin/baremetal-iso-e2e` +- canonical physical-node preflight and handoff: [`nix run ./nix/test-cluster#hardware-smoke -- preflight`](docs/hardware-bringup.md), then `run` or `capture` +- canonical profile guards: [`nix build .#checks.x86_64-linux.canonical-profile-eval-guards`](docs/testing.md), [`nix build .#checks.x86_64-linux.canonical-profile-build-guards`](docs/testing.md) +- supported surface guard: [`nix build .#checks.x86_64-linux.supported-surface-guard`](docs/testing.md) for public docs wording, shipped server API completeness, and high-signal TODO or best-effort markers in the supported provider/backend servers +- VM validation harness: [nix/test-cluster/README.md](nix/test-cluster/README.md) +- work-root budget helper: [`./nix/test-cluster/work-root-budget.sh status`](docs/testing.md), `enforce`, and `prune-proof-logs` +- shared volume notes: [coronafs/README.md](coronafs/README.md) +- apigateway supported scope: [apigateway/README.md](apigateway/README.md) +- nightlight supported scope: [nightlight/README.md](nightlight/README.md) +- creditservice
supported scope: [creditservice/README.md](creditservice/README.md) +- k8shost supported scope: [k8shost/README.md](k8shost/README.md) ## Repository Guide -- [docs/README.md](/home/centra/cloud/docs/README.md): documentation entrypoint -- [docs/testing.md](/home/centra/cloud/docs/testing.md): validation path summary -- [docs/component-matrix.md](/home/centra/cloud/docs/component-matrix.md): supported multi-component compositions -- [docs/storage-benchmarks.md](/home/centra/cloud/docs/storage-benchmarks.md): latest CoronaFS and LightningStor lab numbers +- [docs/README.md](docs/README.md): documentation entrypoint +- [docs/testing.md](docs/testing.md): validation path summary +- [docs/component-matrix.md](docs/component-matrix.md): canonical profiles and optional bundles +- [docs/rollout-bundle.md](docs/rollout-bundle.md): rollout-bundle HA, rollback, drain, logs, secrets, and volume contract +- [docs/control-plane-ops.md](docs/control-plane-ops.md): ChainFire membership boundary, FlareDB schema or destructive-DDL boundary, and IAM bootstrap hardening plus signing-key, credential, and mTLS rotation +- [docs/edge-trial-surface.md](docs/edge-trial-surface.md): APIGateway, NightLight, CreditService, trial-surface, and work-root budget contract +- [docs/provider-vm-reality.md](docs/provider-vm-reality.md): PrismNet, FlashDNS, FiberLB, and PlasmaVMC local-KVM proof scope plus artifact contract +- [docs/hardware-bringup.md](docs/hardware-bringup.md): USB/BMC/Redfish preflight, artifact capture, and hardware-smoke handoff +- [docs/storage-benchmarks.md](docs/storage-benchmarks.md): latest CoronaFS and LightningStor lab numbers - `plans/`: design notes and exploration documents ## Scope -UltraCloud is centered on reproducible infrastructure behavior rather than polished end-user product surfaces. Some services, such as `creditservice`, are intentionally minimal reference implementations that prove integration points rather than full products. +UltraCloud is centered on reproducible infrastructure behavior. Optional add-ons such as `creditservice` and `k8shost` remain part of the supported surface only when the documented scope, harness coverage, and public contract stay aligned with what the repository actually ships. -Host-level NixOS rollout validation is also expected to stay reproducible: the `deployer-vm-smoke` VM test now proves that `nix-agent` can activate a prebuilt target system closure directly, without recompiling the stack inside the guest. +Host-level NixOS rollout validation is also expected to stay reproducible: `baremetal-iso-e2e` is now the materialized exact proof runner for the full install path, `canonical-profile-eval-guards` and `canonical-profile-build-guards` fail fast when supported outputs drift, `supported-surface-guard` now rejects unfinished public wording, shipped server API stubs, high-signal completeness markers such as `TODO:` or `best-effort` in the supported network or backend servers, and archived helper regressions such as worker netboot or backend scaffolds re-entering the default product surface, while `portable-control-plane-regressions` remains the non-KVM developer lane that keeps the main control-plane and rollout boundaries green on TCG-only hosts before the publishable nested-KVM suite is rerun. 
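For a rough local pre-check of that marker scan, a plain grep over the supported server sources can surface the same kind of high-signal markers before the flake check is run. This is only a sketch: the directory list is an assumption about which crates count as the supported network and backend servers, and the authoritative gate remains `nix build .#checks.x86_64-linux.supported-surface-guard`.

```bash
# Advisory scan only; the real gate is the supported-surface-guard flake check.
# The directories below are assumptions, not the guard's actual configuration.
if grep -rnE 'TODO:|best-effort' --include='*.rs' \
    prismnet fiberlb lightningstor flashdns 2>/dev/null; then
  echo "high-signal markers found"
else
  echo "no high-signal markers found"
fi
```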
diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..335fc84 --- /dev/null +++ b/TODO.md @@ -0,0 +1,411 @@ +# UltraCloud Baseline TODO (2026-04-10) + +- Task: `0fe10731-bdbc-4f8f-8bcc-5f5a16903200` +- Working branch: `task/0fe10731-baseline-todo` +- Base: `origin/main` at `b8ebd24d4e9b2dbe71e34ba09b77092dfa7dd43c` +- Handover policy: the dirty worktree from `task/343c8c57-main-reaggregate` was carried onto the new branch as-is, without reset or revert. +- Purpose of this ticket: fix each component's responsibilities, canonical entrypoints, current evidence, unproven items, prioritized issue tickets, and dependencies on one page, and use it as the baseline ticket for subsequent autonomous implementation. +- Investigation inputs: `README.md`, `docs/component-matrix.md`, `docs/testing.md`, `nix/test-cluster/README.md`, `plans/cluster-investigation-2026-03-02/*`, the current `nix/modules/*`, `nix/single-node/*`, `nix/nodes/baremetal-qemu/*`, `nix/test-cluster/*`, and each component's `src/main.rs` / API definitions. + +## Canonical Boundary Snapshot + +- There are three canonical profiles: `single-node dev`, `3-node HA control plane`, `bare-metal bootstrap`. +- The minimal core is `chainfire + flaredb + iam + prismnet + plasmavmc`. +- The network provider bundle is `prismnet + flashdns + fiberlb`. +- The VM hosting bundle is `plasmavmc + prismnet + coronafs + lightningstor`. +- The edge/tenant bundle is `apigateway + nightlight + creditservice`. +- The rollout bundle is `deployer + nix-agent + fleet-scheduler + node-agent`. +- On the 2026-04-10 current branch, QEMU/KVM is the canonical local proof, and the bare-metal proof is treated as `QEMU as hardware` under the same ISO contract. + +## 2026-03-02 Failure Split + +### 2026-03-02 failures that are resolved at the file level on the 2026-04-10 current branch + +- `ARCH-001`: resolved. `flake.nix` no longer references the missing `docs/.../configuration.nix`; the canonical sources are now `nix/nodes/vm-cluster/node01`, `node02`, `node03` and `canonical-profile-eval-guards`. +- `ARCH-002`: resolved. The missing `disko.nix` reference in the ISO install is gone; `verify-baremetal-iso.sh` now uses `nix/nodes/baremetal-qemu/control-plane/disko.nix` and `.../worker/disko.nix` directly. +- `ARCH-003`: resolved. The missing Nix wiring for `deployer` now exists: `nix/modules/deployer.nix`, the package/app/check definitions in `flake.nix`, and `/api/v1/phone-home` in `deployer-server`. +- `TC-001`: resolved. The `joinAddr` mismatch is gone; the current `chainfire` / `flaredb` modules agree on the `initialPeers` contract. +- `TC-002`: resolved. The `creditservice` evaluation failure on `node06` is gone; the current `nix/test-cluster/node06.nix` imports `creditservice.nix` and also supplies `flaredbAddr`. +- `COMP-001` through `COMP-004`: resolved. The IAM endpoint injection mismatches are gone; the `prismnet`, `plasmavmc`, `fiberlb`, `lightningstor`, `flashdns`, and `creditservice` modules now translate to the config keys the binaries actually read. +- `ARCH-004`: resolved. The first-boot `leader_url` contract mismatch is gone; `nix/modules/first-boot-automation.nix` assumes `http://localhost:8081` / `8082` and `/admin/member/add`. +- `ARCH-005`: resolved. FlareDB now has a first-boot join API: `flaredb/crates/flaredb-server/src/rest.rs` exposes `POST /admin/member/add`. +- `3.1 NightLight grpcPort mismatch`: resolved. `nightlight-server` now binds both HTTP and gRPC. +- `ARCH-006` / duplicated `cluster-config` implementations: the `nix-nos/topology.nix`-rooted duplication seen on 2026-03-02 is no longer present in the current tree; the canonical sources are `nix/lib/cluster-schema.nix` and `nix/modules/ultracloud-cluster.nix`. +- `QLT-001`: the large group of `doCheck = false` entries in `flake.nix` no longer remains, at least at the current file level. + +### Items split off from the 2026-03-02 failures where, as of 2026-04-10, the structural fix exists but runtime re-proof is incomplete + +- `VERIFY-001`: on the 2026-04-10 local AMD/KVM host, `supported-surface-guard`, `single-node-trial-vm`, `single-node-quickstart`, `fresh-smoke`, `fresh-demo-vm-webapp`, `fresh-matrix`, `./nix/test-cluster/run-publishable-kvm-suite.sh ./work/publishable-kvm-suite`, `canonical-profile-eval-guards`, `portable-control-plane-regressions`, `deployer-bootstrap-e2e`, `host-lifecycle-e2e`, `fleet-scheduler-e2e`, `baremetal-iso`,
`nix build .#checks.x86_64-linux.baremetal-iso-e2e`, and the built `./result/bin/baremetal-iso-e2e` exact runner have all been rerun and pass. The only item not yet re-proven is the physical bare-metal smoke. +- `VERIFY-002`: the bare-metal bootstrap is closed through the QEMU ISO proof, but the same contract has not yet been re-proven against USB, BMC, or physical hardware. However, `nix run ./nix/test-cluster#hardware-smoke -- preflight` was added on 2026-04-10, so the blocked state when no transport is present can now be recorded mechanically in `./work/hardware-smoke/latest/status.env` and `missing-requirements.txt`. +- `VERIFY-003`: the config-contract fixes have been re-confirmed up through the all-add-ons-enabled profile via `run-publishable-kvm-suite.sh`. `baremetal-iso-e2e` has also moved to the materialized host-KVM runner, so the remaining work narrows to hardware bring-up. + +## First Tranche Backlog + +- `TRANCHE-01`: done. The optional bundle health gating for `single-node dev` was fixed on 2026-04-10, resolving the `coronafs` port mismatch and the missing health monitoring for `flashdns` / `fiberlb` / `lightningstor`. +- `TRANCHE-02`: `baremetal-iso` and the `baremetal-iso-e2e` exact runner were rerun on the 2026-04-10 local AMD/KVM host. The next stage adds a smoke run on one USB, BMC, or physical machine. +- `TRANCHE-03`: done. `nix run ./nix/test-cluster#cluster -- durability-proof` was added on 2026-04-10, fixing logical backup/restore for `chainfire` / `flaredb` and the `deployer` admin pre-register request replay plus restart persistence proof into the product docs and harness. +- `TRANCHE-04`: done. The local `chainfire` default endpoints of `fleet-scheduler`, `nix-agent`, `node-agent`, and `deployer-ctl` were normalized to the canonical `http://127.0.0.1:2379` on 2026-04-10. +- `TRANCHE-05`: done. The supported scope of the `fiberlb` HTTPS health check was made explicit on 2026-04-10: for now only `TCP reachability + HTTP status` without backend TLS certificate verification is the product contract, fixed in docs, guard, and source comments. +- `TRANCHE-06`: done. `k8shost` was fixed as an API/control-plane product on 2026-04-10, and docs, guard, and TODO now agree that the runtime dataplane helpers are archived non-product. +- `TRANCHE-07`: done. The 2026-04-10 `durability-proof` saves node-loss / repair for the `lightningstor` distributed backend and the `coronafs` controller/node split outage as the canonical failure-injection proof. +- `TRANCHE-08`: done. The `hardware-smoke` preflight/handoff wrapper was added on 2026-04-10 so that physical bring-up of `deployer -> ISO -> first-boot -> nix-agent` can be prepared through one common USB/BMC/Redfish entrypoint. The blocked artifact for the missing-transport case is also fixed under `./work/hardware-smoke`. +- `TRANCHE-10`: done. `nix run ./nix/test-cluster#cluster -- rollout-soak` was fixed on 2026-04-10 as the longer-run KVM operator lane, saving `draining` maintenance, worker power-loss, `deployer` / `fleet-scheduler` / `node-agent` restarts, and fixed-membership `chainfire` / `flaredb` restarts under one artifact root. The fact that the steady-state `test-cluster` nodes do not ship `nix-agent.service` is also made explicit by a scope marker artifact. +- `TRANCHE-11`: done. `DEPLOYER-P1-01` and `FLEET-P1-01` were updated to their scope-fixed final state on 2026-04-10, and `rollout-soak` now saves `scope-fixed-contract.json`, `deployer-scope-fixed.txt`, and `fleet-scheduler-scope-fixed.txt` to `/mnt/d2/centra/photoncloud-monorepo/work/rollout-soak/20260410T164549+0900`. `deployer` is fixed to one active writer plus optional cold-standby restore, and `fleet-scheduler` is fixed to one drain cycle plus one fail-stop cycle with a 30-second hold on two native-runtime workers as the release boundary. +- `TRANCHE-12`: done. `FDB-P1-01`, `IAM-P1-01`, and `HARNESS-P2-01` were handled in the next stage on 2026-04-10. `run-core-control-plane-ops-proof.sh` saves `scope-fixed-contract.json`, `iam-credential-rotation-tests.log`, `iam-mtls-rotation-tests.log`, and `result.json` to `/mnt/d2/centra/photoncloud-monorepo/work/core-control-plane-ops-proof/20260410T172148+09:00`; FlareDB destructive DDL and fully automated online migration are scope-fixed as unsupported, and IAM's supported lifecycle covers signing-key + credential + mTLS overlap rotation
while multi-node failover stays fixed as unsupported. `work-root-budget.sh` gained `enforce` and `prune-proof-logs`, moving it from a disk budget advisory toward a stronger local gate and a safer cleanup workflow. + +## 2026-04-10 Physical Hardware Bring-Up Pack + +- `Task:` `3dba03d3-525b-4079-8c93-90af6a89d32b` +- `Canonical entrypoint:` `nix run ./nix/test-cluster#hardware-smoke -- preflight`, then `run` or `capture` +- `Current preflight artifact root:` `./work/hardware-smoke/latest` +- `Artifact contract:` `status.env`, `missing-requirements.txt`, `kernel-params.txt`, `expected-markers.txt`, `failure-markers.txt`, `operator-handoff.md`, `environment.txt` +- `Bridge to QEMU proof:` hardware wrapper reuses `nixosConfigurations.ultracloud-iso` and the same `ULTRACLOUD_MARKER pre-install.boot.*`, `pre-install.phone-home.complete.*`, `install.disko.complete.*`, `reboot.*`, `post-install.boot.*`, `desired-system-active.*` markers that `verify-baremetal-iso.sh` enforces in the QEMU harness. +- `Blocked-state recording:` when USB device or BMC/Redfish transport is missing, `preflight` records `status=blocked` and the missing transport, kernel-parameter, and capture inputs in `missing-requirements.txt` without pretending the hardware proof ran. +- `Still open:` an actual physical-node execution remains pending until a removable USB target or BMC/Redfish endpoint plus credentials are supplied. +- `TRANCHE-09`: done. `docs/rollout-bundle.md` was added on 2026-04-10, fixing the product contracts and proof commands for `deployer` single-writer DR, `nix-agent` health-check/rollback, `node-agent` logs/secrets/volume/upgrade, and `fleet-scheduler` drain/maintenance/failover. + +## 2026-04-10 Long-Run Control Plane And Rollout Soak + +- `Task:` `07d6137e-6e4c-4158-9142-8920f4f70a76` +- `Canonical entrypoint:` `nix run ./nix/test-cluster#cluster -- rollout-soak` +- `Artifact root:` `/mnt/d2/centra/photoncloud-monorepo/work/rollout-soak/20260410T164549+0900` +- `Scenario proof:` one planned `node04 -> draining -> active` cycle, one `node05` power-loss and recovery cycle, restart of `deployer.service`, `fleet-scheduler.service`, `node-agent.service` on both worker nodes, and fixed-membership restart of `chainfire.service` plus `flaredb.service` on `node02`. +- `Saved evidence:` `maintenance-during.json`, `maintenance-held.json`, `maintenance-restored.json`, `power-loss-during.json`, `power-loss-held.json`, `power-loss-restored.json`, `deployer-post-restart-nodes.json`, `fleet-scheduler-post-restart.json`, `node04-node-agent-post-restart.json`, `node05-node-agent-post-restart.json`, `chainfire-post-restart-put.json`, `flaredb-post-restart.json`, `post-control-plane-restarts.json`, `scope-fixed-contract.json`, `deployer-scope-fixed.txt`, `fleet-scheduler-scope-fixed.txt`, `result.json`. +- `Long-run nix-agent boundary:` steady-state `nix/test-cluster` nodes do not ship `nix-agent.service`, so this soak records `node01-nix-agent-scope.txt` and `node04-nix-agent-scope.txt` instead of pretending a live-cluster `nix-agent` restart happened. The executable `nix-agent` proofs remain `deployer-vm-rollback`, `baremetal-iso`, and `baremetal-iso-e2e`. +- `Result:` PASS on the local AMD/KVM host.
`result.json` records `success=true`, `fleet_supported_native_runtime_nodes=2`, `validated_maintenance_cycles=1`, `validated_power_loss_cycles=1`, `soak_hold_secs=30`, and the summary `validated one planned drain cycle and one fail-stop worker-loss cycle on the two-node native-runtime lab, held each degraded state for the configured soak window, restarted deployer or scheduler or agent services, and revalidated fixed-membership control-plane restarts while keeping deployer HA scope-fixed to single-writer recovery`. + +## 2026-04-10 Local Executable Baseline + +- `Task:` `b1e811fb-158f-415c-a011-64c724e84c5c` +- `Runner:` `nix/test-cluster/run-local-baseline.sh` +- `Log root:` `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c` +- `Local execution policy:` `ULTRACLOUD_WORK_ROOT=/mnt/d2/centra/photoncloud-monorepo/work`, `TMPDIR=/mnt/d2/centra/photoncloud-monorepo/work/tmp`, `XDG_CACHE_HOME=/mnt/d2/centra/photoncloud-monorepo/work/xdg-cache`, `PHOTON_CLUSTER_WORK_ROOT=/mnt/d2/centra/photoncloud-monorepo/work/test-cluster`, `PHOTON_VM_DIR=/mnt/d2/centra/photoncloud-monorepo/work/test-cluster/state`, `PHOTON_CLUSTER_VDE_SWITCH_DIR=/mnt/d2/centra/photoncloud-monorepo/work/test-cluster/vde-switch`, and remote builders disabled via `NIX_CONFIG builders =`. +- `Host evidence:` `environment.txt` records `host_cpu_count=12`, `ultracloud_local_nix_max_jobs=6`, `ultracloud_local_nix_build_cores=2`, `photon_cluster_nix_max_jobs=6`, `photon_cluster_nix_build_cores=2`, `nix_builders=` (empty), `kvm_access=rw`, `nested_param_value=1`. +- `Guard/build checks:` + - `canonical-profile-eval-guards`: PASS. command `nix build .#checks.x86_64-linux.canonical-profile-eval-guards --no-link`; meta `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/canonical-profile-eval-guards.meta`; log `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/canonical-profile-eval-guards.log`. + - `supported-surface-guard`: PASS. command `nix build .#checks.x86_64-linux.supported-surface-guard --no-link`; meta `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/supported-surface-guard.meta`; log `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/supported-surface-guard.log`. + - `portable-control-plane-regressions`: PASS. command `nix build .#checks.x86_64-linux.portable-control-plane-regressions`; meta `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/portable-control-plane-regressions.meta`; log `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/portable-control-plane-regressions.log`. + - `deployer-bootstrap-e2e`: PASS. command `nix build .#checks.x86_64-linux.deployer-bootstrap-e2e`; meta `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/deployer-bootstrap-e2e.meta`; log `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/deployer-bootstrap-e2e.log`. + - `host-lifecycle-e2e`: PASS. command `nix build .#checks.x86_64-linux.host-lifecycle-e2e`; meta `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/host-lifecycle-e2e.meta`; log `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/host-lifecycle-e2e.log`. + - `fleet-scheduler-e2e`: PASS.
command `nix build .#checks.x86_64-linux.fleet-scheduler-e2e`; meta `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/fleet-scheduler-e2e.meta`; log `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/fleet-scheduler-e2e.log`. +- `Runtime path checks:` + - `single-node-quickstart`: PASS. command `nix run .#single-node-quickstart`; meta `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/single-node-quickstart.meta`; log `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/single-node-quickstart.log`; success marker `single-node quickstart smoke passed`. + - `baremetal-iso`: PASS. command `nix run ./nix/test-cluster#cluster -- baremetal-iso`; meta `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/baremetal-iso.meta`; log `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/baremetal-iso.log`; success markers `ULTRACLOUD_MARKER desired-system-active.iso-control-plane-01`, `ULTRACLOUD_MARKER desired-system-active.iso-worker-01`, `Canonical ISO bare-metal QEMU verification succeeded`. + - `fresh-smoke`: PASS. command `nix run ./nix/test-cluster#cluster -- fresh-smoke`; meta `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/fresh-smoke.meta`; log `/mnt/d2/centra/photoncloud-monorepo/work/baselines/b1e811fb-158f-415c-a011-64c724e84c5c/fresh-smoke.log`; success marker `Cluster validation succeeded`. +- `2026-04-10 execution failures:` none. 2026-03-02 の historical failure split は上節のままで、この local AMD/KVM baseline では required command 群を fail として再現していない。 +- `2026-04-10 observed non-failure risk:` + - `HARNESS-OBS-20260410-01`: 2026-04-10 に解消。`nix/test-cluster/run-cluster.sh` の stale VM cleanup は current `vm_dir` / `vde_switch_dir` を cmdline で確認した PID のみ収集するように変更し、path 非依存の `hostfwd=tcp::${port}-:22` fallback を撤去した。 + +## 2026-04-10 Bare-Metal Canonical Path + +- `Task:` `6d9f45e4-1954-4a0b-b886-c61482db6c3c` +- `QEMU-as-hardware runtime proof:` PASS. command `nix run ./nix/test-cluster#cluster -- baremetal-iso`; log root `/mnt/d2/centra/photoncloud-monorepo/work/baremetal-iso`; evidence files `environment.txt`, `deployer.log`, `chainfire.log`, `control-plane.serial.log`, `worker.serial.log`. +- `Runtime PASS markers:` `ULTRACLOUD_MARKER desired-system-active.iso-control-plane-01`, `ULTRACLOUD_MARKER desired-system-active.iso-worker-01`, `Canonical ISO bare-metal QEMU verification succeeded`. +- `Runtime contract now proven:` + - reusable node classes own `install_plan.nixos_configuration`, `install_plan.disko_config_path`, and stable `install_plan.target_disk_by_id` + - nodes carry identity plus desired-system overrides only; when a cache-backed prebuilt closure is available they now publish `desired_system.target_system` to converge to the exact shipped system instead of a dirty local rebuild + - installed nodes now keep `nix-agent` alive across their own `switch-to-configuration` transaction long enough for activation to finish, which restored post-install `chainfire` and `nix-agent` convergence +- `Historical blocker (resolved on 2026-04-10):` direct build-time execution of `nix build .#checks.x86_64-linux.baremetal-iso-e2e` ran under sandboxed `nixbld1` and fell back to `TCG`. 
The exact lane is now a materialized runner: the check build succeeds quickly and emits `./result/bin/baremetal-iso-e2e`, and that runner executes the same `verify-baremetal-iso.sh` harness with host KVM and logs under `./work`. + +## 2026-04-10 Responsibility And Minimal-Surface Alignment + +- `Task:` `65a13e46-1376-4f37-a5c1-e520b5b376ec` +- `Authoring source decision:` `ultracloud.cluster` backed by `nix/lib/cluster-schema.nix` is now documented in `README.md`, `docs/README.md`, and `docs/testing.md` as the only supported cluster authoring source. `nix-nos` is explicitly reduced to legacy compatibility plus low-level network primitives. +- `Module boundary alignment:` `services.deployer`, `services.fleet-scheduler`, `services.nix-agent`, and `services.node-agent` descriptions now agree on the canonical layering `ultracloud.cluster -> deployer -> (nix-agent | fleet-scheduler -> node-agent)`. +- `Minimal-surface friction reduction:` `services.plasmavmc` and `services.k8shost` now wait only for local backing services that they actually use. When explicit remote endpoints are configured, they no longer hard-wire unrelated local control-plane units into startup ordering, which preserves a lighter standalone story for the VM-platform core and remote-provider deployments. +- `Validation alignment:` `supported-surface-guard` now requires contract markers for the supported authoring source, the constrained `nix-nos` role, and the standalone VM-platform story so docs drift becomes a failing regression. +- `Still open:` rollout-stack の default port mismatch は解消済み。残件は hardware bring-up と longer-duration durability proof。 + +## 2026-04-10 Supported Surface Final Proof + +- `Task:` `32f64c10-1b74-4d8a-8d7d-b2cc6bf6b4f0` +- `Guard + minimal-trial proof root:` `/mnt/d2/centra/photoncloud-monorepo/work/final-proofs/32f64c10-1b74-4d8a-8d7d-b2cc6bf6b4f0-final` + - `supported-surface-guard`: PASS. command `nix build .#checks.x86_64-linux.supported-surface-guard --no-link`; meta `/mnt/d2/centra/photoncloud-monorepo/work/final-proofs/32f64c10-1b74-4d8a-8d7d-b2cc6bf6b4f0-final/supported-surface-guard.meta`; log `/mnt/d2/centra/photoncloud-monorepo/work/final-proofs/32f64c10-1b74-4d8a-8d7d-b2cc6bf6b4f0-final/supported-surface-guard.log`. + - `single-node-trial-vm`: PASS. command `nix build .#single-node-trial-vm --no-link --print-out-paths`; meta `/mnt/d2/centra/photoncloud-monorepo/work/final-proofs/32f64c10-1b74-4d8a-8d7d-b2cc6bf6b4f0-final/single-node-trial-vm.meta`; log `/mnt/d2/centra/photoncloud-monorepo/work/final-proofs/32f64c10-1b74-4d8a-8d7d-b2cc6bf6b4f0-final/single-node-trial-vm.log`; output path `/nix/store/1nq4pkadm3lbxmhkr54iz7lgjd6vm7z3-nixos-vm`. + - `single-node-quickstart`: PASS. command `nix run .#single-node-quickstart`; meta `/mnt/d2/centra/photoncloud-monorepo/work/final-proofs/32f64c10-1b74-4d8a-8d7d-b2cc6bf6b4f0-final/single-node-quickstart.meta`; log `/mnt/d2/centra/photoncloud-monorepo/work/final-proofs/32f64c10-1b74-4d8a-8d7d-b2cc6bf6b4f0-final/single-node-quickstart.log`; success marker `single-node quickstart smoke passed`. +- `Publishable KVM suite root:` `/mnt/d2/centra/photoncloud-monorepo/work/publishable-kvm-suite` + - `environment.txt` captures `host_cpu_count=12`, `local_nix_max_jobs=6`, `local_nix_build_cores=2`, `photon_cluster_nix_max_jobs=6`, `photon_cluster_nix_build_cores=2`, `kvm_present=yes`, `kvm_access=rw`, `kvm_amd_nested=1`, `nix_builders=`, `finished_at=2026-04-10T09:36:09+09:00`, `exit_status=0`. + - `fresh-smoke`: PASS. 
command `nix run ./nix/test-cluster#cluster -- fresh-smoke`; meta `/mnt/d2/centra/photoncloud-monorepo/work/publishable-kvm-suite/fresh-smoke.meta`; log `/mnt/d2/centra/photoncloud-monorepo/work/publishable-kvm-suite/fresh-smoke.log`; success marker `Cluster validation succeeded`. + - `fresh-demo-vm-webapp`: PASS. command `nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp`; meta `/mnt/d2/centra/photoncloud-monorepo/work/publishable-kvm-suite/fresh-demo-vm-webapp.meta`; log `/mnt/d2/centra/photoncloud-monorepo/work/publishable-kvm-suite/fresh-demo-vm-webapp.log`; success markers include `PHOTON_VM_DEMO_WEB_READY` and the guest web health check on `http://10.62.10.10:8080/health`. + - `fresh-matrix`: PASS. command `nix run ./nix/test-cluster#cluster -- fresh-matrix`; meta `/mnt/d2/centra/photoncloud-monorepo/work/publishable-kvm-suite/fresh-matrix.meta`; log `/mnt/d2/centra/photoncloud-monorepo/work/publishable-kvm-suite/fresh-matrix.log`; success marker `Component matrix validation succeeded`. + - `run-publishable-kvm-suite`: PASS. command `./nix/test-cluster/run-publishable-kvm-suite.sh ./work/publishable-kvm-suite`; environment `/mnt/d2/centra/photoncloud-monorepo/work/publishable-kvm-suite/environment.txt`; final stdout marker `publishable KVM suite passed; logs in ./work/publishable-kvm-suite`. +- `Fixed while proving the surface:` + - `NODEAGENT-FIX-20260410-01`: reboot-time PID reuse could make `node-agent` treat `native-daemon` as the resurrected `native-web` instance after worker reboot, stalling `fresh-smoke` at native runtime recovery. `deployer/crates/node-agent/src/process.rs` now persists argv + boot-id metadata, validates the live `/proc//cmdline`, and refuses to signal or reuse mismatched processes from stale pidfiles. + - `HARNESS-FIX-20260410-01`: `run-publishable-kvm-suite` exposed a control-plane LightningStor bootstrap race that was not consistently hit by ad-hoc reruns. `nix/test-cluster/node01.nix` now holds `lightningstor.service` behind explicit local control-plane and worker-replica TCP readiness with a longer start timeout, and `nix/test-cluster/run-cluster.sh` now waits the worker storage agents before gating the control-plane LightningStor unit. +- `Still open after the final supported-surface proof:` real hardware `baremetal-iso` smoke. + +## 2026-04-10 baremetal-iso-e2e Local-KVM Exact Lane + +- `Task:` `0de75570-dabd-471b-95fe-5898c54e2e8c` +- `Check build output:` `nix build .#checks.x86_64-linux.baremetal-iso-e2e` now materializes `./result/bin/baremetal-iso-e2e` instead of trying to execute QEMU inside the daemon sandbox. +- `Exact proof root:` `/mnt/d2/centra/photoncloud-monorepo/work/baremetal-iso-e2e/0de75570-dabd-471b-95fe-5898c54e2e8c` +- `Outer runner evidence:` `environment.txt` records `execution_model=materialized-check-runner`, `nix_builders=` (empty), `kvm_present=yes`, `kvm_access=rw`, and the local CPU-derived Nix parallelism. +- `Exact check build:` PASS. command `nix build .#checks.x86_64-linux.baremetal-iso-e2e`; output path is a runner package that ships `bin/baremetal-iso-e2e` plus `share/ultracloud/README.txt` documenting the sandbox/TCG reason for the materialized execution model. +- `Exact runner:` PASS. 
command `./result/bin/baremetal-iso-e2e ./work/baremetal-iso-e2e/0de75570-dabd-471b-95fe-5898c54e2e8c`; meta `/mnt/d2/centra/photoncloud-monorepo/work/baremetal-iso-e2e/0de75570-dabd-471b-95fe-5898c54e2e8c/baremetal-iso-e2e.meta`; log `/mnt/d2/centra/photoncloud-monorepo/work/baremetal-iso-e2e/0de75570-dabd-471b-95fe-5898c54e2e8c/baremetal-iso-e2e.log`. +- `Inner runtime evidence:` state dir `/mnt/d2/centra/photoncloud-monorepo/work/baremetal-iso-e2e/0de75570-dabd-471b-95fe-5898c54e2e8c/state`; `state/environment.txt` records `vm_accelerator_mode=kvm`; success markers in `baremetal-iso-e2e.log` include `ULTRACLOUD_MARKER desired-system-active.iso-control-plane-01`, `ULTRACLOUD_MARKER desired-system-active.iso-worker-01`, and `Canonical ISO bare-metal QEMU verification succeeded`. +- `Remaining delta vs direct runtime proof:` the harness is now identical because both `nix run ./nix/test-cluster#cluster -- baremetal-iso` and `./result/bin/baremetal-iso-e2e` call `nix/test-cluster/verify-baremetal-iso.sh`. The only intentional difference is execution entrypoint: `nix build` materializes the runner because daemon-sandboxed `nixbld` builds would otherwise lose host KVM and degrade to `TCG`. + +## 2026-04-10 Durability And Product-Boundary Hardening + +- `Task:` `541356be-b289-4583-ba40-cbf46b0f9680` +- `Guard rerun:` PASS. command `nix build .#checks.x86_64-linux.supported-surface-guard --no-link`. +- `Runtime rerun:` PASS. command `nix run ./nix/test-cluster#cluster -- fresh-matrix`; success marker `Component matrix validation succeeded`. +- `Durability proof:` PASS. command `nix run ./nix/test-cluster#cluster -- durability-proof`; artifact root `/mnt/d2/centra/photoncloud-monorepo/work/durability-proof/20260410T120618+0900`; convenience symlink `/mnt/d2/centra/photoncloud-monorepo/work/durability-proof/latest`. 
+- `ChainFire proof:` `chainfire-backup-response.json` and `chainfire-restored-response.json` return the same logical payload, and `chainfire-after-delete.out` returns 404 after the DELETE. +- `FlareDB proof:` `flaredb-backup.json` and `flaredb-restored.json` return the same SQL row, and `flaredb-after-delete.json` returns an empty set. +- `Deployer proof:` `deployer-pre-register-request.json` serves as the backup artifact; `deployer-backup-list.json` observes the pre-registered node, `deployer-post-restart-list.json` confirms it survives a `deployer.service` restart, and `deployer-replayed-list.json` confirms the summary is unchanged after replaying the same request. `deployer_restore_mode` in `result.json` is `admin pre-register request replay with pre/post-restart list verification`. +- `CoronaFS failure injection:` `coronafs-node04-local-state.json` keeps `node_local=true` and the materialized path while the controller is down, and `coronafs-node04-capabilities.json` keeps the node-only capability split (`supports_controller_api=false`, `supports_node_api=true`). +- `LightningStor failure injection:` `lightningstor-put-during-node05-outage.json`, `lightningstor-head-during-node05-outage.json`, `lightningstor-object-during-node05-outage.txt`, and `lightningstor-object-after-repair.txt` capture writes during the node05 outage and the read-back after repair. +- `FiberLB supported limitation:` `fiberlb/crates/fiberlb-server/src/healthcheck.rs`, `README.md`, `docs/testing.md`, `docs/component-matrix.md`, and `flake.nix` now fix HTTPS backend health as a limited contract without TLS certificate verification. +- `k8shost boundary:` `README.md`, `docs/testing.md`, `docs/component-matrix.md`, `k8shost/README.md`, `nix/test-cluster/README.md`, and `flake.nix` pin `k8shost` to the API/control-plane product surface only and align `k8shost-cni`, `k8shost-controllers`, `lightningstor-csi` as archived non-product. +- `Proof-lane hardening done during this tranche:` the first `durability-proof` run failed on an unsupported `DROP TABLE` in the FlareDB cleanup tail, so the lane was reworked around unique namespaces; it then failed on an unbound local in the cleanup trap, so the trap cleanup was fixed with `${var:-}` and a guarded tunnel shutdown. The current lane exits zero and leaves its artifacts. + +## 2026-04-10 Rollout Bundle HA And DR Hardening + +- `Task:` `a41343c5-116e-4313-8751-b333472f931c` +- `Operator doc:` `docs/rollout-bundle.md` +- `Verification reruns:` `nix build .#checks.x86_64-linux.portable-control-plane-regressions`, `nix build .#checks.x86_64-linux.fleet-scheduler-e2e`, and `nix build .#checks.x86_64-linux.deployer-vm-rollback` all passed on 2026-04-10 with local-only Nix settings. +- `Durability rerun:` `nix run ./nix/test-cluster#cluster -- durability-proof` passed again from a clean KVM cluster and wrote artifacts under `/mnt/d2/centra/photoncloud-monorepo/work/durability-proof/20260410T123535+0900`. +- `Supported deployer boundary:` single-writer deployer with restart-in-place or cold-standby restore. ChainFire-backed multi-instance failover is explicitly unsupported for now and the restore runbook is fixed to `cluster-state apply + preserved pre-register request replay + admin verification`. +- `Nix-agent proof:` `nix build .#checks.x86_64-linux.deployer-vm-rollback` passed on 2026-04-10 and is now the canonical reproducible proof for `health_check_command`, rollback, and `rolled-back` partial failure recovery semantics. +- `Fleet-scheduler semantics:` `fresh-smoke` and `fleet-scheduler-e2e` remain the release proofs for short-lived `draining` maintenance, fail-stop worker loss, and replica restoration. Long-duration maintenance and large-cluster drain choreography stay scope-limited rather than silently implied.
+- `Node-agent contract:` product docs now fix `${stateDir}/pids/*.log` as the per-instance log location, `${stateDir}/pids/*.meta.json` as stale-pid metadata, secret delivery as caller-provided env or mounted files only, host-path volumes as pass-through only, and upgrades as replace-and-reconcile rather than in-place patching. + +## 2026-04-10 Core Control Plane Operator Lifecycle Proofs + +- `Task:` `dcdc961a-0aa6-47c3-aeba-a1c67bca27b7` +- `Operator doc:` `docs/control-plane-ops.md` +- `Focused proof:` `./nix/test-cluster/run-core-control-plane-ops-proof.sh /mnt/d2/centra/photoncloud-monorepo/work/core-control-plane-ops-proof/20260410T172148+09:00` +- `Focused proof result:` passed on 2026-04-10 and wrote `result.json`, `scope-fixed-contract.json`, `iam-key-rotation-tests.log`, `iam-credential-rotation-tests.log`, `iam-mtls-rotation-tests.log`, and the contract-marker logs under `/mnt/d2/centra/photoncloud-monorepo/work/core-control-plane-ops-proof/20260410T172148+09:00`. +- `Supported-surface guard:` rerun after the doc and proof updates so the public lifecycle contract is now guarded alongside the existing supported-surface wording. +- `ChainFire boundary:` dynamic membership, replace-node, and scale-out are now explicit non-supported actions on the product surface. The supported path is fixed-membership restore or whole-cluster replacement anchored by the existing `durability-proof` backup/restore lane. +- `FlareDB boundary:` online migration and schema evolution are now fixed to an additive-first, backup/restore-gated operator contract. Destructive DDL and fully automated online migration are explicit non-supported boundaries for this release rather than implied future promises. +- `IAM boundary:` bootstrap hardening now requires explicit admin token, signing key, and 32-byte `IAM_CRED_MASTER_KEY` inputs in docs. The standalone proof reruns signing-key rotation, credential overlap-and-revoke rotation, and mTLS overlap-and-cutover rotation tests while checking the hardening markers in `iam-server`; multi-node IAM failover remains unsupported. + +## 2026-04-10 Edge And Trial-Surface Productization + +- `Task:` `cc24ac5a-b940-4a32-9136-d706ecadf875` +- `Operator doc:` `docs/edge-trial-surface.md` +- `Component docs:` `apigateway/README.md`, `nightlight/README.md`, and `creditservice/README.md` +- `Helper:` `./nix/test-cluster/work-root-budget.sh status` now reports `./work` disk usage, soft budgets, and cleanup plus `nix store gc` guidance without mutating state by default. +- `Edge bundle boundary:` APIGateway is now documented as stateless replicated behind external L4 or VIP distribution, but restart-based rollout remains the only supported config distribution or reload model proven on this branch. NightLight is fixed to a single-node WAL/snapshot product shape with process-wide retention, and CreditService export plus migration is fixed to offline export/import or backend-native snapshots instead of live mixed-writer migration. +- `Trial boundary:` `single-node-trial-vm` and `single-node-quickstart` remain the only supported lightweight trial surface. OCI/Docker remains intentionally unsupported because it would not prove the same guest-kernel, KVM, `/dev/net/tun`, and OVS/libvirt contract. 
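+
+The `work-root-budget.sh` helper mentioned above is, at its core, a non-mutating walk of `./work` compared against a soft budget. The following is only a minimal Rust sketch of that status-style check; the budget value, layout, and messages are illustrative assumptions, not the script's real defaults:
+
+```rust
+use std::fs;
+use std::path::Path;
+
+/// Recursively sum file sizes under `root` (symlinks are not followed).
+fn dir_size_bytes(root: &Path) -> u64 {
+    let mut total = 0;
+    if let Ok(entries) = fs::read_dir(root) {
+        for entry in entries.flatten() {
+            let Ok(meta) = entry.metadata() else { continue };
+            if meta.is_dir() {
+                total += dir_size_bytes(&entry.path());
+            } else if meta.is_file() {
+                total += meta.len();
+            }
+        }
+    }
+    total
+}
+
+fn main() {
+    // Illustrative soft budget; the real helper carries its own configuration.
+    let soft_budget_bytes: u64 = 200 * 1024 * 1024 * 1024; // 200 GiB
+    let used = dir_size_bytes(Path::new("./work"));
+    println!("./work usage: {} GiB", used / (1024 * 1024 * 1024));
+    if used > soft_budget_bytes {
+        // Status mode only reports; it never deletes anything by itself.
+        println!("over soft budget: consider pruning dated proof logs or running `nix store gc`");
+    } else {
+        println!("within soft budget");
+    }
+}
+```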
+ +## 2026-04-10 Provider And VM-Hosting Reality Proof + +- `Task:` `41a074a3-dc5c-42fc-979e-c8ebf9919d55` +- `Focused proof lane:` `nix run ./nix/test-cluster#cluster -- provider-vm-reality-proof` +- `Focused proof result:` passed on 2026-04-10 and wrote `result.json`, `meta.json`, journals, and provider or VM-hosting artifacts under `/mnt/d2/centra/photoncloud-monorepo/work/provider-vm-reality-proof/20260410T135827+0900`. +- `Provider artifacts:` `network-provider/prismnet-port-create.json`, `network-provider/prismnet-security-group-after-add.json`, `network-provider/flashdns-workload-authoritative-answer.txt`, `network-provider/flashdns-service-authoritative-answer.txt`, `network-provider/fiberlb-drain-summary.txt`, `network-provider/fiberlb-tcp-health-before-drain.txt`, and `network-provider/fiberlb-tcp-health-after-restore.txt` fix the current local-KVM proof to tenant network lifecycle, authoritative DNS answers, and listener drain or re-convergence. +- `VM-hosting artifacts:` `vm-hosting/vm-create-response.json`, `vm-hosting/root-volume-before-migration.json`, `vm-hosting/root-volume-after-migration.json`, `vm-hosting/data-volume-after-migration.json`, `vm-hosting/migration-summary.json`, `vm-hosting/prismnet-port-after-migration.json`, and `vm-hosting/demo-state-after-post-migration-restart.json` fix the current release proof to KVM shared-storage migration, CoronaFS handoff, and post-migration restart on the worker pair. +- `Scope-fixed gaps:` real OVS/OVN dataplane validation, native BGP or BFD peer interop with hardware VIP ownership, and real-hardware VM migration or storage handoff remain outside the supported local-KVM surface and are now explicit docs or guard limits rather than implied release claims. + +## chainfire + +- `Responsibility:` the replicated coordination store for all of UltraCloud. Holds KV, lease, watch, the cluster membership view, and the state anchor for the rollout stack. +- `Canonical entrypoint:` `nix/modules/chainfire.nix`; `chainfire/crates/chainfire-server/src/main.rs`; the supported API is `chainfire/proto/chainfire.proto`. +- `Current evidence:` `README.md` declares `MemberList` / `Status` as the supported surface; `chainfire/crates/chainfire-server/src/rest.rs` has health and member add; `docs/testing.md` defines the quickstart and HA proofs; `nix/single-node/base.nix` and `nix/nodes/vm-cluster/*` are the canonical wiring; the 2026-04-10 `durability-proof` saved logical KV backup/restore in `chainfire-backup-response.json` / `chainfire-restored-response.json`, and `rollout-soak` saved a live proof after a fixed-membership restart in `/mnt/d2/centra/photoncloud-monorepo/work/rollout-soak/20260410T164549+0900/chainfire-post-restart-put.json` and `post-control-plane-restarts.json`. +- `Unproven:` the rolling upgrade procedure; membership changes on a real 3-node deployment; a post-power-loss recovery runbook. +- `P0:` no fatal file-level breakage found in the current static survey. +- `P1:` `CF-P1-01` advanced on 2026-04-10 from a scope freeze to having a live-restart proof. Dynamic membership / scale-out / replace-node remain explicitly unsupported on the supported surface, but fixed-membership restart itself was promoted to a live KVM proof by `rollout-soak`. The only remaining next step is a dedicated KVM proof if live membership mutation is ever productized. +- `P2:` `CF-P2-01` internal pruning of `chainfire-core` is in progress on the current branch, so the final separation of the public boundary from the workspace-internal boundary still needs to be settled. +- `Dependencies:` local disk; host networking; referenced by `flaredb`, `iam`, `deployer`, `fleet-scheduler`, `nix-agent`, `node-agent`, `coronafs`. + +## flaredb + +- `Responsibility:` replicated KV/SQL metadata store. The landing place for each service's metadata, quota state, object metadata, and tenant network state. +- `Canonical entrypoint:`
`nix/modules/flaredb.nix`; `flaredb/crates/flaredb-server/src/main.rs`; REST is `flaredb/crates/flaredb-server/src/rest.rs`. +- `Current evidence:` `README.md` declares `POST /api/v1/sql` and `GET /api/v1/tables` as supported; `flaredb/crates/flaredb-server/src/rest.rs` has SQL/KV/scan/member add; `docs/testing.md` explains the control-plane proof and the `fresh-matrix` dependency; `nix/modules/flaredb.nix` generates `pdAddr` and the namespace mode; the 2026-04-10 `rollout-soak` saved additive SQL after a member restart in `/mnt/d2/centra/photoncloud-monorepo/work/rollout-soak/20260410T164549+0900/flaredb-post-restart-create.json`, `flaredb-post-restart-insert.json`, `flaredb-post-restart.json`, and `run-core-control-plane-ops-proof.sh` fixed destructive DDL / fully automated online migration as outside the supported surface in `/mnt/d2/centra/photoncloud-monorepo/work/core-control-plane-ops-proof/20260410T172148+09:00/scope-fixed-contract.json` and `flaredb-migration-contract.log`. +- `Unproven:` storage pressure and multi-node repair on real hardware. Fully automated online migration and destructive DDL online cutover are intentionally unsupported in this release. +- `P0:` no fatal file-level breakage found in the current static survey. +- `P1:` `FDB-P1-01` became scope-fixed final on 2026-04-10. Logical backup/restore of the supported SQL/KV surface is fixed by `durability-proof` and the docs, and online migration / schema evolution is organized as additive-first on top of a backup/restore baseline. `rollout-soak` saves additive SQL after a member restart as a live KVM artifact, and `run-core-control-plane-ops-proof.sh` fixed destructive DDL and fully automated online migration as outside the supported surface in `/mnt/d2/centra/photoncloud-monorepo/work/core-control-plane-ops-proof/20260410T172148+09:00/scope-fixed-contract.json`. If this is revisited, a destructive online migration proof belongs in a separate tranche as a scope extension. +- `P2:` `FDB-P2-01` the per-namespace `strong` / `eventual` policy is buried in module defaults and is still weak as an operator-facing contract. +- `Dependencies:` uses `chainfire` for placement/coordination; local disk; referenced by `iam`, `prismnet`, `flashdns`, `fiberlb`, `plasmavmc`, `lightningstor`, `creditservice`, `k8shost`. + +## iam + +- `Responsibility:` identity, token issuance, authn, authz, and tenant principal management. +- `Canonical entrypoint:` `nix/modules/iam.nix`; `iam/crates/iam-server/src/main.rs`; the API package is `iam/crates/iam-api/src/lib.rs`. +- `Current evidence:` `README.md` and `docs/component-matrix.md` treat it as a core component; `nix/modules/iam.nix` canonically generates the `chainfire` / `flaredb` connections; the `iam-authn`, `iam-authz`, `iam-store` crates are split out; `fresh-matrix` and the gateway path assume IAM via credit/k8shost/plasmavmc; `run-core-control-plane-ops-proof.sh` saved `iam-key-rotation-tests.log`, `iam-credential-rotation-tests.log`, `iam-mtls-rotation-tests.log`, `scope-fixed-contract.json`, `result.json` under `/mnt/d2/centra/photoncloud-monorepo/work/core-control-plane-ops-proof/20260410T172148+09:00`, fixing bootstrap hardening, signing-key rotation, credential overlap rotation, and mTLS overlap rotation as standalone proofs. +- `Unproven:` multi-node IAM failover; a same-lane lifecycle proof across the full backend matrix. +- `P0:` no fatal file-level breakage found in the current static survey. +- `P1:` `IAM-P1-01` became scope-fixed final on 2026-04-10. Bootstrap hardening and token/signing-key rotation are fixed standalone by `docs/control-plane-ops.md` and `run-core-control-plane-ops-proof.sh`, and the same proof root now also saves credential overlap-and-revoke rotation and mTLS overlap-and-cutover rotation. Multi-node IAM failover was explicitly moved outside the supported surface. If revisited, a clustered IAM failover proof belongs in a separate tranche as a scope extension. +- `P2:` `IAM-P2-01` the harness does not yet cover the full backend matrix across `flaredb` / `postgres` / 
`sqlite` / `memory`. +- `Dependencies:` `flaredb` is the primary storage; optional `chainfire`; `prismnet`, `flashdns`, `fiberlb`, `plasmavmc`, `lightningstor`, `creditservice`, `k8shost`, `apigateway` are consumers. + +## prismnet + +- `Responsibility:` tenant network control plane. Handles VPCs, subnets, ports, routers, security groups, and service IP pools. +- `Canonical entrypoint:` `nix/modules/prismnet.nix`; `prismnet/crates/prismnet-server/src/main.rs`; the API is `prismnet/crates/prismnet-api/proto/prismnet.proto`. +- `Current evidence:` `docs/testing.md` and `README.md` name VPC/subnet/port and security-group ACL add/remove under `fresh-matrix` as the canonical proof; `prismnet/crates/prismnet-server/src/services/*` holds the service implementations; `prismnet/crates/prismnet-server/src/ovn/client.rs` has the OVN client; `nix/modules/prismnet.nix` generates the binary-consumed config. +- `Unproven:` the OVS/OVN dataplane on real hardware; a real-hardware proof of the DHCP/metadata service; multi-rack network integration. +- `P0:` no fatal file-level breakage found in the current static survey. +- `P1:` `PRISMNET-P1-01` was narrowed on 2026-04-10. `provider-vm-reality-proof` now saves VPC/subnet/port lifecycle, security-group ACL add/remove, and attached-VM networking artifacts to a dated root in the local KVM lab. The remaining next step is promoting the real OVS/OVN dataplane and hardware-switch integration to a release proof. +- `P2:` `PRISMNET-P2-01` `ovn/mock.rs` remains close by, so the boundary between the supported path and the archived/test path needs continued monitoring. +- `Dependencies:` `iam`, `flaredb`, optional `chainfire`; consumers are `flashdns`, `fiberlb`, `plasmavmc`, `k8shost`. + +## flashdns + +- `Responsibility:` authoritative DNS publication. Holds tenant records, reverse zones, and the DNS handlers. +- `Canonical entrypoint:` `nix/modules/flashdns.nix`; `flashdns/crates/flashdns-server/src/main.rs`; `flashdns/crates/flashdns-server/src/dns/*`. +- `Current evidence:` `docs/testing.md` and `README.md` treat record publication under `fresh-matrix` as the canonical proof; the `flashdns` server has the record/zone/reverse-zone services; `nix/modules/flashdns.nix` generates the binary-consumed config. +- `Unproven:` real port 53 exposure; upstream/secondary integration; failover with real network gear. +- `P0:` no fatal file-level breakage found in the current static survey. +- `P1:` `FLASHDNS-P1-01` was narrowed on 2026-04-10. `provider-vm-reality-proof` now saves authoritative workload/service answers to a dated root, so local-KVM publication evidence is in the release lane. The remaining next step is extending real port 53 exposure and upstream/secondary interop to a hardware or external-network proof. +- `P2:` `FLASHDNS-P2-01` resolved on 2026-04-10. The `single-node dev` optional bundle now has TCP health gating in `nix/single-node/surface.nix`. +- `Dependencies:` `iam`, `flaredb`, optional `chainfire`; the publication sources are `k8shost` and `fleet-scheduler`. + +## fiberlb + +- `Responsibility:` service publication / VIP / L4-L7 load balancing / native BGP advertisement. +- `Canonical entrypoint:` `nix/modules/fiberlb.nix`; `fiberlb/crates/fiberlb-server/src/main.rs`; the dataplane is `dataplane.rs`, `l7_dataplane.rs`, `vip_manager.rs`, `bgp_client.rs`. +- `Current evidence:` `README.md` and `docs/testing.md` treat TCP and TLS-terminated `Https` / `TerminatedHttps` listeners under `fresh-matrix` as the canonical proof; the server code has native BGP/BFD, VIP ownership, the TLS store, and the L7 dataplane implementation; the L4 algorithms have in-tree tests. +- `Unproven:` interop with real BGP peers; a hardware proof of L2/VIP ownership; IPv6 and mixed peer topologies. +- `P0:` no fatal file-level breakage found in the current static survey. +- `P1:` `FIBERLB-P1-01` became scope-fixed on 2026-04-10. The HTTPS health check in `fiberlb/crates/fiberlb-server/src/healthcheck.rs` still does not verify backend TLS certificates, but the reason and the supported scope (`TCP 
reachability + HTTP status`) are now fixed in the docs, the guard, and a source comment. Future CA-aware verification is a separate tranche. +- `P1:` `FIBERLB-P1-02` was narrowed on 2026-04-10. `provider-vm-reality-proof` now saves listener publication, backend disable, drain, restore, and re-convergence artifacts to a dated root. The remaining next step is extending native BGP/BFD peer interop and hardware VIP ownership to a real network proof. +- `P2:` `FIBERLB-P2-01` resolved on 2026-04-10. The `single-node dev` optional bundle now has TCP health gating in `nix/single-node/surface.nix`. +- `Dependencies:` `iam`, `flaredb`, optional `chainfire`; the publication consumers are `k8shost` and `fleet-scheduler`; real network peers are required. + +## plasmavmc + +- `Responsibility:` the tenant VM control plane and worker agent. Holds VM lifecycle, image/materialization, worker registration, and hypervisor integration. +- `Canonical entrypoint:` `nix/modules/plasmavmc.nix`; `plasmavmc/crates/plasmavmc-server/src/main.rs`; the supported public backend is `plasmavmc-kvm`. +- `Current evidence:` `README.md` states the KVM-only public contract; `docs/testing.md` makes `HYPERVISOR_TYPE_KVM` the canonical proof via `single-node-quickstart`, `fresh-smoke`, `fresh-matrix`; `vm_service.rs` treats anything other than `HYPERVISOR_TYPE_KVM` as outside the public surface; `volume_manager.rs` has the `coronafs` / `lightningstor` integration. +- `Unproven:` migration / storage handoff on real hardware; long-running guest upgrades; recovery under combined network and storage faults. +- `P0:` no fatal file-level breakage found in the current static survey. +- `P1:` `PLASMAVMC-P1-01` was narrowed on 2026-04-10. `provider-vm-reality-proof` now saves shared-storage migration, PrismNet-attached post-migration networking, CoronaFS handoff, and post-migration restart state to a dated root. The remaining next step is adding a release proof for real-hardware migration and storage handoff. +- `P2:` `PLASMAVMC-P2-01` archived Firecracker / mvisor code remains in-tree, so backflow into the supported surface needs continued guarding. +- `Dependencies:` `iam`, `flaredb`, `prismnet`, optional `chainfire`, `lightningstor`, `coronafs`, host KVM/QEMU. + +## coronafs + +- `Responsibility:` the mutable VM volume layer. Manages raw volumes and exports them to workers via `qemu-nbd`. +- `Canonical entrypoint:` `nix/modules/coronafs.nix`; `coronafs/crates/coronafs-server/src/main.rs`; the product description is `coronafs/README.md`. +- `Current evidence:` `coronafs/README.md` states its split as the mutable VM-volume layer; `coronafs-server` has `/healthz` and the volume/export APIs; `docs/testing.md` covers `plasmavmc + coronafs + lightningstor` as proof targets in `fresh-matrix`; `plasmavmc/volume_manager.rs` has a deep integration. +- `Unproven:` long-duration endurance of recovery after export interruption; the latency budget on real disks and real networks. +- `P0:` no fatal file-level breakage found in the current static survey. +- `P1:` `CORONAFS-P1-01` resolved on 2026-04-10. The quickstart health URL in `nix/single-node/surface.nix` was corrected to `http://127.0.0.1:50088/healthz`. +- `P1:` `CORONAFS-P1-02` resolved on 2026-04-10. `durability-proof` has a canonical failure-injection lane that verifies node-local materialized volume reads and the node-only capability split during a controller outage. +- `P2:` `CORONAFS-P2-01` a storage benchmark exists, but the recovery path still carries too little weight in the canonical publish gate. +- `Dependencies:` `qemu-nbd`, `qemu-img`, local disk; optional `chainfire` metadata backend; the primary consumer is `plasmavmc`. + +## lightningstor + +- `Responsibility:` object storage and VM image backing. Has a metadata plane and a data node plane. +- `Canonical entrypoint:` `nix/modules/lightningstor.nix`; `lightningstor/crates/lightningstor-server/src/main.rs`; `lightningstor/crates/lightningstor-node/src/main.rs`; the S3 path is `src/s3/*`. +- `Current evidence:` `README.md` declares bucket versioning / policy / tagging / object version listing as the supported 
surface; `docs/testing.md` covers bucket metadata and the object-version APIs as proof targets in `fresh-matrix`; the server has S3 auth, the distributed backend, and the repair queue; the module has metadata/data/all-in-one modes. +- `Unproven:` real-hardware failover of the distributed backend; S3 compatibility breadth; cold-start image distribution on hardware. +- `P0:` no fatal file-level breakage found in the current static survey. +- `P1:` `LIGHTNINGSTOR-P1-01` resolved on 2026-04-10. `durability-proof` saves write/head/read during the node05 outage and repair/read-back after service restore as canonical failure-injection artifacts. +- `P2:` `LIGHTNINGSTOR-P2-01` resolved on 2026-04-10. The `single-node dev` optional bundle now has TCP health gating in `nix/single-node/surface.nix`. +- `Dependencies:` `iam`, `flaredb`, optional `chainfire`; optional `lightningstor-node`; consumers are `plasmavmc` and tenant object clients. + +## k8shost + +- `Responsibility:` the tenant workload API surface. Handles pods/deployments/services and projects them onto `prismnet`, `flashdns`, `fiberlb`, and optional `creditservice`. +- `Canonical entrypoint:` `nix/modules/k8shost.nix`; `k8shost/crates/k8shost-server/src/main.rs`; the API protobuf is `k8shost/crates/k8shost-proto/proto/k8s.proto`. +- `Current evidence:` `k8shost/README.md` defines the supported scope; `README.md` documents `WatchPods` as a bounded snapshot stream; `k8shost-server/src/services/pod.rs` implements a `ReceiverStream`-based `WatchPods`; `docs/testing.md` covers the API contract as a proof target in `fresh-smoke` / `fresh-matrix`; on 2026-04-10 it was pinned to the API/control-plane product surface only across the docs, the guard, and the TODO. +- `Unproven:` a real workload runtime; the tenant networking dataplane with real CNI/CSI; node-level execution semantics. +- `P0:` `K8SHOST-P0-01` resolved on 2026-04-10. The real workload dataplane (`k8shost-cni`, `k8shost-controllers`, `lightningstor-csi`) was fixed as archived non-product and the product narrative aligned to the API/control-plane scope only. +- `P1:` `K8SHOST-P1-01` became scope-resolved on 2026-04-10. The fact that the canonical proof is API-contract-centric is itself documented as the product boundary, and a real pod runtime was removed from the product claims. +- `P2:` `K8SHOST-P2-01` resolved on 2026-04-10. The non-canonical status of the archived scaffolds is continuously monitored via `supported-surface-guard` contract markers. +- `Dependencies:` `iam`, `flaredb`, `chainfire`, `prismnet`, `flashdns`, `fiberlb`, optional `creditservice`. + +## apigateway + +- `Responsibility:` the external API/proxy surface. Holds routes, auth providers, credit providers, and request mediation. +- `Canonical entrypoint:` `nix/modules/apigateway.nix`; `apigateway/crates/apigateway-server/src/main.rs`. +- `Current evidence:` `node06` runs `apigateway` as the canonical gateway node; `docs/testing.md` and `nix/test-cluster/README.md` include API-gateway-mediated flows in `fresh-matrix`; the server code has routes, auth, credit providers, upstream timeouts, and request IDs. +- `Unproven:` multi-node HA; config distribution / reload; the TLS termination strategy; gateway-as-product docs. +- `P0:` no fatal file-level breakage found in the current static survey. +- `P1:` `APIGW-P1-01` became scope-fixed on 2026-04-10. The docs now fix APIGateway as supported stateless replicated behind external L4/VIP, config distribution as rendered config + restart-based rollout, and live in-process reload as unsupported. What remains for the next stage is adding a dedicated multi-gateway HA proof. +- `P2:` `APIGW-P2-01` the release proof leans on indirect dependence on `node06` and `fresh-matrix`; there is no dedicated smoke gate. +- `Dependencies:` upstream services; optional `iam` / `creditservice` providers; external clients. + +## nightlight + +- `Responsibility:` metrics ingestion and query. Has the Prometheus remote_write / query APIs and gRPC query/admin. +- `Canonical entrypoint:` `nix/modules/nightlight.nix`; `nightlight/crates/nightlight-server/src/main.rs`; the API protos are 
`nightlight/crates/nightlight-api/proto/*`. +- `Current evidence:` `nightlight-server` binds both HTTP and gRPC; `node06` runs it on the gateway node; `docs/testing.md` and `nix/test-cluster/README.md` describe the host-forward proof of the NightLight HTTP surface; there is a local WAL/snapshot/retention loop. +- `Unproven:` a replicated metrics topology; large retention; sustained remote_write load; tenant isolation. +- `P0:` no fatal file-level breakage found in the current static survey. +- `P1:` `NIGHTLIGHT-P1-01` became scope-fixed on 2026-04-10. NightLight's product shape is fixed as a single-node WAL/snapshot service, and the docs and guard reflect that a replicated / HA metrics path is unsupported. +- `P2:` `NIGHTLIGHT-P2-01` was narrowed on 2026-04-10. The docs now fix the tenant boundary as deployment-scoped or upstream-auth-scoped; hard in-process multi-tenant auth and per-tenant retention are not part of the current product contract. The next stage is adding an auth- or quota-aware multi-tenant proof. +- `Dependencies:` local disk; optional `apigateway`; external metric writers/readers. + +## creditservice + +- `Responsibility:` quota, wallet, reservation, admission control. +- `Canonical entrypoint:` `nix/modules/creditservice.nix`; `creditservice/crates/creditservice-server/src/main.rs`; the product scope is `creditservice/README.md`. +- `Current evidence:` `creditservice/README.md` states the supported scope and non-goals; `docs/testing.md` covers the quota/wallet/reservation/API-gateway path as a proof target in `fresh-matrix`; the module has `iamAddr`, `flaredbAddr`, and an optional SQL backend; it runs on the canonical gateway node `node06`. +- `Unproven:` backend migration; operation separated from a finance system; the export/reporting path. +- `P0:` no fatal file-level breakage found in the current static survey. +- `P1:` `CREDIT-P1-01` the boundary needs to be maintained so the product narrative does not grow past the README non-goals into a finance ledger. +- `P2:` `CREDIT-P2-01` was narrowed on 2026-04-10. Export and backend migration are fixed in the README as offline export/import or backend-native snapshot workflows, and live mixed-writer migration is explicitly unsupported. The next stage is adding a dedicated export proof. +- `Dependencies:` `iam`, `flaredb`, optional `chainfire`; consumers are `apigateway`, `k8shost`, and the tenant admission flow. + +## deployer + +- `Responsibility:` the bootstrap and rollout-intent authority. Holds `/api/v1/phone-home`, install plans, the desired-system reference, and the cluster inventory. +- `Canonical entrypoint:` `nix/modules/deployer.nix`; `deployer/crates/deployer-server/src/main.rs`; the route wiring is `deployer/crates/deployer-server/src/lib.rs`. +- `Current evidence:` `/api/v1/phone-home` exists as a server route; `nix/modules/deployer.nix` has the package/service/cluster-state seed; `docs/testing.md`, `docs/rollout-bundle.md`, `nix/test-cluster/README.md` name `baremetal-iso`, `baremetal-iso-e2e`, `deployer-vm-smoke`, `deployer-bootstrap-e2e`, `durability-proof`, `rollout-soak` as the canonical proofs; `verify-baremetal-iso.sh` walks the install path end to end; the 2026-04-10 `durability-proof` saved `deployer-pre-register-request.json`, `deployer-backup-list.json`, `deployer-post-restart-list.json`, `deployer-replayed-list.json`, and `rollout-soak` saved a longer-run live restart and the release boundary markers in `/mnt/d2/centra/photoncloud-monorepo/work/rollout-soak/20260410T164549+0900/deployer-post-restart-nodes.json`, `scope-fixed-contract.json`, `deployer-scope-fixed.txt`, `deployer-journal.log`. +- `Unproven:` real-hardware USB/BMC install; true HA for the deployer itself; an implementation of ChainFire-backed multi-instance active failover; real-hardware confirmation of operator disaster recovery. +- `P0:` `DEPLOYER-P0-01` the current canonical bare-metal proof goes only as far as QEMU-as-hardware; there is no real-hardware regression lane yet. +- `P1:` `DEPLOYER-P1-01` became scope-fixed final on 2026-04-10. The release contract is fixed as one active writer plus optional 
cold-standby restore with `ultracloud.cluster` state re-apply and preserved admin request replay, and automatic ChainFire-backed multi-instance failover was explicitly moved outside the supported surface. `rollout-soak` saved the live restart proof and boundary markers in `/mnt/d2/centra/photoncloud-monorepo/work/rollout-soak/20260410T164549+0900/deployer-post-restart-nodes.json`, `scope-fixed-contract.json`, `deployer-scope-fixed.txt`. If revisited, a true HA implementation belongs in a separate ticket as a scope extension. +- `P2:` `DEPLOYER-P2-01` the standard operational pattern for supplying `bootstrapFlakeBundle` and the optional binary cache in production is still under-documented. +- `Dependencies:` `chainfire`; `nix-agent`; `install-target`; the ISO/first-boot path; optional binary cache. + +## fleet-scheduler + +- `Responsibility:` the non-Kubernetes native service scheduler. Holds cluster-native service placement, failover, and publication reconciliation. +- `Canonical entrypoint:` `nix/modules/fleet-scheduler.nix`; `deployer/crates/fleet-scheduler/src/main.rs`; the publication code is `publish.rs`. +- `Current evidence:` `docs/testing.md`, `docs/rollout-bundle.md`, `nix/test-cluster/README.md` name `fresh-smoke`, `fresh-matrix`, `fleet-scheduler-e2e`, `rollout-soak` as the proofs for this boundary; the module has `iamEndpoint`, `fiberlbEndpoint`, `flashdnsEndpoint`, `heartbeatTimeoutSecs`; the scheduler code has the `chainfire` watch, dependency summary, and publication reconciliation; `fresh-smoke` exercises `node04 -> draining`, a `node05` fail-stop, and replica restore after the worker returns, and `rollout-soak` saved a scope-fixed longer-run proof in `/mnt/d2/centra/photoncloud-monorepo/work/rollout-soak/20260410T164549+0900/maintenance-held.json`, `power-loss-held.json`, `fleet-scheduler-post-restart.json`, `scope-fixed-contract.json`, `fleet-scheduler-scope-fixed.txt`. +- `Unproven:` large clusters; multi-hour maintenance windows; drain choreography with operator approval workflows. +- `P0:` no fatal file-level breakage found in the current static survey. +- `P1:` `FLEET-P1-01` became scope-fixed final on 2026-04-10. The release contract is fixed as one planned drain cycle + one fail-stop worker-loss cycle + 30-second held degraded states on two native-runtime workers, and `rollout-soak` saved that upper bound as live KVM artifacts in `/mnt/d2/centra/photoncloud-monorepo/work/rollout-soak/20260410T164549+0900/maintenance-held.json`, `power-loss-held.json`, `fleet-scheduler-post-restart.json`, `scope-fixed-contract.json`, `fleet-scheduler-scope-fixed.txt`. Multi-hour maintenance windows, pinned singleton policies, operator approval workflows, and larger-cluster drain storms were explicitly moved outside the supported surface. +- `P2:` `FLEET-P2-01` resolved on 2026-04-10. The module/binary default `chainfireEndpoint` was aligned to the canonical `http://127.0.0.1:2379`. +- `Dependencies:` `chainfire`; `node-agent`; optional `iam`, `fiberlb`, `flashdns`. + +## nix-agent + +- `Responsibility:` host-local NixOS convergence only. Builds and applies the desired system and handles health checks and rollback. +- `Canonical entrypoint:` `nix/modules/nix-agent.nix`; `deployer/crates/nix-agent/src/main.rs`. +- `Current evidence:` `docs/testing.md`, `docs/rollout-bundle.md`, `nix/test-cluster/README.md` name `baremetal-iso`, `baremetal-iso-e2e`, `deployer-vm-smoke`, `deployer-vm-rollback`, `portable-control-plane-regressions` as the proofs; the code has desired-system, observed-system, rollback-on-failure, and health-check-command; `nix/modules/nix-agent.nix` canonically generates that CLI contract; the 2026-04-10 `rollout-soak` saved `/mnt/d2/centra/photoncloud-monorepo/work/rollout-soak/20260410T154744+0900/node01-nix-agent-scope.txt` and `node04-nix-agent-scope.txt`, fixing in artifacts and docs the boundary that the steady-state `test-cluster` does not pretend a live `nix-agent.service` restart happened. +- 
`Unproven:` rollback under kernel/network failure; multi-node wave rollout; real-hardware recovery after a partial switch. +- `P0:` no fatal file-level breakage found in the current static survey. +- `P1:` `NIXAGENT-P1-01` resolved on 2026-04-10. The `healthCheckCommand` argv contract, the `rolled-back` semantics of `rollbackOnFailure`, the `deployer-vm-rollback` proof, and the partial-failure recovery procedure are fixed in `docs/rollout-bundle.md` and `docs/testing.md`. +- `P2:` `NIXAGENT-P2-01` resolved on 2026-04-10. The module/binary default `chainfireEndpoint` was aligned to the canonical `http://127.0.0.1:2379`. +- `Dependencies:` `chainfire`; the desired-system published by the deployer; the local NixOS flake / switch-to-configuration. + +## node-agent + +- `Responsibility:` host-local runtime reconciliation only. Handles native service instance heartbeats, process/container execution, and local observed state. +- `Canonical entrypoint:` `nix/modules/node-agent.nix`; `deployer/crates/node-agent/src/main.rs`. +- `Current evidence:` `docs/testing.md`, `docs/rollout-bundle.md`, `nix/test-cluster/README.md` name `fresh-smoke`, `fresh-matrix`, `fleet-scheduler-e2e`, `portable-control-plane-regressions` as the proofs; the code has `watcher`, `agent`, `process`; the module has the Podman enable, stateDir, pidDir, `allowLocalInstanceUpsert`; `process.rs` implements the `${stateDir}/pids/*.log` and `${stateDir}/pids/*.meta.json` contract. +- `Unproven:` heterogeneous runtime support; fine-grained SLOs for crash-looping host services; the secret-rotation workflow itself. +- `P0:` no fatal file-level breakage found in the current static survey. +- `P1:` `NODEAGENT-P1-01` resolved on 2026-04-10. The logs / secrets / volume / upgrade contract is fixed in `docs/rollout-bundle.md` and the module description. +- `P2:` `NODEAGENT-P2-01` resolved on 2026-04-10. The module/binary default `chainfireEndpoint` was aligned to the canonical `http://127.0.0.1:2379`. +- `Dependencies:` `chainfire`; `fleet-scheduler`; optional Podman; the host systemd/process model. + +## Nix/bootstrap/harness + +- `Responsibility:` define the product surface and canonicalize the NixOS outputs and the VM/QEMU harness for `single-node dev`, `3-node HA control plane`, and `bare-metal bootstrap`. +- `Canonical entrypoint:` `flake.nix`; `nix/modules/default.nix`; `nix/single-node/base.nix`; `nix/test-cluster/run-publishable-kvm-suite.sh`; `nix/test-cluster/run-local-baseline.sh`; `nix/test-cluster/verify-baremetal-iso.sh`; `nix/nodes/baremetal-qemu/*`. +- `Current evidence:` `flake.nix` has `single-node-quickstart`, `single-node-trial-vm`, `canonical-profile-eval-guards`, `portable-control-plane-regressions`, `baremetal-iso-e2e`; `nix/modules/default.nix` imports the current module surface in one place; `nix/single-node/base.nix` composes the minimal VM platform core and the optional bundle; `run-publishable-kvm-suite.sh` and `run-local-baseline.sh` pin local CPU parallelism and the local builder; `verify-baremetal-iso.sh` walks ISO -> phone-home -> bundle fetch -> Disko -> reboot -> `nix-agent active`; `run-cluster.sh` gained `durability-proof` and `rollout-soak`, saving backup/restore and failure-injection artifacts for `chainfire`, `flaredb`, `deployer`, `coronafs`, `lightningstor` under `/work/durability-proof` and the longer-run rollout/control-plane maintenance artifacts under `/work/rollout-soak`; on the 2026-04-10 local AMD/KVM baseline the six required checks plus `single-node-quickstart`, `baremetal-iso`, `fresh-smoke` all passed. +- `Unproven:` real-hardware USB/BMC install; an automatic guard for `/nix/store` capacity control; a release proof of the quickstart with every optional bundle enabled; a non-Nix easy-trial artifact. +- `P0:` `HARNESS-P0-01` there is still no real-hardware regression lane, and the canonical bare-metal proof remains a QEMU stand-in. +- `P1:` `HARNESS-P1-01` resolved on 2026-04-10. Quickstart optional-bundle health gating was aligned to TCP probes for `lightningstor`, `flashdns`, `fiberlb` 
and to the `coronafs` `50088/healthz` endpoint. +- `P1:` `HARNESS-P1-02` became scope-fixed on 2026-04-10. The easy trial stands on the Nix VM appliance via `single-node-trial-vm`, and the reasons a lighter Docker/OCI-style trial path is not supported are aligned across `docs/edge-trial-surface.md`, `README.md`, `docs/testing.md`, `docs/component-matrix.md`, `nix/single-node/surface.nix`, `supported-surface-guard`. +- `P1:` `HARNESS-P1-03` resolved on 2026-04-10. The `fresh-smoke` stale VM cleanup is now limited to PIDs contained in the current profile's `vm_dir` / `vde_switch_dir`, so it no longer sweeps up same-named cluster VMs from other checkouts. +- `P2:` `HARNESS-P2-01` resolved on 2026-04-10. On top of the `./work` root and local builder parallelism, `./nix/test-cluster/work-root-budget.sh` now has `enforce` and `prune-proof-logs` in addition to `status`, providing not just a disk budget advisory but a stronger local budget gate and a safer dated-proof cleanup workflow. +- `Dependencies:` `nix`, `nixpkgs`, QEMU/KVM, host disk under `./work`, local CPU parallelism, and all the component modules. + +## Notes For The Next Implementation Agent + +- Handling `DEPLOYER-P0-01` / `HARNESS-P0-01` first is the cheapest way to shrink the remaining hardware-proof and real-hardware operator-path work. +- To reproduce the baseline, use `nix/test-cluster/run-local-baseline.sh`; it reruns the same path while keeping the local-only builder and the logs under `./work` fixed. +- After that, advancing `DEPLOYER-P0-01` / `HARNESS-P0-01` to a real-hardware smoke moves the work from QEMU-only onto the hardware path. +- `DEPLOYER-P1-01` and `FLEET-P1-01` are now scope-fixed final. If they are reopened, treat true deployer HA and a larger-cluster scheduler maintenance proof as a separate tranche that extends the current release boundary. +- `FIBERLB-P1-01` is scope-fixed, but productizing backend certificate verification later will require rewriting the limited contract in the docs and guard. diff --git a/apigateway/README.md b/apigateway/README.md new file mode 100644 index 0000000..e297321 --- /dev/null +++ b/apigateway/README.md @@ -0,0 +1,17 @@ +# APIGateway + +`apigateway` is UltraCloud's supported external API and proxy surface for auth-aware and credit-aware upstream traffic. + +## Supported product shape + +APIGateway is supported as stateless replicated instances behind an external L4 or VIP layer; live in-process reload is not part of the product contract. + +- Config distribution is restart-based. Render routes, auth providers, and credit providers from Nix or generated cluster state, then replace or restart the process. +- Scale-out is supported by running multiple identical instances behind FiberLB or another L4 or VIP distribution layer. +- The release-facing proof remains `nix run ./nix/test-cluster#cluster -- fresh-matrix`, which validates the shipped single gateway-node composition on `node06`.
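+
+As a rough illustration of this restart-based rollout model, the sketch below probes one replica before an external L4/VIP layer re-admits it. It is a hedged example, not shipped tooling, and it assumes the gateway keeps serving an HTTP health endpoint (written as `/health` here):
+
+```rust
+use std::io::{Read, Write};
+use std::net::TcpStream;
+use std::time::Duration;
+
+/// Probe a single gateway replica after a restart-based config rollout.
+/// Returns true only for an HTTP 200 response on the assumed /health path.
+fn replica_ready(addr: &str) -> bool {
+    let Ok(mut stream) = TcpStream::connect(addr) else {
+        return false;
+    };
+    let _ = stream.set_read_timeout(Some(Duration::from_secs(2)));
+    let request = format!("GET /health HTTP/1.1\r\nHost: {addr}\r\nConnection: close\r\n\r\n");
+    if stream.write_all(request.as_bytes()).is_err() {
+        return false;
+    }
+    let mut response = String::new();
+    let _ = stream.read_to_string(&mut response);
+    response.starts_with("HTTP/1.1 200") || response.starts_with("HTTP/1.0 200")
+}
+
+fn main() {
+    // Placeholder replica addresses; a real deployment would take these from
+    // the rendered cluster state.
+    for addr in ["127.0.0.1:8080", "127.0.0.1:8081"] {
+        println!("{addr} ready: {}", replica_ready(addr));
+    }
+}
+```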
+ +## Explicit non-goals + +- hot route reload through an admin API or `SIGHUP` +- in-process config gossip or leader election between gateway replicas +- a claim that every HA layout is directly release-proven in the current harness diff --git a/apigateway/crates/apigateway-server/src/main.rs b/apigateway/crates/apigateway-server/src/main.rs index 3905c40..b233d50 100644 --- a/apigateway/crates/apigateway-server/src/main.rs +++ b/apigateway/crates/apigateway-server/src/main.rs @@ -366,7 +366,10 @@ async fn main() -> Result<(), Box> { .init(); if used_default_config { - info!("Config file not found: {}, using defaults", args.config.display()); + info!( + "Config file not found: {}, using defaults", + args.config.display() + ); } let routes = build_routes(config.routes)?; @@ -412,7 +415,11 @@ async fn main() -> Result<(), Box> { .with_state(state); let listener = tokio::net::TcpListener::bind(config.http_addr).await?; - axum::serve(listener, app.into_make_service_with_connect_info::()).await?; + axum::serve( + listener, + app.into_make_service_with_connect_info::(), + ) + .await?; Ok(()) } @@ -426,7 +433,13 @@ async fn health() -> Json { } async fn list_routes(State(state): State>) -> Json> { - Json(state.routes.iter().map(|route| route.config.clone()).collect()) + Json( + state + .routes + .iter() + .map(|route| route.config.clone()) + .collect(), + ) } async fn proxy( @@ -463,8 +476,12 @@ async fn proxy( let target_url = build_upstream_url(&route, request.uri())?; - let request_timeout = - Duration::from_millis(route.config.timeout_ms.unwrap_or(state.upstream_timeout.as_millis() as u64)); + let request_timeout = Duration::from_millis( + route + .config + .timeout_ms + .unwrap_or(state.upstream_timeout.as_millis() as u64), + ); let mut builder = state .client .request(request.method().clone(), target_url) @@ -630,13 +647,12 @@ async fn enforce_credit( credit_subject.as_ref().expect("credit subject resolved"), ) .await; - apply_credit_mode(credit_cfg.mode, credit_cfg.fail_open, decision) - .map(|decision| { - decision.map(|decision| CreditReservation { - provider: credit_cfg.provider.clone(), - reservation_id: decision.reservation_id, - }) + apply_credit_mode(credit_cfg.mode, credit_cfg.fail_open, decision).map(|decision| { + decision.map(|decision| CreditReservation { + provider: credit_cfg.provider.clone(), + reservation_id: decision.reservation_id, }) + }) } fn apply_credit_mode( @@ -837,13 +853,19 @@ async fn finalize_credit( CommitPolicy::Never => return, CommitPolicy::Always => { if let Err(err) = commit_credit(state, credit_cfg, &reservation).await { - warn!("Failed to commit credit reservation {}: {}", reservation.reservation_id, err); + warn!( + "Failed to commit credit reservation {}: {}", + reservation.reservation_id, err + ); } } CommitPolicy::Success => { if status.is_success() || status.is_redirection() { if let Err(err) = commit_credit(state, credit_cfg, &reservation).await { - warn!("Failed to commit credit reservation {}: {}", reservation.reservation_id, err); + warn!( + "Failed to commit credit reservation {}: {}", + reservation.reservation_id, err + ); } } else if let Err(err) = rollback_credit(state, credit_cfg, &reservation).await { warn!( @@ -1010,11 +1032,9 @@ async fn build_auth_providers( for config in configs { let provider_type = normalize_name(&config.provider_type); if providers.contains_key(&config.name) { - return Err(config_error(format!( - "duplicate auth provider name {}", - config.name - )) - .into()); + return Err( + config_error(format!("duplicate auth 
provider name {}", config.name)).into(), + ); } match provider_type.as_str() { @@ -1034,10 +1054,7 @@ async fn build_auth_providers( Duration::from_millis(config.timeout_ms.unwrap_or(DEFAULT_AUTH_TIMEOUT_MS)); providers.insert( config.name.clone(), - AuthProvider::Grpc(GrpcAuthProvider { - channel, - timeout, - }), + AuthProvider::Grpc(GrpcAuthProvider { channel, timeout }), ); } _ => { @@ -1061,25 +1078,19 @@ async fn build_credit_providers( for config in configs { let provider_type = normalize_name(&config.provider_type); if providers.contains_key(&config.name) { - return Err(config_error(format!( - "duplicate credit provider name {}", - config.name - )) - .into()); + return Err( + config_error(format!("duplicate credit provider name {}", config.name)).into(), + ); } match provider_type.as_str() { "grpc" => { let mut endpoint = Endpoint::from_shared(config.endpoint.clone())? .connect_timeout(Duration::from_millis( - config - .timeout_ms - .unwrap_or(DEFAULT_CREDIT_TIMEOUT_MS), + config.timeout_ms.unwrap_or(DEFAULT_CREDIT_TIMEOUT_MS), )) .timeout(Duration::from_millis( - config - .timeout_ms - .unwrap_or(DEFAULT_CREDIT_TIMEOUT_MS), + config.timeout_ms.unwrap_or(DEFAULT_CREDIT_TIMEOUT_MS), )); if let Some(tls) = build_client_tls_config(&config.tls).await? { @@ -1087,17 +1098,11 @@ async fn build_credit_providers( } let channel = endpoint.connect().await?; - let timeout = Duration::from_millis( - config - .timeout_ms - .unwrap_or(DEFAULT_CREDIT_TIMEOUT_MS), - ); + let timeout = + Duration::from_millis(config.timeout_ms.unwrap_or(DEFAULT_CREDIT_TIMEOUT_MS)); providers.insert( config.name.clone(), - CreditProvider::Grpc(GrpcCreditProvider { - channel, - timeout, - }), + CreditProvider::Grpc(GrpcCreditProvider { channel, timeout }), ); } _ => { @@ -1132,11 +1137,9 @@ fn build_routes(configs: Vec) -> Result, Box String { } if path == "/" { let trimmed = base.trim_end_matches('/'); - return if trimmed.is_empty() { "/".to_string() } else { trimmed.to_string() }; + return if trimmed.is_empty() { + "/".to_string() + } else { + trimmed.to_string() + }; } format!( @@ -1385,9 +1392,9 @@ fn build_upstream_url(route: &Route, uri: &Uri) -> Result { #[cfg(test)] mod tests { use super::*; + use apigateway_api::GatewayCreditServiceServer; use axum::routing::get; use creditservice_api::{CreditServiceImpl, CreditStorage, GatewayCreditServiceImpl}; - use apigateway_api::GatewayCreditServiceServer; use creditservice_types::Wallet; use iam_api::{GatewayAuthServiceImpl, GatewayAuthServiceServer}; use iam_authn::{InternalTokenConfig, InternalTokenService, SigningKey}; @@ -1470,7 +1477,11 @@ mod tests { } async fn start_iam_gateway() -> (SocketAddr, String) { - let backend = Arc::new(Backend::new(BackendConfig::Memory).await.expect("iam backend")); + let backend = Arc::new( + Backend::new(BackendConfig::Memory) + .await + .expect("iam backend"), + ); let principal_store = Arc::new(PrincipalStore::new(backend.clone())); let role_store = Arc::new(RoleStore::new(backend.clone())); let binding_store = Arc::new(BindingStore::new(backend.clone())); @@ -1516,12 +1527,8 @@ mod tests { role_store.clone(), cache, )); - let gateway_auth = GatewayAuthServiceImpl::new( - token_service, - principal_store, - token_store, - evaluator, - ); + let gateway_auth = + GatewayAuthServiceImpl::new(token_service, principal_store, token_store, evaluator); let listener = tokio::net::TcpListener::bind("127.0.0.1:0") .await @@ -1542,10 +1549,7 @@ mod tests { async fn start_credit_gateway(iam_addr: &SocketAddr) -> SocketAddr { let storage = 
creditservice_api::InMemoryStorage::new(); let wallet = Wallet::new("proj-1".into(), "org-1".into(), 100); - storage - .create_wallet(wallet) - .await - .expect("wallet create"); + storage.create_wallet(wallet).await.expect("wallet create"); let auth_service = Arc::new( iam_service_auth::AuthService::new(&format!("http://{}", iam_addr)) @@ -1636,7 +1640,10 @@ mod tests { let route = routes.first().unwrap(); let uri: Uri = "/api/v1/users?debug=true".parse().unwrap(); let url = build_upstream_url(route, &uri).unwrap(); - assert_eq!(url.as_str(), "http://example.com/base/api/v1/users?debug=true"); + assert_eq!( + url.as_str(), + "http://example.com/base/api/v1/users?debug=true" + ); } #[test] @@ -1671,7 +1678,8 @@ mod tests { let outcome = apply_auth_mode(PolicyMode::Optional, false, decision).unwrap(); assert!(outcome.subject.is_none()); - let outcome = apply_auth_mode(PolicyMode::Optional, false, Err(StatusCode::BAD_GATEWAY)).unwrap(); + let outcome = + apply_auth_mode(PolicyMode::Optional, false, Err(StatusCode::BAD_GATEWAY)).unwrap(); assert!(outcome.subject.is_none()); } @@ -1692,7 +1700,8 @@ mod tests { let outcome = apply_credit_mode(PolicyMode::Optional, false, decision).unwrap(); assert!(outcome.is_none()); - let outcome = apply_credit_mode(PolicyMode::Optional, false, Err(StatusCode::BAD_GATEWAY)).unwrap(); + let outcome = + apply_credit_mode(PolicyMode::Optional, false, Err(StatusCode::BAD_GATEWAY)).unwrap(); assert!(outcome.is_none()); } @@ -1783,7 +1792,8 @@ mod tests { Err(status) => panic!("unexpected proxy status: {}", status), } } - let response = response.expect("gateway auth+credit test timed out waiting for ready backends"); + let response = + response.expect("gateway auth+credit test timed out waiting for ready backends"); assert_eq!(response.status(), StatusCode::OK); } @@ -1812,7 +1822,10 @@ mod tests { let request = Request::builder() .method("GET") .uri("/v1/echo-auth") - .header(axum::http::header::AUTHORIZATION, "Bearer passthrough-token") + .header( + axum::http::header::AUTHORIZATION, + "Bearer passthrough-token", + ) .header(PHOTON_AUTH_TOKEN_HEADER, "photon-token") .body(Body::empty()) .expect("request build"); @@ -1828,8 +1841,14 @@ mod tests { let body = to_bytes(response.into_body(), 1024 * 1024).await.unwrap(); let json: serde_json::Value = serde_json::from_slice(&body).unwrap(); - assert_eq!(json.get("authorization").and_then(|v| v.as_str()), Some("Bearer passthrough-token")); - assert_eq!(json.get("photon_token").and_then(|v| v.as_str()), Some("photon-token")); + assert_eq!( + json.get("authorization").and_then(|v| v.as_str()), + Some("Bearer passthrough-token") + ); + assert_eq!( + json.get("photon_token").and_then(|v| v.as_str()), + Some("photon-token") + ); } #[test] diff --git a/chainfire/Cargo.lock b/chainfire/Cargo.lock index dfd5a17..4ac4082 100644 --- a/chainfire/Cargo.lock +++ b/chainfire/Cargo.lock @@ -388,18 +388,7 @@ dependencies = [ name = "chainfire-core" version = "0.1.0" dependencies = [ - "async-trait", - "bytes", - "chainfire-gossip", - "chainfire-types", - "dashmap", - "futures", - "parking_lot", - "tempfile", "thiserror 1.0.69", - "tokio", - "tokio-stream", - "tracing", ] [[package]] diff --git a/chainfire/baremetal/pxe-server/assets/.gitkeep b/chainfire/baremetal/pxe-server/assets/.gitkeep index b27a78c..352bf44 100644 --- a/chainfire/baremetal/pxe-server/assets/.gitkeep +++ b/chainfire/baremetal/pxe-server/assets/.gitkeep @@ -1,4 +1,4 @@ -# This directory is a placeholder for runtime assets +# This directory is reserved for runtime 
assets # # Actual boot assets will be created at: /var/lib/pxe-boot/ # when the PXE server is deployed. diff --git a/chainfire/baremetal/pxe-server/ipxe/boot.ipxe b/chainfire/baremetal/pxe-server/ipxe/boot.ipxe index 397b6ea..6db3a62 100644 --- a/chainfire/baremetal/pxe-server/ipxe/boot.ipxe +++ b/chainfire/baremetal/pxe-server/ipxe/boot.ipxe @@ -190,8 +190,8 @@ set kernel-params ${kernel-params} console=tty0 console=ttyS0,115200n8 # set kernel-params ${kernel-params} systemd.log_level=debug echo Loading NixOS kernel... -# NOTE: These paths will be populated by the S3 image builder (T032.S3) -# For now, they point to placeholder paths that need to be updated +# NOTE: These paths are populated by the S3 image builder (T032.S3) +# and must resolve to the generated kernel/initrd objects at deploy time. kernel ${nixos-url}/bzImage ${kernel-params} || goto failed echo Loading NixOS initrd... diff --git a/chainfire/chainfire-client/src/client.rs b/chainfire/chainfire-client/src/client.rs index 55f0d08..edc2d83 100644 --- a/chainfire/chainfire-client/src/client.rs +++ b/chainfire/chainfire-client/src/client.rs @@ -4,8 +4,8 @@ use crate::error::{ClientError, Result}; use crate::watch::WatchHandle; use chainfire_proto::proto::{ cluster_client::ClusterClient, compare, kv_client::KvClient, request_op, response_op, - watch_client::WatchClient, Compare, DeleteRangeRequest, MemberAddRequest, PutRequest, - RangeRequest, RequestOp, StatusRequest, TxnRequest, + watch_client::WatchClient, Compare, DeleteRangeRequest, PutRequest, RangeRequest, RequestOp, + StatusRequest, TxnRequest, }; use std::time::Duration; use tonic::transport::Channel; @@ -616,53 +616,6 @@ impl Client { raft_term: resp.raft_term, }) } - - /// Add a member to the cluster - /// - /// # Arguments - /// * `peer_url` - The Raft address of the new member (e.g., "127.0.0.1:2380") - /// * `is_learner` - Whether to add as learner (true) or voter (false) - /// - /// # Returns - /// The node ID of the added member - pub async fn member_add( - &mut self, - node_id: u64, - peer_url: impl AsRef, - is_learner: bool, - ) -> Result { - let peer_url = peer_url.as_ref().to_string(); - let resp = self - .with_cluster_retry(|mut cluster| { - let peer_url = peer_url.clone(); - async move { - cluster - .member_add(MemberAddRequest { - node_id, - peer_urls: vec![peer_url], - is_learner, - }) - .await - .map(|resp| resp.into_inner()) - } - }) - .await?; - - // Extract the member ID from the response - let member_id = resp - .member - .map(|m| m.id) - .ok_or_else(|| ClientError::Internal("No member in response".to_string()))?; - - debug!( - member_id = member_id, - peer_url = peer_url.as_str(), - is_learner = is_learner, - "Added member to cluster" - ); - - Ok(member_id) - } } /// Cluster status diff --git a/chainfire/chainfire-client/src/watch.rs b/chainfire/chainfire-client/src/watch.rs index d1a338f..43d91d8 100644 --- a/chainfire/chainfire-client/src/watch.rs +++ b/chainfire/chainfire-client/src/watch.rs @@ -136,9 +136,10 @@ fn convert_event(event: Event) -> WatchEvent { EventType::Delete }; - let (key, value, revision) = event.kv.map(|kv| { - (kv.key, kv.value, kv.mod_revision as u64) - }).unwrap_or_default(); + let (key, value, revision) = event + .kv + .map(|kv| (kv.key, kv.value, kv.mod_revision as u64)) + .unwrap_or_default(); WatchEvent { event_type, diff --git a/chainfire/crates/chainfire-api/build.rs b/chainfire/crates/chainfire-api/build.rs index 1b77c9b..9550202 100644 --- a/chainfire/crates/chainfire-api/build.rs +++ 
b/chainfire/crates/chainfire-api/build.rs @@ -4,10 +4,7 @@ fn main() -> Result<(), Box> { .build_server(true) .build_client(true) .compile_protos( - &[ - "../../proto/chainfire.proto", - "../../proto/internal.proto", - ], + &["../../proto/chainfire.proto", "../../proto/internal.proto"], &["../../proto"], )?; diff --git a/chainfire/crates/chainfire-api/src/cluster_service.rs b/chainfire/crates/chainfire-api/src/cluster_service.rs index 9f83685..504294c 100644 --- a/chainfire/crates/chainfire-api/src/cluster_service.rs +++ b/chainfire/crates/chainfire-api/src/cluster_service.rs @@ -1,30 +1,22 @@ //! Cluster management service implementation //! //! This service handles cluster operations and status queries. -//! -//! NOTE: Custom RaftCore does not yet support dynamic membership changes. -//! Member add/remove operations are disabled for now. +//! The supported surface reports the fixed membership that the node booted with. use crate::conversions::make_header; use crate::proto::{ - cluster_server::Cluster, GetSnapshotRequest, GetSnapshotResponse, Member, MemberAddRequest, - MemberAddResponse, MemberListRequest, MemberListResponse, MemberRemoveRequest, - MemberRemoveResponse, SnapshotMeta, StatusRequest, StatusResponse, TransferSnapshotRequest, - TransferSnapshotResponse, + cluster_server::Cluster, Member, MemberListRequest, MemberListResponse, StatusRequest, + StatusResponse, }; use chainfire_raft::core::RaftCore; use std::sync::Arc; -use tokio::sync::mpsc; -use tokio_stream::wrappers::ReceiverStream; use tonic::{Request, Response, Status}; -use tracing::{debug, info, warn}; +use tracing::debug; /// Cluster service implementation pub struct ClusterServiceImpl { /// Raft core raft: Arc, - /// gRPC Raft client for managing node addresses - rpc_client: Arc, /// Cluster ID cluster_id: u64, /// Configured members with client and peer URLs @@ -37,13 +29,11 @@ impl ClusterServiceImpl { /// Create a new cluster service pub fn new( raft: Arc, - rpc_client: Arc, cluster_id: u64, members: Vec, ) -> Self { Self { raft, - rpc_client, cluster_id, members, version: env!("CARGO_PKG_VERSION").to_string(), @@ -55,8 +45,7 @@ impl ClusterServiceImpl { } /// Get current members as proto Member list - /// NOTE: Custom RaftCore doesn't track membership dynamically yet, so this returns - /// the configured static membership that the server was booted with. + /// Return the configured static membership that the server was booted with. async fn get_member_list(&self) -> Vec { if self.members.is_empty() { return vec![Member { @@ -73,35 +62,6 @@ impl ClusterServiceImpl { #[tonic::async_trait] impl Cluster for ClusterServiceImpl { - async fn member_add( - &self, - request: Request, - ) -> Result, Status> { - let req = request.into_inner(); - debug!(node_id = req.node_id, peer_urls = ?req.peer_urls, is_learner = req.is_learner, "Member add request"); - - // Custom RaftCore doesn't support dynamic membership changes yet - warn!("Member add not supported in custom Raft implementation"); - Err(Status::unimplemented( - "Dynamic membership changes not supported in custom Raft implementation. \ - All cluster members must be configured at startup via initial_members." 
- )) - } - - async fn member_remove( - &self, - request: Request, - ) -> Result, Status> { - let req = request.into_inner(); - debug!(member_id = req.id, "Member remove request"); - - // Custom RaftCore doesn't support dynamic membership changes yet - warn!("Member remove not supported in custom Raft implementation"); - Err(Status::unimplemented( - "Dynamic membership changes not supported in custom Raft implementation" - )) - } - async fn member_list( &self, _request: Request, @@ -128,104 +88,11 @@ impl Cluster for ClusterServiceImpl { Ok(Response::new(StatusResponse { header: Some(self.make_header(last_applied)), version: self.version.clone(), - db_size: 0, // TODO: get actual RocksDB size + db_size: 0, leader: leader.unwrap_or(0), raft_index: commit_index, raft_term: term, raft_applied_index: last_applied, })) } - - /// Transfer snapshot to a target node for pre-seeding (T041 Option C) - /// - /// This is a workaround for OpenRaft 0.9.x learner replication bug. - /// By pre-seeding learners with a snapshot, we avoid the assertion failure - /// during log replication. - /// - /// TODO(T041.S5): Full implementation pending - currently returns placeholder - async fn transfer_snapshot( - &self, - request: Request, - ) -> Result, Status> { - let req = request.into_inner(); - info!( - target_node_id = req.target_node_id, - target_addr = %req.target_addr, - "Snapshot transfer request (T041 Option C)" - ); - - // Get current state from state machine - let sm = self.raft.state_machine(); - let revision = sm.current_revision(); - let term = self.raft.current_term().await; - let membership = self.raft.membership().await; - - let meta = SnapshotMeta { - last_log_index: revision, - last_log_term: term, - membership: membership.clone(), - size: 0, // Will be set when full impl is done - }; - - // TODO(T041.S5): Implement full snapshot transfer - // 1. Serialize KV data using chainfire_storage::snapshot::SnapshotBuilder - // 2. Stream snapshot to target via InstallSnapshot RPC - // 3. Wait for target to apply snapshot - // - // For now, return success placeholder - the actual workaround can use - // data directory copy (Option C1) until this API is complete. 
- - warn!( - target = %req.target_addr, - "TransferSnapshot not yet fully implemented - use data dir copy workaround" - ); - - Ok(Response::new(TransferSnapshotResponse { - header: Some(self.make_header(revision)), - success: false, - error: "TransferSnapshot API not yet implemented - use data directory copy".to_string(), - meta: Some(meta), - })) - } - - type GetSnapshotStream = ReceiverStream>; - - /// Get snapshot from this node as a stream of chunks - /// - /// TODO(T041.S5): Full implementation pending - currently returns empty snapshot - async fn get_snapshot( - &self, - _request: Request, - ) -> Result, Status> { - debug!("Get snapshot request (T041 Option C)"); - - // Get current state from state machine - let sm = self.raft.state_machine(); - let revision = sm.current_revision(); - let term = self.raft.current_term().await; - let membership = self.raft.membership().await; - - let meta = SnapshotMeta { - last_log_index: revision, - last_log_term: term, - membership, - size: 0, - }; - - // Create channel for streaming response - let (tx, rx) = mpsc::channel(4); - - // TODO(T041.S5): Stream actual KV data - // For now, just send metadata with empty data - tokio::spawn(async move { - let response = GetSnapshotResponse { - meta: Some(meta), - chunk: vec![], - done: true, - }; - let _ = tx.send(Ok(response)).await; - }); - - Ok(Response::new(ReceiverStream::new(rx))) - } } diff --git a/chainfire/crates/chainfire-api/src/internal_service.rs b/chainfire/crates/chainfire-api/src/internal_service.rs index d6e26e0..ab77877 100644 --- a/chainfire/crates/chainfire-api/src/internal_service.rs +++ b/chainfire/crates/chainfire-api/src/internal_service.rs @@ -4,22 +4,17 @@ //! It bridges the gRPC layer with the custom Raft implementation. use crate::internal_proto::{ - raft_service_server::RaftService, - AppendEntriesRequest as ProtoAppendEntriesRequest, - AppendEntriesResponse as ProtoAppendEntriesResponse, - InstallSnapshotRequest, InstallSnapshotResponse, - VoteRequest as ProtoVoteRequest, + raft_service_server::RaftService, AppendEntriesRequest as ProtoAppendEntriesRequest, + AppendEntriesResponse as ProtoAppendEntriesResponse, VoteRequest as ProtoVoteRequest, VoteResponse as ProtoVoteResponse, }; -use chainfire_raft::core::{ - RaftCore, VoteRequest, AppendEntriesRequest, -}; -use chainfire_storage::{LogId, LogEntry as RaftLogEntry, EntryPayload}; +use chainfire_raft::core::{AppendEntriesRequest, RaftCore, VoteRequest}; +use chainfire_storage::{EntryPayload, LogEntry as RaftLogEntry, LogId}; use chainfire_types::command::RaftCommand; use std::sync::Arc; use tokio::sync::oneshot; -use tonic::{Request, Response, Status, Streaming}; -use tracing::{debug, info, trace, warn}; +use tonic::{Request, Response, Status}; +use tracing::{info, trace, warn}; /// Internal Raft RPC service implementation /// @@ -67,7 +62,11 @@ impl RaftService for RaftServiceImpl { Status::internal("Vote request failed: channel closed") })?; - trace!(term = resp.term, granted = resp.vote_granted, "Vote response"); + trace!( + term = resp.term, + granted = resp.vote_granted, + "Vote response" + ); Ok(Response::new(ProtoVoteResponse { term: resp.term, vote_granted: resp.vote_granted, @@ -141,22 +140,4 @@ impl RaftService for RaftServiceImpl { })) } - async fn install_snapshot( - &self, - request: Request>, - ) -> Result, Status> { - let mut stream = request.into_inner(); - debug!("InstallSnapshot stream started"); - - // Collect all chunks (for compatibility) - while let Some(chunk) = stream.message().await? 
{ - if chunk.done { - break; - } - } - - // Custom Raft doesn't support snapshots yet - warn!("InstallSnapshot not supported in custom Raft implementation"); - Err(Status::unimplemented("Snapshots not supported in custom Raft implementation")) - } } diff --git a/chainfire/crates/chainfire-api/src/kv_service.rs b/chainfire/crates/chainfire-api/src/kv_service.rs index ff8d718..63d1b00 100644 --- a/chainfire/crates/chainfire-api/src/kv_service.rs +++ b/chainfire/crates/chainfire-api/src/kv_service.rs @@ -45,7 +45,9 @@ impl Kv for KvServiceImpl { // NOTE: Custom RaftCore doesn't yet support linearizable_read() method // For now, just warn if non-serializable read is requested if !req.serializable { - warn!("Linearizable reads not yet supported in custom Raft, performing serializable read"); + warn!( + "Linearizable reads not yet supported in custom Raft, performing serializable read" + ); } // Get state machine from Raft core @@ -84,7 +86,11 @@ impl Kv for KvServiceImpl { let command = RaftCommand::Put { key: req.key, value: req.value, - lease_id: if req.lease != 0 { Some(req.lease) } else { None }, + lease_id: if req.lease != 0 { + Some(req.lease) + } else { + None + }, prev_kv: req.prev_kv, }; @@ -115,19 +121,25 @@ impl Kv for KvServiceImpl { let req = request.into_inner(); debug!(key = ?String::from_utf8_lossy(&req.key), "Delete request"); - // Workaround: Pre-check key existence to determine deleted count - // TODO: Replace with proper RaftResponse.deleted once client_write returns full response + // Pre-check key existence because the current client_write path does not + // return a delete count in the write response. let sm = self.raft.state_machine(); let deleted_count = if req.range_end.is_empty() { // Single key delete - check if exists - let exists = sm.kv() + let exists = sm + .kv() .get(&req.key) .map_err(|e| Status::internal(e.to_string()))? 
.is_some(); - if exists { 1 } else { 0 } + if exists { + 1 + } else { + 0 + } } else { // Range delete - count keys in range - let kvs = sm.kv() + let kvs = sm + .kv() .range(&req.key, Some(&req.range_end)) .map_err(|e| Status::internal(e.to_string()))?; kvs.len() as i64 @@ -231,7 +243,7 @@ impl Kv for KvServiceImpl { Ok(Response::new(TxnResponse { header: Some(self.make_header(revision).await), - succeeded: true, // Assume success if no error + succeeded: true, // Assume success if no error responses: vec![], // Not supported yet })) } @@ -276,9 +288,7 @@ fn convert_txn_responses( .collect() } -fn convert_ops( - ops: &[crate::proto::RequestOp], -) -> Vec { +fn convert_ops(ops: &[crate::proto::RequestOp]) -> Vec { use chainfire_types::command::TxnOp; ops.iter() @@ -287,7 +297,11 @@ fn convert_ops( crate::proto::request_op::Request::RequestPut(put) => TxnOp::Put { key: put.key.clone(), value: put.value.clone(), - lease_id: if put.lease != 0 { Some(put.lease) } else { None }, + lease_id: if put.lease != 0 { + Some(put.lease) + } else { + None + }, }, crate::proto::request_op::Request::RequestDeleteRange(del) => { if del.range_end.is_empty() { @@ -307,7 +321,7 @@ fn convert_ops( limit: range.limit, keys_only: range.keys_only, count_only: range.count_only, - } + }, }) }) .collect() diff --git a/chainfire/crates/chainfire-api/src/lease_service.rs b/chainfire/crates/chainfire-api/src/lease_service.rs index 8eb166b..777a9fb 100644 --- a/chainfire/crates/chainfire-api/src/lease_service.rs +++ b/chainfire/crates/chainfire-api/src/lease_service.rs @@ -182,7 +182,8 @@ impl Lease for LeaseServiceImpl { let leases = sm.leases(); let lease_ids = leases.list(); - let statuses: Vec = lease_ids.into_iter().map(|id| LeaseStatus { id }).collect(); + let statuses: Vec = + lease_ids.into_iter().map(|id| LeaseStatus { id }).collect(); Ok(Response::new(LeaseLeasesResponse { header: Some(self.make_header(revision)), diff --git a/chainfire/crates/chainfire-api/src/lib.rs b/chainfire/crates/chainfire-api/src/lib.rs index 06a71ad..30c3d52 100644 --- a/chainfire/crates/chainfire-api/src/lib.rs +++ b/chainfire/crates/chainfire-api/src/lib.rs @@ -5,25 +5,25 @@ //! - gRPC service implementations //! 
- Client and server components +pub mod cluster_service; +pub mod conversions; pub mod generated; +pub mod internal_service; pub mod kv_service; pub mod lease_service; -pub mod watch_service; -pub mod cluster_service; -pub mod internal_service; pub mod raft_client; -pub mod conversions; +pub mod watch_service; // Re-export generated types -pub use generated::chainfire::v1 as proto; pub use generated::chainfire::internal as internal_proto; +pub use generated::chainfire::v1 as proto; // Re-export services +pub use cluster_service::ClusterServiceImpl; +pub use internal_service::RaftServiceImpl; pub use kv_service::KvServiceImpl; pub use lease_service::LeaseServiceImpl; pub use watch_service::WatchServiceImpl; -pub use cluster_service::ClusterServiceImpl; -pub use internal_service::RaftServiceImpl; // Re-export Raft client and config pub use raft_client::{GrpcRaftClient, RetryConfig}; diff --git a/chainfire/crates/chainfire-api/src/raft_client.rs b/chainfire/crates/chainfire-api/src/raft_client.rs index edb15e9..36a5f3a 100644 --- a/chainfire/crates/chainfire-api/src/raft_client.rs +++ b/chainfire/crates/chainfire-api/src/raft_client.rs @@ -112,7 +112,10 @@ impl GrpcRaftClient { } /// Get or create a gRPC client for the target node - async fn get_client(&self, target: NodeId) -> Result, RaftNetworkError> { + async fn get_client( + &self, + target: NodeId, + ) -> Result, RaftNetworkError> { // Check cache first { let clients = self.clients.read().await; @@ -290,9 +293,7 @@ impl RaftRpcClient for GrpcRaftClient { use chainfire_storage::EntryPayload; let data = match &e.payload { EntryPayload::Blank => vec![], - EntryPayload::Normal(cmd) => { - bincode::serialize(cmd).unwrap_or_default() - } + EntryPayload::Normal(cmd) => bincode::serialize(cmd).unwrap_or_default(), EntryPayload::Membership(_) => vec![], }; (e.log_id.index, e.log_id.term, data) @@ -333,8 +334,16 @@ impl RaftRpcClient for GrpcRaftClient { Ok(AppendEntriesResponse { term: resp.term, success: resp.success, - conflict_index: if resp.conflict_index > 0 { Some(resp.conflict_index) } else { None }, - conflict_term: if resp.conflict_term > 0 { Some(resp.conflict_term) } else { None }, + conflict_index: if resp.conflict_index > 0 { + Some(resp.conflict_index) + } else { + None + }, + conflict_term: if resp.conflict_term > 0 { + Some(resp.conflict_term) + } else { + None + }, }) } }) diff --git a/chainfire/crates/chainfire-api/src/watch_service.rs b/chainfire/crates/chainfire-api/src/watch_service.rs index 71aec81..449192a 100644 --- a/chainfire/crates/chainfire-api/src/watch_service.rs +++ b/chainfire/crates/chainfire-api/src/watch_service.rs @@ -1,9 +1,7 @@ //! 
Watch service implementation use crate::conversions::make_header; -use crate::proto::{ - watch_server::Watch, WatchRequest, WatchResponse, -}; +use crate::proto::{watch_server::Watch, WatchRequest, WatchResponse}; use chainfire_watch::{WatchRegistry, WatchStream}; use std::pin::Pin; use std::sync::Arc; @@ -39,7 +37,8 @@ impl WatchServiceImpl { #[tonic::async_trait] impl Watch for WatchServiceImpl { - type WatchStream = Pin> + Send>>; + type WatchStream = + Pin> + Send>>; async fn watch( &self, @@ -81,13 +80,17 @@ impl Watch for WatchServiceImpl { Ok(req) => { if let Some(request_union) = req.request_union { let response = match request_union { - crate::proto::watch_request::RequestUnion::CreateRequest(create) => { + crate::proto::watch_request::RequestUnion::CreateRequest( + create, + ) => { let internal_req: chainfire_types::watch::WatchRequest = create.into(); let resp = stream.create_watch(internal_req); internal_to_proto_response(resp, cluster_id, member_id) } - crate::proto::watch_request::RequestUnion::CancelRequest(cancel) => { + crate::proto::watch_request::RequestUnion::CancelRequest( + cancel, + ) => { let resp = stream.cancel_watch(cancel.watch_id); internal_to_proto_response(resp, cluster_id, member_id) } diff --git a/chainfire/crates/chainfire-core/Cargo.toml b/chainfire/crates/chainfire-core/Cargo.toml index db4ed3a..f076a5b 100644 --- a/chainfire/crates/chainfire-core/Cargo.toml +++ b/chainfire/crates/chainfire-core/Cargo.toml @@ -3,35 +3,12 @@ name = "chainfire-core" version.workspace = true edition.workspace = true license.workspace = true -description = "Embeddable distributed cluster library with Raft consensus and SWIM gossip" +description = "Internal compatibility crate for non-public ChainFire workspace types" rust-version.workspace = true +publish = false [dependencies] -# Internal crates -chainfire-types = { workspace = true } -chainfire-gossip = { workspace = true } -# Note: chainfire-storage, chainfire-raft, chainfire-watch -# will be added as implementation progresses -# chainfire-storage = { workspace = true } -# chainfire-raft = { workspace = true } -# chainfire-watch = { workspace = true } - -# Async runtime -tokio = { workspace = true } -tokio-stream = { workspace = true } -futures = { workspace = true } -async-trait = { workspace = true } - -# Utilities thiserror = { workspace = true } -tracing = { workspace = true } -bytes = { workspace = true } -parking_lot = { workspace = true } -dashmap = { workspace = true } - -[dev-dependencies] -tokio = { workspace = true, features = ["test-util"] } -tempfile = { workspace = true } [lints] workspace = true diff --git a/chainfire/crates/chainfire-core/src/builder.rs b/chainfire/crates/chainfire-core/src/builder.rs deleted file mode 100644 index 2d911a4..0000000 --- a/chainfire/crates/chainfire-core/src/builder.rs +++ /dev/null @@ -1,238 +0,0 @@ -//! 
Builder pattern for cluster creation - -use std::net::SocketAddr; -use std::path::PathBuf; -use std::sync::Arc; - -use chainfire_gossip::{GossipAgent, GossipId}; -use chainfire_types::node::NodeRole; -use chainfire_types::RaftRole; - -use crate::callbacks::{ClusterEventHandler, KvEventHandler}; -use crate::cluster::Cluster; -use crate::config::{ClusterConfig, MemberConfig, StorageBackendConfig, TimeoutConfig}; -use crate::error::{ClusterError, Result}; -use crate::events::EventDispatcher; - -/// Builder for creating a Chainfire cluster instance -/// -/// # Example -/// -/// ```ignore -/// use chainfire_core::ClusterBuilder; -/// -/// let cluster = ClusterBuilder::new(1) -/// .name("node-1") -/// .gossip_addr("0.0.0.0:7946".parse()?) -/// .raft_addr("0.0.0.0:2380".parse()?) -/// .bootstrap(true) -/// .build() -/// .await?; -/// ``` -pub struct ClusterBuilder { - config: ClusterConfig, - cluster_handlers: Vec>, - kv_handlers: Vec>, -} - -impl ClusterBuilder { - /// Create a new cluster builder with the given node ID - pub fn new(node_id: u64) -> Self { - Self { - config: ClusterConfig { - node_id, - ..Default::default() - }, - cluster_handlers: Vec::new(), - kv_handlers: Vec::new(), - } - } - - /// Set the node name - pub fn name(mut self, name: impl Into) -> Self { - self.config.node_name = name.into(); - self - } - - /// Set the node role (ControlPlane or Worker) - pub fn role(mut self, role: NodeRole) -> Self { - self.config.node_role = role; - self - } - - /// Set the Raft participation role (Voter, Learner, or None) - pub fn raft_role(mut self, role: RaftRole) -> Self { - self.config.raft_role = role; - self - } - - /// Set the API listen address - pub fn api_addr(mut self, addr: SocketAddr) -> Self { - self.config.api_addr = Some(addr); - self - } - - /// Set the Raft listen address (for control plane nodes) - pub fn raft_addr(mut self, addr: SocketAddr) -> Self { - self.config.raft_addr = Some(addr); - self - } - - /// Set the gossip listen address - pub fn gossip_addr(mut self, addr: SocketAddr) -> Self { - self.config.gossip_addr = addr; - self - } - - /// Set the storage backend - pub fn storage(mut self, backend: StorageBackendConfig) -> Self { - self.config.storage = backend; - self - } - - /// Set the data directory (convenience method for RocksDB storage) - pub fn data_dir(mut self, path: impl Into) -> Self { - self.config.storage = StorageBackendConfig::RocksDb { path: path.into() }; - self - } - - /// Use in-memory storage - pub fn memory_storage(mut self) -> Self { - self.config.storage = StorageBackendConfig::Memory; - self - } - - /// Add initial cluster members (for bootstrap) - pub fn initial_members(mut self, members: Vec) -> Self { - self.config.initial_members = members; - self - } - - /// Add a single initial member - pub fn add_member(mut self, member: MemberConfig) -> Self { - self.config.initial_members.push(member); - self - } - - /// Enable cluster bootstrap (first node) - pub fn bootstrap(mut self, bootstrap: bool) -> Self { - self.config.bootstrap = bootstrap; - self - } - - /// Set the cluster ID - pub fn cluster_id(mut self, id: u64) -> Self { - self.config.cluster_id = id; - self - } - - /// Enable gRPC API server - pub fn with_grpc_api(mut self, enabled: bool) -> Self { - self.config.enable_grpc_api = enabled; - self - } - - /// Set timeout configuration - pub fn timeouts(mut self, timeouts: TimeoutConfig) -> Self { - self.config.timeouts = timeouts; - self - } - - /// Register a cluster event handler - /// - /// Multiple handlers can be registered. 
They will all be called - /// when cluster events occur. - pub fn on_cluster_event(mut self, handler: H) -> Self - where - H: ClusterEventHandler + 'static, - { - self.cluster_handlers.push(Arc::new(handler)); - self - } - - /// Register a cluster event handler (Arc version) - pub fn on_cluster_event_arc(mut self, handler: Arc) -> Self { - self.cluster_handlers.push(handler); - self - } - - /// Register a KV event handler - /// - /// Multiple handlers can be registered. They will all be called - /// when KV events occur. - pub fn on_kv_event(mut self, handler: H) -> Self - where - H: KvEventHandler + 'static, - { - self.kv_handlers.push(Arc::new(handler)); - self - } - - /// Register a KV event handler (Arc version) - pub fn on_kv_event_arc(mut self, handler: Arc) -> Self { - self.kv_handlers.push(handler); - self - } - - /// Validate the configuration - fn validate(&self) -> Result<()> { - if self.config.node_id == 0 { - return Err(ClusterError::Config("node_id must be non-zero".into())); - } - - if self.config.node_name.is_empty() { - return Err(ClusterError::Config("node_name is required".into())); - } - - // Raft-participating nodes need a Raft address - if self.config.raft_role.participates_in_raft() && self.config.raft_addr.is_none() { - return Err(ClusterError::Config( - "raft_addr is required for Raft-participating nodes".into(), - )); - } - - Ok(()) - } - - /// Build the cluster instance - /// - /// This initializes the storage backend, Raft (if applicable), and gossip. - pub async fn build(self) -> Result { - self.validate()?; - - // Create event dispatcher with registered handlers - let mut event_dispatcher = EventDispatcher::new(); - for handler in self.cluster_handlers { - event_dispatcher.add_cluster_handler(handler); - } - for handler in self.kv_handlers { - event_dispatcher.add_kv_handler(handler); - } - - // Initialize gossip agent - let gossip_identity = GossipId::new( - self.config.node_id, - self.config.gossip_addr, - self.config.node_role, - ); - - let gossip_agent = GossipAgent::new(gossip_identity, chainfire_gossip::agent::default_config()) - .await - .map_err(|e| ClusterError::Gossip(e.to_string()))?; - - tracing::info!( - node_id = self.config.node_id, - gossip_addr = %self.config.gossip_addr, - "Gossip agent initialized" - ); - - // Create the cluster - let cluster = Cluster::new(self.config, Some(gossip_agent), event_dispatcher); - - // TODO: Initialize storage backend - // TODO: Initialize Raft if role participates - // TODO: Start background tasks - - Ok(cluster) - } -} diff --git a/chainfire/crates/chainfire-core/src/callbacks.rs b/chainfire/crates/chainfire-core/src/callbacks.rs deleted file mode 100644 index 1dcf8a1..0000000 --- a/chainfire/crates/chainfire-core/src/callbacks.rs +++ /dev/null @@ -1,103 +0,0 @@ -//! Callback traits for cluster events - -use async_trait::async_trait; - -use chainfire_types::node::NodeInfo; - -use crate::kvs::KvEntry; - -/// Handler for cluster lifecycle events -/// -/// Implement this trait to receive notifications about cluster membership -/// and leadership changes. 
-#[async_trait] -pub trait ClusterEventHandler: Send + Sync { - /// Called when a node joins the cluster - async fn on_node_joined(&self, _node: &NodeInfo) {} - - /// Called when a node leaves the cluster - async fn on_node_left(&self, _node_id: u64, _reason: LeaveReason) {} - - /// Called when leadership changes - async fn on_leader_changed(&self, _old_leader: Option, _new_leader: u64) {} - - /// Called when this node becomes leader - async fn on_became_leader(&self) {} - - /// Called when this node loses leadership - async fn on_lost_leadership(&self) {} - - /// Called when cluster membership changes - async fn on_membership_changed(&self, _members: &[NodeInfo]) {} - - /// Called when a network partition is detected - async fn on_partition_detected(&self, _reachable: &[u64], _unreachable: &[u64]) {} - - /// Called when cluster is ready (initial leader elected, etc.) - async fn on_cluster_ready(&self) {} -} - -/// Handler for KV store events -/// -/// Implement this trait to receive notifications about key-value changes. -#[async_trait] -pub trait KvEventHandler: Send + Sync { - /// Called when a key is created or updated - async fn on_key_changed( - &self, - _namespace: &str, - _key: &[u8], - _value: &[u8], - _revision: u64, - ) { - } - - /// Called when a key is deleted - async fn on_key_deleted(&self, _namespace: &str, _key: &[u8], _revision: u64) {} - - /// Called when multiple keys with a prefix are changed - async fn on_prefix_changed(&self, _namespace: &str, _prefix: &[u8], _entries: &[KvEntry]) {} -} - -/// Reason for node departure from the cluster -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum LeaveReason { - /// Node left gracefully - Graceful, - - /// Node timed out (failed to respond) - Timeout, - - /// Network partition detected - NetworkPartition, - - /// Node was explicitly evicted - Evicted, - - /// Unknown reason - Unknown, -} - -impl std::fmt::Display for LeaveReason { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - LeaveReason::Graceful => write!(f, "graceful"), - LeaveReason::Timeout => write!(f, "timeout"), - LeaveReason::NetworkPartition => write!(f, "network_partition"), - LeaveReason::Evicted => write!(f, "evicted"), - LeaveReason::Unknown => write!(f, "unknown"), - } - } -} - -/// A no-op event handler for when callbacks are not needed -pub struct NoOpClusterEventHandler; - -#[async_trait] -impl ClusterEventHandler for NoOpClusterEventHandler {} - -/// A no-op KV event handler -pub struct NoOpKvEventHandler; - -#[async_trait] -impl KvEventHandler for NoOpKvEventHandler {} diff --git a/chainfire/crates/chainfire-core/src/cluster.rs b/chainfire/crates/chainfire-core/src/cluster.rs deleted file mode 100644 index 5a2e669..0000000 --- a/chainfire/crates/chainfire-core/src/cluster.rs +++ /dev/null @@ -1,313 +0,0 @@ -//! 
Cluster management - -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; - -use parking_lot::RwLock; -use tokio::sync::broadcast; - -use chainfire_gossip::{GossipAgent, MembershipChange}; -use chainfire_types::node::NodeInfo; - -use crate::config::ClusterConfig; -use crate::error::{ClusterError, Result}; -use crate::events::EventDispatcher; -use crate::kvs::{Kv, KvHandle}; - -/// Current state of the cluster -#[derive(Debug, Clone)] -#[derive(Default)] -pub struct ClusterState { - /// Whether this node is the leader - pub is_leader: bool, - - /// Current leader's node ID - pub leader_id: Option, - - /// Current term (Raft) - pub term: u64, - - /// All known cluster members - pub members: Vec, - - /// Whether the cluster is ready (initial leader elected) - pub ready: bool, -} - - -/// Main cluster instance -/// -/// This is the primary interface for interacting with a Chainfire cluster. -/// It manages Raft consensus, gossip membership, and the distributed KV store. -pub struct Cluster { - /// Node configuration - config: ClusterConfig, - - /// Current cluster state - state: Arc>, - - /// KV store - kv: Arc, - - /// Gossip agent for cluster membership - gossip_agent: Option, - - /// Event dispatcher - event_dispatcher: Arc, - - /// Shutdown flag - shutdown: AtomicBool, - - /// Shutdown signal sender - shutdown_tx: broadcast::Sender<()>, -} - -impl Cluster { - /// Create a new cluster instance - pub(crate) fn new( - config: ClusterConfig, - gossip_agent: Option, - event_dispatcher: EventDispatcher, - ) -> Self { - let (shutdown_tx, _) = broadcast::channel(1); - - Self { - config, - state: Arc::new(RwLock::new(ClusterState::default())), - kv: Arc::new(Kv::new()), - gossip_agent, - event_dispatcher: Arc::new(event_dispatcher), - shutdown: AtomicBool::new(false), - shutdown_tx, - } - } - - /// Get this node's ID - pub fn node_id(&self) -> u64 { - self.config.node_id - } - - /// Get this node's name - pub fn node_name(&self) -> &str { - &self.config.node_name - } - - /// Get a handle for interacting with the cluster - /// - /// Handles are lightweight and can be cloned freely. - pub fn handle(&self) -> ClusterHandle { - ClusterHandle { - node_id: self.config.node_id, - state: self.state.clone(), - kv: self.kv.clone(), - shutdown_tx: self.shutdown_tx.clone(), - } - } - - /// Get the KV store interface - pub fn kv(&self) -> &Arc { - &self.kv - } - - /// Get current cluster state - pub fn state(&self) -> ClusterState { - self.state.read().clone() - } - - /// Check if this node is the leader - pub fn is_leader(&self) -> bool { - self.state.read().is_leader - } - - /// Get current leader ID - pub fn leader(&self) -> Option { - self.state.read().leader_id - } - - /// Get all cluster members - pub fn members(&self) -> Vec { - self.state.read().members.clone() - } - - /// Check if the cluster is ready - pub fn is_ready(&self) -> bool { - self.state.read().ready - } - - /// Join an existing cluster - /// - /// Connects to seed nodes and joins the cluster via gossip. 
- pub async fn join(&mut self, seed_addrs: &[std::net::SocketAddr]) -> Result<()> { - if seed_addrs.is_empty() { - return Err(ClusterError::Config("No seed addresses provided".into())); - } - - let gossip_agent = self.gossip_agent.as_mut().ok_or_else(|| { - ClusterError::Config("Gossip agent not initialized".into()) - })?; - - // Announce to all seed nodes to discover the cluster - for &addr in seed_addrs { - tracing::info!(%addr, "Announcing to seed node"); - gossip_agent - .announce(addr) - .map_err(|e| ClusterError::Gossip(e.to_string()))?; - } - - tracing::info!(seeds = seed_addrs.len(), "Joined cluster via gossip"); - Ok(()) - } - - /// Leave the cluster gracefully - pub async fn leave(&self) -> Result<()> { - // TODO: Implement graceful leave - self.shutdown(); - Ok(()) - } - - /// Add a new node to the cluster (leader only) - pub async fn add_node(&self, _node: NodeInfo, _as_learner: bool) -> Result<()> { - if !self.is_leader() { - return Err(ClusterError::NotLeader { - leader_id: self.leader(), - }); - } - - // TODO: Implement node addition via Raft - Ok(()) - } - - /// Remove a node from the cluster (leader only) - pub async fn remove_node(&self, _node_id: u64) -> Result<()> { - if !self.is_leader() { - return Err(ClusterError::NotLeader { - leader_id: self.leader(), - }); - } - - // TODO: Implement node removal via Raft - Ok(()) - } - - /// Promote a learner to voter (leader only) - pub async fn promote_learner(&self, _node_id: u64) -> Result<()> { - if !self.is_leader() { - return Err(ClusterError::NotLeader { - leader_id: self.leader(), - }); - } - - // TODO: Implement learner promotion via Raft - Ok(()) - } - - /// Run the cluster (blocks until shutdown) - pub async fn run(self) -> Result<()> { - self.run_until_shutdown(std::future::pending()).await - } - - /// Run with graceful shutdown signal - pub async fn run_until_shutdown(mut self, shutdown_signal: F) -> Result<()> - where - F: std::future::Future, - { - let mut shutdown_rx = self.shutdown_tx.subscribe(); - - // Start gossip agent if present - let gossip_task = if let Some(mut gossip_agent) = self.gossip_agent.take() { - let state = self.state.clone(); - let shutdown_rx_gossip = self.shutdown_tx.subscribe(); - - // Spawn task to handle gossip membership changes - Some(tokio::spawn(async move { - // Run the gossip agent with shutdown signal - if let Err(e) = gossip_agent.run_until_shutdown(shutdown_rx_gossip).await { - tracing::error!(error = %e, "Gossip agent error"); - } - })) - } else { - None - }; - - tokio::select! { - _ = shutdown_signal => { - tracing::info!("Received shutdown signal"); - } - _ = shutdown_rx.recv() => { - tracing::info!("Received internal shutdown"); - } - } - - // Wait for gossip task to finish - if let Some(task) = gossip_task { - let _ = task.await; - } - - Ok(()) - } - - /// Trigger shutdown - pub fn shutdown(&self) { - self.shutdown.store(true, Ordering::SeqCst); - let _ = self.shutdown_tx.send(()); - } - - /// Check if shutdown was requested - pub fn is_shutting_down(&self) -> bool { - self.shutdown.load(Ordering::SeqCst) - } - - /// Get the event dispatcher - pub(crate) fn event_dispatcher(&self) -> &Arc { - &self.event_dispatcher - } -} - -/// Lightweight handle for cluster operations -/// -/// This handle can be cloned and passed around cheaply. It provides -/// access to cluster state and the KV store without owning the cluster. 
-#[derive(Clone)] -pub struct ClusterHandle { - node_id: u64, - state: Arc>, - kv: Arc, - shutdown_tx: broadcast::Sender<()>, -} - -impl ClusterHandle { - /// Get this node's ID - pub fn node_id(&self) -> u64 { - self.node_id - } - - /// Get a KV handle - pub fn kv(&self) -> KvHandle { - KvHandle::new(self.kv.clone()) - } - - /// Check if this node is the leader - pub fn is_leader(&self) -> bool { - self.state.read().is_leader - } - - /// Get current leader ID - pub fn leader(&self) -> Option { - self.state.read().leader_id - } - - /// Get all cluster members - pub fn members(&self) -> Vec { - self.state.read().members.clone() - } - - /// Get current cluster state - pub fn state(&self) -> ClusterState { - self.state.read().clone() - } - - /// Trigger cluster shutdown - pub fn shutdown(&self) { - let _ = self.shutdown_tx.send(()); - } -} diff --git a/chainfire/crates/chainfire-core/src/config.rs b/chainfire/crates/chainfire-core/src/config.rs deleted file mode 100644 index c206d4f..0000000 --- a/chainfire/crates/chainfire-core/src/config.rs +++ /dev/null @@ -1,162 +0,0 @@ -//! Configuration types for chainfire-core - -use std::net::SocketAddr; -use std::path::PathBuf; -use std::sync::Arc; -use std::time::Duration; - -use chainfire_types::node::NodeRole; -use chainfire_types::RaftRole; - -// Forward declaration - will be implemented in chainfire-storage -// For now, use a placeholder trait -use async_trait::async_trait; - -/// Storage backend trait for pluggable storage -#[async_trait] -pub trait StorageBackend: Send + Sync { - /// Get a value by key - async fn get(&self, key: &[u8]) -> std::io::Result>>; - /// Put a value - async fn put(&self, key: &[u8], value: &[u8]) -> std::io::Result<()>; - /// Delete a key - async fn delete(&self, key: &[u8]) -> std::io::Result; -} - -/// Configuration for a cluster node -#[derive(Debug, Clone)] -pub struct ClusterConfig { - /// Unique node ID - pub node_id: u64, - - /// Human-readable node name - pub node_name: String, - - /// Node role (ControlPlane or Worker) - pub node_role: NodeRole, - - /// Raft participation role (Voter, Learner, or None) - pub raft_role: RaftRole, - - /// API listen address for client connections - pub api_addr: Option, - - /// Raft listen address for peer-to-peer Raft communication - pub raft_addr: Option, - - /// Gossip listen address for membership discovery - pub gossip_addr: SocketAddr, - - /// Storage backend configuration - pub storage: StorageBackendConfig, - - /// Initial cluster members for bootstrap - pub initial_members: Vec, - - /// Whether to bootstrap the cluster (first node) - pub bootstrap: bool, - - /// Cluster ID - pub cluster_id: u64, - - /// Enable gRPC API server - pub enable_grpc_api: bool, - - /// Timeouts - pub timeouts: TimeoutConfig, -} - -impl Default for ClusterConfig { - fn default() -> Self { - Self { - node_id: 0, - node_name: String::new(), - node_role: NodeRole::ControlPlane, - raft_role: RaftRole::Voter, - api_addr: None, - raft_addr: None, - gossip_addr: "0.0.0.0:7946".parse().unwrap(), - storage: StorageBackendConfig::Memory, - initial_members: Vec::new(), - bootstrap: false, - cluster_id: 1, - enable_grpc_api: false, - timeouts: TimeoutConfig::default(), - } - } -} - -/// Storage backend configuration -#[derive(Clone)] -pub enum StorageBackendConfig { - /// In-memory storage (for testing/simple deployments) - Memory, - - /// RocksDB storage - RocksDb { - /// Data directory path - path: PathBuf, - }, - - /// Custom storage backend - Custom(Arc), -} - -impl std::fmt::Debug for 
StorageBackendConfig { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - StorageBackendConfig::Memory => write!(f, "Memory"), - StorageBackendConfig::RocksDb { path } => { - f.debug_struct("RocksDb").field("path", path).finish() - } - StorageBackendConfig::Custom(_) => write!(f, "Custom(...)"), - } - } -} - -/// Configuration for a cluster member -#[derive(Debug, Clone)] -pub struct MemberConfig { - /// Node ID - pub id: u64, - - /// Node name - pub name: String, - - /// Raft address - pub raft_addr: String, - - /// Client API address - pub client_addr: String, -} - -/// Timeout configuration -#[derive(Debug, Clone)] -pub struct TimeoutConfig { - /// Raft heartbeat interval - pub heartbeat_interval: Duration, - - /// Raft election timeout range (min) - pub election_timeout_min: Duration, - - /// Raft election timeout range (max) - pub election_timeout_max: Duration, - - /// Connection timeout - pub connection_timeout: Duration, - - /// Request timeout - pub request_timeout: Duration, -} - -impl Default for TimeoutConfig { - fn default() -> Self { - Self { - heartbeat_interval: Duration::from_millis(150), - election_timeout_min: Duration::from_millis(300), - election_timeout_max: Duration::from_millis(600), - connection_timeout: Duration::from_secs(5), - request_timeout: Duration::from_secs(10), - } - } -} diff --git a/chainfire/crates/chainfire-core/src/events.rs b/chainfire/crates/chainfire-core/src/events.rs deleted file mode 100644 index 1395e67..0000000 --- a/chainfire/crates/chainfire-core/src/events.rs +++ /dev/null @@ -1,198 +0,0 @@ -//! Event types and dispatcher - -use std::sync::Arc; - -use tokio::sync::broadcast; - -use chainfire_types::node::NodeInfo; - -use crate::callbacks::{ClusterEventHandler, KvEventHandler, LeaveReason}; - -/// Cluster-level events -#[derive(Debug, Clone)] -pub enum ClusterEvent { - /// A node joined the cluster - NodeJoined(NodeInfo), - - /// A node left the cluster - NodeLeft { - /// The node ID that left - node_id: u64, - /// Why the node left - reason: LeaveReason, - }, - - /// Leadership changed - LeaderChanged { - /// Previous leader (None if no previous leader) - old: Option, - /// New leader - new: u64, - }, - - /// This node became the leader - BecameLeader, - - /// This node lost leadership - LostLeadership, - - /// Cluster membership changed - MembershipChanged(Vec), - - /// Network partition detected - PartitionDetected { - /// Nodes that are reachable - reachable: Vec, - /// Nodes that are unreachable - unreachable: Vec, - }, - - /// Cluster is ready - ClusterReady, -} - -/// KV store events -#[derive(Debug, Clone)] -pub enum KvEvent { - /// A key was created or updated - KeyChanged { - /// Namespace of the key - namespace: String, - /// The key that changed - key: Vec, - /// New value - value: Vec, - /// Revision number - revision: u64, - }, - - /// A key was deleted - KeyDeleted { - /// Namespace of the key - namespace: String, - /// The key that was deleted - key: Vec, - /// Revision number - revision: u64, - }, -} - -/// Event dispatcher that manages callbacks and event broadcasting -pub struct EventDispatcher { - cluster_handlers: Vec>, - kv_handlers: Vec>, - event_tx: broadcast::Sender, -} - -impl EventDispatcher { - /// Create a new event dispatcher - pub fn new() -> Self { - let (event_tx, _) = broadcast::channel(1024); - Self { - cluster_handlers: Vec::new(), - kv_handlers: Vec::new(), - event_tx, - } - } - - /// Add a cluster event handler - pub fn add_cluster_handler(&mut self, handler: Arc) { 
- self.cluster_handlers.push(handler); - } - - /// Add a KV event handler - pub fn add_kv_handler(&mut self, handler: Arc) { - self.kv_handlers.push(handler); - } - - /// Get a subscriber for cluster events - pub fn subscribe(&self) -> broadcast::Receiver { - self.event_tx.subscribe() - } - - /// Dispatch a cluster event to all handlers - pub async fn dispatch_cluster_event(&self, event: ClusterEvent) { - // Broadcast to channel subscribers - let _ = self.event_tx.send(event.clone()); - - // Call registered handlers - match &event { - ClusterEvent::NodeJoined(node) => { - for handler in &self.cluster_handlers { - handler.on_node_joined(node).await; - } - } - ClusterEvent::NodeLeft { node_id, reason } => { - for handler in &self.cluster_handlers { - handler.on_node_left(*node_id, *reason).await; - } - } - ClusterEvent::LeaderChanged { old, new } => { - for handler in &self.cluster_handlers { - handler.on_leader_changed(*old, *new).await; - } - } - ClusterEvent::BecameLeader => { - for handler in &self.cluster_handlers { - handler.on_became_leader().await; - } - } - ClusterEvent::LostLeadership => { - for handler in &self.cluster_handlers { - handler.on_lost_leadership().await; - } - } - ClusterEvent::MembershipChanged(members) => { - for handler in &self.cluster_handlers { - handler.on_membership_changed(members).await; - } - } - ClusterEvent::PartitionDetected { - reachable, - unreachable, - } => { - for handler in &self.cluster_handlers { - handler.on_partition_detected(reachable, unreachable).await; - } - } - ClusterEvent::ClusterReady => { - for handler in &self.cluster_handlers { - handler.on_cluster_ready().await; - } - } - } - } - - /// Dispatch a KV event to all handlers - pub async fn dispatch_kv_event(&self, event: KvEvent) { - match &event { - KvEvent::KeyChanged { - namespace, - key, - value, - revision, - } => { - for handler in &self.kv_handlers { - handler - .on_key_changed(namespace, key, value, *revision) - .await; - } - } - KvEvent::KeyDeleted { - namespace, - key, - revision, - } => { - for handler in &self.kv_handlers { - handler.on_key_deleted(namespace, key, *revision).await; - } - } - } - } -} - -impl Default for EventDispatcher { - fn default() -> Self { - Self::new() - } -} diff --git a/chainfire/crates/chainfire-core/src/kvs.rs b/chainfire/crates/chainfire-core/src/kvs.rs deleted file mode 100644 index b4ffbac..0000000 --- a/chainfire/crates/chainfire-core/src/kvs.rs +++ /dev/null @@ -1,290 +0,0 @@ -//! Key-Value store abstraction - -use std::sync::Arc; -use std::time::Duration; - -use dashmap::DashMap; - -use crate::error::{ClusterError, Result}; - -/// KV store interface -/// -/// Provides access to distributed key-value storage with namespace isolation. 
-pub struct Kv { - namespaces: DashMap>, - default_namespace: Arc, -} - -impl Kv { - /// Create a new KV store - pub(crate) fn new() -> Self { - let default_namespace = Arc::new(KvNamespace::new("default".to_string())); - Self { - namespaces: DashMap::new(), - default_namespace, - } - } - - /// Get or create a namespace - pub fn namespace(&self, name: &str) -> Arc { - if name == "default" { - return self.default_namespace.clone(); - } - - self.namespaces - .entry(name.to_string()) - .or_insert_with(|| Arc::new(KvNamespace::new(name.to_string()))) - .clone() - } - - /// Get the default namespace - pub fn default_namespace(&self) -> &Arc { - &self.default_namespace - } - - // Convenience methods on default namespace - - /// Get a value by key from the default namespace - pub async fn get(&self, key: impl AsRef<[u8]>) -> Result>> { - self.default_namespace.get(key).await - } - - /// Put a value in the default namespace - pub async fn put(&self, key: impl AsRef<[u8]>, value: impl AsRef<[u8]>) -> Result { - self.default_namespace.put(key, value).await - } - - /// Delete a key from the default namespace - pub async fn delete(&self, key: impl AsRef<[u8]>) -> Result { - self.default_namespace.delete(key).await - } - - /// Compare-and-swap in the default namespace - pub async fn compare_and_swap( - &self, - key: impl AsRef<[u8]>, - expected_version: u64, - value: impl AsRef<[u8]>, - ) -> Result { - self.default_namespace - .compare_and_swap(key, expected_version, value) - .await - } -} - -/// KV namespace for data isolation -pub struct KvNamespace { - name: String, - // TODO: Add storage backend and raft reference -} - -impl KvNamespace { - pub(crate) fn new(name: String) -> Self { - Self { name } - } - - /// Get the namespace name - pub fn name(&self) -> &str { - &self.name - } - - /// Get a value by key - pub async fn get(&self, _key: impl AsRef<[u8]>) -> Result>> { - // TODO: Implement with storage backend - Ok(None) - } - - /// Get with revision - pub async fn get_with_revision( - &self, - _key: impl AsRef<[u8]>, - ) -> Result, u64)>> { - // TODO: Implement with storage backend - Ok(None) - } - - /// Put a value (goes through Raft if available) - pub async fn put(&self, _key: impl AsRef<[u8]>, _value: impl AsRef<[u8]>) -> Result { - // TODO: Implement with Raft - Ok(0) - } - - /// Put with options - pub async fn put_with_options( - &self, - _key: impl AsRef<[u8]>, - _value: impl AsRef<[u8]>, - _options: KvOptions, - ) -> Result { - // TODO: Implement with Raft - Ok(KvPutResult { - revision: 0, - prev_value: None, - }) - } - - /// Delete a key - pub async fn delete(&self, _key: impl AsRef<[u8]>) -> Result { - // TODO: Implement with Raft - Ok(false) - } - - /// Compare-and-swap - pub async fn compare_and_swap( - &self, - _key: impl AsRef<[u8]>, - expected_version: u64, - _value: impl AsRef<[u8]>, - ) -> Result { - // TODO: Implement with storage backend - Err(ClusterError::VersionMismatch { - expected: expected_version, - actual: 0, - }) - } - - /// Scan keys with prefix - pub async fn scan_prefix( - &self, - _prefix: impl AsRef<[u8]>, - _limit: u32, - ) -> Result> { - // TODO: Implement with storage backend - Ok(Vec::new()) - } - - /// Scan keys in a range - pub async fn scan_range( - &self, - _start: impl AsRef<[u8]>, - _end: impl AsRef<[u8]>, - _limit: u32, - ) -> Result> { - // TODO: Implement with storage backend - Ok(Vec::new()) - } - - /// Get with specified consistency level - pub async fn get_with_consistency( - &self, - _key: impl AsRef<[u8]>, - _consistency: ReadConsistency, - ) -> 
Result>> { - // TODO: Implement with consistency options - Ok(None) - } -} - -/// Options for KV operations -#[derive(Debug, Clone, Default)] -pub struct KvOptions { - /// Lease ID for TTL-based expiration - pub lease_id: Option, - - /// Return previous value - pub prev_kv: bool, - - /// Time-to-live for the key - pub ttl: Option, -} - -/// Result of a put operation -#[derive(Debug, Clone)] -pub struct KvPutResult { - /// New revision after the put - pub revision: u64, - - /// Previous value, if requested and existed - pub prev_value: Option>, -} - -/// A key-value entry with metadata -#[derive(Debug, Clone)] -pub struct KvEntry { - /// The key - pub key: Vec, - - /// The value - pub value: Vec, - - /// Revision when the key was created - pub create_revision: u64, - - /// Revision when the key was last modified - pub mod_revision: u64, - - /// Version number (increments on each update) - pub version: u64, - - /// Lease ID if the key is attached to a lease - pub lease_id: Option, -} - -/// Result of a compare-and-swap operation -#[derive(Debug, Clone)] -pub enum CasResult { - /// CAS succeeded, contains new revision - Success(u64), - - /// CAS failed due to version mismatch - Conflict { - /// Expected version - expected: u64, - /// Actual version found - actual: u64, - }, - - /// Key did not exist - NotFound, -} - -/// Read consistency level -#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] -pub enum ReadConsistency { - /// Read from local storage (may be stale) - Local, - - /// Read from any node, but verify with leader's committed index - Serializable, - - /// Read only from leader (linearizable, strongest guarantee) - #[default] - Linearizable, -} - -/// Lightweight handle for KV operations -#[derive(Clone)] -pub struct KvHandle { - kv: Arc, -} - -impl KvHandle { - pub(crate) fn new(kv: Arc) -> Self { - Self { kv } - } - - /// Get the underlying KV store - pub fn inner(&self) -> &Arc { - &self.kv - } - - /// Get a value by key - pub async fn get(&self, key: impl AsRef<[u8]>) -> Result>> { - self.kv.get(key).await - } - - /// Put a value - pub async fn put(&self, key: impl AsRef<[u8]>, value: impl AsRef<[u8]>) -> Result { - self.kv.put(key, value).await - } - - /// Delete a key - pub async fn delete(&self, key: impl AsRef<[u8]>) -> Result { - self.kv.delete(key).await - } - - /// Get a namespace - pub fn namespace(&self, name: &str) -> Arc { - self.kv.namespace(name) - } -} diff --git a/chainfire/crates/chainfire-core/src/lib.rs b/chainfire/crates/chainfire-core/src/lib.rs index e924c25..24a2f79 100644 --- a/chainfire/crates/chainfire-core/src/lib.rs +++ b/chainfire/crates/chainfire-core/src/lib.rs @@ -1,58 +1,10 @@ -//! Chainfire Core - Embeddable distributed cluster library +//! Internal compatibility crate for workspace-local ChainFire types. //! -//! This crate provides cluster management, distributed KVS, and event callbacks -//! for embedding Raft consensus and SWIM gossip into applications. -//! -//! # Example -//! -//! ```ignore -//! use chainfire_core::{ClusterBuilder, ClusterEventHandler}; -//! use std::net::SocketAddr; -//! -//! struct MyHandler; -//! -//! impl ClusterEventHandler for MyHandler { -//! async fn on_leader_changed(&self, old: Option, new: u64) { -//! println!("Leader changed: {:?} -> {}", old, new); -//! } -//! } -//! -//! #[tokio::main] -//! async fn main() -> Result<(), Box> { -//! let cluster = ClusterBuilder::new(1) -//! .name("node-1") -//! .gossip_addr("0.0.0.0:7946".parse()?) -//! .raft_addr("0.0.0.0:2380".parse()?) -//! 
.on_cluster_event(MyHandler) -//! .build() -//! .await?; -//! -//! // Use the KVS -//! cluster.kv().put("key", b"value").await?; -//! -//! Ok(()) -//! } -//! ``` +//! The supported ChainFire product surface is the fixed-membership +//! `chainfire-server` / `chainfire-api` contract documented in the repository +//! root. This crate intentionally does not export an embeddable cluster, +//! membership-mutation, or distributed-KV API. -pub mod builder; -pub mod callbacks; -pub mod cluster; -pub mod config; -pub mod error; -pub mod events; -pub mod kvs; +mod error; -// Re-exports from chainfire-types -pub use chainfire_types::{ - node::{NodeId, NodeInfo, NodeRole}, - RaftRole, -}; - -// Re-exports from this crate -pub use builder::ClusterBuilder; -pub use callbacks::{ClusterEventHandler, KvEventHandler, LeaveReason}; -pub use cluster::{Cluster, ClusterHandle, ClusterState}; -pub use config::{ClusterConfig, StorageBackend, StorageBackendConfig}; pub use error::{ClusterError, Result}; -pub use events::{ClusterEvent, EventDispatcher, KvEvent}; -pub use kvs::{CasResult, Kv, KvEntry, KvHandle, KvNamespace, KvOptions, ReadConsistency}; diff --git a/chainfire/crates/chainfire-core/src/traits.rs b/chainfire/crates/chainfire-core/src/traits.rs deleted file mode 100644 index 3c20646..0000000 --- a/chainfire/crates/chainfire-core/src/traits.rs +++ /dev/null @@ -1,60 +0,0 @@ -use async_trait::async_trait; -use chainfire_types::node::NodeInfo; -use crate::error::Result; -use std::net::SocketAddr; - -/// Abstract interface for Gossip protocol -#[async_trait] -pub trait Gossip: Send + Sync { - /// Start the gossip agent - async fn start(&self) -> Result<()>; - - /// Join a cluster via seed nodes - async fn join(&self, seeds: &[SocketAddr]) -> Result<()>; - - /// Announce presence to a specific node - async fn announce(&self, addr: SocketAddr) -> Result<()>; - - /// Get list of known members - fn members(&self) -> Vec; - - /// Shutdown the gossip agent - async fn shutdown(&self) -> Result<()>; -} - -/// Abstract interface for Consensus protocol (Raft) -#[async_trait] -pub trait Consensus: Send + Sync { - /// Initialize the consensus module - async fn initialize(&self) -> Result<()>; - - /// Start the event loop - async fn run(&self) -> Result<()>; - - /// Propose a command to the state machine - async fn propose(&self, data: Vec) -> Result; - - /// Add a node to the consensus group - async fn add_node(&self, node_id: u64, addr: String, as_learner: bool) -> Result<()>; - - /// Remove a node from the consensus group - async fn remove_node(&self, node_id: u64) -> Result<()>; - - /// Check if this node is the leader - fn is_leader(&self) -> bool; - - /// Get the current leader ID - fn leader_id(&self) -> Option; -} - -/// Abstract interface for State Machine -pub trait StateMachine: Send + Sync { - /// Apply a committed entry - fn apply(&self, index: u64, data: &[u8]) -> Result>; - - /// Take a snapshot of current state - fn snapshot(&self) -> Result>; - - /// Restore state from a snapshot - fn restore(&self, snapshot: &[u8]) -> Result<()>; -} \ No newline at end of file diff --git a/chainfire/crates/chainfire-gossip/src/broadcast.rs b/chainfire/crates/chainfire-gossip/src/broadcast.rs index 94e1f14..9f7b0a1 100644 --- a/chainfire/crates/chainfire-gossip/src/broadcast.rs +++ b/chainfire/crates/chainfire-gossip/src/broadcast.rs @@ -141,7 +141,11 @@ impl ActualStateBroadcast { } } - debug!(node_id, timestamp = state.timestamp, "Received actual state"); + debug!( + node_id, + timestamp = state.timestamp, + "Received 
actual state" + ); self.cluster_state.insert(node_id, state); true } diff --git a/chainfire/crates/chainfire-gossip/src/identity.rs b/chainfire/crates/chainfire-gossip/src/identity.rs index 9413cba..f6afda7 100644 --- a/chainfire/crates/chainfire-gossip/src/identity.rs +++ b/chainfire/crates/chainfire-gossip/src/identity.rs @@ -77,13 +77,7 @@ impl Identity for GossipId { impl std::fmt::Display for GossipId { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}@{}:{}", - self.node_id, - self.addr, - self.incarnation - ) + write!(f, "{}@{}:{}", self.node_id, self.addr, self.incarnation) } } diff --git a/chainfire/crates/chainfire-gossip/src/membership.rs b/chainfire/crates/chainfire-gossip/src/membership.rs index e0e42de..2e8a757 100644 --- a/chainfire/crates/chainfire-gossip/src/membership.rs +++ b/chainfire/crates/chainfire-gossip/src/membership.rs @@ -129,8 +129,14 @@ mod tests { fn test_role_filtering() { let state = MembershipState::new(); - state.handle_change(MembershipChange::MemberUp(create_id(1, NodeRole::ControlPlane))); - state.handle_change(MembershipChange::MemberUp(create_id(2, NodeRole::ControlPlane))); + state.handle_change(MembershipChange::MemberUp(create_id( + 1, + NodeRole::ControlPlane, + ))); + state.handle_change(MembershipChange::MemberUp(create_id( + 2, + NodeRole::ControlPlane, + ))); state.handle_change(MembershipChange::MemberUp(create_id(3, NodeRole::Worker))); state.handle_change(MembershipChange::MemberUp(create_id(4, NodeRole::Worker))); state.handle_change(MembershipChange::MemberUp(create_id(5, NodeRole::Worker))); diff --git a/chainfire/crates/chainfire-raft/src/core.rs b/chainfire/crates/chainfire-raft/src/core.rs index 2664255..ed31295 100644 --- a/chainfire/crates/chainfire-raft/src/core.rs +++ b/chainfire/crates/chainfire-raft/src/core.rs @@ -12,12 +12,12 @@ use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; -use tokio::sync::{mpsc, oneshot, RwLock, Mutex}; +use tokio::sync::{mpsc, oneshot, Mutex, RwLock}; use tokio::time; -use chainfire_storage::{LogStorage, StateMachine, LogEntry, EntryPayload, LogId}; -use chainfire_types::command::RaftCommand; use crate::network::RaftRpcClient; +use chainfire_storage::{EntryPayload, LogEntry, LogId, LogStorage, StateMachine}; +use chainfire_types::command::RaftCommand; use tracing::{debug, trace}; pub type NodeId = u64; @@ -155,10 +155,7 @@ pub enum RaftEvent { response_tx: oneshot::Sender, }, /// RequestVote RPC response received - VoteResponse { - from: NodeId, - resp: VoteResponse, - }, + VoteResponse { from: NodeId, resp: VoteResponse }, /// AppendEntries RPC response received AppendEntriesResponse { from: NodeId, @@ -305,7 +302,10 @@ impl RaftCore { tracing::info!("No persistent state found, starting fresh"); } Err(e) => { - return Err(RaftError::StorageError(format!("Failed to load vote: {}", e))); + return Err(RaftError::StorageError(format!( + "Failed to load vote: {}", + e + ))); } } Ok(()) @@ -339,7 +339,10 @@ impl RaftCore { // Main event loop let mut event_rx = self.event_rx.lock().await; - eprintln!("[Node {}] EVENT LOOP acquired event_rx, starting recv loop", self.node_id); + eprintln!( + "[Node {}] EVENT LOOP acquired event_rx, starting recv loop", + self.node_id + ); loop { tokio::select! 
{ @@ -379,7 +382,10 @@ impl RaftCore { RaftEvent::HeartbeatTimeout => { self.handle_heartbeat_timeout().await?; } - RaftEvent::ClientWrite { command, response_tx } => { + RaftEvent::ClientWrite { + command, + response_tx, + } => { let result = self.handle_client_write(command).await; let _ = response_tx.send(result); } @@ -388,8 +394,10 @@ impl RaftCore { let _ = response_tx.send(resp); } RaftEvent::AppendEntries { req, response_tx } => { - eprintln!("[Node {}] EVENT LOOP processing AppendEntries from {} term={}", - self.node_id, req.leader_id, req.term); + eprintln!( + "[Node {}] EVENT LOOP processing AppendEntries from {} term={}", + self.node_id, req.leader_id, req.term + ); let resp = self.handle_append_entries(req).await?; let _ = response_tx.send(resp); } @@ -411,11 +419,17 @@ impl RaftCore { async fn handle_election_timeout(&self) -> Result<(), RaftError> { let role = *self.role.read().await; - eprintln!("[Node {}] handle_election_timeout: role={:?}", self.node_id, role); + eprintln!( + "[Node {}] handle_election_timeout: role={:?}", + self.node_id, role + ); // Only followers and candidates start elections if role == RaftRole::Leader { - eprintln!("[Node {}] Already leader, ignoring election timeout", self.node_id); + eprintln!( + "[Node {}] Already leader, ignoring election timeout", + self.node_id + ); return Ok(()); } @@ -433,7 +447,10 @@ impl RaftCore { let current_term = persistent.current_term; drop(persistent); - eprintln!("[Node {}] Starting election for term {}", self.node_id, current_term); + eprintln!( + "[Node {}] Starting election for term {}", + self.node_id, current_term + ); // Persist vote to storage before sending RPCs (Raft safety) self.persist_vote().await?; @@ -448,11 +465,16 @@ impl RaftCore { // Check if already have majority (single-node case) let cluster_size = self.peers.len() + 1; let majority = cluster_size / 2 + 1; - eprintln!("[Node {}] Cluster size={}, majority={}, peers={:?}", - self.node_id, cluster_size, majority, self.peers); + eprintln!( + "[Node {}] Cluster size={}, majority={}, peers={:?}", + self.node_id, cluster_size, majority, self.peers + ); if 1 >= majority { // For single-node cluster, immediately become leader - eprintln!("[Node {}] Single-node cluster, becoming leader immediately", self.node_id); + eprintln!( + "[Node {}] Single-node cluster, becoming leader immediately", + self.node_id + ); self.become_leader().await?; return Ok(()); } @@ -477,14 +499,16 @@ impl RaftCore { tokio::spawn(async move { // Send vote request via network (using real RaftRpcClient - GrpcRaftClient in production) - let resp = network.vote(peer_id, req).await - .unwrap_or(VoteResponse { - term: current_term, - vote_granted: false, - }); + let resp = network.vote(peer_id, req).await.unwrap_or(VoteResponse { + term: current_term, + vote_granted: false, + }); // Send response back to main event loop - let _ = event_tx.send(RaftEvent::VoteResponse { from: peer_id, resp }); + let _ = event_tx.send(RaftEvent::VoteResponse { + from: peer_id, + resp, + }); }); } @@ -515,8 +539,8 @@ impl RaftCore { } // Check if we can grant vote - let can_vote = persistent.voted_for.is_none() - || persistent.voted_for == Some(req.candidate_id); + let can_vote = + persistent.voted_for.is_none() || persistent.voted_for == Some(req.candidate_id); if !can_vote { return Ok(VoteResponse { @@ -554,7 +578,11 @@ impl RaftCore { } /// Handle VoteResponse from a peer - async fn handle_vote_response(&self, from: NodeId, resp: VoteResponse) -> Result<(), RaftError> { + async fn handle_vote_response( 
+ &self, + from: NodeId, + resp: VoteResponse, + ) -> Result<(), RaftError> { let role = *self.role.read().await; let persistent = self.persistent.read().await; @@ -625,7 +653,8 @@ impl RaftCore { *self.leader_state.write().await = Some(leader_state); // Start sending heartbeats immediately - self.event_tx.send(RaftEvent::HeartbeatTimeout) + self.event_tx + .send(RaftEvent::HeartbeatTimeout) .map_err(|e| RaftError::NetworkError(format!("Failed to send heartbeat: {}", e)))?; Ok(()) @@ -665,8 +694,10 @@ impl RaftCore { let term = self.persistent.read().await.current_term; let (last_log_index, _) = self.get_last_log_info().await?; - eprintln!("[Node {}] Sending heartbeat to peers: {:?} (term={})", - self.node_id, self.peers, term); + eprintln!( + "[Node {}] Sending heartbeat to peers: {:?} (term={})", + self.node_id, self.peers, term + ); // Send AppendEntries (with entries if available) to all peers for peer_id in &self.peers { @@ -677,7 +708,8 @@ impl RaftCore { // Get prevLogIndex and prevLogTerm for this peer let leader_state = self.leader_state.read().await; - let next_index = leader_state.as_ref() + let next_index = leader_state + .as_ref() .and_then(|ls| ls.next_index.get(&peer_id).copied()) .unwrap_or(1); drop(leader_state); @@ -685,7 +717,8 @@ impl RaftCore { let prev_log_index = next_index.saturating_sub(1); let prev_log_term = if prev_log_index > 0 { // Read as Vec since that's how it's stored - let entries: Vec>> = self.storage + let entries: Vec>> = self + .storage .get_log_entries(prev_log_index..=prev_log_index) .map_err(|e| RaftError::StorageError(format!("Failed to read log: {}", e)))?; @@ -701,36 +734,55 @@ impl RaftCore { // Get entries to send (if any) let entries: Vec> = if next_index <= last_log_index { // Read entries from storage (stored as Vec) - let stored_entries: Vec>> = self.storage + let stored_entries: Vec>> = self + .storage .get_log_entries(next_index..=last_log_index) - .map_err(|e| RaftError::StorageError(format!("Failed to read log entries: {}", e)))?; + .map_err(|e| { + RaftError::StorageError(format!("Failed to read log entries: {}", e)) + })?; // Convert Vec back to RaftCommand - stored_entries.into_iter().map(|entry| { - let command = bincode::deserialize(match &entry.payload { - EntryPayload::Normal(data) => data, - EntryPayload::Blank => return Ok(LogEntry { - log_id: entry.log_id, - payload: EntryPayload::Blank, - }), - EntryPayload::Membership(nodes) => return Ok(LogEntry { - log_id: entry.log_id, - payload: EntryPayload::Membership(nodes.clone()), - }), - }).map_err(|e| RaftError::StorageError(format!("Failed to deserialize command: {}", e)))?; + stored_entries + .into_iter() + .map(|entry| { + let command = bincode::deserialize(match &entry.payload { + EntryPayload::Normal(data) => data, + EntryPayload::Blank => { + return Ok(LogEntry { + log_id: entry.log_id, + payload: EntryPayload::Blank, + }) + } + EntryPayload::Membership(nodes) => { + return Ok(LogEntry { + log_id: entry.log_id, + payload: EntryPayload::Membership(nodes.clone()), + }) + } + }) + .map_err(|e| { + RaftError::StorageError(format!("Failed to deserialize command: {}", e)) + })?; - Ok(LogEntry { - log_id: entry.log_id, - payload: EntryPayload::Normal(command), + Ok(LogEntry { + log_id: entry.log_id, + payload: EntryPayload::Normal(command), + }) }) - }).collect::, RaftError>>()? + .collect::, RaftError>>()? 
} else { // No entries to send, just heartbeat vec![] }; - eprintln!("[Node {}] HEARTBEAT to {}: entries.len()={} next_index={} last_log_index={}", - self.node_id, peer_id, entries.len(), next_index, last_log_index); + eprintln!( + "[Node {}] HEARTBEAT to {}: entries.len()={} next_index={} last_log_index={}", + self.node_id, + peer_id, + entries.len(), + next_index, + last_log_index + ); let req = AppendEntriesRequest { term, @@ -741,8 +793,10 @@ impl RaftCore { leader_commit: commit_index, }; - eprintln!("[Node {}] LEADER sending to {}: leader_commit={}", - self.node_id, peer_id, commit_index); + eprintln!( + "[Node {}] LEADER sending to {}: leader_commit={}", + self.node_id, peer_id, commit_index + ); let network = Arc::clone(&self.network); let event_tx = self.event_tx.clone(); @@ -761,18 +815,25 @@ impl RaftCore { Ok(()) } - async fn handle_append_entries(&self, req: AppendEntriesRequest) -> Result { + async fn handle_append_entries( + &self, + req: AppendEntriesRequest, + ) -> Result { let mut persistent = self.persistent.write().await; let current_term = persistent.current_term; // DIAGNOSTIC: Log all AppendEntries received - eprintln!("[Node {}] Received AppendEntries from {} term={} (my term={})", - self.node_id, req.leader_id, req.term, current_term); + eprintln!( + "[Node {}] Received AppendEntries from {} term={} (my term={})", + self.node_id, req.leader_id, req.term, current_term + ); // If RPC request contains term T > currentTerm: set currentTerm = T, convert to follower if req.term > current_term { - eprintln!("[Node {}] STEPPING DOWN: req.term={} > my term={}", - self.node_id, req.term, current_term); + eprintln!( + "[Node {}] STEPPING DOWN: req.term={} > my term={}", + self.node_id, req.term, current_term + ); persistent.current_term = req.term; persistent.voted_for = None; drop(persistent); @@ -780,8 +841,10 @@ impl RaftCore { *self.role.write().await = RaftRole::Follower; *self.candidate_state.write().await = None; *self.leader_state.write().await = None; - eprintln!("[Node {}] Stepped down to Follower (now term={})", - self.node_id, req.term); + eprintln!( + "[Node {}] Stepped down to Follower (now term={})", + self.node_id, req.term + ); } else { drop(persistent); } @@ -810,7 +873,8 @@ impl RaftCore { // Reply false if log doesn't contain an entry at prevLogIndex whose term matches prevLogTerm if req.prev_log_index > 0 { // Try to get the entry at prevLogIndex (stored as Vec) - let prev_entries: Vec>> = self.storage + let prev_entries: Vec>> = self + .storage .get_log_entries(req.prev_log_index..=req.prev_log_index) .map_err(|e| RaftError::StorageError(format!("Failed to read log: {}", e)))?; @@ -835,9 +899,10 @@ impl RaftCore { // Search backwards to find first entry of this term let mut conflict_index = req.prev_log_index; for idx in (1..req.prev_log_index).rev() { - let entries: Vec>> = self.storage - .get_log_entries(idx..=idx) - .map_err(|e| RaftError::StorageError(format!("Failed to read log: {}", e)))?; + let entries: Vec>> = + self.storage.get_log_entries(idx..=idx).map_err(|e| { + RaftError::StorageError(format!("Failed to read log: {}", e)) + })?; if !entries.is_empty() && entries[0].log_id.term != conflict_term { conflict_index = idx + 1; @@ -861,33 +926,39 @@ impl RaftCore { let first_new_index = req.entries[0].log_id.index; // Check if there's a conflict (stored as Vec) - let existing: Vec>> = self.storage + let existing: Vec>> = self + .storage .get_log_entries(first_new_index..=first_new_index) .map_err(|e| RaftError::StorageError(format!("Failed to read 
log: {}", e)))?; if !existing.is_empty() && existing[0].log_id.term != req.entries[0].log_id.term { // Conflict detected - truncate from this index - self.storage - .truncate(first_new_index) - .map_err(|e| RaftError::StorageError(format!("Failed to truncate log: {}", e)))?; + self.storage.truncate(first_new_index).map_err(|e| { + RaftError::StorageError(format!("Failed to truncate log: {}", e)) + })?; } // Convert RaftCommand entries to Vec before storing - let entries_to_store: Vec>> = req.entries.iter().map(|entry| { - let payload = match &entry.payload { - EntryPayload::Normal(cmd) => { - let bytes = bincode::serialize(cmd) - .map_err(|e| RaftError::StorageError(format!("Serialize failed: {}", e)))?; - EntryPayload::Normal(bytes) - } - EntryPayload::Blank => EntryPayload::Blank, - EntryPayload::Membership(nodes) => EntryPayload::Membership(nodes.clone()), - }; - Ok(LogEntry { - log_id: entry.log_id, - payload, + let entries_to_store: Vec>> = req + .entries + .iter() + .map(|entry| { + let payload = match &entry.payload { + EntryPayload::Normal(cmd) => { + let bytes = bincode::serialize(cmd).map_err(|e| { + RaftError::StorageError(format!("Serialize failed: {}", e)) + })?; + EntryPayload::Normal(bytes) + } + EntryPayload::Blank => EntryPayload::Blank, + EntryPayload::Membership(nodes) => EntryPayload::Membership(nodes.clone()), + }; + Ok(LogEntry { + log_id: entry.log_id, + payload, + }) }) - }).collect::, RaftError>>()?; + .collect::, RaftError>>()?; // Append converted entries self.storage @@ -895,14 +966,22 @@ impl RaftCore { .map_err(|e| RaftError::StorageError(format!("Failed to append entries: {}", e)))?; let (last_log_index, _) = self.get_last_log_info().await?; - eprintln!("[Node {}] FOLLOWER appended {} entries, last_index_now={}", - self.node_id, req.entries.len(), last_log_index); + eprintln!( + "[Node {}] FOLLOWER appended {} entries, last_index_now={}", + self.node_id, + req.entries.len(), + last_log_index + ); } // P2: Update commit index // If leaderCommit > commitIndex, set commitIndex = min(leaderCommit, index of last new entry) - eprintln!("[Node {}] FOLLOWER commit check: req.leader_commit={} my_commit={}", - self.node_id, req.leader_commit, self.volatile.read().await.commit_index); + eprintln!( + "[Node {}] FOLLOWER commit check: req.leader_commit={} my_commit={}", + self.node_id, + req.leader_commit, + self.volatile.read().await.commit_index + ); if req.leader_commit > 0 { let mut volatile = self.volatile.write().await; if req.leader_commit > volatile.commit_index { @@ -913,8 +992,10 @@ impl RaftCore { }; let new_commit = std::cmp::min(req.leader_commit, last_new_index); - eprintln!("[Node {}] FOLLOWER updating commit: {} -> {}", - self.node_id, volatile.commit_index, new_commit); + eprintln!( + "[Node {}] FOLLOWER updating commit: {} -> {}", + self.node_id, volatile.commit_index, new_commit + ); volatile.commit_index = new_commit; debug!( @@ -939,7 +1020,11 @@ impl RaftCore { }) } - async fn handle_append_entries_response(&self, from: NodeId, resp: AppendEntriesResponse) -> Result<(), RaftError> { + async fn handle_append_entries_response( + &self, + from: NodeId, + resp: AppendEntriesResponse, + ) -> Result<(), RaftError> { // Only leaders process AppendEntries responses let role = *self.role.read().await; if role != RaftRole::Leader { @@ -984,8 +1069,14 @@ impl RaftCore { leader_state.match_index.insert(from, new_match_index); leader_state.next_index.insert(from, new_match_index + 1); - eprintln!("[Node {}] RESP from {}: success={} match_index={} next_index={}", 
- self.node_id, from, resp.success, new_match_index, new_match_index + 1); + eprintln!( + "[Node {}] RESP from {}: success={} match_index={} next_index={}", + self.node_id, + from, + resp.success, + new_match_index, + new_match_index + 1 + ); trace!( peer = from, @@ -1040,11 +1131,7 @@ impl RaftCore { // Collect all match_index values plus leader's own log let (last_log_index, _) = self.get_last_log_info().await?; - let mut match_indices: Vec = leader_state - .match_index - .values() - .copied() - .collect(); + let mut match_indices: Vec = leader_state.match_index.values().copied().collect(); // Add leader's own index match_indices.push(last_log_index); @@ -1056,8 +1143,10 @@ impl RaftCore { let majority_index = match_indices.len() / 2; let new_commit_index = match_indices[majority_index]; - eprintln!("[Node {}] COMMIT CHECK: match_indices={:?} majority_idx={} new_commit={}", - self.node_id, match_indices, majority_index, new_commit_index); + eprintln!( + "[Node {}] COMMIT CHECK: match_indices={:?} majority_idx={} new_commit={}", + self.node_id, match_indices, majority_index, new_commit_index + ); let current_term = self.persistent.read().await.current_term; let old_commit_index = self.volatile.read().await.commit_index; @@ -1067,9 +1156,12 @@ impl RaftCore { // 2. The entry at new_commit_index is from current term (Raft safety) if new_commit_index > old_commit_index { // Check term of entry at new_commit_index (stored as Vec) - let entries: Vec>> = self.storage + let entries: Vec>> = self + .storage .get_log_entries(new_commit_index..=new_commit_index) - .map_err(|e| RaftError::StorageError(format!("Failed to read log for commit: {}", e)))?; + .map_err(|e| { + RaftError::StorageError(format!("Failed to read log for commit: {}", e)) + })?; if !entries.is_empty() && entries[0].log_id.term == current_term { // Safe to commit @@ -1100,20 +1192,24 @@ impl RaftCore { } // Get entries to apply (stored as Vec) - let stored_entries: Vec>> = self.storage + let stored_entries: Vec>> = self + .storage .get_log_entries((last_applied + 1)..=commit_index) - .map_err(|e| RaftError::StorageError(format!("Failed to read entries for apply: {}", e)))?; + .map_err(|e| { + RaftError::StorageError(format!("Failed to read entries for apply: {}", e)) + })?; // Apply each entry to state machine for entry in &stored_entries { if let EntryPayload::Normal(data) = &entry.payload { // Deserialize the command - let command: RaftCommand = bincode::deserialize(data) - .map_err(|e| RaftError::StorageError(format!("Failed to deserialize for apply: {}", e)))?; + let command: RaftCommand = bincode::deserialize(data).map_err(|e| { + RaftError::StorageError(format!("Failed to deserialize for apply: {}", e)) + })?; - self.state_machine - .apply(command) - .map_err(|e| RaftError::StorageError(format!("Failed to apply to state machine: {}", e)))?; + self.state_machine.apply(command).map_err(|e| { + RaftError::StorageError(format!("Failed to apply to state machine: {}", e)) + })?; debug!( index = entry.log_id.index, @@ -1148,14 +1244,23 @@ impl RaftCore { // Get current term and last log index let term = self.persistent.read().await.current_term; - eprintln!("[Node {}] handle_client_write: getting last_log_info...", self.node_id); + eprintln!( + "[Node {}] handle_client_write: getting last_log_info...", + self.node_id + ); let (last_log_index, _) = match self.get_last_log_info().await { Ok(info) => { - eprintln!("[Node {}] handle_client_write: last_log_index={}", self.node_id, info.0); + eprintln!( + "[Node {}] handle_client_write: 
last_log_index={}", + self.node_id, info.0 + ); info } Err(e) => { - eprintln!("[Node {}] handle_client_write: ERROR getting last_log_info: {:?}", self.node_id, e); + eprintln!( + "[Node {}] handle_client_write: ERROR getting last_log_info: {:?}", + self.node_id, e + ); return Err(e); } }; @@ -1177,14 +1282,26 @@ impl RaftCore { }; // Append to leader's log - eprintln!("[Node {}] handle_client_write: appending entry index={} term={}...", self.node_id, new_index, term); + eprintln!( + "[Node {}] handle_client_write: appending entry index={} term={}...", + self.node_id, new_index, term + ); match self.storage.append(&[entry.clone()]) { Ok(()) => { - eprintln!("[Node {}] handle_client_write: append SUCCESS index={}", self.node_id, new_index); + eprintln!( + "[Node {}] handle_client_write: append SUCCESS index={}", + self.node_id, new_index + ); } Err(e) => { - eprintln!("[Node {}] handle_client_write: append FAILED: {:?}", self.node_id, e); - return Err(RaftError::StorageError(format!("Failed to append entry: {}", e))); + eprintln!( + "[Node {}] handle_client_write: append FAILED: {:?}", + self.node_id, e + ); + return Err(RaftError::StorageError(format!( + "Failed to append entry: {}", + e + ))); } } @@ -1198,7 +1315,9 @@ impl RaftCore { // Send AppendEntries with the new entry to all peers self.event_tx .send(RaftEvent::HeartbeatTimeout) - .map_err(|e| RaftError::NetworkError(format!("Failed to trigger replication: {}", e)))?; + .map_err(|e| { + RaftError::NetworkError(format!("Failed to trigger replication: {}", e)) + })?; // Single-node cluster: immediately commit since we're the only voter if self.peers.is_empty() { @@ -1218,7 +1337,8 @@ impl RaftCore { /// Get last log index and term async fn get_last_log_info(&self) -> Result<(LogIndex, Term), RaftError> { - let log_state = self.storage + let log_state = self + .storage .get_log_state() .map_err(|e| RaftError::StorageError(format!("Failed to get log state: {}", e)))?; @@ -1238,9 +1358,9 @@ impl RaftCore { tokio::spawn(async move { eprintln!("[ELECTION TIMER] Spawned"); loop { - let timeout = rand::random::() % - (config.election_timeout_max - config.election_timeout_min) + - config.election_timeout_min; + let timeout = rand::random::() + % (config.election_timeout_max - config.election_timeout_min) + + config.election_timeout_min; eprintln!("[ELECTION TIMER] Waiting {}ms", timeout); tokio::select! 
{ @@ -1275,7 +1395,8 @@ impl RaftCore { let config = self.config.clone(); tokio::spawn(async move { - let mut interval = tokio::time::interval(Duration::from_millis(config.heartbeat_interval)); + let mut interval = + tokio::time::interval(Duration::from_millis(config.heartbeat_interval)); // Skip the first tick (fires immediately) interval.tick().await; @@ -1313,12 +1434,11 @@ impl RaftCore { } /// Inject RequestVote RPC (for testing) - pub async fn request_vote_rpc( - &self, - req: VoteRequest, - resp_tx: oneshot::Sender, - ) { - let _ = self.event_tx.send(RaftEvent::VoteRequest { req, response_tx: resp_tx }); + pub async fn request_vote_rpc(&self, req: VoteRequest, resp_tx: oneshot::Sender) { + let _ = self.event_tx.send(RaftEvent::VoteRequest { + req, + response_tx: resp_tx, + }); } /// Inject AppendEntries RPC (for testing) @@ -1327,12 +1447,19 @@ impl RaftCore { req: AppendEntriesRequest, resp_tx: oneshot::Sender, ) { - eprintln!("[Node {}] append_entries_rpc: from {} term={}", - self.node_id, req.leader_id, req.term); - let result = self.event_tx.send(RaftEvent::AppendEntries { req, response_tx: resp_tx }); + eprintln!( + "[Node {}] append_entries_rpc: from {} term={}", + self.node_id, req.leader_id, req.term + ); + let result = self.event_tx.send(RaftEvent::AppendEntries { + req, + response_tx: resp_tx, + }); if let Err(e) = result { - eprintln!("[Node {}] ERROR: Failed to send AppendEntries event: channel closed", - self.node_id); + eprintln!( + "[Node {}] ERROR: Failed to send AppendEntries event: channel closed", + self.node_id + ); } } @@ -1357,7 +1484,10 @@ impl RaftCore { /// Submit a client write and wait for commit (blocking version) /// Returns RaftResponse after the command is committed and applied - pub async fn write(&self, command: RaftCommand) -> Result { + pub async fn write( + &self, + command: RaftCommand, + ) -> Result { use chainfire_types::command::RaftResponse; // Get current commit index before write diff --git a/chainfire/crates/chainfire-raft/src/lib.rs b/chainfire/crates/chainfire-raft/src/lib.rs index 20360f6..e230ded 100644 --- a/chainfire/crates/chainfire-raft/src/lib.rs +++ b/chainfire/crates/chainfire-raft/src/lib.rs @@ -10,5 +10,8 @@ pub mod core; // Common modules pub mod network; -pub use core::{RaftCore, RaftConfig, RaftRole, VoteRequest, VoteResponse, AppendEntriesRequest, AppendEntriesResponse}; +pub use core::{ + AppendEntriesRequest, AppendEntriesResponse, RaftConfig, RaftCore, RaftRole, VoteRequest, + VoteResponse, +}; pub use network::RaftNetworkError; diff --git a/chainfire/crates/chainfire-raft/src/network.rs b/chainfire/crates/chainfire-raft/src/network.rs index c9d70e0..83b96cb 100644 --- a/chainfire/crates/chainfire-raft/src/network.rs +++ b/chainfire/crates/chainfire-raft/src/network.rs @@ -2,8 +2,8 @@ //! //! This module provides network adapters for Raft to communicate between nodes. 
+use crate::core::{AppendEntriesRequest, AppendEntriesResponse, VoteRequest, VoteResponse}; use chainfire_types::NodeId; -use crate::core::{VoteRequest, VoteResponse, AppendEntriesRequest, AppendEntriesResponse}; use std::sync::Arc; use thiserror::Error; @@ -54,10 +54,7 @@ pub mod test_client { } pub enum RpcMessage { - Vote( - VoteRequest, - tokio::sync::oneshot::Sender, - ), + Vote(VoteRequest, tokio::sync::oneshot::Sender), AppendEntries( AppendEntriesRequest, tokio::sync::oneshot::Sender, @@ -109,13 +106,14 @@ pub mod test_client { req: AppendEntriesRequest, ) -> Result { let channels = self.channels.read().await; - let tx = channels - .get(&target) - .ok_or_else(|| { - eprintln!("[RPC] NodeNotFound: target={}, registered={:?}", - target, channels.keys().collect::>()); - RaftNetworkError::NodeNotFound(target) - })?; + let tx = channels.get(&target).ok_or_else(|| { + eprintln!( + "[RPC] NodeNotFound: target={}, registered={:?}", + target, + channels.keys().collect::>() + ); + RaftNetworkError::NodeNotFound(target) + })?; let (resp_tx, resp_rx) = tokio::sync::oneshot::channel(); let send_result = tx.send(RpcMessage::AppendEntries(req.clone(), resp_tx)); diff --git a/chainfire/crates/chainfire-server/benches/kv_bench.rs b/chainfire/crates/chainfire-server/benches/kv_bench.rs index d96447c..537af11 100644 --- a/chainfire/crates/chainfire-server/benches/kv_bench.rs +++ b/chainfire/crates/chainfire-server/benches/kv_bench.rs @@ -1,5 +1,7 @@ use chainfire_client::ChainFireClient; -use chainfire_server::config::{ClusterConfig, NetworkConfig, NodeConfig, RaftConfig, ServerConfig, StorageConfig}; +use chainfire_server::config::{ + ClusterConfig, NetworkConfig, NodeConfig, RaftConfig, ServerConfig, StorageConfig, +}; use chainfire_server::node::Node; use chainfire_types::RaftRole; use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; @@ -84,7 +86,10 @@ fn bench_put_throughput(c: &mut Criterion) { rt.block_on(async { for i in 0..NUM_KEYS_THROUGHPUT { let key = format!("bench_key_{}", i); - client.put(black_box(&key), black_box(&value)).await.unwrap(); + client + .put(black_box(&key), black_box(&value)) + .await + .unwrap(); } }) }); @@ -180,7 +185,10 @@ fn bench_put_latency(c: &mut Criterion) { let key = format!("latency_key_{}", key_counter); key_counter += 1; rt.block_on(async { - client.put(black_box(&key), black_box(&value)).await.unwrap(); + client + .put(black_box(&key), black_box(&value)) + .await + .unwrap(); }) }); }); @@ -192,5 +200,10 @@ fn bench_put_latency(c: &mut Criterion) { drop(rt); } -criterion_group!(benches, bench_put_throughput, bench_get_throughput, bench_put_latency); +criterion_group!( + benches, + bench_put_throughput, + bench_get_throughput, + bench_put_latency +); criterion_main!(benches); diff --git a/chainfire/crates/chainfire-server/src/main.rs b/chainfire/crates/chainfire-server/src/main.rs index 4eb1316..f34f065 100644 --- a/chainfire/crates/chainfire-server/src/main.rs +++ b/chainfire/crates/chainfire-server/src/main.rs @@ -85,10 +85,7 @@ async fn main() -> Result<()> { "chainfire_kv_requests_total", "Total number of KV requests by operation type" ); - metrics::describe_counter!( - "chainfire_kv_bytes_read", - "Total bytes read from KV store" - ); + metrics::describe_counter!("chainfire_kv_bytes_read", "Total bytes read from KV store"); metrics::describe_counter!( "chainfire_kv_bytes_written", "Total bytes written to KV store" @@ -97,10 +94,7 @@ async fn main() -> Result<()> { "chainfire_kv_request_duration_seconds", "KV 
request duration in seconds" ); - metrics::describe_gauge!( - "chainfire_raft_term", - "Current Raft term" - ); + metrics::describe_gauge!("chainfire_raft_term", "Current Raft term"); metrics::describe_gauge!( "chainfire_raft_is_leader", "Whether this node is the Raft leader (1=yes, 0=no)" @@ -110,10 +104,10 @@ async fn main() -> Result<()> { "Total number of watch events emitted" ); -use config::{Config as Cfg, Environment, File, FileFormat}; -use toml; // Import toml for serializing defaults + use config::{Config as Cfg, Environment, File, FileFormat}; + use toml; // Import toml for serializing defaults -// ... (rest of existing imports) + // ... (rest of existing imports) // Load configuration using config-rs let mut settings = Cfg::builder() @@ -124,8 +118,7 @@ use toml; // Import toml for serializing defaults )) // Layer 2: Environment variables (e.g., CHAINFIRE_NODE__ID, CHAINFIRE_NETWORK__API_ADDR) .add_source( - Environment::with_prefix("CHAINFIRE") - .separator("__") // Use double underscore for nested fields + Environment::with_prefix("CHAINFIRE").separator("__"), // Use double underscore for nested fields ); // Layer 3: Configuration file (if specified) @@ -136,9 +129,7 @@ use toml; // Import toml for serializing defaults info!("Config file not found, using defaults and environment variables."); } - let mut config: ServerConfig = settings - .build()? - .try_deserialize()?; + let mut config: ServerConfig = settings.build()?.try_deserialize()?; // Apply command line overrides (Layer 4: highest precedence) if let Some(node_id) = args.node_id { diff --git a/chainfire/crates/chainfire-server/src/node.rs b/chainfire/crates/chainfire-server/src/node.rs index 9d89eaf..8298dba 100644 --- a/chainfire/crates/chainfire-server/src/node.rs +++ b/chainfire/crates/chainfire-server/src/node.rs @@ -6,9 +6,9 @@ use crate::config::ServerConfig; use anyhow::Result; use chainfire_api::GrpcRaftClient; use chainfire_gossip::{GossipAgent, GossipId}; -use chainfire_raft::core::{RaftCore, RaftConfig}; +use chainfire_raft::core::{RaftConfig, RaftCore}; use chainfire_raft::network::RaftRpcClient; -use chainfire_storage::{RocksStore, LogStorage, StateMachine}; +use chainfire_storage::{LogStorage, RocksStore, StateMachine}; use chainfire_types::node::NodeRole; use chainfire_types::RaftRole; use chainfire_watch::{stream::WatchEventHandler, WatchRegistry}; @@ -58,12 +58,16 @@ impl Node { // Create gRPC Raft client and register peer addresses let rpc_client = Arc::new(GrpcRaftClient::new()); for member in &config.cluster.initial_members { - rpc_client.add_node(member.id, member.raft_addr.clone()).await; + rpc_client + .add_node(member.id, member.raft_addr.clone()) + .await; info!(node_id = member.id, addr = %member.raft_addr, "Registered peer"); } // Extract peer node IDs (excluding self) - let peers: Vec = config.cluster.initial_members + let peers: Vec = config + .cluster + .initial_members .iter() .map(|m| m.id) .filter(|&id| id != config.node.id) @@ -115,10 +119,8 @@ impl Node { let gossip_id = GossipId::new(config.node.id, config.network.gossip_addr, gossip_role); - let gossip = Some( - GossipAgent::new(gossip_id, chainfire_gossip::agent::default_config()) - .await?, - ); + let gossip = + Some(GossipAgent::new(gossip_id, chainfire_gossip::agent::default_config()).await?); info!( addr = %config.network.gossip_addr, gossip_role = ?gossip_role, diff --git a/chainfire/crates/chainfire-server/src/rest.rs b/chainfire/crates/chainfire-server/src/rest.rs index 3b615a2..442d44f 100644 --- 
a/chainfire/crates/chainfire-server/src/rest.rs +++ b/chainfire/crates/chainfire-server/src/rest.rs @@ -145,7 +145,12 @@ pub struct ReadQuery { pub fn build_router(state: RestApiState) -> Router { Router::new() // Wildcard route handles all keys (with or without slashes) - .route("/api/v1/kv/*key", get(get_kv_wildcard).put(put_kv_wildcard).delete(delete_kv_wildcard)) + .route( + "/api/v1/kv/*key", + get(get_kv_wildcard) + .put(put_kv_wildcard) + .delete(delete_kv_wildcard), + ) .route("/api/v1/kv", get(list_kv)) .route("/api/v1/cluster/status", get(cluster_status)) .route("/api/v1/cluster/members", post(add_member)) @@ -159,7 +164,9 @@ pub fn build_router(state: RestApiState) -> Router { async fn health_check() -> (StatusCode, Json>) { ( StatusCode::OK, - Json(SuccessResponse::new(serde_json::json!({ "status": "healthy" }))), + Json(SuccessResponse::new( + serde_json::json!({ "status": "healthy" }), + )), ) } @@ -187,9 +194,13 @@ async fn get_kv_wildcard( let sm = state.raft.state_machine(); let key_bytes = full_key.as_bytes().to_vec(); - let results = sm.kv() - .get(&key_bytes) - .map_err(|e| error_response(StatusCode::INTERNAL_SERVER_ERROR, "INTERNAL_ERROR", &e.to_string()))?; + let results = sm.kv().get(&key_bytes).map_err(|e| { + error_response( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + &e.to_string(), + ) + })?; let value = results .into_iter() @@ -207,7 +218,8 @@ async fn put_kv_wildcard( State(state): State, Path(key): Path, Json(req): Json, -) -> Result<(StatusCode, Json>), (StatusCode, Json)> { +) -> Result<(StatusCode, Json>), (StatusCode, Json)> +{ // Use key as-is for simple keys, prepend / for namespaced keys let full_key = if key.contains('/') { format!("/{}", key) @@ -225,7 +237,9 @@ async fn put_kv_wildcard( Ok(( StatusCode::OK, - Json(SuccessResponse::new(serde_json::json!({ "key": full_key, "success": true }))), + Json(SuccessResponse::new( + serde_json::json!({ "key": full_key, "success": true }), + )), )) } @@ -233,7 +247,8 @@ async fn put_kv_wildcard( async fn delete_kv_wildcard( State(state): State, Path(key): Path, -) -> Result<(StatusCode, Json>), (StatusCode, Json)> { +) -> Result<(StatusCode, Json>), (StatusCode, Json)> +{ // Use key as-is for simple keys, prepend / for namespaced keys let full_key = if key.contains('/') { format!("/{}", key) @@ -249,7 +264,9 @@ async fn delete_kv_wildcard( Ok(( StatusCode::OK, - Json(SuccessResponse::new(serde_json::json!({ "key": full_key, "success": true }))), + Json(SuccessResponse::new( + serde_json::json!({ "key": full_key, "success": true }), + )), )) } @@ -271,9 +288,13 @@ async fn list_kv( let start_key = prefix.as_bytes().to_vec(); let end_key = format!("{}~", prefix).as_bytes().to_vec(); - let results = sm.kv() - .range(&start_key, Some(&end_key)) - .map_err(|e| error_response(StatusCode::INTERNAL_SERVER_ERROR, "INTERNAL_ERROR", &e.to_string()))?; + let results = sm.kv().range(&start_key, Some(&end_key)).map_err(|e| { + error_response( + StatusCode::INTERNAL_SERVER_ERROR, + "INTERNAL_ERROR", + &e.to_string(), + ) + })?; let items: Vec = results .into_iter() @@ -325,14 +346,20 @@ fn string_to_node_id(s: &str) -> u64 { async fn add_member( State(state): State, Json(req): Json, -) -> Result<(StatusCode, Json>), (StatusCode, Json)> { - let rpc_client = state - .rpc_client - .as_ref() - .ok_or_else(|| error_response(StatusCode::SERVICE_UNAVAILABLE, "SERVICE_UNAVAILABLE", "RPC client not available"))?; +) -> Result<(StatusCode, Json>), (StatusCode, Json)> +{ + let rpc_client = 
state.rpc_client.as_ref().ok_or_else(|| { + error_response( + StatusCode::SERVICE_UNAVAILABLE, + "SERVICE_UNAVAILABLE", + "RPC client not available", + ) + })?; // Add node to RPC client's routing table - rpc_client.add_node(req.node_id, req.raft_addr.clone()).await; + rpc_client + .add_node(req.node_id, req.raft_addr.clone()) + .await; // Note: RaftCore doesn't have add_peer() - members are managed via configuration // For now, we just register the node in the RPC client @@ -353,13 +380,17 @@ async fn add_member( async fn add_member_legacy( State(state): State, Json(req): Json, -) -> Result<(StatusCode, Json>), (StatusCode, Json)> { +) -> Result<(StatusCode, Json>), (StatusCode, Json)> +{ let node_id = string_to_node_id(&req.id); - let rpc_client = state - .rpc_client - .as_ref() - .ok_or_else(|| error_response(StatusCode::SERVICE_UNAVAILABLE, "SERVICE_UNAVAILABLE", "RPC client not available"))?; + let rpc_client = state.rpc_client.as_ref().ok_or_else(|| { + error_response( + StatusCode::SERVICE_UNAVAILABLE, + "SERVICE_UNAVAILABLE", + "RPC client not available", + ) + })?; // Add node to RPC client's routing table rpc_client.add_node(node_id, req.raft_addr.clone()).await; @@ -459,15 +490,19 @@ async fn proxy_write_to_leader( if response.status().is_success() { return Ok(()); } - let status = StatusCode::from_u16(response.status().as_u16()).unwrap_or(StatusCode::BAD_GATEWAY); - let payload = response.json::().await.unwrap_or_else(|err| ErrorResponse { - error: ErrorDetail { - code: "LEADER_PROXY_FAILED".to_string(), - message: format!("leader {leader_id} returned {status}: {err}"), - details: None, - }, - meta: ResponseMeta::new(), - }); + let status = + StatusCode::from_u16(response.status().as_u16()).unwrap_or(StatusCode::BAD_GATEWAY); + let payload = response + .json::() + .await + .unwrap_or_else(|err| ErrorResponse { + error: ErrorDetail { + code: "LEADER_PROXY_FAILED".to_string(), + message: format!("leader {leader_id} returned {status}: {err}"), + details: None, + }, + meta: ResponseMeta::new(), + }); Err((status, Json(payload))) } @@ -510,11 +545,7 @@ where &format!("leader {leader_id} is known but has no HTTP endpoint mapping"), ) })?; - let url = format!( - "{}{}", - leader_http_addr.trim_end_matches('/'), - path - ); + let url = format!("{}{}", leader_http_addr.trim_end_matches('/'), path); let mut request = state.http_client.get(&url); if let Some(query) = query { request = request.query(query); @@ -536,15 +567,19 @@ where })?; return Ok(Json(payload)); } - let status = StatusCode::from_u16(response.status().as_u16()).unwrap_or(StatusCode::BAD_GATEWAY); - let payload = response.json::().await.unwrap_or_else(|err| ErrorResponse { - error: ErrorDetail { - code: "LEADER_PROXY_FAILED".to_string(), - message: format!("leader {leader_id} returned {status}: {err}"), - details: None, - }, - meta: ResponseMeta::new(), - }); + let status = + StatusCode::from_u16(response.status().as_u16()).unwrap_or(StatusCode::BAD_GATEWAY); + let payload = response + .json::() + .await + .unwrap_or_else(|err| ErrorResponse { + error: ErrorDetail { + code: "LEADER_PROXY_FAILED".to_string(), + message: format!("leader {leader_id} returned {status}: {err}"), + details: None, + }, + meta: ResponseMeta::new(), + }); Err((status, Json(payload))) } diff --git a/chainfire/crates/chainfire-server/src/server.rs b/chainfire/crates/chainfire-server/src/server.rs index 354bb62..e217b09 100644 --- a/chainfire/crates/chainfire-server/src/server.rs +++ b/chainfire/crates/chainfire-server/src/server.rs @@ -36,10 +36,7 
@@ impl Server { } /// Apply TLS configuration to a server builder - async fn apply_tls_config( - &self, - builder: TonicServer, - ) -> Result { + async fn apply_tls_config(&self, builder: TonicServer) -> Result { if let Some(tls_config) = &self.config.network.tls { info!("TLS enabled, loading certificates..."); let cert = tokio::fs::read(&tls_config.cert_file).await?; @@ -48,12 +45,9 @@ impl Server { let tls = if tls_config.require_client_cert { info!("mTLS enabled, requiring client certificates"); - let ca_cert = tokio::fs::read( - tls_config - .ca_file - .as_ref() - .ok_or_else(|| anyhow::anyhow!("ca_file required when require_client_cert=true"))?, - ) + let ca_cert = tokio::fs::read(tls_config.ca_file.as_ref().ok_or_else(|| { + anyhow::anyhow!("ca_file required when require_client_cert=true") + })?) .await?; let ca = Certificate::from_pem(ca_cert); @@ -100,15 +94,8 @@ impl Server { raft.node_id(), ); - let rpc_client = self - .node - .rpc_client() - .expect("rpc_client should exist in full mode") - .clone(); - let cluster_service = ClusterServiceImpl::new( Arc::clone(&raft), - rpc_client, self.node.cluster_id(), configured_members(&self.config), ); diff --git a/chainfire/crates/chainfire-storage/benches/storage_bench.rs b/chainfire/crates/chainfire-storage/benches/storage_bench.rs index 3e72dbe..cf55099 100644 --- a/chainfire/crates/chainfire-storage/benches/storage_bench.rs +++ b/chainfire/crates/chainfire-storage/benches/storage_bench.rs @@ -23,7 +23,9 @@ fn bench_write_throughput(c: &mut Criterion) { b.iter(|| { for i in 0..NUM_KEYS_THROUGHPUT { let key = format!("bench_key_{:08}", i).into_bytes(); - store.put(black_box(key), black_box(value.clone()), None).unwrap(); + store + .put(black_box(key), black_box(value.clone()), None) + .unwrap(); } }); }); @@ -77,7 +79,9 @@ fn bench_write_latency(c: &mut Criterion) { b.iter(|| { let key = format!("latency_key_{:08}", key_counter).into_bytes(); key_counter += 1; - store.put(black_box(key), black_box(value.clone()), None).unwrap(); + store + .put(black_box(key), black_box(value.clone()), None) + .unwrap(); }); }); diff --git a/chainfire/crates/chainfire-storage/src/kv_store.rs b/chainfire/crates/chainfire-storage/src/kv_store.rs index 402ec75..80168b9 100644 --- a/chainfire/crates/chainfire-storage/src/kv_store.rs +++ b/chainfire/crates/chainfire-storage/src/kv_store.rs @@ -62,8 +62,8 @@ impl KvStore { .cf_handle(cf::META) .ok_or_else(|| StorageError::RocksDb("META cf not found".into()))?; - let bytes = - bincode::serialize(&revision).map_err(|e| StorageError::Serialization(e.to_string()))?; + let bytes = bincode::serialize(&revision) + .map_err(|e| StorageError::Serialization(e.to_string()))?; self.store .db() diff --git a/chainfire/crates/chainfire-storage/src/lease_store.rs b/chainfire/crates/chainfire-storage/src/lease_store.rs index 723429a..91c7649 100644 --- a/chainfire/crates/chainfire-storage/src/lease_store.rs +++ b/chainfire/crates/chainfire-storage/src/lease_store.rs @@ -43,7 +43,10 @@ impl LeaseStore { } else { // Check if ID is already in use if self.leases.contains_key(&id) { - return Err(StorageError::LeaseError(format!("Lease {} already exists", id))); + return Err(StorageError::LeaseError(format!( + "Lease {} already exists", + id + ))); } // Update next_id if necessary let _ = self.next_id.fetch_max(id + 1, Ordering::SeqCst); @@ -61,7 +64,11 @@ impl LeaseStore { pub fn revoke(&self, id: LeaseId) -> Result>, StorageError> { match self.leases.remove(&id) { Some((_, lease)) => { - info!(lease_id = id, keys_count = 
lease.keys.len(), "Lease revoked"); + info!( + lease_id = id, + keys_count = lease.keys.len(), + "Lease revoked" + ); Ok(lease.keys) } None => Err(StorageError::LeaseError(format!("Lease {} not found", id))), @@ -88,9 +95,9 @@ impl LeaseStore { /// Get remaining TTL for a lease pub fn time_to_live(&self, id: LeaseId) -> Option<(i64, i64, Vec>)> { - self.leases.get(&id).map(|lease| { - (lease.remaining(), lease.ttl, lease.keys.clone()) - }) + self.leases + .get(&id) + .map(|lease| (lease.remaining(), lease.ttl, lease.keys.clone())) } /// List all lease IDs @@ -105,7 +112,10 @@ impl LeaseStore { lease.attach_key(key); Ok(()) } - None => Err(StorageError::LeaseError(format!("Lease {} not found", lease_id))), + None => Err(StorageError::LeaseError(format!( + "Lease {} not found", + lease_id + ))), } } diff --git a/chainfire/crates/chainfire-storage/src/lib.rs b/chainfire/crates/chainfire-storage/src/lib.rs index b63082d..e5b9f3f 100644 --- a/chainfire/crates/chainfire-storage/src/lib.rs +++ b/chainfire/crates/chainfire-storage/src/lib.rs @@ -17,7 +17,7 @@ pub mod store; pub use kv_store::KvStore; pub use lease_store::{LeaseExpirationWorker, LeaseStore}; -pub use log_storage::{LogStorage, LogEntry, EntryPayload, LogId, Vote, LogState}; +pub use log_storage::{EntryPayload, LogEntry, LogId, LogState, LogStorage, Vote}; pub use snapshot::{Snapshot, SnapshotBuilder, SnapshotMeta}; pub use state_machine::StateMachine; pub use store::RocksStore; diff --git a/chainfire/crates/chainfire-storage/src/log_storage.rs b/chainfire/crates/chainfire-storage/src/log_storage.rs index a9ccfdc..3773bfa 100644 --- a/chainfire/crates/chainfire-storage/src/log_storage.rs +++ b/chainfire/crates/chainfire-storage/src/log_storage.rs @@ -16,8 +16,9 @@ pub type LogIndex = u64; pub type Term = u64; /// Log ID combining term and index -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] -#[derive(Default)] +#[derive( + Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, Default, +)] pub struct LogId { pub term: Term, pub index: LogIndex, @@ -29,7 +30,6 @@ impl LogId { } } - /// A log entry stored in the Raft log #[derive(Debug, Clone, Serialize, Deserialize)] pub struct LogEntry { @@ -120,10 +120,7 @@ impl LogStorage { let last_purged_log_id = self.get_last_purged_log_id()?; // Get last log ID - let mut last_iter = self - .store - .db() - .iterator_cf(&cf, rocksdb::IteratorMode::End); + let mut last_iter = self.store.db().iterator_cf(&cf, rocksdb::IteratorMode::End); let last_log_id = if let Some(Ok((_, value))) = last_iter.next() { // Skip empty or corrupt entries - treat as empty log @@ -133,7 +130,10 @@ impl LogStorage { match bincode::deserialize::>>(&value) { Ok(entry) => Some(entry.log_id), Err(e) => { - eprintln!("Warning: Failed to deserialize log entry: {}, treating as empty log", e); + eprintln!( + "Warning: Failed to deserialize log entry: {}, treating as empty log", + e + ); last_purged_log_id } } @@ -369,7 +369,10 @@ impl LogStorage { match bincode::deserialize::(&bytes) { Ok(log_id) => Ok(Some(log_id)), Err(e) => { - eprintln!("Warning: Failed to deserialize last_purged: {}, treating as None", e); + eprintln!( + "Warning: Failed to deserialize last_purged: {}, treating as None", + e + ); Ok(None) } } diff --git a/chainfire/crates/chainfire-storage/src/snapshot.rs b/chainfire/crates/chainfire-storage/src/snapshot.rs index 6726ab2..5ea198e 100644 --- a/chainfire/crates/chainfire-storage/src/snapshot.rs +++ 
b/chainfire/crates/chainfire-storage/src/snapshot.rs @@ -38,8 +38,8 @@ impl Snapshot { /// Serialize snapshot to bytes pub fn to_bytes(&self) -> Result, StorageError> { // Format: [meta_len: u32][meta][data] - let meta_bytes = - bincode::serialize(&self.meta).map_err(|e| StorageError::Serialization(e.to_string()))?; + let meta_bytes = bincode::serialize(&self.meta) + .map_err(|e| StorageError::Serialization(e.to_string()))?; let mut result = Vec::with_capacity(4 + meta_bytes.len() + self.data.len()); result.extend_from_slice(&(meta_bytes.len() as u32).to_le_bytes()); @@ -108,8 +108,8 @@ impl SnapshotBuilder { } // Serialize entries - let data = bincode::serialize(&entries) - .map_err(|e| StorageError::Serialization(e.to_string()))?; + let data = + bincode::serialize(&entries).map_err(|e| StorageError::Serialization(e.to_string()))?; let meta = SnapshotMeta { last_log_index, @@ -277,10 +277,8 @@ mod tests { // Add data to store1 let kv1 = KvStore::new(store1.clone()).unwrap(); - kv1.put(b"key1".to_vec(), b"value1".to_vec(), None) - .unwrap(); - kv1.put(b"key2".to_vec(), b"value2".to_vec(), None) - .unwrap(); + kv1.put(b"key1".to_vec(), b"value1".to_vec(), None).unwrap(); + kv1.put(b"key2".to_vec(), b"value2".to_vec(), None).unwrap(); // Build snapshot from store1 let builder1 = SnapshotBuilder::new(store1.clone()); diff --git a/chainfire/crates/chainfire-storage/src/state_machine.rs b/chainfire/crates/chainfire-storage/src/state_machine.rs index f1c50a1..e7b0c3b 100644 --- a/chainfire/crates/chainfire-storage/src/state_machine.rs +++ b/chainfire/crates/chainfire-storage/src/state_machine.rs @@ -277,7 +277,7 @@ impl StateMachine { txn_responses.push(TxnOpResponse::Range { kvs, count, - more: false, // TODO: handle pagination + more: false, }); } } @@ -341,7 +341,11 @@ impl StateMachine { /// Apply a lease grant command fn apply_lease_grant(&self, id: i64, ttl: i64) -> Result { let lease = self.leases.grant(id, ttl)?; - Ok(RaftResponse::lease(self.current_revision(), lease.id, lease.ttl)) + Ok(RaftResponse::lease( + self.current_revision(), + lease.id, + lease.ttl, + )) } /// Apply a lease revoke command diff --git a/chainfire/crates/chainfire-storage/src/store.rs b/chainfire/crates/chainfire-storage/src/store.rs index 85939aa..4d7f4a6 100644 --- a/chainfire/crates/chainfire-storage/src/store.rs +++ b/chainfire/crates/chainfire-storage/src/store.rs @@ -115,10 +115,7 @@ mod tests { { let store = RocksStore::new(dir.path()).unwrap(); let cf = store.cf_handle(cf::META).unwrap(); - store - .db() - .put_cf(&cf, b"test_key", b"test_value") - .unwrap(); + store.db().put_cf(&cf, b"test_key", b"test_value").unwrap(); } // Reopen and verify data persisted diff --git a/chainfire/crates/chainfire-types/src/command.rs b/chainfire/crates/chainfire-types/src/command.rs index 5929f44..5c2e1b3 100644 --- a/chainfire/crates/chainfire-types/src/command.rs +++ b/chainfire/crates/chainfire-types/src/command.rs @@ -7,8 +7,7 @@ use crate::Revision; use serde::{Deserialize, Serialize}; /// Commands submitted to Raft consensus -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[derive(Default)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)] pub enum RaftCommand { /// Put a key-value pair Put { @@ -69,7 +68,6 @@ pub enum RaftCommand { Noop, } - /// Comparison for transaction conditions #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct Compare { @@ -129,9 +127,7 @@ pub enum TxnOp { #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum 
TxnOpResponse { /// Response from a Put operation - Put { - prev_kv: Option<KvEntry>, - }, + Put { prev_kv: Option<KvEntry> }, /// Response from a Delete/DeleteRange operation Delete { deleted: u64,
diff --git a/chainfire/crates/chainfire-types/src/kv.rs b/chainfire/crates/chainfire-types/src/kv.rs index 4db7aa1..277f831 100644 --- a/chainfire/crates/chainfire-types/src/kv.rs +++ b/chainfire/crates/chainfire-types/src/kv.rs @@ -7,8 +7,7 @@ use serde::{Deserialize, Serialize}; pub type Revision = u64; /// A key-value entry with metadata -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[derive(Default)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)] pub struct KvEntry { /// The key pub key: Vec<u8>, @@ -77,7 +76,6 @@ impl KvEntry { } } - /// Range of keys for scan operations #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct KeyRange {
diff --git a/chainfire/crates/chainfire-types/src/node.rs b/chainfire/crates/chainfire-types/src/node.rs index db49208..166d65f 100644 --- a/chainfire/crates/chainfire-types/src/node.rs +++ b/chainfire/crates/chainfire-types/src/node.rs @@ -7,8 +7,7 @@ use std::net::SocketAddr; pub type NodeId = u64; /// Role of a node in the cluster -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -#[derive(Default)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)] pub enum NodeRole { /// Control Plane node - participates in Raft consensus ControlPlane, @@ -17,7 +16,6 @@ pub enum NodeRole { Worker, } - /// Raft participation role for a node. /// /// This determines whether and how a node participates in the Raft consensus protocol.
diff --git a/chainfire/crates/chainfire-watch/src/registry.rs b/chainfire/crates/chainfire-watch/src/registry.rs index 7c02bda..11165b2 100644 --- a/chainfire/crates/chainfire-watch/src/registry.rs +++ b/chainfire/crates/chainfire-watch/src/registry.rs @@ -51,11 +51,7 @@ impl WatchRegistry { } /// Create a new watch subscription - pub fn create_watch( - &self, - req: WatchRequest, - sender: mpsc::Sender, - ) -> i64 { + pub fn create_watch(&self, req: WatchRequest, sender: mpsc::Sender) -> i64 { let watch_id = if req.watch_id != 0 { req.watch_id } else { @@ -72,7 +68,9 @@ watch_id, matcher, prev_kv: req.prev_kv, - created_revision: req.start_revision.unwrap_or_else(|| self.current_revision()), + created_revision: req + .start_revision + .unwrap_or_else(|| self.current_revision()), sender, }; @@ -82,10 +80,7 @@ // Add to prefix index { let mut index = self.prefix_index.write(); - index - .entry(req.key.clone()) - .or_default() - .insert(watch_id); + index.entry(req.key.clone()).or_default().insert(watch_id); } debug!(watch_id, key = ?String::from_utf8_lossy(&req.key), "Created watch");
diff --git a/chainfire/proto/chainfire.proto b/chainfire/proto/chainfire.proto index a551d94..03b17f4 100644 --- a/chainfire/proto/chainfire.proto +++ b/chainfire/proto/chainfire.proto @@ -23,26 +23,14 @@ service Watch { rpc Watch(stream WatchRequest) returns (stream WatchResponse); } -// Cluster management service +// Cluster management service for fixed-membership clusters. 
service Cluster { - // MemberAdd adds a member into the cluster - rpc MemberAdd(MemberAddRequest) returns (MemberAddResponse); - - // MemberRemove removes an existing member from the cluster - rpc MemberRemove(MemberRemoveRequest) returns (MemberRemoveResponse); - - // MemberList lists all the members in the cluster + // MemberList lists the members configured at cluster bootstrap time rpc MemberList(MemberListRequest) returns (MemberListResponse); // Status gets the status of the cluster rpc Status(StatusRequest) returns (StatusResponse); - // TransferSnapshot transfers a snapshot to a target node for pre-seeding - // This is used as a workaround for OpenRaft 0.9.x learner replication bug - rpc TransferSnapshot(TransferSnapshotRequest) returns (TransferSnapshotResponse); - - // GetSnapshot returns the current snapshot from this node - rpc GetSnapshot(GetSnapshotRequest) returns (stream GetSnapshotResponse); } // Lease service for TTL-based key expiration @@ -295,34 +283,6 @@ message Member { bool is_learner = 5; } -message MemberAddRequest { - // node_id is the joining node's actual ID - uint64 node_id = 1; - // peer_urls are the URLs to reach the new member - repeated string peer_urls = 2; - // is_learner indicates if the member is a learner - bool is_learner = 3; -} - -message MemberAddResponse { - ResponseHeader header = 1; - // member is the member information for the added member - Member member = 2; - // members is the list of all members after adding - repeated Member members = 3; -} - -message MemberRemoveRequest { - // ID is the member ID to remove - uint64 id = 1; -} - -message MemberRemoveResponse { - ResponseHeader header = 1; - // members is the list of all members after removing - repeated Member members = 2; -} - message MemberListRequest {} message MemberListResponse { @@ -421,49 +381,3 @@ message LeaseStatus { // ID is the lease ID int64 id = 1; } - -// ========== Snapshot Transfer (T041 Option C workaround) ========== - -// Snapshot metadata -message SnapshotMeta { - // last_log_index is the last log index included in the snapshot - uint64 last_log_index = 1; - // last_log_term is the term of the last log entry included - uint64 last_log_term = 2; - // membership is the cluster membership at snapshot time - repeated uint64 membership = 3; - // size is the size of snapshot data in bytes - uint64 size = 4; -} - -// Request to transfer snapshot to a target node -message TransferSnapshotRequest { - // target_node_id is the ID of the node to receive the snapshot - uint64 target_node_id = 1; - // target_addr is the gRPC address of the target node - string target_addr = 2; -} - -// Response from snapshot transfer -message TransferSnapshotResponse { - ResponseHeader header = 1; - // success indicates if the transfer completed successfully - bool success = 2; - // error is the error message if transfer failed - string error = 3; - // meta is the metadata of the transferred snapshot - SnapshotMeta meta = 4; -} - -// Request to get snapshot from this node -message GetSnapshotRequest {} - -// Streaming response containing snapshot chunks -message GetSnapshotResponse { - // meta is the snapshot metadata (only in first chunk) - SnapshotMeta meta = 1; - // chunk is the snapshot data chunk - bytes chunk = 2; - // done indicates if this is the last chunk - bool done = 3; -} diff --git a/chainfire/proto/internal.proto b/chainfire/proto/internal.proto index 3b48480..6656ab0 100644 --- a/chainfire/proto/internal.proto +++ b/chainfire/proto/internal.proto @@ -9,9 +9,6 @@ service RaftService 
{ // AppendEntries sends log entries to followers rpc AppendEntries(AppendEntriesRequest) returns (AppendEntriesResponse); - - // InstallSnapshot sends a snapshot to a follower - rpc InstallSnapshot(stream InstallSnapshotRequest) returns (InstallSnapshotResponse); } message VoteRequest { @@ -69,25 +66,3 @@ message AppendEntriesResponse { // conflict_term is the term of the conflicting entry uint64 conflict_term = 4; } - -message InstallSnapshotRequest { - // term is the leader's term - uint64 term = 1; - // leader_id is the leader's ID - uint64 leader_id = 2; - // last_included_index is the snapshot replaces all entries up through and including this index - uint64 last_included_index = 3; - // last_included_term is term of last_included_index - uint64 last_included_term = 4; - // offset is byte offset where chunk is positioned in the snapshot file - uint64 offset = 5; - // data is raw bytes of the snapshot chunk - bytes data = 6; - // done is true if this is the last chunk - bool done = 7; -} - -message InstallSnapshotResponse { - // term is the current term - uint64 term = 1; -} diff --git a/creditservice/README.md b/creditservice/README.md index ce33eec..61038ca 100644 --- a/creditservice/README.md +++ b/creditservice/README.md @@ -1,22 +1,25 @@ # CreditService -`creditservice` is a minimal reference service that proves UltraCloud can integrate vendor-specific quota and credit control with platform auth and gateway admission. +`creditservice` is UltraCloud's supported quota, reservation, wallet, and admission-control service. It integrates with platform auth, persists state in the platform data plane, and sits behind the same gateway and VM-cluster validation used for the rest of the published surface. -It is intentionally not a full billing product. - -## What this proves - -- a vendor-specific credit or quota service can be built in-tree -- the service can authenticate against Photon IAM -- the service can participate in gateway and control-plane admission flows -- the service can persist state in Photon-supported backends +It is intentionally scoped to real-time control and admission, not finance-system ownership. ## Supported scope -- quota checks -- credit reservations, commits, and releases -- tenant-aware auth integration +- quota creation, lookup, and enforcement +- credit reservations, commits, releases, and wallet mutations +- tenant-aware auth integration through IAM - gateway-facing admission control hooks +- persistent state in FlareDB, PostgreSQL, or SQLite depending on deployment mode + +## Export And Migration + +CreditService export and backend migration are supported as offline export/import or backend-native snapshot workflows, not live mixed-writer migration. + +- Use backend-native snapshots or logical API replay as the export baseline. +- Drain or quiesce writes before moving between FlareDB, PostgreSQL, and SQLite backends. +- Rehydrate the target backend, then cut APIGateway or direct callers over to the new endpoint. +- Treat finance-grade reporting, settlement, or ledger history export as out of scope for the product boundary. ## Explicit non-goals @@ -26,14 +29,12 @@ It is intentionally not a full billing product. - finance-grade ledger completeness - full metering platform ownership -## Test expectation +## Release proof -The main proof should come from cluster-level VM validation in `nix/test-cluster`, not from expanding `creditservice` into a larger product surface. 
- -Concrete proof path: +The release-facing proof comes from the publishable VM-cluster harness: ```bash -nix run ./nix/test-cluster#cluster -- fresh-smoke +nix run ./nix/test-cluster#cluster -- fresh-matrix ``` -That flow boots node06 with `apigateway`, `nightlight`, and `creditservice`, and validates that `creditservice` starts in the IAM-integrated cluster path. +That flow boots node06 with `apigateway`, `nightlight`, and `creditservice`, then validates REST and gRPC quota flows, wallet and reservation mutations, IAM integration, and API-gateway-mediated admission traffic. diff --git a/creditservice/crates/creditservice-api/src/billing.rs b/creditservice/crates/creditservice-api/src/billing.rs index 1e0cbe1..bb53cb5 100644 --- a/creditservice/crates/creditservice-api/src/billing.rs +++ b/creditservice/crates/creditservice-api/src/billing.rs @@ -53,16 +53,16 @@ impl Default for PricingRules { fn default() -> Self { let mut prices = HashMap::new(); // Default pricing (credits per hour/GB) - prices.insert(ResourceType::VmInstance, 100); // 100 credits/hour - prices.insert(ResourceType::VmCpu, 10); // 10 credits/CPU-hour - prices.insert(ResourceType::VmMemoryGb, 5); // 5 credits/GB-hour - prices.insert(ResourceType::StorageGb, 1); // 1 credit/GB-hour - prices.insert(ResourceType::NetworkPort, 2); // 2 credits/port-hour - prices.insert(ResourceType::LoadBalancer, 50); // 50 credits/hour - prices.insert(ResourceType::DnsZone, 10); // 10 credits/zone-hour - prices.insert(ResourceType::DnsRecord, 1); // 1 credit/record-hour - prices.insert(ResourceType::K8sCluster, 200); // 200 credits/hour - prices.insert(ResourceType::K8sNode, 100); // 100 credits/node-hour + prices.insert(ResourceType::VmInstance, 100); // 100 credits/hour + prices.insert(ResourceType::VmCpu, 10); // 10 credits/CPU-hour + prices.insert(ResourceType::VmMemoryGb, 5); // 5 credits/GB-hour + prices.insert(ResourceType::StorageGb, 1); // 1 credit/GB-hour + prices.insert(ResourceType::NetworkPort, 2); // 2 credits/port-hour + prices.insert(ResourceType::LoadBalancer, 50); // 50 credits/hour + prices.insert(ResourceType::DnsZone, 10); // 10 credits/zone-hour + prices.insert(ResourceType::DnsRecord, 1); // 1 credit/record-hour + prices.insert(ResourceType::K8sCluster, 200); // 200 credits/hour + prices.insert(ResourceType::K8sNode, 100); // 100 credits/node-hour Self { prices } } } @@ -128,12 +128,16 @@ impl UsageMetricsProvider for MockUsageMetricsProvider { period_start: DateTime, period_end: DateTime, ) -> Result { - Ok(self.mock_data.get(project_id).cloned().unwrap_or_else(|| UsageMetrics { - project_id: project_id.to_string(), - resource_usage: HashMap::new(), - period_start, - period_end, - })) + Ok(self + .mock_data + .get(project_id) + .cloned() + .unwrap_or_else(|| UsageMetrics { + project_id: project_id.to_string(), + resource_usage: HashMap::new(), + period_start, + period_end, + })) } async fn list_projects_with_usage( @@ -199,6 +203,8 @@ mod tests { .unwrap(); assert_eq!(metrics.project_id, "proj-1"); - assert!(metrics.resource_usage.contains_key(&ResourceType::VmInstance)); + assert!(metrics + .resource_usage + .contains_key(&ResourceType::VmInstance)); } } diff --git a/creditservice/crates/creditservice-api/src/credit_service.rs b/creditservice/crates/creditservice-api/src/credit_service.rs index a50bd64..21a8926 100644 --- a/creditservice/crates/creditservice-api/src/credit_service.rs +++ b/creditservice/crates/creditservice-api/src/credit_service.rs @@ -120,7 +120,10 @@ impl CreditServiceImpl { let org_id = 
req_org_id.unwrap_or(""); resolve_tenant_ids_from_context(tenant, org_id, req_project_id) } - None => Ok((req_org_id.unwrap_or("").to_string(), req_project_id.to_string())), + None => Ok(( + req_org_id.unwrap_or("").to_string(), + req_project_id.to_string(), + )), } } @@ -377,7 +380,8 @@ impl CreditService for CreditServiceImpl { return Err(Status::invalid_argument("project_id is required")); } - let (org_id, project_id) = self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; + let (org_id, project_id) = + self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; self.authorize_project_action( tenant.as_ref(), @@ -424,8 +428,11 @@ impl CreditService for CreditServiceImpl { )); } - let (org_id, project_id) = - self.resolve_project_scope(tenant.as_ref(), Some(req.org_id.as_str()), &req.project_id)?; + let (org_id, project_id) = self.resolve_project_scope( + tenant.as_ref(), + Some(req.org_id.as_str()), + &req.project_id, + )?; self.authorize_project_action( tenant.as_ref(), @@ -481,7 +488,8 @@ impl CreditService for CreditServiceImpl { return Err(Status::invalid_argument("project_id is required")); } - let (org_id, project_id) = self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; + let (org_id, project_id) = + self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; self.authorize_project_action( tenant.as_ref(), @@ -559,7 +567,8 @@ impl CreditService for CreditServiceImpl { return Err(Status::invalid_argument("project_id is required")); } - let (org_id, project_id) = self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; + let (org_id, project_id) = + self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; self.authorize_project_action( tenant.as_ref(), @@ -629,7 +638,8 @@ impl CreditService for CreditServiceImpl { return Err(Status::invalid_argument("project_id is required")); } - let (org_id, project_id) = self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; + let (org_id, project_id) = + self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; self.authorize_project_action( tenant.as_ref(), @@ -716,7 +726,8 @@ impl CreditService for CreditServiceImpl { return Err(Status::invalid_argument("project_id is required")); } - let (org_id, project_id) = self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; + let (org_id, project_id) = + self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; self.authorize_project_action( tenant.as_ref(), @@ -981,7 +992,8 @@ impl CreditService for CreditServiceImpl { return Err(Status::invalid_argument("project_id is required")); } - let (org_id, project_id) = self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; + let (org_id, project_id) = + self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; self.authorize_project_action( tenant.as_ref(), ACTION_BILLING_PROCESS, @@ -1080,7 +1092,8 @@ impl CreditService for CreditServiceImpl { } let resource_type = proto_to_resource_type(req.resource_type)?; - let (org_id, project_id) = self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; + let (org_id, project_id) = + self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; self.authorize_project_action( tenant.as_ref(), ACTION_QUOTA_SET, @@ -1121,7 +1134,8 @@ impl CreditService for CreditServiceImpl { } let resource_type = proto_to_resource_type(req.resource_type)?; - let (org_id, project_id) = self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; + let (org_id, 
project_id) = + self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; self.authorize_project_action( tenant.as_ref(), ACTION_QUOTA_READ, @@ -1165,7 +1179,8 @@ impl CreditService for CreditServiceImpl { return Err(Status::invalid_argument("project_id is required")); } - let (org_id, project_id) = self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; + let (org_id, project_id) = + self.resolve_project_scope(tenant.as_ref(), None, &req.project_id)?; self.authorize_project_action( tenant.as_ref(), ACTION_QUOTA_READ, diff --git a/creditservice/crates/creditservice-api/src/flaredb_storage.rs b/creditservice/crates/creditservice-api/src/flaredb_storage.rs index facd4bc..1efa48b 100644 --- a/creditservice/crates/creditservice-api/src/flaredb_storage.rs +++ b/creditservice/crates/creditservice-api/src/flaredb_storage.rs @@ -36,8 +36,8 @@ impl FlareDbStorage { .unwrap_or_else(|| endpoint.clone()); debug!(endpoint = %endpoint, "Connecting to FlareDB"); let client = RdbClient::connect_with_pd_namespace(endpoint, pd_endpoint, "creditservice") - .await - .map_err(|e| Error::Storage(format!("Failed to connect to FlareDB: {}", e)))?; + .await + .map_err(|e| Error::Storage(format!("Failed to connect to FlareDB: {}", e)))?; Ok(Arc::new(Self { client: Arc::new(Mutex::new(client)), @@ -60,7 +60,11 @@ impl FlareDbStorage { } fn quota_key(project_id: &str, resource_type: ResourceType) -> String { - format!("/creditservice/quotas/{}/{}", project_id, resource_type.as_str()) + format!( + "/creditservice/quotas/{}/{}", + project_id, + resource_type.as_str() + ) } fn transactions_prefix(project_id: &str) -> String { @@ -273,7 +277,11 @@ impl CreditStorage for FlareDbStorage { Ok(reservations) } - async fn get_quota(&self, project_id: &str, resource_type: ResourceType) -> Result> { + async fn get_quota( + &self, + project_id: &str, + resource_type: ResourceType, + ) -> Result> { let key = Self::quota_key(project_id, resource_type); self.get_value_with_version(&key) .await? diff --git a/creditservice/crates/creditservice-api/src/lib.rs b/creditservice/crates/creditservice-api/src/lib.rs index 6f49304..5511e84 100644 --- a/creditservice/crates/creditservice-api/src/lib.rs +++ b/creditservice/crates/creditservice-api/src/lib.rs @@ -1,24 +1,24 @@ -//! gRPC service implementations for the Photon credit-control reference service. +//! gRPC service implementations for the supported credit-control service. //! -//! The goal is to prove quota and admission control can be integrated with -//! Photon IAM and gateway flows without turning this crate into a full billing -//! product. +//! The product boundary is quota, reservation, wallet, and admission control. +//! This crate intentionally avoids turning that scope into a finance-grade +//! billing or settlement system. 
mod billing; -mod flaredb_storage; -mod sql_storage; mod credit_service; +mod flaredb_storage; mod gateway_credit_service; mod nightlight; +mod sql_storage; mod storage; pub use billing::{ MockUsageMetricsProvider, PricingRules, ProjectBillingResult, ResourceUsage, UsageMetrics, UsageMetricsProvider, }; -pub use flaredb_storage::FlareDbStorage; -pub use sql_storage::SqlStorage; pub use credit_service::CreditServiceImpl; +pub use flaredb_storage::FlareDbStorage; pub use gateway_credit_service::GatewayCreditServiceImpl; pub use nightlight::NightLightClient; +pub use sql_storage::SqlStorage; pub use storage::{CreditStorage, InMemoryStorage}; diff --git a/creditservice/crates/creditservice-api/src/nightlight.rs b/creditservice/crates/creditservice-api/src/nightlight.rs index 865ccf1..a51ebc9 100644 --- a/creditservice/crates/creditservice-api/src/nightlight.rs +++ b/creditservice/crates/creditservice-api/src/nightlight.rs @@ -181,17 +181,11 @@ impl NightLightClient { (query, "lb-hours".to_string()) } ResourceType::DnsZone => { - let query = format!( - r#"count(dns_zone_active{{project_id="{}"}})"#, - project_id - ); + let query = format!(r#"count(dns_zone_active{{project_id="{}"}})"#, project_id); (query, "zones".to_string()) } ResourceType::DnsRecord => { - let query = format!( - r#"count(dns_record_active{{project_id="{}"}})"#, - project_id - ); + let query = format!(r#"count(dns_record_active{{project_id="{}"}})"#, project_id); (query, "records".to_string()) } ResourceType::K8sCluster => { diff --git a/creditservice/crates/creditservice-api/src/sql_storage.rs b/creditservice/crates/creditservice-api/src/sql_storage.rs index 695e265..a4333fc 100644 --- a/creditservice/crates/creditservice-api/src/sql_storage.rs +++ b/creditservice/crates/creditservice-api/src/sql_storage.rs @@ -46,7 +46,9 @@ impl SqlStorage { )); } if url.contains(":memory:") { - return Err(Error::Storage("In-memory SQLite is not allowed".to_string())); + return Err(Error::Storage( + "In-memory SQLite is not allowed".to_string(), + )); } let pool = PoolOptions::::new() .max_connections(1) @@ -114,7 +116,11 @@ impl SqlStorage { } fn quota_key(project_id: &str, resource_type: ResourceType) -> String { - format!("/creditservice/quotas/{}/{}", project_id, resource_type.as_str()) + format!( + "/creditservice/quotas/{}/{}", + project_id, + resource_type.as_str() + ) } fn transactions_prefix(project_id: &str) -> String { @@ -171,15 +177,15 @@ impl SqlStorage { async fn put_if_absent(&self, key: &str, value: &[u8]) -> Result { let rows_affected = match &self.backend { - SqlBackend::Postgres(pool) => { - sqlx::query("INSERT INTO creditservice_kv (key, value) VALUES ($1, $2) ON CONFLICT DO NOTHING") - .bind(key) - .bind(value) - .execute(pool.as_ref()) - .await - .map_err(|e| Error::Storage(format!("Postgres insert failed: {}", e)))? - .rows_affected() - } + SqlBackend::Postgres(pool) => sqlx::query( + "INSERT INTO creditservice_kv (key, value) VALUES ($1, $2) ON CONFLICT DO NOTHING", + ) + .bind(key) + .bind(value) + .execute(pool.as_ref()) + .await + .map_err(|e| Error::Storage(format!("Postgres insert failed: {}", e)))? + .rows_affected(), SqlBackend::Sqlite(pool) => { sqlx::query("INSERT OR IGNORE INTO creditservice_kv (key, value) VALUES (?1, ?2)") .bind(key) @@ -226,14 +232,12 @@ impl SqlStorage { .map_err(|e| Error::Storage(format!("Postgres delete failed: {}", e)))? 
.rows_affected() } - SqlBackend::Sqlite(pool) => { - sqlx::query("DELETE FROM creditservice_kv WHERE key = ?1") - .bind(key) - .execute(pool.as_ref()) - .await - .map_err(|e| Error::Storage(format!("SQLite delete failed: {}", e)))? - .rows_affected() - } + SqlBackend::Sqlite(pool) => sqlx::query("DELETE FROM creditservice_kv WHERE key = ?1") + .bind(key) + .execute(pool.as_ref()) + .await + .map_err(|e| Error::Storage(format!("SQLite delete failed: {}", e)))? + .rows_affected(), }; Ok(rows_affected > 0) } @@ -368,7 +372,11 @@ impl CreditStorage for SqlStorage { Ok(reservations) } - async fn get_quota(&self, project_id: &str, resource_type: ResourceType) -> Result> { + async fn get_quota( + &self, + project_id: &str, + resource_type: ResourceType, + ) -> Result> { let key = Self::quota_key(project_id, resource_type); self.get(&key) .await? diff --git a/creditservice/crates/creditservice-api/src/storage.rs b/creditservice/crates/creditservice-api/src/storage.rs index 5c4579c..7ea11ac 100644 --- a/creditservice/crates/creditservice-api/src/storage.rs +++ b/creditservice/crates/creditservice-api/src/storage.rs @@ -34,7 +34,11 @@ pub trait CreditStorage: Send + Sync { async fn get_pending_reservations(&self, project_id: &str) -> Result>; // Quota operations - async fn get_quota(&self, project_id: &str, resource_type: ResourceType) -> Result>; + async fn get_quota( + &self, + project_id: &str, + resource_type: ResourceType, + ) -> Result>; async fn set_quota(&self, quota: Quota) -> Result; async fn list_quotas(&self, project_id: &str) -> Result>; } @@ -161,7 +165,9 @@ impl CreditStorage for InMemoryStorage { resource_type: ResourceType, ) -> Result> { let quotas = self.quotas.read().await; - Ok(quotas.get(&(project_id.to_string(), resource_type)).cloned()) + Ok(quotas + .get(&(project_id.to_string(), resource_type)) + .cloned()) } async fn set_quota(&self, quota: Quota) -> Result { diff --git a/creditservice/crates/creditservice-server/src/config.rs b/creditservice/crates/creditservice-server/src/config.rs index f5b68d6..3907fa7 100644 --- a/creditservice/crates/creditservice-server/src/config.rs +++ b/creditservice/crates/creditservice-server/src/config.rs @@ -1,4 +1,4 @@ -//! File-first configuration for the minimal credit-control reference service. +//! File-first configuration for the supported credit-control service. use photon_config::load_toml_config; use photon_state::StateBackend; diff --git a/creditservice/crates/creditservice-server/src/main.rs b/creditservice/crates/creditservice-server/src/main.rs index 794e4f5..cd09b44 100644 --- a/creditservice/crates/creditservice-server/src/main.rs +++ b/creditservice/crates/creditservice-server/src/main.rs @@ -1,7 +1,7 @@ -//! CreditService reference server. +//! CreditService server. //! -//! Main entry point for the minimal auth-integrated quota and credit-control -//! service used to prove vendor-replaceable integration. +//! Main entry point for the auth-integrated quota, reservation, wallet, and +//! admission-control service shipped in the supported UltraCloud add-on surface. 
mod config; mod rest; @@ -24,7 +24,7 @@ use tracing::info; #[derive(Parser, Debug)] #[command(name = "creditservice-server")] -#[command(about = "Minimal auth-integrated credit and quota control reference service")] +#[command(about = "Auth-integrated credit, quota, wallet, and admission-control service")] struct Args { /// Configuration file path #[arg(short, long, default_value = "creditservice.toml")] @@ -86,11 +86,8 @@ async fn main() -> anyhow::Result<()> { .as_deref() .unwrap_or("127.0.0.1:2479"); info!("Using FlareDB for persistent storage: {}", flaredb_endpoint); - FlareDbStorage::new_with_pd( - flaredb_endpoint, - config.chainfire_endpoint.as_deref(), - ) - .await? + FlareDbStorage::new_with_pd(flaredb_endpoint, config.chainfire_endpoint.as_deref()) + .await? } StateBackend::Postgres | StateBackend::Sqlite => { let database_url = config.database_url.as_deref().ok_or_else(|| { diff --git a/creditservice/crates/creditservice-types/src/lib.rs b/creditservice/crates/creditservice-types/src/lib.rs index 37bc156..872c2c3 100644 --- a/creditservice/crates/creditservice-types/src/lib.rs +++ b/creditservice/crates/creditservice-types/src/lib.rs @@ -2,14 +2,14 @@ //! //! This crate defines the domain types used throughout the CreditService. -mod wallet; -mod transaction; -mod reservation; -mod quota; mod error; +mod quota; +mod reservation; +mod transaction; +mod wallet; -pub use wallet::{Wallet, WalletStatus}; -pub use transaction::{Transaction, TransactionType}; -pub use reservation::{Reservation, ReservationStatus}; -pub use quota::{Quota, ResourceType}; pub use error::{Error, Result}; +pub use quota::{Quota, ResourceType}; +pub use reservation::{Reservation, ReservationStatus}; +pub use transaction::{Transaction, TransactionType}; +pub use wallet::{Wallet, WalletStatus}; diff --git a/creditservice/crates/creditservice-types/src/reservation.rs b/creditservice/crates/creditservice-types/src/reservation.rs index d598c83..1bec711 100644 --- a/creditservice/crates/creditservice-types/src/reservation.rs +++ b/creditservice/crates/creditservice-types/src/reservation.rs @@ -50,8 +50,7 @@ impl Reservation { } /// Reservation status -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[derive(Default)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] pub enum ReservationStatus { /// Reservation is pending #[default] @@ -63,4 +62,3 @@ pub enum ReservationStatus { /// Reservation has expired Expired, } - diff --git a/creditservice/crates/creditservice-types/src/wallet.rs b/creditservice/crates/creditservice-types/src/wallet.rs index bbf99b5..f27dcd2 100644 --- a/creditservice/crates/creditservice-types/src/wallet.rs +++ b/creditservice/crates/creditservice-types/src/wallet.rs @@ -61,8 +61,7 @@ impl Wallet { } /// Wallet status -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[derive(Default)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] pub enum WalletStatus { /// Wallet is active and can be used #[default] @@ -73,7 +72,6 @@ pub enum WalletStatus { Closed, } - #[cfg(test)] mod tests { use super::*; diff --git a/creditservice/creditservice-client/src/lib.rs b/creditservice/creditservice-client/src/lib.rs index 63b5e95..c8e9b31 100644 --- a/creditservice/creditservice-client/src/lib.rs +++ b/creditservice/creditservice-client/src/lib.rs @@ -70,7 +70,10 @@ impl Client { quantity, estimated_cost, }; - self.inner.check_quota(request).await.map(|r| r.into_inner()) + self.inner + .check_quota(request) 
+ .await + .map(|r| r.into_inner()) } /// Reserve credits for a resource creation diff --git a/deployer/crates/deployer-ctl/src/chainfire.rs b/deployer/crates/deployer-ctl/src/chainfire.rs index 6420487..c3850a1 100644 --- a/deployer/crates/deployer-ctl/src/chainfire.rs +++ b/deployer/crates/deployer-ctl/src/chainfire.rs @@ -443,7 +443,7 @@ where Fut: Future>, { let endpoints = if endpoints.is_empty() { - vec!["http://127.0.0.1:7000".to_string()] + vec!["http://127.0.0.1:2379".to_string()] } else { endpoints.to_vec() }; diff --git a/deployer/crates/deployer-ctl/src/main.rs b/deployer/crates/deployer-ctl/src/main.rs index 40c4ca0..17fad8f 100644 --- a/deployer/crates/deployer-ctl/src/main.rs +++ b/deployer/crates/deployer-ctl/src/main.rs @@ -16,8 +16,8 @@ mod remote; #[derive(Parser, Debug)] #[command(author, version, about)] struct Cli { - /// Chainfire API エンドポイント (例: http://127.0.0.1:7000) - #[arg(long, global = true, default_value = "http://127.0.0.1:7000")] + /// Chainfire API エンドポイント (例: http://127.0.0.1:2379) + #[arg(long, global = true, default_value = "http://127.0.0.1:2379")] chainfire_endpoint: String, /// UltraCloud Cluster ID (論理名) diff --git a/deployer/crates/fleet-scheduler/src/main.rs b/deployer/crates/fleet-scheduler/src/main.rs index 31d4bfe..7da853a 100644 --- a/deployer/crates/fleet-scheduler/src/main.rs +++ b/deployer/crates/fleet-scheduler/src/main.rs @@ -48,7 +48,7 @@ fn instances_prefix(cluster_namespace: &str, cluster_id: &str) -> Vec { #[derive(Debug, Parser)] #[command(author, version, about = "UltraCloud non-Kubernetes fleet scheduler")] struct Cli { - #[arg(long, default_value = "http://127.0.0.1:7000")] + #[arg(long, default_value = "http://127.0.0.1:2379")] chainfire_endpoint: String, #[arg(long, default_value = "ultracloud")] @@ -1506,7 +1506,7 @@ mod tests { fn test_scheduler() -> Scheduler { Scheduler::new(Cli { - chainfire_endpoint: "http://127.0.0.1:7000".to_string(), + chainfire_endpoint: "http://127.0.0.1:2379".to_string(), cluster_namespace: "ultracloud".to_string(), cluster_id: "test-cluster".to_string(), interval_secs: 1, diff --git a/deployer/crates/nix-agent/src/main.rs b/deployer/crates/nix-agent/src/main.rs index 3c00e05..5b122f8 100644 --- a/deployer/crates/nix-agent/src/main.rs +++ b/deployer/crates/nix-agent/src/main.rs @@ -48,7 +48,7 @@ fn key_observed_system(cluster_namespace: &str, cluster_id: &str, node_id: &str) #[derive(Parser, Debug)] #[command(author, version, about)] struct Cli { - #[arg(long, default_value = "http://127.0.0.1:7000")] + #[arg(long, default_value = "http://127.0.0.1:2379")] chainfire_endpoint: String, #[arg(long, default_value = "ultracloud")] diff --git a/deployer/crates/node-agent/src/agent.rs b/deployer/crates/node-agent/src/agent.rs index 4ebaf9c..8b2eaac 100644 --- a/deployer/crates/node-agent/src/agent.rs +++ b/deployer/crates/node-agent/src/agent.rs @@ -1138,7 +1138,7 @@ mod tests { fn test_agent() -> Agent { Agent::new( - "http://127.0.0.1:7000".to_string(), + "http://127.0.0.1:2379".to_string(), "ultracloud".to_string(), "test-cluster".to_string(), "node01".to_string(), diff --git a/deployer/crates/node-agent/src/main.rs b/deployer/crates/node-agent/src/main.rs index 00ec9fd..5eb46c2 100644 --- a/deployer/crates/node-agent/src/main.rs +++ b/deployer/crates/node-agent/src/main.rs @@ -18,7 +18,7 @@ mod watcher; #[command(author, version, about)] struct Cli { /// Chainfire API エンドポイント - #[arg(long, default_value = "http://127.0.0.1:7000")] + #[arg(long, default_value = "http://127.0.0.1:2379")] 
chainfire_endpoint: String, /// UltraCloud cluster namespace (default: ultracloud) diff --git a/deployer/crates/node-agent/src/process.rs b/deployer/crates/node-agent/src/process.rs index 4f939ef..f53aaa6 100644 --- a/deployer/crates/node-agent/src/process.rs +++ b/deployer/crates/node-agent/src/process.rs @@ -142,6 +142,10 @@ struct ManagedProcessMetadata { instance_id: String, #[serde(default)] command: Option, + #[serde(default)] + args: Vec, + #[serde(default)] + boot_id: Option, } fn metadata_file_path(pid_file: &PathBuf) -> PathBuf { @@ -152,6 +156,50 @@ fn log_file_path(pid_file: &PathBuf) -> PathBuf { PathBuf::from(format!("{}.log", pid_file.display())) } +fn current_boot_id() -> Option { + fs::read_to_string("/proc/sys/kernel/random/boot_id") + .ok() + .map(|value| value.trim().to_string()) + .filter(|value| !value.is_empty()) +} + +fn read_process_argv(pid: u32) -> Option> { + let bytes = fs::read(format!("/proc/{pid}/cmdline")).ok()?; + let argv = bytes + .split(|byte| *byte == 0) + .filter(|part| !part.is_empty()) + .map(|part| String::from_utf8_lossy(part).into_owned()) + .collect::>(); + (!argv.is_empty()).then_some(argv) +} + +fn process_argv_matches(spec_command: &str, spec_args: &[String], argv: &[String]) -> bool { + if argv.is_empty() || argv.len() != spec_args.len() + 1 { + return false; + } + + let actual_command = Path::new(&argv[0]) + .file_name() + .and_then(|value| value.to_str()) + .unwrap_or(argv[0].as_str()); + let expected_command = Path::new(spec_command) + .file_name() + .and_then(|value| value.to_str()) + .unwrap_or(spec_command); + + if actual_command != expected_command && argv[0] != spec_command { + return false; + } + + argv[1..] == *spec_args +} + +fn read_process_metadata(path: &Path) -> Option { + fs::read(path) + .ok() + .and_then(|bytes| serde_json::from_slice(&bytes).ok()) +} + const FALLBACK_EXEC_PATHS: &[&str] = &[ "/run/current-system/sw/bin", "/run/current-system/sw/sbin", @@ -307,6 +355,8 @@ impl ManagedProcess { service: self.service.clone(), instance_id: self.instance_id.clone(), command: Some(self.spec.command.clone()), + args: self.spec.args.clone(), + boot_id: current_boot_id(), }; fs::write(&self.metadata_file, serde_json::to_vec(&metadata)?).with_context(|| { format!("failed to write process metadata {:?}", self.metadata_file) @@ -349,12 +399,35 @@ impl ManagedProcess { // PIDファイルからPIDを読み取って停止 if let Ok(pid_str) = fs::read_to_string(&self.pid_file) { if let Ok(pid) = pid_str.trim().parse::() { - Command::new("kill") - .arg(pid.to_string()) - .output() - .await - .ok(); - for _ in 0..10 { + let metadata = read_process_metadata(&self.metadata_file); + let boot_matches = metadata + .as_ref() + .and_then(|value| value.boot_id.as_deref()) + .map(|expected| current_boot_id().as_deref() == Some(expected)) + .unwrap_or(true); + let argv_matches = read_process_argv(pid) + .map(|argv| process_argv_matches(&self.spec.command, &self.spec.args, &argv)) + .unwrap_or(false); + + if boot_matches && argv_matches { + Command::new("kill") + .arg(pid.to_string()) + .output() + .await + .ok(); + for _ in 0..10 { + let still_running = Command::new("kill") + .arg("-0") + .arg(pid.to_string()) + .output() + .await + .map(|output| output.status.success()) + .unwrap_or(false); + if !still_running { + break; + } + tokio::time::sleep(tokio::time::Duration::from_millis(500)).await; + } let still_running = Command::new("kill") .arg("-0") .arg(pid.to_string()) @@ -362,25 +435,21 @@ impl ManagedProcess { .await .map(|output| output.status.success()) .unwrap_or(false); 
- if !still_running { - break; + if still_running { + Command::new("kill") + .arg("-9") + .arg(pid.to_string()) + .output() + .await + .ok(); } - tokio::time::sleep(tokio::time::Duration::from_millis(500)).await; - } - let still_running = Command::new("kill") - .arg("-0") - .arg(pid.to_string()) - .output() - .await - .map(|output| output.status.success()) - .unwrap_or(false); - if still_running { - Command::new("kill") - .arg("-9") - .arg(pid.to_string()) - .output() - .await - .ok(); + } else { + warn!( + service = %self.service, + instance_id = %self.instance_id, + pid = pid, + "pid file points to a different process; removing stale pid-dir entry without sending a signal" + ); } } } @@ -410,6 +479,7 @@ impl ManagedProcess { })? { self.child = None; fs::remove_file(&self.pid_file).ok(); + fs::remove_file(&self.metadata_file).ok(); return Ok(false); } return Ok(true); @@ -437,18 +507,30 @@ impl ManagedProcess { .with_context(|| format!("failed to check process {}", pid))?; if !output.status.success() { + fs::remove_file(&self.pid_file).ok(); + fs::remove_file(&self.metadata_file).ok(); return Ok(false); } - // PID再利用対策: /proc からコマンドラインを確認 - let cmdline_path = format!("/proc/{}/cmdline", pid); - if let Ok(cmdline) = fs::read_to_string(&cmdline_path) { - let cmdline = cmdline.replace('\0', " "); - if !cmdline.contains(&self.spec.command) { - return Ok(false); + if let Some(metadata) = read_process_metadata(&self.metadata_file) { + if let Some(expected_boot_id) = metadata.boot_id.as_deref() { + if current_boot_id().as_deref() != Some(expected_boot_id) { + fs::remove_file(&self.pid_file).ok(); + fs::remove_file(&self.metadata_file).ok(); + return Ok(false); + } } } + if !read_process_argv(pid) + .map(|argv| process_argv_matches(&self.spec.command, &self.spec.args, &argv)) + .unwrap_or(false) + { + fs::remove_file(&self.pid_file).ok(); + fs::remove_file(&self.metadata_file).ok(); + return Ok(false); + } + if self.started_at.is_none() { self.started_at = Some(Utc::now()); } @@ -561,6 +643,54 @@ mod tests { let _ = fs::remove_dir_all(&temp); } + + #[test] + fn test_process_argv_matches_exact_command_and_args() { + let argv = vec![ + "/run/current-system/sw/bin/python3".to_string(), + "-m".to_string(), + "http.server".to_string(), + "18190".to_string(), + "--bind".to_string(), + "10.100.0.22".to_string(), + ]; + + assert!(process_argv_matches( + "python3", + &[ + "-m".to_string(), + "http.server".to_string(), + "18190".to_string(), + "--bind".to_string(), + "10.100.0.22".to_string() + ], + &argv + )); + } + + #[test] + fn test_process_argv_rejects_same_binary_with_different_args() { + let argv = vec![ + "/run/current-system/sw/bin/python3".to_string(), + "-m".to_string(), + "http.server".to_string(), + "18193".to_string(), + "--bind".to_string(), + "10.100.0.22".to_string(), + ]; + + assert!(!process_argv_matches( + "python3", + &[ + "-m".to_string(), + "http.server".to_string(), + "18190".to_string(), + "--bind".to_string(), + "10.100.0.22".to_string() + ], + &argv + )); + } } impl ProcessManager { @@ -663,7 +793,7 @@ impl ProcessManager { instance_id: metadata.instance_id, spec: ProcessSpec { command: metadata.command.unwrap_or_default(), - args: Vec::new(), + args: metadata.args.clone(), working_dir: None, env: Default::default(), }, diff --git a/deployer/crates/ultracloud-reconciler/src/hosts.rs b/deployer/crates/ultracloud-reconciler/src/hosts.rs index 7c37931..5bb3995 100644 --- a/deployer/crates/ultracloud-reconciler/src/hosts.rs +++ b/deployer/crates/ultracloud-reconciler/src/hosts.rs @@ 
-1218,7 +1218,7 @@ mod tests { fn test_controller() -> HostDeploymentController { HostDeploymentController::new(HostsCommand { - endpoint: "http://127.0.0.1:7000".to_string(), + endpoint: "http://127.0.0.1:2379".to_string(), cluster_namespace: "ultracloud".to_string(), cluster_id: "test-cluster".to_string(), interval_secs: 1, diff --git a/docs/README.md b/docs/README.md index cca3840..22967f3 100644 --- a/docs/README.md +++ b/docs/README.md @@ -4,16 +4,50 @@ This directory is the public documentation entrypoint for UltraCloud. ## Read First -- [../README.md](/home/centra/cloud/README.md) -- [testing.md](/home/centra/cloud/docs/testing.md) -- [component-matrix.md](/home/centra/cloud/docs/component-matrix.md) -- [storage-benchmarks.md](/home/centra/cloud/docs/storage-benchmarks.md) +- [../README.md](../README.md) +- [testing.md](testing.md) +- [component-matrix.md](component-matrix.md) +- [rollout-bundle.md](rollout-bundle.md) +- [control-plane-ops.md](control-plane-ops.md) +- [edge-trial-surface.md](edge-trial-surface.md) +- [provider-vm-reality.md](provider-vm-reality.md) +- [hardware-bringup.md](hardware-bringup.md) +- [storage-benchmarks.md](storage-benchmarks.md) + +## Canonical Profiles + +- `single-node dev`: `nix run .#single-node-quickstart`, `nix run .#single-node-trial`, `nix build .#single-node-trial-vm`, `nixosConfigurations.single-node-quickstart`, companion image `nixosConfigurations.netboot-all-in-one` +- `3-node HA control plane`: `nixosConfigurations.node01`, `nixosConfigurations.node02`, `nixosConfigurations.node03`, companion image `nixosConfigurations.netboot-control-plane` +- `bare-metal bootstrap`: `nix run ./nix/test-cluster#cluster -- baremetal-iso`, `nixosConfigurations.ultracloud-iso`, `nixosConfigurations.baremetal-qemu-control-plane`, `nixosConfigurations.baremetal-qemu-worker`, `checks.x86_64-linux.baremetal-iso-e2e` followed by `./result/bin/baremetal-iso-e2e` for the exact host-KVM proof + +`nixosConfigurations.netboot-worker` is an archived helper outside the canonical profiles and their guard set. `baremetal/vm-cluster`, `k8shost-cni`, `k8shost-controllers`, `lightningstor-csi`, Firecracker, and mvisor remain in-tree only as non-product scaffolds or `legacy/manual` debugging paths. + +`single-node-trial-vm` is the low-friction trial artifact for local use. OCI/Docker artifact is intentionally not the public trial surface because a privileged container would not exercise the same KVM, `/dev/net/tun`, and guest-kernel contract. + +`ultracloud.cluster` backed by `nix/lib/cluster-schema.nix` is the only supported cluster authoring source. +`nix-nos` is limited to legacy compatibility and low-level network primitives. +`single-node-trial-vm` and `single-node-quickstart` are the standalone VM-platform story. 
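As a quick orientation, the canonical `single-node dev` entrypoints listed above chain into a short local bring-up sequence. This is a sketch only; the commands are the published ones, the ordering and comments are illustrative:

```bash
# Sketch: local single-node dev bring-up using the canonical entrypoints.
nix build .#single-node-trial-vm        # buildable VM appliance for local trial use
nix run .#single-node-trial             # smoke launcher for the minimal VM-platform surface
nix run .#single-node-quickstart        # one-command local bring-up for API development
```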
## Key References -- VM validation harness: [../nix/test-cluster/README.md](/home/centra/cloud/nix/test-cluster/README.md) -- CoronaFS storage role: [../coronafs/README.md](/home/centra/cloud/coronafs/README.md) -- CreditService scope note: [../creditservice/README.md](/home/centra/cloud/creditservice/README.md) +- VM validation harness: [../nix/test-cluster/README.md](../nix/test-cluster/README.md) +- Hardware bring-up bridge: [hardware-bringup.md](hardware-bringup.md) +- Provider and VM-hosting reality proof: [provider-vm-reality.md](provider-vm-reality.md) +- Rollout bundle operator contract: [rollout-bundle.md](rollout-bundle.md) +- Core control-plane operator contract: [control-plane-ops.md](control-plane-ops.md) +- Edge and trial-surface contract: [edge-trial-surface.md](edge-trial-surface.md) +- APIGateway supported scope: [../apigateway/README.md](../apigateway/README.md) +- NightLight supported scope: [../nightlight/README.md](../nightlight/README.md) +- CoronaFS storage role: [../coronafs/README.md](../coronafs/README.md) +- CreditService supported scope: [../creditservice/README.md](../creditservice/README.md) +- K8sHost supported scope: [../k8shost/README.md](../k8shost/README.md) + +## Core API Notes + +- `chainfire` supports fixed-membership cluster introspection on the public surface: `MemberList`, `Status`, and the internal `Vote` plus `AppendEntries` Raft transport. `chainfire-core` remains a workspace-internal compatibility crate rather than a supported embeddable API. +- `flaredb` supports SQL over both gRPC and REST. The public REST endpoints are `POST /api/v1/sql` and `GET /api/v1/tables`. +- `lightningstor` keeps bucket versioning, bucket policy, bucket tagging, and explicit object version listing on the supported optional surface. +- `k8shost` keeps `WatchPods` on the supported surface as a bounded snapshot stream of the current matching pods. ## Design Notes diff --git a/docs/component-matrix.md b/docs/component-matrix.md index ef48b89..2cc9dc1 100644 --- a/docs/component-matrix.md +++ b/docs/component-matrix.md @@ -1,54 +1,98 @@ # Component Matrix -UltraCloud is intended to validate meaningful service combinations, not only a single all-on deployment. -This page summarizes the compositions that are exercised by the VM-cluster harness today. +UltraCloud now fixes the public support surface to three canonical profiles. This page defines the required and optional component bundles for each profile and keeps everything else explicitly outside the core contract. 
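The FlareDB public REST endpoints noted in the Core API Notes above (`POST /api/v1/sql`, `GET /api/v1/tables`) can be smoke-checked from a shell. A minimal sketch, assuming a locally reachable server; the port and the `{"sql": ...}` request body shape are assumptions, only the two paths are documented:

```bash
# Hedged sketch: exercise the documented FlareDB REST paths against a local node.
flaredb="http://127.0.0.1:2479"                    # endpoint and port are assumptions
curl -s "$flaredb/api/v1/tables"                   # list tables
curl -s -X POST "$flaredb/api/v1/sql" \
  -H 'Content-Type: application/json' \
  -d '{"sql": "SELECT 1"}'                         # request body shape is an assumption
```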
-## Validated Control Plane +## Canonical Profiles -- `chainfire + flaredb + iam` +### `single-node dev` -## Validated Network Provider Layer +- Required components: `chainfire`, `flaredb`, `iam`, `plasmavmc`, `prismnet` +- Optional components: `lightningstor`, `coronafs`, `flashdns`, `fiberlb`, `apigateway`, `nightlight`, `creditservice`, `k8shost` +- Canonical entrypoints: `nix run .#single-node-quickstart`, `nix run .#single-node-trial`, `nix build .#single-node-trial-vm`, `nixosConfigurations.single-node-quickstart`, and companion install image `nixosConfigurations.netboot-all-in-one` +- Optional component toggles: `ultracloud.quickstart.enableLightningStor`, `enableCoronafs`, `enableFlashDNS`, `enableFiberLB`, `enableApiGateway`, `enableNightlight`, `enableCreditService`, `enableK8sHost` +- Primary use: one-command local bring-up, API development, and one-box VM experimentation without the HA control-plane or rollout-stack overhead +- Trial artifact: `single-node-trial-vm` is the supported buildable VM appliance for local use; the `single-node-quickstart` or `single-node-trial` app is the smoke launcher for that same minimal surface -- `prismnet` -- `prismnet + flashdns` -- `prismnet + fiberlb` -- `prismnet + flashdns + fiberlb` +### `3-node HA control plane` -These combinations justify the existence of the network services as composable providers rather than hidden internal subsystems. +- Required components: `chainfire`, `flaredb`, `iam`, `nix-agent` on every control-plane node, plus `deployer` on the bootstrap node +- Optional components: `fleet-scheduler`, `node-agent`, `prismnet`, `flashdns`, `fiberlb`, `plasmavmc`, `lightningstor`, `coronafs`, `k8shost`, `apigateway`, `nightlight`, `creditservice` +- Canonical entrypoints: `nixosConfigurations.node01`, `nixosConfigurations.node02`, `nixosConfigurations.node03`, and companion install image `nixosConfigurations.netboot-control-plane` +- Primary use: stable replicated control plane that can later accept worker, storage, and edge bundles without redefining the bootstrap path -## Validated VM Hosting Layer +### `bare-metal bootstrap` -- `plasmavmc + prismnet` -- `plasmavmc + lightningstor` -- `plasmavmc + coronafs` -- `plasmavmc + coronafs + lightningstor` -- `plasmavmc + prismnet + coronafs + lightningstor` +- Required components: `deployer`, `first-boot-automation`, `install-target`, `nix-agent` +- Optional components: `node-agent`, `fleet-scheduler`, and higher-level storage or edge services after the first successful rollout +- Canonical entrypoints: `nix run ./nix/test-cluster#cluster -- baremetal-iso`, `nixosConfigurations.ultracloud-iso`, `nixosConfigurations.baremetal-qemu-control-plane`, `nixosConfigurations.baremetal-qemu-worker`, `checks.x86_64-linux.baremetal-iso-e2e`, and the built runner `./result/bin/baremetal-iso-e2e` for the exact host-KVM proof +- Primary use: boot the installer ISO, phone home to `deployer`, fetch the flake bundle, run Disko, reboot, and converge QEMU-emulated or real machines into either the single-node or HA profile -This split keeps mutable VM volumes on CoronaFS and immutable VM images on LightningStor object storage. 
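Taken together, the `bare-metal bootstrap` entrypoints above form a repeatable local proof lane; a sketch, run from the repository root:

```bash
# QEMU-backed ISO install proof, then the exact host-KVM check runner.
nix run ./nix/test-cluster#cluster -- baremetal-iso
nix build .#checks.x86_64-linux.baremetal-iso-e2e
./result/bin/baremetal-iso-e2e
```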
+## Companion And Helper Outputs -## Validated Kubernetes-Style Hosting Layer +- `nixosConfigurations.netboot-all-in-one`: canonical companion install image for `single-node dev` +- `nixosConfigurations.netboot-control-plane`: canonical companion install image for `3-node HA control plane` +- `packages.single-node-trial-vm`: low-friction buildable VM appliance for the minimal VM-platform core +- `nixosConfigurations.netboot-worker`: archived/non-product worker helper kept in-tree for manual lab debugging only -- `k8shost + prismnet` -- `k8shost + flashdns` -- `k8shost + fiberlb` -- `k8shost + prismnet + flashdns + fiberlb` +## Cluster Authoring Source -## Validated Edge And Tenant Services +`ultracloud.cluster` backed by `nix/lib/cluster-schema.nix` is the only supported cluster authoring source. It is the canonical input for deployer classes and pools, service placement state, rollout objects, and per-node bootstrap metadata. -- `apigateway + iam + prismnet` -- `nightlight + apigateway` -- `nightlight` -- `creditservice + iam + apigateway` -- `creditservice + iam` -- `deployer + iam + chainfire` +`nix-nos` is limited to legacy compatibility and low-level network primitives such as interfaces, VLANs, BGP, and static routing. It is not the canonical source for cluster topology, rollout intent, or scheduler state. -## Validation Direction +## Optional Composition Bundles -The VM cluster harness now exposes: +The optional bundles below remain important, but they are layered on top of the canonical profiles rather than treated as separate top-level products: -```bash -nix run ./nix/test-cluster#cluster -- matrix -nix run ./nix/test-cluster#cluster -- fresh-matrix -``` +- control-plane core: `chainfire + flaredb + iam` +- network provider bundle: `prismnet + flashdns + fiberlb` +- VM hosting bundle: `plasmavmc + prismnet + coronafs + lightningstor` +- Kubernetes-style hosting bundle: `k8shost + prismnet + flashdns + fiberlb` +- edge and tenant bundle: `apigateway + iam + nightlight + creditservice` +- native rollout bundle: `deployer + chainfire + nix-agent + fleet-scheduler + node-agent` -`fresh-matrix` is the publishable path because it rebuilds the host-side VM images before validating the composed service scenarios, including PrismNet-backed PlasmaVMC guests. +`fresh-matrix` is the publishable composition proof because it rebuilds the host-side VM images before validating these bundles on the VM cluster. + +For the edge and tenant bundle, the published contract now means: APIGateway is supported as stateless replicated instances behind an external L4 or VIP layer, but config rollout is restart-based and live in-process reload is not promised; NightLight is supported as a single-node WAL/snapshot service with instance-wide retention rather than replicated HA metrics storage; and CreditService stays scoped to quota, wallet, reservation, and admission control, with export or backend migration handled as offline export/import or backend-native snapshot workflows instead of live mixed-writer migration. + +For the network provider bundle specifically, the published contract now means: PrismNet can create tenant VPC/subnet/port state and add then delete security-group ACLs deterministically, FlashDNS can publish records for those workloads, and FiberLB can front them with TCP plus TLS-terminated `Https` / `TerminatedHttps` listeners. 
`provider-vm-reality-proof` is the artifact-producing companion lane for that surface; it records authoritative DNS answers plus FiberLB backend drain and re-convergence under `./work/provider-vm-reality-proof/latest`. The shipped FiberLB L4 algorithms stay under targeted server tests in-tree. +PrismNet real OVS/OVN dataplane validation remains outside the supported local KVM surface. +FiberLB native BGP or BFD peer interop and hardware VIP ownership remain outside the supported local KVM surface. +FiberLB HTTPS health checks currently do not verify backend TLS certificates. Supported scope is limited to TCP reachability plus HTTP status for the backend endpoint until CA-aware verification is wired through config, server code, and the canonical harness. +For the VM hosting bundle, the published PlasmaVMC contract is the KVM-backed VM lifecycle path plus PrismNet-attached guest networking. `provider-vm-reality-proof` records KVM shared-storage migration and post-migration restart artifacts on the worker pair. Real-hardware migration or storage handoff remains a later hardware proof. Firecracker and mvisor code stays in-tree only as archived non-product backend scaffolding until it has end-to-end tenant-network coverage and publishable suite proof. + +## Responsibility Boundaries + +- `k8shost`: tenant workload API surface. It manages pod, deployment, and service semantics, then delegates network publication to `prismnet`, `flashdns`, and `fiberlb`. +- `k8shost` is fixed as an API/control-plane product surface. Supported binaries stop at `k8shost-server`, and `k8shost-cni`, `lightningstor-csi`, plus `k8shost-controllers` stay archived non-product until they have their own published coverage and a real network or storage dataplane contract. +- `plasmavmc`: tenant VM API surface. The supported public backend is KVM; it can run against explicit remote IAM, PrismNet, and FlareDB endpoints, and other backend implementations stay outside the canonical contract until they have end-to-end runtime and tenant-network coverage. +- `creditservice`: tenant quota, wallet, reservation, and admission-control surface. It stays in the supported bundle because `fresh-matrix` exercises both its direct APIs and the API-gateway path. +- `fleet-scheduler`: bare-metal service placement surface. It schedules host-native service instances from declarative cluster state generated from `ultracloud.cluster` plus `node-agent` heartbeats, without exposing Kubernetes APIs. +- `deployer`: enrollment and rollout authority. It serves `/api/v1/phone-home`, stores install plans and desired-system references, and seeds cluster metadata from the generated `ultracloud.cluster` state. +- `nix-agent`: host OS reconciler. It turns `deployer` desired-system references into `switch-to-configuration` actions plus rollback and health-check handling. +- `node-agent`: host runtime reconciler. It applies scheduled service-instance state, keeps runtime heartbeats fresh, and reports host-local execution status back to the scheduler. + +The intended layering is `deployer -> nix-agent` for machine image or NixOS generation changes, and `deployer -> fleet-scheduler -> node-agent` for host-native service placement changes. `k8shost` stays separate because it is the tenant workload control plane, not the native service scheduler. The `single-node dev` profile intentionally stops before that rollout stack and keeps only the VM-platform core plus explicit add-ons. 
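To make the FiberLB health-check scope stated earlier on this page concrete, the following is a rough shell equivalent of "TCP reachability plus HTTP status" with backend certificate verification skipped; the backend address, path, and the healthy-status rule are illustrative assumptions, not the shipped checker:

```bash
# Rough equivalent of the current health-check scope, not FiberLB's implementation.
backend="10.0.0.10:8443"                                        # placeholder backend
code="$(curl -ksS -o /dev/null -w '%{http_code}' "https://${backend}/healthz")" || exit 1
case "$code" in
  2??|3??) echo "backend ${backend} healthy (HTTP ${code})" ;;
  *)       echo "backend ${backend} unhealthy (HTTP ${code})"; exit 1 ;;
esac
```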
+ +## Standalone Stories + +- `single-node-trial-vm` and `single-node-quickstart` are the standalone VM-platform story for the minimal KVM-backed surface. +- `deployer-vm-smoke`, `portable-control-plane-regressions`, and `baremetal-iso` are the standalone rollout-stack story for `deployer -> nix-agent` and `deployer -> fleet-scheduler -> node-agent`. +- OCI/Docker artifact is intentionally not the public trial surface because the supported VM-platform contract depends on a guest kernel plus host KVM, `/dev/net/tun`, and OVS/libvirt semantics. + +## Archived Scaffolds + +- `k8shost-cni`: internal scaffold for old tenant-network experiments; excluded from default workspace members and canonical docs +- `k8shost-controllers`: controller prototype scaffold; excluded from default workspace members and canonical docs +- `lightningstor-csi`: storage helper prototype; excluded from default workspace members and canonical docs +- Firecracker and mvisor: archived PlasmaVMC backend scaffolds outside the supported KVM-only contract and excluded from the default PlasmaVMC workspace members +- `nixosConfigurations.netboot-worker`: archived worker helper image outside canonical profile guards +- `baremetal/vm-cluster`: `legacy/manual` debugging path outside the main product surface + +## Non-Canonical Paths + +- `baremetal/vm-cluster` remains `legacy/manual` +- standalone use of `netboot-control-plane` or `netboot-all-in-one` outside the documented profiles is a debugging path, not a fourth supported profile +- `netboot-worker`, Firecracker, mvisor, `k8shost-cni`, `lightningstor-csi`, and `k8shost-controllers` are archived non-product scaffolds rather than canonical entrypoints +- `netboot-base`, `pxe-server`, and `vm-smoke-target` are internal or legacy helpers, not supported profiles by themselves +- ad hoc shell-driven cluster bring-up is for debugging only and should not be presented as the canonical public path diff --git a/docs/control-plane-ops.md b/docs/control-plane-ops.md new file mode 100644 index 0000000..85cd673 --- /dev/null +++ b/docs/control-plane-ops.md @@ -0,0 +1,77 @@ +# Core Control Plane Operations + +This document fixes the supported operator lifecycle for the core control-plane services: `chainfire`, `flaredb`, and `iam`. + +## ChainFire Membership And Node Replacement + +ChainFire dynamic membership, replace-node, and scale-out are unsupported on the supported surface. + +The supported public surface is the fixed-membership cluster API already documented in `chainfire-api`: `MemberList` and `Status` report the membership that the node booted with, and operators should treat that membership as immutable for a release branch. + +Supported operator actions today: + +1. Keep the canonical control plane at the documented fixed membership for the branch. +2. Use the canonical `durability-proof` backup/restore lane before disruptive maintenance. +3. Use `nix run ./nix/test-cluster#cluster -- rollout-soak` when you need a longer-running fixed-membership restart proof after maintenance or rollout work. +4. Recover failed nodes by restoring the same fixed-membership cluster shape or by rebuilding the whole cluster with a freshly published static membership and then restoring data. + +Unsupported operator actions today: + +1. Live `replace-node` through a public ChainFire API. +2. Live `scale-out` by adding new voters on the supported surface. +3. Relying on internal membership helpers as a published product contract. 
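For read-only introspection of that fixed membership, the supported path is the public `Cluster` RPCs (`MemberList`, `Status`). A hedged sketch, assuming gRPC server reflection is available and that the service lives in a `chainfire` proto package; the endpoint matches the default used by the deployer tooling:

```bash
# Hedged sketch: inspect the membership a node booted with.
endpoint="127.0.0.1:2379"
grpcurl -plaintext "$endpoint" list                     # discover the exact service names
grpcurl -plaintext "$endpoint" chainfire.Cluster/MemberList
grpcurl -plaintext "$endpoint" chainfire.Cluster/Status
```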
+ +The focused boundary proof is `./nix/test-cluster/run-core-control-plane-ops-proof.sh`, which records the fixed-membership source marker from `chainfire-api` and the public docs markers under `./work/core-control-plane-ops-proof`. The live-operations companion is `nix run ./nix/test-cluster#cluster -- rollout-soak`, which on 2026-04-10 recorded `chainfire-post-restart-put.json`, `chainfire-post-restart.json`, and `post-control-plane-restarts.json` under `./work/rollout-soak/20260410T164549+0900` after repeated maintenance and worker power-loss, without promoting dynamic membership to supported scope. + +## FlareDB Online Migration And Schema Evolution + +FlareDB online migration and schema evolution must start from the durability-proof backup/restore baseline. + +The supported operator contract is additive-first schema evolution: + +1. Run `nix run ./nix/test-cluster#cluster -- durability-proof` or keep an equivalent logical backup artifact before changing schema. +2. Apply additive changes first: new tables, new nullable columns, new indexes, and code paths that tolerate both old and new shapes. +3. Backfill data and cut read traffic to the new schema before deleting or rewriting old state. +4. Treat destructive cleanup, `DROP TABLE`, and incompatible column rewrites as a later maintenance step after a fresh backup. + +This keeps the migration runbook consistent with the current product proof: the durability lane proves logical SQL backup/restore, and the 2026-04-10 `rollout-soak` artifact root `./work/rollout-soak/20260410T164549+0900` rechecks additive SQL operations through `flaredb-post-restart-create.json`, `flaredb-post-restart-insert.json`, and `flaredb-post-restart.json` after a FlareDB member restart. The operator contract for live changes stays additive schema evolution rather than destructive in-place rewrites. + +FlareDB destructive DDL and fully automated online migration remain outside the supported product contract for this release. When you need `DROP TABLE`, incompatible column rewrites, or automated destructive cutover, stop at the additive-first boundary above, take a fresh logical backup, and treat the destructive step as an explicit offline maintenance action rather than a release-proven online behavior. + +Internal raft membership helpers in `flaredb-raft` exist for implementation work, but they are not the published operator API for schema migration. + +## IAM Bootstrap Hardening And Rotation + +IAM bootstrap hardening requires an explicit admin token, an explicit signing key, and a 32-byte IAM_CRED_MASTER_KEY; signing-key rotation, credential rotation, and mTLS overlap-and-cutover rotation are the supported recovery paths. + +Production bootstrap contract: + +1. Set `IAM_ADMIN_TOKEN` or `PHOTON_IAM_ADMIN_TOKEN`. +2. Set `authn.internal_token.signing_key` in config or provide the equivalent environment-backed configuration. +3. Set `IAM_CRED_MASTER_KEY` to a 32-byte value before enabling credential issuance. +4. Keep `admin.allow_unauthenticated=true`, `IAM_ALLOW_UNAUTHENTICATED_ADMIN=true`, and random signing keys limited to local development or lab proof environments. + +Supported token and key rotation flow: + +1. Add the new signing key and keep the old key available for verification during the overlap window. +2. Issue new tokens from the new active key. +3. Wait for the maximum supported token TTL or explicitly revoke the old population before retiring the old key. +4. Purge retired keys only after the overlap and retirement windows are complete. 
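As a shell-level sketch of the production bootstrap contract above: the variable names are the documented ones, but whether `IAM_CRED_MASTER_KEY` expects 32 raw bytes or a 32-character string, and how the signing key is injected, are assumptions to confirm against `iam-server`:

```bash
# Hedged sketch of a hardened IAM bootstrap environment.
export IAM_ADMIN_TOKEN="$(head -c 32 /dev/urandom | base64 | tr -d '\n')"
export IAM_CRED_MASTER_KEY="$(head -c 24 /dev/urandom | base64 | tr -d '\n')"  # 32 characters
# authn.internal_token.signing_key must also be set explicitly in the IAM config file;
# keep IAM_ALLOW_UNAUTHENTICATED_ADMIN and admin.allow_unauthenticated for labs only.
```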
+ +Supported credential rotation flow: + +1. Keep `IAM_CRED_MASTER_KEY` explicit and stable across the overlap window. +2. Mint a new credential for the same principal before revoking the old one. +3. Move clients to the new access key and verify it can still read back its secret material. +4. Revoke the old credential only after cutover is complete. + +Supported mTLS overlap-and-cutover rotation flow: + +1. Configure IAM to trust both the old and new service identity mapping or trust roots during the overlap window. +2. Issue or install the new client certificate and cut traffic over to it. +3. Remove the old mapping or trust root only after the new certificate is serving traffic successfully. +4. Verify the old certificate is rejected once the overlap window closes. + +Multi-node IAM failover remains outside the supported product contract for this release. The current release proof is lifecycle-oriented rather than HA-oriented: bootstrap hardening, signing-key rotation, credential overlap-and-revoke rotation, and mTLS overlap-and-cutover rotation are supported; clustered IAM failover is future scope expansion. + +The standalone proof is `./nix/test-cluster/run-core-control-plane-ops-proof.sh`. It runs the `iam-authn` signing-key and mTLS rotation tests plus the `iam-api` credential rotation test, records the bootstrap hardening source markers from `iam-server`, and persists logs plus `result.json` and `scope-fixed-contract.json` under `./work/core-control-plane-ops-proof`. The dated 2026-04-10 artifact root is `./work/core-control-plane-ops-proof/20260410T172148+09:00`. diff --git a/docs/edge-trial-surface.md b/docs/edge-trial-surface.md new file mode 100644 index 0000000..648be90 --- /dev/null +++ b/docs/edge-trial-surface.md @@ -0,0 +1,83 @@ +# Edge And Trial Surface + +This document fixes the supported product boundary for the edge bundle and the lightest trial surface. + +## APIGateway + +APIGateway is supported as stateless replicated instances behind an external L4 or VIP layer; live in-process reload is not part of the product contract. + +Supported operator contract: + +1. Render gateway config from Nix or `ultracloud.cluster` generated inputs and restart or replace the process when routes, auth providers, or credit providers change. +2. Scale out by running multiple identical gateway instances behind FiberLB, an external load balancer, or another L4 or VIP distribution layer. +3. Treat route distribution as configuration rollout, not as a dynamic control-plane API. + +Explicit non-supported behavior: + +1. Hot route reload through an admin API or `SIGHUP`. +2. Stateful leader election or in-process config distribution between gateway replicas. +3. A release promise that every HA topology is directly exercised by `fresh-matrix`. + +Current proof scope: + +1. `fresh-matrix` proves the shipped single gateway-node composition on `node06`. +2. The HA story is a supported operator shape, but the release-facing proof remains one stateless gateway instance plus restart-based rollout. + +## NightLight + +NightLight is supported as a single-node WAL/snapshot service; replicated HA metrics storage is not part of the product contract. + +Supported operator contract: + +1. Use one NightLight instance per edge bundle, per lab, or per tenant environment when you need a hard operational boundary. +2. Use `retention_days`, the WAL, and periodic snapshots as the retention contract for that instance. +3. 
Put shared access control in front of NightLight with APIGateway or another authenticated front door when multiple writers or readers share the same endpoint. + +Explicit non-supported behavior: + +1. Multi-node or quorum-backed NightLight replication. +2. Per-tenant retention enforcement inside NightLight itself. +3. Treating NightLight labels as a hard security boundary. + +The supported tenant contract is therefore deployment-scoped: one NightLight instance can serve one environment or a carefully trusted shared bundle, but tenant isolation is not enforced inside the process. + +## CreditService + +CreditService export and backend migration are supported as offline export/import or backend-native snapshot workflows, not live mixed-writer migration. + +Supported operator contract: + +1. Keep CreditService scoped to quota, wallet, reservation, and admission-control behavior. +2. Use backend-native snapshots or logical API replay as the export baseline. +3. Drain or quiesce writes before moving between FlareDB, PostgreSQL, or SQLite backends. +4. Rehydrate the target backend, then cut APIGateway or callers over to the new endpoint. + +Explicit non-supported behavior: + +1. Finance-grade ledger ownership. +2. Live mixed-writer backend migration. +3. Turning the service into a pricing, invoicing, or settlement platform. + +## Trial Surface + +OCI/Docker artifact is intentionally not the public trial surface. + +The supported lightweight trial remains: + +1. `nix build .#single-node-trial-vm` +2. `nix run .#single-node-trial` +3. `nix run .#single-node-quickstart` + +That boundary exists because the supported VM-platform contract needs a guest kernel plus host KVM, `/dev/net/tun`, and OVS or libvirt semantics. A Docker or OCI image would either be host-coupled and privileged or prove a different, weaker contract. + +## Work Root Budget + +Use `./nix/test-cluster/work-root-budget.sh status` for reporting, `./nix/test-cluster/work-root-budget.sh enforce` for a stronger local budget gate, and `./nix/test-cluster/work-root-budget.sh prune-proof-logs 2` for safer dated-proof cleanup. + +Recommended soft budgets on a local AMD/KVM proof host: + +1. Keep `./work/test-cluster/state` under roughly 35 GiB. +2. Keep disposable runtime state such as `./work/tmp` and `./work/publishable-kvm-runtime` under roughly 10 GiB combined. +3. Keep dated proof roots trimmed so combined proof logs stay under roughly 20 GiB unless you are intentionally archiving a release snapshot. + +The helper prints current sizes, highlights budget overruns, and prints safe cleanup steps such as stopping the cluster, cleaning runtime state, deleting disposable log roots, and then running a Nix store GC after old result symlinks are no longer needed. The `enforce` mode lets local proof lanes fail fast when the operator has let `./work` drift beyond the documented soft budget, and `prune-proof-logs` gives a dry-run-first workflow for trimming dated proof roots. diff --git a/docs/hardware-bringup.md b/docs/hardware-bringup.md new file mode 100644 index 0000000..59ec80e --- /dev/null +++ b/docs/hardware-bringup.md @@ -0,0 +1,135 @@ +# Hardware Bring-Up + +This document is the operator bridge between the canonical QEMU ISO proof and a real USB or BMC/Redfish install smoke. 
+ +## Canonical entrypoint + +```bash +nix run ./nix/test-cluster#hardware-smoke -- preflight +nix run ./nix/test-cluster#hardware-smoke -- run +nix run ./nix/test-cluster#hardware-smoke -- capture +``` + +The wrapper always writes artifacts under `./work/hardware-smoke/` and refreshes `./work/hardware-smoke/latest`. + +## What it fixes + +- kernel parameters are emitted once in `kernel-params.txt` +- expected success markers are emitted once in `expected-markers.txt` +- failure markers are emitted once in `failure-markers.txt` +- operator instructions are emitted once in `operator-handoff.md` +- missing transport inputs are emitted once in `missing-requirements.txt` + +When transport is absent, `preflight` exits successfully but records `status=blocked` in `status.env`. + +## Shared ISO contract + +The physical-node wrapper uses the same ISO attr and the same success markers as the QEMU proof: + +- ISO attr: `.#nixosConfigurations.ultracloud-iso.config.system.build.isoImage` +- QEMU proof: `nix run ./nix/test-cluster#cluster -- baremetal-iso` +- exact local-KVM proof: `nix build .#checks.x86_64-linux.baremetal-iso-e2e && ./result/bin/baremetal-iso-e2e` + +The bridge is intentional: QEMU stands in for the chassis only. The install sequence stays `phone-home -> bundle download -> Disko -> reboot -> post-install boot -> desired-system active`. + +## Required kernel parameters + +`hardware-smoke.sh` writes the exact kernel parameter set to `kernel-params.txt`: + +- `ultracloud.deployer_url=` +- `ultracloud.bootstrap_token=` or a deliberate unauthenticated lab deployer with `ULTRACLOUD_HARDWARE_ALLOW_UNAUTHENTICATED=1` +- optional `ultracloud.ca_cert_url=` +- optional `ultracloud.binary_cache_url=` +- optional `ultracloud.node_id=` +- optional `ultracloud.hostname=` + +## Expected success markers + +The wrapper records the canonical marker list in `expected-markers.txt`: + +- `ULTRACLOUD_MARKER pre-install.boot.` +- `ULTRACLOUD_MARKER pre-install.phone-home.complete.` +- `ULTRACLOUD_MARKER install.bundle-downloaded.` +- `ULTRACLOUD_MARKER install.disko.complete.` +- `ULTRACLOUD_MARKER install.nixos-install.complete.` +- `ULTRACLOUD_MARKER reboot.` +- `ULTRACLOUD_MARKER post-install.boot..` +- `ULTRACLOUD_MARKER desired-system-active.` + +The wrapper also expects `nix-agent.service` to be active after install, and `chainfire.service` to be active when the node role is `control-plane`. 
+ +## USB path + +Provide: + +- `ULTRACLOUD_HARDWARE_TRANSPORT=usb` +- `ULTRACLOUD_HARDWARE_USB_DEVICE=/dev/sdX` +- `ULTRACLOUD_HARDWARE_ALLOW_DESTRUCTIVE=YES` +- `ULTRACLOUD_HARDWARE_DEPLOYER_URL=...` +- `ULTRACLOUD_HARDWARE_BOOTSTRAP_TOKEN=...` or `ULTRACLOUD_HARDWARE_ALLOW_UNAUTHENTICATED=1` +- `ULTRACLOUD_HARDWARE_SSH_HOST=...` or `ULTRACLOUD_HARDWARE_SERIAL_LOG=...` + +Example: + +```bash +ULTRACLOUD_HARDWARE_TRANSPORT=usb \ +ULTRACLOUD_HARDWARE_USB_DEVICE=/dev/sdX \ +ULTRACLOUD_HARDWARE_ALLOW_DESTRUCTIVE=YES \ +ULTRACLOUD_HARDWARE_DEPLOYER_URL=http://10.0.0.10:8088 \ +ULTRACLOUD_HARDWARE_BOOTSTRAP_TOKEN=lab-bootstrap-token \ +ULTRACLOUD_HARDWARE_SSH_HOST=10.0.0.21 \ +nix run ./nix/test-cluster#hardware-smoke -- run +``` + +## BMC / Redfish virtual media path + +Provide: + +- `ULTRACLOUD_HARDWARE_TRANSPORT=redfish` or `bmc` +- `ULTRACLOUD_HARDWARE_REDFISH_ENDPOINT=https://bmc.example` +- `ULTRACLOUD_HARDWARE_REDFISH_USERNAME=...` +- `ULTRACLOUD_HARDWARE_REDFISH_PASSWORD=...` +- `ULTRACLOUD_HARDWARE_ISO_URL=https://http-server/ultracloud-bootstrap.iso` +- optional `ULTRACLOUD_HARDWARE_REDFISH_SYSTEM_ID=System.Embedded.1` +- optional `ULTRACLOUD_HARDWARE_REDFISH_MANAGER_ID=iDRAC.Embedded.1` +- optional `ULTRACLOUD_HARDWARE_REDFISH_VIRTUAL_MEDIA_ID=CD` +- `ULTRACLOUD_HARDWARE_DEPLOYER_URL=...` +- `ULTRACLOUD_HARDWARE_BOOTSTRAP_TOKEN=...` or `ULTRACLOUD_HARDWARE_ALLOW_UNAUTHENTICATED=1` +- `ULTRACLOUD_HARDWARE_SSH_HOST=...` or `ULTRACLOUD_HARDWARE_SERIAL_LOG=...` + +Example: + +```bash +ULTRACLOUD_HARDWARE_TRANSPORT=redfish \ +ULTRACLOUD_HARDWARE_REDFISH_ENDPOINT=https://bmc.example \ +ULTRACLOUD_HARDWARE_REDFISH_USERNAME=admin \ +ULTRACLOUD_HARDWARE_REDFISH_PASSWORD=secret \ +ULTRACLOUD_HARDWARE_ISO_URL=https://mirror.example/ultracloud-bootstrap.iso \ +ULTRACLOUD_HARDWARE_DEPLOYER_URL=http://10.0.0.10:8088 \ +ULTRACLOUD_HARDWARE_BOOTSTRAP_TOKEN=lab-bootstrap-token \ +ULTRACLOUD_HARDWARE_SSH_HOST=10.0.0.21 \ +nix run ./nix/test-cluster#hardware-smoke -- run +``` + +## Capture-only mode + +If the transport action is manual, keep the same proof root and collect the success evidence later: + +```bash +ULTRACLOUD_HARDWARE_PROOF_ROOT=./work/hardware-smoke/latest \ +ULTRACLOUD_HARDWARE_SSH_HOST=10.0.0.21 \ +nix run ./nix/test-cluster#hardware-smoke -- capture +``` + +## Failure and blocked behavior + +`preflight` records `status=blocked` when any of these are missing: + +- transport device or BMC/Redfish endpoint +- deployer URL +- bootstrap token or explicit unauthenticated acknowledgement +- USB destructive acknowledgement +- BMC/Redfish ISO URL +- capture channel for `desired-system active` + +That blocked state is intentional. It means the repo is ready for a physical-node run, but the local session still lacks the external transport or credentials needed to execute it. diff --git a/docs/provider-vm-reality.md b/docs/provider-vm-reality.md new file mode 100644 index 0000000..36f10c9 --- /dev/null +++ b/docs/provider-vm-reality.md @@ -0,0 +1,37 @@ +# Provider And VM-Hosting Reality Proof + +The focused local-KVM proof for the provider and VM-hosting bundles is: + +```bash +nix run ./nix/test-cluster#cluster -- provider-vm-reality-proof +``` + +Artifacts are written under `./work/provider-vm-reality-proof/` and `./work/provider-vm-reality-proof/latest`. + +## What This Lane Proves + +- PrismNet tenant VPC, subnet, port, and security-group ACL lifecycle on the supported local-KVM surface. 
+- FlashDNS authoritative record exposure on the DNS listener, with captured answers for workload and service records. +- FiberLB listener publication plus backend drain and re-convergence for the shipped local-KVM listener surface. +- PlasmaVMC KVM shared-storage migration, CoronaFS handoff, and post-migration restart on the supported worker pair. + +The proof is intentionally narrower than `fresh-matrix`. `fresh-matrix` remains the broad composition suite; `provider-vm-reality-proof` is the artifact-producing companion lane that keeps provider and VM-hosting evidence in one dated root. + +## Recorded Artifacts + +The proof root keeps two subtrees: + +- `network-provider/`: PrismNet, FlashDNS, and FiberLB create or get responses, authoritative DNS answers, FiberLB backend disable or restore evidence, and service journals. +- `vm-hosting/`: VM create response, VM spec, volume state before and after migration, PrismNet port state after migration, VM watch output, and PlasmaVMC or CoronaFS service journals. + +`result.json` records the overall proof status, start and finish timestamps, and the artifact subdirectories. + +## Supported Scope And Fixed Limits + +The local-KVM proof intentionally does not claim the full hardware-network surface. + +- PrismNet real OVS/OVN dataplane validation remains outside the supported local KVM surface. The current proof keeps tenant API lifecycle and attached-VM networking honest, but not a release-grade `ovn-nbctl` or hardware-switch path. +- FiberLB native BGP or BFD peer interop and hardware VIP ownership remain outside the supported local KVM surface. The current proof fixes the shipped contract to listener publication plus backend drain or re-convergence inside the lab. +- PlasmaVMC real-hardware migration or storage handoff remains a later hardware proof. The current proof fixes the release surface to KVM shared-storage migration on the local worker pair. + +Use the hardware bring-up pack in [hardware-bringup.md](hardware-bringup.md) when transport becomes available and the ISO path can be exercised on a real machine. diff --git a/docs/rollout-bundle.md b/docs/rollout-bundle.md new file mode 100644 index 0000000..078a1b5 --- /dev/null +++ b/docs/rollout-bundle.md @@ -0,0 +1,103 @@ +# Rollout Bundle Operations + +This document fixes the supported operator contract for the native rollout bundle: + +- `deployer` +- `fleet-scheduler` +- `nix-agent` +- `node-agent` + +The supported layering is still `deployer -> nix-agent` for host OS rollout and `deployer -> fleet-scheduler -> node-agent` for host-native service placement. + +## Supported Scope + +- `deployer` is supported as a single logical rollout authority. The supported recovery model is restart-in-place or cold-standby replacement that reuses the same `chainfire` namespace, admin and bootstrap credentials, bootstrap flake bundle, and local state backup. +- `deployer` is scope-fixed to one active writer plus optional cold-standby restore; automatic ChainFire-backed multi-instance failover is outside the supported product contract for this release. Do not run multiple writers against the same `deployer` namespace and assume automatic leader failover is safe. +- `nix-agent` is supported for host-local desired-system apply, post-activation health-check execution, and rollback to the previous known system. 
+- `fleet-scheduler` is scope-fixed to the two native-runtime worker lab with one planned drain cycle, one fail-stop worker-loss cycle, and 30-second held degraded states in `rollout-soak`; multi-hour maintenance windows, pinned singleton policies, and large-cluster drain storms are outside the supported product contract for this release. +- `node-agent` is supported for host-local runtime reconcile, process and container execution, per-instance logs, and declared host-path volume mounts. It is not a secret manager, a storage provisioner, or an in-place binary patch system. + +## Proof Commands + +- `nix build .#checks.x86_64-linux.deployer-vm-smoke` +- `nix build .#checks.x86_64-linux.deployer-vm-rollback` +- `nix build .#checks.x86_64-linux.portable-control-plane-regressions` +- `nix build .#checks.x86_64-linux.fleet-scheduler-e2e` +- `nix run ./nix/test-cluster#cluster -- fresh-smoke` +- `nix run ./nix/test-cluster#cluster -- rollout-soak` +- `nix run ./nix/test-cluster#cluster -- durability-proof` + +`deployer-vm-rollback` is the smallest reproducible proof for the `nix-agent` health-check and rollback path. `fresh-smoke` and `fleet-scheduler-e2e` keep the short regression semantics green. `rollout-soak` is the longer-running KVM operator lane for one planned drain cycle, one fail-stop worker-loss cycle, and service-restart behavior across `deployer`, `fleet-scheduler`, `node-agent`, and the fixed-membership control plane. It writes `scope-fixed-contract.json`, `deployer-scope-fixed.txt`, and `fleet-scheduler-scope-fixed.txt` so the release boundary is captured in the proof root instead of being implied only by docs. The steady-state `nix/test-cluster` nodes record explicit `nix-agent` scope markers instead of pretending they run `nix-agent.service`. `durability-proof` remains the canonical persisted artifact lane for `deployer` backup, restart, replay, and storage-side failure injection. + +## Deployer HA And DR + +Supported deployer recovery is a single-writer restore runbook. `DEPLOYER-P1-01` is closed as a scope-fixed release boundary rather than an implied future HA promise: + +1. Preserve the generated cluster state from `ultracloud.cluster`, the deployer bootstrap and admin credentials, and `services.deployer.localStatePath`. +2. Start exactly one `deployer` instance with the same `chainfireEndpoints`, `clusterNamespace`, `chainfireNamespace`, tokens, and optional TLS CA inputs. +3. Re-apply the canonical cluster state: + +```bash +deployer-ctl \ + --chainfire-endpoint http://127.0.0.1:2379 \ + --cluster-id \ + --cluster-namespace ultracloud \ + --deployer-namespace deployer \ + apply --config cluster-state.json --prune +``` + +4. Replay any preserved admin pre-register requests in the same shape as `./work/durability-proof/latest/deployer-pre-register-request.json`. +5. Verify the recovered state with `curl -fsS -H 'x-deployer-token: ' http://:8088/api/v1/admin/nodes | jq` and, for node rollout intent, `deployer-ctl node inspect --node-id --include-desired-system --include-observed-system`. + +The 2026-04-10 canonical backup-and-replay proof for this contract is `nix run ./nix/test-cluster#cluster -- durability-proof`, which recorded `deployer-pre-register-request.json`, `deployer-backup-list.json`, `deployer-post-restart-list.json`, and `deployer-replayed-list.json` under `./work/durability-proof/20260410T120618+0900`. 
The longer-run live-operations companion is `nix run ./nix/test-cluster#cluster -- rollout-soak`, which on 2026-04-10 recorded `deployer-post-restart-nodes.json`, `maintenance-held.json`, `power-loss-held.json`, `post-control-plane-restarts.json`, `scope-fixed-contract.json`, and `deployer-scope-fixed.txt` under `./work/rollout-soak/20260410T164549+0900` while holding degraded states and re-checking the admin inventory. + +## Nix-Agent Operator Contract + +- `services.nix-agent.healthCheckCommand` is an argv vector, not a shell fragment. Every entry is passed to the process directly. +- The command runs after `switch-to-configuration`. +- Exit status `0` means the desired system stays active. +- Non-zero exit with `rollbackOnFailure = true` causes rollback to the previous known system and reports observed status `rolled-back`. +- Non-zero exit with `rollbackOnFailure = false` leaves the failed generation in place and requires operator intervention. + +The supported recovery flow is: + +1. Inspect the desired and observed rollout state: + +```bash +deployer-ctl \ + --chainfire-endpoint http://127.0.0.1:2379 \ + --cluster-id \ + --cluster-namespace ultracloud \ + --deployer-namespace deployer \ + node inspect \ + --node-id \ + --include-desired-system \ + --include-observed-system +``` + +2. If the node reports `rolled-back`, fix the failed target or health-check input, then re-publish the desired system. +3. Re-run the smallest proof lane with `nix build .#checks.x86_64-linux.deployer-vm-rollback` when the issue is in the `deployer -> nix-agent` boundary, or the installer-backed `baremetal-iso` and `baremetal-iso-e2e` lanes when the issue includes first boot. + +`deployer-vm-rollback` is the canonical reproducible proof for this contract. It publishes a desired system whose `health_check_command = ["false"]`, expects observed status `rolled-back`, and proves that the current system does not remain on the rejected target generation. The longer-running 2026-04-10 `rollout-soak` lane does not pretend the steady-state `nix/test-cluster` nodes are deployer-managed `nix-agent` nodes; instead it records `node01-nix-agent-scope.txt` and `node04-nix-agent-scope.txt` under `./work/rollout-soak/20260410T154744+0900`, while the executable `nix-agent` proof surface remains `deployer-vm-rollback`, `baremetal-iso`, and `baremetal-iso-e2e`. + +## Fleet-Scheduler Drain And Maintenance Contract + +- Use `deployer-ctl node set-state --node-id --state draining` for planned short-lived maintenance. +- `draining` removes the node from new placement and causes the scheduler to relocate replicated work when capacity exists. +- `active` re-admits the node and allows replica count to grow back, but healthy singleton work is not required to churn back automatically. +- Fail-stop worker loss is treated like implicit maintenance exhaustion: the scheduler restores healthy placement on the remaining eligible nodes when placement policy allows it. +- The supported release proof is limited to the two native-runtime worker lab with one planned drain cycle and one fail-stop worker-loss cycle, each held for 30 seconds in `rollout-soak`. +- Multi-hour maintenance windows, operator approval workflows, pinned singleton drain choreography, and large-cluster drain storms remain outside the supported contract for this release. + +`fresh-smoke` is the canonical KVM proof for the baseline behavior. 
It drains `node04`, verifies that `native-web`, `native-container`, and `native-daemon` relocate to `node05`, restores `node04`, then simulates `node05` loss and verifies failover back to `node04` plus replica restoration when `node05` returns. `rollout-soak` reruns that choreography as exactly one planned drain cycle and one fail-stop worker-loss cycle, holds each degraded state for 30 seconds, restarts the rollout services, and then rechecks the live runtime state; the 2026-04-10 run under `./work/rollout-soak/20260410T164549+0900` is the current release-grade artifact for that scope-fixed boundary. `fleet-scheduler-e2e` remains the cheap regression lane for the same scheduling semantics. + +## Node-Agent Logs, Secrets, Volumes, And Upgrade Contract + +- Runtime state lives under `services.node-agent.stateDir`, with pid files, metadata, and per-instance logs under `${stateDir}/pids`. +- Each managed instance writes combined stdout and stderr to `${stateDir}/pids/-.pid.log`. +- Metadata is persisted beside the pid file as `${stateDir}/pids/-.pid.meta.json`, including argv and boot-id data used to reject stale pid reuse across reboot. +- Secrets are not fetched, rotated, or encrypted by `node-agent`. Supported secret delivery is limited to values already present in the rendered service spec, environment, or mounted host files. +- Volumes are declared host-path mounts from `ContainerVolumeSpec`. `node-agent` passes them through to the runtime and honors `read_only`, but it does not provision or garbage-collect those paths. +- Upgrades are replace-and-reconcile operations driven by `fleet-scheduler` state changes. `node-agent` does not patch binaries in place; it stops stale processes or containers and starts new ones from the updated spec. + +`fresh-smoke`, `fresh-matrix`, `fleet-scheduler-e2e`, and `rollout-soak` are the operator proofs for the live runtime path, while the persisted process metadata in `deployer/crates/node-agent/src/process.rs` is the source of truth for the log and stale-pid contract. `rollout-soak` restarts `node-agent.service` on live worker nodes and records the longer-running restart survival artifacts under `./work/rollout-soak/20260410T164549+0900`; `nix-agent` stays scope-fixed to its dedicated deployer and installer proofs because the steady-state KVM cluster nodes do not run `nix-agent.service`. diff --git a/docs/testing.md b/docs/testing.md index fb73ad4..a6a1c03 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -1,37 +1,247 @@ # Testing -UltraCloud treats VM-first validation as the canonical local proof path. +UltraCloud treats VM-first validation as the canonical local proof path and keeps the public support contract limited to three profiles. 
-## Canonical Validation +## Canonical Profiles + +| Profile | Canonical entrypoints | Required components | Optional components | +| --- | --- | --- | --- | +| `single-node dev` | `nix run .#single-node-quickstart`, `nix run .#single-node-trial`, `nix build .#single-node-trial-vm`, `nixosConfigurations.single-node-quickstart`, companion install image `nixosConfigurations.netboot-all-in-one` | `chainfire`, `flaredb`, `iam`, `plasmavmc`, `prismnet` | `lightningstor`, `coronafs`, `flashdns`, `fiberlb`, `apigateway`, `nightlight`, `creditservice`, `k8shost` | +| `3-node HA control plane` | `nixosConfigurations.node01`, `nixosConfigurations.node02`, `nixosConfigurations.node03`, companion install image `nixosConfigurations.netboot-control-plane` | `chainfire`, `flaredb`, `iam`, `nix-agent` on every control-plane node, plus `deployer` on the bootstrap node | `fleet-scheduler`, `node-agent`, `prismnet`, `flashdns`, `fiberlb`, `plasmavmc`, `lightningstor`, `coronafs`, `k8shost`, `apigateway`, `nightlight`, `creditservice` | +| `bare-metal bootstrap` | `nix run ./nix/test-cluster#cluster -- baremetal-iso`, `nixosConfigurations.ultracloud-iso`, `nixosConfigurations.baremetal-qemu-control-plane`, `nixosConfigurations.baremetal-qemu-worker`, `checks.x86_64-linux.baremetal-iso-e2e` | `deployer`, `first-boot-automation`, `install-target`, `nix-agent` | `node-agent`, `fleet-scheduler`, and higher-level storage or edge services after bootstrap | + +`nixosConfigurations.netboot-all-in-one` and `nixosConfigurations.netboot-control-plane` are canonical companion images for the single-node and HA profiles. `nixosConfigurations.netboot-worker` is an archived worker helper outside the canonical profiles and their guard set, and `baremetal/vm-cluster` remains a `legacy/manual` debugging path rather than a publishable entrypoint. + +## Cluster Authoring Source + +`ultracloud.cluster` backed by `nix/lib/cluster-schema.nix` is the only supported cluster authoring source. The supported rollout and scheduling tests consume cluster state generated from that module rather than treating `nix-nos` or ad hoc shell state as a primary source. + +`nix-nos` is limited to legacy compatibility and low-level network primitives such as interfaces, VLANs, BGP, and static routing. + +## Quickstart Smoke ```bash -nix run ./nix/test-cluster#cluster -- fresh-smoke +nix flake show . --all-systems | rg -n "quickstart|single-node|trial|container|oci" +nix build .#single-node-trial-vm +nix eval --no-eval-cache .#nixosConfigurations.single-node-quickstart.config.system.build.toplevel.drvPath --raw +nix run .#single-node-quickstart ``` -This flow: +`single-node-trial-vm` is the buildable trial artifact for the minimal VM-platform core, and `single-node-quickstart` is the automated smoke launcher for that same surface. The launcher boots the minimal VM stack under QEMU, waits for `chainfire`, `flaredb`, `iam`, `prismnet`, and `plasmavmc`, verifies their health from inside the guest, and checks the machine-readable product-surface manifest shipped in the VM. The launcher uses the generated NixOS VM runner, so it can fall back to TCG when `/dev/kvm` is absent. -- builds all six VM images on the host -- boots the cluster in dependency order -- validates control-plane, worker, gateway, storage, and fault-injection behavior -- proves that `deployer` seeds scheduler-managed native services directly from declarative Nix cluster state +`single-node-trial` is a public alias for the same smoke launcher. 
OCI/Docker artifact is intentionally not the public trial surface because the supported scope needs a guest kernel plus host KVM, `/dev/net/tun`, and OVS/libvirt semantics; a privileged container would not represent the same contract. + +For debugging, keep the VM alive after the smoke passes: + +```bash +ULTRACLOUD_QUICKSTART_KEEP_VM=1 nix run .#single-node-quickstart +``` + +## 3-Node HA Control Plane + +```bash +nix eval --no-eval-cache .#nixosConfigurations.node01.config.system.build.toplevel.drvPath --raw +nix eval --no-eval-cache .#nixosConfigurations.node02.config.system.build.toplevel.drvPath --raw +nix eval --no-eval-cache .#nixosConfigurations.node03.config.system.build.toplevel.drvPath --raw +nix eval --no-eval-cache .#nixosConfigurations.netboot-control-plane.config.system.build.toplevel.drvPath --raw +``` + +These are the canonical HA control-plane entrypoints. The publishable six-node VM-cluster suite under `./nix/test-cluster` extends this baseline with worker and optional service nodes, but it does not redefine the supported profile names. + +## Canonical Bare-Metal Proof + +```bash +nix eval --no-eval-cache .#nixosConfigurations.baremetal-qemu-control-plane.config.system.build.toplevel.drvPath --raw +nix eval --no-eval-cache .#nixosConfigurations.baremetal-qemu-worker.config.system.build.toplevel.drvPath --raw +nix run ./nix/test-cluster#cluster -- baremetal-iso +nix build .#checks.x86_64-linux.baremetal-iso-e2e +./result/bin/baremetal-iso-e2e ./work/baremetal-iso-e2e/latest +nix run ./nix/test-cluster#hardware-smoke -- preflight +``` + +`baremetal-iso` is the canonical install path for QEMU-as-bare-metal validation. It boots `nixosConfigurations.ultracloud-iso`, waits for `/api/v1/phone-home`, downloads the flake bundle from `deployer`, runs Disko, reboots, confirms the first post-install boot markers, and waits for `nix-agent` to report the desired system as `active` for both `baremetal-qemu-control-plane` and `baremetal-qemu-worker`. + +`baremetal-iso-e2e` now keeps the exact flake attr but changes the execution model: `nix build .#checks.x86_64-linux.baremetal-iso-e2e` materializes `./result/bin/baremetal-iso-e2e`, and that built runner executes the same `nix/test-cluster/verify-baremetal-iso.sh` harness with host KVM and logs under `./work` by default. This avoids the old daemon-sandbox path where a `nixbld` build fell back to `TCG` instead of the host's `/dev/kvm`. + +The local proof intentionally mirrors the real hardware route. Build `nixosConfigurations.ultracloud-iso`, then either boot that ISO in QEMU with KVM or put the same image on USB or BMC virtual media for the target machine. The live installer consumes the same bootstrap parameters in every environment: + +- `ultracloud.deployer_url=` for the reachable `deployer` endpoint +- `ultracloud.bootstrap_token=` for authenticated phone-home, or a lab-only `deployer` with `allow_unauthenticated=true` +- `ultracloud.ca_cert_url=` when `deployer` is TLS-enabled with a private CA +- `ultracloud.binary_cache_url=` when you want the installer to fetch host-built closures instead of compiling locally +- `ultracloud.node_id=` and `ultracloud.hostname=` only when you need to override the DMI-serial or hostname-derived identity + +The networking assumptions are also the same. The ISO needs DHCP or equivalent IP configuration that can reach `deployer` before Disko starts, and it must also reach the optional binary cache when that URL is set. 
The QEMU harness uses user-mode NAT and the built-in `10.0.2.2` fallback endpoints for the local host; physical installs should set the deployer and cache URLs explicitly to routable control-plane addresses. + +The proven marker sequence from `nix/test-cluster/verify-baremetal-iso.sh` is the same sequence you should expect on hardware: `pre-install.boot`, `pre-install.phone-home.complete`, `install.bundle-downloaded`, `install.disko.complete`, `install.nixos-install.complete`, `reboot`, `post-install.boot`, and finally `nix-agent` reporting the desired system as `active`. USB and BMC virtual media change only how the ISO is presented to the machine; they do not change the bootstrap contract. + +## Hardware Bring-Up Pack + +```bash +nix run ./nix/test-cluster#hardware-smoke -- preflight +nix run ./nix/test-cluster#hardware-smoke -- run +nix run ./nix/test-cluster#hardware-smoke -- capture +``` + +`hardware-smoke` is the canonical USB/BMC/Redfish bridge for the physical-node proof. It always writes artifacts under `./work/hardware-smoke/` and refreshes `./work/hardware-smoke/latest`. + +- `preflight` emits `kernel-params.txt`, `expected-markers.txt`, `failure-markers.txt`, `operator-handoff.md`, and `status.env`. +- With no USB device or BMC/Redfish credentials, `preflight` records `status=blocked` and the exact missing transport inputs in `missing-requirements.txt`. +- With transport present, the same wrapper can write USB media or call Redfish virtual media and then capture the real `desired-system active` evidence through SSH or a supplied serial log. +- The expected hardware markers are the same `ULTRACLOUD_MARKER pre-install.boot.*`, `pre-install.phone-home.complete.*`, `install.disko.complete.*`, `reboot.*`, `post-install.boot.*`, and `desired-system-active.*` lines used by `verify-baremetal-iso.sh`. + +Hardware runbook for the same canonical path: + +1. Build `nixosConfigurations.ultracloud-iso` and the target install profiles you want the installer to materialize. +2. Publish cluster state where each reusable node class owns `install_plan.nixos_configuration`, `install_plan.disko_config_path`, and a stable disk selector. Prefer `install_plan.target_disk_by_id` on hardware; the QEMU proof now uses `/dev/disk/by-id/virtio-uc-control-root` and `/dev/disk/by-id/virtio-uc-worker-root` to exercise the same contract. When the live ISO can reach a binary cache, also publish `desired_system.target_system` with the prebuilt closure for that class so `nix-agent` converges to the exact shipped system instead of rebuilding a dirty local copy. +3. Make `deployer` and the optional binary cache reachable from the live ISO, then boot the ISO through USB or BMC virtual media with `ultracloud.deployer_url=...`, `ultracloud.bootstrap_token=...`, and optional `ultracloud.binary_cache_url=...`. +4. Confirm the live installer resolves the install profile, downloads the flake bundle, runs Disko against the selected disk, reboots, and lands on the post-install marker. +5. Confirm `nix-agent` on the installed node converges the desired system to `active`. 
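+One hedged way to spot-check the last two runbook steps from outside the node is to query `deployer` the same way the DR runbook in [rollout-bundle.md](rollout-bundle.md) does, run from a host that can reach the deployer's admin API and ChainFire endpoint. The token value, cluster id, and node id below are placeholders for this illustration.
+
+```bash
+# Sketch only: confirm the installed node is registered and its desired
+# system converged to active. Token, cluster id, and node id are placeholders.
+DEPLOYER_URL=http://10.0.0.10:8088
+curl -fsS -H "x-deployer-token: ${DEPLOYER_ADMIN_TOKEN}" \
+  "${DEPLOYER_URL}/api/v1/admin/nodes" | jq
+deployer-ctl \
+  --chainfire-endpoint http://127.0.0.1:2379 \
+  --cluster-id "${CLUSTER_ID}" \
+  --cluster-namespace ultracloud \
+  --deployer-namespace deployer \
+  node inspect --node-id "${NODE_ID}" \
+  --include-desired-system --include-observed-system
+```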
+ +QEMU-to-hardware mapping for the proof: + +| QEMU harness proof | Hardware proof | +| --- | --- | +| `nix run ./nix/test-cluster#cluster -- baremetal-iso` | boot the same `nixosConfigurations.ultracloud-iso` through USB or BMC virtual media | +| user-mode NAT fallback to `10.0.2.2` | routable `ultracloud.deployer_url` and optional `ultracloud.binary_cache_url` | +| virtio disk by-id selectors seeded by explicit QEMU serials | server, NVMe, or RAID-controller `/dev/disk/by-id/...` selectors in the node class | +| host-local QEMU logs and SSH on `127.0.0.1:22231/22232` | serial-over-LAN, BMC console, or physical console plus SSH on the installed host | +| same marker sequence and `nix-agent` active gate | same marker sequence and `nix-agent` active gate | + +Host prerequisites for the KVM-backed proof are a Linux host with readable and writable `/dev/kvm`, nested virtualization enabled, and enough free space under `./work` or `ULTRACLOUD_WORK_ROOT` for VM disks, logs, and temporary build state. The checked-in wrappers force local Nix builders and derive `max-jobs` and per-build cores from the host CPU count unless `ULTRACLOUD_LOCAL_NIX_MAX_JOBS`, `ULTRACLOUD_LOCAL_NIX_BUILD_CORES`, `PHOTON_CLUSTER_NIX_MAX_JOBS`, or `PHOTON_CLUSTER_NIX_BUILD_CORES` override them. + +## Regression Guards + +```bash +nix build .#checks.x86_64-linux.canonical-profile-eval-guards +nix build .#checks.x86_64-linux.canonical-profile-build-guards +``` + +These checks are the fast fail-first drift gates for the supported surface: + +- `canonical-profile-eval-guards`: forces evaluation of every canonical profile entrypoint, so broken attrs fail before any long-running harness work starts. +- `canonical-profile-build-guards`: realizes the single-node VM, the HA control-plane configs and companion image, and the ISO or bare-metal outputs so build-time drift is caught even when a cluster harness is not running. +- `supported-surface-guard`: rejects unfinished public-surface wording across the published docs, add-on workspaces, and VM-cluster harness files, fails on shipped public server code that still contains `Status::unimplemented`, `unimplemented!()`, `todo!()`, or other intentional stub responses, blocks high-signal completeness markers such as `TODO:`, `FIXME`, or `best-effort` in the supported FiberLB, PrismNet, PlasmaVMC, and K8sHost server code paths, and also fails if archived helpers such as `netboot-worker`, `plasmavmc-firecracker`, `k8shost-cni`, `k8shost-csi`, or `k8shost-controllers` re-enter the default product surface. + +## Portable Local Proof + +```bash +nix build .#checks.x86_64-linux.canonical-profile-eval-guards +nix build .#checks.x86_64-linux.portable-control-plane-regressions +``` + +Use this lane on Linux hosts that do not expose `/dev/kvm`: + +- `portable-control-plane-regressions`: TCG-safe aggregate check that keeps the canonical profile eval guard, `deployer-bootstrap-e2e`, `host-lifecycle-e2e`, `deployer-vm-smoke`, and `fleet-scheduler-e2e` green together. +- It also links in `supported-surface-guard`, so unsupported product-surface wording, code-level public API stubs, or high-signal completeness markers in the supported provider/backend servers fail in the same low-cost lane before a publishable rerun. +- It intentionally does not boot the six-node nested-KVM VM suite, so it is a developer regression path, not the publishable multi-node proof.
+- CI runs `canonical-profile-eval-guards` and `portable-control-plane-regressions` on every relevant change from `.github/workflows/nix.yml`. ## Publishable Checks ```bash +nix run .#single-node-quickstart +nix run ./nix/test-cluster#cluster -- baremetal-iso nix run ./nix/test-cluster#cluster -- fresh-smoke nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp nix run ./nix/test-cluster#cluster -- fresh-matrix -nix run ./nix/test-cluster#cluster -- fresh-bench-storage +nix run ./nix/test-cluster#cluster -- provider-vm-reality-proof +nix run ./nix/test-cluster#cluster -- rollout-soak +./nix/test-cluster/run-publishable-kvm-suite.sh ./work/publishable-kvm-suite +./nix/test-cluster/run-supported-surface-final-proof.sh ./work/final-proofs/latest +nix build .#checks.x86_64-linux.baremetal-iso-e2e +nix build .#checks.x86_64-linux.baremetal-iso-e2e && ./result/bin/baremetal-iso-e2e ./work/baremetal-iso-e2e/latest nix build .#checks.x86_64-linux.deployer-vm-smoke ``` Use these commands as the release-facing local proof set: -- `fresh-smoke`: whole-cluster readiness, core behavior, and fault injection -- `fresh-demo-vm-webapp`: focused VM demo showing a web app inside the guest with FlareDB-backed state and LightningStor object snapshots surviving restart and migration -- `fresh-matrix`: composed service scenarios such as `prismnet + flashdns + fiberlb` and PrismNet-backed VM hosting bundles with `plasmavmc + coronafs + lightningstor` -- `fresh-bench-storage`: CoronaFS local-vs-shared-volume throughput, cross-worker volume visibility, and LightningStor large/small-object throughput capture -- `deployer-vm-smoke`: prebuilt NixOS system closure handoff into `nix-agent`, proving host rollout can activate a host-built target without guest-side compilation +- `single-node-quickstart`: productized one-command quickstart gate for the minimal VM platform profile +- `single-node-trial-vm`: buildable VM appliance for the same minimal VM-platform profile +- `baremetal-iso`: canonical bare-metal bootstrap gate covering pre-install boot, phone-home, flake bundle fetch, Disko install, reboot, post-install boot, and desired-system activation on one control-plane node plus one worker-equivalent node +- `fresh-smoke`: base VM-cluster gate for the six-node harness that extends the canonical `3-node HA control plane`, including readiness, core behavior, and fault injection +- `fresh-smoke` also proves the supported PlasmaVMC backend contract by requiring both worker registrations to advertise `HYPERVISOR_TYPE_KVM` and nothing broader on the public surface +- `fresh-demo-vm-webapp`: optional VM-hosting bundle proof for `plasmavmc + prismnet` with state persisted through `lightningstor` +- `fresh-matrix`: optional composition proof for provider bundles such as `prismnet + flashdns + fiberlb` and `plasmavmc + coronafs + lightningstor`, including PrismNet security-group ACL add/remove, FiberLB TCP plus TLS-terminated `Https` / `TerminatedHttps` listeners, LightningStor bucket metadata plus object-version APIs, the published `k8shost` pod-watch surface, and the KVM-only PlasmaVMC worker contract +- `provider-vm-reality-proof`: focused local-KVM provider and VM-hosting lane that writes dated artifacts under `./work/provider-vm-reality-proof/latest`, captures authoritative FlashDNS answers, FiberLB backend drain and re-convergence, and PlasmaVMC KVM shared-storage migration plus post-migration restart state +- `rollout-soak`: focused longer-run control-plane and rollout lane that rebuilds from clean local runtime 
state, writes dated artifacts under `./work/rollout-soak/latest`, repeats `draining` maintenance and worker power-loss, then restarts `deployer`, `fleet-scheduler`, `node-agent`, `chainfire`, and `flaredb` while recording explicit `nix-agent` scope markers for the steady-state KVM nodes +- `durability-proof`: canonical chainfire flaredb deployer backup/restore lane. It stores artifacts under `./work/durability-proof/latest`, proves logical backup/restore for ChainFire keys and FlareDB SQL rows, uses the canonical Deployer admin pre-register request itself as the backup artifact, verifies that the pre-registered node survives a `deployer.service` restart, replays the same request idempotently, and injects CoronaFS plus LightningStor failures on the live KVM cluster +- `run-publishable-kvm-suite.sh`: reproducible wrapper that captures the KVM environment, requires real `/dev/kvm` access, keeps runtime state under `./work` by default, and runs the full publishable nested-KVM trio in a single command +- `run-supported-surface-final-proof.sh`: one-shot local wrapper that keeps builders local, records environment metadata, builds `single-node-trial-vm`, runs `supported-surface-guard`, `single-node-quickstart`, and then the publishable nested-KVM suite into one dated log root +- `baremetal-iso-e2e`: materialized exact proof runner for the same canonical ISO harness; the build output keeps the attr stable, and `./result/bin/baremetal-iso-e2e` runs the real host-KVM proof with persisted log/meta +- `deployer-vm-smoke`: lightweight regression proving that `nix-agent` can activate a host-built target closure without guest-side compilation +- `deployer-vm-rollback`: smallest reproducible `nix-agent` rollback proof. It publishes a desired system with a failing `health_check_command`, expects observed status `rolled-back`, and confirms the node does not stay on the rejected target generation + +`single-node-trial-vm` and `single-node-quickstart` are the standalone VM-platform story. They keep the minimal KVM-backed surface separate from the rollout stack. + +The checked-in entrypoint for the publishable KVM proof is the local wrapper `./nix/test-cluster/run-publishable-kvm-suite.sh`. Runner-specific workflow wiring from `task/f5c70db0-baseline-profiles` is intentionally excluded from this baseline branch. +The 2026-04-10 local AMD/KVM proof snapshot is recorded under `./work/final-proofs/32f64c10-1b74-4d8a-8d7d-b2cc6bf6b4f0-final` for `supported-surface-guard`, `single-node-trial-vm`, and `single-node-quickstart`, under `./work/publishable-kvm-suite` for the passing `fresh-smoke`, `fresh-demo-vm-webapp`, `fresh-matrix`, and wrapper environment capture, and under `./work/rollout-soak/20260410T164549+0900` for the longer-running rollout/control-plane soak. +The 2026-04-10 exact bare-metal check-runner proof is recorded under `./work/baremetal-iso-e2e/0de75570-dabd-471b-95fe-5898c54e2e8c`; its outer `environment.txt` records `execution_model=materialized-check-runner`, while `state/environment.txt` records `vm_accelerator_mode=kvm`. + +## Responsibility Coverage + +- `baremetal-iso` and `baremetal-iso-e2e` are the canonical proof for `deployer -> installer -> nix-agent`. They cover phone-home, install-plan materialization, Disko, reboot, and desired-system activation, and they now share the same `verify-baremetal-iso.sh` runtime harness. +- `deployer-vm-smoke` is the smallest regression for the same `deployer -> nix-agent` boundary. 
It proves that a node can receive a prebuilt target closure and activate it without guest-side compilation. +- `deployer-vm-rollback` is the canonical operator proof for `nix-agent` health-check, rollback, and partial failure recovery. Use it with [rollout-bundle.md](rollout-bundle.md) when documenting or changing the host-local rollback contract. +- `portable-control-plane-regressions` keeps the main non-KVM-safe boundaries under continuous coverage by composing `deployer-bootstrap-e2e`, `host-lifecycle-e2e`, `deployer-vm-smoke`, and `fleet-scheduler-e2e` behind the canonical profile eval guard. +- `fresh-smoke` and `fresh-matrix` are the canonical proof for `deployer -> fleet-scheduler -> node-agent`. They cover native service placement, heartbeats, failover, and runtime reconciliation. +- `fresh-smoke` proves the supported `fleet-scheduler` maintenance semantics: short-lived `active -> draining -> active` transitions, fail-stop worker loss, and replica restoration after the node returns. +- `rollout-soak` is the longer-running companion lane for the same bundle. It validates exactly one planned drain cycle and one fail-stop worker-loss cycle on the two native-runtime workers, holds each degraded state for 30 seconds, restarts `deployer`, `fleet-scheduler`, `node-agent`, `chainfire`, and `flaredb`, and then revalidates the live cluster. It also writes `scope-fixed-contract.json`, `deployer-scope-fixed.txt`, and `fleet-scheduler-scope-fixed.txt` so the supported release boundary is captured in the proof root. The steady-state KVM nodes do not ship `nix-agent.service`, so the lane records scope markers there and leaves executable `nix-agent` proof to `deployer-vm-rollback`, `baremetal-iso`, and `baremetal-iso-e2e`. +- Multi-hour maintenance windows, pinned singleton relocation rules, dynamic ChainFire membership changes, destructive FlareDB schema rewrites, fully automated online migration, and large-cluster drain storms remain outside the release-proven scope and are called out explicitly in [rollout-bundle.md](rollout-bundle.md) and [control-plane-ops.md](control-plane-ops.md). +- `fresh-smoke` also covers `k8shost` separately from `fleet-scheduler`: `k8shost` exposes tenant pod and service semantics, while `fleet-scheduler` handles bare-metal host services. `k8shost` is fixed as an API/control-plane product surface; runtime dataplane helpers stay archived non-product. +- `fresh-matrix` keeps the shipped add-on surface honest: it exercises the supported `creditservice` quota, wallet, reservation, and API-gateway flows, the published `k8shost-server` API contract, the supported LightningStor bucket metadata plus object-version APIs, and the network-provider bundle contract for PrismNet ACL lifecycle plus FiberLB TCP and TLS-terminated listeners. +- `provider-vm-reality-proof` is the artifact-producing companion lane for that same provider or VM-hosting bundle. It records PrismNet port and ACL state, authoritative FlashDNS answers, FiberLB listener drain or restore artifacts, and PlasmaVMC migration or storage-handoff state in one dated proof root. +- PrismNet real OVS/OVN dataplane validation remains outside the supported local KVM surface. The current provider proof keeps tenant API lifecycle and attached-VM networking honest, but not a release-grade `ovn-nbctl` or hardware-switch dataplane path. +- FiberLB native BGP or BFD peer interop and hardware VIP ownership remain outside the supported local KVM surface. 
The current provider proof fixes the shipped contract to listener publication plus backend drain and re-convergence inside the lab. +- PlasmaVMC real-hardware migration or storage handoff remains a later hardware proof. The current provider proof fixes the release surface to KVM shared-storage migration on the local worker pair. +- Within that edge bundle, APIGateway is supported as stateless replicated instances behind an external L4 or VIP layer, but the release-facing proof remains the shipped single gateway-node layout on `node06`; live in-process reload is not promised, and config rollout stays restart-based. +- NightLight is supported as a single-node WAL/snapshot service; replicated HA metrics storage and per-tenant retention enforcement are not part of the current product contract. +- CreditService export and backend migration are supported as offline export/import or backend-native snapshot workflows, not live mixed-writer migration. +- FiberLB HTTPS health checks currently do not verify backend TLS certificates. Supported scope is limited to TCP reachability plus HTTP status for the backend endpoint until CA-aware verification is wired through config, server code, and the canonical harness. +- `durability-proof` is the canonical backup, restore, and failure-injection companion lane for the publishable KVM suite. Use it after `fresh-matrix` when you need persisted artifacts for `chainfire`, `flaredb`, `deployer`, `coronafs`, and `lightningstor`. +- `rollout-soak` is the longer-running maintenance and DR companion lane for the same control-plane and rollout bundle. Use it when a change is supposed to survive the current release boundary of one planned drain cycle, one fail-stop worker-loss cycle, and service-restart churn on the live KVM lab instead of only the short `fresh-smoke` window. +- `run-core-control-plane-ops-proof.sh` is the focused operator lifecycle proof for the core control plane. It records the fixed-membership ChainFire boundary, the FlareDB additive-first migration and destructive-DDL boundary, and the standalone IAM bootstrap hardening plus signing-key, credential, and mTLS rotation proof under `./work/core-control-plane-ops-proof`. +- The supported `deployer` HA and DR boundary is scope-fixed to one active writer plus optional cold-standby restore, not automatic multi-instance failover. The canonical runbook is to recover one writer, re-apply `ultracloud.cluster` generated state with `deployer-ctl apply`, replay preserved admin pre-register requests, and then verify state through the admin API or `deployer-ctl node inspect`; the unsupported multi-instance boundary is fixed in [rollout-bundle.md](rollout-bundle.md). +- The supported `node-agent` product contract is also fixed in [rollout-bundle.md](rollout-bundle.md): per-instance logs and pid metadata live under `${stateDir}/pids`, secrets must already exist in the rendered spec or mounted host files, host-path volumes are passed through but not provisioned, and upgrades are replace-and-reconcile operations rather than in-place patching. +- The dated 2026-04-10 proof root for that lane is `./work/durability-proof/20260410T120618+0900`; `result.json` records `success=true`, and the artifact set includes `deployer-post-restart-list.json`, `coronafs-node04-local-state.json`, and `lightningstor-head-during-node05-outage.json`. 
+- `single-node-quickstart` intentionally excludes `deployer`, `nix-agent`, `node-agent`, and `fleet-scheduler`, so the smallest trial surface stays focused on the VM-platform core instead of mixing rollout and scheduling responsibilities. + +The three `fresh-*` VM-cluster commands are the publishable nested-KVM suite. They require a Linux host with `/dev/kvm` and nested virtualization, and the harness stops at preflight by design when that device is absent. `single-node-quickstart` and `baremetal-iso` can still fall back to `TCG` for debugging, but the release-facing `baremetal-iso-e2e` runner now requires host KVM so the exact proof lane matches the shipped hardware proxy route. `deployer-vm-smoke` and `portable-control-plane-regressions` remain the supported non-KVM developer lanes. + +Release-facing completion now requires both of these to be green on the same branch: + +- the canonical bare-metal proof: `nix run ./nix/test-cluster#cluster -- baremetal-iso` plus `nix build .#checks.x86_64-linux.baremetal-iso-e2e` and `./result/bin/baremetal-iso-e2e` +- the publishable nested-KVM suite: `fresh-smoke`, `fresh-demo-vm-webapp`, and `fresh-matrix`, preferably through `./nix/test-cluster/run-publishable-kvm-suite.sh` + +Focused operator lifecycle proof for the core control plane: + +```bash +./nix/test-cluster/run-core-control-plane-ops-proof.sh ./work/core-control-plane-ops-proof/latest +``` + +This proof is lighter than the full KVM suite. It keeps `supported-surface-guard` honest for the control-plane contract, runs the standalone IAM signing-key rotation, credential rotation, and mTLS overlap rotation tests, and records the explicit ChainFire membership, FlareDB schema migration or destructive-DDL boundary, and IAM bootstrap hardening markers that the public docs now promise. +The dated 2026-04-10 artifact root for that lane is `./work/core-control-plane-ops-proof/20260410T172148+09:00`; it includes `iam-key-rotation-tests.log`, `iam-credential-rotation-tests.log`, `iam-mtls-rotation-tests.log`, `scope-fixed-contract.json`, and `result.json`. + +## Work Root Budget + +```bash +./nix/test-cluster/work-root-budget.sh status +./nix/test-cluster/work-root-budget.sh enforce +./nix/test-cluster/work-root-budget.sh cleanup-advice +./nix/test-cluster/work-root-budget.sh prune-proof-logs 2 +``` + +Use `./nix/test-cluster/work-root-budget.sh status` for reporting, `./nix/test-cluster/work-root-budget.sh enforce` when a local proof run should fail on budget overrun, and `./nix/test-cluster/work-root-budget.sh prune-proof-logs 2` for a safer dated-proof cleanup dry-run. + +The helper keeps the local proof path practical by reporting the current size of `./work`, `./work/test-cluster/state`, disposable runtime directories such as `./work/tmp` and `./work/publishable-kvm-runtime`, and the dated proof roots including `./work/provider-vm-reality-proof` and `./work/hardware-smoke`. The `enforce` mode turns those soft budgets into a non-zero local gate, and `prune-proof-logs` gives a safer dated-proof cleanup workflow before the final `nix store gc`. + +## Extended Measurements + +```bash +nix run ./nix/test-cluster#cluster -- fresh-bench-storage +``` + +`fresh-bench-storage` remains useful for storage regression tracking, but it is a benchmark path, not part of the minimal canonical publish gate. 
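+When running the heavier optional lanes back to back, one convenient local pattern is to let the budget helper gate the run; this is a convenience sketch built from the commands documented above, not a required wrapper.
+
+```bash
+# Sketch only: fail fast on a work-root budget overrun before a heavy optional lane.
+./nix/test-cluster/work-root-budget.sh enforce \
+  && nix run ./nix/test-cluster#cluster -- fresh-bench-storage
+```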
## Operational Commands @@ -53,8 +263,13 @@ nix run ./nix/test-cluster#cluster -- clean - package unit tests are useful but not sufficient - host-built VM clusters are the main integration signal +- bootstrap and rollout paths must stay evaluable independently of the larger VM-hosting feature set - distributed storage and virtualization paths must be checked under failure, not only at steady state -## Legacy Note +## Legacy And Experimental Paths -Older manual launch scripts under `baremetal/vm-cluster` are archived only for historical reference. They are not the release-validation path. +- `baremetal/vm-cluster` manual launch scripts are `legacy/manual`, not canonical validation +- direct `nix develop ./nix/test-cluster -c ./nix/test-cluster/run-cluster.sh ...` usage is a debugging path, not the publishable entrypoint +- standalone use of `netboot-control-plane` or `netboot-all-in-one` outside the documented profiles is a debugging path, not a fourth supported profile +- `netboot-worker`, Firecracker, mvisor, `k8shost-cni`, `k8shost-controllers`, and `lightningstor-csi` are archived non-product helpers and should not be presented as canonical entrypoints +- `netboot-base`, `pxe-server`, `vm-smoke-target`, and other helper images are internal or legacy building blocks, not supported profiles by themselves diff --git a/fiberlb/Cargo.lock b/fiberlb/Cargo.lock index dbc3390..39e2159 100644 --- a/fiberlb/Cargo.lock +++ b/fiberlb/Cargo.lock @@ -161,6 +161,45 @@ dependencies = [ "password-hash", ] +[[package]] +name = "asn1-rs" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56624a96882bb8c26d61312ae18cb45868e5a9992ea73c58e45c3101e56a1e60" +dependencies = [ + "asn1-rs-derive", + "asn1-rs-impl", + "displaydoc", + "nom", + "num-traits", + "rusticata-macros", + "thiserror 2.0.18", + "time", +] + +[[package]] +name = "asn1-rs-derive" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3109e49b1e4909e9db6515a30c633684d68cdeaa252f215214cb4fa1a5bfee2c" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "asn1-rs-impl" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b18050c2cd6fe86c3a76584ef5e0baf286d038cda203eb6223df2cc413565f7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "async-stream" version = "0.3.6" @@ -638,6 +677,26 @@ dependencies = [ "parking_lot_core", ] +[[package]] +name = "data-encoding" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea" + +[[package]] +name = "der-parser" +version = "10.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07da5016415d5a3c4dd39b11ed26f915f52fc4e0dc197d87908bc916e51bc1a6" +dependencies = [ + "asn1-rs", + "displaydoc", + "nom", + "num-bigint", + "num-traits", + "rusticata-macros", +] + [[package]] name = "deranged" version = "0.5.8" @@ -783,6 +842,7 @@ dependencies = [ "tracing", "tracing-subscriber", "uuid", + "x509-parser", ] [[package]] @@ -1783,6 +1843,12 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "mio" version = "1.1.1" @@ -1800,6 +1866,16 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -1853,6 +1929,15 @@ dependencies = [ "libc", ] +[[package]] +name = "oid-registry" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12f40cff3dde1b6087cc5d5f5d4d65712f34016a03ed60e9c08dcc392736b5b7" +dependencies = [ + "asn1-rs", +] + [[package]] name = "once_cell" version = "1.21.4" @@ -2422,6 +2507,15 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +[[package]] +name = "rusticata-macros" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf0c4a6ece9950b9abdb62b1cfcf2a68b3b67a10ba445b3bb85be2a293d0632" +dependencies = [ + "nom", +] + [[package]] name = "rustix" version = "1.1.4" @@ -3922,6 +4016,23 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "x509-parser" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d43b0f71ce057da06bc0851b23ee24f3f86190b07203dd8f567d0b706a185202" +dependencies = [ + "asn1-rs", + "data-encoding", + "der-parser", + "lazy_static", + "nom", + "oid-registry", + "rusticata-macros", + "thiserror 2.0.18", + "time", +] + [[package]] name = "yoke" version = "0.8.1" diff --git a/fiberlb/crates/fiberlb-server/Cargo.toml b/fiberlb/crates/fiberlb-server/Cargo.toml index 9e18f1e..2c10654 100644 --- a/fiberlb/crates/fiberlb-server/Cargo.toml +++ b/fiberlb/crates/fiberlb-server/Cargo.toml @@ -35,6 +35,7 @@ rustls = "0.23" rustls-pemfile = "2.0" tokio-rustls = "0.26" axum-server = { version = "0.7", features = ["tls-rustls"] } +x509-parser = "0.18" tracing = { workspace = true } tracing-subscriber = { workspace = true } diff --git a/fiberlb/crates/fiberlb-server/proto/api/attribute.proto b/fiberlb/crates/fiberlb-server/proto/api/attribute.proto index 529e3cd..0fbe797 100644 --- a/fiberlb/crates/fiberlb-server/proto/api/attribute.proto +++ b/fiberlb/crates/fiberlb-server/proto/api/attribute.proto @@ -574,8 +574,8 @@ message PrefixSID { // tlv is one of: message TLV { oneof tlv { - // IndexLabelTLV Type 1 (not yet implemented) - // OriginatorSRGBTLV Type 3 (not yet implemented) + // Type 1 is reserved for IndexLabelTLV. + // Type 3 is reserved for OriginatorSRGBTLV. SRv6L3ServiceTLV l3_service = 3; SRv6L2ServiceTLV l2_service = 4; } diff --git a/fiberlb/crates/fiberlb-server/src/dataplane.rs b/fiberlb/crates/fiberlb-server/src/dataplane.rs index 8a0f12c..2a93a9d 100644 --- a/fiberlb/crates/fiberlb-server/src/dataplane.rs +++ b/fiberlb/crates/fiberlb-server/src/dataplane.rs @@ -1,11 +1,11 @@ //! L4 TCP Data Plane for FiberLB //! -//! Handles TCP proxy functionality with round-robin backend selection. +//! Handles TCP proxy functionality with the published L4 balancing algorithms. 
use std::collections::HashMap; +use std::hash::{Hash, Hasher}; use std::net::SocketAddr; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; use tokio::net::{TcpListener, TcpStream}; @@ -14,7 +14,10 @@ use tokio::task::JoinHandle; use crate::maglev::MaglevTable; use crate::metadata::LbMetadataStore; -use fiberlb_types::{Backend, BackendStatus, ListenerId, Listener, PoolId, PoolAlgorithm, BackendAdminState}; +use fiberlb_types::{ + Backend, BackendAdminState, BackendId, BackendStatus, Listener, ListenerId, PoolAlgorithm, + PoolId, +}; /// Result type for data plane operations pub type Result = std::result::Result; @@ -56,6 +59,8 @@ pub struct DataPlane { metadata: Arc, listeners: Arc>>, pool_cache: Arc>>, + pool_counters: Arc>>, + active_connections: Arc>>, } impl DataPlane { @@ -67,6 +72,8 @@ impl DataPlane { metadata, listeners: Arc::new(RwLock::new(HashMap::new())), pool_cache: Arc::new(RwLock::new(HashMap::new())), + pool_counters: Arc::new(Mutex::new(HashMap::new())), + active_connections: Arc::new(Mutex::new(HashMap::new())), } } @@ -105,6 +112,8 @@ impl DataPlane { // Clone required state for the task let metadata = self.metadata.clone(); let pool_cache = self.pool_cache.clone(); + let pool_counters = self.pool_counters.clone(); + let active_connections = self.active_connections.clone(); let listener_id_clone = listener_id; // Spawn listener task @@ -117,6 +126,8 @@ impl DataPlane { tracing::debug!("Accepted connection from {}", peer_addr); let metadata = metadata.clone(); let pool_cache = pool_cache.clone(); + let pool_counters = pool_counters.clone(); + let active_connections = active_connections.clone(); let pool_id = pool_id; // Spawn connection handler @@ -126,6 +137,8 @@ impl DataPlane { peer_addr, metadata, pool_cache, + pool_counters, + active_connections, pool_id, ).await { tracing::debug!("Connection handler error: {}", e); @@ -205,18 +218,37 @@ impl DataPlane { peer_addr: SocketAddr, metadata: Arc, pool_cache: Arc>>, + pool_counters: Arc>>, + active_connections: Arc>>, pool_id: PoolId, ) -> Result<()> { - // Select a backend using client address for consistent hashing - let connection_key = peer_addr.to_string(); - let backend = Self::select_backend(&metadata, &pool_cache, &pool_id, &connection_key, false).await?; + let connection_key = peer_addr.ip().to_string(); + let backend = Self::select_backend( + &metadata, + &pool_cache, + &pool_counters, + &active_connections, + &pool_id, + &connection_key, + false, + ) + .await?; // Build backend address - let backend_stream = match Self::connect_backend(&backend).await { - Ok(stream) => stream, + let (backend, backend_stream) = match Self::connect_backend(&backend).await { + Ok(stream) => (backend, stream), Err(error) => { Self::invalidate_pool_cache(&pool_cache, &pool_id).await; - let fallback = Self::select_backend(&metadata, &pool_cache, &pool_id, &connection_key, true).await?; + let fallback = Self::select_backend( + &metadata, + &pool_cache, + &pool_counters, + &active_connections, + &pool_id, + &connection_key, + true, + ) + .await?; if fallback.id == backend.id { return Err(error); } @@ -225,10 +257,13 @@ impl DataPlane { fallback_backend = %fallback.id, "Retrying FiberLB backend connection after cache refresh" ); - Self::connect_backend(&fallback).await? 
+ let fallback_stream = Self::connect_backend(&fallback).await?; + (fallback, fallback_stream) } }; + let _active_guard = ActiveConnectionGuard::new(active_connections, backend.id); + // Proxy bidirectionally Self::proxy_bidirectional(client, backend_stream).await } @@ -249,6 +284,8 @@ impl DataPlane { async fn select_backend( metadata: &Arc, pool_cache: &Arc>>, + pool_counters: &Arc>>, + active_connections: &Arc>>, pool_id: &PoolId, connection_key: &str, force_refresh: bool, @@ -257,22 +294,29 @@ impl DataPlane { let healthy = snapshot.healthy_backends; // Select based on algorithm - match snapshot.algorithm { + let index = match snapshot.algorithm { + PoolAlgorithm::RoundRobin => Self::next_pool_counter(pool_counters, pool_id) % healthy.len(), + PoolAlgorithm::LeastConnections => { + Self::least_connections_index(active_connections, pool_counters, pool_id, &healthy) + } + PoolAlgorithm::IpHash => Self::stable_hash(connection_key) % healthy.len(), + PoolAlgorithm::WeightedRoundRobin => { + Self::weighted_round_robin_index(pool_counters, pool_id, &healthy) + } + PoolAlgorithm::Random => { + let offset = Self::next_pool_counter(pool_counters, pool_id); + Self::stable_hash(&(connection_key, offset)) % healthy.len() + } PoolAlgorithm::Maglev => { // Use Maglev consistent hashing let table = MaglevTable::new(&healthy, None); - let idx = table.lookup(connection_key) - .ok_or(DataPlaneError::NoHealthyBackends)?; - Ok(healthy[idx].clone()) + table + .lookup(connection_key) + .ok_or(DataPlaneError::NoHealthyBackends)? } - _ => { - // Default: Round-robin for all other algorithms - // TODO: Implement LeastConnections, IpHash, WeightedRoundRobin, Random - static COUNTER: AtomicUsize = AtomicUsize::new(0); - let idx = COUNTER.fetch_add(1, Ordering::Relaxed) % healthy.len(); - Ok(healthy.into_iter().nth(idx).unwrap()) - } - } + }; + + Ok(healthy[index].clone()) } async fn get_pool_snapshot( @@ -326,6 +370,80 @@ impl DataPlane { Ok(snapshot) } + fn stable_hash(value: &T) -> usize { + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + value.hash(&mut hasher); + hasher.finish() as usize + } + + fn next_pool_counter(pool_counters: &Arc>>, pool_id: &PoolId) -> usize { + let mut counters = pool_counters.lock().expect("pool counters poisoned"); + let counter = counters.entry(*pool_id).or_insert(0); + let current = *counter; + *counter = counter.wrapping_add(1); + current + } + + fn active_connection_count( + active_connections: &Arc>>, + backend_id: &BackendId, + ) -> usize { + let counts = active_connections + .lock() + .expect("active connection counters poisoned"); + counts.get(backend_id).copied().unwrap_or(0) + } + + fn least_connections_index( + active_connections: &Arc>>, + pool_counters: &Arc>>, + pool_id: &PoolId, + backends: &[Backend], + ) -> usize { + let min_connections = backends + .iter() + .map(|backend| Self::active_connection_count(active_connections, &backend.id)) + .min() + .unwrap_or(0); + + let least_loaded = backends + .iter() + .enumerate() + .filter_map(|(index, backend)| { + let count = Self::active_connection_count(active_connections, &backend.id); + (count == min_connections).then_some(index) + }) + .collect::>(); + + let offset = Self::next_pool_counter(pool_counters, pool_id) % least_loaded.len(); + least_loaded[offset] + } + + fn weighted_round_robin_index( + pool_counters: &Arc>>, + pool_id: &PoolId, + backends: &[Backend], + ) -> usize { + let total_weight = backends + .iter() + .map(|backend| backend.weight.max(1) as usize) + .sum::(); + if total_weight == 
0 { + return 0; + } + + let mut offset = Self::next_pool_counter(pool_counters, pool_id) % total_weight; + for (index, backend) in backends.iter().enumerate() { + let weight = backend.weight.max(1) as usize; + if offset < weight { + return index; + } + offset -= weight; + } + + 0 + } + async fn invalidate_pool_cache( pool_cache: &Arc>>, pool_id: &PoolId, @@ -378,9 +496,67 @@ impl DataPlane { } } +struct ActiveConnectionGuard { + active_connections: Arc>>, + backend_id: BackendId, +} + +impl ActiveConnectionGuard { + fn new( + active_connections: Arc>>, + backend_id: BackendId, + ) -> Self { + let mut counts = active_connections + .lock() + .expect("active connection counters poisoned"); + *counts.entry(backend_id).or_insert(0) += 1; + drop(counts); + + Self { + active_connections, + backend_id, + } + } +} + +impl Drop for ActiveConnectionGuard { + fn drop(&mut self) { + let mut counts = self + .active_connections + .lock() + .expect("active connection counters poisoned"); + if let Some(count) = counts.get_mut(&self.backend_id) { + if *count > 1 { + *count -= 1; + } else { + counts.remove(&self.backend_id); + } + } + } +} + #[cfg(test)] mod tests { use super::*; + use fiberlb_types::{LoadBalancerId, Pool, PoolProtocol}; + + async fn seed_pool( + metadata: &Arc, + algorithm: PoolAlgorithm, + backends: &[(String, u16, u32)], + ) -> PoolId { + let pool = Pool::new("test-pool", LoadBalancerId::new(), algorithm, PoolProtocol::Tcp); + metadata.save_pool(&pool).await.unwrap(); + + for (index, (address, port, weight)) in backends.iter().enumerate() { + let mut backend = Backend::new(format!("backend-{index}"), pool.id, address.clone(), *port); + backend.weight = *weight; + backend.status = BackendStatus::Online; + metadata.save_backend(&backend).await.unwrap(); + } + + pool.id + } #[tokio::test] async fn test_dataplane_creation() { @@ -409,11 +585,15 @@ mod tests { async fn test_backend_selection_empty() { let metadata = Arc::new(LbMetadataStore::new_in_memory()); let pool_cache = Arc::new(RwLock::new(HashMap::new())); + let pool_counters = Arc::new(Mutex::new(HashMap::new())); + let active_connections = Arc::new(Mutex::new(HashMap::new())); let pool_id = PoolId::new(); let result = DataPlane::select_backend( &metadata, &pool_cache, + &pool_counters, + &active_connections, &pool_id, "192.168.1.1:54321", false, @@ -422,4 +602,113 @@ mod tests { assert!(result.is_err()); // Expecting PoolNotFound since pool doesn't exist } + + #[tokio::test] + async fn test_weighted_round_robin_selection_respects_weights() { + let metadata = Arc::new(LbMetadataStore::new_in_memory()); + let pool_cache = Arc::new(RwLock::new(HashMap::new())); + let pool_counters = Arc::new(Mutex::new(HashMap::new())); + let active_connections = Arc::new(Mutex::new(HashMap::new())); + let pool_id = seed_pool( + &metadata, + PoolAlgorithm::WeightedRoundRobin, + &[ + ("10.0.0.1".to_string(), 8080, 1), + ("10.0.0.2".to_string(), 8080, 3), + ], + ) + .await; + + let sequence = [ + DataPlane::select_backend(&metadata, &pool_cache, &pool_counters, &active_connections, &pool_id, "client-a", false).await.unwrap().address, + DataPlane::select_backend(&metadata, &pool_cache, &pool_counters, &active_connections, &pool_id, "client-b", false).await.unwrap().address, + DataPlane::select_backend(&metadata, &pool_cache, &pool_counters, &active_connections, &pool_id, "client-c", false).await.unwrap().address, + DataPlane::select_backend(&metadata, &pool_cache, &pool_counters, &active_connections, &pool_id, "client-d", false).await.unwrap().address, + 
]; + + assert_eq!(sequence, ["10.0.0.1", "10.0.0.2", "10.0.0.2", "10.0.0.2"]); + } + + #[tokio::test] + async fn test_least_connections_prefers_less_loaded_backend() { + let metadata = Arc::new(LbMetadataStore::new_in_memory()); + let pool_cache = Arc::new(RwLock::new(HashMap::new())); + let pool_counters = Arc::new(Mutex::new(HashMap::new())); + let active_connections = Arc::new(Mutex::new(HashMap::new())); + let pool_id = seed_pool( + &metadata, + PoolAlgorithm::LeastConnections, + &[ + ("10.0.0.1".to_string(), 8080, 1), + ("10.0.0.2".to_string(), 8080, 1), + ], + ) + .await; + + let snapshot = DataPlane::get_pool_snapshot(&metadata, &pool_cache, &pool_id, false) + .await + .unwrap(); + let loaded_backend = snapshot.healthy_backends[0].id; + active_connections + .lock() + .unwrap() + .insert(loaded_backend, 4); + + let selected = DataPlane::select_backend( + &metadata, + &pool_cache, + &pool_counters, + &active_connections, + &pool_id, + "least-client", + false, + ) + .await + .unwrap(); + + assert_eq!(selected.address, "10.0.0.2"); + } + + #[tokio::test] + async fn test_ip_hash_is_stable_for_same_source_ip() { + let metadata = Arc::new(LbMetadataStore::new_in_memory()); + let pool_cache = Arc::new(RwLock::new(HashMap::new())); + let pool_counters = Arc::new(Mutex::new(HashMap::new())); + let active_connections = Arc::new(Mutex::new(HashMap::new())); + let pool_id = seed_pool( + &metadata, + PoolAlgorithm::IpHash, + &[ + ("10.0.0.1".to_string(), 8080, 1), + ("10.0.0.2".to_string(), 8080, 1), + ("10.0.0.3".to_string(), 8080, 1), + ], + ) + .await; + + let first = DataPlane::select_backend( + &metadata, + &pool_cache, + &pool_counters, + &active_connections, + &pool_id, + "192.168.10.44", + false, + ) + .await + .unwrap(); + let second = DataPlane::select_backend( + &metadata, + &pool_cache, + &pool_counters, + &active_connections, + &pool_id, + "192.168.10.44", + false, + ) + .await + .unwrap(); + + assert_eq!(first.id, second.id); + } } diff --git a/fiberlb/crates/fiberlb-server/src/healthcheck.rs b/fiberlb/crates/fiberlb-server/src/healthcheck.rs index 2a6c69a..81063aa 100644 --- a/fiberlb/crates/fiberlb-server/src/healthcheck.rs +++ b/fiberlb/crates/fiberlb-server/src/healthcheck.rs @@ -152,7 +152,10 @@ impl HealthChecker { self.http_check(backend, path).await } HealthCheckType::Https => { - // For now, treat HTTPS same as HTTP (no TLS verification) + // HTTPS backends currently use the HTTP probe path without + // backend certificate verification. The supported surface is + // documented as TCP reachability plus HTTP status only until + // CA-aware verification is added to the config and harness. let path = hc_config .and_then(|hc| hc.http_config.as_ref()) .map(|cfg| cfg.path.as_str()) diff --git a/fiberlb/crates/fiberlb-server/src/l7_dataplane.rs b/fiberlb/crates/fiberlb-server/src/l7_dataplane.rs index ca5c868..6a288e5 100644 --- a/fiberlb/crates/fiberlb-server/src/l7_dataplane.rs +++ b/fiberlb/crates/fiberlb-server/src/l7_dataplane.rs @@ -1,30 +1,33 @@ //! L7 (HTTP/HTTPS) Data Plane //! -//! Provides HTTP-aware load balancing with content-based routing, TLS termination, +//! Provides HTTP-aware load balancing with content-based routing, TLS-terminated HTTPS, //! and session persistence. 
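// The L7 proxy below pins IpHash and Maglev pools by hashing stable request
// attributes (method, path-and-query, Host). A std-only sketch of that affinity
// idea, assuming backend_count >= 1; the helper is illustrative, not this crate's API:

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

fn affinity_index(method: &str, path: &str, host: Option<&str>, backend_count: usize) -> usize {
    // Hashing the same attributes always yields the same index, so repeated requests
    // stay on one backend as long as the healthy backend list does not change.
    let mut hasher = DefaultHasher::new();
    (method, path, host).hash(&mut hasher);
    (hasher.finish() as usize) % backend_count
}

// affinity_index("GET", "/health", Some("example.test"), 3) returns the same index on
// every call within a process, which is the stability the IpHash test below exercises.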
use axum::{ body::Body, extract::{Request, State}, - http::{header, HeaderValue, StatusCode, Uri}, + http::{header, HeaderValue, StatusCode, Uri, Version}, response::{IntoResponse, Response}, routing::any, Router, }; use hyper_util::client::legacy::connect::HttpConnector; use hyper_util::client::legacy::Client; -use hyper_util::rt::TokioExecutor; +use hyper_util::rt::{TokioExecutor, TokioIo}; +use hyper_util::service::TowerToHyperService; use std::collections::HashMap; use std::net::SocketAddr; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use tokio::sync::RwLock; use tokio::task::JoinHandle; +use tokio_rustls::TlsAcceptor; use crate::l7_router::{L7Router, RequestInfo, RoutingResult}; use crate::metadata::LbMetadataStore; +use crate::tls::build_tls_config; use fiberlb_types::{ - Backend, BackendAdminState, BackendStatus, Listener, ListenerId, ListenerProtocol, PoolAlgorithm, - PoolId, + Backend, BackendAdminState, BackendId, BackendStatus, CertificateId, Listener, ListenerId, + ListenerProtocol, PoolAlgorithm, PoolId, }; type Result = std::result::Result; @@ -37,8 +40,10 @@ pub enum L7Error { InvalidProtocol, #[error("TLS config missing for HTTPS listener")] TlsConfigMissing, - #[error("TLS termination not implemented for HTTPS listeners")] - TlsNotImplemented, + #[error("TLS certificate not found: {0}")] + TlsCertificateNotFound(String), + #[error("TLS configuration error: {0}")] + TlsConfig(String), #[error("Backend unavailable: {0}")] BackendUnavailable(String), #[error("Proxy error: {0}")] @@ -59,6 +64,7 @@ pub struct L7DataPlane { http_client: Client, listeners: Arc>>, pool_counters: Arc>>, + active_requests: Arc>>, } impl L7DataPlane { @@ -74,6 +80,7 @@ impl L7DataPlane { http_client, listeners: Arc::new(RwLock::new(HashMap::new())), pool_counters: Arc::new(RwLock::new(HashMap::new())), + active_requests: Arc::new(Mutex::new(HashMap::new())), } } @@ -91,14 +98,11 @@ impl L7DataPlane { .parse() .map_err(|e| L7Error::ProxyError(format!("Invalid bind address: {}", e)))?; - // For now, only implement HTTP (HTTPS/TLS in Phase 3) match listener.protocol { - ListenerProtocol::Http => { - self.start_http_server(listener_id, bind_addr, app).await - } + ListenerProtocol::Http => self.start_http_server(listener_id, bind_addr, app).await, ListenerProtocol::Https | ListenerProtocol::TerminatedHttps => { - // TODO: Phase 3 - TLS termination - Err(L7Error::TlsNotImplemented) + self.start_tls_server(listener_id, bind_addr, app, &listener) + .await } _ => Err(L7Error::InvalidProtocol), } @@ -138,6 +142,7 @@ impl L7DataPlane { listener_id: listener.id, default_pool_id: listener.default_pool_id.clone(), pool_counters: self.pool_counters.clone(), + active_requests: self.active_requests.clone(), }; Ok(Router::new() @@ -174,6 +179,103 @@ impl L7DataPlane { Ok(()) } + + async fn start_tls_server( + &self, + listener_id: ListenerId, + bind_addr: SocketAddr, + app: Router, + listener: &Listener, + ) -> Result<()> { + let tls = listener + .tls_config + .as_ref() + .ok_or(L7Error::TlsConfigMissing)?; + let certificate_id = parse_certificate_id(&tls.certificate_id)?; + let certificate = self + .metadata + .find_certificate_by_id(&certificate_id) + .await + .map_err(|error| L7Error::Metadata(error.to_string()))? 
+ .ok_or_else(|| L7Error::TlsCertificateNotFound(tls.certificate_id.clone()))?; + let tls_config = build_tls_config( + &certificate.certificate, + &certificate.private_key, + tls.min_version, + ) + .map_err(|error| L7Error::TlsConfig(error.to_string()))?; + let acceptor = TlsAcceptor::from(Arc::new(tls_config)); + + tracing::info!( + listener_id = %listener_id, + addr = %bind_addr, + "Starting L7 HTTPS listener" + ); + + let tcp_listener = tokio::net::TcpListener::bind(bind_addr) + .await + .map_err(|e| L7Error::ProxyError(format!("Failed to bind: {}", e)))?; + + let task = tokio::spawn(async move { + loop { + match tcp_listener.accept().await { + Ok((stream, peer_addr)) => { + let acceptor = acceptor.clone(); + let app = app.clone(); + tokio::spawn(async move { + match acceptor.accept(stream).await { + Ok(tls_stream) => { + let io = TokioIo::new(tls_stream); + let builder = hyper_util::server::conn::auto::Builder::new( + TokioExecutor::new(), + ); + let service = TowerToHyperService::new(app); + if let Err(error) = builder + .serve_connection_with_upgrades(io, service) + .await + { + tracing::warn!( + listener_id = %listener_id, + peer_addr = %peer_addr, + error = %error, + "HTTPS server connection ended with error" + ); + } + } + Err(error) => { + tracing::warn!( + listener_id = %listener_id, + peer_addr = %peer_addr, + error = %error, + "TLS handshake failed" + ); + } + } + }); + } + Err(error) => { + tracing::error!( + listener_id = %listener_id, + error = %error, + "HTTPS accept error" + ); + } + } + } + }); + + let mut listeners = self.listeners.write().await; + listeners.insert(listener_id, L7ListenerHandle { task }); + + Ok(()) + } +} + +fn parse_certificate_id(id: &str) -> Result { + let uuid = id + .parse() + .map_err(|_| L7Error::TlsConfig(format!("invalid certificate ID: {id}")))?; + Ok(CertificateId::from_uuid(uuid)) } /// Shared state for proxy handlers @@ -185,6 +287,7 @@ struct ProxyState { listener_id: ListenerId, default_pool_id: Option, pool_counters: Arc>>, + active_requests: Arc>>, } /// Main proxy request handler @@ -246,6 +349,7 @@ async fn proxy_to_pool( return text_response(StatusCode::SERVICE_UNAVAILABLE, error.to_string()); } }; + let _active_request = ActiveRequestGuard::new(state.active_requests.clone(), backend.id); let path_and_query = request .uri() @@ -267,8 +371,7 @@ async fn proxy_to_pool( }; let (mut parts, body) = request.into_parts(); - parts.uri = target_uri; - rewrite_proxy_headers(&mut parts.headers, &backend_host); + rewrite_backend_request_parts(&mut parts, target_uri, &backend_host); match state.http_client.request(Request::from_parts(parts, body)).await { Ok(response) => { @@ -318,10 +421,9 @@ async fn select_backend( let index = match pool.algorithm { PoolAlgorithm::IpHash | PoolAlgorithm::Maglev => request_hash % backends.len(), PoolAlgorithm::WeightedRoundRobin => weighted_round_robin_index(state, pool_id, &backends).await, - PoolAlgorithm::Random => next_counter(state, pool_id).await % backends.len(), - PoolAlgorithm::LeastConnections | PoolAlgorithm::RoundRobin => { - next_counter(state, pool_id).await % backends.len() - } + PoolAlgorithm::Random => random_index(state, pool_id, request_hash, backends.len()).await, + PoolAlgorithm::LeastConnections => least_connections_index(state, pool_id, &backends).await, + PoolAlgorithm::RoundRobin => next_counter(state, pool_id).await % backends.len(), }; Ok(backends[index].clone()) @@ -365,17 +467,69 @@ async fn weighted_round_robin_index( 0 } +async fn random_index( + state: &ProxyState, + pool_id: 
PoolId, + request_hash: usize, + backend_count: usize, +) -> usize { + let offset = next_counter(state, pool_id).await; + stable_hash(&(request_hash, offset)) % backend_count +} + +async fn least_connections_index( + state: &ProxyState, + pool_id: PoolId, + backends: &[Backend], +) -> usize { + let min_requests = { + let counts = state + .active_requests + .lock() + .expect("active request counters poisoned"); + backends + .iter() + .map(|backend| counts.get(&backend.id).copied().unwrap_or(0)) + .min() + .unwrap_or(0) + }; + + let candidates = { + let counts = state + .active_requests + .lock() + .expect("active request counters poisoned"); + backends + .iter() + .enumerate() + .filter_map(|(index, backend)| { + let count = counts.get(&backend.id).copied().unwrap_or(0); + (count == min_requests).then_some(index) + }) + .collect::>() + }; + + let offset = next_counter(state, pool_id).await % candidates.len(); + candidates[offset] +} + fn stable_request_hash(request: &Request) -> usize { - use std::hash::{Hash, Hasher}; + stable_hash(&( + request.method().clone(), + request.uri().path_and_query().map(|value| value.as_str().to_string()), + request + .headers() + .get(header::HOST) + .and_then(|value| value.to_str().ok()) + .map(str::to_string), + )) +} + +fn stable_hash(value: &T) -> usize { + use std::hash::Hasher; let mut hasher = std::collections::hash_map::DefaultHasher::new(); - request.method().hash(&mut hasher); - request.uri().path_and_query().map(|value| value.as_str()).hash(&mut hasher); - request - .headers() - .get(header::HOST) - .and_then(|value| value.to_str().ok()) - .hash(&mut hasher); + std::hash::Hash::hash(value, &mut hasher); hasher.finish() as usize } @@ -393,9 +547,82 @@ fn rewrite_proxy_headers(headers: &mut axum::http::HeaderMap, backend_host: &str } } +fn rewrite_backend_request_parts( + parts: &mut axum::http::request::Parts, + target_uri: Uri, + backend_host: &str, +) { + parts.uri = target_uri; + parts.version = Version::HTTP_11; + rewrite_proxy_headers(&mut parts.headers, backend_host); +} + fn text_response(status: StatusCode, body: impl Into) -> Response { Response::builder() .status(status) .body(body.into()) .unwrap() } + +struct ActiveRequestGuard { + active_requests: Arc>>, + backend_id: BackendId, +} + +impl ActiveRequestGuard { + fn new(active_requests: Arc>>, backend_id: BackendId) -> Self { + let mut counts = active_requests + .lock() + .expect("active request counters poisoned"); + *counts.entry(backend_id).or_insert(0) += 1; + drop(counts); + + Self { + active_requests, + backend_id, + } + } +} + +impl Drop for ActiveRequestGuard { + fn drop(&mut self) { + let mut counts = self + .active_requests + .lock() + .expect("active request counters poisoned"); + if let Some(count) = counts.get_mut(&self.backend_id) { + if *count > 1 { + *count -= 1; + } else { + counts.remove(&self.backend_id); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn rewrite_backend_request_parts_sets_http11_and_host() { + let request = Request::builder() + .uri("https://frontend.example.test/health") + .version(Version::HTTP_2) + .header(header::HOST, "frontend.example.test") + .body(Body::empty()) + .expect("request"); + let target_uri: Uri = "http://10.0.0.10:8081/health".parse().expect("uri"); + let backend_host = "10.0.0.10:8081"; + + let (mut parts, _body) = request.into_parts(); + rewrite_backend_request_parts(&mut parts, target_uri.clone(), backend_host); + + assert_eq!(parts.version, Version::HTTP_11); + assert_eq!(parts.uri, target_uri); + 
assert_eq!( + parts.headers.get(header::HOST).and_then(|value| value.to_str().ok()), + Some(backend_host) + ); + } +} diff --git a/fiberlb/crates/fiberlb-server/src/services/certificate.rs b/fiberlb/crates/fiberlb-server/src/services/certificate.rs index 6c0d9b5..17ddbac 100644 --- a/fiberlb/crates/fiberlb-server/src/services/certificate.rs +++ b/fiberlb/crates/fiberlb-server/src/services/certificate.rs @@ -18,6 +18,7 @@ use fiberlb_types::{ use iam_service_auth::{get_tenant_context, resource_for_tenant, AuthService}; use tonic::{Request, Response, Status}; use uuid::Uuid; +use x509_parser::parse_x509_certificate; /// Certificate service implementation pub struct CertificateServiceImpl { @@ -82,6 +83,26 @@ fn proto_to_cert_type(cert_type: i32) -> CertificateType { } } +fn parse_certificate_expiry(certificate_pem: &str) -> Result { + let cert_chain = rustls_pemfile::certs(&mut certificate_pem.as_bytes()) + .collect::, _>>() + .map_err(|e| Status::invalid_argument(format!("failed to parse certificate PEM: {e}")))?; + let cert_der = cert_chain + .first() + .ok_or_else(|| Status::invalid_argument("certificate PEM did not contain any certificates"))?; + + let (_, parsed) = parse_x509_certificate(cert_der.as_ref()) + .map_err(|e| Status::invalid_argument(format!("failed to parse X.509 certificate: {e:?}")))?; + let expires_at = parsed.validity().not_after.timestamp(); + if expires_at <= 0 { + return Err(Status::invalid_argument( + "certificate expiry must be after the Unix epoch", + )); + } + + Ok(expires_at as u64) +} + #[tonic::async_trait] impl CertificateService for CertificateServiceImpl { async fn create_certificate( @@ -128,13 +149,7 @@ impl CertificateService for CertificateServiceImpl { // Parse certificate type let cert_type = proto_to_cert_type(req.cert_type); - - // TODO: Parse certificate to extract expiry date - // For now, set expires_at to 1 year from now - let expires_at = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_secs() + (365 * 24 * 60 * 60); + let expires_at = parse_certificate_expiry(&req.certificate)?; // Create new certificate let certificate = Certificate::new( @@ -335,3 +350,14 @@ impl CertificateService for CertificateServiceImpl { Ok(Response::new(DeleteCertificateResponse {})) } } + +#[cfg(test)] +mod tests { + use super::parse_certificate_expiry; + + #[test] + fn parse_certificate_expiry_rejects_invalid_pem() { + let err = parse_certificate_expiry("not-a-certificate").unwrap_err(); + assert_eq!(err.code(), tonic::Code::InvalidArgument); + } +} diff --git a/fiberlb/crates/fiberlb-server/src/tls.rs b/fiberlb/crates/fiberlb-server/src/tls.rs index ab22a3f..4d1d7a2 100644 --- a/fiberlb/crates/fiberlb-server/src/tls.rs +++ b/fiberlb/crates/fiberlb-server/src/tls.rs @@ -1,6 +1,6 @@ //! TLS Configuration and Certificate Management //! -//! Provides rustls-based TLS termination with SNI support for L7 HTTPS listeners. +//! Provides rustls-based terminated-HTTPS support with SNI for L7 listeners. 
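// build_tls_config below installs the process-wide rustls crypto provider through a
// std::sync::Once guard so concurrent listeners cannot race the one-time setup. A
// std-only sketch of that run-once idiom; the init function and counter are
// illustrative, not this crate's API:

use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Once;

static INIT: Once = Once::new();
static INIT_RUNS: AtomicUsize = AtomicUsize::new(0);

fn ensure_initialized() {
    INIT.call_once(|| {
        // Process-wide setup runs exactly once, no matter how many listeners call
        // this before building their TLS configs; later calls are no-ops.
        INIT_RUNS.fetch_add(1, Ordering::SeqCst);
    });
}

fn main() {
    ensure_initialized();
    ensure_initialized();
    assert_eq!(INIT_RUNS.load(Ordering::SeqCst), 1);
}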
use rustls::crypto::ring::sign::any_supported_type; use rustls::pki_types::CertificateDer; @@ -9,7 +9,7 @@ use rustls::sign::CertifiedKey; use rustls::ServerConfig; use std::collections::HashMap; use std::io::Cursor; -use std::sync::Arc; +use std::sync::{Arc, Once}; use fiberlb_types::{Certificate, CertificateId, LoadBalancerId, TlsVersion}; @@ -29,12 +29,21 @@ pub enum TlsError { CertificateNotFound(String), } +fn ensure_crypto_provider() { + static INIT: Once = Once::new(); + INIT.call_once(|| { + let _ = rustls::crypto::ring::default_provider().install_default(); + }); +} + /// Build TLS server configuration from certificate and private key pub fn build_tls_config( cert_pem: &str, key_pem: &str, min_version: TlsVersion, ) -> Result { + ensure_crypto_provider(); + // Parse certificate chain from PEM let mut cert_reader = Cursor::new(cert_pem.as_bytes()); let certs: Vec = rustls_pemfile::certs(&mut cert_reader) @@ -69,6 +78,8 @@ pub fn build_tls_config( } pub fn build_certified_key(cert_pem: &str, key_pem: &str) -> Result> { + ensure_crypto_provider(); + let mut cert_reader = Cursor::new(cert_pem.as_bytes()); let certs: Vec = rustls_pemfile::certs(&mut cert_reader) .collect::, _>>() diff --git a/fiberlb/crates/fiberlb-types/src/certificate.rs b/fiberlb/crates/fiberlb-types/src/certificate.rs index d6d9d68..e059494 100644 --- a/fiberlb/crates/fiberlb-types/src/certificate.rs +++ b/fiberlb/crates/fiberlb-types/src/certificate.rs @@ -40,7 +40,7 @@ impl std::fmt::Display for CertificateId { /// TLS Certificate /// -/// Stores X.509 certificates and private keys for TLS termination. +/// Stores X.509 certificates and private keys for terminated HTTPS listeners. /// Certificates are stored in PEM format and should be encrypted at rest /// in production deployments. #[derive(Debug, Clone, Serialize, Deserialize)] @@ -76,7 +76,7 @@ pub struct Certificate { #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum CertificateType { - /// Standard server certificate for TLS termination + /// Standard server certificate for terminated HTTPS listeners Server, /// CA certificate for client authentication ClientCa, diff --git a/fiberlb/crates/fiberlb-types/src/listener.rs b/fiberlb/crates/fiberlb-types/src/listener.rs index 2c8cdf0..2eb4614 100644 --- a/fiberlb/crates/fiberlb-types/src/listener.rs +++ b/fiberlb/crates/fiberlb-types/src/listener.rs @@ -50,7 +50,7 @@ pub enum ListenerProtocol { Udp, /// HTTP (L7) Http, - /// HTTPS (L7 with TLS termination) + /// HTTPS (L7 with terminated HTTPS) Https, /// Terminated HTTPS (pass through to HTTP backend) TerminatedHttps, diff --git a/flake.nix b/flake.nix index 9166f57..acce039 100644 --- a/flake.nix +++ b/flake.nix @@ -66,6 +66,7 @@ }; clusterPython = pkgs.python3.withPackages (ps: [ ps.python-snappy ]); + singleNodeSurface = import ./nix/single-node/surface.nix; # Keep Rust package builds stable without invalidating every package on # unrelated workspace changes. 
@@ -963,6 +964,352 @@ self.packages.${system}.vmClusterDeployerState ]; }; + + single-node-quickstart-vm = + self.nixosConfigurations.single-node-quickstart.config.system.build.vm; + + single-node-trial-vm = self.packages.${system}.single-node-quickstart-vm; + + single-node-trial-manifest = + pkgs.writeText "single-node-trial-manifest.json" + (builtins.toJSON singleNodeSurface); + + single-node-quickstart = pkgs.writeShellApplication { + name = "single-node-quickstart"; + runtimeInputs = with pkgs; [ + coreutils + findutils + netcat + nix + openssh + procps + sshpass + ]; + text = '' + set -euo pipefail + + REPO_FLAKE="${self}" + WORK_ROOT="''${ULTRACLOUD_QUICKSTART_WORK_ROOT:-$PWD/work}" + STATE_DIR="''${ULTRACLOUD_QUICKSTART_STATE_DIR:-$WORK_ROOT/single-node-quickstart}" + RUN_DIR="$STATE_DIR/run" + DISK_IMAGE="$STATE_DIR/quickstart.qcow2" + PID_FILE="$STATE_DIR/qemu.pid" + SERIAL_LOG="$STATE_DIR/serial.log" + METADATA_FILE="$STATE_DIR/run.env" + BUILD_LOG="$STATE_DIR/build-vm.log" + BUILD_PATH_FILE="$STATE_DIR/vm-path.txt" + SSH_PORT="''${ULTRACLOUD_QUICKSTART_SSH_PORT:-22220}" + KEEP_VM="''${ULTRACLOUD_QUICKSTART_KEEP_VM:-0}" + REUSE_DISK="''${ULTRACLOUD_QUICKSTART_REUSE_DISK:-0}" + HOST_CPU_COUNT="" + LOCAL_NIX_MAX_JOBS="" + LOCAL_NIX_BUILD_CORES="" + VM_PATH="" + RUN_VM="" + + log() { + printf '[single-node-quickstart] %s\n' "$*" + } + + host_cpu_count() { + local count + count="$(getconf _NPROCESSORS_ONLN 2>/dev/null || nproc 2>/dev/null || echo 1)" + if [[ ! "$count" =~ ^[0-9]+$ ]] || (( count < 1 )); then + count=1 + fi + printf '%s\n' "$count" + } + + default_local_nix_max_jobs() { + local cpu_count="$1" + if (( cpu_count <= 2 )); then + printf '1\n' + return 0 + fi + + printf '%s\n' "$(( (cpu_count + 1) / 2 ))" + } + + default_local_nix_build_cores() { + local cpu_count="$1" + local max_jobs="$2" + local build_cores=1 + + if (( max_jobs > 0 )); then + build_cores="$(( cpu_count / max_jobs ))" + fi + if (( build_cores < 1 )); then + build_cores=1 + fi + + printf '%s\n' "$build_cores" + } + + append_nix_config_line() { + local line="$1" + if [[ -n "''${NIX_CONFIG:-}" ]]; then + NIX_CONFIG+=$'\n' + fi + NIX_CONFIG+="''${line}" + } + + configure_local_nix_execution() { + append_nix_config_line "builders =" + append_nix_config_line "max-jobs = $LOCAL_NIX_MAX_JOBS" + append_nix_config_line "cores = $LOCAL_NIX_BUILD_CORES" + append_nix_config_line "experimental-features = nix-command flakes" + append_nix_config_line "warn-dirty = false" + export NIX_CONFIG + } + + prepare_local_nix_execution() { + HOST_CPU_COUNT="$(host_cpu_count)" + LOCAL_NIX_MAX_JOBS="''${ULTRACLOUD_QUICKSTART_NIX_MAX_JOBS:-''${ULTRACLOUD_LOCAL_NIX_MAX_JOBS:-$(default_local_nix_max_jobs "$HOST_CPU_COUNT")}}" + LOCAL_NIX_BUILD_CORES="''${ULTRACLOUD_QUICKSTART_NIX_BUILD_CORES:-''${ULTRACLOUD_LOCAL_NIX_BUILD_CORES:-$(default_local_nix_build_cores "$HOST_CPU_COUNT" "$LOCAL_NIX_MAX_JOBS")}}" + export ULTRACLOUD_LOCAL_NIX_MAX_JOBS="''${ULTRACLOUD_LOCAL_NIX_MAX_JOBS:-$LOCAL_NIX_MAX_JOBS}" + export ULTRACLOUD_LOCAL_NIX_BUILD_CORES="''${ULTRACLOUD_LOCAL_NIX_BUILD_CORES:-$LOCAL_NIX_BUILD_CORES}" + configure_local_nix_execution + } + + build_vm_locally() { + log "building single-node quickstart VM locally (max-jobs=$LOCAL_NIX_MAX_JOBS build-cores=$LOCAL_NIX_BUILD_CORES)" + if ! 
TMPDIR="$RUN_DIR" NIX_BUILD_CORES="$LOCAL_NIX_BUILD_CORES" nix \ + --option builders "" \ + --option warn-dirty false \ + --max-jobs "$LOCAL_NIX_MAX_JOBS" \ + build "$REPO_FLAKE#single-node-quickstart-vm" \ + --no-link \ + --print-out-paths \ + >"$BUILD_PATH_FILE" \ + 2>"$BUILD_LOG"; then + log "local VM build failed; build log tail:" + tail -n 120 "$BUILD_LOG" >&2 || true + return 1 + fi + + VM_PATH="$(tail -n 1 "$BUILD_PATH_FILE")" + if [ -z "$VM_PATH" ]; then + log "failed to resolve single-node quickstart VM output path" + return 1 + fi + + RUN_VM="$(find "$VM_PATH/bin" -maxdepth 1 -name 'run-*-vm' | head -n1)" + if [ -z "$RUN_VM" ]; then + log "failed to locate run-*-vm under $VM_PATH/bin" + return 1 + fi + + { + printf 'vm_path=%s\n' "$VM_PATH" + printf 'build_log=%s\n' "$BUILD_LOG" + printf 'build_path_file=%s\n' "$BUILD_PATH_FILE" + printf 'nix_build_command=%s\n' "nix --option builders \"\" --max-jobs $LOCAL_NIX_MAX_JOBS build $REPO_FLAKE#single-node-quickstart-vm --no-link --print-out-paths" + } >>"$METADATA_FILE" + } + + capture_environment() { + { + printf 'started_at=%s\n' "$(date -Is)" + printf 'repo_flake=%s\n' "$REPO_FLAKE" + printf 'pwd=%s\n' "$PWD" + printf 'user=%s\n' "$(id -un)" + printf 'uid=%s\n' "$(id -u)" + printf 'gid=%s\n' "$(id -g)" + printf 'work_root=%s\n' "$WORK_ROOT" + printf 'state_dir=%s\n' "$STATE_DIR" + printf 'run_dir=%s\n' "$RUN_DIR" + printf 'disk_image=%s\n' "$DISK_IMAGE" + printf 'serial_log=%s\n' "$SERIAL_LOG" + printf 'ssh_port=%s\n' "$SSH_PORT" + printf 'reuse_disk=%s\n' "$REUSE_DISK" + printf 'keep_vm=%s\n' "$KEEP_VM" + printf 'host_cpu_count=%s\n' "$HOST_CPU_COUNT" + printf 'local_nix_max_jobs=%s\n' "$LOCAL_NIX_MAX_JOBS" + printf 'local_nix_build_cores=%s\n' "$LOCAL_NIX_BUILD_CORES" + printf 'nix_builders=%s\n' "$(nix config show builders 2>/dev/null | awk -F' = ' 'NR==1 { print $2 }')" + printf 'kvm_present=%s\n' "$([[ -e /dev/kvm ]] && echo yes || echo no)" + printf 'kvm_access=%s\n' "$([[ -r /dev/kvm && -w /dev/kvm ]] && echo rw || echo no)" + } >"$METADATA_FILE" + } + + dump_serial() { + if [ -f "$SERIAL_LOG" ]; then + log "serial log tail:" + tail -n 120 "$SERIAL_LOG" >&2 || true + fi + } + + cleanup() { + if [ -f "$PID_FILE" ]; then + pid="$(cat "$PID_FILE")" + if kill -0 "$pid" >/dev/null 2>&1; then + kill "$pid" >/dev/null 2>&1 || true + for _ in $(seq 1 30); do + if ! kill -0 "$pid" >/dev/null 2>&1; then + break + fi + sleep 1 + done + fi + rm -f "$PID_FILE" + fi + } + + on_exit() { + status="$?" 
+ { + printf 'finished_at=%s\n' "$(date -Is)" + printf 'exit_status=%s\n' "$status" + } >>"$METADATA_FILE" + if [ "$status" -ne 0 ]; then + dump_serial + fi + if [ "$KEEP_VM" != "1" ]; then + cleanup + fi + exit "$status" + } + + wait_for_ssh() { + local deadline=$((SECONDS + 240)) + while true; do + if sshpass -p ultracloud ssh \ + -F /dev/null \ + -o StrictHostKeyChecking=no \ + -o UserKnownHostsFile=/dev/null \ + -o LogLevel=ERROR \ + -o ConnectTimeout=5 \ + -o ConnectionAttempts=1 \ + -p "$SSH_PORT" \ + root@127.0.0.1 true >/dev/null 2>&1; then + return 0 + fi + if [ "$SECONDS" -ge "$deadline" ]; then + log "timed out waiting for SSH on port $SSH_PORT" + return 1 + fi + sleep 1 + done + } + + wait_for_unit_active() { + local unit="$1" + local deadline=$((SECONDS + 240)) + while true; do + if ssh_cmd systemctl is-active "$unit" >/dev/null 2>&1; then + return 0 + fi + if [ "$SECONDS" -ge "$deadline" ]; then + log "timed out waiting for $unit" + ssh_cmd systemctl status "$unit" --no-pager || true + return 1 + fi + sleep 1 + done + } + + ssh_cmd() { + sshpass -p ultracloud ssh \ + -F /dev/null \ + -o StrictHostKeyChecking=no \ + -o UserKnownHostsFile=/dev/null \ + -o LogLevel=ERROR \ + -o ConnectTimeout=5 \ + -o ConnectionAttempts=1 \ + -p "$SSH_PORT" \ + root@127.0.0.1 -- "$@" + } + + ssh_shell() { + local script="$1" + local quoted + printf -v quoted '%q' "$script" + sshpass -p ultracloud ssh \ + -F /dev/null \ + -o StrictHostKeyChecking=no \ + -o UserKnownHostsFile=/dev/null \ + -o LogLevel=ERROR \ + -o ConnectTimeout=5 \ + -o ConnectionAttempts=1 \ + -p "$SSH_PORT" \ + root@127.0.0.1 "bash -lc $quoted" + } + + trap on_exit EXIT + + mkdir -p "$STATE_DIR" + rm -rf "$RUN_DIR" + mkdir -p "$RUN_DIR" + rm -f "$SERIAL_LOG" + rm -f "$BUILD_LOG" "$BUILD_PATH_FILE" + if [ "$REUSE_DISK" != "1" ]; then + rm -f "$DISK_IMAGE" + fi + + prepare_local_nix_execution + capture_environment + cleanup + build_vm_locally + + log "launching single-node quickstart VM" + nohup env \ + USE_TMPDIR=1 \ + TMPDIR="$RUN_DIR" \ + NIX_DISK_IMAGE="$DISK_IMAGE" \ + QEMU_NET_OPTS="hostfwd=tcp:127.0.0.1:$SSH_PORT-:22" \ + "$RUN_VM" >"$SERIAL_LOG" 2>&1 & + echo "$!" 
> "$PID_FILE" + + log "waiting for guest SSH" + wait_for_ssh + + log "waiting for in-guest readiness gate" + wait_for_unit_active ultracloud-single-node-quickstart-ready.service + + log "verifying required services" + ssh_cmd systemctl is-active chainfire flaredb iam prismnet plasmavmc >/dev/null + + log "verifying service health endpoints and VM runtime prerequisites" + ssh_shell 'curl -fsS http://127.0.0.1:8081/health >/dev/null && curl -fsS http://127.0.0.1:8082/health >/dev/null && curl -fsS http://127.0.0.1:8083/health >/dev/null && curl -fsS http://127.0.0.1:8087/health >/dev/null && curl -fsS http://127.0.0.1:8084/health >/dev/null && test -x /run/current-system/sw/bin/qemu-system-x86_64 && test -x /run/current-system/sw/bin/qemu-img && test -c /dev/net/tun' + + log "single-node quickstart smoke passed" + printf 'result=passed\n' >>"$METADATA_FILE" + + if [ "$KEEP_VM" = "1" ]; then + trap - EXIT + log "VM left running" + log "ssh: sshpass -p ultracloud ssh -p $SSH_PORT root@127.0.0.1" + exit 0 + fi + ''; + }; + + baremetal-iso-e2e-runner = pkgs.writeShellApplication { + name = "baremetal-iso-e2e"; + runtimeInputs = with pkgs; [ + bash + coreutils + curl + findutils + gawk + gnugrep + gnused + iproute2 + jq + nix + openssh + procps + python3 + qemu + ]; + text = '' + set -euo pipefail + + export ULTRACLOUD_BAREMETAL_E2E_SOURCE_FLAKE_ROOT="${self}" + export ULTRACLOUD_BAREMETAL_PROOF_MODEL="materialized-check-runner" + exec ${pkgs.bash}/bin/bash ${./nix/test-cluster/run-baremetal-iso-e2e.sh} "$@" + ''; + }; }; # ====================================================================== @@ -1044,9 +1391,259 @@ fleet-scheduler = flake-utils.lib.mkApp { drv = self.packages.${system}.fleet-scheduler; }; + + single-node-quickstart = flake-utils.lib.mkApp { + drv = self.packages.${system}.single-node-quickstart; + }; + + single-node-trial = flake-utils.lib.mkApp { + drv = self.packages.${system}.single-node-quickstart; + }; + + baremetal-iso-e2e = flake-utils.lib.mkApp { + drv = self.packages.${system}.baremetal-iso-e2e-runner; + }; + + all-in-one-quickstart = flake-utils.lib.mkApp { + drv = self.packages.${system}.single-node-quickstart; + }; }; - checks = { + checks = + let + stripKvmRequiredSystemFeature = drv: + drv.overrideTestDerivation (old: { + requiredSystemFeatures = + builtins.filter (feature: feature != "kvm") (old.requiredSystemFeatures or [ ]); + }); + singleNodeQuickstartConfig = self.nixosConfigurations.single-node-quickstart; + node01Config = self.nixosConfigurations.node01; + node02Config = self.nixosConfigurations.node02; + node03Config = self.nixosConfigurations.node03; + netbootControlPlaneConfig = self.nixosConfigurations.netboot-control-plane; + netbootAllInOneConfig = self.nixosConfigurations.netboot-all-in-one; + ultracloudIsoConfig = self.nixosConfigurations.ultracloud-iso; + baremetalQemuControlPlaneConfig = self.nixosConfigurations.baremetal-qemu-control-plane; + baremetalQemuWorkerConfig = self.nixosConfigurations.baremetal-qemu-worker; + + mkNixosOutput = + attr: configuration: extra: + { + kind = "nixosConfiguration"; + inherit attr; + hostName = configuration.config.networking.hostName; + } + // extra; + + canonicalProfileManifest = { + profiles = [ + { + id = "single-node-dev"; + label = "single-node dev"; + entrypoints = [ + { + kind = "app"; + attr = "apps.${system}.single-node-trial"; + command = "nix run .#single-node-trial"; + mapsTo = "apps.${system}.single-node-quickstart"; + flakeOutputType = self.apps.${system}.single-node-trial.type; + } + { + kind = 
"app"; + attr = "apps.${system}.single-node-quickstart"; + command = "nix run .#single-node-quickstart"; + flakeOutputType = self.apps.${system}.single-node-quickstart.type; + } + ( + mkNixosOutput "nixosConfigurations.single-node-quickstart" singleNodeQuickstartConfig { + stateVersion = singleNodeQuickstartConfig.config.system.stateVersion; + } + ) + ]; + companionOutputs = [ + { + kind = "package"; + attr = "packages.${system}.single-node-trial-vm"; + command = "nix build .#single-node-trial-vm"; + mapsTo = "packages.${system}.single-node-quickstart-vm"; + } + ( + mkNixosOutput "nixosConfigurations.netboot-all-in-one" netbootAllInOneConfig { + role = "canonical single-node companion install image"; + stateVersion = netbootAllInOneConfig.config.system.stateVersion; + } + ) + ]; + } + { + id = "three-node-ha-control-plane"; + label = "3-node HA control plane"; + entrypoints = [ + ( + mkNixosOutput "nixosConfigurations.node01" node01Config { + stateVersion = node01Config.config.system.stateVersion; + } + ) + ( + mkNixosOutput "nixosConfigurations.node02" node02Config { + stateVersion = node02Config.config.system.stateVersion; + } + ) + ( + mkNixosOutput "nixosConfigurations.node03" node03Config { + stateVersion = node03Config.config.system.stateVersion; + } + ) + ]; + companionOutputs = [ + ( + mkNixosOutput "nixosConfigurations.netboot-control-plane" netbootControlPlaneConfig { + role = "canonical HA control-plane install image"; + stateVersion = netbootControlPlaneConfig.config.system.stateVersion; + } + ) + ]; + } + { + id = "bare-metal-bootstrap"; + label = "bare-metal bootstrap"; + entrypoints = [ + { + kind = "command"; + command = "nix run ./nix/test-cluster#cluster -- baremetal-iso"; + } + ( + mkNixosOutput "nixosConfigurations.ultracloud-iso" ultracloudIsoConfig { + imageFileName = ultracloudIsoConfig.config.image.fileName; + } + ) + ( + mkNixosOutput + "nixosConfigurations.baremetal-qemu-control-plane" + baremetalQemuControlPlaneConfig + { + stateVersion = + baremetalQemuControlPlaneConfig.config.system.stateVersion; + } + ) + ( + mkNixosOutput + "nixosConfigurations.baremetal-qemu-worker" + baremetalQemuWorkerConfig + { + stateVersion = baremetalQemuWorkerConfig.config.system.stateVersion; + } + ) + { + kind = "check"; + attr = "checks.${system}.baremetal-iso-e2e"; + command = + "nix build .#checks.${system}.baremetal-iso-e2e && ./result/bin/baremetal-iso-e2e"; + flakeOutputType = self.checks.${system}.baremetal-iso-e2e.type; + } + ]; + companionOutputs = [ ]; + } + ]; + clusterAuthoring = { + supportedSource = "ultracloud.cluster"; + schemaPath = "nix/lib/cluster-schema.nix"; + legacyCompatibility = [ + "nix-nos: legacy compatibility and low-level network primitives only" + ]; + }; + standaloneStories = [ + { + id = "vm-platform"; + entrypoints = [ + "nix build .#single-node-trial-vm" + "nix run .#single-node-trial" + "nix run .#single-node-quickstart" + ]; + excludes = [ "deployer" "nix-agent" "fleet-scheduler" "node-agent" ]; + } + { + id = "rollout-stack"; + entrypoints = [ + "nix build .#checks.${system}.deployer-vm-smoke" + "nix build .#checks.${system}.portable-control-plane-regressions" + "nix run ./nix/test-cluster#cluster -- baremetal-iso" + ]; + } + ]; + helperOutputs = [ ]; + legacyAliases = [ + { + attr = "apps.${system}.all-in-one-quickstart"; + command = "nix run .#all-in-one-quickstart"; + mapsTo = "apps.${system}.single-node-quickstart"; + flakeOutputType = self.apps.${system}.all-in-one-quickstart.type; + } + ]; + internalOnlyOutputs = [ + { + attr = 
"nixosConfigurations.netboot-base"; + role = "internal helper image"; + } + { + attr = "nixosConfigurations.netboot-worker"; + role = "archived/non-product worker netboot helper"; + } + { + attr = "nixosConfigurations.pxe-server"; + role = "legacy/manual PXE helper"; + } + { + attr = "nixosConfigurations.vm-smoke-target"; + role = "offline deployer smoke-test target"; + } + ]; + }; + + canonicalProfileBuildTargets = [ + { + name = "single-node-quickstart-vm"; + path = self.packages.${system}.single-node-quickstart-vm; + } + { + name = "single-node-trial-vm"; + path = self.packages.${system}.single-node-trial-vm; + } + { + name = "netboot-all-in-one-toplevel"; + path = netbootAllInOneConfig.config.system.build.toplevel; + } + { + name = "node01-toplevel"; + path = node01Config.config.system.build.toplevel; + } + { + name = "node02-toplevel"; + path = node02Config.config.system.build.toplevel; + } + { + name = "node03-toplevel"; + path = node03Config.config.system.build.toplevel; + } + { + name = "netboot-control-plane-toplevel"; + path = netbootControlPlaneConfig.config.system.build.toplevel; + } + { + name = "ultracloud-iso-image"; + path = ultracloudIsoConfig.config.system.build.isoImage; + } + { + name = "baremetal-qemu-control-plane-toplevel"; + path = baremetalQemuControlPlaneConfig.config.system.build.toplevel; + } + { + name = "baremetal-qemu-worker-toplevel"; + path = baremetalQemuWorkerConfig.config.system.build.toplevel; + } + ]; + in + { workspace-source-roots-audit = pkgs.runCommand "workspace-source-roots-audit" { nativeBuildInputs = [ pkgs.python3 ]; @@ -1169,6 +1766,284 @@ touch "$out" ''; + supported-surface-guard = pkgs.runCommand "supported-surface-guard" + { + nativeBuildInputs = with pkgs; [ + bash + gawk + gnugrep + ripgrep + ]; + } '' + repo_root=${./.} + cd "$repo_root" + + wording_targets=( + README.md + docs + apigateway + chainfire + k8shost + plasmavmc + creditservice + fiberlb + nightlight + nix/test-cluster + nix/modules/creditservice.nix + nix/modules/k8shost.nix + nix/modules/plasmavmc.nix + nix/modules/ultracloud-cluster.nix + nix-nos + ) + + wording_patterns=( + 'minimal reference' + 'not yet implemented' + 'placeholder' + 'TODO\(' + ) + + public_api_code_targets=( + chainfire/crates/chainfire-api/src + chainfire/crates/chainfire-server/src + flaredb/crates/flaredb-server/src + lightningstor/crates/lightningstor-server/src + k8shost/crates/k8shost-server/src + plasmavmc/crates/plasmavmc-server/src + fiberlb/crates/fiberlb-server/src + prismnet/crates/prismnet-server/src + ) + + public_api_code_patterns=( + 'Status::unimplemented' + 'unimplemented!\(' + 'todo!\(' + 'not yet implemented' + 'placeholder' + ) + + product_completeness_code_targets=( + k8shost/crates/k8shost-server/src + plasmavmc/crates/plasmavmc-server/src + fiberlb/crates/fiberlb-server/src + prismnet/crates/prismnet-server/src + ) + + product_completeness_code_patterns=( + 'TODO:' + 'FIXME' + 'best-effort' + ) + + contract_targets=( + README.md + docs + apigateway + nightlight + creditservice + nix/test-cluster/README.md + nix/modules/ultracloud-cluster.nix + nix/modules/deployer.nix + nix/modules/fleet-scheduler.nix + nix/modules/nix-agent.nix + nix/modules/node-agent.nix + nix/modules/plasmavmc.nix + nix/modules/k8shost.nix + nix-nos + ) + + required_contract_patterns=( + 'ultracloud\.cluster.*cluster-schema\.nix.*only supported cluster authoring source' + 'nix-nos.*legacy compatibility.*low-level network primitives' + 'single-node-trial-vm.*single-node-quickstart.*standalone VM-platform 
story' + 'durability-proof.*chainfire.*flaredb.*deployer.*backup/restore' + 'ChainFire dynamic membership, replace-node, and scale-out are unsupported on the supported surface' + 'FlareDB online migration and schema evolution must start from the durability-proof backup/restore baseline' + 'IAM bootstrap hardening requires an explicit admin token, an explicit signing key, and a 32-byte IAM_CRED_MASTER_KEY' + 'FlareDB destructive DDL and fully automated online migration remain outside the supported product contract' + 'credential overlap-and-revoke rotation, and mTLS overlap-and-cutover rotation are part of the supported operator contract; multi-node IAM failover remains outside the supported product contract' + 'APIGateway is supported as stateless replicated instances behind an external L4 or VIP layer; live in-process reload is not part of the product contract' + 'NightLight is supported as a single-node WAL/snapshot service; replicated HA metrics storage is not part of the product contract' + 'CreditService export and backend migration are supported as offline export/import or backend-native snapshot workflows, not live mixed-writer migration' + 'provider-vm-reality-proof.*authoritative DNS answers.*backend drain.*re-convergence' + 'PrismNet real OVS/OVN dataplane validation remains outside the supported local KVM surface' + 'FiberLB native BGP.*BFD peer interop.*outside the supported local KVM surface' + 'OCI/Docker artifact is intentionally not the public trial surface' + 'work-root-budget\.sh.*disk budget, GC, and cleanup guidance' + 'work-root-budget\.sh status.*enforce.*prune-proof-logs' + 'FiberLB HTTPS health checks currently do not verify backend TLS certificates' + 'k8shost.*API/control-plane product surface.*archived non-product' + 'deployer.*scope-fixed to one active writer plus optional cold-standby restore.*automatic ChainFire-backed multi-instance failover is outside the supported product contract' + 'fleet-scheduler.*scope-fixed to the two native-runtime worker lab with one planned drain cycle, one fail-stop worker-loss cycle, and 30-second held degraded states in rollout-soak' + ) + + chainfire_core_surface_targets=( + chainfire/crates/chainfire-core/Cargo.toml + chainfire/crates/chainfire-core/src/lib.rs + ) + + chainfire_core_surface_patterns=( + 'Embeddable distributed cluster library' + 'pub mod builder;' + 'pub mod cluster;' + 'pub mod kvs;' + 'pub use builder::ClusterBuilder;' + 'pub use cluster::\{Cluster, ClusterHandle, ClusterState\};' + 'pub use kvs::\{CasResult, Kv, KvEntry, KvHandle, KvNamespace, KvOptions, ReadConsistency\};' + ) + + extract_toml_array_block() { + local file=$1 + local key=$2 + ${pkgs.gawk}/bin/awk -v key="$key" ' + $0 ~ "^" key "[[:space:]]*=" { + in_array = 1 + } + in_array { + print + } + in_array && /\]/ { + exit + } + ' "$file" + } + + extract_nix_array_block() { + local file=$1 + local key=$2 + ${pkgs.gawk}/bin/awk -v key="$key" ' + $0 ~ key "[[:space:]]*=[[:space:]]*\\[" { + in_array = 1 + } + in_array { + print + } + in_array && /\];/ { + exit + } + ' "$file" + } + + status=0 + + for pattern in "''${wording_patterns[@]}"; do + if hits="$(${pkgs.ripgrep}/bin/rg -n "$pattern" "''${wording_targets[@]}" || true)" && [ -n "$hits" ]; then + printf 'supported-surface-guard: found unfinished public marker %q\n' "$pattern" >&2 + printf '%s\n' "$hits" >&2 + status=1 + fi + done + + for pattern in "''${public_api_code_patterns[@]}"; do + if hits="$(${pkgs.ripgrep}/bin/rg -n "$pattern" "''${public_api_code_targets[@]}" || true)" && [ -n "$hits" 
]; then + printf 'supported-surface-guard: found unfinished public API stub %q\n' "$pattern" >&2 + printf '%s\n' "$hits" >&2 + status=1 + fi + done + + for pattern in "''${product_completeness_code_patterns[@]}"; do + if hits="$(${pkgs.ripgrep}/bin/rg -n "$pattern" "''${product_completeness_code_targets[@]}" || true)" && [ -n "$hits" ]; then + printf 'supported-surface-guard: found supported component completeness marker %q\n' "$pattern" >&2 + printf '%s\n' "$hits" >&2 + status=1 + fi + done + + for pattern in "''${required_contract_patterns[@]}"; do + if ! hits="$(${pkgs.ripgrep}/bin/rg -n "$pattern" "''${contract_targets[@]}" || true)" || [ -z "$hits" ]; then + printf 'supported-surface-guard: missing supported-surface contract marker %q\n' "$pattern" >&2 + status=1 + fi + done + + for pattern in "''${chainfire_core_surface_patterns[@]}"; do + if hits="$(${pkgs.ripgrep}/bin/rg -n "$pattern" "''${chainfire_core_surface_targets[@]}" || true)" && [ -n "$hits" ]; then + printf 'supported-surface-guard: found desurfaced chainfire-core API marker %q\n' "$pattern" >&2 + printf '%s\n' "$hits" >&2 + status=1 + fi + done + + if default_members="$(extract_toml_array_block plasmavmc/Cargo.toml default-members)"; then + if hits="$(printf '%s\n' "$default_members" | ${pkgs.ripgrep}/bin/rg -n 'plasmavmc-firecracker' || true)" && [ -n "$hits" ]; then + printf 'supported-surface-guard: archived PlasmaVMC backend scaffold re-entered the default workspace members\n' >&2 + printf '%s\n' "$hits" >&2 + status=1 + fi + fi + + if default_members="$(extract_toml_array_block k8shost/Cargo.toml default-members)"; then + for pattern in 'k8shost-cni' 'k8shost-csi' 'k8shost-controllers'; do + if hits="$(printf '%s\n' "$default_members" | ${pkgs.ripgrep}/bin/rg -n "$pattern" || true)" && [ -n "$hits" ]; then + printf 'supported-surface-guard: archived K8sHost helper scaffold re-entered the default workspace members: %s\n' "$pattern" >&2 + printf '%s\n' "$hits" >&2 + status=1 + fi + done + fi + + if helper_outputs="$(extract_nix_array_block flake.nix 'helperOutputs')" \ + && hits="$(printf '%s\n' "$helper_outputs" | ${pkgs.ripgrep}/bin/rg -n 'netboot-worker' || true)" \ + && [ -n "$hits" ]; then + printf 'supported-surface-guard: archived netboot-worker helper re-entered canonical helper outputs\n' >&2 + printf '%s\n' "$hits" >&2 + status=1 + fi + + if canonical_build_targets="$(extract_nix_array_block flake.nix 'canonicalProfileBuildTargets')" \ + && hits="$(printf '%s\n' "$canonical_build_targets" | ${pkgs.ripgrep}/bin/rg -n 'netboot-worker' || true)" \ + && [ -n "$hits" ]; then + printf 'supported-surface-guard: archived netboot-worker helper re-entered canonical profile build targets\n' >&2 + printf '%s\n' "$hits" >&2 + status=1 + fi + + if [ "$status" -ne 0 ]; then + exit "$status" + fi + + printf 'supported-surface-guard: no unfinished public markers, API stubs, supported component completeness markers, contract-marker regressions, desurfaced chainfire-core API markers, or archived scaffold regressions found\n' + touch "$out" + ''; + + canonical-profile-eval-guards = pkgs.writeText "canonical-profile-eval-guards.json" + (builtins.toJSON canonicalProfileManifest); + + canonical-profile-build-guards = + pkgs.linkFarm "canonical-profile-build-guards" + (map (target: { + inherit (target) name path; + }) canonicalProfileBuildTargets); + + portable-control-plane-regressions = + pkgs.linkFarm "portable-control-plane-regressions" [ + { + name = "canonical-profile-eval-guards"; + path = 
self.checks.${system}.canonical-profile-eval-guards; + } + { + name = "supported-surface-guard"; + path = self.checks.${system}.supported-surface-guard; + } + { + name = "deployer-bootstrap-e2e"; + path = self.checks.${system}.deployer-bootstrap-e2e; + } + { + name = "host-lifecycle-e2e"; + path = self.checks.${system}.host-lifecycle-e2e; + } + { + name = "deployer-vm-smoke"; + path = self.checks.${system}.deployer-vm-smoke; + } + { + name = "fleet-scheduler-e2e"; + path = self.checks.${system}.fleet-scheduler-e2e; + } + ]; + first-boot-topology-vm-smoke = pkgs.testers.runNixOSTest ( import ./nix/tests/first-boot-topology-vm-smoke.nix { inherit pkgs; @@ -1177,15 +2052,15 @@ } ); - deployer-vm-smoke = pkgs.testers.runNixOSTest ( + deployer-vm-smoke = stripKvmRequiredSystemFeature (pkgs.testers.runNixOSTest ( import ./nix/tests/deployer-vm-smoke.nix { inherit pkgs; ultracloudPackages = self.packages.${system}; smokeTargetToplevel = self.packages.${system}.vmSmokeBundledTargetToplevel; } - ); + )); - deployer-vm-rollback = pkgs.testers.runNixOSTest ( + deployer-vm-rollback = stripKvmRequiredSystemFeature (pkgs.testers.runNixOSTest ( import ./nix/tests/deployer-vm-smoke.nix { inherit pkgs; ultracloudPackages = self.packages.${system}; @@ -1198,7 +2073,24 @@ expectCurrentSystemMatchesTarget = false; expectMarkerPresent = false; } - ); + )); + + baremetal-iso-e2e = pkgs.runCommand "baremetal-iso-e2e" + { + nativeBuildInputs = with pkgs; [ coreutils ]; + preferLocalBuild = true; + allowSubstitutes = false; + passthru.proofRunner = self.packages.${system}.baremetal-iso-e2e-runner; + } '' + mkdir -p "$out/bin" "$out/share/ultracloud" + ln -s ${self.packages.${system}.baremetal-iso-e2e-runner}/bin/baremetal-iso-e2e \ + "$out/bin/baremetal-iso-e2e" + cat >"$out/share/ultracloud/README.txt" <<'EOF' +This check materializes the local-KVM baremetal-iso-e2e proof runner. +Direct build-time execution under the Nix daemon sandbox would run as nixbld and fall back to TCG instead of host KVM. +Run ./result/bin/baremetal-iso-e2e from a writable checkout to execute the exact proof and keep log/meta under ./work by default. +EOF + ''; fiberlb-native-bgp-vm-smoke = pkgs.testers.runNixOSTest ( import ./nix/tests/fiberlb-native-bgp-vm-smoke.nix { @@ -1330,6 +2222,7 @@ "${self.packages.${system}.deployer-workspace}/bin/node-agent"; ULTRACLOUD_FLEET_SCHEDULER_BIN = "${self.packages.${system}.deployer-workspace}/bin/fleet-scheduler"; + ULTRACLOUD_FLEET_E2E_REPO_ROOT = "${self}"; } '' export HOME="$TMPDIR/home" mkdir -p "$HOME" @@ -1344,7 +2237,7 @@ pkgs.procps pkgs.python3 ]}" - bash ${./deployer/scripts/verify-fleet-scheduler-e2e.sh} + bash ${./nix/tests/verify-fleet-scheduler-e2e-stable.sh} touch "$out" ''; }; @@ -1363,6 +2256,9 @@ nixosConfigurations = let vmClusterLib = import ./nix/nodes/vm-cluster/lib.nix { lib = nixpkgs.lib; }; + overlayModule = { + nixpkgs.overlays = [ self.overlays.default ]; + }; mkVmClusterSystem = nodeName: nixpkgs.lib.nixosSystem { system = "x86_64-linux"; @@ -1382,25 +2278,74 @@ # Control Plane netboot image (all 8 services) netboot-control-plane = nixpkgs.lib.nixosSystem { system = "x86_64-linux"; - modules = [ ./nix/images/netboot-control-plane.nix ]; + modules = [ + ./nix/images/netboot-control-plane.nix + overlayModule + ]; }; - # Worker netboot image (compute-focused services) + # Archived worker netboot helper kept only for manual lab debugging. 
netboot-worker = nixpkgs.lib.nixosSystem { system = "x86_64-linux"; - modules = [ ./nix/images/netboot-worker.nix ]; + modules = [ + ./nix/images/netboot-worker.nix + overlayModule + ]; }; # All-in-One netboot image (single-node deployment) netboot-all-in-one = nixpkgs.lib.nixosSystem { system = "x86_64-linux"; - modules = [ ./nix/images/netboot-all-in-one.nix ]; + modules = [ + ./nix/images/netboot-all-in-one.nix + overlayModule + ]; + }; + + # QEMU-first single-node quickstart for one-command local bring-up. + single-node-quickstart = nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ + ./nix/single-node/qemu-vm.nix + ./nix/single-node/base.nix + self.nixosModules.default + overlayModule + { + ultracloud.quickstart.enable = true; + } + ]; + }; + + # Canonical bare-metal ISO install targets used by the QEMU proof path. + baremetal-qemu-control-plane = nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ + disko.nixosModules.disko + ./nix/nodes/baremetal-qemu/control-plane/configuration.nix + ./nix/nodes/baremetal-qemu/control-plane/disko.nix + self.nixosModules.default + overlayModule + ]; + }; + + baremetal-qemu-worker = nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ + disko.nixosModules.disko + ./nix/nodes/baremetal-qemu/worker/configuration.nix + ./nix/nodes/baremetal-qemu/worker/disko.nix + self.nixosModules.default + overlayModule + ]; }; # Base netboot image (minimal, for VM testing and provisioning) netboot-base = nixpkgs.lib.nixosSystem { system = "x86_64-linux"; - modules = [ ./nix/images/netboot-base.nix ]; + modules = [ + ./nix/images/netboot-base.nix + overlayModule + ]; }; # Offline-friendly target used by deployer VM smoke tests. @@ -1412,6 +2357,20 @@ # UltraCloud ISO (T061.S5 - bootable ISO with cluster-config embedding) ultracloud-iso = nixpkgs.lib.nixosSystem { system = "x86_64-linux"; + specialArgs = { + ultracloudBaremetalFormatMountPaths = { + baremetal-qemu-control-plane = + self.nixosConfigurations."baremetal-qemu-control-plane".config.system.build.formatMount; + baremetal-qemu-worker = + self.nixosConfigurations."baremetal-qemu-worker".config.system.build.formatMount; + }; + ultracloudBaremetalSystemPaths = { + baremetal-qemu-control-plane = + self.nixosConfigurations."baremetal-qemu-control-plane".config.system.build.toplevel; + baremetal-qemu-worker = + self.nixosConfigurations."baremetal-qemu-worker".config.system.build.toplevel; + }; + }; modules = [ ./nix/iso/ultracloud-iso.nix self.nixosModules.default @@ -1455,13 +2414,13 @@ apigateway-server = self.packages.${final.system}.apigateway-server; k8shost-server = self.packages.${final.system}.k8shost-server; deployer-workspace = self.packages.${final.system}.deployer-workspace; - deployer-server = self.packages.${final.system}.deployer-workspace; - deployer-ctl = self.packages.${final.system}.deployer-workspace; - ultracloud-reconciler = self.packages.${final.system}.deployer-workspace; + deployer-server = self.packages.${final.system}.deployer-server; + deployer-ctl = self.packages.${final.system}.deployer-ctl; + ultracloud-reconciler = self.packages.${final.system}.ultracloud-reconciler; ultracloudFlakeBundle = self.packages.${final.system}.ultracloudFlakeBundle; - nix-agent = self.packages.${final.system}.deployer-workspace; - node-agent = self.packages.${final.system}.deployer-workspace; - fleet-scheduler = self.packages.${final.system}.deployer-workspace; + nix-agent = self.packages.${final.system}.nix-agent; + node-agent = 
self.packages.${final.system}.node-agent; + fleet-scheduler = self.packages.${final.system}.fleet-scheduler; }; }; } diff --git a/flaredb/crates/flaredb-client/src/client.rs b/flaredb/crates/flaredb-client/src/client.rs index fdb48b2..fd77363 100644 --- a/flaredb/crates/flaredb-client/src/client.rs +++ b/flaredb/crates/flaredb-client/src/client.rs @@ -105,8 +105,8 @@ fn resolve_chainfire_route_from_snapshot( } } - let selected_store = selected_store - .ok_or_else(|| tonic::Status::not_found("region peer store not found"))?; + let selected_store = + selected_store.ok_or_else(|| tonic::Status::not_found("region peer store not found"))?; if candidate_addrs.is_empty() { return Err(tonic::Status::not_found( "region has no candidate store addresses", @@ -841,9 +841,7 @@ impl RdbClient { force_refresh: bool, ) -> Result { if force_refresh { - let snapshot = self - .chainfire_route_snapshot(kv_client, true) - .await?; + let snapshot = self.chainfire_route_snapshot(kv_client, true).await?; return resolve_chainfire_route_from_snapshot(key, &snapshot); } @@ -946,9 +944,8 @@ async fn list_chainfire_regions( #[cfg(test)] mod tests { use super::{ - normalize_transport_addr, parse_transport_endpoints, - resolve_chainfire_route_from_snapshot, ChainfireRegionInfo, ChainfireRouteSnapshot, - ChainfireStoreInfo, RdbClient, + normalize_transport_addr, parse_transport_endpoints, resolve_chainfire_route_from_snapshot, + ChainfireRegionInfo, ChainfireRouteSnapshot, ChainfireStoreInfo, RdbClient, }; use std::collections::HashMap; use std::time::Instant; diff --git a/flaredb/crates/flaredb-proto/src/chainfire.proto b/flaredb/crates/flaredb-proto/src/chainfire.proto index ef56985..7b35653 100644 --- a/flaredb/crates/flaredb-proto/src/chainfire.proto +++ b/flaredb/crates/flaredb-proto/src/chainfire.proto @@ -23,15 +23,9 @@ service Watch { rpc Watch(stream WatchRequest) returns (stream WatchResponse); } -// Cluster management service +// Cluster management service for fixed-membership clusters. service Cluster { - // MemberAdd adds a member into the cluster - rpc MemberAdd(MemberAddRequest) returns (MemberAddResponse); - - // MemberRemove removes an existing member from the cluster - rpc MemberRemove(MemberRemoveRequest) returns (MemberRemoveResponse); - - // MemberList lists all the members in the cluster + // MemberList lists the members configured at cluster bootstrap time rpc MemberList(MemberListRequest) returns (MemberListResponse); // Status gets the status of the cluster @@ -266,32 +260,6 @@ message Member { bool is_learner = 5; } -message MemberAddRequest { - // peer_urls are the URLs to reach the new member - repeated string peer_urls = 1; - // is_learner indicates if the member is a learner - bool is_learner = 2; -} - -message MemberAddResponse { - ResponseHeader header = 1; - // member is the member information for the added member - Member member = 2; - // members is the list of all members after adding - repeated Member members = 3; -} - -message MemberRemoveRequest { - // ID is the member ID to remove - uint64 id = 1; -} - -message MemberRemoveResponse { - ResponseHeader header = 1; - // members is the list of all members after removing - repeated Member members = 2; -} - message MemberListRequest {} message MemberListResponse { diff --git a/flaredb/crates/flaredb-raft/src/network.rs b/flaredb/crates/flaredb-raft/src/network.rs index a771e51..1a52d13 100644 --- a/flaredb/crates/flaredb-raft/src/network.rs +++ b/flaredb/crates/flaredb-raft/src/network.rs @@ -4,14 +4,16 @@ //! 
for inter-node communication using gRPC. use crate::types::{FlareNode, FlareNodeId, FlareTypeConfig}; -use openraft::error::{Fatal, NetworkError, RPCError, RaftError, ReplicationClosed, StreamingError}; +use flaredb_proto::raft_server::raft_service_client::RaftServiceClient; +use openraft::error::{ + Fatal, NetworkError, RPCError, RaftError, ReplicationClosed, StreamingError, +}; use openraft::network::{RPCOption, RaftNetwork, RaftNetworkFactory}; use openraft::raft::{ AppendEntriesRequest, AppendEntriesResponse, InstallSnapshotRequest, InstallSnapshotResponse, SnapshotResponse, VoteRequest, VoteResponse, }; use openraft::{OptionalSend, Snapshot, Vote}; -use flaredb_proto::raft_server::raft_service_client::RaftServiceClient; use std::collections::HashMap; use std::future::Future; use std::sync::Arc; @@ -348,8 +350,9 @@ impl RaftNetwork for FlareNetwork { RPCError::Network(Self::network_error(e.to_string())) })?; - let resp: VoteResponse = serde_json::from_slice(&response.into_inner().data) - .map_err(|e| RPCError::Network(Self::network_error(e.to_string())))?; + let resp: VoteResponse = + serde_json::from_slice(&response.into_inner().data) + .map_err(|e| RPCError::Network(Self::network_error(e.to_string())))?; Ok(resp) } diff --git a/flaredb/crates/flaredb-raft/src/persistent_storage.rs b/flaredb/crates/flaredb-raft/src/persistent_storage.rs index 1bf7499..752bef2 100644 --- a/flaredb/crates/flaredb-raft/src/persistent_storage.rs +++ b/flaredb/crates/flaredb-raft/src/persistent_storage.rs @@ -7,7 +7,10 @@ use crate::types::{ FlareEntry, FlareLogId, FlareNode, FlareNodeId, FlareResponse, FlareTypeConfig, }; use flaredb_storage::rocks_engine::RocksEngine; -use openraft::storage::{LogFlushed, LogState, RaftLogReader, RaftLogStorage, RaftSnapshotBuilder, RaftStateMachine, Snapshot}; +use openraft::storage::{ + LogFlushed, LogState, RaftLogReader, RaftLogStorage, RaftSnapshotBuilder, RaftStateMachine, + Snapshot, +}; use openraft::{EntryPayload, OptionalSend, RaftLogId, SnapshotMeta}; use openraft::{StorageError, StorageIOError, StoredMembership, Vote}; use std::fmt::Debug; @@ -50,8 +53,7 @@ impl PersistentFlareStore { debug!( has_snapshot = snapshot.is_some(), - snapshot_idx, - "PersistentFlareStore initialized" + snapshot_idx, "PersistentFlareStore initialized" ); Self { @@ -63,9 +65,7 @@ impl PersistentFlareStore { } /// Recover state from RocksDB on startup - fn recover_from_disk( - engine: &RocksEngine, - ) -> (FlareStateMachine, Option, u64) { + fn recover_from_disk(engine: &RocksEngine) -> (FlareStateMachine, Option, u64) { // Load snapshot index let snapshot_idx = engine .get_raft_state(KEY_SNAPSHOT_IDX) @@ -90,10 +90,8 @@ impl PersistentFlareStore { .and_then(|data| serde_json::from_slice(&data).ok()); // Load snapshot data - let snapshot_data: Option> = engine - .get_raft_state(KEY_SNAPSHOT_DATA) - .ok() - .flatten(); + let snapshot_data: Option> = + engine.get_raft_state(KEY_SNAPSHOT_DATA).ok().flatten(); // If we have both meta and data, reconstruct the snapshot and state machine match (snapshot_meta, snapshot_data) { @@ -105,10 +103,7 @@ impl PersistentFlareStore { snapshot_id = %meta.snapshot_id, "Recovered state machine from snapshot" ); - let snapshot = FlareSnapshot { - meta, - data, - }; + let snapshot = FlareSnapshot { meta, data }; (sm, Some(snapshot), snapshot_idx) } Err(e) => { @@ -128,10 +123,14 @@ impl PersistentFlareStore { } /// Persist snapshot to RocksDB - fn persist_snapshot(&self, snapshot: &FlareSnapshot, idx: u64) -> Result<(), StorageError> { + fn 
persist_snapshot( + &self, + snapshot: &FlareSnapshot, + idx: u64, + ) -> Result<(), StorageError> { // Persist snapshot metadata - let meta_data = serde_json::to_vec(&snapshot.meta) - .map_err(|e| StorageIOError::write(&e))?; + let meta_data = + serde_json::to_vec(&snapshot.meta).map_err(|e| StorageIOError::write(&e))?; self.engine .put_raft_state(KEY_SNAPSHOT_META, &meta_data) .map_err(|e| StorageIOError::write(&e))?; @@ -308,7 +307,9 @@ impl RaftSnapshotBuilder for Arc { impl RaftLogStorage for Arc { type LogReader = Self; - async fn get_log_state(&mut self) -> Result, StorageError> { + async fn get_log_state( + &mut self, + ) -> Result, StorageError> { // Get last purged log ID from state let last_purged = self .engine @@ -379,9 +380,7 @@ impl RaftLogStorage for Arc { Ok(()) } - async fn read_committed( - &mut self, - ) -> Result, StorageError> { + async fn read_committed(&mut self) -> Result, StorageError> { let committed = self .engine .get_raft_state(KEY_COMMITTED) @@ -465,7 +464,10 @@ impl RaftStateMachine for Arc { async fn applied_state( &mut self, - ) -> Result<(Option, StoredMembership), StorageError> { + ) -> Result< + (Option, StoredMembership), + StorageError, + > { let sm = self.sm.read().await; Ok((sm.last_applied_log, sm.last_membership.clone())) } @@ -520,8 +522,10 @@ impl RaftStateMachine for Arc { // Update state machine (using bincode to match build_snapshot) { - let new_sm: FlareStateMachine = bincode::deserialize(&new_snapshot.data) - .map_err(|e| StorageIOError::read_snapshot(Some(new_snapshot.meta.signature()), &e))?; + let new_sm: FlareStateMachine = + bincode::deserialize(&new_snapshot.data).map_err(|e| { + StorageIOError::read_snapshot(Some(new_snapshot.meta.signature()), &e) + })?; let mut sm = self.sm.write().await; *sm = new_sm; } diff --git a/flaredb/crates/flaredb-raft/src/raft_node.rs b/flaredb/crates/flaredb-raft/src/raft_node.rs index 0f3ac30..e545c02 100644 --- a/flaredb/crates/flaredb-raft/src/raft_node.rs +++ b/flaredb/crates/flaredb-raft/src/raft_node.rs @@ -6,9 +6,7 @@ use crate::network::FlareNetworkFactory; use crate::persistent_storage::PersistentFlareStore; use crate::storage::FlareStore; -use crate::types::{ - FlareNode, FlareNodeId, FlareRaft, FlareRequest, FlareResponse, -}; +use crate::types::{FlareNode, FlareNodeId, FlareRaft, FlareRequest, FlareResponse}; use flaredb_storage::rocks_engine::RocksEngine; use openraft::error::{ClientWriteError, InitializeError, RaftError}; use openraft::{Config, Raft}; @@ -117,7 +115,9 @@ pub struct FlareRaftNode { impl FlareRaftNode { /// Create OpenRaft config with sensible defaults - fn create_raft_config(region_id: u64) -> Result, Box> { + fn create_raft_config( + region_id: u64, + ) -> Result, Box> { let raft_config = Config { cluster_name: format!("flare-region-{}", region_id), // VM-backed cluster tests can stall for >1s while other services build images, @@ -216,8 +216,7 @@ impl FlareRaftNode { info!( store_id, - region_id, - "Created persistent Raft node with RocksDB storage" + region_id, "Created persistent Raft node with RocksDB storage" ); Ok(node) diff --git a/flaredb/crates/flaredb-raft/src/storage.rs b/flaredb/crates/flaredb-raft/src/storage.rs index 06e50be..e192aed 100644 --- a/flaredb/crates/flaredb-raft/src/storage.rs +++ b/flaredb/crates/flaredb-raft/src/storage.rs @@ -7,7 +7,10 @@ use crate::types::{ FlareEntry, FlareLogId, FlareNode, FlareNodeId, FlareRequest, FlareResponse, FlareStoredMembership, FlareTypeConfig, }; -use openraft::storage::{LogFlushed, LogState, RaftLogReader, 
RaftLogStorage, RaftSnapshotBuilder, RaftStateMachine, Snapshot}; +use openraft::storage::{ + LogFlushed, LogState, RaftLogReader, RaftLogStorage, RaftSnapshotBuilder, RaftStateMachine, + Snapshot, +}; use openraft::{EntryPayload, OptionalSend, RaftLogId, SnapshotMeta}; use openraft::{StorageError, StorageIOError, StoredMembership, Vote}; use serde::{Deserialize, Serialize}; @@ -141,7 +144,9 @@ impl RaftLogReader for Arc { } impl RaftSnapshotBuilder for Arc { - async fn build_snapshot(&mut self) -> Result, StorageError> { + async fn build_snapshot( + &mut self, + ) -> Result, StorageError> { let data; let last_applied_log; let last_membership; @@ -192,7 +197,9 @@ impl RaftSnapshotBuilder for Arc { impl RaftLogStorage for Arc { type LogReader = Self; - async fn get_log_state(&mut self) -> Result, StorageError> { + async fn get_log_state( + &mut self, + ) -> Result, StorageError> { let log = self.log.read().await; let last_serialized = log.iter().next_back().map(|(_, ent)| ent); @@ -218,7 +225,10 @@ impl RaftLogStorage for Arc { }) } - async fn save_vote(&mut self, vote: &Vote) -> Result<(), StorageError> { + async fn save_vote( + &mut self, + vote: &Vote, + ) -> Result<(), StorageError> { let mut v = self.vote.write().await; *v = Some(*vote); Ok(()) @@ -300,8 +310,10 @@ impl RaftStateMachine for Arc { async fn applied_state( &mut self, - ) -> Result<(Option, StoredMembership), StorageError> - { + ) -> Result< + (Option, StoredMembership), + StorageError, + > { let sm = self.sm.read().await; Ok((sm.last_applied_log, sm.last_membership.clone())) } @@ -356,8 +368,10 @@ impl RaftStateMachine for Arc { // Update state machine (using bincode to match build_snapshot) { - let new_sm: FlareStateMachine = bincode::deserialize(&new_snapshot.data) - .map_err(|e| StorageIOError::read_snapshot(Some(new_snapshot.meta.signature()), &e))?; + let new_sm: FlareStateMachine = + bincode::deserialize(&new_snapshot.data).map_err(|e| { + StorageIOError::read_snapshot(Some(new_snapshot.meta.signature()), &e) + })?; let mut sm = self.sm.write().await; *sm = new_sm; } @@ -435,11 +449,7 @@ pub fn apply_request(sm: &mut FlareStateMachine, req: &FlareRequest, index: u64) let key_tuple = (*namespace_id, key.clone()); // Get current version (0 if key doesn't exist) - let current_version = sm - .cas_data - .get(&key_tuple) - .map(|(_, v, _)| *v) - .unwrap_or(0); + let current_version = sm.cas_data.get(&key_tuple).map(|(_, v, _)| *v).unwrap_or(0); if current_version != *expected_version { // Version mismatch - CAS fails @@ -469,11 +479,7 @@ pub fn apply_request(sm: &mut FlareStateMachine, req: &FlareRequest, index: u64) let key_tuple = (*namespace_id, key.clone()); // Get current version (0 if key doesn't exist) - let current_version = sm - .cas_data - .get(&key_tuple) - .map(|(_, v, _)| *v) - .unwrap_or(0); + let current_version = sm.cas_data.get(&key_tuple).map(|(_, v, _)| *v).unwrap_or(0); // If expected_version is 0, delete if exists (no version check) // Otherwise, only delete if version matches @@ -685,7 +691,10 @@ mod tests { ts: 200, }; let response = apply_request(&mut sm, &req_delete, 2); - assert!(matches!(response, FlareResponse::DeleteResult { existed: true })); + assert!(matches!( + response, + FlareResponse::DeleteResult { existed: true } + )); // Key should be deleted let data = sm.kv_data.get(&(1, b"key".to_vec())); @@ -703,7 +712,10 @@ mod tests { ts: 100, }; let response = apply_request(&mut sm, &req_delete, 1); - assert!(matches!(response, FlareResponse::DeleteResult { existed: false })); + 
assert!(matches!( + response, + FlareResponse::DeleteResult { existed: false } + )); } #[tokio::test] diff --git a/flaredb/crates/flaredb-raft/src/types.rs b/flaredb/crates/flaredb-raft/src/types.rs index 416b87a..c034dc6 100644 --- a/flaredb/crates/flaredb-raft/src/types.rs +++ b/flaredb/crates/flaredb-raft/src/types.rs @@ -62,9 +62,7 @@ pub enum FlareResponse { /// Write applied with index Applied { index: u64 }, /// Delete operation result - DeleteResult { - existed: bool, - }, + DeleteResult { existed: bool }, /// Compare-and-swap result CasResult { success: bool, diff --git a/flaredb/crates/flaredb-server/benches/storage_bench.rs b/flaredb/crates/flaredb-server/benches/storage_bench.rs index bbce19f..71532ab 100644 --- a/flaredb/crates/flaredb-server/benches/storage_bench.rs +++ b/flaredb/crates/flaredb-server/benches/storage_bench.rs @@ -43,7 +43,8 @@ fn bench_write_throughput(c: &mut Criterion) { b.iter(|| { for i in 0..NUM_KEYS_THROUGHPUT { let key = format!("bench_key_{:08}", i); - db.put(black_box(key.as_bytes()), black_box(&value)).unwrap(); + db.put(black_box(key.as_bytes()), black_box(&value)) + .unwrap(); } }); }); @@ -149,7 +150,8 @@ fn bench_write_latency(c: &mut Criterion) { b.iter(|| { let key = format!("latency_key_{:08}", key_counter); key_counter += 1; - db.put(black_box(key.as_bytes()), black_box(&value)).unwrap(); + db.put(black_box(key.as_bytes()), black_box(&value)) + .unwrap(); }); }); diff --git a/flaredb/crates/flaredb-server/src/config/mod.rs b/flaredb/crates/flaredb-server/src/config/mod.rs index cdf89d8..a7f43e9 100644 --- a/flaredb/crates/flaredb-server/src/config/mod.rs +++ b/flaredb/crates/flaredb-server/src/config/mod.rs @@ -374,7 +374,10 @@ pub fn parse_mode(mode_str: &str) -> Result { match mode_str.to_lowercase().as_str() { "strong" => Ok(ConsistencyMode::Strong), "eventual" => Ok(ConsistencyMode::Eventual), - _ => Err(format!("invalid mode '{}', expected strong|eventual", mode_str)), + _ => Err(format!( + "invalid mode '{}', expected strong|eventual", + mode_str + )), } } diff --git a/flaredb/crates/flaredb-server/src/heartbeat.rs b/flaredb/crates/flaredb-server/src/heartbeat.rs index f7ee478..d62babe 100644 --- a/flaredb/crates/flaredb-server/src/heartbeat.rs +++ b/flaredb/crates/flaredb-server/src/heartbeat.rs @@ -49,8 +49,7 @@ pub async fn start_heartbeat( let _ = store.refresh_regions(metas).await; } } - // send basic heartbeat info (store id + regions held) - // PD heartbeat RPC is not defined yet; placeholder refresh via list. + // Refresh store and region visibility against legacy PD by re-listing regions. } sleep(Duration::from_secs(30)).await; } diff --git a/flaredb/crates/flaredb-server/src/raft_service.rs b/flaredb/crates/flaredb-server/src/raft_service.rs index f40ba86..520e256 100644 --- a/flaredb/crates/flaredb-server/src/raft_service.rs +++ b/flaredb/crates/flaredb-server/src/raft_service.rs @@ -29,9 +29,8 @@ impl RaftServiceImpl { #[tonic::async_trait] impl RaftService for RaftServiceImpl { async fn send(&self, _request: Request) -> Result, Status> { - // Legacy raft-rs RPC - no longer supported - Err(Status::unimplemented( - "Legacy raft-rs RPC not supported. Use OpenRaft RPCs (vote_v2, append_entries_v2).", + Err(Status::failed_precondition( + "Legacy raft-rs RPC has been retired. 
Use OpenRaft RPCs (vote_v2, append_entries_v2).", )) } diff --git a/flaredb/crates/flaredb-server/src/rest.rs b/flaredb/crates/flaredb-server/src/rest.rs index db5e3cd..4bd8068 100644 --- a/flaredb/crates/flaredb-server/src/rest.rs +++ b/flaredb/crates/flaredb-server/src/rest.rs @@ -16,9 +16,15 @@ use axum::{ Json, Router, }; use flaredb_client::RdbClient; +use flaredb_sql::{ + executor::{ExecutionResult, SqlExecutor}, + metadata::MetadataManager, + types::Value, +}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::sync::Arc; +use tokio::sync::Mutex; /// REST API state #[derive(Clone)] @@ -79,13 +85,19 @@ impl SuccessResponse { #[derive(Debug, Deserialize)] pub struct SqlRequest { pub query: String, + #[serde(default)] + pub namespace: String, } /// SQL execution response #[derive(Debug, Serialize)] pub struct SqlResponse { pub rows_affected: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub columns: Option>, pub rows: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub message: Option, } /// KV Put request body @@ -109,6 +121,12 @@ pub struct TablesResponse { pub tables: Vec, } +#[derive(Debug, Deserialize)] +pub struct NamespaceQuery { + #[serde(default)] + pub namespace: String, +} + /// Query parameters for scan #[derive(Debug, Deserialize)] pub struct ScanQuery { @@ -180,29 +198,66 @@ async fn health_check() -> (StatusCode, Json> /// POST /api/v1/sql - Execute SQL query async fn execute_sql( - State(_state): State, + State(state): State, Json(req): Json, ) -> Result>, (StatusCode, Json)> { - // SQL execution requires Arc> which is complex to set up in REST context - // For now, return a placeholder indicating SQL should be accessed via gRPC - // Full implementation would require refactoring to share SQL executor state - Ok(Json(SuccessResponse::new(SqlResponse { - rows_affected: None, - rows: Some(vec![serde_json::json!({ - "message": format!("SQL execution via REST not yet implemented. 
Query received: {}", req.query), - "hint": "Use gRPC SqlService for SQL queries or implement Arc> sharing" - })]), - }))) + let namespace = normalize_namespace(&req.namespace); + let client = connect_sql_client(&state, &namespace).await?; + let executor = SqlExecutor::new(client); + let result = executor.execute(&req.query).await.map_err(|e| { + error_response( + StatusCode::BAD_REQUEST, + "SQL_EXECUTION_ERROR", + &e.to_string(), + ) + })?; + + let response = match result { + ExecutionResult::DdlSuccess(message) => SqlResponse { + rows_affected: Some(0), + columns: None, + rows: None, + message: Some(message), + }, + ExecutionResult::DmlSuccess(rows_affected) => SqlResponse { + rows_affected: Some(rows_affected), + columns: None, + rows: None, + message: None, + }, + ExecutionResult::Query(query_result) => { + let columns = query_result.columns.clone(); + SqlResponse { + rows_affected: None, + columns: Some(columns.clone()), + rows: Some(query_result_rows_to_json(columns, query_result.rows)), + message: None, + } + } + }; + + Ok(Json(SuccessResponse::new(response))) } /// GET /api/v1/tables - List tables async fn list_tables( - State(_state): State, + State(state): State, + Query(params): Query, ) -> Result>, (StatusCode, Json)> { - // Listing tables requires SQL executor with Arc> - // For now, return empty list with hint + let namespace = normalize_namespace(¶ms.namespace); + let client = connect_sql_client(&state, &namespace).await?; + let metadata = MetadataManager::new(client); + let mut tables = metadata.list_tables().await.map_err(|e| { + error_response( + StatusCode::INTERNAL_SERVER_ERROR, + "SQL_METADATA_ERROR", + &e.to_string(), + ) + })?; + tables.sort_unstable(); + Ok(Json(SuccessResponse::new(TablesResponse { - tables: vec!["(Table listing via REST not yet implemented - use gRPC)".to_string()], + tables, }))) } @@ -549,6 +604,62 @@ fn socket_host(addr: &str) -> String { }) } +fn normalize_namespace(namespace: &str) -> String { + if namespace.trim().is_empty() { + "default".to_string() + } else { + namespace.trim().to_string() + } +} + +async fn connect_sql_client( + state: &RestApiState, + namespace: &str, +) -> Result>, (StatusCode, Json)> { + let client = RdbClient::connect_with_pd_namespace( + state.server_addr.clone(), + state.pd_endpoints.join(","), + namespace.to_string(), + ) + .await + .map_err(|e| { + error_response( + StatusCode::SERVICE_UNAVAILABLE, + "SERVICE_UNAVAILABLE", + &format!("Failed to connect: {}", e), + ) + })?; + + Ok(Arc::new(Mutex::new(client))) +} + +fn query_result_rows_to_json( + columns: Vec, + rows: Vec>, +) -> Vec { + rows.into_iter() + .map(|row| { + let object = columns + .iter() + .cloned() + .zip(row.into_iter()) + .map(|(column, value)| (column, sql_value_to_json(value))) + .collect::>(); + serde_json::Value::Object(object) + }) + .collect() +} + +fn sql_value_to_json(value: Value) -> serde_json::Value { + match value { + Value::Null => serde_json::Value::Null, + Value::Integer(number) => serde_json::Value::Number(number.into()), + Value::Text(text) => serde_json::Value::String(text), + Value::Boolean(flag) => serde_json::Value::Bool(flag), + Value::Timestamp(ts) => serde_json::Value::Number(ts.into()), + } +} + /// Helper to create error response fn error_response( status: StatusCode, @@ -614,4 +725,32 @@ mod tests { Some((2, "10.100.0.12:50052".to_string())) ); } + + #[test] + fn normalize_namespace_defaults_to_default() { + assert_eq!(normalize_namespace(""), "default"); + assert_eq!(normalize_namespace(" "), "default"); + 
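+        // Added illustration: padding around a real name is trimmed rather
+        // than treated as empty, matching the trim() calls in normalize_namespace.
+        assert_eq!(normalize_namespace("  tenant-a  "), "tenant-a");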
assert_eq!(normalize_namespace("tenant-a"), "tenant-a"); + } + + #[test] + fn query_result_rows_to_json_uses_column_names() { + let rows = query_result_rows_to_json( + vec!["id".to_string(), "name".to_string(), "active".to_string()], + vec![vec![ + Value::Integer(7), + Value::Text("alice".to_string()), + Value::Boolean(true), + ]], + ); + + assert_eq!( + rows, + vec![serde_json::json!({ + "id": 7, + "name": "alice", + "active": true, + })] + ); + } } diff --git a/flaredb/crates/flaredb-sql/src/error.rs b/flaredb/crates/flaredb-sql/src/error.rs index a3dbf1d..5fcde89 100644 --- a/flaredb/crates/flaredb-sql/src/error.rs +++ b/flaredb/crates/flaredb-sql/src/error.rs @@ -15,10 +15,7 @@ pub enum SqlError { ColumnNotFound(String, String), #[error("Type mismatch: expected {expected:?}, got {actual:?}")] - TypeMismatch { - expected: String, - actual: String, - }, + TypeMismatch { expected: String, actual: String }, #[error("Primary key violation: {0}")] PrimaryKeyViolation(String), diff --git a/flaredb/crates/flaredb-sql/src/metadata.rs b/flaredb/crates/flaredb-sql/src/metadata.rs index fde386f..04515e2 100644 --- a/flaredb/crates/flaredb-sql/src/metadata.rs +++ b/flaredb/crates/flaredb-sql/src/metadata.rs @@ -38,10 +38,7 @@ impl MetadataManager { // Validate primary key columns exist for pk_col in &primary_key { if !columns.iter().any(|c| &c.name == pk_col) { - return Err(SqlError::ColumnNotFound( - pk_col.clone(), - table_name.clone(), - )); + return Err(SqlError::ColumnNotFound(pk_col.clone(), table_name.clone())); } } @@ -161,7 +158,11 @@ impl MetadataManager { let mut client = self.client.lock().await; let (entries, _next_key) = client - .cas_scan(start_key.as_bytes().to_vec(), end_key.as_bytes().to_vec(), 1000) + .cas_scan( + start_key.as_bytes().to_vec(), + end_key.as_bytes().to_vec(), + 1000, + ) .await .map_err(|e| SqlError::KvsError(e.to_string()))?; @@ -180,8 +181,8 @@ impl MetadataManager { /// Store table metadata in KVS async fn store_metadata(&self, metadata: &TableMetadata) -> Result<()> { let key = format!("{}:{}", TABLES_KEY_PREFIX, metadata.table_name); - let value = - bincode::serialize(metadata).map_err(|e| SqlError::SerializationError(e.to_string()))?; + let value = bincode::serialize(metadata) + .map_err(|e| SqlError::SerializationError(e.to_string()))?; let mut client = self.client.lock().await; // Use version 0 for new table (we already checked it doesn't exist) @@ -191,7 +192,9 @@ impl MetadataManager { .map_err(|e| SqlError::KvsError(e.to_string()))?; if !success { - return Err(SqlError::InternalError("Failed to store table metadata".to_string())); + return Err(SqlError::InternalError( + "Failed to store table metadata".to_string(), + )); } Ok(()) @@ -211,11 +214,10 @@ impl MetadataManager { .map_err(|e| SqlError::KvsError(e.to_string()))?; let (next_id, expected_version) = if let Some((version, bytes)) = current { - let current_id = u32::from_be_bytes( - bytes - .try_into() - .map_err(|_| SqlError::InternalError("Invalid table ID format".to_string()))?, - ); + let current_id = + u32::from_be_bytes(bytes.try_into().map_err(|_| { + SqlError::InternalError("Invalid table ID format".to_string()) + })?); (current_id + 1, version) } else { (1u32, 0u64) // Start from 1 if no counter exists @@ -234,7 +236,9 @@ impl MetadataManager { // CAS failed, retry } - Err(SqlError::InternalError("Failed to allocate table ID after retries".to_string())) + Err(SqlError::InternalError( + "Failed to allocate table ID after retries".to_string(), + )) } } @@ -249,7 +253,9 @@ mod tests { 
#[tokio::test] #[ignore] // Requires FlareDB server async fn test_create_table() { - let client = RdbClient::connect_direct("127.0.0.1:8001".to_string(), "sqltest".to_string()).await.unwrap(); + let client = RdbClient::connect_direct("127.0.0.1:8001".to_string(), "sqltest".to_string()) + .await + .unwrap(); let manager = MetadataManager::new(Arc::new(Mutex::new(client))); let columns = vec![ diff --git a/flaredb/crates/flaredb-sql/src/types.rs b/flaredb/crates/flaredb-sql/src/types.rs index cdba4a8..06b4886 100644 --- a/flaredb/crates/flaredb-sql/src/types.rs +++ b/flaredb/crates/flaredb-sql/src/types.rs @@ -75,7 +75,10 @@ pub struct RowData { impl RowData { pub fn new(columns: HashMap) -> Self { - Self { columns, version: 0 } + Self { + columns, + version: 0, + } } pub fn get(&self, column: &str) -> Option<&Value> { diff --git a/flaredb/crates/flaredb-storage/src/rocks_engine.rs b/flaredb/crates/flaredb-storage/src/rocks_engine.rs index 8d7c084..eeeb965 100644 --- a/flaredb/crates/flaredb-storage/src/rocks_engine.rs +++ b/flaredb/crates/flaredb-storage/src/rocks_engine.rs @@ -152,36 +152,30 @@ impl RocksEngine { // Get first let first = { - let mut iter = self - .db - .iterator_cf(&cf, rocksdb::IteratorMode::Start); - iter.next() - .and_then(|r| r.ok()) - .and_then(|(k, _)| { - if k.len() == 8 { - let mut arr = [0u8; 8]; - arr.copy_from_slice(&k); - Some(u64::from_be_bytes(arr)) - } else { - None - } - }) + let mut iter = self.db.iterator_cf(&cf, rocksdb::IteratorMode::Start); + iter.next().and_then(|r| r.ok()).and_then(|(k, _)| { + if k.len() == 8 { + let mut arr = [0u8; 8]; + arr.copy_from_slice(&k); + Some(u64::from_be_bytes(arr)) + } else { + None + } + }) }; // Get last let last = { let mut iter = self.db.iterator_cf(&cf, rocksdb::IteratorMode::End); - iter.next() - .and_then(|r| r.ok()) - .and_then(|(k, _)| { - if k.len() == 8 { - let mut arr = [0u8; 8]; - arr.copy_from_slice(&k); - Some(u64::from_be_bytes(arr)) - } else { - None - } - }) + iter.next().and_then(|r| r.ok()).and_then(|(k, _)| { + if k.len() == 8 { + let mut arr = [0u8; 8]; + arr.copy_from_slice(&k); + Some(u64::from_be_bytes(arr)) + } else { + None + } + }) }; Ok((first, last)) @@ -249,9 +243,7 @@ impl StorageEngine for RocksEngine { .db .cf_handle("default") .ok_or_else(|| StorageError::Serialization("CF default not found".to_string()))?; - self.db - .delete_cf(&cf, key) - .map_err(StorageError::RocksDb) + self.db.delete_cf(&cf, key).map_err(StorageError::RocksDb) } async fn get_cas(&self, key: &[u8]) -> Result)>, StorageError> { @@ -319,9 +311,7 @@ impl StorageEngine for RocksEngine { .db .cf_handle("cas") .ok_or_else(|| StorageError::Serialization("CF cas not found".to_string()))?; - self.db - .delete_cf(&cf, key) - .map_err(StorageError::RocksDb) + self.db.delete_cf(&cf, key).map_err(StorageError::RocksDb) } async fn scan_raw( diff --git a/iam/crates/iam-api/src/credential_service.rs b/iam/crates/iam-api/src/credential_service.rs index 74b81fc..388dddb 100644 --- a/iam/crates/iam-api/src/credential_service.rs +++ b/iam/crates/iam-api/src/credential_service.rs @@ -457,6 +457,77 @@ mod tests { assert_eq!(err.code(), Status::permission_denied("").code()); } + #[tokio::test] + async fn credential_rotation_cutover_keeps_new_key_live() { + let (svc, principal_store) = test_service(); + seed_service_account(&principal_store, "p1", "org-a", "project-a").await; + + let old_credential = svc + .create_s3_credential(Request::new(CreateS3CredentialRequest { + principal_id: "p1".into(), + description: "old".into(), + 
expires_at: None, + org_id: Some("org-a".into()), + project_id: Some("project-a".into()), + principal_kind: PrincipalKind::ServiceAccount as i32, + })) + .await + .unwrap() + .into_inner(); + + let new_credential = svc + .create_s3_credential(Request::new(CreateS3CredentialRequest { + principal_id: "p1".into(), + description: "new".into(), + expires_at: None, + org_id: Some("org-a".into()), + project_id: Some("project-a".into()), + principal_kind: PrincipalKind::ServiceAccount as i32, + })) + .await + .unwrap() + .into_inner(); + + assert_ne!(old_credential.access_key_id, new_credential.access_key_id); + + let listed = svc + .list_credentials(Request::new(ListCredentialsRequest { + principal_id: "p1".into(), + })) + .await + .unwrap() + .into_inner(); + assert_eq!(listed.credentials.len(), 2); + + let revoke_old = svc + .revoke_credential(Request::new(RevokeCredentialRequest { + access_key_id: old_credential.access_key_id.clone(), + })) + .await + .unwrap() + .into_inner(); + assert!(revoke_old.success); + + let old_err = svc + .get_secret_key(Request::new(GetSecretKeyRequest { + access_key_id: old_credential.access_key_id, + })) + .await + .unwrap_err(); + assert_eq!(old_err.code(), Status::permission_denied("").code()); + + let new_secret = svc + .get_secret_key(Request::new(GetSecretKeyRequest { + access_key_id: new_credential.access_key_id, + })) + .await + .unwrap() + .into_inner(); + assert_eq!(new_secret.principal_id, "p1"); + assert_eq!(new_secret.org_id.as_deref(), Some("org-a")); + assert_eq!(new_secret.project_id.as_deref(), Some("project-a")); + } + #[tokio::test] async fn expired_key_is_denied() { let (svc, principal_store) = test_service(); diff --git a/iam/crates/iam-api/src/lib.rs b/iam/crates/iam-api/src/lib.rs index bc3a694..2ae7509 100644 --- a/iam/crates/iam-api/src/lib.rs +++ b/iam/crates/iam-api/src/lib.rs @@ -9,11 +9,11 @@ pub mod proto { pub use crate::generated::iam::v1::*; } +pub use apigateway_api::GatewayAuthServiceServer; +pub use credential_service::IamCredentialService; +pub use gateway_auth_service::GatewayAuthServiceImpl; pub use generated::iam::v1::{ iam_admin_server, iam_authz_server, iam_credential_server, iam_token_server, }; -pub use credential_service::IamCredentialService; -pub use gateway_auth_service::GatewayAuthServiceImpl; pub use iam_service::{IamAdminService, IamAuthzService}; pub use token_service::IamTokenService; -pub use apigateway_api::GatewayAuthServiceServer; diff --git a/iam/crates/iam-authn/src/jwt.rs b/iam/crates/iam-authn/src/jwt.rs index ea3d2a0..2bd120b 100644 --- a/iam/crates/iam-authn/src/jwt.rs +++ b/iam/crates/iam-authn/src/jwt.rs @@ -290,9 +290,7 @@ impl JwtVerifier { None => { let mut keys = jwks.keys.iter().filter(is_signing_key); let key = keys.next().ok_or_else(|| { - Error::Iam(IamError::InvalidToken( - "JWKS has no signing keys".into(), - )) + Error::Iam(IamError::InvalidToken("JWKS has no signing keys".into())) })?; if keys.next().is_some() { return Err(Error::Iam(IamError::InvalidToken( diff --git a/iam/crates/iam-authn/src/mtls.rs b/iam/crates/iam-authn/src/mtls.rs index 68dc7d5..53afd02 100644 --- a/iam/crates/iam-authn/src/mtls.rs +++ b/iam/crates/iam-authn/src/mtls.rs @@ -350,4 +350,45 @@ mod tests { assert!(!verifier.matches_pattern("exact-match", "other")); assert!(verifier.matches_pattern("*", "anything")); } + + #[test] + fn test_mtls_rotation_overlap_and_cutover() { + let mapping = PrincipalMapping { + kind: PrincipalKind::ServiceAccount, + id_template: "compute-agent".into(), + node_id_from_cn: false, + }; + 
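+        // Overlap phase: this verifier accepts both the old (v1) and the new
+        // (v2) certificate CNs, so agents can present either certificate during
+        // the rotation window while still resolving to the same principal.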
+ let overlap_verifier = MtlsVerifier::new( + MtlsVerifierConfig::new() + .add_mapping("compute-agent-v1.service.internal", mapping.clone()) + .add_mapping("compute-agent-v2.service.internal", mapping.clone()) + .with_required_org("cloud-platform"), + ); + + let mut old_cert = test_cert_info(); + old_cert.common_name = "compute-agent-v1.service.internal".into(); + old_cert.fingerprint = "sha256:old".into(); + + let mut new_cert = test_cert_info(); + new_cert.common_name = "compute-agent-v2.service.internal".into(); + new_cert.fingerprint = "sha256:new".into(); + + let old_overlap = overlap_verifier.verify(&old_cert).unwrap(); + assert_eq!(old_overlap.principal_ref.id, "compute-agent"); + + let new_overlap = overlap_verifier.verify(&new_cert).unwrap(); + assert_eq!(new_overlap.principal_ref.id, "compute-agent"); + + let cutover_verifier = MtlsVerifier::new( + MtlsVerifierConfig::new() + .add_mapping("compute-agent-v2.service.internal", mapping) + .with_required_org("cloud-platform"), + ); + + assert!(cutover_verifier.verify(&old_cert).is_err()); + + let new_cutover = cutover_verifier.verify(&new_cert).unwrap(); + assert_eq!(new_cutover.principal_ref.id, "compute-agent"); + } } diff --git a/iam/crates/iam-authz/src/evaluator.rs b/iam/crates/iam-authz/src/evaluator.rs index 5425a8a..f0a8041 100644 --- a/iam/crates/iam-authz/src/evaluator.rs +++ b/iam/crates/iam-authz/src/evaluator.rs @@ -474,10 +474,7 @@ mod tests { "project/*/instance/*", "project/p1/instance/vm-1" )); - assert!(matches_resource( - "project/p1/*", - "project/p1/instance/vm-1" - )); + assert!(matches_resource("project/p1/*", "project/p1/instance/vm-1")); assert!(!matches_resource( "project/p2/*", "project/p1/instance/vm-1" diff --git a/iam/crates/iam-server/src/main.rs b/iam/crates/iam-server/src/main.rs index d2ee8e5..7653e8d 100644 --- a/iam/crates/iam-server/src/main.rs +++ b/iam/crates/iam-server/src/main.rs @@ -12,11 +12,11 @@ use std::time::{SystemTime, UNIX_EPOCH}; use chainfire_client::Client as ChainFireClient; use clap::Parser; use metrics_exporter_prometheus::PrometheusBuilder; +use tokio_stream::wrappers::TcpListenerStream; use tonic::service::Interceptor; use tonic::transport::{Certificate, Identity, Server, ServerTlsConfig}; use tonic::{metadata::MetadataMap, Request, Status}; use tonic_health::server::health_reporter; -use tokio_stream::wrappers::TcpListenerStream; use tracing::{info, warn}; use iam_api::{ diff --git a/iam/crates/iam-store/src/backend.rs b/iam/crates/iam-store/src/backend.rs index c956281..eb3a94d 100644 --- a/iam/crates/iam-store/src/backend.rs +++ b/iam/crates/iam-store/src/backend.rs @@ -369,10 +369,13 @@ impl StorageBackend for ChainfireBackend { let mut client = timeout(STORAGE_RPC_TIMEOUT, self.client.lock()) .await .map_err(|_| Error::Storage(StorageError::Timeout))?; - let (results, _) = timeout(STORAGE_RPC_TIMEOUT, client.scan_prefix(prefix, limit as i64)) - .await - .map_err(|_| Error::Storage(StorageError::Timeout))? - .map_err(map_chainfire_error)?; + let (results, _) = timeout( + STORAGE_RPC_TIMEOUT, + client.scan_prefix(prefix, limit as i64), + ) + .await + .map_err(|_| Error::Storage(StorageError::Timeout))? 
+ .map_err(map_chainfire_error)?; Ok(results .into_iter() @@ -388,10 +391,13 @@ impl StorageBackend for ChainfireBackend { let mut client = timeout(STORAGE_RPC_TIMEOUT, self.client.lock()) .await .map_err(|_| Error::Storage(StorageError::Timeout))?; - let (results, _) = timeout(STORAGE_RPC_TIMEOUT, client.scan_range(start, end, limit as i64)) - .await - .map_err(|_| Error::Storage(StorageError::Timeout))? - .map_err(map_chainfire_error)?; + let (results, _) = timeout( + STORAGE_RPC_TIMEOUT, + client.scan_range(start, end, limit as i64), + ) + .await + .map_err(|_| Error::Storage(StorageError::Timeout))? + .map_err(map_chainfire_error)?; Ok(results .into_iter() @@ -419,11 +425,13 @@ impl StorageBackend for ChainfireBackend { let mut client = timeout(STORAGE_RPC_TIMEOUT, self.client.lock()) .await .map_err(|_| Error::Storage(StorageError::Timeout))?; - let (results, next) = - timeout(STORAGE_RPC_TIMEOUT, client.scan_range(&start, &end, limit as i64)) - .await - .map_err(|_| Error::Storage(StorageError::Timeout))? - .map_err(map_chainfire_error)?; + let (results, next) = timeout( + STORAGE_RPC_TIMEOUT, + client.scan_range(&start, &end, limit as i64), + ) + .await + .map_err(|_| Error::Storage(StorageError::Timeout))? + .map_err(map_chainfire_error)?; let kvs = results .into_iter() @@ -761,13 +769,10 @@ impl StorageBackend for FlareDbBackend { let mut client = timeout(STORAGE_RPC_TIMEOUT, self.client.lock()) .await .map_err(|_| Error::Storage(StorageError::Timeout))?; - let (entries, next) = timeout( - STORAGE_RPC_TIMEOUT, - client.cas_scan(s, end.to_vec(), limit), - ) - .await - .map_err(|_| Error::Storage(StorageError::Timeout))? - .map_err(map_flaredb_error)?; + let (entries, next) = timeout(STORAGE_RPC_TIMEOUT, client.cas_scan(s, end.to_vec(), limit)) + .await + .map_err(|_| Error::Storage(StorageError::Timeout))? 
+ .map_err(map_flaredb_error)?; let kvs = entries .into_iter() @@ -889,7 +894,10 @@ impl SqlBackend { fn row_to_kv(key: String, value: Vec, version: i64) -> Result { let version = u64::try_from(version).map_err(|e| { - Error::Storage(StorageError::Backend(format!("Invalid version in SQL row: {}", e))) + Error::Storage(StorageError::Backend(format!( + "Invalid version in SQL row: {}", + e + ))) })?; Ok(KvPair { key: Bytes::from(key.into_bytes()), @@ -1026,7 +1034,9 @@ impl StorageBackend for SqlBackend { .bind(key) .fetch_optional(pool) .await - .map_err(|e| Error::Storage(StorageError::Backend(e.to_string())))?; + .map_err(|e| { + Error::Storage(StorageError::Backend(e.to_string())) + })?; match actual { Some(v) => Ok(CasResult::Conflict { expected: 0, @@ -1042,13 +1052,14 @@ impl StorageBackend for SqlBackend { } } SqlBackendKind::Sqlite(pool) => { - let result = - sqlx::query("INSERT OR IGNORE INTO iam_kv (key, value, version) VALUES (?1, ?2, 1)") - .bind(key) - .bind(value) - .execute(pool) - .await - .map_err(|e| Error::Storage(StorageError::Backend(e.to_string())))?; + let result = sqlx::query( + "INSERT OR IGNORE INTO iam_kv (key, value, version) VALUES (?1, ?2, 1)", + ) + .bind(key) + .bind(value) + .execute(pool) + .await + .map_err(|e| Error::Storage(StorageError::Backend(e.to_string())))?; if result.rows_affected() > 0 { Ok(CasResult::Success(1)) } else { @@ -1057,7 +1068,9 @@ impl StorageBackend for SqlBackend { .bind(key) .fetch_optional(pool) .await - .map_err(|e| Error::Storage(StorageError::Backend(e.to_string())))?; + .map_err(|e| { + Error::Storage(StorageError::Backend(e.to_string())) + })?; match actual { Some(v) => Ok(CasResult::Conflict { expected: 0, @@ -1173,22 +1186,18 @@ impl StorageBackend for SqlBackend { async fn delete(&self, key: &[u8]) -> Result { let key = Self::key_to_text(key)?; let rows = match &self.backend { - SqlBackendKind::Postgres(pool) => { - sqlx::query("DELETE FROM iam_kv WHERE key = $1") - .bind(key) - .execute(pool) - .await - .map_err(|e| Error::Storage(StorageError::Backend(e.to_string())))? - .rows_affected() - } - SqlBackendKind::Sqlite(pool) => { - sqlx::query("DELETE FROM iam_kv WHERE key = ?1") - .bind(key) - .execute(pool) - .await - .map_err(|e| Error::Storage(StorageError::Backend(e.to_string())))? - .rows_affected() - } + SqlBackendKind::Postgres(pool) => sqlx::query("DELETE FROM iam_kv WHERE key = $1") + .bind(key) + .execute(pool) + .await + .map_err(|e| Error::Storage(StorageError::Backend(e.to_string())))? + .rows_affected(), + SqlBackendKind::Sqlite(pool) => sqlx::query("DELETE FROM iam_kv WHERE key = ?1") + .bind(key) + .execute(pool) + .await + .map_err(|e| Error::Storage(StorageError::Backend(e.to_string())))? + .rows_affected(), }; Ok(rows > 0) } diff --git a/iam/crates/iam-store/src/org_store.rs b/iam/crates/iam-store/src/org_store.rs index 9729696..a38f36b 100644 --- a/iam/crates/iam-store/src/org_store.rs +++ b/iam/crates/iam-store/src/org_store.rs @@ -30,9 +30,9 @@ impl OrgStore { let bytes = serde_json::to_vec(org).map_err(|e| Error::Serialization(e.to_string()))?; match self.backend.cas(key.as_bytes(), 0, &bytes).await? { CasResult::Success(version) => Ok(version), - CasResult::Conflict { .. } => { - Err(Error::Iam(IamError::OrganizationAlreadyExists(org.id.clone()))) - } + CasResult::Conflict { .. 
} => Err(Error::Iam(IamError::OrganizationAlreadyExists( + org.id.clone(), + ))), CasResult::NotFound => Err(Error::Internal("Unexpected CAS result".into())), } } @@ -48,20 +48,31 @@ impl OrgStore { } pub async fn get(&self, id: &str) -> Result> { - Ok(self.get_json::(self.primary_key(id).as_bytes()).await?.map(|v| v.0)) + Ok(self + .get_json::(self.primary_key(id).as_bytes()) + .await? + .map(|v| v.0)) } pub async fn get_with_version(&self, id: &str) -> Result> { - self.get_json::(self.primary_key(id).as_bytes()).await + self.get_json::(self.primary_key(id).as_bytes()) + .await } pub async fn update(&self, org: &Organization, expected_version: u64) -> Result { let key = self.primary_key(&org.id); let bytes = serde_json::to_vec(org).map_err(|e| Error::Serialization(e.to_string()))?; - match self.backend.cas(key.as_bytes(), expected_version, &bytes).await? { + match self + .backend + .cas(key.as_bytes(), expected_version, &bytes) + .await? + { CasResult::Success(version) => Ok(version), CasResult::Conflict { expected, actual } => { - Err(Error::Storage(iam_types::StorageError::CasConflict { expected, actual })) + Err(Error::Storage(iam_types::StorageError::CasConflict { + expected, + actual, + })) } CasResult::NotFound => Err(Error::Iam(IamError::OrganizationNotFound(org.id.clone()))), } @@ -72,7 +83,10 @@ impl OrgStore { } pub async fn list(&self) -> Result> { - let pairs = self.backend.scan_prefix(keys::ORGS.as_bytes(), 10_000).await?; + let pairs = self + .backend + .scan_prefix(keys::ORGS.as_bytes(), 10_000) + .await?; let mut orgs = Vec::new(); for pair in pairs { let org: Organization = serde_json::from_slice(&pair.value) @@ -83,7 +97,11 @@ impl OrgStore { } pub async fn exists(&self, id: &str) -> Result { - Ok(self.backend.get(self.primary_key(id).as_bytes()).await?.is_some()) + Ok(self + .backend + .get(self.primary_key(id).as_bytes()) + .await? + .is_some()) } fn primary_key(&self, id: &str) -> String { diff --git a/iam/crates/iam-store/src/project_store.rs b/iam/crates/iam-store/src/project_store.rs index c7ffe98..94c2292 100644 --- a/iam/crates/iam-store/src/project_store.rs +++ b/iam/crates/iam-store/src/project_store.rs @@ -28,24 +28,22 @@ impl ProjectStore { pub async fn create(&self, project: &Project) -> Result { let key = self.primary_key(&project.org_id, &project.id); - let bytes = - serde_json::to_vec(project).map_err(|e| Error::Serialization(e.to_string()))?; + let bytes = serde_json::to_vec(project).map_err(|e| Error::Serialization(e.to_string()))?; match self.backend.cas(key.as_bytes(), 0, &bytes).await? { CasResult::Success(version) => { self.create_indexes(project).await?; Ok(version) } - CasResult::Conflict { .. } => Err(Error::Iam(IamError::ProjectAlreadyExists( - project.key(), - ))), + CasResult::Conflict { .. } => { + Err(Error::Iam(IamError::ProjectAlreadyExists(project.key()))) + } CasResult::NotFound => Err(Error::Internal("Unexpected CAS result".into())), } } pub async fn create_if_missing(&self, project: &Project) -> Result { let key = self.primary_key(&project.org_id, &project.id); - let bytes = - serde_json::to_vec(project).map_err(|e| Error::Serialization(e.to_string()))?; + let bytes = serde_json::to_vec(project).map_err(|e| Error::Serialization(e.to_string()))?; match self.backend.cas(key.as_bytes(), 0, &bytes).await? 
{ CasResult::Success(_) => { self.create_indexes(project).await?; @@ -70,15 +68,21 @@ impl ProjectStore { pub async fn update(&self, project: &Project, expected_version: u64) -> Result { let key = self.primary_key(&project.org_id, &project.id); - let bytes = - serde_json::to_vec(project).map_err(|e| Error::Serialization(e.to_string()))?; - match self.backend.cas(key.as_bytes(), expected_version, &bytes).await? { + let bytes = serde_json::to_vec(project).map_err(|e| Error::Serialization(e.to_string()))?; + match self + .backend + .cas(key.as_bytes(), expected_version, &bytes) + .await? + { CasResult::Success(version) => { self.create_indexes(project).await?; Ok(version) } CasResult::Conflict { expected, actual } => { - Err(Error::Storage(iam_types::StorageError::CasConflict { expected, actual })) + Err(Error::Storage(iam_types::StorageError::CasConflict { + expected, + actual, + })) } CasResult::NotFound => Err(Error::Iam(IamError::ProjectNotFound(project.key()))), } @@ -86,7 +90,10 @@ impl ProjectStore { pub async fn delete(&self, org_id: &str, id: &str) -> Result { if let Some(project) = self.get(org_id, id).await? { - let deleted = self.backend.delete(self.primary_key(org_id, id).as_bytes()).await?; + let deleted = self + .backend + .delete(self.primary_key(org_id, id).as_bytes()) + .await?; if deleted { self.delete_indexes(&project).await?; } @@ -97,7 +104,10 @@ impl ProjectStore { } pub async fn list(&self) -> Result> { - let pairs = self.backend.scan_prefix(keys::PROJECTS.as_bytes(), 10_000).await?; + let pairs = self + .backend + .scan_prefix(keys::PROJECTS.as_bytes(), 10_000) + .await?; let mut projects = Vec::new(); for pair in pairs { if String::from_utf8_lossy(&pair.key).starts_with(keys::BY_ORG) { @@ -137,7 +147,9 @@ impl ProjectStore { async fn create_indexes(&self, project: &Project) -> Result<()> { let key = format!("{}{}/{}", keys::BY_ORG, project.org_id, project.id); - self.backend.put(key.as_bytes(), project.id.as_bytes()).await?; + self.backend + .put(key.as_bytes(), project.id.as_bytes()) + .await?; Ok(()) } diff --git a/iam/crates/iam-types/src/tenant.rs b/iam/crates/iam-types/src/tenant.rs index 5718a77..3775f10 100644 --- a/iam/crates/iam-types/src/tenant.rs +++ b/iam/crates/iam-types/src/tenant.rs @@ -50,11 +50,7 @@ pub struct Project { } impl Project { - pub fn new( - id: impl Into, - org_id: impl Into, - name: impl Into, - ) -> Self { + pub fn new(id: impl Into, org_id: impl Into, name: impl Into) -> Self { Self { id: id.into(), org_id: org_id.into(), diff --git a/k8shost/Cargo.toml b/k8shost/Cargo.toml index f348038..a7f596f 100644 --- a/k8shost/Cargo.toml +++ b/k8shost/Cargo.toml @@ -7,6 +7,13 @@ members = [ "crates/k8shost-controllers", "crates/k8shost-server", ] +# The archived helper binaries stay in the workspace for manual compatibility +# builds only. Default workspace commands cover the supported server surface. +default-members = [ + "crates/k8shost-types", + "crates/k8shost-proto", + "crates/k8shost-server", +] resolver = "2" [workspace.dependencies] diff --git a/k8shost/README.md b/k8shost/README.md new file mode 100644 index 0000000..aec8a95 --- /dev/null +++ b/k8shost/README.md @@ -0,0 +1,20 @@ +# K8sHost + +`k8shost` is UltraCloud's tenant workload control plane. The supported surface is `k8shost-server`, which exposes node, pod, deployment, and service APIs and then projects tenant networking and publication into `prismnet`, `flashdns`, and `fiberlb`. 
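+
+As a quick sketch of what that supported surface means for day-to-day builds (assuming plain `cargo` run from the `k8shost/` workspace root, and that package names match the crate directories), the default workspace build covers only the supported crates, while the archived helpers must be requested by name:
+
+```sh
+# Builds only the default members: k8shost-types, k8shost-proto, k8shost-server.
+cargo build
+
+# The archived scaffolds still compile, but only when asked for explicitly.
+cargo build -p k8shost-cni -p k8shost-controllers
+```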
+ +## Supported scope + +- tenant pod, deployment, and service lifecycle APIs +- bounded `WatchPods` snapshot streams for the current matching pod set +- tenant-aware auth integration through IAM +- service publication through `prismnet`, `flashdns`, and `fiberlb` +- VM-cluster coverage through `fresh-smoke` and `fresh-matrix` +- API/control-plane product surface only; runtime dataplane helpers stay archived non-product + +## Archived Non-Product Scaffolds + +- `k8shost-cni` +- `k8shost-controllers` +- `lightningstor-csi` + +Those binaries stay in-tree as internal integration scaffolds. They are excluded from the default workspace members, they are not wired into canonical profiles, they are not started by the publishable suites, and they should not be presented as productized entrypoints until they have their own documented contract and coverage. diff --git a/k8shost/crates/k8shost-cni/src/main.rs b/k8shost/crates/k8shost-cni/src/main.rs index 6191dc1..a00619e 100644 --- a/k8shost/crates/k8shost-cni/src/main.rs +++ b/k8shost/crates/k8shost-cni/src/main.rs @@ -1,13 +1,9 @@ -//! PrismNET CNI Plugin for k8shost +//! Internal PrismNET CNI helper scaffold for k8shost. //! -//! This binary implements the CNI 1.0.0 specification to integrate k8shost pods -//! with PrismNET's OVN-based virtual networking. -//! -//! CNI operations: -//! - ADD: Create network interface and attach to OVN logical switch -//! - DEL: Remove network interface and clean up OVN resources -//! - CHECK: Verify network configuration is correct -//! - VERSION: Report supported CNI versions +//! This binary stays in-tree for development experiments around tenant port +//! allocation, but it is outside the supported UltraCloud surface. It records +//! PrismNET port intent and returns CNI-shaped metadata without claiming to +//! program a production dataplane in the host network namespace. use anyhow::{Context, Result}; use prismnet_api::{ @@ -182,13 +178,9 @@ async fn handle_add() -> Result<()> { ); } - // TODO: In production, we would: - // 1. Create veth pair - // 2. Move one end to container network namespace - // 3. Configure IP address and routes - // 4. Configure OVN logical switch port with MAC/IP - // - // For MVP, we return the allocated IP/MAC information + // This internal scaffold stops after PrismNET port allocation. It does not + // create a veth pair, move devices into the target netns, or reconcile OVN + // switch state on the local host. // Extract gateway from subnet (would come from GetSubnet call in production) let gateway = port.ip_address.split('.').take(3).collect::>().join(".") + ".1"; @@ -217,7 +209,7 @@ async fn handle_add() -> Result<()> { println!("{}", serde_json::to_string(&result)?); - tracing::info!("CNI ADD operation completed successfully"); + tracing::info!("CNI ADD scaffold completed after PrismNET port allocation"); Ok(()) } @@ -302,22 +294,14 @@ async fn handle_del() -> Result<()> { } } - // TODO: In production, we would also: - // 1. Remove network interfaces from container namespace - // 2. Clean up veth pair - // 3. 
Remove OVN logical switch port configuration - - tracing::info!("CNI DEL operation completed successfully"); + tracing::info!("CNI DEL scaffold completed after PrismNET port cleanup"); Ok(()) } fn handle_check() -> Result<()> { - // TODO: Implement CHECK logic - // Verify that the network configuration is still valid - // For now, return success - - tracing::info!("CNI CHECK operation - basic validation passed"); - Ok(()) + Err(anyhow::anyhow!( + "k8shost-cni is an internal scaffold and does not implement CNI CHECK or host network reconciliation" + )) } fn handle_version() -> Result<()> { diff --git a/k8shost/crates/k8shost-controllers/src/main.rs b/k8shost/crates/k8shost-controllers/src/main.rs index 269caab..b5a3026 100644 --- a/k8shost/crates/k8shost-controllers/src/main.rs +++ b/k8shost/crates/k8shost-controllers/src/main.rs @@ -1,14 +1,8 @@ //! k8shost Controllers //! -//! This binary runs the UltraCloud integration controllers for k8shost: -//! - FiberLB Controller: Manages LoadBalancer services -//! - FlashDNS Controller: Manages Service DNS records -//! - IAM Webhook: Handles TokenReview authentication -//! -//! Each controller follows the watch-reconcile pattern: -//! 1. Watch k8s API for resource changes -//! 2. Reconcile desired state with UltraCloud components -//! 3. Update k8s resource status +//! This binary remains an internal integration scaffold for controller-side +//! experiments around `k8shost-server`. The supported surface is the API server +//! itself; canonical profiles and publishable suites do not start this process. use anyhow::Result; use tracing::info; @@ -17,63 +11,10 @@ use tracing::info; async fn main() -> Result<()> { tracing_subscriber::fmt::init(); - info!("k8shost controllers starting"); - - // TODO: Initialize controllers - // 1. FiberLB controller - watch Service resources with type=LoadBalancer - // 2. FlashDNS controller - watch Service resources for DNS sync - // 3. IAM webhook server - handle TokenReview requests - - // Start controller loops - tokio::select! { - result = fiberlb_controller() => { - info!("FiberLB controller exited: {:?}", result); - } - result = flashdns_controller() => { - info!("FlashDNS controller exited: {:?}", result); - } - result = iam_webhook_server() => { - info!("IAM webhook server exited: {:?}", result); - } - } + info!( + "k8shost-controllers is an internal scaffold; the supported k8shost surface is k8shost-server" + ); + tokio::signal::ctrl_c().await?; Ok(()) } - -async fn fiberlb_controller() -> Result<()> { - // TODO: Implement FiberLB controller - // 1. Watch Service resources (type=LoadBalancer) - // 2. Allocate external IP from FiberLB - // 3. Configure load balancer backend pool - // 4. Update Service.status.loadBalancer.ingress - - info!("FiberLB controller not yet implemented"); - tokio::time::sleep(tokio::time::Duration::from_secs(3600)).await; - Ok(()) -} - -async fn flashdns_controller() -> Result<()> { - // TODO: Implement FlashDNS controller - // 1. Watch Service resources - // 2. Create/update DNS records in FlashDNS - // - ..svc.cluster.local -> ClusterIP - // - ...plasma.cloud -> ExternalIP (if LoadBalancer) - // 3. Handle service deletion (cleanup DNS records) - - info!("FlashDNS controller not yet implemented"); - tokio::time::sleep(tokio::time::Duration::from_secs(3600)).await; - Ok(()) -} - -async fn iam_webhook_server() -> Result<()> { - // TODO: Implement IAM webhook server - // 1. Start HTTPS server on port 8443 - // 2. Handle TokenReview requests from k8s API server - // 3. 
Validate bearer tokens with IAM service - // 4. Return UserInfo with org_id, project_id, groups - // 5. Map IAM roles to k8s RBAC groups - - info!("IAM webhook server not yet implemented"); - tokio::time::sleep(tokio::time::Duration::from_secs(3600)).await; - Ok(()) -} diff --git a/k8shost/crates/k8shost-csi/src/main.rs b/k8shost/crates/k8shost-csi/src/main.rs index d147406..c02a443 100644 --- a/k8shost/crates/k8shost-csi/src/main.rs +++ b/k8shost/crates/k8shost-csi/src/main.rs @@ -1,13 +1,8 @@ //! LightningStor CSI Driver for k8shost //! -//! This binary implements the Container Storage Interface (CSI) specification -//! to integrate k8shost persistent volumes with LightningStor's distributed -//! block storage system. -//! -//! CSI services: -//! - Identity Service: Plugin info and capabilities -//! - Controller Service: Volume lifecycle (create, delete, attach, detach) -//! - Node Service: Volume staging and publishing on nodes +//! This binary remains an internal storage-side experiment. The published +//! `k8shost` contract stops at `k8shost-server`; CSI is kept out of canonical +//! profiles and publishable suites until it has its own documented proof path. use anyhow::Result; use tracing::info; @@ -16,30 +11,9 @@ async fn main() -> Result<()> { tracing_subscriber::fmt::init(); - let addr = "0.0.0.0:50051"; - - info!("LightningStor CSI driver starting on {}", addr); - - // TODO: Implement CSI gRPC services - // 1. IdentityService - GetPluginInfo, GetPluginCapabilities, Probe - // 2. ControllerService - CreateVolume, DeleteVolume, ControllerPublishVolume, ControllerUnpublishVolume - // 3. NodeService - NodeStageVolume, NodeUnstageVolume, NodePublishVolume, NodeUnpublishVolume - - // Placeholder server that will be replaced with actual CSI implementation - info!("CSI driver not yet implemented - exiting"); + info!( + "lightningstor-csi is an internal scaffold and is not part of the supported k8shost surface" + ); Ok(()) } - -// Placeholder types for future CSI implementation -#[allow(dead_code)] -mod csi { - /// Identity service provides plugin metadata and capabilities - pub struct IdentityService; - - /// Controller service manages volume lifecycle - pub struct ControllerService; - - /// Node service manages volume mounting on nodes - pub struct NodeService; -} diff --git a/k8shost/crates/k8shost-server/src/services/pod.rs b/k8shost/crates/k8shost-server/src/services/pod.rs index 366b513..b420a03 100644 --- a/k8shost/crates/k8shost-server/src/services/pod.rs +++ b/k8shost/crates/k8shost-server/src/services/pod.rs @@ -16,7 +16,7 @@ use k8shost_proto::{ }; use k8shost_types::PodStatus; use std::sync::Arc; -use tokio::sync::RwLock; +use tokio::sync::{mpsc, RwLock}; use tokio_stream::wrappers::ReceiverStream; use tonic::{Request, Response, Status}; use uuid::Uuid; @@ -280,7 +280,7 @@ impl PodServiceImpl { value: e.value.clone(), }) .collect(), - resources: None, // TODO: Add resource requirements conversion + resources: None, }) .collect(), restart_policy: spec.restart_policy.clone(), @@ -310,6 +310,49 @@ impl PodServiceImpl { status, }) } + + fn parse_resource_version(resource_version: Option<&str>) -> Result<u64, Status> { + match resource_version.map(str::trim) { + None | Some("") => Ok(0), + Some(value) => value.parse::<u64>().map_err(|_| { + Status::invalid_argument("resource_version must be an unsigned integer") + }), + } + } + + fn build_watch_snapshot_events( + mut pods: Vec<k8shost_types::Pod>, + minimum_resource_version: u64, + ) -> Vec<WatchEvent> { + pods.sort_by(|lhs, rhs| { + lhs.metadata + .namespace + .as_deref() + .unwrap_or("default") + .cmp(rhs.metadata.namespace.as_deref().unwrap_or("default")) + .then_with(|| lhs.metadata.name.cmp(&rhs.metadata.name)) + }); + + pods.into_iter() + .filter_map(|pod| { + let current_resource_version = pod + .metadata + .resource_version + .as_deref() + .and_then(|value| value.parse::<u64>().ok()) + .unwrap_or(0); + if minimum_resource_version > 0 + && current_resource_version <= minimum_resource_version + { + return None; + } + Some(WatchEvent { + r#type: "ADDED".to_string(), + object: Some(Self::to_proto_pod(&pod)), + }) + }) + .collect() + } } #[tonic::async_trait] @@ -686,10 +729,93 @@ impl PodService for PodServiceImpl { ), ) .await?; - let _req = request.into_inner(); + let req = request.into_inner(); + let minimum_resource_version = + Self::parse_resource_version(req.resource_version.as_deref())?; + let pods = self + .storage + .list_pods( + &tenant_context.org_id, + &tenant_context.project_id, + req.namespace.as_deref(), + None, + ) + .await?; + let events = Self::build_watch_snapshot_events(pods, minimum_resource_version); + let (tx, rx) = mpsc::channel(events.len().max(1)); + for event in events { + tx.try_send(Ok(event)) + .map_err(|_| Status::internal("failed to queue pod watch snapshot"))?; + } + drop(tx); - // TODO: Implement proper watch mechanism with FlareDB change notifications. - // Return unimplemented for now to avoid emitting invalid watch events. - Err(Status::unimplemented("watch_pods is not implemented yet")) + Ok(Response::new(ReceiverStream::new(rx))) + } +} + +#[cfg(test)] +mod tests { + use super::PodServiceImpl; + use chrono::Utc; + use k8shost_types::{Container, ObjectMeta, Pod, PodSpec}; + use std::collections::HashMap; + + fn pod(name: &str, namespace: &str, resource_version: &str) -> Pod { + Pod { + metadata: ObjectMeta { + name: name.to_string(), + namespace: Some(namespace.to_string()), + uid: None, + resource_version: Some(resource_version.to_string()), + creation_timestamp: Some(Utc::now()), + labels: HashMap::new(), + annotations: HashMap::new(), + org_id: Some("org".to_string()), + project_id: Some("project".to_string()), + }, + spec: PodSpec { + containers: vec![Container { + name: "app".to_string(), + image: "example".to_string(), + command: Vec::new(), + args: Vec::new(), + ports: Vec::new(), + env: Vec::new(), + resources: None, + }], + restart_policy: None, + node_name: None, + }, + status: None, + } + } + + #[test] + fn watch_snapshot_filters_old_resource_versions() { + let events = PodServiceImpl::build_watch_snapshot_events( + vec![ + pod("b", "default", "1"), + pod("a", "default", "3"), + pod("c", "ops", "4"), + ], + 2, + ); + + let names: Vec<String> = events + .iter() + .map(|event| event.object.as_ref().unwrap().metadata.as_ref().unwrap().name.clone()) + .collect(); + assert_eq!(names, vec!["a".to_string(), "c".to_string()]); + assert!(events.iter().all(|event| event.r#type == "ADDED")); + } + + #[test] + fn parse_resource_version_rejects_invalid_values() { + assert!(PodServiceImpl::parse_resource_version(Some("not-a-number")).is_err()); + assert_eq!( + PodServiceImpl::parse_resource_version(Some("42")).unwrap(), + 42 + ); + assert_eq!(PodServiceImpl::parse_resource_version(None).unwrap(), 0); } } diff --git a/k8shost/crates/k8shost-server/src/storage.rs b/k8shost/crates/k8shost-server/src/storage.rs index dc7750e..e2a026d 100644 --- a/k8shost/crates/k8shost-server/src/storage.rs +++ b/k8shost/crates/k8shost-server/src/storage.rs @@ -59,9 +59,7 @@ impl Storage { /// Create an in-memory storage for testing 
#[cfg(test)] pub fn new_in_memory() -> Self { - // For testing, we'll use a mock that stores data in a HashMap - // This is a simplified version - in production, use actual FlareDB - unimplemented!("Use new() with a test FlareDB instance") + panic!("new_in_memory requires a test FlareDB instance; use new() or new_direct()") } // ============================================================================ diff --git a/lightningstor/crates/lightningstor-distributed/src/backends/erasure_coded.rs b/lightningstor/crates/lightningstor-distributed/src/backends/erasure_coded.rs index b7f6921..a734863 100644 --- a/lightningstor/crates/lightningstor-distributed/src/backends/erasure_coded.rs +++ b/lightningstor/crates/lightningstor-distributed/src/backends/erasure_coded.rs @@ -190,9 +190,7 @@ impl ErasureCodedBackend { let node = node.clone(); let key = meta_key.clone(); let data = meta_bytes.clone(); - write_futures.push(async move { - node.put_chunk(&key, 0, false, data).await - }); + write_futures.push(async move { node.put_chunk(&key, 0, false, data).await }); } let results = futures::future::join_all(write_futures).await; @@ -303,10 +301,7 @@ impl ErasureCodedBackend { // Require full shard write; without repair loop partial writes reduce redundancy. let min_required = self.total_shards(); if success_count < min_required { - let errors: Vec<_> = results - .into_iter() - .filter_map(|r| r.err()) - .collect(); + let errors: Vec<_> = results.into_iter().filter_map(|r| r.err()).collect(); error!( success_count, min_required, @@ -580,12 +575,7 @@ impl StorageBackend for ErasureCodedBackend { Ok(metadata.original_size) } - async fn put_part( - &self, - upload_id: &str, - part_number: u32, - data: Bytes, - ) -> StorageResult<()> { + async fn put_part(&self, upload_id: &str, part_number: u32, data: Bytes) -> StorageResult<()> { // Use a deterministic part key based on upload_id and part_number let part_key = format!("part_{}_{}", upload_id, part_number); let nodes = self.select_nodes_for_write().await?; @@ -599,12 +589,18 @@ impl StorageBackend for ErasureCodedBackend { let mut write_futures = Vec::with_capacity(self.total_shards()); for (shard_idx, (shard_data, node)) in shards.into_iter().zip(nodes.iter()).enumerate() { let is_parity = shard_idx >= self.data_shards; - let key = format!("{}_{}_{}", part_key, shard_idx, if is_parity { "p" } else { "d" }); + let key = format!( + "{}_{}_{}", + part_key, + shard_idx, + if is_parity { "p" } else { "d" } + ); let node = node.clone(); let shard_bytes = Bytes::from(shard_data); write_futures.push(async move { - node.put_chunk(&key, shard_idx as u32, is_parity, shard_bytes).await + node.put_chunk(&key, shard_idx as u32, is_parity, shard_bytes) + .await }); } @@ -634,7 +630,12 @@ impl StorageBackend for ErasureCodedBackend { let mut shard_futures = FuturesUnordered::new(); for shard_idx in 0..self.total_shards() { let is_parity = shard_idx >= self.data_shards; - let key = format!("{}_{}_{}", part_key, shard_idx, if is_parity { "p" } else { "d" }); + let key = format!( + "{}_{}_{}", + part_key, + shard_idx, + if is_parity { "p" } else { "d" } + ); let nodes = nodes.clone(); let node_selector = self.node_selector.clone(); @@ -743,7 +744,12 @@ impl StorageBackend for ErasureCodedBackend { let mut delete_futures = Vec::new(); for shard_idx in 0..self.total_shards() { let is_parity = shard_idx >= self.data_shards; - let key = format!("{}_{}_{}", part_key, shard_idx, if is_parity { "p" } else { "d" }); + let key = format!( + "{}_{}_{}", + part_key, + shard_idx, + if 
is_parity { "p" } else { "d" } + ); for node in &nodes { let node = node.clone(); @@ -925,7 +931,9 @@ mod tests { let config = create_ec_config(4, 2); let registry = Arc::new(MockNodeRegistry::with_nodes(6)); - let backend = ErasureCodedBackend::new(config, registry.clone()).await.unwrap(); + let backend = ErasureCodedBackend::new(config, registry.clone()) + .await + .unwrap(); let object_id = ObjectId::new(); let data = Bytes::from(vec![42u8; 1024]); @@ -950,7 +958,9 @@ mod tests { let config = create_ec_config(4, 2); let registry = Arc::new(MockNodeRegistry::with_nodes(6)); - let backend = ErasureCodedBackend::new(config, registry.clone()).await.unwrap(); + let backend = ErasureCodedBackend::new(config, registry.clone()) + .await + .unwrap(); let object_id = ObjectId::new(); let data = Bytes::from(vec![42u8; 512]); @@ -973,7 +983,9 @@ mod tests { let config = create_ec_config(4, 2); let registry = Arc::new(MockNodeRegistry::with_nodes(6)); - let backend = ErasureCodedBackend::new(config, registry.clone()).await.unwrap(); + let backend = ErasureCodedBackend::new(config, registry.clone()) + .await + .unwrap(); let object_id = ObjectId::new(); let data = Bytes::from(vec![42u8; 256]); @@ -1016,7 +1028,9 @@ mod tests { }; let registry = Arc::new(MockNodeRegistry::with_nodes(6)); - let backend = ErasureCodedBackend::new(config, registry.clone()).await.unwrap(); + let backend = ErasureCodedBackend::new(config, registry.clone()) + .await + .unwrap(); let object_id = ObjectId::new(); // Create data larger than chunk size (3 KB = 3 chunks) @@ -1060,7 +1074,9 @@ mod tests { }; let registry = Arc::new(MockNodeRegistry::with_nodes(6)); - let backend = ErasureCodedBackend::new(config, registry.clone()).await.unwrap(); + let backend = ErasureCodedBackend::new(config, registry.clone()) + .await + .unwrap(); let object_id = ObjectId::new(); // 2 KB = 4 chunks with 512 byte chunk size @@ -1139,12 +1155,7 @@ mod tests { let key = ChunkId::new(&object_id, 0, shard_idx, is_parity).to_key(); if shard_idx < 4 { fast_nodes[shard_idx] - .put_chunk( - &key, - shard_idx as u32, - is_parity, - Bytes::from(shard_data), - ) + .put_chunk(&key, shard_idx as u32, is_parity, Bytes::from(shard_data)) .await .unwrap(); } else if shard_idx == 4 { @@ -1216,12 +1227,7 @@ mod tests { if shard_idx < 4 { fast_nodes[shard_idx] - .put_chunk( - &key, - shard_idx as u32, - is_parity, - Bytes::from(shard_data), - ) + .put_chunk(&key, shard_idx as u32, is_parity, Bytes::from(shard_data)) .await .unwrap(); } else if shard_idx == 4 { diff --git a/lightningstor/crates/lightningstor-distributed/src/backends/replicated.rs b/lightningstor/crates/lightningstor-distributed/src/backends/replicated.rs index eb661c3..258ebfa 100644 --- a/lightningstor/crates/lightningstor-distributed/src/backends/replicated.rs +++ b/lightningstor/crates/lightningstor-distributed/src/backends/replicated.rs @@ -301,7 +301,8 @@ impl ReplicatedBackend { .await .map_err(|e| StorageError::Backend(e.to_string()))?; - self.read_replicas_from_nodes(&nodes, key, shard_index).await + self.read_replicas_from_nodes(&nodes, key, shard_index) + .await } async fn read_replicas_from_nodes( @@ -310,18 +311,16 @@ impl ReplicatedBackend { key: &str, shard_index: u32, ) -> StorageResult> { - if nodes.is_empty() { return Err(StorageError::Backend( "No healthy storage nodes available".to_string(), )); } - let mut ordered_nodes = Self::ordered_read_nodes(nodes, self - .node_selector - .select_for_read(nodes, key) - .await - .ok()); + let mut ordered_nodes = Self::ordered_read_nodes( + 
nodes, + self.node_selector.select_for_read(nodes, key).await.ok(), + ); if let Some(preferred) = ordered_nodes.first() { match preferred.get_chunk(key, shard_index, false).await { @@ -551,11 +550,17 @@ impl ReplicatedBackend { let mut requests = Vec::with_capacity(chunk_count + 1); for chunk_index in 0..chunk_count { let chunk_key = Self::object_chunk_key(object_id, chunk_index); - let (start, len) = ChunkManager::chunk_range_for_size(data.len(), chunk_index, chunk_size); + let (start, len) = + ChunkManager::chunk_range_for_size(data.len(), chunk_index, chunk_size); let chunk_bytes = data.slice(start..start + len); requests.push((chunk_key, chunk_index as u32, false, chunk_bytes)); } - requests.push((Self::object_metadata_key(object_id), 0, false, metadata.to_bytes())); + requests.push(( + Self::object_metadata_key(object_id), + 0, + false, + metadata.to_bytes(), + )); let nodes = self .select_replica_nodes_for_key(&Self::object_chunk_key(object_id, 0)) @@ -812,7 +817,8 @@ impl ReplicatedBackend { let ordered_nodes = Self::ordered_read_nodes(&nodes, preferred); if metadata.chunk_count > 1 { - if let Some(local_node) = ordered_nodes.iter().find(|node| Self::is_local_node(node)) + if let Some(local_node) = + ordered_nodes.iter().find(|node| Self::is_local_node(node)) { let batch_requests: Vec<(String, u32, bool)> = (0..metadata.chunk_count) .map(|chunk_index| { @@ -843,7 +849,9 @@ impl ReplicatedBackend { } } - if ordered_nodes.len() > 1 && metadata.chunk_count > 1 && !Self::has_local_node(&ordered_nodes) + if ordered_nodes.len() > 1 + && metadata.chunk_count > 1 + && !Self::has_local_node(&ordered_nodes) { match self .read_chunked_object_from_distributed_batches( @@ -877,7 +885,11 @@ impl ReplicatedBackend { for node in ordered_nodes { match node.batch_get_chunks(batch_requests.clone()).await { Ok(chunks) => { - return Self::assemble_chunked_bytes(object_id, metadata.original_size, chunks); + return Self::assemble_chunked_bytes( + object_id, + metadata.original_size, + chunks, + ); } Err(err) => { warn!( @@ -1591,13 +1603,10 @@ mod tests { nodes[2].set_fail_puts(true); let repair_queue = Arc::new(CapturingRepairQueue::default()); - let backend = ReplicatedBackend::new_with_repair_queue( - config, - registry, - Some(repair_queue.clone()), - ) - .await - .unwrap(); + let backend = + ReplicatedBackend::new_with_repair_queue(config, registry, Some(repair_queue.clone())) + .await + .unwrap(); let object_id = ObjectId::new(); backend @@ -1679,7 +1688,10 @@ mod tests { .expect("off-placement node should exist"); let source_bytes = desired_nodes[0].get_chunk(&key, 0, false).await.unwrap(); - off_placement.put_chunk(&key, 0, false, source_bytes).await.unwrap(); + off_placement + .put_chunk(&key, 0, false, source_bytes) + .await + .unwrap(); for node in &desired_nodes { node.delete_chunk(&key).await.unwrap(); assert!(!node.chunk_exists(&key).await.unwrap()); diff --git a/lightningstor/crates/lightningstor-distributed/src/chunk/mod.rs b/lightningstor/crates/lightningstor-distributed/src/chunk/mod.rs index 9dbd322..a1ddb6b 100644 --- a/lightningstor/crates/lightningstor-distributed/src/chunk/mod.rs +++ b/lightningstor/crates/lightningstor-distributed/src/chunk/mod.rs @@ -39,7 +39,11 @@ impl ChunkManager { return self.config.chunk_size; } - let min_chunk_size = self.config.min_chunk_size.min(self.config.chunk_size).max(1); + let min_chunk_size = self + .config + .min_chunk_size + .min(self.config.chunk_size) + .max(1); let max_chunk_size = self.config.max_chunk_size.max(self.config.chunk_size); let 
required = total_size.div_ceil(TARGET_CHUNK_COUNT_PER_OBJECT); let alignment = min_chunk_size; @@ -307,9 +311,18 @@ mod tests { fn test_effective_chunk_size_scales_large_objects_up_to_target_chunk_count() { let manager = ChunkManager::default(); - assert_eq!(manager.effective_chunk_size(4 * 1024 * 1024), 8 * 1024 * 1024); - assert_eq!(manager.effective_chunk_size(256 * 1024 * 1024), 32 * 1024 * 1024); - assert_eq!(manager.effective_chunk_size(1024 * 1024 * 1024), 64 * 1024 * 1024); + assert_eq!( + manager.effective_chunk_size(4 * 1024 * 1024), + 8 * 1024 * 1024 + ); + assert_eq!( + manager.effective_chunk_size(256 * 1024 * 1024), + 32 * 1024 * 1024 + ); + assert_eq!( + manager.effective_chunk_size(1024 * 1024 * 1024), + 64 * 1024 * 1024 + ); } #[test] diff --git a/lightningstor/crates/lightningstor-distributed/src/erasure/mod.rs b/lightningstor/crates/lightningstor-distributed/src/erasure/mod.rs index f9693ea..714b6c2 100644 --- a/lightningstor/crates/lightningstor-distributed/src/erasure/mod.rs +++ b/lightningstor/crates/lightningstor-distributed/src/erasure/mod.rs @@ -158,7 +158,11 @@ impl Codec { /// /// # Returns /// The reconstructed original data. - pub fn decode(&self, shards: Vec>>, original_size: usize) -> ErasureResult> { + pub fn decode( + &self, + shards: Vec>>, + original_size: usize, + ) -> ErasureResult> { if shards.len() != self.total_shards() { return Err(ErasureError::InvalidShardCount { expected: self.total_shards(), diff --git a/lightningstor/crates/lightningstor-distributed/src/node/client.rs b/lightningstor/crates/lightningstor-distributed/src/node/client.rs index 5ed9616..900490a 100644 --- a/lightningstor/crates/lightningstor-distributed/src/node/client.rs +++ b/lightningstor/crates/lightningstor-distributed/src/node/client.rs @@ -48,12 +48,10 @@ pub trait NodeClientTrait: Send + Sync { ) -> NodeResult<()>; /// Store multiple chunks on this node using a more efficient batch path when available. - async fn batch_put_chunks( - &self, - chunks: Vec<(String, u32, bool, Bytes)>, - ) -> NodeResult<()> { + async fn batch_put_chunks(&self, chunks: Vec<(String, u32, bool, Bytes)>) -> NodeResult<()> { for (chunk_id, shard_index, is_parity, data) in chunks { - self.put_chunk(&chunk_id, shard_index, is_parity, data).await?; + self.put_chunk(&chunk_id, shard_index, is_parity, data) + .await?; } Ok(()) } @@ -90,9 +88,8 @@ pub trait NodeClientTrait: Send + Sync { /// Real gRPC client for storage nodes /// -/// This client communicates with storage nodes over gRPC. -/// For now, this is a placeholder that will be implemented -/// when the storage node service is created. +/// This client communicates with storage nodes over gRPC once the node service +/// is available on the target endpoint. 
pub struct NodeClient { node_id: String, endpoint: String, @@ -368,10 +365,7 @@ impl NodeClientTrait for NodeClient { Err(NodeError::Timeout) } - async fn batch_put_chunks( - &self, - chunks: Vec<(String, u32, bool, Bytes)>, - ) -> NodeResult<()> { + async fn batch_put_chunks(&self, chunks: Vec<(String, u32, bool, Bytes)>) -> NodeResult<()> { if !self.is_healthy().await { return Err(NodeError::Unhealthy(self.node_id.clone())); } @@ -389,10 +383,16 @@ impl NodeClientTrait for NodeClient { )); let mut client = self.clone_client().await; - match timeout(self.request_timeout, client.batch_put_chunks(request_stream)).await { + match timeout( + self.request_timeout, + client.batch_put_chunks(request_stream), + ) + .await + { Ok(Ok(response)) => { let response = response.into_inner(); - if response.failure_count == 0 && response.success_count as usize == request_count + if response.failure_count == 0 + && response.success_count as usize == request_count { self.mark_healthy(); return Ok(()); diff --git a/lightningstor/crates/lightningstor-distributed/src/node/mock.rs b/lightningstor/crates/lightningstor-distributed/src/node/mock.rs index 9a9639e..ab58913 100644 --- a/lightningstor/crates/lightningstor-distributed/src/node/mock.rs +++ b/lightningstor/crates/lightningstor-distributed/src/node/mock.rs @@ -368,7 +368,9 @@ mod tests { // Enable failure injection node.set_fail_puts(true); - let result = node.put_chunk("chunk2", 0, false, Bytes::from(vec![2])).await; + let result = node + .put_chunk("chunk2", 0, false, Bytes::from(vec![2])) + .await; assert!(result.is_err()); // Disable failure injection diff --git a/lightningstor/crates/lightningstor-distributed/src/node/registry.rs b/lightningstor/crates/lightningstor-distributed/src/node/registry.rs index 2ff2a43..f636b5a 100644 --- a/lightningstor/crates/lightningstor-distributed/src/node/registry.rs +++ b/lightningstor/crates/lightningstor-distributed/src/node/registry.rs @@ -114,12 +114,7 @@ pub struct StaticNodeRegistry { impl StaticNodeRegistry { /// Create a new static node registry with the given endpoints pub async fn new(endpoints: &[String]) -> NodeResult { - Self::new_with_timeouts( - endpoints, - Duration::from_secs(5), - Duration::from_secs(300), - ) - .await + Self::new_with_timeouts(endpoints, Duration::from_secs(5), Duration::from_secs(300)).await } /// Create a new static node registry with explicit timeout settings. 
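For callers wiring these pieces together outside this patch, the registry constructor and trait method shown in the surrounding hunks compose directly. Below is a minimal sketch, assuming the `lightningstor_distributed::node` module path, a `NodeResult` re-export, and the meaning of the two `Duration` knobs; only the `new_with_timeouts` and `deregister_node` signatures themselves come from the diff.

```rust
use std::time::Duration;
// Assumed module path and re-exports; only the method signatures mirror the diff above.
use lightningstor_distributed::node::{NodeRegistry, NodeResult, StaticNodeRegistry};

async fn build_registry(endpoints: &[String]) -> NodeResult<StaticNodeRegistry> {
    // Same shape as the default `new()`, but with explicit timeouts instead of
    // the built-in 5s / 300s pair.
    let registry = StaticNodeRegistry::new_with_timeouts(
        endpoints,
        Duration::from_secs(2),  // assumed meaning: per-request/connect timeout
        Duration::from_secs(60), // assumed meaning: unhealthy-node back-off window
    )
    .await?;

    // Permanently decommissioned endpoints can be dropped from the static set.
    registry.deregister_node("storage-node-07").await?;
    Ok(registry)
}
```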
@@ -203,10 +198,7 @@ impl NodeRegistry for StaticNodeRegistry { } async fn deregister_node(&self, node_id: &str) -> NodeResult<()> { - self.nodes - .write() - .await - .retain(|n| n.node_id() != node_id); + self.nodes.write().await.retain(|n| n.node_id() != node_id); self.node_info .write() .await diff --git a/lightningstor/crates/lightningstor-distributed/src/repair.rs b/lightningstor/crates/lightningstor-distributed/src/repair.rs index a93ae91..33bb361 100644 --- a/lightningstor/crates/lightningstor-distributed/src/repair.rs +++ b/lightningstor/crates/lightningstor-distributed/src/repair.rs @@ -55,4 +55,3 @@ impl ReplicatedRepairTask { pub trait RepairQueue: Send + Sync { async fn enqueue_repair(&self, task: ReplicatedRepairTask); } - diff --git a/lightningstor/crates/lightningstor-node/src/main.rs b/lightningstor/crates/lightningstor-node/src/main.rs index bbd6e4b..b1545d9 100644 --- a/lightningstor/crates/lightningstor-node/src/main.rs +++ b/lightningstor/crates/lightningstor-node/src/main.rs @@ -111,8 +111,7 @@ async fn main() -> Result<(), Box> { // Initialize tracing tracing_subscriber::fmt() .with_env_filter( - EnvFilter::try_from_default_env() - .unwrap_or_else(|_| EnvFilter::new(&config.log_level)), + EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(&config.log_level)), ) .init(); @@ -127,10 +126,7 @@ async fn main() -> Result<(), Box> { tracing::info!(" Region: {}", config.region); } if config.max_capacity_bytes > 0 { - tracing::info!( - " Max capacity: {} bytes", - config.max_capacity_bytes - ); + tracing::info!(" Max capacity: {} bytes", config.max_capacity_bytes); } tracing::info!(" Sync on write: {}", config.sync_on_write); @@ -154,8 +150,8 @@ async fn main() -> Result<(), Box> { config.max_capacity_bytes, config.sync_on_write, ) - .await - .expect("Failed to create chunk store"), + .await + .expect("Failed to create chunk store"), ); tracing::info!( diff --git a/lightningstor/crates/lightningstor-node/src/service.rs b/lightningstor/crates/lightningstor-node/src/service.rs index 368d2fd..bdb977d 100644 --- a/lightningstor/crates/lightningstor-node/src/service.rs +++ b/lightningstor/crates/lightningstor-node/src/service.rs @@ -39,10 +39,9 @@ impl NodeServiceImpl { fn chunk_read_status(chunk_id: &str, error: crate::storage::StorageError) -> Status { match error { - crate::storage::StorageError::NotFound(_) => Status::not_found(format!( - "Chunk not found: {}", - chunk_id - )), + crate::storage::StorageError::NotFound(_) => { + Status::not_found(format!("Chunk not found: {}", chunk_id)) + } other => Status::internal(other.to_string()), } } @@ -92,16 +91,14 @@ impl NodeService for NodeServiceImpl { "GetChunk request" ); - let data = self.store.get(&req.chunk_id).await.map_err(|e| { - match &e { - crate::storage::StorageError::NotFound(_) => { - debug!(chunk_id = %req.chunk_id, "Chunk not found"); - Status::not_found(e.to_string()) - } - _ => { - error!(error = ?e, "Failed to get chunk"); - Status::internal(e.to_string()) - } + let data = self.store.get(&req.chunk_id).await.map_err(|e| match &e { + crate::storage::StorageError::NotFound(_) => { + debug!(chunk_id = %req.chunk_id, "Chunk not found"); + Status::not_found(e.to_string()) + } + _ => { + error!(error = ?e, "Failed to get chunk"); + Status::internal(e.to_string()) } })?; @@ -276,11 +273,12 @@ impl NodeService for NodeServiceImpl { results[index] = Some(result); } Err(join_error) => { - let status = Status::internal(format!( - "batch get task failed: {}", - join_error - )); - let index = 
results.iter().position(|entry| entry.is_none()).unwrap_or(0); + let status = + Status::internal(format!("batch get task failed: {}", join_error)); + let index = results + .iter() + .position(|entry| entry.is_none()) + .unwrap_or(0); results[index] = Some(Err(status)); } } @@ -293,7 +291,9 @@ impl NodeService for NodeServiceImpl { for result in results { let Some(result) = result else { - let _ = tx.send(Err(Status::internal("batch get result missing"))).await; + let _ = tx + .send(Err(Status::internal("batch get result missing"))) + .await; break; }; @@ -308,7 +308,9 @@ impl NodeService for NodeServiceImpl { } fn record_batch_put_result( - joined: Option), tokio::task::JoinError>>, + joined: Option< + Result<(String, Result), tokio::task::JoinError>, + >, success_count: &mut u32, failure_count: &mut u32, errors: &mut Vec, diff --git a/lightningstor/crates/lightningstor-node/src/storage.rs b/lightningstor/crates/lightningstor-node/src/storage.rs index e813309..8514131 100644 --- a/lightningstor/crates/lightningstor-node/src/storage.rs +++ b/lightningstor/crates/lightningstor-node/src/storage.rs @@ -121,11 +121,7 @@ impl LocalChunkStore { self.total_bytes.store(total_bytes, Ordering::SeqCst); self.chunk_count.store(chunk_count, Ordering::SeqCst); - debug!( - total_bytes, - chunk_count, - "Scanned existing chunks" - ); + debug!(total_bytes, chunk_count, "Scanned existing chunks"); Ok(()) } @@ -309,8 +305,8 @@ impl LocalChunkStore { #[cfg(test)] mod tests { use super::*; - use tempfile::TempDir; use std::sync::Arc; + use tempfile::TempDir; use tokio::sync::Barrier; async fn create_test_store() -> (LocalChunkStore, TempDir) { @@ -415,14 +411,19 @@ mod tests { async fn test_scan_preserves_chunk_path_cache() { let temp_dir = TempDir::new().unwrap(); let nested_path = temp_dir.path().join("ab").join("cd").join("abcd-test"); - fs::create_dir_all(nested_path.parent().unwrap()).await.unwrap(); + fs::create_dir_all(nested_path.parent().unwrap()) + .await + .unwrap(); fs::write(&nested_path, vec![7u8; 128]).await.unwrap(); let store = LocalChunkStore::new(temp_dir.path().to_path_buf(), 0, false) .await .unwrap(); - let resolved = store.resolve_existing_chunk_path("abcd-test").await.unwrap(); + let resolved = store + .resolve_existing_chunk_path("abcd-test") + .await + .unwrap(); assert_eq!(resolved, nested_path); assert_eq!(store.get("abcd-test").await.unwrap(), vec![7u8; 128]); } diff --git a/lightningstor/crates/lightningstor-server/src/bucket_service.rs b/lightningstor/crates/lightningstor-server/src/bucket_service.rs index 45096d5..9f4f1e9 100644 --- a/lightningstor/crates/lightningstor-server/src/bucket_service.rs +++ b/lightningstor/crates/lightningstor-server/src/bucket_service.rs @@ -1,17 +1,21 @@ //! 
BucketService gRPC implementation use crate::metadata::MetadataStore; +use chrono::Utc; +use iam_service_auth::{ + get_tenant_context, resolve_tenant_ids_from_context, resource_for_tenant, AuthService, + TenantContext, +}; use lightningstor_api::proto::{ BucketInfo, CreateBucketRequest, CreateBucketResponse, DeleteBucketPolicyRequest, DeleteBucketRequest, DeleteBucketTaggingRequest, GetBucketPolicyRequest, GetBucketPolicyResponse, GetBucketTaggingRequest, GetBucketTaggingResponse, - GetBucketVersioningRequest, GetBucketVersioningResponse, HeadBucketRequest, - HeadBucketResponse, ListBucketsRequest, ListBucketsResponse, PutBucketPolicyRequest, - PutBucketTaggingRequest, PutBucketVersioningRequest, + GetBucketVersioningRequest, GetBucketVersioningResponse, HeadBucketRequest, HeadBucketResponse, + ListBucketsRequest, ListBucketsResponse, PutBucketPolicyRequest, PutBucketTaggingRequest, Tag, + PutBucketVersioningRequest, }; use lightningstor_api::BucketService; -use lightningstor_types::{Bucket, BucketName, Result as LightningStorResult}; -use iam_service_auth::{get_tenant_context, resolve_tenant_ids_from_context, resource_for_tenant, AuthService}; +use lightningstor_types::{Bucket, BucketName, Result as LightningStorResult, Versioning}; use std::sync::Arc; use tonic::{Request, Response, Status}; @@ -51,6 +55,37 @@ impl BucketServiceImpl { project_id: bucket.project_id.clone(), } } + + async fn load_bucket_for_tenant( + &self, + tenant: &TenantContext, + bucket_name: &str, + ) -> Result { + self.metadata + .load_bucket(&tenant.org_id, &tenant.project_id, bucket_name) + .await + .map_err(Self::to_status)? + .ok_or_else(|| Status::not_found(format!("Bucket {} not found", bucket_name))) + } + + fn versioning_to_proto_status(versioning: Versioning) -> String { + match versioning { + Versioning::Disabled => String::new(), + Versioning::Enabled => "Enabled".to_string(), + Versioning::Suspended => "Suspended".to_string(), + } + } + + fn parse_versioning_status(status: &str) -> Result { + match status.trim() { + "" => Ok(Versioning::Disabled), + value if value.eq_ignore_ascii_case("enabled") => Ok(Versioning::Enabled), + value if value.eq_ignore_ascii_case("suspended") => Ok(Versioning::Suspended), + _ => Err(Status::invalid_argument( + "bucket versioning status must be Enabled, Suspended, or empty", + )), + } + } } const ACTION_BUCKETS_CREATE: &str = "storage:buckets:create"; @@ -231,10 +266,8 @@ impl BucketService for BucketServiceImpl { .await .map_err(Self::to_status)?; - let bucket_infos: Vec = buckets - .iter() - .map(|b| self.bucket_to_proto(b)) - .collect(); + let bucket_infos: Vec = + buckets.iter().map(|b| self.bucket_to_proto(b)).collect(); Ok(Response::new(ListBucketsResponse { buckets: bucket_infos, @@ -245,65 +278,249 @@ impl BucketService for BucketServiceImpl { async fn get_bucket_versioning( &self, - _request: Request, + request: Request, ) -> Result, Status> { - Err(Status::unimplemented( - "GetBucketVersioning not yet implemented", - )) + let tenant = get_tenant_context(&request)?; + let req = request.into_inner(); + let bucket = self.load_bucket_for_tenant(&tenant, &req.bucket).await?; + + self.auth + .authorize( + &tenant, + ACTION_BUCKETS_READ, + &resource_for_tenant( + "bucket", + &bucket.id.to_string(), + &bucket.org_id, + &bucket.project_id, + ), + ) + .await?; + + Ok(Response::new(GetBucketVersioningResponse { + status: Self::versioning_to_proto_status(bucket.versioning), + })) } async fn put_bucket_versioning( &self, - _request: Request, + request: Request, ) -> Result, 
Status> { - Err(Status::unimplemented( - "PutBucketVersioning not yet implemented", - )) + let tenant = get_tenant_context(&request)?; + let req = request.into_inner(); + let mut bucket = self.load_bucket_for_tenant(&tenant, &req.bucket).await?; + + self.auth + .authorize( + &tenant, + ACTION_BUCKETS_UPDATE, + &resource_for_tenant( + "bucket", + &bucket.id.to_string(), + &bucket.org_id, + &bucket.project_id, + ), + ) + .await?; + + bucket.versioning = Self::parse_versioning_status(&req.status)?; + bucket.updated_at = Utc::now(); + self.metadata + .save_bucket(&bucket) + .await + .map_err(Self::to_status)?; + + Ok(Response::new(())) } async fn get_bucket_policy( &self, - _request: Request, + request: Request, ) -> Result, Status> { - Err(Status::unimplemented("GetBucketPolicy not yet implemented")) + let tenant = get_tenant_context(&request)?; + let req = request.into_inner(); + let bucket = self.load_bucket_for_tenant(&tenant, &req.bucket).await?; + + self.auth + .authorize( + &tenant, + ACTION_BUCKETS_READ, + &resource_for_tenant( + "bucket", + &bucket.id.to_string(), + &bucket.org_id, + &bucket.project_id, + ), + ) + .await?; + + Ok(Response::new(GetBucketPolicyResponse { + policy: bucket.policy.policy_json.unwrap_or_default(), + })) } async fn put_bucket_policy( &self, - _request: Request, + request: Request, ) -> Result, Status> { - Err(Status::unimplemented("PutBucketPolicy not yet implemented")) + let tenant = get_tenant_context(&request)?; + let req = request.into_inner(); + if req.policy.trim().is_empty() { + return Err(Status::invalid_argument("bucket policy JSON must not be empty")); + } + + let mut bucket = self.load_bucket_for_tenant(&tenant, &req.bucket).await?; + self.auth + .authorize( + &tenant, + ACTION_BUCKETS_UPDATE, + &resource_for_tenant( + "bucket", + &bucket.id.to_string(), + &bucket.org_id, + &bucket.project_id, + ), + ) + .await?; + + bucket.policy.policy_json = Some(req.policy); + bucket.updated_at = Utc::now(); + self.metadata + .save_bucket(&bucket) + .await + .map_err(Self::to_status)?; + + Ok(Response::new(())) } async fn delete_bucket_policy( &self, - _request: Request, + request: Request, ) -> Result, Status> { - Err(Status::unimplemented( - "DeleteBucketPolicy not yet implemented", - )) + let tenant = get_tenant_context(&request)?; + let req = request.into_inner(); + let mut bucket = self.load_bucket_for_tenant(&tenant, &req.bucket).await?; + + self.auth + .authorize( + &tenant, + ACTION_BUCKETS_UPDATE, + &resource_for_tenant( + "bucket", + &bucket.id.to_string(), + &bucket.org_id, + &bucket.project_id, + ), + ) + .await?; + + bucket.policy = Default::default(); + bucket.updated_at = Utc::now(); + self.metadata + .save_bucket(&bucket) + .await + .map_err(Self::to_status)?; + + Ok(Response::new(())) } async fn get_bucket_tagging( &self, - _request: Request, + request: Request, ) -> Result, Status> { - Err(Status::unimplemented("GetBucketTagging not yet implemented")) + let tenant = get_tenant_context(&request)?; + let req = request.into_inner(); + let bucket = self.load_bucket_for_tenant(&tenant, &req.bucket).await?; + + self.auth + .authorize( + &tenant, + ACTION_BUCKETS_READ, + &resource_for_tenant( + "bucket", + &bucket.id.to_string(), + &bucket.org_id, + &bucket.project_id, + ), + ) + .await?; + + let mut tags: Vec = bucket + .tags + .into_iter() + .map(|(key, value)| Tag { key, value }) + .collect(); + tags.sort_by(|lhs, rhs| lhs.key.cmp(&rhs.key)); + + Ok(Response::new(GetBucketTaggingResponse { tags })) } async fn put_bucket_tagging( &self, - 
_request: Request, + request: Request, ) -> Result, Status> { - Err(Status::unimplemented("PutBucketTagging not yet implemented")) + let tenant = get_tenant_context(&request)?; + let req = request.into_inner(); + let mut bucket = self.load_bucket_for_tenant(&tenant, &req.bucket).await?; + + self.auth + .authorize( + &tenant, + ACTION_BUCKETS_UPDATE, + &resource_for_tenant( + "bucket", + &bucket.id.to_string(), + &bucket.org_id, + &bucket.project_id, + ), + ) + .await?; + + bucket.tags.clear(); + for tag in req.tags { + if tag.key.trim().is_empty() { + return Err(Status::invalid_argument("bucket tag keys must not be empty")); + } + bucket.tags.insert(tag.key, tag.value); + } + + bucket.updated_at = Utc::now(); + self.metadata + .save_bucket(&bucket) + .await + .map_err(Self::to_status)?; + + Ok(Response::new(())) } async fn delete_bucket_tagging( &self, - _request: Request, + request: Request, ) -> Result, Status> { - Err(Status::unimplemented( - "DeleteBucketTagging not yet implemented", - )) + let tenant = get_tenant_context(&request)?; + let req = request.into_inner(); + let mut bucket = self.load_bucket_for_tenant(&tenant, &req.bucket).await?; + + self.auth + .authorize( + &tenant, + ACTION_BUCKETS_UPDATE, + &resource_for_tenant( + "bucket", + &bucket.id.to_string(), + &bucket.org_id, + &bucket.project_id, + ), + ) + .await?; + + bucket.tags.clear(); + bucket.updated_at = Utc::now(); + self.metadata + .save_bucket(&bucket) + .await + .map_err(Self::to_status)?; + + Ok(Response::new(())) } } diff --git a/lightningstor/crates/lightningstor-server/src/lib.rs b/lightningstor/crates/lightningstor-server/src/lib.rs index 8afe3fb..cfec1f0 100644 --- a/lightningstor/crates/lightningstor-server/src/lib.rs +++ b/lightningstor/crates/lightningstor-server/src/lib.rs @@ -16,4 +16,4 @@ pub mod tenant; pub use bucket_service::BucketServiceImpl; pub use config::ServerConfig; pub use object_service::ObjectServiceImpl; -pub use repair::{MetadataRepairQueue, spawn_replicated_repair_worker}; +pub use repair::{spawn_replicated_repair_worker, MetadataRepairQueue}; diff --git a/lightningstor/crates/lightningstor-server/src/object_service.rs b/lightningstor/crates/lightningstor-server/src/object_service.rs index 92b822d..aaec0b6 100644 --- a/lightningstor/crates/lightningstor-server/src/object_service.rs +++ b/lightningstor/crates/lightningstor-server/src/object_service.rs @@ -8,20 +8,22 @@ use iam_service_auth::{get_tenant_context, resource_for_tenant, AuthService, Ten use lightningstor_api::proto::{ AbortMultipartUploadRequest, CompleteMultipartUploadRequest, CompleteMultipartUploadResponse, CompletedPart, CopyObjectRequest, CopyObjectResponse, CreateMultipartUploadRequest, - CreateMultipartUploadResponse, DeleteObjectRequest, DeleteObjectResponse, GetObjectRequest, - GetObjectResponse, HeadObjectRequest, HeadObjectResponse, ListMultipartUploadsRequest, - ListMultipartUploadsResponse, ListObjectVersionsRequest, ListObjectVersionsResponse, - ListObjectsRequest, ListObjectsResponse, ListPartsRequest, ListPartsResponse, - MultipartUploadInfo, ObjectInfo, ObjectMetadata as ProtoObjectMetadata, PartInfo, - PutObjectRequest, PutObjectResponse, UploadPartRequest, UploadPartResponse, + CreateMultipartUploadResponse, DeleteMarkerEntry, DeleteObjectRequest, + DeleteObjectResponse, GetObjectRequest, GetObjectResponse, HeadObjectRequest, + HeadObjectResponse, ListMultipartUploadsRequest, ListMultipartUploadsResponse, + ListObjectVersionsRequest, ListObjectVersionsResponse, ListObjectsRequest, + ListObjectsResponse, 
ListPartsRequest, ListPartsResponse, MultipartUploadInfo, ObjectInfo, + ObjectMetadata as ProtoObjectMetadata, PartInfo, PutObjectRequest, PutObjectResponse, + UploadPartRequest, UploadPartResponse, }; use lightningstor_api::ObjectService; use lightningstor_storage::StorageBackend; use lightningstor_types::{ Bucket, BucketId, ETag, MultipartUpload, Object, ObjectKey, ObjectMetadata, ObjectVersion, - Part, PartNumber, Result as LightningStorResult, + Part, PartNumber, Result as LightningStorResult, Versioning, }; use md5::{Digest, Md5}; +use std::cmp::Ordering; use std::str::FromStr; use std::sync::Arc; use tokio::sync::Mutex; @@ -88,6 +90,18 @@ impl ObjectServiceImpl { } } + fn delete_marker_to_proto(&self, obj: &Object) -> DeleteMarkerEntry { + DeleteMarkerEntry { + key: obj.key.as_str().to_string(), + version_id: obj.version.as_str().to_string(), + is_latest: obj.is_latest, + last_modified: Some(prost_types::Timestamp { + seconds: obj.last_modified.timestamp(), + nanos: obj.last_modified.timestamp_subsec_nanos() as i32, + }), + } + } + /// Calculate MD5 hash of data fn calculate_md5(data: &[u8]) -> ETag { let mut hasher = Md5::new(); @@ -368,6 +382,161 @@ impl ObjectServiceImpl { } } + async fn load_replaced_object( + &self, + bucket: &Bucket, + key: &str, + ) -> Result, Status> { + if bucket.versioning == lightningstor_types::Versioning::Enabled { + return Ok(None); + } + + let bucket_id = BucketId::from_str(&bucket.id.to_string()) + .map_err(|_| Status::internal("Invalid bucket ID"))?; + + self.metadata + .load_object(&bucket_id, key, None) + .await + .map_err(Self::to_status) + } + + fn versioning_active(versioning: Versioning) -> bool { + versioning != Versioning::Disabled + } + + fn compare_version_order(lhs: &Object, rhs: &Object) -> Ordering { + lhs.key + .as_str() + .cmp(rhs.key.as_str()) + .then_with(|| rhs.is_latest.cmp(&lhs.is_latest)) + .then_with(|| rhs.last_modified.cmp(&lhs.last_modified)) + .then_with(|| rhs.version.as_str().cmp(lhs.version.as_str())) + } + + fn sort_object_versions(objects: &mut [Object]) { + objects.sort_by(Self::compare_version_order); + } + + fn collapse_latest_objects(mut objects: Vec) -> Vec { + Self::sort_object_versions(&mut objects); + let mut latest_by_key = std::collections::BTreeMap::new(); + for object in objects { + latest_by_key + .entry(object.key.as_str().to_string()) + .or_insert(object); + } + latest_by_key + .into_values() + .filter(|object| !object.is_delete_marker) + .collect() + } + + async fn list_versions_for_key( + &self, + bucket_id: &BucketId, + object_key: &str, + ) -> Result, Status> { + let mut objects = self + .metadata + .list_objects(bucket_id, object_key, 0) + .await + .map_err(Self::to_status)?; + objects.retain(|object| object.key.as_str() == object_key); + Self::sort_object_versions(&mut objects); + Ok(objects) + } + + async fn load_object_for_request( + &self, + bucket_id: &BucketId, + object_key: &str, + version_id: Option<&str>, + ) -> Result, Status> { + if version_id.is_some() { + return self + .metadata + .load_object(bucket_id, object_key, version_id) + .await + .map_err(Self::to_status); + } + + if let Some(object) = self + .metadata + .load_object(bucket_id, object_key, None) + .await + .map_err(Self::to_status)? + { + return Ok(Some(object)); + } + + Ok(self + .list_versions_for_key(bucket_id, object_key) + .await? 
+ .into_iter() + .next()) + } + + async fn mark_non_latest_versions( + &self, + bucket_id: &BucketId, + object_key: &str, + ) -> Result<(), Status> { + for mut object in self.list_versions_for_key(bucket_id, object_key).await? { + if object.is_latest { + object.is_latest = false; + self.metadata + .save_object(&object) + .await + .map_err(Self::to_status)?; + } + } + Ok(()) + } + + async fn normalize_latest_version( + &self, + bucket_id: &BucketId, + object_key: &str, + ) -> Result<(), Status> { + let versions = self.list_versions_for_key(bucket_id, object_key).await?; + let latest_version = versions.first().map(|object| object.version.as_str().to_string()); + for mut object in versions { + let should_be_latest = latest_version + .as_deref() + .map(|version| object.version.as_str() == version) + .unwrap_or(false); + if object.is_latest != should_be_latest { + object.is_latest = should_be_latest; + self.metadata + .save_object(&object) + .await + .map_err(Self::to_status)?; + } + } + Ok(()) + } + + async fn delete_replaced_object_data(&self, object: &Object) -> Result<(), Status> { + if let Some(upload) = self + .metadata + .load_object_multipart_upload(&object.id) + .await + .map_err(Self::to_status)? + { + self.delete_multipart_parts(&upload).await?; + self.metadata + .delete_object_multipart_upload(&object.id) + .await + .map_err(Self::to_status)?; + } else { + self.storage.delete_object(&object.id).await.map_err(|e| { + Status::internal(format!("Failed to delete replaced object data: {}", e)) + })?; + } + + Ok(()) + } + fn multipart_lock(&self, upload_id: &str) -> Arc> { self.multipart_locks .entry(upload_id.to_string()) @@ -413,6 +582,8 @@ impl ObjectService for ObjectServiceImpl { ); let bucket = self.load_bucket_for_tenant(&tenant, &req.bucket).await?; + let bucket_id: BucketId = BucketId::from_str(&bucket.id.to_string()) + .map_err(|_| Status::internal("Invalid bucket ID"))?; self.authorize_object_action(&tenant, ACTION_OBJECTS_CREATE, &bucket, &req.key) .await?; @@ -420,6 +591,7 @@ impl ObjectService for ObjectServiceImpl { // Validate object key let object_key = ObjectKey::new(&req.key) .map_err(|e| Status::invalid_argument(format!("Invalid object key: {}", e)))?; + let replaced_object = self.load_replaced_object(&bucket, &req.key).await?; // Calculate ETag let etag = Self::calculate_md5(&body); @@ -439,6 +611,7 @@ impl ObjectService for ObjectServiceImpl { // Handle versioning if bucket.versioning == lightningstor_types::Versioning::Enabled { + self.mark_non_latest_versions(&bucket_id, &req.key).await?; object.version = ObjectVersion::new(); } @@ -454,6 +627,10 @@ impl ObjectService for ObjectServiceImpl { .await .map_err(Self::to_status)?; + if let Some(existing_object) = replaced_object { + self.delete_replaced_object_data(&existing_object).await?; + } + tracing::debug!( bucket = %req.bucket, key = %req.key, @@ -496,10 +673,8 @@ impl ObjectService for ObjectServiceImpl { }; let object = self - .metadata - .load_object(&bucket_id, &req.key, version_id) - .await - .map_err(Self::to_status)? + .load_object_for_request(&bucket_id, &req.key, version_id) + .await? .ok_or_else(|| Status::not_found(format!("Object {} not found", req.key)))?; // Check if delete marker @@ -561,10 +736,8 @@ impl ObjectService for ObjectServiceImpl { // Load object to get its storage ID let object = self - .metadata - .load_object(&bucket_id, &req.key, version_id) - .await - .map_err(Self::to_status)? + .load_object_for_request(&bucket_id, &req.key, version_id) + .await? 
.ok_or_else(|| Status::not_found(format!("Object {} not found", req.key)))?; if let Some(upload) = self @@ -594,6 +767,9 @@ impl ObjectService for ObjectServiceImpl { .delete_object(&bucket_id, &req.key, version_id) .await .map_err(Self::to_status)?; + if Self::versioning_active(bucket.versioning) { + self.normalize_latest_version(&bucket_id, &req.key).await?; + } tracing::debug!( bucket = %req.bucket, @@ -693,6 +869,9 @@ impl ObjectService for ObjectServiceImpl { .map_err(|_| Status::internal("Invalid source bucket ID"))?; let dest_key = ObjectKey::new(&req.dest_key) .map_err(|e| Status::invalid_argument(format!("Invalid destination key: {}", e)))?; + let replaced_object = self + .load_replaced_object(&dest_bucket, &req.dest_key) + .await?; let source_version_id = if req.source_version_id.is_empty() { None @@ -700,10 +879,8 @@ impl ObjectService for ObjectServiceImpl { Some(req.source_version_id.as_str()) }; let source_object = self - .metadata - .load_object(&source_bucket_id, &req.source_key, source_version_id) - .await - .map_err(Self::to_status)? + .load_object_for_request(&source_bucket_id, &req.source_key, source_version_id) + .await? .ok_or_else(|| Status::not_found(format!("Object {} not found", req.source_key)))?; if source_object.is_delete_marker { @@ -727,6 +904,10 @@ impl ObjectService for ObjectServiceImpl { dest_object.metadata = object_metadata; dest_object.storage_class = source_object.storage_class.clone(); if dest_bucket.versioning == lightningstor_types::Versioning::Enabled { + let dest_bucket_id: BucketId = BucketId::from_str(&dest_bucket.id.to_string()) + .map_err(|_| Status::internal("Invalid destination bucket ID"))?; + self.mark_non_latest_versions(&dest_bucket_id, &req.dest_key) + .await?; dest_object.version = ObjectVersion::new(); } @@ -739,6 +920,10 @@ impl ObjectService for ObjectServiceImpl { .await .map_err(Self::to_status)?; + if let Some(existing_object) = replaced_object { + self.delete_replaced_object_data(&existing_object).await?; + } + Ok(Response::new(CopyObjectResponse { etag: dest_object.etag.as_str().to_string(), version_id: dest_object.version.as_str().to_string(), @@ -788,8 +973,11 @@ impl ObjectService for ObjectServiceImpl { .await .map_err(Self::to_status)?; - // Filter delete markers and apply start_after - objects.retain(|obj| !obj.is_delete_marker); + if Self::versioning_active(bucket.versioning) { + objects = Self::collapse_latest_objects(objects); + } else { + objects.retain(|obj| !obj.is_delete_marker); + } if !start_after.is_empty() { objects.retain(|obj| obj.key.as_str() > start_after); } @@ -861,11 +1049,157 @@ impl ObjectService for ObjectServiceImpl { async fn list_object_versions( &self, - _request: Request, + request: Request, ) -> Result, Status> { - Err(Status::unimplemented( - "ListObjectVersions not yet implemented", - )) + enum VersionEntry { + Version(Object), + DeleteMarker(Object), + Prefix(String), + } + + let tenant = get_tenant_context(&request)?; + let req = request.into_inner(); + tracing::info!( + bucket = %req.bucket, + prefix = %req.prefix, + max_keys = req.max_keys, + "ListObjectVersions request" + ); + + let bucket = self.load_bucket_for_tenant(&tenant, &req.bucket).await?; + self.authorize_object_action(&tenant, ACTION_OBJECTS_LIST, &bucket, &req.prefix) + .await?; + + let bucket_id: BucketId = BucketId::from_str(&bucket.id.to_string()) + .map_err(|_| Status::internal("Invalid bucket ID"))?; + let max_keys = if req.max_keys > 0 { req.max_keys } else { 1000 }; + + let mut objects = self + .metadata + 
.list_objects(&bucket_id, &req.prefix, 0) + .await + .map_err(Self::to_status)?; + Self::sort_object_versions(&mut objects); + + if !req.key_marker.is_empty() { + if req.version_id_marker.is_empty() { + objects.retain(|object| object.key.as_str() > req.key_marker.as_str()); + } else { + let mut seen_marker = false; + objects.retain(|object| { + if seen_marker { + return true; + } + if object.key.as_str() < req.key_marker.as_str() { + return false; + } + if object.key.as_str() == req.key_marker.as_str() { + if object.version.as_str() == req.version_id_marker { + seen_marker = true; + } + return false; + } + true + }); + } + } + + let delimiter = req.delimiter.as_str(); + let has_delimiter = !delimiter.is_empty(); + let mut common_prefixes = std::collections::BTreeSet::new(); + let mut entries = Vec::new(); + + if has_delimiter { + for object in objects { + let key = object.key.as_str(); + let relative = key.strip_prefix(req.prefix.as_str()).unwrap_or(key); + if let Some(pos) = relative.find(delimiter) { + let prefix = format!("{}{}{}", req.prefix, &relative[..pos], delimiter); + common_prefixes.insert(prefix); + } else if object.is_delete_marker { + entries.push(( + object.key.as_str().to_string(), + VersionEntry::DeleteMarker(object), + )); + } else { + entries.push((object.key.as_str().to_string(), VersionEntry::Version(object))); + } + } + } else { + for object in objects { + if object.is_delete_marker { + entries.push(( + object.key.as_str().to_string(), + VersionEntry::DeleteMarker(object), + )); + } else { + entries.push((object.key.as_str().to_string(), VersionEntry::Version(object))); + } + } + } + + for prefix in common_prefixes { + entries.push((prefix.clone(), VersionEntry::Prefix(prefix))); + } + + entries.sort_by(|lhs, rhs| { + lhs.0.cmp(&rhs.0).then_with(|| match (&lhs.1, &rhs.1) { + (VersionEntry::Version(lhs_obj), VersionEntry::Version(rhs_obj)) + | (VersionEntry::Version(lhs_obj), VersionEntry::DeleteMarker(rhs_obj)) + | (VersionEntry::DeleteMarker(lhs_obj), VersionEntry::Version(rhs_obj)) + | (VersionEntry::DeleteMarker(lhs_obj), VersionEntry::DeleteMarker(rhs_obj)) => { + rhs_obj + .is_latest + .cmp(&lhs_obj.is_latest) + .then_with(|| rhs_obj.last_modified.cmp(&lhs_obj.last_modified)) + .then_with(|| rhs_obj.version.as_str().cmp(lhs_obj.version.as_str())) + } + _ => Ordering::Equal, + }) + }); + + let is_truncated = entries.len() > max_keys as usize; + let limited_entries = entries.into_iter().take(max_keys as usize); + + let mut versions = Vec::new(); + let mut delete_markers = Vec::new(); + let mut common_prefixes = Vec::new(); + let mut next_key_marker = String::new(); + let mut next_version_id_marker = String::new(); + + for (_, entry) in limited_entries { + match entry { + VersionEntry::Version(object) => { + next_key_marker = object.key.as_str().to_string(); + next_version_id_marker = object.version.as_str().to_string(); + versions.push(self.object_to_proto(&object)); + } + VersionEntry::DeleteMarker(object) => { + next_key_marker = object.key.as_str().to_string(); + next_version_id_marker = object.version.as_str().to_string(); + delete_markers.push(self.delete_marker_to_proto(&object)); + } + VersionEntry::Prefix(prefix) => { + next_key_marker = prefix.clone(); + next_version_id_marker.clear(); + common_prefixes.push(prefix); + } + } + } + + if !is_truncated { + next_key_marker.clear(); + next_version_id_marker.clear(); + } + + Ok(Response::new(ListObjectVersionsResponse { + versions, + delete_markers, + common_prefixes, + is_truncated, + next_key_marker, + 
next_version_id_marker, + })) } async fn create_multipart_upload( @@ -1013,6 +1347,7 @@ impl ObjectService for ObjectServiceImpl { let bucket = self.load_bucket_for_tenant(&tenant, &req.bucket).await?; self.authorize_object_action(&tenant, ACTION_OBJECTS_UPDATE, &bucket, &req.key) .await?; + let replaced_object = self.load_replaced_object(&bucket, &req.key).await?; let upload_lock = self.multipart_lock(&req.upload_id); let _guard = upload_lock.lock().await; @@ -1073,6 +1408,10 @@ impl ObjectService for ObjectServiceImpl { ); object.metadata = upload.metadata.clone(); if bucket.versioning == lightningstor_types::Versioning::Enabled { + let bucket_id: BucketId = BucketId::from_str(&bucket.id.to_string()) + .map_err(|_| Status::internal("Invalid bucket ID"))?; + self.mark_non_latest_versions(&bucket_id, req.key.as_str()) + .await?; object.version = ObjectVersion::new(); } @@ -1088,6 +1427,10 @@ impl ObjectService for ObjectServiceImpl { .delete_multipart_upload(upload.upload_id.as_str()) .await .map_err(Self::to_status)?; + + if let Some(existing_object) = replaced_object { + self.delete_replaced_object_data(&existing_object).await?; + } drop(_guard); self.drop_multipart_lock_if_idle(&req.upload_id); diff --git a/lightningstor/crates/lightningstor-server/src/s3/auth.rs b/lightningstor/crates/lightningstor-server/src/s3/auth.rs index 7c90087..6ac5977 100644 --- a/lightningstor/crates/lightningstor-server/src/s3/auth.rs +++ b/lightningstor/crates/lightningstor-server/src/s3/auth.rs @@ -826,7 +826,7 @@ mod tests { &self, _request: TonicRequest, ) -> Result, Status> { - Err(Status::unimplemented("not needed in test")) + Err(Status::failed_precondition("create_s3_credential is unused in this test")) } async fn get_secret_key( diff --git a/lightningstor/crates/lightningstor-server/src/s3/router.rs b/lightningstor/crates/lightningstor-server/src/s3/router.rs index cff2270..766aa3d 100644 --- a/lightningstor/crates/lightningstor-server/src/s3/router.rs +++ b/lightningstor/crates/lightningstor-server/src/s3/router.rs @@ -712,6 +712,17 @@ async fn put_object( Err(e) => return error_response(StatusCode::BAD_REQUEST, "InvalidArgument", e), }; + let replaced_object = match load_replaced_object(&state, &bucket_obj, &key).await { + Ok(object) => object, + Err(e) => { + return error_response( + StatusCode::INTERNAL_SERVER_ERROR, + "InternalError", + &e.to_string(), + ) + } + }; + // Extract content type from headers let content_type = headers .get("content-type") @@ -861,6 +872,16 @@ async fn put_object( ); } + if let Some(existing_object) = replaced_object { + if let Err(e) = delete_replaced_object_data(&state, &existing_object).await { + return error_response( + StatusCode::INTERNAL_SERVER_ERROR, + "InternalError", + &e.to_string(), + ); + } + } + tracing::debug!(bucket = %bucket, key = %key, etag = %etag.as_str(), "Object stored successfully"); Response::builder() @@ -1328,6 +1349,39 @@ async fn delete_multipart_parts( Ok(()) } +async fn load_replaced_object( + state: &Arc, + bucket: &Bucket, + key: &str, +) -> lightningstor_types::Result> { + if bucket.versioning == lightningstor_types::Versioning::Enabled { + return Ok(None); + } + + state.metadata.load_object(&bucket.id, key, None).await +} + +async fn delete_replaced_object_data( + state: &Arc, + object: &Object, +) -> lightningstor_types::Result<()> { + if let Some(upload) = state + .metadata + .load_object_multipart_upload(&object.id) + .await? 
+ { + delete_multipart_parts(state, &upload).await?; + state + .metadata + .delete_object_multipart_upload(&object.id) + .await?; + } else { + state.storage.delete_object(&object.id).await?; + } + + Ok(()) +} + fn calculate_etag(body: &[u8]) -> lightningstor_types::ETag { let mut md5 = Md5::new(); md5.update(body); diff --git a/lightningstor/crates/lightningstor-server/src/tenant.rs b/lightningstor/crates/lightningstor-server/src/tenant.rs index ccc28d2..9671f0a 100644 --- a/lightningstor/crates/lightningstor-server/src/tenant.rs +++ b/lightningstor/crates/lightningstor-server/src/tenant.rs @@ -26,10 +26,7 @@ fn project_from_metadata(metadata: &MetadataMap) -> Option { .or_else(|| metadata_value(metadata, "project_id")) } -pub fn resolve_org( - metadata: &MetadataMap, - org_id: Option, -) -> Result { +pub fn resolve_org(metadata: &MetadataMap, org_id: Option) -> Result { org_id .filter(|value| !value.is_empty()) .or_else(|| org_from_metadata(metadata)) @@ -54,6 +51,7 @@ pub fn resolve_tenant( project_id: Option, ) -> Result { let (org_id, project_id) = resolve_org_project_optional(metadata, org_id, project_id)?; - let project_id = project_id.ok_or_else(|| Status::invalid_argument("project_id is required"))?; + let project_id = + project_id.ok_or_else(|| Status::invalid_argument("project_id is required"))?; Ok(TenantContext { org_id, project_id }) } diff --git a/lightningstor/crates/lightningstor-storage/src/backend.rs b/lightningstor/crates/lightningstor-storage/src/backend.rs index 7322d94..20c33dc 100644 --- a/lightningstor/crates/lightningstor-storage/src/backend.rs +++ b/lightningstor/crates/lightningstor-storage/src/backend.rs @@ -11,13 +11,13 @@ use thiserror::Error; pub enum StorageError { #[error("IO error: {0}")] Io(#[from] io::Error), - + #[error("Object not found: {0}")] NotFound(ObjectId), - + #[error("Storage backend error: {0}")] Backend(String), - + #[error("Invalid object ID: {0}")] InvalidObjectId(String), } @@ -29,16 +29,12 @@ impl From for lightningstor_types::Error { fn from(err: StorageError) -> Self { match err { StorageError::Io(e) => lightningstor_types::Error::StorageError(e.to_string()), - StorageError::NotFound(id) => { - lightningstor_types::Error::ObjectNotFound { - bucket: String::new(), - key: id.to_string(), - } - } + StorageError::NotFound(id) => lightningstor_types::Error::ObjectNotFound { + bucket: String::new(), + key: id.to_string(), + }, StorageError::Backend(msg) => lightningstor_types::Error::StorageError(msg), - StorageError::InvalidObjectId(msg) => { - lightningstor_types::Error::InvalidArgument(msg) - } + StorageError::InvalidObjectId(msg) => lightningstor_types::Error::InvalidArgument(msg), } } } @@ -59,7 +55,7 @@ pub trait StorageBackend: Send + Sync { /// * `Ok(())` if write succeeded /// * `Err(StorageError)` if write failed async fn put_object(&self, object_id: &ObjectId, data: Bytes) -> StorageResult<()>; - + /// Read object data /// /// # Arguments @@ -70,7 +66,7 @@ pub trait StorageBackend: Send + Sync { /// * `Err(StorageError::NotFound)` if object does not exist /// * `Err(StorageError)` for other errors async fn get_object(&self, object_id: &ObjectId) -> StorageResult; - + /// Delete object data /// /// # Arguments @@ -80,7 +76,7 @@ pub trait StorageBackend: Send + Sync { /// * `Ok(())` if delete succeeded (or object didn't exist) /// * `Err(StorageError)` for other errors async fn delete_object(&self, object_id: &ObjectId) -> StorageResult<()>; - + /// Check if object exists /// /// # Arguments @@ -91,7 +87,7 @@ pub trait 
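delete_replaced_object_data distinguishes objects that were committed through a multipart upload, whose parts and upload record are removed, from single-blob objects, where only the stored blob is deleted. A small sketch of that branch; the `StoredAs` / `Cleanup` types are illustrative only:

```rust
// Sketch of the cleanup branch in delete_replaced_object_data: multipart-backed
// objects drop their parts and upload record, single-blob objects drop the blob.
#[derive(Debug)]
enum StoredAs {
    /// Object data lives in numbered parts under an upload id.
    Multipart { upload_id: String, parts: Vec<u32> },
    /// Object data is one blob addressed by the object id.
    SingleBlob { object_id: String },
}

#[derive(Debug, PartialEq)]
enum Cleanup {
    DeletePart { upload_id: String, part: u32 },
    DeleteUploadRecord { upload_id: String },
    DeleteBlob { object_id: String },
}

fn plan_cleanup(stored: &StoredAs) -> Vec<Cleanup> {
    match stored {
        StoredAs::Multipart { upload_id, parts } => {
            let mut ops: Vec<Cleanup> = parts
                .iter()
                .map(|p| Cleanup::DeletePart { upload_id: upload_id.clone(), part: *p })
                .collect();
            ops.push(Cleanup::DeleteUploadRecord { upload_id: upload_id.clone() });
            ops
        }
        StoredAs::SingleBlob { object_id } => {
            vec![Cleanup::DeleteBlob { object_id: object_id.clone() }]
        }
    }
}

fn main() {
    let multipart = StoredAs::Multipart { upload_id: "u-1".into(), parts: vec![1, 2] };
    assert_eq!(plan_cleanup(&multipart).len(), 3); // two parts plus the upload record
    let blob = StoredAs::SingleBlob { object_id: "o-1".into() };
    assert_eq!(plan_cleanup(&blob).len(), 1);
}
```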
StorageBackend: Send + Sync { /// * `Ok(false)` if object does not exist /// * `Err(StorageError)` for other errors async fn object_exists(&self, object_id: &ObjectId) -> StorageResult; - + /// Get object size in bytes /// /// # Arguments @@ -102,7 +98,7 @@ pub trait StorageBackend: Send + Sync { /// * `Err(StorageError::NotFound)` if object does not exist /// * `Err(StorageError)` for other errors async fn object_size(&self, object_id: &ObjectId) -> StorageResult; - + /// Write part data (for multipart uploads) /// /// # Arguments @@ -113,13 +109,8 @@ pub trait StorageBackend: Send + Sync { /// # Returns /// * `Ok(())` if write succeeded /// * `Err(StorageError)` if write failed - async fn put_part( - &self, - upload_id: &str, - part_number: u32, - data: Bytes, - ) -> StorageResult<()>; - + async fn put_part(&self, upload_id: &str, part_number: u32, data: Bytes) -> StorageResult<()>; + /// Read part data (for multipart uploads) /// /// # Arguments @@ -130,12 +121,8 @@ pub trait StorageBackend: Send + Sync { /// * `Ok(Bytes)` if part exists /// * `Err(StorageError::NotFound)` if part does not exist /// * `Err(StorageError)` for other errors - async fn get_part( - &self, - upload_id: &str, - part_number: u32, - ) -> StorageResult; - + async fn get_part(&self, upload_id: &str, part_number: u32) -> StorageResult; + /// Delete part data (for multipart uploads) /// /// # Arguments @@ -146,7 +133,7 @@ pub trait StorageBackend: Send + Sync { /// * `Ok(())` if delete succeeded /// * `Err(StorageError)` for other errors async fn delete_part(&self, upload_id: &str, part_number: u32) -> StorageResult<()>; - + /// Delete all parts for a multipart upload /// /// # Arguments diff --git a/lightningstor/crates/lightningstor-storage/src/local_fs.rs b/lightningstor/crates/lightningstor-storage/src/local_fs.rs index 7cb66f3..7491e86 100644 --- a/lightningstor/crates/lightningstor-storage/src/local_fs.rs +++ b/lightningstor/crates/lightningstor-storage/src/local_fs.rs @@ -35,18 +35,18 @@ impl LocalFsBackend { let data_dir = data_dir.as_ref().to_path_buf(); let objects_dir = data_dir.join("objects"); let parts_dir = data_dir.join("parts"); - + // Create directories if they don't exist fs::create_dir_all(&objects_dir).await?; fs::create_dir_all(&parts_dir).await?; - + Ok(Self { objects_dir, parts_dir, sync_on_write, }) } - + /// Get object file path fn object_file_name(&self, object_id: &ObjectId) -> String { object_id.to_string() @@ -67,13 +67,13 @@ impl LocalFsBackend { fn legacy_object_path(&self, object_id: &ObjectId) -> PathBuf { self.objects_dir.join(self.object_file_name(object_id)) } - + /// Get part file path fn part_path(&self, upload_id: &str, part_number: u32) -> StorageResult { self.validate_upload_id(upload_id)?; Ok(self.parts_dir.join(upload_id).join(part_number.to_string())) } - + /// Get upload directory path fn upload_dir(&self, upload_id: &str) -> StorageResult { self.validate_upload_id(upload_id)?; @@ -113,12 +113,12 @@ impl LocalFsBackend { impl StorageBackend for LocalFsBackend { async fn put_object(&self, object_id: &ObjectId, data: Bytes) -> StorageResult<()> { let path = self.object_path(object_id); - + // Create parent directory if needed (shouldn't be needed, but be safe) if let Some(parent) = path.parent() { fs::create_dir_all(parent).await?; } - + // Write data atomically using temporary file + rename let temp_path = path.with_extension(".tmp"); let mut file = fs::File::create(&temp_path).await?; @@ -127,33 +127,33 @@ impl StorageBackend for LocalFsBackend { file.sync_data().await?; 
} drop(file); - + // Atomic rename fs::rename(&temp_path, &path).await?; - + tracing::debug!( object_id = %object_id, size = data.len(), path = %path.display(), "Stored object to local filesystem" ); - + Ok(()) } - + async fn get_object(&self, object_id: &ObjectId) -> StorageResult { let path = self.resolve_existing_object_path(object_id).await?; let data = fs::read(&path).await?; - + tracing::debug!( object_id = %object_id, size = data.len(), "Read object from local filesystem" ); - + Ok(Bytes::from(data)) } - + async fn delete_object(&self, object_id: &ObjectId) -> StorageResult<()> { let path = match self.resolve_existing_object_path(object_id).await { Ok(path) => path, @@ -165,33 +165,28 @@ impl StorageBackend for LocalFsBackend { fs::remove_file(&path).await?; tracing::debug!(object_id = %object_id, "Deleted object from local filesystem"); } - + Ok(()) } - + async fn object_exists(&self, object_id: &ObjectId) -> StorageResult { Ok(self.resolve_existing_object_path(object_id).await.is_ok()) } - + async fn object_size(&self, object_id: &ObjectId) -> StorageResult { let path = self.resolve_existing_object_path(object_id).await?; let metadata = fs::metadata(&path).await?; Ok(metadata.len()) } - - async fn put_part( - &self, - upload_id: &str, - part_number: u32, - data: Bytes, - ) -> StorageResult<()> { + + async fn put_part(&self, upload_id: &str, part_number: u32, data: Bytes) -> StorageResult<()> { let path = self.part_path(upload_id, part_number)?; - + // Create upload directory if needed if let Some(parent) = path.parent() { fs::create_dir_all(parent).await?; } - + // Write part data atomically let temp_path = path.with_extension(".tmp"); let mut file = fs::File::create(&temp_path).await?; @@ -200,56 +195,52 @@ impl StorageBackend for LocalFsBackend { file.sync_data().await?; } drop(file); - + fs::rename(&temp_path, &path).await?; - + tracing::debug!( upload_id = upload_id, part_number = part_number, size = data.len(), "Stored part to local filesystem" ); - + Ok(()) } - - async fn get_part( - &self, - upload_id: &str, - part_number: u32, - ) -> StorageResult { + + async fn get_part(&self, upload_id: &str, part_number: u32) -> StorageResult { let path = self.part_path(upload_id, part_number)?; - + if !fs::try_exists(&path).await? { return Err(StorageError::Backend(format!( "Part {} of upload {} not found", part_number, upload_id ))); } - + let data = fs::read(&path).await?; - + Ok(Bytes::from(data)) } - + async fn delete_part(&self, upload_id: &str, part_number: u32) -> StorageResult<()> { let path = self.part_path(upload_id, part_number)?; - + if fs::try_exists(&path).await? { fs::remove_file(&path).await?; } - + Ok(()) } - + async fn delete_upload_parts(&self, upload_id: &str) -> StorageResult<()> { let upload_dir = self.upload_dir(upload_id)?; - + if fs::try_exists(&upload_dir).await? 
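Both put_object and put_part in LocalFsBackend use the same crash-safety pattern: write the full payload to a sibling temp file, optionally fsync it when sync_on_write is set, then rename it into place so readers never observe a partial object. A minimal synchronous sketch of that pattern (the backend itself uses tokio::fs):

```rust
// Minimal sketch of the temp-file + rename pattern LocalFsBackend relies on;
// synchronous std::fs is used here for brevity, and `sync_on_write` mirrors
// the backend's flag.
use std::fs::{self, File};
use std::io::{self, Write};
use std::path::Path;

fn write_atomically(path: &Path, data: &[u8], sync_on_write: bool) -> io::Result<()> {
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent)?;
    }
    // Write the full payload to a sibling temp file first.
    let tmp = path.with_file_name(format!(
        "{}.tmp",
        path.file_name().and_then(|n| n.to_str()).unwrap_or("object")
    ));
    let mut file = File::create(&tmp)?;
    file.write_all(data)?;
    if sync_on_write {
        // Flush file contents to disk before the rename makes them visible.
        file.sync_data()?;
    }
    drop(file);
    // Atomic swap: readers see either the old contents or the complete new
    // contents, never a partial write.
    fs::rename(&tmp, path)?;
    Ok(())
}

fn main() -> io::Result<()> {
    let dir = std::env::temp_dir().join("atomic-write-demo");
    let target = dir.join("object-1");
    write_atomically(&target, b"hello", true)?;
    assert_eq!(fs::read(&target)?, b"hello");
    Ok(())
}
```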
{ fs::remove_dir_all(&upload_dir).await?; tracing::debug!(upload_id = upload_id, "Deleted all parts for upload"); } - + Ok(()) } } @@ -258,95 +249,107 @@ impl StorageBackend for LocalFsBackend { mod tests { use super::*; use tempfile::TempDir; - + #[tokio::test] async fn test_put_get_object() { let temp_dir = TempDir::new().unwrap(); let backend = LocalFsBackend::new(temp_dir.path(), false).await.unwrap(); - + let object_id = ObjectId::new(); let data = Bytes::from("test data"); - + // Put object backend.put_object(&object_id, data.clone()).await.unwrap(); - + // Get object let retrieved = backend.get_object(&object_id).await.unwrap(); assert_eq!(retrieved, data); } - + #[tokio::test] async fn test_object_exists() { let temp_dir = TempDir::new().unwrap(); let backend = LocalFsBackend::new(temp_dir.path(), false).await.unwrap(); - + let object_id = ObjectId::new(); - + // Object doesn't exist assert!(!backend.object_exists(&object_id).await.unwrap()); - + // Put object - backend.put_object(&object_id, Bytes::from("data")).await.unwrap(); - + backend + .put_object(&object_id, Bytes::from("data")) + .await + .unwrap(); + // Object exists assert!(backend.object_exists(&object_id).await.unwrap()); } - + #[tokio::test] async fn test_delete_object() { let temp_dir = TempDir::new().unwrap(); let backend = LocalFsBackend::new(temp_dir.path(), false).await.unwrap(); - + let object_id = ObjectId::new(); - + // Put object - backend.put_object(&object_id, Bytes::from("data")).await.unwrap(); + backend + .put_object(&object_id, Bytes::from("data")) + .await + .unwrap(); assert!(backend.object_exists(&object_id).await.unwrap()); - + // Delete object backend.delete_object(&object_id).await.unwrap(); assert!(!backend.object_exists(&object_id).await.unwrap()); } - + #[tokio::test] async fn test_object_size() { let temp_dir = TempDir::new().unwrap(); let backend = LocalFsBackend::new(temp_dir.path(), false).await.unwrap(); - + let object_id = ObjectId::new(); let data = Bytes::from("test data"); - + // Put object backend.put_object(&object_id, data.clone()).await.unwrap(); - + // Check size let size = backend.object_size(&object_id).await.unwrap(); assert_eq!(size, data.len() as u64); } - + #[tokio::test] async fn test_multipart_parts() { let temp_dir = TempDir::new().unwrap(); let backend = LocalFsBackend::new(temp_dir.path(), false).await.unwrap(); - + let upload_id = "test-upload-123"; let part1_data = Bytes::from("part 1"); let part2_data = Bytes::from("part 2"); - + // Put parts - backend.put_part(upload_id, 1, part1_data.clone()).await.unwrap(); - backend.put_part(upload_id, 2, part2_data.clone()).await.unwrap(); - + backend + .put_part(upload_id, 1, part1_data.clone()) + .await + .unwrap(); + backend + .put_part(upload_id, 2, part2_data.clone()) + .await + .unwrap(); + // Get parts let retrieved1 = backend.get_part(upload_id, 1).await.unwrap(); let retrieved2 = backend.get_part(upload_id, 2).await.unwrap(); assert_eq!(retrieved1, part1_data); assert_eq!(retrieved2, part2_data); - + // Delete parts backend.delete_part(upload_id, 1).await.unwrap(); assert!(backend.get_part(upload_id, 1).await.is_err()); - + // Delete all parts backend.delete_upload_parts(upload_id).await.unwrap(); assert!(backend.get_part(upload_id, 2).await.is_err()); diff --git a/lightningstor/crates/lightningstor-types/src/bucket.rs b/lightningstor/crates/lightningstor-types/src/bucket.rs index 5509d2c..008cff6 100644 --- a/lightningstor/crates/lightningstor-types/src/bucket.rs +++ 
b/lightningstor/crates/lightningstor-types/src/bucket.rs @@ -63,7 +63,9 @@ impl BucketName { .chars() .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-' || c == '.') { - return Err("bucket name can only contain lowercase letters, numbers, hyphens, and periods"); + return Err( + "bucket name can only contain lowercase letters, numbers, hyphens, and periods", + ); } if !name.chars().next().unwrap().is_ascii_alphanumeric() { @@ -75,11 +77,7 @@ impl BucketName { } // Cannot look like IP address - if name.split('.').count() == 4 - && name - .split('.') - .all(|part| part.parse::().is_ok()) - { + if name.split('.').count() == 4 && name.split('.').all(|part| part.parse::().is_ok()) { return Err("bucket name cannot be formatted as an IP address"); } @@ -131,8 +129,7 @@ pub enum BucketStatus { } /// Bucket access policy -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[derive(Default)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)] pub struct BucketPolicy { /// Policy JSON document (S3 policy format) pub policy_json: Option, @@ -142,7 +139,6 @@ pub struct BucketPolicy { pub public_write: bool, } - /// A storage bucket containing objects #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Bucket { diff --git a/lightningstor/crates/lightningstor-types/src/object.rs b/lightningstor/crates/lightningstor-types/src/object.rs index 3668604..ae898d3 100644 --- a/lightningstor/crates/lightningstor-types/src/object.rs +++ b/lightningstor/crates/lightningstor-types/src/object.rs @@ -383,7 +383,6 @@ impl MultipartUpload { } } - #[cfg(test)] mod tests { use super::*; diff --git a/nightlight/README.md b/nightlight/README.md new file mode 100644 index 0000000..f30ae9d --- /dev/null +++ b/nightlight/README.md @@ -0,0 +1,19 @@ +# NightLight + +`nightlight` is UltraCloud's supported metrics ingestion and query service for the edge bundle. + +## Supported product shape + +NightLight is supported as a single-node WAL/snapshot service; replicated HA metrics storage is not part of the product contract. + +- Retention is instance-wide and controlled by `retention_days`, the WAL, and periodic snapshots. +- The supported proof scope is `nix run ./nix/test-cluster#cluster -- fresh-matrix`, which validates the shipped HTTP and gRPC query surface on the gateway node. +- Shared deployments should put authentication and exposure policy in front of NightLight through APIGateway or another authenticated front door. + +## Tenant boundary + +NightLight does not provide a hard multi-tenant security boundary inside the process. + +- One NightLight instance per environment or tenant boundary is the recommended shape when strong separation is needed. +- Labels and caller discipline can partition data operationally, but they are not a product-grade authorization boundary. +- Per-tenant retention or quota enforcement is not part of the current contract. 
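The bucket-name rules touched above can be exercised on their own: lowercase letters, digits, hyphens, and periods only, a leading alphanumeric character, and no dotted-quad names. In this sketch the per-octet `u8` parse in the IP check and the leading-character error text are assumptions; length bounds and the remaining rules of the full validator are omitted.

```rust
// Standalone sketch of the S3-style bucket-name checks exercised above.
fn validate_bucket_name(name: &str) -> Result<(), &'static str> {
    if !name
        .chars()
        .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-' || c == '.')
    {
        return Err("bucket name can only contain lowercase letters, numbers, hyphens, and periods");
    }
    match name.chars().next() {
        Some(c) if c.is_ascii_alphanumeric() => {}
        _ => return Err("bucket name must start with a lowercase letter or number"),
    }
    // Reject names that look like dotted-quad IP addresses.
    if name.split('.').count() == 4 && name.split('.').all(|part| part.parse::<u8>().is_ok()) {
        return Err("bucket name cannot be formatted as an IP address");
    }
    Ok(())
}

fn main() {
    assert!(validate_bucket_name("my-bucket.logs").is_ok());
    assert!(validate_bucket_name("MyBucket").is_err());
    assert!(validate_bucket_name("192.168.100.1").is_err());
    assert!(validate_bucket_name("-starts-with-hyphen").is_err());
}
```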
diff --git a/nix-nos/flake.nix b/nix-nos/flake.nix index 04cfd64..4e5eb5e 100644 --- a/nix-nos/flake.nix +++ b/nix-nos/flake.nix @@ -1,5 +1,5 @@ { - description = "Nix-NOS: Generic network operating system modules for NixOS"; + description = "Nix-NOS: legacy compatibility and low-level network primitive modules for NixOS"; inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; @@ -26,7 +26,7 @@ ]; shellHook = '' - echo "Nix-NOS development shell" + echo "Nix-NOS development shell (compatibility and network primitives only)" echo "Run: nix flake check" ''; }; diff --git a/nix-nos/modules/default.nix b/nix-nos/modules/default.nix index e797eb1..98ab69a 100644 --- a/nix-nos/modules/default.nix +++ b/nix-nos/modules/default.nix @@ -9,7 +9,7 @@ ]; options.nix-nos = { - enable = lib.mkEnableOption "Nix-NOS network primitive modules"; + enable = lib.mkEnableOption "Nix-NOS legacy compatibility and low-level network primitives only; not a supported cluster authoring source"; version = lib.mkOption { type = lib.types.str; @@ -20,6 +20,13 @@ }; config = lib.mkIf config.nix-nos.enable { + warnings = [ + '' + nix-nos is limited to legacy compatibility and low-level network primitives. + ultracloud.cluster backed by nix/lib/cluster-schema.nix is the only supported cluster authoring source. + '' + ]; + # Global assertions assertions = [ { diff --git a/nix/ci/flake.nix b/nix/ci/flake.nix index d797f50..d0f7a45 100644 --- a/nix/ci/flake.nix +++ b/nix/ci/flake.nix @@ -379,6 +379,13 @@ ${gate}/bin/ultracloud-gate --tier 0 --no-logs touch $out/ok ''; + checks.canonical-profile-eval-guards = ultracloud.checks.${system}.canonical-profile-eval-guards; + checks.canonical-profile-build-guards = + ultracloud.checks.${system}.canonical-profile-build-guards; + checks.supported-surface-guard = ultracloud.checks.${system}.supported-surface-guard; + checks.portable-control-plane-regressions = + ultracloud.checks.${system}.portable-control-plane-regressions; + checks.baremetal-iso-e2e = ultracloud.checks.${system}.baremetal-iso-e2e; checks.deployer-vm-smoke = ultracloud.checks.${system}.deployer-vm-smoke; checks.deployer-vm-rollback = ultracloud.checks.${system}.deployer-vm-rollback; checks.deployer-bootstrap-e2e = ultracloud.checks.${system}.deployer-bootstrap-e2e; diff --git a/nix/images/netboot-all-in-one.nix b/nix/images/netboot-all-in-one.nix index b829e3d..70a44a5 100644 --- a/nix/images/netboot-all-in-one.nix +++ b/nix/images/netboot-all-in-one.nix @@ -1,222 +1,142 @@ { config, pkgs, lib, ... 
}: +let + surface = import ../single-node/surface.nix; + corePackages = map (service: builtins.getAttr service.packageAttr pkgs) surface.vmPlatformCore; +in { imports = [ ./netboot-base.nix - ../modules # Import UltraCloud service modules + ../modules ]; # ============================================================================ - # ALL-IN-ONE PROFILE + # SINGLE-NODE / ALL-IN-ONE INSTALL IMAGE # ============================================================================ - # This profile includes all 8 UltraCloud services for a single-node deployment: - # - Chainfire: Distributed configuration and coordination - # - FlareDB: Time-series metrics and events database - # - IAM: Identity and access management - # - PlasmaVMC: Virtual machine control plane - # - PrismNET: Software-defined networking controller - # - FlashDNS: High-performance DNS server - # - FiberLB: Layer 4/7 load balancer - # - LightningStor: Distributed block storage - # - K8sHost: Kubernetes hosting component + # This netboot image is the bare-metal companion to the QEMU-first + # `single-node-quickstart` profile. It keeps only the minimum VM stack in the + # image by default and leaves DNS, load-balancing, storage, API, metrics, and + # Kubernetes layers as explicit add-ons in the final installed system. # - # This profile is optimized for: - # - Development/testing environments - # - Small deployments (1-3 nodes) - # - Edge locations with limited infrastructure - # - Proof-of-concept installations + # Included by default: + # - Chainfire: local coordination and placement metadata + # - FlareDB: metadata/event storage + # - IAM: local identity plane for the dev profile + # - PrismNET: VM networking control plane + # - PlasmaVMC: VM control plane # - # Services are DISABLED by default in the netboot image. - # They will be enabled in the final installed system configuration. + # Intentionally not included by default: + # - Deployer / Nix Agent: rollout and install authority + # - Fleet Scheduler / Node Agent: native host-service placement + # - K8sHost: tenant pod and service control plane + # + # Optional after install: + # - LightningStor, CoronaFS + # - FlashDNS, FiberLB + # - API Gateway, Nightlight, CreditService + # - K8sHost # ============================================================================ - # ============================================================================ - # SERVICE PACKAGE AVAILABILITY - # ============================================================================ - # Make all service packages available in the netboot image - environment.systemPackages = with pkgs; [ - # Core services - chainfire-server - flaredb-server - iam-server + environment.systemPackages = corePackages ++ (with pkgs; [ + qemu + libvirt + bridge-utils + openvswitch + curl + jq + ]); - # Compute and networking - plasmavmc-server - prismnet-server + environment.etc."ultracloud-product-surface.json".text = builtins.toJSON { + profile = "single-node dev"; + coreServices = map (service: builtins.removeAttrs service [ "packageAttr" "unit" ]) surface.vmPlatformCore; + optionalBundles = + map + (bundle: { + inherit (bundle) option name summary; + services = map (service: service.name) bundle.services; + } + // lib.optionalAttrs (bundle ? 
requires) { + requires = bundle.requires; + }) + surface.optionalBundles; + responsibilityBoundaries = surface.responsibilityBoundaries; + easyTrial = surface.easyTrial; + }; - # Network services - flashdns-server - fiberlb-server - - # Storage - lightningstor-server - - # Container orchestration - k8shost-server - - # Additional tools for all-in-one deployment - qemu # For running VMs - libvirt # Virtualization management - bridge-utils # Network bridge configuration - openvswitch # Software-defined networking - ]; - - # ============================================================================ - # CHAINFIRE CONFIGURATION (DISABLED) - # ============================================================================ services.chainfire = { enable = lib.mkDefault false; port = 2379; raftPort = 2380; gossipPort = 2381; + httpPort = 8081; }; - # ============================================================================ - # FLAREDB CONFIGURATION (DISABLED) - # ============================================================================ services.flaredb = { enable = lib.mkDefault false; port = 2479; raftPort = 2480; + httpPort = 8082; }; - # ============================================================================ - # IAM CONFIGURATION (DISABLED) - # ============================================================================ services.iam = { enable = lib.mkDefault false; - port = 8080; + port = 50080; + httpPort = 8083; }; - # ============================================================================ - # PLASMAVMC CONFIGURATION (DISABLED) - # ============================================================================ - services.plasmavmc = { - enable = lib.mkDefault false; - port = 8081; - }; - - # ============================================================================ - # PRISMNET CONFIGURATION (DISABLED) - # ============================================================================ services.prismnet = { enable = lib.mkDefault false; - port = 8082; + port = 50081; + httpPort = 8087; }; - # ============================================================================ - # FLASHDNS CONFIGURATION (DISABLED) - # ============================================================================ - services.flashdns = { + services.plasmavmc = { enable = lib.mkDefault false; - port = 53; + port = 50082; + httpPort = 8084; }; - # ============================================================================ - # FIBERLB CONFIGURATION (DISABLED) - # ============================================================================ - services.fiberlb = { - enable = lib.mkDefault false; - port = 8083; - }; + services.deployer.enable = lib.mkDefault false; + services.nix-agent.enable = lib.mkDefault false; + services.node-agent.enable = lib.mkDefault false; + services.fleet-scheduler.enable = lib.mkDefault false; + services.k8shost.enable = lib.mkDefault false; - # ============================================================================ - # LIGHTNINGSTOR CONFIGURATION (DISABLED) - # ============================================================================ - services.lightningstor = { - enable = lib.mkDefault false; - port = 8084; - }; - - # ============================================================================ - # K8SHOST CONFIGURATION (DISABLED) - # ============================================================================ - services.k8shost = { - enable = lib.mkDefault false; - port = 8085; - }; - - # ============================================================================ - # 
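Services and test harnesses that read /etc/ultracloud-product-surface.json can treat it as a plain JSON document with the field names surface.nix defines. A hedged consumer sketch; the serde / serde_json dependency choice and the program around it are assumptions, only the field names and the file path come from the expression above.

```rust
// Hedged sketch of a consumer for /etc/ultracloud-product-surface.json as the
// image above writes it; unknown fields (optionalBundles, easyTrial, ...) are
// simply ignored by serde's default behaviour.
use std::collections::BTreeMap;

use serde::Deserialize;

#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ProductSurface {
    profile: String,
    core_services: Vec<CoreService>,
}

#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct CoreService {
    name: String,
    summary: String,
    ports: BTreeMap<String, u16>,
    health_url: String,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let raw = std::fs::read_to_string("/etc/ultracloud-product-surface.json")?;
    let surface: ProductSurface = serde_json::from_str(&raw)?;
    println!("profile: {}", surface.profile);
    for service in &surface.core_services {
        // e.g. "chainfire (replicated coordination store) -> http://127.0.0.1:8081/health"
        println!("{} ({}) -> {}", service.name, service.summary, service.health_url);
        for (port_name, port) in &service.ports {
            println!("  {port_name}: {port}");
        }
    }
    Ok(())
}
```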
VIRTUALIZATION SUPPORT - # ============================================================================ - # Enable KVM virtualization - boot.kernelModules = [ "kvm-intel" "kvm-amd" ]; - - # Enable nested virtualization + boot.kernelModules = [ "kvm-intel" "kvm-amd" "tun" ]; boot.extraModprobeConfig = '' options kvm_intel nested=1 options kvm_amd nested=1 ''; - # ============================================================================ - # NETWORKING CONFIGURATION - # ============================================================================ - # Enable Open vSwitch for SDN networking.vswitches = lib.mkDefault {}; - # Open firewall ports for all services networking.firewall.allowedTCPPorts = [ - # Chainfire - 2379 # API - 2380 # Raft - 2381 # Gossip - - # FlareDB - 2479 # API - 2480 # Raft - - # IAM - 8080 - - # PlasmaVMC + 22 + 2379 + 2380 + 2381 + 2479 + 2480 + 50080 + 50081 + 50082 8081 - - # PrismNET 8082 - - # FlashDNS - 53 - - # FiberLB 8083 - - # LightningStor 8084 - - # K8sHost - 8085 - - # QEMU/LibVirt - 16509 # libvirtd - 5900 # VNC (for VM console access) + 8087 + 16509 + 5900 ]; networking.firewall.allowedUDPPorts = [ - # FlashDNS - 53 - - # Chainfire gossip 2381 - - # VXLAN for overlay networking 4789 ]; - # ============================================================================ - # STORAGE CONFIGURATION - # ============================================================================ - # Enable LVM for flexible storage management services.lvm.enable = true; - - # Enable ZFS if needed boot.supportedFilesystems = [ "ext4" "xfs" "btrfs" "zfs" ]; - # ============================================================================ - # RESOURCE LIMITS (BALANCED FOR ALL-IN-ONE) - # ============================================================================ - # Balance resources between services on a single node - # These are minimal limits for netboot; adjust in final config based on hardware - systemd.services.chainfire.serviceConfig = lib.mkIf config.services.chainfire.enable { MemoryMax = "1G"; CPUQuota = "100%"; @@ -242,26 +162,13 @@ CPUQuota = "50%"; }; - # ============================================================================ - # PERFORMANCE TUNING - # ============================================================================ - # Optimize for mixed workload (services + VMs) boot.kernel.sysctl = { - # Increase max number of open files "fs.file-max" = 1000000; - - # Increase network buffer sizes + "net.core.netdev_max_backlog" = 5000; "net.core.rmem_max" = 134217728; "net.core.wmem_max" = 134217728; - - # Enable IP forwarding for VM networking "net.ipv4.ip_forward" = 1; "net.ipv6.conf.all.forwarding" = 1; - - # Optimize for high-performance networking - "net.core.netdev_max_backlog" = 5000; - - # Swappiness for server workloads "vm.swappiness" = 10; }; } diff --git a/nix/iso/ultracloud-iso.nix b/nix/iso/ultracloud-iso.nix index 0c35c1f..7d33d9e 100644 --- a/nix/iso/ultracloud-iso.nix +++ b/nix/iso/ultracloud-iso.nix @@ -2,7 +2,15 @@ # Minimal ISO with DHCP + Phone Home to Deployer + Auto-Install # For VM cluster deployment: boots, phones home, partitions disk, installs NixOS -{ config, lib, pkgs, modulesPath, ... }: +{ + config, + lib, + pkgs, + modulesPath, + ultracloudBaremetalFormatMountPaths ? { }, + ultracloudBaremetalSystemPaths ? { }, + ... 
+}: { imports = [ @@ -58,16 +66,34 @@ return 1 } + dmi_value() { + local path="$1" + if [ -r "$path" ]; then + tr -d '\n' <"$path" 2>/dev/null || true + fi + } + + resolve_deployer_url() { + local explicit_url="''${DEPLOYER_URL:-}" + if [ -z "$explicit_url" ]; then + explicit_url="$(cmdline_value ultracloud.deployer_url || true)" + fi + if [ -n "$explicit_url" ]; then + echo "$explicit_url" + return 0 + fi + if ${pkgs.curl}/bin/curl -fsS --connect-timeout 2 --max-time 5 \ + http://10.0.2.2:8088/health >/dev/null 2>&1; then + echo "http://10.0.2.2:8088" + return 0 + fi + echo "http://192.168.100.1:8080" + } + mkdir -p /etc/ultracloud # Discover Deployer via environment, kernel cmdline, or fallback. - DEPLOYER_URL="''${DEPLOYER_URL:-}" - if [ -z "$DEPLOYER_URL" ]; then - DEPLOYER_URL="$(cmdline_value ultracloud.deployer_url || true)" - fi - if [ -z "$DEPLOYER_URL" ]; then - DEPLOYER_URL="http://192.168.100.1:8080" - fi + DEPLOYER_URL="$(resolve_deployer_url)" # Get machine identity MACHINE_ID=$(cat /etc/machine-id) @@ -113,7 +139,24 @@ if [ -z "$NODE_IP" ]; then NODE_IP=$(hostname -I 2>/dev/null | ${pkgs.gawk}/bin/awk '{print $1}') fi - NODE_HOSTNAME=$(hostname) + REQUESTED_NODE_ID="''${ULTRACLOUD_NODE_ID:-}" + if [ -z "$REQUESTED_NODE_ID" ]; then + REQUESTED_NODE_ID="$(cmdline_value ultracloud.node_id || true)" + fi + if [ -z "$REQUESTED_NODE_ID" ]; then + REQUESTED_NODE_ID="$(dmi_value /sys/class/dmi/id/product_serial)" + fi + if [ -z "$REQUESTED_NODE_ID" ]; then + REQUESTED_NODE_ID="$(hostname)" + fi + REQUESTED_HOSTNAME="''${ULTRACLOUD_HOSTNAME:-}" + if [ -z "$REQUESTED_HOSTNAME" ]; then + REQUESTED_HOSTNAME="$(cmdline_value ultracloud.hostname || true)" + fi + if [ -z "$REQUESTED_HOSTNAME" ]; then + REQUESTED_HOSTNAME="$REQUESTED_NODE_ID" + fi + echo "ULTRACLOUD_MARKER pre-install.boot.$REQUESTED_NODE_ID" CPU_MODEL=$(${pkgs.gawk}/bin/awk -F: '/model name/ {gsub(/^[ \t]+/, "", $2); print $2; exit}' /proc/cpuinfo 2>/dev/null || true) CPU_CORES=$(${pkgs.gawk}/bin/awk '/^cpu cores/ {print $4; exit}' /proc/cpuinfo 2>/dev/null || true) CPU_THREADS=$(${pkgs.coreutils}/bin/nproc --all 2>/dev/null || true) @@ -172,8 +215,8 @@ ') REQUEST_JSON=$(${pkgs.jq}/bin/jq -n \ --arg machine_id "$MACHINE_ID" \ - --arg node_id "$NODE_HOSTNAME" \ - --arg hostname "$NODE_HOSTNAME" \ + --arg node_id "$REQUESTED_NODE_ID" \ + --arg hostname "$REQUESTED_HOSTNAME" \ --arg ip "$NODE_IP" \ --argjson hardware_facts "$HARDWARE_FACTS" ' { @@ -253,6 +296,7 @@ # Signal success NODE_ID=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.node_config.assignment.node_id // "unknown"') + echo "ULTRACLOUD_MARKER pre-install.phone-home.complete.$NODE_ID" echo "✓ Bootstrap complete: $NODE_ID" exit 0 else @@ -282,6 +326,7 @@ script = '' set -euo pipefail + export PATH="${pkgs.nix}/bin:${config.system.build.nixos-install}/bin:$PATH" cmdline_value() { local key="$1" @@ -297,6 +342,40 @@ return 1 } + resolve_deployer_url() { + local explicit_url="''${DEPLOYER_URL:-}" + if [ -z "$explicit_url" ]; then + explicit_url="$(cmdline_value ultracloud.deployer_url || true)" + fi + if [ -n "$explicit_url" ]; then + echo "$explicit_url" + return 0 + fi + if ${pkgs.curl}/bin/curl -fsS --connect-timeout 2 --max-time 5 \ + http://10.0.2.2:8088/health >/dev/null 2>&1; then + echo "http://10.0.2.2:8088" + return 0 + fi + echo "http://192.168.100.1:8080" + } + + resolve_binary_cache_url() { + local explicit_url="''${ULTRACLOUD_BINARY_CACHE_URL:-}" + if [ -z "$explicit_url" ]; then + explicit_url="$(cmdline_value ultracloud.binary_cache_url || true)" + fi + 
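resolve_deployer_url encodes a fixed discovery order: an explicit environment variable, then the kernel cmdline, then a health probe of the QEMU host gateway at 10.0.2.2:8088, and finally the static PXE-network default. A hedged sketch of that precedence with the probe stubbed out; the example URL in main is illustrative only:

```rust
// Sketch of the discovery precedence the ISO's resolve_deployer_url implements.
// Only the ordering and the two addresses come from the script above; the
// health probe is passed in as a closure instead of running curl.
fn resolve_deployer_url(
    env_url: Option<&str>,
    cmdline_url: Option<&str>,
    qemu_gateway_healthy: impl Fn(&str) -> bool,
) -> String {
    if let Some(url) = env_url.filter(|u| !u.is_empty()) {
        return url.to_string();
    }
    if let Some(url) = cmdline_url.filter(|u| !u.is_empty()) {
        return url.to_string();
    }
    // QEMU user-mode networking exposes the host as 10.0.2.2; prefer it when a
    // deployer answers its health endpoint there.
    let qemu_url = "http://10.0.2.2:8088";
    if qemu_gateway_healthy(qemu_url) {
        return qemu_url.to_string();
    }
    // Fall back to the PXE/management network default.
    "http://192.168.100.1:8080".to_string()
}

fn main() {
    let no_probe = |_: &str| false;
    assert_eq!(
        resolve_deployer_url(None, Some("http://deployer.lab:8080"), no_probe),
        "http://deployer.lab:8080"
    );
    assert_eq!(resolve_deployer_url(None, None, no_probe), "http://192.168.100.1:8080");
}
```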
if [ -n "$explicit_url" ]; then + echo "$explicit_url" + return 0 + fi + if ${pkgs.curl}/bin/curl -fsS --connect-timeout 2 --max-time 5 \ + http://10.0.2.2:8090/nix-cache-info >/dev/null 2>&1; then + echo "http://10.0.2.2:8090" + return 0 + fi + return 1 + } + if [ ! -s /etc/ultracloud/node-config.json ]; then echo "ERROR: node-config.json missing (bootstrap not complete?)" exit 1 @@ -305,16 +384,24 @@ NODE_ID=$(${pkgs.jq}/bin/jq -r '.assignment.hostname // .assignment.node_id // empty' /etc/ultracloud/node-config.json) NODE_IP=$(${pkgs.jq}/bin/jq -r '.assignment.ip // empty' /etc/ultracloud/node-config.json) NIXOS_CONFIGURATION=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.nixos_configuration // .assignment.hostname // empty' /etc/ultracloud/node-config.json) - DISKO_PATH=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.disko_config_path // empty' /etc/ultracloud/node-config.json) + INSTALL_PLAN_DISKO_CONFIG_PATH=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.disko_config_path // empty' /etc/ultracloud/node-config.json) + DISKO_SCRIPT_PATH=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.disko_script_path // empty' /etc/ultracloud/node-config.json) + if [ -z "$DISKO_SCRIPT_PATH" ] && [ -r /etc/ultracloud/disko-script-paths.json ]; then + DISKO_SCRIPT_PATH=$(${pkgs.jq}/bin/jq -r --arg cfg "$NIXOS_CONFIGURATION" '.[$cfg] // empty' /etc/ultracloud/disko-script-paths.json) + if [ -n "$DISKO_SCRIPT_PATH" ]; then + echo "Resolved pre-built Disko script for install profile $NIXOS_CONFIGURATION from the ISO profile map" + fi + fi + TARGET_SYSTEM_PATH=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.target_system_path // empty' /etc/ultracloud/node-config.json) + if [ -z "$TARGET_SYSTEM_PATH" ] && [ -r /etc/ultracloud/system-paths.json ]; then + TARGET_SYSTEM_PATH=$(${pkgs.jq}/bin/jq -r --arg cfg "$NIXOS_CONFIGURATION" '.[$cfg] // empty' /etc/ultracloud/system-paths.json) + if [ -n "$TARGET_SYSTEM_PATH" ]; then + echo "Resolved pre-built target system for install profile $NIXOS_CONFIGURATION from the ISO profile map" + fi + fi TARGET_DISK=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.target_disk // empty' /etc/ultracloud/node-config.json) TARGET_DISK_BY_ID=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.target_disk_by_id // empty' /etc/ultracloud/node-config.json) - DEPLOYER_URL="''${DEPLOYER_URL:-}" - if [ -z "$DEPLOYER_URL" ]; then - DEPLOYER_URL="$(cmdline_value ultracloud.deployer_url || true)" - fi - if [ -z "$DEPLOYER_URL" ]; then - DEPLOYER_URL="http://192.168.100.1:8080" - fi + DEPLOYER_URL="$(resolve_deployer_url)" SRC_ROOT="/opt/ultracloud-src" if [ -z "$NODE_ID" ] || [ -z "$NODE_IP" ]; then @@ -362,6 +449,7 @@ "$DEPLOYER_URL/api/v1/bootstrap/flake-bundle" \ -o "$BUNDLE_PATH"; then echo "Downloaded bootstrap flake bundle from deployer" + echo "ULTRACLOUD_MARKER install.bundle-downloaded.$NODE_ID" rm -rf "$SRC_ROOT" mkdir -p "$SRC_ROOT" ${pkgs.gzip}/bin/gzip -dc "$BUNDLE_PATH" | ${pkgs.gnutar}/bin/tar -xf - -C "$SRC_ROOT" @@ -369,25 +457,22 @@ echo "No deployer flake bundle available; using embedded source tree" fi - if [ -z "$DISKO_PATH" ]; then - CANDIDATE_DISKO="nix/nodes/vm-cluster/$NODE_ID/disko.nix" - if [ -f "$SRC_ROOT/$CANDIDATE_DISKO" ]; then - DISKO_PATH="$CANDIDATE_DISKO" - fi + echo "ULTRACLOUD_MARKER install.start.$NODE_ID" + DISPLAY_TARGET_DISK="$TARGET_DISK" + DISK_SELECTOR_SOURCE="auto-discovery" + if [ -n "$TARGET_DISK_BY_ID" ]; then + DISPLAY_TARGET_DISK="$TARGET_DISK_BY_ID" + DISK_SELECTOR_SOURCE="install_plan.target_disk_by_id" + 
elif [ -n "$TARGET_DISK" ]; then + DISK_SELECTOR_SOURCE="install_plan.target_disk" fi - - if [ -z "$DISKO_PATH" ]; then - echo "ERROR: node-config.json missing install_plan.disko_config_path and no default Disko path exists for $NODE_ID" - exit 1 + echo "UltraCloud install starting for $NODE_ID (ip=$NODE_IP, nixos_configuration=$NIXOS_CONFIGURATION, target_disk=$DISPLAY_TARGET_DISK)" + if [ -n "$INSTALL_PLAN_DISKO_CONFIG_PATH" ]; then + echo "Install contract: disko_config_path=$INSTALL_PLAN_DISKO_CONFIG_PATH, disk_selector_source=$DISK_SELECTOR_SOURCE" + else + echo "Install contract: disko_config_path=(embedded profile map for $NIXOS_CONFIGURATION), disk_selector_source=$DISK_SELECTOR_SOURCE" fi - if [ ! -f "$SRC_ROOT/$DISKO_PATH" ]; then - echo "ERROR: Disko config not found: $SRC_ROOT/$DISKO_PATH" - exit 1 - fi - - echo "UltraCloud install starting for $NODE_ID (ip=$NODE_IP, nixos_configuration=$NIXOS_CONFIGURATION, disko_path=$DISKO_PATH)" - # Resolve installation target disk. if [ -n "$TARGET_DISK_BY_ID" ]; then if [ ! -b "$TARGET_DISK_BY_ID" ]; then @@ -423,50 +508,99 @@ umount /mnt || true fi - echo "Validating NixOS configuration output..." - nix eval --raw "$SRC_ROOT#nixosConfigurations.$NIXOS_CONFIGURATION.config.system.build.toplevel.drvPath" >/dev/null - - EFFECTIVE_DISKO_PATH="$SRC_ROOT/$DISKO_PATH" - if [ -n "$DISK" ]; then - cat > /run/ultracloud/disko-wrapper.nix </dev/null + "$DISKO_SCRIPT_PATH/bin/disko-format-mount" + else + ${pkgs.disko}/bin/disko \ + --mode destroy,format,mount \ + --yes-wipe-all-disks \ + --root-mountpoint /mnt \ + --flake "$SRC_ROOT#$NIXOS_CONFIGURATION" + fi + echo "ULTRACLOUD_MARKER install.disko.complete.$NODE_ID" echo "Running nixos-install..." - nixos-install --flake "$SRC_ROOT#$NIXOS_CONFIGURATION" --no-root-passwd + if [ -n "$TARGET_SYSTEM_PATH" ]; then + echo "Realising pre-built target system: $TARGET_SYSTEM_PATH" + ${pkgs.nix}/bin/nix-store --realise "$TARGET_SYSTEM_PATH" >/dev/null + ${config.system.build.nixos-install}/bin/nixos-install \ + --system "$TARGET_SYSTEM_PATH" \ + --no-root-passwd \ + --no-channel-copy + else + ${config.system.build.nixos-install}/bin/nixos-install \ + --flake "$SRC_ROOT#$NIXOS_CONFIGURATION" \ + --no-root-passwd \ + --no-channel-copy + fi + echo "ULTRACLOUD_MARKER install.nixos-install.complete.$NODE_ID" + + mkdir -p /mnt/etc/ssh /mnt/etc/ultracloud /mnt/root/.ssh /mnt/var/lib + cp -f /etc/ultracloud/node-config.json /mnt/etc/ultracloud/node-config.json + cp -f /root/.ssh/authorized_keys /mnt/root/.ssh/authorized_keys + shopt -s nullglob + for host_key in /etc/ssh/ssh_host_*; do + cp -f "$host_key" /mnt/etc/ssh/"$(basename "$host_key")" + done + shopt -u nullglob + chmod 700 /mnt/root/.ssh + chmod 600 /mnt/root/.ssh/authorized_keys + chmod 600 /mnt/etc/ssh/ssh_host_*_key 2>/dev/null || true + chmod 644 /mnt/etc/ssh/ssh_host_*_key.pub 2>/dev/null || true + + rm -rf /mnt/var/lib/photon-src + cp -a "$SRC_ROOT" /mnt/var/lib/photon-src sync + echo "ULTRACLOUD_MARKER reboot.$NODE_ID" + echo "Allowing the harness to observe the reboot marker before shutting down..." + sleep 15 echo "✓ Install complete; rebooting..." 
${pkgs.systemd}/bin/systemctl reboot ''; }; # Packages for bootstrap + install + environment.etc."ultracloud/disko-script-paths.json".text = + builtins.toJSON ultracloudBaremetalFormatMountPaths; + environment.etc."ultracloud/system-paths.json".text = + builtins.toJSON ultracloudBaremetalSystemPaths; + environment.systemPackages = with pkgs; [ curl jq vim htop + nix gawk gnugrep util-linux parted dosfstools e2fsprogs + disko gnutar gzip ]; diff --git a/nix/modules/creditservice.nix b/nix/modules/creditservice.nix index 4ca17f0..9631681 100644 --- a/nix/modules/creditservice.nix +++ b/nix/modules/creditservice.nix @@ -31,7 +31,7 @@ let in { options.services.creditservice = { - enable = lib.mkEnableOption "minimal auth-integrated creditservice reference"; + enable = lib.mkEnableOption "auth-integrated creditservice quota and admission service"; grpcPort = lib.mkOption { type = lib.types.port; @@ -69,7 +69,7 @@ in storageBackend = lib.mkOption { type = lib.types.enum [ "flaredb" "postgres" "sqlite" ]; default = "flaredb"; - description = "Persistent storage backend for the minimal creditservice reference."; + description = "Persistent storage backend for the supported creditservice runtime."; }; databaseUrl = lib.mkOption { @@ -96,13 +96,13 @@ in users.users.creditservice = { isSystemUser = true; group = "creditservice"; - description = "CreditService reference service user"; + description = "CreditService service user"; }; users.groups.creditservice = {}; systemd.services.creditservice = { - description = "CreditService Minimal Auth-Integrated Credit Control Reference"; + description = "CreditService Auth-Integrated Credit Control"; wantedBy = [ "multi-user.target" ]; after = [ "network.target" ] ++ localDependencies; wants = localDependencies; diff --git a/nix/modules/default.nix b/nix/modules/default.nix index 4ea5066..4d59488 100644 --- a/nix/modules/default.nix +++ b/nix/modules/default.nix @@ -18,6 +18,7 @@ ./lightningstor.nix ./k8shost.nix ./nightlight.nix + ./apigateway.nix ./deployer.nix ./nix-agent.nix ./node-agent.nix diff --git a/nix/modules/deployer.nix b/nix/modules/deployer.nix index 68184b2..1f48863 100644 --- a/nix/modules/deployer.nix +++ b/nix/modules/deployer.nix @@ -52,7 +52,7 @@ let in { options.services.deployer = { - enable = lib.mkEnableOption "deployer bootstrap orchestration service"; + enable = lib.mkEnableOption "deployer bootstrap and rollout-intent service (enrollment and desired-state authority only; canonical cluster state comes from ultracloud.cluster)"; bindAddr = lib.mkOption { type = lib.types.str; @@ -63,7 +63,7 @@ in chainfireEndpoints = lib.mkOption { type = lib.types.listOf lib.types.str; default = [ ]; - description = "ChainFire endpoints for persistent deployer state"; + description = "ChainFire endpoints for persistent deployer state. The supported release contract is one active deployer writer plus optional cold-standby restore using the same namespace and local state; do not treat this as active/active or automatic multi-instance failover support."; example = [ "http://127.0.0.1:2379" ]; }; @@ -94,7 +94,7 @@ in localStatePath = lib.mkOption { type = lib.types.str; default = "/var/lib/deployer/state"; - description = "Local storage path for deployer bootstrap state"; + description = "Local storage path for deployer bootstrap state. 
Include this path in the supported deployer DR backup set together with cluster-state JSON and credentials."; }; bootstrapFlakeBundle = lib.mkOption { @@ -142,7 +142,7 @@ in clusterStateFile = lib.mkOption { type = lib.types.nullOr lib.types.path; default = null; - description = "Optional declarative cluster state JSON/YAML file applied with deployer-ctl"; + description = "Optional declarative cluster state JSON/YAML file applied with deployer-ctl to publish rollout intent. The supported source is the generated ultracloud.cluster output when available; deployer still does not execute host-local switches."; }; seedClusterState = lib.mkOption { @@ -241,7 +241,7 @@ in ]; systemd.services.deployer = { - description = "UltraCloud Deployer Server"; + description = "UltraCloud Deployer Server (bootstrap and desired-state authority only)"; wantedBy = [ "multi-user.target" ]; wants = [ "network-online.target" ] ++ localChainfireDeps; after = [ "network-online.target" ] ++ localChainfireDeps; diff --git a/nix/modules/fleet-scheduler.nix b/nix/modules/fleet-scheduler.nix index f119438..7c97c4d 100644 --- a/nix/modules/fleet-scheduler.nix +++ b/nix/modules/fleet-scheduler.nix @@ -5,12 +5,12 @@ let in { options.services.fleet-scheduler = { - enable = lib.mkEnableOption "fleet-scheduler service"; + enable = lib.mkEnableOption "fleet-scheduler service (native host-service placement only, consuming deployer state derived from ultracloud.cluster)"; chainfireEndpoint = lib.mkOption { type = lib.types.str; - default = "http://127.0.0.1:7000"; - description = "ChainFire endpoint used by fleet-scheduler"; + default = "http://127.0.0.1:2379"; + description = "ChainFire endpoint used by fleet-scheduler for native service placement; tenant pod scheduling stays in k8shost."; }; clusterNamespace = lib.mkOption { @@ -34,7 +34,7 @@ in heartbeatTimeoutSecs = lib.mkOption { type = lib.types.int; default = 300; - description = "Maximum node heartbeat age before a node becomes ineligible"; + description = "Maximum node heartbeat age before a node becomes ineligible and failover begins. 
The supported release proof covers the two native-runtime worker lab with one planned drain cycle and one fail-stop worker-loss cycle held for the rollout-soak window; long-duration maintenance policy and larger-cluster choreography remain out of scope."; }; dryRun = lib.mkOption { @@ -46,7 +46,7 @@ in iamEndpoint = lib.mkOption { type = lib.types.nullOr lib.types.str; default = null; - description = "IAM endpoint used for service publication"; + description = "IAM endpoint used for native service publication; fleet-scheduler still does not expose Kubernetes semantics."; }; fiberlbEndpoint = lib.mkOption { @@ -103,7 +103,7 @@ in users.groups.fleet-scheduler = { }; systemd.services.fleet-scheduler = { - description = "UltraCloud Fleet Scheduler"; + description = "UltraCloud Fleet Scheduler (native host-service placement only)"; wantedBy = [ "multi-user.target" ]; after = [ "network-online.target" ]; wants = [ "network-online.target" ]; diff --git a/nix/modules/k8shost.nix b/nix/modules/k8shost.nix index 05c417e..dacd628 100644 --- a/nix/modules/k8shost.nix +++ b/nix/modules/k8shost.nix @@ -2,6 +2,55 @@ let cfg = config.services.k8shost; + isLocalHttpEndpoint = value: + value != null + && ( + lib.hasPrefix "http://127.0.0.1:" value + || lib.hasPrefix "http://localhost:" value + || lib.hasPrefix "http://[::1]:" value + || lib.hasPrefix "https://127.0.0.1:" value + || lib.hasPrefix "https://localhost:" value + || lib.hasPrefix "https://[::1]:" value + ); + isLocalHostPort = value: + value != null + && ( + lib.hasPrefix "127.0.0.1:" value + || lib.hasPrefix "localhost:" value + || lib.hasPrefix "[::1]:" value + ); + flaredbUsesExplicitRemote = + (cfg.flaredbPdAddr != null && !(isLocalHostPort cfg.flaredbPdAddr)) + || (cfg.flaredbDirectAddr != null && !(isLocalHostPort cfg.flaredbDirectAddr)); + localServiceDeps = + lib.optional ( + (config.services.iam.enable or false) + && (cfg.iamAddr == null || isLocalHttpEndpoint cfg.iamAddr) + ) "iam.service" + ++ lib.optional ( + (config.services.creditservice.enable or false) + && (cfg.creditserviceAddr == null || isLocalHttpEndpoint cfg.creditserviceAddr) + ) "creditservice.service" + ++ lib.optional ( + (config.services.chainfire.enable or false) + && cfg.chainfireAddr != null + && isLocalHttpEndpoint cfg.chainfireAddr + ) "chainfire.service" + ++ lib.optional ( + (config.services.prismnet.enable or false) + && (cfg.prismnetAddr == null || isLocalHttpEndpoint cfg.prismnetAddr) + ) "prismnet.service" + ++ lib.optional ( + (config.services.flashdns.enable or false) + && (cfg.flashdnsAddr == null || isLocalHttpEndpoint cfg.flashdnsAddr) + ) "flashdns.service" + ++ lib.optional ( + (config.services.fiberlb.enable or false) + && (cfg.fiberlbAddr == null || isLocalHttpEndpoint cfg.fiberlbAddr) + ) "fiberlb.service" + ++ lib.optional ( + (config.services.flaredb.enable or false) && !flaredbUsesExplicitRemote + ) "flaredb.service"; tomlFormat = pkgs.formats.toml { }; generatedConfig = { server = { @@ -58,7 +107,7 @@ let in { options.services.k8shost = { - enable = lib.mkEnableOption "k8shost service"; + enable = lib.mkEnableOption "k8shost service (tenant pod and service control plane only; standalone from fleet-scheduler and base OS rollout)"; port = lib.mkOption { type = lib.types.port; @@ -82,14 +131,14 @@ in creditserviceAddr = lib.mkOption { type = lib.types.nullOr lib.types.str; default = null; - description = "CreditService endpoint address (http://host:port) for pod admission and scheduler quota enforcement."; + description = "CreditService endpoint 
address (http://host:port) for pod admission and quota enforcement. Native host-service placement remains the fleet-scheduler path."; example = "http://10.0.0.1:3010"; }; chainfireAddr = lib.mkOption { type = lib.types.nullOr lib.types.str; default = null; - description = "ChainFire endpoint address (http://host:port) for cluster coordination"; + description = "ChainFire endpoint address (http://host:port) for tenant workload coordination; k8shost does not own host-native service placement."; example = "http://10.0.0.1:2379"; }; @@ -160,10 +209,11 @@ in # Create systemd service systemd.services.k8shost = { - description = "K8shost Kubernetes Hosting Service"; + description = "K8shost Kubernetes Hosting Service (tenant workload control plane only)"; wantedBy = [ "multi-user.target" ]; - after = [ "network.target" "iam.service" "flaredb.service" "chainfire.service" "prismnet.service" ]; - requires = [ "iam.service" "flaredb.service" "chainfire.service" "prismnet.service" ]; + wants = [ "network-online.target" ] ++ localServiceDeps; + after = [ "network-online.target" ] ++ localServiceDeps; + requires = localServiceDeps; serviceConfig = { Type = "simple"; diff --git a/nix/modules/lightningstor.nix b/nix/modules/lightningstor.nix index d344685..8d53332 100644 --- a/nix/modules/lightningstor.nix +++ b/nix/modules/lightningstor.nix @@ -137,15 +137,28 @@ let ${nodeCommand} & node_pid=$! + ${serverCommand} & + server_pid=$! + cleanup() { - if kill -0 "$node_pid" 2>/dev/null; then - kill "$node_pid" 2>/dev/null || true - wait "$node_pid" 2>/dev/null || true - fi + for pid in "$server_pid" "$node_pid"; do + if kill -0 "$pid" 2>/dev/null; then + kill "$pid" 2>/dev/null || true + fi + done + + wait "$server_pid" 2>/dev/null || true + wait "$node_pid" 2>/dev/null || true } trap cleanup EXIT INT TERM - exec ${serverCommand} + + set +e + wait -n "$server_pid" "$node_pid" + rc=$? + set -e + + exit "$rc" ''; execStart = diff --git a/nix/modules/nix-agent.nix b/nix/modules/nix-agent.nix index 8f78373..5e1222e 100644 --- a/nix/modules/nix-agent.nix +++ b/nix/modules/nix-agent.nix @@ -10,12 +10,12 @@ let in { options.services.nix-agent = { - enable = lib.mkEnableOption "UltraCloud nix-agent service"; + enable = lib.mkEnableOption "UltraCloud nix-agent service (host OS convergence only, consuming desired-system state published by deployer)"; chainfireEndpoint = lib.mkOption { type = lib.types.str; - default = "http://127.0.0.1:7000"; - description = "ChainFire endpoint consumed by nix-agent"; + default = "http://127.0.0.1:2379"; + description = "ChainFire endpoint consumed by nix-agent for desired-system data; nix-agent does not schedule native services."; }; clusterNamespace = lib.mkOption { @@ -56,19 +56,19 @@ in healthCheckCommand = lib.mkOption { type = lib.types.listOf lib.types.str; default = [ ]; - description = "Command vector executed after activation to verify node health"; + description = "Command vector executed after activation to verify node health. 
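The lightningstor unit wrapper above now supervises two processes: it starts the node and the server, and when either exits it tears down the other and exits with the first status, which is what `wait -n` plus the cleanup trap implement. A hedged Rust sketch of the same contract, with placeholder commands and try_wait polling standing in for `wait -n`:

```rust
// Sketch of the two-process supervision contract the unit wrapper implements;
// the binary names below are placeholders, not the real lightningstor commands.
use std::process::{Child, Command};
use std::thread::sleep;
use std::time::Duration;

fn supervise(mut a: Child, mut b: Child) -> std::io::Result<i32> {
    // Poll both children until one of them exits (stand-in for `wait -n`).
    let first_status = loop {
        if let Some(status) = a.try_wait()? {
            break status;
        }
        if let Some(status) = b.try_wait()? {
            break status;
        }
        sleep(Duration::from_millis(200));
    };
    // Tear down whichever process is still running, then reap both.
    let _ = a.kill();
    let _ = b.kill();
    let _ = a.wait();
    let _ = b.wait();
    Ok(first_status.code().unwrap_or(1))
}

fn main() -> std::io::Result<()> {
    // Placeholder commands; in the unit these are the node and server binaries.
    let node = Command::new("sleep").arg("30").spawn()?;
    let server = Command::new("sleep").arg("1").spawn()?;
    let code = supervise(node, server)?;
    println!("first process exited with code {code}; the unit exits with the same code");
    std::process::exit(code);
}
```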
Entries are argv items, not a shell snippet; a non-zero exit triggers rollback when rollbackOnFailure is enabled."; }; rollbackOnFailure = lib.mkOption { type = lib.types.bool; default = true; - description = "Roll back to the previous system if the post-activation health check fails"; + description = "Roll back to the previous system if the post-activation health check fails, leaving observed status as rolled-back instead of keeping the rejected target active."; }; apply = lib.mkOption { type = lib.types.bool; default = true; - description = "Apply desired NixOS system state on the node"; + description = "Apply desired NixOS system state on the node; runtime process placement remains the node-agent and fleet-scheduler path."; }; package = lib.mkOption { @@ -80,7 +80,7 @@ in config = lib.mkIf cfg.enable { systemd.services.nix-agent = { - description = "UltraCloud Nix Agent"; + description = "UltraCloud Nix Agent (host OS reconcile only)"; wantedBy = [ "multi-user.target" ]; after = [ "network-online.target" ]; wants = [ "network-online.target" ]; @@ -90,6 +90,11 @@ in Type = "simple"; Restart = "on-failure"; RestartSec = "5s"; + # The agent can invoke switch-to-configuration on its own host. Keep + # that child process alive when systemd stops the agent during the + # switch transaction so activation can finish and restart services in + # the new generation. + KillMode = "process"; ExecStart = '' ${cfg.package}/bin/nix-agent \ --chainfire-endpoint ${lib.escapeShellArg cfg.chainfireEndpoint} \ diff --git a/nix/modules/node-agent.nix b/nix/modules/node-agent.nix index fb4ef96d..fe176e4 100644 --- a/nix/modules/node-agent.nix +++ b/nix/modules/node-agent.nix @@ -6,12 +6,12 @@ let in { options.services.node-agent = { - enable = lib.mkEnableOption "UltraCloud node-agent service"; + enable = lib.mkEnableOption "UltraCloud node-agent service (native runtime reconcile only, consuming fleet-scheduler instance state)"; chainfireEndpoint = lib.mkOption { type = lib.types.str; - default = "http://127.0.0.1:7000"; - description = "ChainFire endpoint consumed by node-agent"; + default = "http://127.0.0.1:2379"; + description = "ChainFire endpoint consumed by node-agent for scheduled runtime state; node-agent does not switch the base OS."; }; clusterNamespace = lib.mkOption { @@ -40,7 +40,7 @@ in apply = lib.mkOption { type = lib.types.bool; default = true; - description = "Apply desired runtime state on the node"; + description = "Apply desired runtime state on the node; base-system rollout remains the nix-agent path."; }; allowLocalInstanceUpsert = lib.mkOption { @@ -52,13 +52,13 @@ in enableContainers = lib.mkOption { type = lib.types.bool; default = true; - description = "Install and enable Podman for container-based workloads"; + description = "Install and enable Podman for container-based host workloads managed by node-agent. This is separate from k8shost tenant workload semantics."; }; extraPackages = lib.mkOption { type = lib.types.listOf lib.types.package; default = [ ]; - description = "Additional packages made available to managed workloads"; + description = "Additional packages made available to managed workloads. Secrets and volume contents still have to exist on the host already; node-agent does not provision them."; }; package = lib.mkOption { @@ -70,7 +70,7 @@ in stateDir = lib.mkOption { type = lib.types.str; default = "/var/lib/node-agent"; - description = "State directory for node-agent process metadata"; + description = "State directory for node-agent process metadata. 
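The nix-agent options above spell out the post-activation contract: healthCheckCommand is an argv vector executed directly rather than a shell snippet, and a non-zero exit triggers a rollback when rollbackOnFailure is set. A small sketch of that decision; the enum and function here are illustrative, not nix-agent's real types:

```rust
// Hedged sketch of the post-activation health-check / rollback decision
// described above: argv execution without a shell, rollback on failure.
use std::process::Command;

#[derive(Debug, PartialEq)]
enum PostActivation {
    Healthy,
    RollBack,
    KeepDespiteFailure,
}

fn check_and_decide(
    health_check: &[String],
    rollback_on_failure: bool,
) -> std::io::Result<PostActivation> {
    if health_check.is_empty() {
        // No health check configured: the new generation is accepted as-is.
        return Ok(PostActivation::Healthy);
    }
    // First element is the program, the rest are its arguments; nothing is
    // passed through a shell.
    let status = Command::new(&health_check[0])
        .args(&health_check[1..])
        .status()?;
    if status.success() {
        Ok(PostActivation::Healthy)
    } else if rollback_on_failure {
        Ok(PostActivation::RollBack)
    } else {
        Ok(PostActivation::KeepDespiteFailure)
    }
}

fn main() -> std::io::Result<()> {
    let passing = vec!["true".to_string()];
    assert_eq!(check_and_decide(&passing, true)?, PostActivation::Healthy);
    let failing = vec!["false".to_string()];
    assert_eq!(check_and_decide(&failing, true)?, PostActivation::RollBack);
    Ok(())
}
```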
Per-instance pid files, argv and boot-id metadata, and combined stdout/stderr logs live under ${cfg.stateDir}/pids."; }; }; @@ -86,7 +86,7 @@ in ]; systemd.services.node-agent = { - description = "UltraCloud Node Agent"; + description = "UltraCloud Node Agent (native runtime reconcile only)"; wantedBy = [ "multi-user.target" ]; after = [ "network-online.target" ]; wants = [ "network-online.target" ]; diff --git a/nix/modules/plasmavmc.nix b/nix/modules/plasmavmc.nix index aa900e1..8c11838 100644 --- a/nix/modules/plasmavmc.nix +++ b/nix/modules/plasmavmc.nix @@ -2,13 +2,38 @@ let cfg = config.services.plasmavmc; - localIamDeps = lib.optional (config.services.iam.enable or false) "iam.service"; + isLocalHostPort = value: + value != null + && ( + lib.hasPrefix "127.0.0.1:" value + || lib.hasPrefix "localhost:" value + || lib.hasPrefix "[::1]:" value + ); + usesLocalIam = + (config.services.iam.enable or false) + && (cfg.iamAddr == null || isLocalHostPort cfg.iamAddr); + usesLocalPrismnet = + (config.services.prismnet.enable or false) + && cfg.prismnetAddr != null + && isLocalHostPort cfg.prismnetAddr; + usesLocalFlaredb = + (config.services.flaredb.enable or false) + && (cfg.flaredbAddr == null || isLocalHostPort cfg.flaredbAddr); + usesLocalChainfire = + (config.services.chainfire.enable or false) + && cfg.chainfireAddr != null + && isLocalHostPort cfg.chainfireAddr; + localIamDeps = lib.optional usesLocalIam "iam.service"; + localControlPlaneDeps = + lib.optional usesLocalPrismnet "prismnet.service" + ++ lib.optional usesLocalFlaredb "flaredb.service" + ++ lib.optional usesLocalChainfire "chainfire.service"; localIamHealthUrl = - if config.services.iam.enable or false + if usesLocalIam then "http://127.0.0.1:${toString config.services.iam.httpPort}/health" else null; remoteIamEndpoint = - if !(config.services.iam.enable or false) && cfg.iamAddr != null + if !usesLocalIam && cfg.iamAddr != null then cfg.iamAddr else null; coronafsEnabled = lib.hasAttrByPath [ "services" "coronafs" "enable" ] config && config.services.coronafs.enable; @@ -112,12 +137,12 @@ let in { options.services.plasmavmc = { - enable = lib.mkEnableOption "plasmavmc service"; + enable = lib.mkEnableOption "plasmavmc tenant VM control plane and KVM worker agent; standalone from deployer and fleet-scheduler"; mode = lib.mkOption { type = lib.types.enum [ "server" "agent" "all-in-one" ]; default = "all-in-one"; - description = "PlasmaVMC operating mode: server (control-plane), agent (compute), or all-in-one"; + description = "PlasmaVMC operating mode: server (tenant VM API/control-plane), agent (compute worker), or all-in-one for the standalone VM-platform path"; }; port = lib.mkOption { @@ -325,10 +350,10 @@ in # Create systemd service systemd.services.plasmavmc = { - description = "PlasmaVMC Virtual Machine Compute Service"; + description = "PlasmaVMC tenant VM control plane and KVM worker agent"; wantedBy = [ "multi-user.target" ]; - after = [ "network-online.target" "prismnet.service" "flaredb.service" "chainfire.service" ] ++ localIamDeps; - wants = [ "network-online.target" "prismnet.service" "flaredb.service" "chainfire.service" ] ++ localIamDeps; + after = [ "network-online.target" ] ++ localControlPlaneDeps ++ localIamDeps; + wants = [ "network-online.target" ] ++ localControlPlaneDeps ++ localIamDeps; path = [ pkgs.qemu pkgs.coreutils pkgs.curl pkgs.iproute2 pkgs.dnsmasq ]; preStart = lib.optionalString (localIamHealthUrl != null) '' diff --git a/nix/modules/ultracloud-cluster.nix 
b/nix/modules/ultracloud-cluster.nix index 6e937ac..8f77af6 100644 --- a/nix/modules/ultracloud-cluster.nix +++ b/nix/modules/ultracloud-cluster.nix @@ -4,6 +4,9 @@ with lib; let cfg = config.ultracloud.cluster; + nixNosEnabled = + lib.hasAttrByPath [ "nix-nos" "enable" ] config + && config.nix-nos.enable; clusterConfigLib = import ../lib/cluster-schema.nix { inherit lib; }; nodeType = clusterConfigLib.mkNodeType types; nodeClassType = clusterConfigLib.mkNodeClassType types; @@ -33,7 +36,7 @@ let in { options.ultracloud.cluster = { - enable = mkEnableOption "UltraCloud cluster configuration"; + enable = mkEnableOption "UltraCloud cluster configuration (the only supported cluster authoring source, backed by nix/lib/cluster-schema.nix)"; name = mkOption { type = types.str; @@ -140,6 +143,11 @@ in }; config = mkIf cfg.enable { + warnings = lib.optional nixNosEnabled '' + ultracloud.cluster backed by nix/lib/cluster-schema.nix is the only supported cluster authoring source. + nix-nos is limited to legacy compatibility and low-level network primitives. + ''; + # Assertions assertions = [ { diff --git a/nix/modules/ultracloud-resources.nix b/nix/modules/ultracloud-resources.nix index 819ec8d..25e292c 100644 --- a/nix/modules/ultracloud-resources.nix +++ b/nix/modules/ultracloud-resources.nix @@ -510,7 +510,7 @@ in { endpoint = mkOption { type = types.str; - default = "http://127.0.0.1:7000"; + default = "http://127.0.0.1:50085"; description = "FiberLB gRPC endpoint"; }; diff --git a/nix/nodes/baremetal-qemu/common.nix b/nix/nodes/baremetal-qemu/common.nix new file mode 100644 index 0000000..e973217 --- /dev/null +++ b/nix/nodes/baremetal-qemu/common.nix @@ -0,0 +1,87 @@ +{ lib, pkgs, ... }: + +{ + boot.kernelParams = [ "console=ttyS0,115200n8" ]; + boot.initrd.availableKernelModules = [ + "ahci" + "sr_mod" + "virtio_blk" + "virtio_net" + "virtio_pci" + "virtio_scsi" + "xhci_pci" + ]; + + networking.firewall.enable = false; + networking.useDHCP = lib.mkForce false; + networking.dhcpcd.enable = lib.mkForce false; + networking.usePredictableInterfaceNames = false; + + systemd.network = { + enable = true; + wait-online.enable = true; + networks."10-eth0" = { + matchConfig.Name = "eth0"; + networkConfig.DHCP = "yes"; + linkConfig.RequiredForOnline = "routable"; + }; + }; + + services.openssh = { + enable = true; + settings = { + PermitRootLogin = "prohibit-password"; + PasswordAuthentication = false; + KbdInteractiveAuthentication = false; + }; + }; + + users.users.root.openssh.authorizedKeys.keys = [ ]; + + nix.registry = lib.mkForce { }; + nix.nixPath = lib.mkForce [ ]; + nix.channel.enable = false; + nix.settings = { + experimental-features = [ + "nix-command" + "flakes" + ]; + flake-registry = ""; + }; + nixpkgs.flake = { + source = lib.mkForce null; + setFlakeRegistry = lib.mkForce false; + setNixPath = lib.mkForce false; + }; + + documentation.enable = false; + documentation.nixos.enable = false; + documentation.man.enable = false; + documentation.info.enable = false; + documentation.doc.enable = false; + + environment.systemPackages = with pkgs; [ + curl + jq + ]; + + systemd.services.ultracloud-baremetal-postinstall-marker = { + description = "Emit a canonical post-install marker for bare-metal QEMU smoke"; + wantedBy = [ "multi-user.target" ]; + after = [ "network-online.target" ]; + wants = [ "network-online.target" ]; + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + StandardOutput = "journal+console"; + StandardError = "journal+console"; + }; + script = '' + 
hostname="$(tr -d '\n' /dev/null 2>&1; then + return 0 + fi + if [ "$SECONDS" -ge "$deadline" ]; then + echo "timed out waiting for $name at $url" >&2 + return 1 + fi + sleep 1 + done + } + + wait_for_tcp() { + local name="$1" + local port="$2" + local deadline=$((SECONDS + 180)) + while true; do + if nc -z 127.0.0.1 "$port" >/dev/null 2>&1; then + return 0 + fi + if [ "$SECONDS" -ge "$deadline" ]; then + echo "timed out waiting for $name TCP listener on $port" >&2 + return 1 + fi + sleep 1 + done + } + + systemctl is-active ${readyUnitArgs} + ${healthCheckScript} + ${tcpCheckScript} + test -x ${pkgs.qemu}/bin/qemu-system-x86_64 + test -x ${pkgs.qemu}/bin/qemu-img + test -c /dev/net/tun + jq -e '.coreServices | map(.name) == ${builtins.toJSON coreServiceNames}' /etc/ultracloud-product-surface.json >/dev/null + jq -e '.easyTrial.kind == "vm-appliance"' /etc/ultracloud-product-surface.json >/dev/null + if [ -e /dev/kvm ]; then + test -r /dev/kvm + fi + ''; + }; + + system.stateVersion = "24.11"; + }; +} diff --git a/nix/single-node/qemu-vm.nix b/nix/single-node/qemu-vm.nix new file mode 100644 index 0000000..6a56f81 --- /dev/null +++ b/nix/single-node/qemu-vm.nix @@ -0,0 +1,24 @@ +{ modulesPath, ... }: + +{ + imports = [ (modulesPath + "/virtualisation/qemu-vm.nix") ]; + + virtualisation = { + graphics = false; + cores = 2; + memorySize = 3072; + diskSize = 16384; + }; + + services.openssh = { + enable = true; + settings = { + KbdInteractiveAuthentication = false; + PasswordAuthentication = true; + PermitRootLogin = "yes"; + }; + }; + + users.mutableUsers = false; + users.users.root.hashedPassword = "$6$iu4O1PEqq77wLMfh$T4bP3V9v8RoPgwqgBr2taKEgVNcb42HaTUy.VMjjsFtWTvnai3rqvy8AQbELKWdB1Qzfb7wkUOSK1wnmSZph/."; +} diff --git a/nix/single-node/surface.nix b/nix/single-node/surface.nix new file mode 100644 index 0000000..cc92096 --- /dev/null +++ b/nix/single-node/surface.nix @@ -0,0 +1,240 @@ +{ + vmPlatformCore = [ + { + name = "chainfire"; + packageAttr = "chainfire-server"; + unit = "chainfire.service"; + summary = "replicated coordination store"; + ports = { + api = 2379; + raft = 2380; + gossip = 2381; + http = 8081; + }; + healthUrl = "http://127.0.0.1:8081/health"; + } + { + name = "flaredb"; + packageAttr = "flaredb-server"; + unit = "flaredb.service"; + summary = "replicated metadata and event store"; + ports = { + api = 2479; + raft = 2480; + http = 8082; + }; + healthUrl = "http://127.0.0.1:8082/health"; + } + { + name = "iam"; + packageAttr = "iam-server"; + unit = "iam.service"; + summary = "identity, token, and authorization control plane"; + ports = { + grpc = 50080; + http = 8083; + }; + healthUrl = "http://127.0.0.1:8083/health"; + } + { + name = "prismnet"; + packageAttr = "prismnet-server"; + unit = "prismnet.service"; + summary = "tenant network control plane"; + ports = { + grpc = 50081; + http = 8087; + }; + healthUrl = "http://127.0.0.1:8087/health"; + } + { + name = "plasmavmc"; + packageAttr = "plasmavmc-server"; + unit = "plasmavmc.service"; + summary = "VM control plane"; + ports = { + grpc = 50082; + http = 8084; + }; + healthUrl = "http://127.0.0.1:8084/health"; + } + ]; + + optionalBundles = [ + { + option = "enableLightningStor"; + name = "lightningstor"; + summary = "object storage and VM image backing"; + services = [ + { + name = "lightningstor"; + packageAttr = "lightningstor-server"; + unit = "lightningstor.service"; + tcpPort = 50086; + } + ]; + } + { + option = "enableCoronafs"; + name = "coronafs"; + summary = "shared mutable VM volume layer"; + services = [ 
+ { + name = "coronafs"; + packageAttr = "coronafs-server"; + unit = "coronafs.service"; + healthUrl = "http://127.0.0.1:50088/healthz"; + } + ]; + } + { + option = "enableFlashDNS"; + name = "flashdns"; + summary = "DNS publication layer"; + services = [ + { + name = "flashdns"; + packageAttr = "flashdns-server"; + unit = "flashdns.service"; + tcpPort = 50084; + } + ]; + } + { + option = "enableFiberLB"; + name = "fiberlb"; + summary = "service publication and VIP layer"; + services = [ + { + name = "fiberlb"; + packageAttr = "fiberlb-server"; + unit = "fiberlb.service"; + tcpPort = 50085; + } + ]; + } + { + option = "enableApiGateway"; + name = "apigateway"; + summary = "external API and proxy surface"; + services = [ + { + name = "apigateway"; + packageAttr = "apigateway-server"; + unit = "apigateway.service"; + healthUrl = "http://127.0.0.1:8080/health"; + } + ]; + } + { + option = "enableNightlight"; + name = "nightlight"; + summary = "metrics ingestion and query service"; + services = [ + { + name = "nightlight"; + packageAttr = "nightlight-server"; + unit = "nightlight.service"; + healthUrl = "http://127.0.0.1:9101/healthz"; + } + ]; + } + { + option = "enableCreditService"; + name = "creditservice"; + summary = "quota, wallet, reservation, and admission-control service"; + services = [ + { + name = "creditservice"; + packageAttr = "creditservice-server"; + unit = "creditservice.service"; + healthUrl = "http://127.0.0.1:3011/health"; + } + ]; + } + { + option = "enableK8sHost"; + name = "k8shost"; + summary = "tenant pod and service control plane layered on top of network providers"; + requires = [ + "enableFlashDNS" + "enableFiberLB" + ]; + services = [ + { + name = "k8shost"; + packageAttr = "k8shost-server"; + unit = "k8shost.service"; + healthUrl = "http://127.0.0.1:8085/health"; + } + ]; + } + ]; + + responsibilityBoundaries = { + deployer = { + owns = [ + "machine enrollment and /api/v1/phone-home" + "install plans and desired-system references" + "cluster inventory and rollout intent" + ]; + excludes = [ + "host-local switch-to-configuration execution" + "native service process placement" + "tenant pod scheduling" + ]; + }; + nix-agent = { + owns = [ + "host-local NixOS convergence" + "health-check and rollback handling for desired systems" + ]; + excludes = [ + "node enrollment" + "native process scheduling" + "tenant workload APIs" + ]; + }; + node-agent = { + owns = [ + "host-local runtime reconciliation for scheduled service instances" + "process and optional container execution plus heartbeats" + ]; + excludes = [ + "NixOS system switching" + "cluster-wide placement decisions" + "tenant pod scheduling" + ]; + }; + fleet-scheduler = { + owns = [ + "cluster-wide placement of native host services" + "instance failover and placement updates written to ChainFire" + ]; + excludes = [ + "node-local execution" + "OS rollout switching" + "tenant pod semantics" + ]; + }; + k8shost = { + owns = [ + "tenant pod and service APIs" + "translation of tenant intent into prismnet, flashdns, and fiberlb objects" + ]; + excludes = [ + "native host service placement" + "machine enrollment or install plans" + "host-local NixOS switching" + ]; + }; + }; + + easyTrial = { + kind = "vm-appliance"; + package = "single-node-trial-vm"; + app = "single-node-trial"; + smokeApp = "single-node-quickstart"; + rationale = "The minimal supported surface is a VM platform, not a stateless HTTP service. 
An OCI/Docker artifact would need privileged host KVM, /dev/net/tun, and OVS/libvirt access, so the lightest credible trial path is a host-built NixOS VM plus the one-command smoke launcher."; + }; +} diff --git a/nix/test-cluster/README.md b/nix/test-cluster/README.md index c7b1000..8f244e9 100644 --- a/nix/test-cluster/README.md +++ b/nix/test-cluster/README.md @@ -3,18 +3,40 @@ `nix/test-cluster` is the canonical local validation path for UltraCloud. It boots six QEMU VMs, treats them as hardware-like nodes, and validates representative control-plane, worker, and gateway behavior over SSH and service endpoints. All VM images are built on the host in a single Nix invocation and then booted as prebuilt artifacts. The guests do not compile the stack locally. +The same harness also owns the canonical bare-metal bootstrap proof: a raw-QEMU ISO flow that phones home to `deployer`, runs Disko, reboots, and waits for `nix-agent` desired-system convergence on one control-plane node and one worker-equivalent node. + +That local QEMU proof is intentionally the same operator route planned for hardware. The same `nixosConfigurations.ultracloud-iso` image can be written to USB or attached through BMC virtual media on a physical host; QEMU with KVM is only standing in for the chassis while the install flow, phone-home, Disko, reboot, and desired-system handoff stay the same. + +The hardware bridge now has its own canonical wrapper: `nix run ./nix/test-cluster#hardware-smoke -- preflight`. It writes the exact kernel parameters, expected `ULTRACLOUD_MARKER` lines, failure markers, and operator handoff under `./work/hardware-smoke/latest`, and the same wrapper can later be rerun as `run` or `capture` when USB or BMC/Redfish transport is actually available. + +The harness keeps the install contract reusable by pushing install details into classes and pools. `verify-baremetal-iso.sh` now publishes node classes whose `install_plan` owns the install profile and stable disk selector, while node records carry only identity plus any desired-system override that is genuinely host-specific. In the canonical QEMU proof that means the node record carries the prebuilt `desired_system.target_system` plus the health check, and the class carries the install plan. The chassis emulates the preferred hardware-style disk selection by attaching explicit virtio serials and installing against `/dev/disk/by-id/virtio-uc-control-root` and `/dev/disk/by-id/virtio-uc-worker-root`. + +When `/dev/kvm` is absent, the portable fallback is not another harness subcommand. Use the root-flake non-KVM lane instead: `nix build .#checks.x86_64-linux.portable-control-plane-regressions`. +When `/dev/kvm` and nested virtualization are available, the reproducible publishable lane is `./nix/test-cluster/run-publishable-kvm-suite.sh`, which records environment metadata and then runs `fresh-smoke`, `fresh-demo-vm-webapp`, and `fresh-matrix` in order. +`nix run ./nix/test-cluster#cluster -- durability-proof` is the canonical chainfire flaredb deployer backup/restore lane. It persists artifacts under `./work/durability-proof/latest`, proves logical backup/restore for ChainFire keys and FlareDB SQL rows, uses the canonical Deployer admin pre-register request itself as the backup artifact, verifies that the pre-registered node survives a `deployer.service` restart, replays the same request idempotently, and injects CoronaFS plus LightningStor failures against the live KVM cluster. 
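A minimal way to consume that lane's output locally is sketched below. It assumes only the paths this README already names (`./work/durability-proof/latest`, its `result.json`, and the per-artifact JSON files); the exact field layout of `result.json` and the artifact locations inside the proof root are assumptions based on the dated run described further down.

```bash
# Sketch: run the durability lane, then confirm the latest dated proof root
# recorded success and kept the artifacts this README calls out.
# The result.json field layout is an assumption, not a checked contract.
nix run ./nix/test-cluster#cluster -- durability-proof

proof="./work/durability-proof/latest"
test "$(jq -r '.success' "${proof}/result.json")" = "true"
ls -l "${proof}/deployer-post-restart-list.json" \
      "${proof}/lightningstor-head-during-node05-outage.json"
```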
+`nix run ./nix/test-cluster#cluster -- rollout-soak` is the longer-running KVM companion lane for the rollout bundle and fixed-membership control plane. It rebuilds from clean local runtime state, writes dated artifacts under `./work/rollout-soak/latest`, validates exactly one planned `draining` maintenance cycle and one fail-stop worker-loss cycle on the two native-runtime workers, holds each degraded state for 30 seconds, then restarts `deployer`, `fleet-scheduler`, `node-agent`, `chainfire`, and `flaredb` before revalidating the live cluster. The same proof root includes `scope-fixed-contract.json`, `deployer-scope-fixed.txt`, and `fleet-scheduler-scope-fixed.txt` so the supported release boundary is recorded with the runtime evidence. The steady-state KVM nodes do not run `nix-agent.service`, so the lane records `nix-agent` scope markers instead of pretending a live-cluster `nix-agent` restart happened. +`nix run ./nix/test-cluster#cluster -- provider-vm-reality-proof` is the focused local-KVM reality lane for `prismnet`, `flashdns`, `fiberlb`, and `plasmavmc`. It writes authoritative DNS answers, FiberLB backend drain or restore artifacts, and PlasmaVMC migration or storage-handoff state under `./work/provider-vm-reality-proof/latest`. +`./nix/test-cluster/run-core-control-plane-ops-proof.sh` is the focused operator lifecycle proof for `chainfire`, `flaredb`, and `iam`. It records the ChainFire fixed-membership boundary, the FlareDB additive-first migration and destructive-DDL boundary, and the standalone IAM bootstrap hardening plus signing-key, credential, and mTLS rotation proof under `./work/core-control-plane-ops-proof`. +`./nix/test-cluster/work-root-budget.sh` is the checked helper for local disk budget reporting, stronger local enforcement, and safer cleanup guidance under `./work`. +The dated 2026-04-10 artifact root for the focused control-plane proof is `./work/core-control-plane-ops-proof/20260410T172148+09:00`. +Runner-specific workflow wiring from `task/f5c70db0-baseline-profiles` is intentionally excluded from this re-aggregated baseline; the checked-in artifact here is the local wrapper. 
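Before pointing the hardware wrapper at a real chassis, a preflight-only invocation is enough to see whether the transport inputs are complete. The sketch below uses the USB transport; the environment variable names come from `nix/test-cluster/hardware-smoke.sh` in this change, and every value shown is a placeholder.

```bash
# Sketch: USB-transport preflight with placeholder inputs, then inspect the
# verdict. status.env carries status=ready or status=blocked, and
# missing-requirements.txt lists whatever still has to be provided.
export ULTRACLOUD_HARDWARE_TRANSPORT=usb
export ULTRACLOUD_HARDWARE_USB_DEVICE=/dev/sdX                        # placeholder
export ULTRACLOUD_HARDWARE_DEPLOYER_URL=http://deployer.example:8080  # placeholder
export ULTRACLOUD_HARDWARE_BOOTSTRAP_TOKEN=change-me                  # placeholder

nix run ./nix/test-cluster#hardware-smoke -- preflight

cat ./work/hardware-smoke/latest/status.env
cat ./work/hardware-smoke/latest/missing-requirements.txt
```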
## What it validates - 3-node control-plane formation for `chainfire`, `flaredb`, and `iam` - control-plane service health for `prismnet`, `flashdns`, `fiberlb`, `plasmavmc`, `lightningstor`, and `k8shost` -- worker-node `plasmavmc` and `lightningstor` startup +- worker-node `plasmavmc` and `lightningstor` startup, including KVM-only PlasmaVMC worker registration on the supported public surface +- LightningStor bucket metadata and explicit object-version APIs on the optional storage surface - PrismNet port binding for PlasmaVMC guests, including lifecycle cleanup on VM deletion - nested KVM inside worker VMs by booting an inner guest with `qemu-system-x86_64 -accel kvm` -- gateway-node `apigateway`, `nightlight`, and minimal `creditservice` startup +- gateway-node `apigateway`, `nightlight`, and `creditservice` quota, wallet, reservation, and admission flows - host-forwarded access to the API gateway and NightLight HTTP surfaces - cross-node data replication smoke tests for `chainfire` and `flaredb` - deployer-seeded native runtime scheduling from declarative Nix service definitions, including drain/failover recovery +- ISO-based bare-metal bootstrap from `nixosConfigurations.ultracloud-iso` through phone-home, flake bundle fetch, Disko install, reboot, and desired-system activation +- durability and restore coverage for `chainfire`, `flaredb`, `deployer`, `coronafs`, and `lightningstor` + +The supported `k8shost` coverage here is the `k8shost-server` API surface. `k8shost` is fixed as an API/control-plane product surface; runtime dataplane helpers stay archived non-product. Archived `k8shost-cni`, `k8shost-controllers`, and `lightningstor-csi` scaffolds stay outside the canonical profiles and are not part of the publishable proof. ## Validation layers @@ -24,20 +46,24 @@ All VM images are built on the host in a single Nix invocation and then booted a - replicated state: write and read convergence checks across the 3-node `chainfire` and `flaredb` clusters - worker virtualization: launch a nested KVM guest inside both worker VMs - external entrypoints: verify host-forwarded API gateway and NightLight access from outside the guest -- auth-integrated minimal services: confirm `creditservice` stays up and actually connects to IAM +- auth-integrated add-ons: confirm `creditservice` stays up, connects to IAM, and serves the published quota and wallet flows +- workload API contract: confirm `k8shost` pod watches return bounded snapshot streams and that LightningStor bucket metadata or version-listing RPCs round-trip against the live cluster ## Requirements - minimal host requirements: - - Linux host with `/dev/kvm` + - Linux host with readable and writable `/dev/kvm` - nested virtualization enabled on the host hypervisor - `nix` + - enough free space under `./work` or `ULTRACLOUD_WORK_ROOT` - if you do not use `nix run` or `nix develop`, install: - `qemu-system-x86_64` - `ssh` - `sshpass` - `curl` +The checked-in wrappers force local Nix builders and derive parallelism from host CPU count by default. Override with `ULTRACLOUD_LOCAL_NIX_MAX_JOBS`, `ULTRACLOUD_LOCAL_NIX_BUILD_CORES`, `PHOTON_CLUSTER_NIX_MAX_JOBS`, or `PHOTON_CLUSTER_NIX_BUILD_CORES` when a host needs different scheduling. 
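When a host does need different scheduling, the override is just environment variables set before the lane invocation; a minimal sketch with illustrative values:

```bash
# Sketch: pin local Nix parallelism for a constrained host. Values are
# illustrative; left unset, the wrappers derive both from the host CPU count.
export ULTRACLOUD_LOCAL_NIX_MAX_JOBS=1
export ULTRACLOUD_LOCAL_NIX_BUILD_CORES=4
# run-cluster.sh also honors its own pair, which takes precedence for the harness:
export PHOTON_CLUSTER_NIX_MAX_JOBS=1
export PHOTON_CLUSTER_NIX_BUILD_CORES=4

nix run ./nix/test-cluster#cluster -- fresh-smoke
```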
+ ## Main commands ```bash @@ -45,12 +71,16 @@ nix run ./nix/test-cluster#cluster -- build nix run ./nix/test-cluster#cluster -- start nix run ./nix/test-cluster#cluster -- smoke nix run ./nix/test-cluster#cluster -- fresh-smoke +nix run ./nix/test-cluster#cluster -- baremetal-iso nix run ./nix/test-cluster#cluster -- demo-vm-webapp nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp nix run ./nix/test-cluster#cluster -- serve-vm-webapp nix run ./nix/test-cluster#cluster -- fresh-serve-vm-webapp nix run ./nix/test-cluster#cluster -- matrix nix run ./nix/test-cluster#cluster -- fresh-matrix +nix run ./nix/test-cluster#cluster -- provider-vm-reality-proof +nix run ./nix/test-cluster#cluster -- rollout-soak +nix run ./nix/test-cluster#cluster -- durability-proof nix run ./nix/test-cluster#cluster -- bench-storage nix run ./nix/test-cluster#cluster -- fresh-bench-storage nix run ./nix/test-cluster#cluster -- validate @@ -63,6 +93,24 @@ make cluster-smoke Preferred entrypoint for publishable verification: `nix run ./nix/test-cluster#cluster -- fresh-smoke` +Preferred entrypoint for publishable bare-metal bootstrap verification: `nix run ./nix/test-cluster#cluster -- baremetal-iso` + +Preferred entrypoint for the exact host-KVM bare-metal proof lane: `nix build .#checks.x86_64-linux.baremetal-iso-e2e && ./result/bin/baremetal-iso-e2e ` + +Preferred entrypoint for physical-node preflight and handoff: `nix run ./nix/test-cluster#hardware-smoke -- preflight` + +Preferred entrypoint for portable local verification on TCG-only hosts: `nix build .#checks.x86_64-linux.portable-control-plane-regressions` + +Preferred entrypoint for reproducible KVM-suite reruns: `./nix/test-cluster/run-publishable-kvm-suite.sh ` + +Preferred entrypoint for the full supported-surface proof on a local AMD/KVM host: `./nix/test-cluster/run-supported-surface-final-proof.sh ` + +Preferred entrypoint for focused ChainFire, FlareDB, and IAM operator lifecycle verification: `./nix/test-cluster/run-core-control-plane-ops-proof.sh ` + +Preferred entrypoint for local disk budget reporting: `./nix/test-cluster/work-root-budget.sh status` +Preferred entrypoint for local budget enforcement: `./nix/test-cluster/work-root-budget.sh enforce` +Preferred entrypoint for safer dated-proof cleanup dry-runs: `./nix/test-cluster/work-root-budget.sh prune-proof-logs 2` + `make cluster-smoke` is a convenience wrapper for the same clean host-build VM validation flow. `nix run ./nix/test-cluster#cluster -- demo-vm-webapp` creates a PrismNet-attached VM, boots a tiny web app inside the guest, stores its counter in FlareDB, writes JSON snapshots to LightningStor object storage, and then proves that the state survives guest restart plus cross-worker migration. The attached data volume is still used by the guest for its local bootstrap config. 
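For local iteration without a clean rebuild each time, the split commands compose. The sketch below assumes the non-`fresh` lanes expect a cluster that has already been built and started, which is how the `build`/`start`/`smoke` split reads; the `fresh-*` variants remain the publishable path.

```bash
# Sketch: incremental local loop, then the clean publishable variant.
nix run ./nix/test-cluster#cluster -- build
nix run ./nix/test-cluster#cluster -- start
nix run ./nix/test-cluster#cluster -- demo-vm-webapp

# Publishable, from-clean variant of the same demo:
nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp
```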
@@ -73,6 +121,25 @@ Preferred entrypoint for publishable verification: `nix run ./nix/test-cluster#c Preferred entrypoint for publishable matrix verification: `nix run ./nix/test-cluster#cluster -- fresh-matrix` +Preferred entrypoint for focused provider and VM-hosting reality verification: `nix run ./nix/test-cluster#cluster -- provider-vm-reality-proof` + +Preferred entrypoint for longer-running rollout maintenance and DR verification: `nix run ./nix/test-cluster#cluster -- rollout-soak` + +Preferred entrypoint for durability and restore verification: `nix run ./nix/test-cluster#cluster -- durability-proof` + +The dated 2026-04-10 proof root for that lane is `./work/durability-proof/20260410T120618+0900`; `result.json` records `success=true`, and the artifact set includes `deployer-post-restart-list.json`, `coronafs-node04-local-state.json`, and `lightningstor-head-during-node05-outage.json`. +The dated 2026-04-10 proof root for the provider and VM-hosting lane is `./work/provider-vm-reality-proof/20260410T135827+0900`; `result.json` records `success=true`, and the artifact set includes `network-provider/fiberlb-drain-summary.txt`, `network-provider/flashdns-service-authoritative-answer.txt`, and `vm-hosting/migration-summary.json`. + +## Rollout Bundle Operator Contract + +The supported operator contract for `deployer`, `fleet-scheduler`, `nix-agent`, and `node-agent` is fixed in [../../docs/rollout-bundle.md](../../docs/rollout-bundle.md). + +- `deployer` is supported as one active writer with restart or cold-standby restore. Automatic ChainFire-backed multi-instance failover is outside the supported product contract for this release. +- `nix-agent` health-check and rollback behavior is proven by `nix build .#checks.x86_64-linux.deployer-vm-rollback`, while `baremetal-iso` and `baremetal-iso-e2e` prove the same desired-system handoff with the installer in front. +- `fresh-smoke` is the canonical KVM proof for `fleet-scheduler` drain, maintenance, and failover semantics. It drains `node04`, checks relocation to `node05`, restores `node04`, then stops `node05` and verifies failover plus replica restoration when the worker returns. +- `rollout-soak` is the longer-running companion for that same contract. It proves the current release boundary of one planned drain cycle, one fail-stop worker-loss cycle, and 30-second held degraded states on the two native-runtime workers, then restarts the rollout services and the fixed-membership control-plane services before rechecking the live runtime state. The dated 2026-04-10 release-grade artifact root is `./work/rollout-soak/20260410T164549+0900`. +- `node-agent` product scope is host-local runtime reconcile only. Logs and pid metadata live under `${stateDir}/pids`, secrets must already exist in the rendered spec or mounted files, host-path volumes are pass-through only, and upgrades are replace-and-reconcile operations. + `nix run ./nix/test-cluster#cluster -- bench-storage` benchmarks CoronaFS controller-export vs node-local-export I/O, worker-side materialization latency, and LightningStor large/small-object S3 throughput, then writes a report to `docs/storage-benchmarks.md`. 
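A publishable storage run plus a quick check that the report landed where this README says it is written is sketched below; the two harness knobs noted in comments come from `run-cluster.sh`, and their semantics are inferred from the variable names.

```bash
# Sketch: publishable storage bench, then confirm the report exists.
nix run ./nix/test-cluster#cluster -- fresh-bench-storage
test -s docs/storage-benchmarks.md && head -n 20 docs/storage-benchmarks.md

# Knobs from run-cluster.sh (semantics inferred from the names):
#   PHOTON_VM_SKIP_PLASMAVMC_IMAGE_BENCH=1             skip the PlasmaVMC image bench
#   PHOTON_VM_LIGHTNINGSTOR_BENCH_CLIENT_NODE=node06   pick the S3 bench client node
```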
Preferred entrypoint for publishable storage numbers: `nix run ./nix/test-cluster#cluster -- fresh-storage-bench` @@ -96,9 +163,14 @@ nix develop ./nix/test-cluster -c ./nix/test-cluster/run-cluster.sh fresh-smoke ## Runtime state -The harness stores build links and VM runtime state under `${PHOTON_VM_DIR:-$HOME/.ultracloud-test-cluster}` for the default profile and uses profile-suffixed siblings such as `${PHOTON_VM_DIR:-$HOME/.ultracloud-test-cluster}-storage` for alternate build profiles. +The harness stores build links and VM runtime state under `${PHOTON_CLUSTER_WORK_ROOT:-$REPO_ROOT/work/test-cluster}` by default, with VM disks under `${PHOTON_VM_DIR:-$PHOTON_CLUSTER_WORK_ROOT/state}` and VDE switch state under `${PHOTON_CLUSTER_VDE_SWITCH_DIR:-$PHOTON_CLUSTER_WORK_ROOT/vde-switch}`. Alternate build profiles use profile-suffixed siblings such as `${PHOTON_VM_DIR:-$PHOTON_CLUSTER_WORK_ROOT/state}-storage`. +The publishable KVM wrapper keeps its logs under the path you pass in, defaults runtime/cache state to `./work/publishable-kvm-runtime`, and defaults temporary files to `./work/tmp`. Logs for each VM are written to `//vm.log`. +Use `./nix/test-cluster/work-root-budget.sh status` for disk budget reporting, `./nix/test-cluster/work-root-budget.sh enforce` when a local proof run should fail once tracked paths exceed soft budgets, and `./nix/test-cluster/work-root-budget.sh prune-proof-logs 2` for a safer dated-proof cleanup dry-run. The helper reports the size of `./work`, `./work/test-cluster/state`, disposable runtime roots, and dated proof directories including `./work/rollout-soak`, `./work/provider-vm-reality-proof`, and `./work/hardware-smoke`, then prints a safe cleanup sequence that stops the cluster, removes transient VM state, trims old proof logs, and finally runs a Nix store GC once old result symlinks are no longer needed. + +`./work/hardware-smoke` is the proof root for physical-node bring-up attempts. `hardware-smoke.sh` keeps `latest` pointed at the newest preflight or capture run so transport-free blocked state and real hardware evidence land in the same place. + ## Scope note -This harness is intentionally VM-first. Older ad hoc launch scripts under `baremetal/vm-cluster` are legacy/manual paths and should not be treated as the primary local validation entrypoint. +This harness is intentionally VM-first, but the canonical bare-metal install proof also lives here so the docs, harness, and `flake check` all exercise the same ISO route. Older ad hoc launch scripts under `baremetal/vm-cluster` are `legacy/manual` paths, `nixosConfigurations.netboot-worker` is an archived worker helper outside the canonical guard set, and only `netboot-all-in-one` plus `netboot-control-plane` remain companion images for the supported profiles. diff --git a/nix/test-cluster/common.nix b/nix/test-cluster/common.nix index d1fb637..384b556 100644 --- a/nix/test-cluster/common.nix +++ b/nix/test-cluster/common.nix @@ -81,7 +81,7 @@ in }; }; users.mutableUsers = false; - users.users.root.hashedPassword = "$6$ultracloud$aUJCEE5wm/b5O.9KIKGm84qUWdWXwnebsFEiMBF7u9Y7AOWodaMrjbbKGMOf0X59VJyJeMRsgbT7VWeqMHpUe."; + users.users.root.hashedPassword = "$6$ultracloud$AeL/E5Ed/qfGOskMjhx35Y/w49xj8dICoeDoIhkhN./rSVckrGBPOlIulW6s.EmzohU0CmNZKlR2IX/BF0ela1"; # qemu-vm.nix provides the default SLiRP NIC as eth0. # The extra multicast NIC above becomes eth1 and carries intra-cluster traffic. 
diff --git a/nix/test-cluster/flake.nix b/nix/test-cluster/flake.nix index ed040f4..0a63d9a 100644 --- a/nix/test-cluster/flake.nix +++ b/nix/test-cluster/flake.nix @@ -52,6 +52,8 @@ bash coreutils curl + ultracloud.packages.${system}.chainfire-server + ultracloud.packages.${system}.deployer-server ultracloud.packages.${system}.deployer-ctl findutils gawk @@ -60,7 +62,9 @@ gnugrep iproute2 jq + nix openssh + python3 procps clusterPython qemu @@ -70,7 +74,30 @@ text = '' repo_root="$(${pkgs.gitMinimal}/bin/git rev-parse --show-toplevel 2>/dev/null || ${pkgs.coreutils}/bin/pwd)" export PHOTON_CLUSTER_FLAKE="''${repo_root}/nix/test-cluster" - exec "''${repo_root}/nix/test-cluster/run-cluster.sh" "$@" + export PHOTON_CLUSTER_PYTHON="${clusterPython}/bin/python3" + exec ${pkgs.bash}/bin/bash "''${repo_root}/nix/test-cluster/run-cluster.sh" "$@" + ''; + }; + + hardwareSmoke = pkgs.writeShellApplication { + name = "ultracloud-hardware-smoke"; + runtimeInputs = with pkgs; [ + bash + coreutils + curl + findutils + gawk + gnugrep + jq + nix + openssh + procps + sshpass + util-linux + ]; + text = '' + repo_root="$(${pkgs.gitMinimal}/bin/git rev-parse --show-toplevel 2>/dev/null || ${pkgs.coreutils}/bin/pwd)" + exec ${pkgs.bash}/bin/bash "''${repo_root}/nix/test-cluster/hardware-smoke.sh" "$@" ''; }; @@ -99,15 +126,23 @@ packages.${system} = { cluster = clusterHarness; + hardwareSmoke = hardwareSmoke; vmGuestImage = vmGuestImage; vmBenchGuestImage = vmBenchGuestImage; deployerClusterState = self.nixosConfigurations.node06.config.system.build.ultracloudDeployerClusterState; }; - apps.${system}.cluster = { - type = "app"; - program = "${clusterHarness}/bin/ultracloud-test-cluster"; + apps.${system} = { + cluster = { + type = "app"; + program = "${clusterHarness}/bin/ultracloud-test-cluster"; + }; + + hardware-smoke = { + type = "app"; + program = "${hardwareSmoke}/bin/ultracloud-hardware-smoke"; + }; }; devShells.${system}.default = pkgs.mkShell { diff --git a/nix/test-cluster/hardware-smoke.sh b/nix/test-cluster/hardware-smoke.sh new file mode 100755 index 0000000..21f311c --- /dev/null +++ b/nix/test-cluster/hardware-smoke.sh @@ -0,0 +1,615 @@ +#!/usr/bin/env bash +set -euo pipefail + +export PATH="/run/current-system/sw/bin:/usr/bin:/bin:${PATH}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="${ULTRACLOUD_REPO_ROOT:-$(cd "${SCRIPT_DIR}/../.." 
&& pwd)}" +TASK_ID="3dba03d3-525b-4079-8c93-90af6a89d32b" +WORK_ROOT="${ULTRACLOUD_WORK_ROOT:-${REPO_ROOT}/work}" +RUN_ID="${ULTRACLOUD_HARDWARE_RUN_ID:-$(date +%Y%m%dT%H%M%S%z)}" +PROOF_BASE="${WORK_ROOT}/hardware-smoke" +PROOF_ROOT="${ULTRACLOUD_HARDWARE_PROOF_ROOT:-${PROOF_BASE}/${RUN_ID}}" +LATEST_LINK="${PROOF_BASE}/latest" + +REQUESTED_MODE="${1:-preflight}" +REQUESTED_TRANSPORT="${ULTRACLOUD_HARDWARE_TRANSPORT:-auto}" +RESOLVED_TRANSPORT="" + +DEPLOYER_URL="${ULTRACLOUD_HARDWARE_DEPLOYER_URL:-}" +BOOTSTRAP_TOKEN="${ULTRACLOUD_HARDWARE_BOOTSTRAP_TOKEN:-}" +ALLOW_UNAUTHENTICATED="${ULTRACLOUD_HARDWARE_ALLOW_UNAUTHENTICATED:-0}" +CA_CERT_URL="${ULTRACLOUD_HARDWARE_CA_CERT_URL:-}" +BINARY_CACHE_URL="${ULTRACLOUD_HARDWARE_BINARY_CACHE_URL:-}" +NODE_ID_OVERRIDE="${ULTRACLOUD_HARDWARE_NODE_ID:-}" +HOSTNAME_OVERRIDE="${ULTRACLOUD_HARDWARE_HOSTNAME:-}" +NODE_ROLE="${ULTRACLOUD_HARDWARE_NODE_ROLE:-control-plane}" +ISO_ATTR=".#nixosConfigurations.ultracloud-iso.config.system.build.isoImage" +ISO_PATH_OVERRIDE="${ULTRACLOUD_HARDWARE_ISO_PATH:-}" +ISO_URL="${ULTRACLOUD_HARDWARE_ISO_URL:-}" + +USB_DEVICE="${ULTRACLOUD_HARDWARE_USB_DEVICE:-}" +ALLOW_DESTRUCTIVE="${ULTRACLOUD_HARDWARE_ALLOW_DESTRUCTIVE:-}" + +REDFISH_ENDPOINT="${ULTRACLOUD_HARDWARE_REDFISH_ENDPOINT:-${ULTRACLOUD_HARDWARE_BMC_ENDPOINT:-}}" +REDFISH_USERNAME="${ULTRACLOUD_HARDWARE_REDFISH_USERNAME:-${ULTRACLOUD_HARDWARE_BMC_USERNAME:-}}" +REDFISH_PASSWORD="${ULTRACLOUD_HARDWARE_REDFISH_PASSWORD:-${ULTRACLOUD_HARDWARE_BMC_PASSWORD:-}}" +REDFISH_SYSTEM_ID="${ULTRACLOUD_HARDWARE_REDFISH_SYSTEM_ID:-${ULTRACLOUD_HARDWARE_BMC_SYSTEM_ID:-System.Embedded.1}}" +REDFISH_MANAGER_ID="${ULTRACLOUD_HARDWARE_REDFISH_MANAGER_ID:-${ULTRACLOUD_HARDWARE_BMC_MANAGER_ID:-iDRAC.Embedded.1}}" +REDFISH_VIRTUAL_MEDIA_ID="${ULTRACLOUD_HARDWARE_REDFISH_VIRTUAL_MEDIA_ID:-${ULTRACLOUD_HARDWARE_BMC_VIRTUAL_MEDIA_ID:-CD}}" +REDFISH_RESET_TYPE="${ULTRACLOUD_HARDWARE_REDFISH_RESET_TYPE:-ForceRestart}" +REDFISH_INSECURE="${ULTRACLOUD_HARDWARE_REDFISH_INSECURE:-0}" + +SSH_HOST="${ULTRACLOUD_HARDWARE_SSH_HOST:-}" +SSH_USER="${ULTRACLOUD_HARDWARE_SSH_USER:-root}" +SSH_PORT="${ULTRACLOUD_HARDWARE_SSH_PORT:-22}" +SSH_IDENTITY_FILE="${ULTRACLOUD_HARDWARE_SSH_IDENTITY_FILE:-}" +SSH_PASSWORD="${ULTRACLOUD_HARDWARE_SSH_PASSWORD:-}" +SSH_TIMEOUT_SECS="${ULTRACLOUD_HARDWARE_SSH_TIMEOUT_SECS:-3600}" +SERIAL_LOG="${ULTRACLOUD_HARDWARE_SERIAL_LOG:-}" + +STATUS_FILE="${PROOF_ROOT}/status.env" +MISSING_FILE="${PROOF_ROOT}/missing-requirements.txt" +HANDOFF_FILE="${PROOF_ROOT}/operator-handoff.md" +KERNEL_PARAMS_FILE="${PROOF_ROOT}/kernel-params.txt" +EXPECTED_MARKERS_FILE="${PROOF_ROOT}/expected-markers.txt" +FAILURE_MARKERS_FILE="${PROOF_ROOT}/failure-markers.txt" +ENVIRONMENT_FILE="${PROOF_ROOT}/environment.txt" +ISO_REF_FILE="${PROOF_ROOT}/iso-reference.txt" +TRANSPORT_LOG="${PROOF_ROOT}/transport.log" +CAPTURE_DIR="${PROOF_ROOT}/capture" + +MISSING_REQUIREMENTS=() + +log() { + printf '[hardware-smoke] %s\n' "$*" +} + +mode_normalized() { + case "${REQUESTED_MODE}" in + preflight|run|capture) + printf '%s\n' "${REQUESTED_MODE}" + ;; + *) + printf 'preflight\n' + ;; + esac +} + +timestamp() { + date -Is +} + +prepare_paths() { + mkdir -p "${PROOF_ROOT}" "${CAPTURE_DIR}" "${PROOF_BASE}" + ln -sfn "$(basename "${PROOF_ROOT}")" "${LATEST_LINK}" +} + +append_missing() { + MISSING_REQUIREMENTS+=("$1") +} + +detect_transport() { + case "${REQUESTED_TRANSPORT}" in + auto) + if [[ -n "${USB_DEVICE}" ]]; then + RESOLVED_TRANSPORT="usb" + elif [[ -n "${REDFISH_ENDPOINT}" ]]; then + 
RESOLVED_TRANSPORT="redfish" + else + RESOLVED_TRANSPORT="none" + fi + ;; + usb) + RESOLVED_TRANSPORT="usb" + ;; + bmc|redfish) + RESOLVED_TRANSPORT="redfish" + ;; + *) + RESOLVED_TRANSPORT="invalid" + append_missing "transport: set ULTRACLOUD_HARDWARE_TRANSPORT=usb|bmc|redfish, or leave auto and provide USB or Redfish inputs" + ;; + esac +} + +write_iso_reference() { + { + printf 'iso_attr=%s\n' "${ISO_ATTR}" + if [[ -n "${ISO_PATH_OVERRIDE}" ]]; then + printf 'iso_path_override=%s\n' "${ISO_PATH_OVERRIDE}" + fi + if [[ -n "${ISO_URL}" ]]; then + printf 'iso_url=%s\n' "${ISO_URL}" + fi + } >"${ISO_REF_FILE}" +} + +write_kernel_params() { + { + printf 'ultracloud.deployer_url=%s\n' "${DEPLOYER_URL:-}" + if [[ -n "${BOOTSTRAP_TOKEN}" ]]; then + printf 'ultracloud.bootstrap_token=%s\n' "${BOOTSTRAP_TOKEN}" + elif [[ "${ALLOW_UNAUTHENTICATED}" == "1" ]]; then + printf '# ultracloud.bootstrap_token omitted because ULTRACLOUD_HARDWARE_ALLOW_UNAUTHENTICATED=1\n' + else + printf 'ultracloud.bootstrap_token=\n' + fi + if [[ -n "${CA_CERT_URL}" ]]; then + printf 'ultracloud.ca_cert_url=%s\n' "${CA_CERT_URL}" + fi + if [[ -n "${BINARY_CACHE_URL}" ]]; then + printf 'ultracloud.binary_cache_url=%s\n' "${BINARY_CACHE_URL}" + fi + if [[ -n "${NODE_ID_OVERRIDE}" ]]; then + printf 'ultracloud.node_id=%s\n' "${NODE_ID_OVERRIDE}" + fi + if [[ -n "${HOSTNAME_OVERRIDE}" ]]; then + printf 'ultracloud.hostname=%s\n' "${HOSTNAME_OVERRIDE}" + fi + } >"${KERNEL_PARAMS_FILE}" +} + +write_expected_markers() { + { + printf 'ULTRACLOUD_MARKER pre-install.boot.\n' + printf 'ULTRACLOUD_MARKER pre-install.phone-home.complete.\n' + printf 'ULTRACLOUD_MARKER install.bundle-downloaded.\n' + printf 'ULTRACLOUD_MARKER install.disko.complete.\n' + printf 'ULTRACLOUD_MARKER install.nixos-install.complete.\n' + printf 'ULTRACLOUD_MARKER reboot.\n' + printf 'ULTRACLOUD_MARKER post-install.boot..%s\n' "${NODE_ROLE}" + printf 'ULTRACLOUD_MARKER desired-system-active.\n' + } >"${EXPECTED_MARKERS_FILE}" +} + +write_failure_markers() { + cat >"${FAILURE_MARKERS_FILE}" <<'EOF' +missing transport inputs +missing ultracloud.deployer_url kernel parameter +missing bootstrap token or unauthenticated bootstrap acknowledgement +missing USB device +missing Redfish/BMC endpoint +missing Redfish/BMC credentials +missing Redfish ISO URL +missing capture channel (SSH or serial log) +missing destructive acknowledgement for USB write +phone-home marker not observed +install.disko.complete marker not observed +reboot marker not observed +desired-system-active marker not observed +nix-agent.service inactive after install +chainfire.service inactive after install on control-plane node +EOF +} + +write_environment() { + { + printf 'task_id=%s\n' "${TASK_ID}" + printf 'mode=%s\n' "$(mode_normalized)" + printf 'started_at=%s\n' "$(timestamp)" + printf 'repo_root=%s\n' "${REPO_ROOT}" + printf 'work_root=%s\n' "${WORK_ROOT}" + printf 'proof_root=%s\n' "${PROOF_ROOT}" + printf 'requested_transport=%s\n' "${REQUESTED_TRANSPORT}" + printf 'resolved_transport=%s\n' "${RESOLVED_TRANSPORT}" + printf 'node_role=%s\n' "${NODE_ROLE}" + printf 'node_id_override=%s\n' "${NODE_ID_OVERRIDE:-}" + printf 'hostname_override=%s\n' "${HOSTNAME_OVERRIDE:-}" + printf 'deployer_url_set=%s\n' "$([[ -n "${DEPLOYER_URL}" ]] && echo yes || echo no)" + printf 'bootstrap_token_set=%s\n' "$([[ -n "${BOOTSTRAP_TOKEN}" ]] && echo yes || echo no)" + printf 'allow_unauthenticated=%s\n' "${ALLOW_UNAUTHENTICATED}" + printf 'binary_cache_url=%s\n' "${BINARY_CACHE_URL:-}" + printf 
'ca_cert_url=%s\n' "${CA_CERT_URL:-}" + printf 'usb_device=%s\n' "${USB_DEVICE:-}" + printf 'redfish_endpoint=%s\n' "${REDFISH_ENDPOINT:-}" + printf 'redfish_system_id=%s\n' "${REDFISH_SYSTEM_ID}" + printf 'redfish_manager_id=%s\n' "${REDFISH_MANAGER_ID}" + printf 'redfish_virtual_media_id=%s\n' "${REDFISH_VIRTUAL_MEDIA_ID}" + printf 'iso_url=%s\n' "${ISO_URL:-}" + printf 'ssh_host=%s\n' "${SSH_HOST:-}" + printf 'ssh_port=%s\n' "${SSH_PORT}" + printf 'serial_log=%s\n' "${SERIAL_LOG:-}" + } >"${ENVIRONMENT_FILE}" +} + +write_missing_requirements() { + : >"${MISSING_FILE}" + if (( ${#MISSING_REQUIREMENTS[@]} == 0 )); then + printf 'none\n' >"${MISSING_FILE}" + return 0 + fi + + local item + for item in "${MISSING_REQUIREMENTS[@]}"; do + printf '%s\n' "${item}" >>"${MISSING_FILE}" + done +} + +write_operator_handoff() { + cat >"${HANDOFF_FILE}" <"${STATUS_FILE}" +} + +resolve_iso_image() { + local candidate="$1" + if [[ -f "${candidate}" ]]; then + printf '%s\n' "${candidate}" + return 0 + fi + if [[ -d "${candidate}/iso" ]]; then + find "${candidate}/iso" -maxdepth 1 -type f -name '*.iso' | head -n 1 + return 0 + fi + find "${candidate}" -maxdepth 1 -type f -name '*.iso' | head -n 1 +} + +materialize_iso_for_usb() { + if [[ -n "${ISO_PATH_OVERRIDE}" ]]; then + printf '%s\n' "${ISO_PATH_OVERRIDE}" + return 0 + fi + + local out + out="$(nix build "${REPO_ROOT}#nixosConfigurations.ultracloud-iso.config.system.build.isoImage" --no-link --print-out-paths)" + resolve_iso_image "${out}" +} + +copy_serial_log_if_present() { + if [[ -n "${SERIAL_LOG}" && -f "${SERIAL_LOG}" ]]; then + cp "${SERIAL_LOG}" "${CAPTURE_DIR}/serial.log" + fi +} + +run_ssh() { + local cmd="$1" + local ssh_opts=( + -o StrictHostKeyChecking=no + -o UserKnownHostsFile=/dev/null + -o ConnectTimeout=5 + -p "${SSH_PORT}" + ) + + if [[ -n "${SSH_IDENTITY_FILE}" ]]; then + ssh_opts+=(-i "${SSH_IDENTITY_FILE}") + fi + + if [[ -n "${SSH_PASSWORD}" ]]; then + sshpass -p "${SSH_PASSWORD}" ssh "${ssh_opts[@]}" "${SSH_USER}@${SSH_HOST}" "${cmd}" + else + ssh "${ssh_opts[@]}" "${SSH_USER}@${SSH_HOST}" "${cmd}" + fi +} + +wait_for_ssh() { + local timeout_secs="$1" + local started + started="$(date +%s)" + + while (( $(date +%s) - started < timeout_secs )); do + if run_ssh 'true' >/dev/null 2>&1; then + return 0 + fi + sleep 10 + done + + return 1 +} + +capture_over_ssh() { + [[ -n "${SSH_HOST}" ]] || return 1 + + log "waiting for SSH on ${SSH_HOST}:${SSH_PORT}" + wait_for_ssh "${SSH_TIMEOUT_SECS}" || return 1 + + run_ssh 'hostnamectl --static 2>/dev/null || hostname' >"${CAPTURE_DIR}/hostname.txt" || true + run_ssh 'readlink -f /run/current-system || true' >"${CAPTURE_DIR}/current-system.txt" || true + run_ssh 'journalctl -b --no-pager || true' >"${CAPTURE_DIR}/journal-boot.log" || true + run_ssh 'journalctl -b -u nix-agent.service --no-pager || true' >"${CAPTURE_DIR}/journal-nix-agent.log" || true + run_ssh 'systemctl is-active nix-agent.service || true' >"${CAPTURE_DIR}/nix-agent-active.txt" || true + if [[ "${NODE_ROLE}" == "control-plane" ]]; then + run_ssh 'journalctl -b -u chainfire.service --no-pager || true' >"${CAPTURE_DIR}/journal-chainfire.log" || true + run_ssh 'systemctl is-active chainfire.service || true' >"${CAPTURE_DIR}/chainfire-active.txt" || true + fi + grep 'ULTRACLOUD_MARKER' "${CAPTURE_DIR}/journal-boot.log" >"${CAPTURE_DIR}/marker-summary.log" || true + grep 'ULTRACLOUD_MARKER desired-system-active\.' 
"${CAPTURE_DIR}/journal-boot.log" >"${CAPTURE_DIR}/desired-system-active.log" || true + return 0 +} + +capture_success() { + copy_serial_log_if_present + + if capture_over_ssh; then + : + fi + + if [[ -s "${CAPTURE_DIR}/desired-system-active.log" ]]; then + if [[ -f "${CAPTURE_DIR}/nix-agent-active.txt" ]]; then + grep -Eq '^active$' "${CAPTURE_DIR}/nix-agent-active.txt" || return 1 + fi + if [[ "${NODE_ROLE}" == "control-plane" && -f "${CAPTURE_DIR}/chainfire-active.txt" ]]; then + grep -Eq '^active$' "${CAPTURE_DIR}/chainfire-active.txt" || return 1 + fi + return 0 + fi + + if [[ -f "${CAPTURE_DIR}/serial.log" ]]; then + grep 'ULTRACLOUD_MARKER desired-system-active\.' "${CAPTURE_DIR}/serial.log" >"${CAPTURE_DIR}/desired-system-active.log" || true + [[ -s "${CAPTURE_DIR}/desired-system-active.log" ]] + return $? + fi + + return 1 +} + +run_usb_transport() { + local iso_path + iso_path="$(materialize_iso_for_usb)" + [[ -n "${iso_path}" ]] || { + append_missing "USB transport: unable to resolve ultracloud-iso image from ${ISO_ATTR}" + return 1 + } + + { + printf 'mode=usb\n' + printf 'iso_path=%s\n' "${iso_path}" + printf 'usb_device=%s\n' "${USB_DEVICE}" + } >"${PROOF_ROOT}/transport.env" + + log "writing ${iso_path} to ${USB_DEVICE}" + lsblk "${USB_DEVICE}" >"${PROOF_ROOT}/usb-device-before.txt" 2>&1 || true + dd if="${iso_path}" of="${USB_DEVICE}" bs=16M conv=fsync status=progress 2>&1 | tee "${TRANSPORT_LOG}" + sync + lsblk "${USB_DEVICE}" >"${PROOF_ROOT}/usb-device-after.txt" 2>&1 || true +} + +redfish_curl() { + local method="$1" + local url="$2" + local body_file="$3" + local out_prefix="$4" + local curl_args=( + -sS + -X "${method}" + -u "${REDFISH_USERNAME}:${REDFISH_PASSWORD}" + -D "${PROOF_ROOT}/${out_prefix}.headers" + -o "${PROOF_ROOT}/${out_prefix}.body" + ) + + if [[ "${REDFISH_INSECURE}" == "1" ]]; then + curl_args+=(-k) + fi + if [[ -n "${body_file}" ]]; then + curl_args+=(-H 'Content-Type: application/json' --data @"${body_file}") + fi + + curl "${curl_args[@]}" "${url}" +} + +run_redfish_transport() { + local endpoint="${REDFISH_ENDPOINT%/}" + local virtual_media_url="${endpoint}/redfish/v1/Managers/${REDFISH_MANAGER_ID}/VirtualMedia/${REDFISH_VIRTUAL_MEDIA_ID}" + local system_url="${endpoint}/redfish/v1/Systems/${REDFISH_SYSTEM_ID}" + local insert_body="${PROOF_ROOT}/insert-media.json" + local boot_body="${PROOF_ROOT}/boot-override.json" + local reset_body="${PROOF_ROOT}/reset.json" + + cat >"${insert_body}" <"${boot_body}" <"${reset_body}" <"${PROOF_ROOT}/transport.env" + + redfish_curl POST "${virtual_media_url}/EjectMedia" "" "redfish-eject-media" || true + redfish_curl POST "${virtual_media_url}/InsertMedia" "${insert_body}" "redfish-insert-media" + redfish_curl PATCH "${system_url}" "${boot_body}" "redfish-boot-override" + redfish_curl POST "${system_url}/Actions/ComputerSystem.Reset" "${reset_body}" "redfish-reset" +} + +preflight() { + detect_transport + validate_common_requirements + validate_transport_requirements + write_environment + write_iso_reference + write_kernel_params + write_expected_markers + write_failure_markers + write_missing_requirements + write_operator_handoff + + if (( ${#MISSING_REQUIREMENTS[@]} == 0 )); then + write_status ready + log "hardware-smoke preflight is ready; artifacts in ${PROOF_ROOT}" + else + write_status blocked + log "hardware-smoke preflight is blocked; see ${MISSING_FILE}" + fi +} + +run_mode() { + preflight + if grep -Eq '^status=blocked$' "${STATUS_FILE}"; then + return 2 + fi + + case "${RESOLVED_TRANSPORT}" in + usb) + 
run_usb_transport + ;; + redfish) + run_redfish_transport + ;; + *) + return 2 + ;; + esac + + if capture_success; then + write_status success + log "hardware smoke captured desired-system evidence in ${CAPTURE_DIR}" + return 0 + fi + + write_status failed + log "hardware smoke did not capture desired-system evidence; see ${CAPTURE_DIR}" + return 3 +} + +capture_mode() { + detect_transport + write_environment + write_iso_reference + write_kernel_params + write_expected_markers + write_failure_markers + write_operator_handoff + + if capture_success; then + write_missing_requirements + write_status success + log "captured desired-system evidence in ${CAPTURE_DIR}" + return 0 + fi + + if [[ -z "${SSH_HOST}" && -z "${SERIAL_LOG}" ]]; then + append_missing "capture channel: set ULTRACLOUD_HARDWARE_SSH_HOST or ULTRACLOUD_HARDWARE_SERIAL_LOG" + write_missing_requirements + write_status blocked + return 2 + fi + + write_missing_requirements + write_status failed + return 3 +} + +main() { + prepare_paths + + case "$(mode_normalized)" in + preflight) + preflight + ;; + run) + run_mode + ;; + capture) + capture_mode + ;; + esac +} + +main "$@" diff --git a/nix/test-cluster/node01.nix b/nix/test-cluster/node01.nix index a663985..818a8d3 100644 --- a/nix/test-cluster/node01.nix +++ b/nix/test-cluster/node01.nix @@ -168,6 +168,44 @@ services.lightningstor.s3AccessKeyId = "ultracloud-test"; services.lightningstor.s3SecretKey = "ultracloud-test-secret"; + systemd.services.lightningstor = { + after = [ + "network-online.target" + "chainfire.service" + "flaredb.service" + "iam.service" + ]; + wants = [ + "network-online.target" + "chainfire.service" + "flaredb.service" + "iam.service" + ]; + serviceConfig.TimeoutStartSec = 300; + preStart = lib.mkAfter '' + wait_for_tcp() { + local host="$1" + local port="$2" + local label="$3" + local deadline=$((SECONDS + 180)) + + while ! ${pkgs.bash}/bin/bash -lc "exec 3<>/dev/tcp/''${host}/''${port}" >/dev/null 2>&1; do + if (( SECONDS >= deadline )); then + echo "timed out waiting for ''${label} (''${host}:''${port})" >&2 + return 1 + fi + sleep 2 + done + } + + wait_for_tcp 10.100.0.11 2379 "ChainFire" + wait_for_tcp 10.100.0.11 2479 "FlareDB" + wait_for_tcp 10.100.0.11 50080 "IAM" + wait_for_tcp 10.100.0.21 50086 "LightningStor worker replica node04" + wait_for_tcp 10.100.0.22 50086 "LightningStor worker replica node05" + ''; + }; + ultracloud.tenantNetworking = { enable = true; endpoint = "http://127.0.0.1:50081"; diff --git a/nix/test-cluster/node06.nix b/nix/test-cluster/node06.nix index 38ce4f9..409804e 100644 --- a/nix/test-cluster/node06.nix +++ b/nix/test-cluster/node06.nix @@ -1,6 +1,6 @@ # node06 - Gateway Node # -# Services: APIGateway, NightLight, minimal auth-integrated CreditService reference +# Services: APIGateway, NightLight, and CreditService quota/admission control { config, lib, pkgs, ... }: diff --git a/nix/test-cluster/run-baremetal-iso-e2e.sh b/nix/test-cluster/run-baremetal-iso-e2e.sh new file mode 100755 index 0000000..c5cdbaa --- /dev/null +++ b/nix/test-cluster/run-baremetal-iso-e2e.sh @@ -0,0 +1,199 @@ +#!/usr/bin/env bash +set -euo pipefail + +export PATH="/run/current-system/sw/bin:/usr/bin:/bin:${PATH}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SOURCE_FLAKE_ROOT="${ULTRACLOUD_BAREMETAL_E2E_SOURCE_FLAKE_ROOT:-$(cd "${SCRIPT_DIR}/../.." 
&& pwd)}" +TASK_ID="0de75570-dabd-471b-95fe-5898c54e2e8c" +WORK_ROOT="${ULTRACLOUD_WORK_ROOT:-${PWD}/work}" +LOG_ROOT="${1:-${ULTRACLOUD_BAREMETAL_E2E_LOG_ROOT:-${WORK_ROOT}/baremetal-iso-e2e/${TASK_ID}}}" +STATE_DIR="${ULTRACLOUD_BAREMETAL_STATE_DIR:-${LOG_ROOT}/state}" + +log() { + printf '[baremetal-iso-e2e-runner] %s\n' "$*" +} + +die() { + printf '[baremetal-iso-e2e-runner] ERROR: %s\n' "$*" >&2 + exit 1 +} + +host_cpu_count() { + local count + count="$(getconf _NPROCESSORS_ONLN 2>/dev/null || nproc 2>/dev/null || echo 1)" + if [[ ! "${count}" =~ ^[0-9]+$ ]] || (( count < 1 )); then + count=1 + fi + printf '%s\n' "${count}" +} + +default_local_nix_max_jobs() { + local cpu_count="$1" + if (( cpu_count <= 2 )); then + printf '1\n' + return 0 + fi + printf '%s\n' "$(( (cpu_count + 1) / 2 ))" +} + +default_local_nix_build_cores() { + local cpu_count="$1" + local max_jobs="$2" + local build_cores=1 + if (( max_jobs > 0 )); then + build_cores="$(( cpu_count / max_jobs ))" + fi + if (( build_cores < 1 )); then + build_cores=1 + fi + printf '%s\n' "${build_cores}" +} + +append_nix_config_line() { + local line="$1" + if [[ -n "${NIX_CONFIG:-}" ]]; then + NIX_CONFIG+=$'\n' + fi + NIX_CONFIG+="${line}" +} + +host_nested_param_path() { + if [[ -f /sys/module/kvm_intel/parameters/nested ]]; then + printf '%s\n' /sys/module/kvm_intel/parameters/nested + elif [[ -f /sys/module/kvm_amd/parameters/nested ]]; then + printf '%s\n' /sys/module/kvm_amd/parameters/nested + fi +} + +require_baremetal_kvm_host() { + [[ -e /dev/kvm ]] || die "/dev/kvm is missing; baremetal-iso-e2e requires host KVM" + [[ -r /dev/kvm && -w /dev/kvm ]] || die "/dev/kvm is not readable and writable for $(id -un)" +} + +prepare_runtime_dirs() { + local cpu_count default_max_jobs default_build_cores + + cpu_count="$(host_cpu_count)" + default_max_jobs="$(default_local_nix_max_jobs "${cpu_count}")" + default_build_cores="$(default_local_nix_build_cores "${cpu_count}" "${default_max_jobs}")" + + export ULTRACLOUD_WORK_ROOT="${WORK_ROOT}" + export TMPDIR="${TMPDIR:-${WORK_ROOT}/tmp}" + export XDG_CACHE_HOME="${XDG_CACHE_HOME:-${WORK_ROOT}/xdg-cache}" + export ULTRACLOUD_LOCAL_NIX_MAX_JOBS="${ULTRACLOUD_LOCAL_NIX_MAX_JOBS:-${default_max_jobs}}" + export ULTRACLOUD_LOCAL_NIX_BUILD_CORES="${ULTRACLOUD_LOCAL_NIX_BUILD_CORES:-${default_build_cores}}" + export ULTRACLOUD_BAREMETAL_STATE_DIR="${STATE_DIR}" + export ULTRACLOUD_BAREMETAL_FORCE_TCG=0 + export ULTRACLOUD_REPO_ROOT="${SOURCE_FLAKE_ROOT}" + export ULTRACLOUD_BAREMETAL_PROOF_MODEL="${ULTRACLOUD_BAREMETAL_PROOF_MODEL:-materialized-check-runner}" + + append_nix_config_line "builders =" + append_nix_config_line "max-jobs = ${ULTRACLOUD_LOCAL_NIX_MAX_JOBS}" + append_nix_config_line "cores = ${ULTRACLOUD_LOCAL_NIX_BUILD_CORES}" + append_nix_config_line "experimental-features = nix-command flakes" + append_nix_config_line "warn-dirty = false" + export NIX_CONFIG + + mkdir -p "${TMPDIR}" "${XDG_CACHE_HOME}" "${WORK_ROOT}" "${LOG_ROOT}" "${STATE_DIR}" +} + +git_value() { + local repo_root="$1" + shift + if git -C "${repo_root}" rev-parse --is-inside-work-tree >/dev/null 2>&1; then + git -C "${repo_root}" "$@" + else + printf 'unavailable\n' + fi +} + +capture_environment() { + { + printf 'task_id=%s\n' "${TASK_ID}" + printf 'execution_model=%s\n' "${ULTRACLOUD_BAREMETAL_PROOF_MODEL}" + printf 'started_at=%s\n' "$(date -Is)" + printf 'source_flake_root=%s\n' "${SOURCE_FLAKE_ROOT}" + printf 'invoked_from=%s\n' "${PWD}" + printf 'work_root=%s\n' "${WORK_ROOT}" + printf 'log_root=%s\n' 
"${LOG_ROOT}" + printf 'state_dir=%s\n' "${STATE_DIR}" + printf 'source_branch=%s\n' "$(git_value "${PWD}" branch --show-current)" + printf 'source_commit=%s\n' "$(git_value "${PWD}" rev-parse HEAD)" + printf 'host_cpu_count=%s\n' "$(host_cpu_count)" + printf 'ultracloud_local_nix_max_jobs=%s\n' "${ULTRACLOUD_LOCAL_NIX_MAX_JOBS}" + printf 'ultracloud_local_nix_build_cores=%s\n' "${ULTRACLOUD_LOCAL_NIX_BUILD_CORES}" + printf 'tmpdir=%s\n' "${TMPDIR}" + printf 'xdg_cache_home=%s\n' "${XDG_CACHE_HOME}" + printf 'nix_version=%s\n' "$(nix --version)" + printf 'nix_builders=%s\n' "$(nix config show builders 2>/dev/null | awk -F' = ' 'NR==1 { print $2 }')" + printf 'kvm_present=%s\n' "$([[ -e /dev/kvm ]] && echo yes || echo no)" + printf 'kvm_access=%s\n' "$([[ -r /dev/kvm && -w /dev/kvm ]] && echo rw || echo no)" + if [[ -e /dev/kvm ]]; then + printf 'kvm_stat=%s\n' "$(stat -c '%A %U %G %t:%T' /dev/kvm)" + fi + local nested_path + nested_path="$(host_nested_param_path || true)" + if [[ -n "${nested_path}" ]]; then + printf 'nested_param_path=%s\n' "${nested_path}" + printf 'nested_param_value=%s\n' "$(<"${nested_path}")" + fi + } >"${LOG_ROOT}/environment.txt" +} + +run_case() { + local name="$1" + local timeout_secs="$2" + shift 2 + + local logfile="${LOG_ROOT}/${name}.log" + local metafile="${LOG_ROOT}/${name}.meta" + local started_at ended_at rc + + started_at="$(date -Is)" + printf 'name=%s\n' "${name}" >"${metafile}" + printf 'started_at=%s\n' "${started_at}" >>"${metafile}" + printf 'timeout_secs=%s\n' "${timeout_secs}" >>"${metafile}" + printf 'command=' >>"${metafile}" + printf '%q ' "$@" >>"${metafile}" + printf '\n' >>"${metafile}" + + log "running ${name}: $*" + set +e + timeout --signal=TERM --kill-after=120 "${timeout_secs}" "$@" 2>&1 | tee "${logfile}" + rc=${PIPESTATUS[0]} + set -e + + ended_at="$(date -Is)" + printf 'ended_at=%s\n' "${ended_at}" >>"${metafile}" + printf 'exit_code=%s\n' "${rc}" >>"${metafile}" + + if (( rc == 124 )); then + log "${name} timed out after ${timeout_secs}s" + elif (( rc == 0 )); then + log "${name} passed" + else + log "${name} failed with exit ${rc}" + fi + + return "${rc}" +} + +main() { + prepare_runtime_dirs + require_baremetal_kvm_host + capture_environment + + run_case baremetal-iso-e2e 21600 \ + env \ + ULTRACLOUD_REPO_ROOT="${SOURCE_FLAKE_ROOT}" \ + ULTRACLOUD_WORK_ROOT="${WORK_ROOT}" \ + ULTRACLOUD_BAREMETAL_STATE_DIR="${STATE_DIR}" \ + ULTRACLOUD_BAREMETAL_FORCE_TCG=0 \ + ULTRACLOUD_BAREMETAL_PROOF_MODEL="${ULTRACLOUD_BAREMETAL_PROOF_MODEL}" \ + bash "${SOURCE_FLAKE_ROOT}/nix/test-cluster/verify-baremetal-iso.sh" + + log "baremetal-iso-e2e exact proof passed; logs in ${LOG_ROOT}" +} + +main "$@" diff --git a/nix/test-cluster/run-cluster.sh b/nix/test-cluster/run-cluster.sh index 2babaac..bd3eb3c 100755 --- a/nix/test-cluster/run-cluster.sh +++ b/nix/test-cluster/run-cluster.sh @@ -27,8 +27,10 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" CLUSTER_DIR="${SCRIPT_DIR}" CLUSTER_FLAKE_REF="${PHOTON_CLUSTER_FLAKE:-${CLUSTER_DIR}}" -VM_DIR_BASE="${PHOTON_VM_DIR:-${HOME}/.ultracloud-test-cluster}" -VDE_SWITCH_DIR_BASE="${PHOTON_CLUSTER_VDE_SWITCH_DIR:-/tmp/ultracloud-test-cluster-vde.sock}" +ULTRACLOUD_WORK_ROOT="${ULTRACLOUD_WORK_ROOT:-${REPO_ROOT}/work}" +WORK_ROOT="${PHOTON_CLUSTER_WORK_ROOT:-${ULTRACLOUD_WORK_ROOT}/test-cluster}" +VM_DIR_BASE="${PHOTON_VM_DIR:-${WORK_ROOT}/state}" +VDE_SWITCH_DIR_BASE="${PHOTON_CLUSTER_VDE_SWITCH_DIR:-${WORK_ROOT}/vde-switch}" CORONAFS_API_PORT="${PHOTON_CORONAFS_API_PORT:-50088}" CORONAFS_VOLUME_ROOT="/var/lib/coronafs/volumes" SSH_PASSWORD="${PHOTON_VM_ROOT_PASSWORD:-test}" @@ -48,18 +50,74 @@ STORAGE_BENCHMARK_COMMAND="${PHOTON_VM_STORAGE_BENCH_COMMAND:-bench-storage}" LIGHTNINGSTOR_BENCH_CLIENT_NODE="${PHOTON_VM_LIGHTNINGSTOR_BENCH_CLIENT_NODE:-node06}" STORAGE_SKIP_PLASMAVMC_IMAGE_BENCH="${PHOTON_VM_SKIP_PLASMAVMC_IMAGE_BENCH:-0}" STORAGE_SKIP_PLASMAVMC_GUEST_RUNTIME_BENCH="${PHOTON_VM_SKIP_PLASMAVMC_GUEST_RUNTIME_BENCH:-0}" -HOST_CPU_COUNT="$(getconf _NPROCESSORS_ONLN 2>/dev/null || nproc 2>/dev/null || echo 4)" -DEFAULT_CLUSTER_NIX_MAX_JOBS=2 -DEFAULT_CLUSTER_NIX_BUILD_CORES=4 -if [[ "${HOST_CPU_COUNT}" =~ ^[0-9]+$ ]] && (( HOST_CPU_COUNT >= 12 )); then - DEFAULT_CLUSTER_NIX_MAX_JOBS=3 - DEFAULT_CLUSTER_NIX_BUILD_CORES=6 -fi -CLUSTER_NIX_MAX_JOBS="${PHOTON_CLUSTER_NIX_MAX_JOBS:-${DEFAULT_CLUSTER_NIX_MAX_JOBS}}" -CLUSTER_NIX_BUILD_CORES="${PHOTON_CLUSTER_NIX_BUILD_CORES:-${DEFAULT_CLUSTER_NIX_BUILD_CORES}}" BUILD_PROFILE="${PHOTON_CLUSTER_BUILD_PROFILE:-default}" CLUSTER_SKIP_BUILD="${PHOTON_CLUSTER_SKIP_BUILD:-0}" +CLUSTER_PYTHON="${PHOTON_CLUSTER_PYTHON:-python3}" CLUSTER_LOCK_HELD=0 +PHOTON_VM_SKIP_NESTED_KVM_VALIDATE="${PHOTON_VM_SKIP_NESTED_KVM_VALIDATE:-0}" +PHOTON_VM_FORCE_TCG="${PHOTON_VM_FORCE_TCG:-0}" +LEGACY_VDE_SWITCH_SOCKET="/tmp/ultracloud-test-cluster-vde.sock" +export TMPDIR="${TMPDIR:-${ULTRACLOUD_WORK_ROOT}/tmp}" + +host_cpu_count() { + local count + count="$(getconf _NPROCESSORS_ONLN 2>/dev/null || nproc 2>/dev/null || echo 1)" + if [[ ! 
"${count}" =~ ^[0-9]+$ ]] || (( count < 1 )); then + count=1 + fi + printf '%s\n' "${count}" +} + +default_local_nix_max_jobs() { + local cpu_count="$1" + if (( cpu_count <= 2 )); then + printf '1\n' + return 0 + fi + + printf '%s\n' "$(( (cpu_count + 1) / 2 ))" +} + +default_local_nix_build_cores() { + local cpu_count="$1" + local max_jobs="$2" + local build_cores=1 + + if (( max_jobs > 0 )); then + build_cores="$(( cpu_count / max_jobs ))" + fi + if (( build_cores < 1 )); then + build_cores=1 + fi + + printf '%s\n' "${build_cores}" +} + +append_nix_config_line() { + local line="$1" + if [[ -n "${NIX_CONFIG:-}" ]]; then + NIX_CONFIG+=$'\n' + fi + NIX_CONFIG+="${line}" +} + +configure_local_nix_execution() { + append_nix_config_line "builders =" + append_nix_config_line "max-jobs = ${CLUSTER_NIX_MAX_JOBS}" + append_nix_config_line "cores = ${CLUSTER_NIX_BUILD_CORES}" + append_nix_config_line "experimental-features = nix-command flakes" + append_nix_config_line "warn-dirty = false" + export NIX_CONFIG +} + +HOST_CPU_COUNT="$(host_cpu_count)" +DEFAULT_CLUSTER_NIX_MAX_JOBS="$(default_local_nix_max_jobs "${HOST_CPU_COUNT}")" +DEFAULT_CLUSTER_NIX_BUILD_CORES="$(default_local_nix_build_cores "${HOST_CPU_COUNT}" "${DEFAULT_CLUSTER_NIX_MAX_JOBS}")" +CLUSTER_NIX_MAX_JOBS="${PHOTON_CLUSTER_NIX_MAX_JOBS:-${ULTRACLOUD_LOCAL_NIX_MAX_JOBS:-${DEFAULT_CLUSTER_NIX_MAX_JOBS}}}" +CLUSTER_NIX_BUILD_CORES="${PHOTON_CLUSTER_NIX_BUILD_CORES:-${ULTRACLOUD_LOCAL_NIX_BUILD_CORES:-${DEFAULT_CLUSTER_NIX_BUILD_CORES}}}" +export ULTRACLOUD_LOCAL_NIX_MAX_JOBS="${ULTRACLOUD_LOCAL_NIX_MAX_JOBS:-${CLUSTER_NIX_MAX_JOBS}}" +export ULTRACLOUD_LOCAL_NIX_BUILD_CORES="${ULTRACLOUD_LOCAL_NIX_BUILD_CORES:-${CLUSTER_NIX_BUILD_CORES}}" +configure_local_nix_execution NODES=(node01 node02 node03 node04 node05 node06) STORAGE_NODES=(node01 node02 node03 node04 node05) @@ -148,6 +206,7 @@ declare -A STORAGE_BUILD_TARGETS=( ) SSH_OPTS=( + -F /dev/null -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR @@ -172,11 +231,20 @@ warn() { run_deployer_ctl() { RUST_LOG="${RUST_LOG:-warn}" \ - nix --option warn-dirty false run --quiet \ + NIX_BUILD_CORES="${CLUSTER_NIX_BUILD_CORES}" nix --option builders '' --option warn-dirty false \ + --max-jobs "${CLUSTER_NIX_MAX_JOBS}" run --quiet \ --extra-experimental-features 'nix-command flakes' \ "${REPO_ROOT}#deployer-ctl" -- "$@" } +nix_build_local() { + NIX_BUILD_CORES="${CLUSTER_NIX_BUILD_CORES}" nix build -L \ + --option builders '' \ + --max-jobs "${CLUSTER_NIX_MAX_JOBS}" \ + --extra-experimental-features 'nix-command flakes' \ + "$@" +} + release_cluster_lock() { local lock_dir local owner="" @@ -458,6 +526,55 @@ wait_for_lightningstor_counts_equal() { done } +durability_proof_root() { + printf '%s/%s\n' "${ULTRACLOUD_WORK_ROOT}" "durability-proof" +} + +prepare_durability_proof_dir() { + local proof_root proof_dir timestamp + proof_root="$(durability_proof_root)" + timestamp="$(date '+%Y%m%dT%H%M%S%z')" + proof_dir="${proof_root}/${timestamp}" + mkdir -p "${proof_dir}" + ln -sfn "${proof_dir}" "${proof_root}/latest" + printf '%s\n' "${proof_dir}" +} + +rollout_soak_root() { + printf '%s/%s\n' "${ULTRACLOUD_WORK_ROOT}" "rollout-soak" +} + +prepare_rollout_soak_dir() { + local proof_root proof_dir timestamp + proof_root="$(rollout_soak_root)" + timestamp="$(date '+%Y%m%dT%H%M%S%z')" + proof_dir="${proof_root}/${timestamp}" + mkdir -p "${proof_dir}" + ln -sfn "${proof_dir}" "${proof_root}/latest" + printf '%s\n' "${proof_dir}" +} + +provider_vm_reality_proof_root() { + 
printf '%s/%s\n' "${ULTRACLOUD_WORK_ROOT}" "provider-vm-reality-proof" +} + +prepare_provider_vm_reality_proof_dir() { + local proof_root proof_dir timestamp + proof_root="$(provider_vm_reality_proof_root)" + timestamp="$(date '+%Y%m%dT%H%M%S%z')" + proof_dir="${proof_root}/${timestamp}" + mkdir -p "${proof_dir}" + ln -sfn "${proof_dir}" "${proof_root}/latest" + printf '%s\n' "${proof_dir}" +} + +provider_vm_proof_subdir() { + local subdir="$1" + local dir="${ULTRACLOUD_PROVIDER_VM_PROOF_DIR}/${subdir}" + mkdir -p "${dir}" + printf '%s\n' "${dir}" +} + require_cmd() { command -v "$1" >/dev/null 2>&1 || die "required command not found: $1" } @@ -699,6 +816,45 @@ runvm_path() { find -L "$(build_link "${node}")/bin" -maxdepth 1 -name 'run-*-vm' | head -n1 } +vm_accelerator_mode() { + if [[ "${PHOTON_VM_FORCE_TCG}" == "1" ]]; then + printf '%s\n' "tcg" + else + printf '%s\n' "kvm" + fi +} + +prepared_runvm_path() { + local node="$1" + local source_runvm="$2" + local node_runtime="$3" + local target_runvm="${node_runtime}/run-${node}-vm.local" + local switch_socket + + switch_socket="$(vde_switch_dir)" + + if [[ "${PHOTON_VM_FORCE_TCG}" != "1" && "${switch_socket}" == "${LEGACY_VDE_SWITCH_SOCKET}" ]]; then + printf '%s\n' "${source_runvm}" + return 0 + fi + + if [[ "${PHOTON_VM_FORCE_TCG}" == "1" ]]; then + sed \ + -e "s|${LEGACY_VDE_SWITCH_SOCKET}|${switch_socket}|g" \ + -e 's/-machine accel=kvm:tcg -cpu max \\/-machine accel=tcg -cpu max \\/' \ + -e '/^[[:space:]]*-enable-kvm[[:space:]]*\\$/d' \ + -e '/^[[:space:]]*-cpu host[[:space:]]*\\$/d' \ + "${source_runvm}" >"${target_runvm}" + else + sed \ + -e "s|${LEGACY_VDE_SWITCH_SOCKET}|${switch_socket}|g" \ + "${source_runvm}" >"${target_runvm}" + fi + + chmod +x "${target_runvm}" + printf '%s\n' "${target_runvm}" +} + guest_image_path() { local link_path link_path="$(guest_image_link)" @@ -771,11 +927,22 @@ preflight() { require_cmd grpcurl require_cmd vde_switch + mkdir -p "$(dirname "${VM_DIR_BASE}")" "$(dirname "${VDE_SWITCH_DIR_BASE}")" "${TMPDIR}" mkdir -p "$(vm_dir)" log "Cluster build profile: ${BUILD_PROFILE} (state dir $(vm_dir))" + log "Host temp dir: ${TMPDIR}" + log "Local Nix execution: builders=local max-jobs=${CLUSTER_NIX_MAX_JOBS} build-cores=${CLUSTER_NIX_BUILD_CORES}" [[ -e /dev/kvm ]] || die "/dev/kvm is not present; nested-KVM VM validation requires hardware virtualization" - [[ -r /dev/kvm && -w /dev/kvm ]] || warn "/dev/kvm exists but current user may not have full access" + if [[ ! -r /dev/kvm || ! 
-w /dev/kvm ]]; then + warn "/dev/kvm exists but current user may not have full access" + if [[ "${PHOTON_VM_FORCE_TCG}" != "1" ]]; then + PHOTON_VM_FORCE_TCG=1 + warn "Falling back to software TCG acceleration because /dev/kvm is not accessible" + fi + fi + log "VM accelerator mode: $(vm_accelerator_mode)" + log "VM console wait timeout: $(vm_console_wait_timeout)s" local nested_path nested_path="$(host_nested_param_path || true)" @@ -899,18 +1066,45 @@ terminate_pids() { stale_vm_pids_for_nodes_current_profile() { local nodes=("$@") - local pid cmd node port runtime_path + local pid cmd node port runtime_path runtime_root declare -A seen=() + cmdline_matches_current_profile_scope() { + local cmdline="$1" + local scope + + for scope in "$(vm_dir)" "$(vde_switch_dir)"; do + [[ -n "${scope}" ]] || continue + if [[ "${cmdline}" == *"${scope}"* ]]; then + return 0 + fi + done + + return 1 + } + + pid_matches_current_profile_scope() { + local pid="$1" + local cmdline="" + + [[ -r "/proc/${pid}/cmdline" ]] || return 1 + cmdline="$(tr '\0' ' ' <"/proc/${pid}/cmdline" 2>/dev/null || true)" + [[ -n "${cmdline}" ]] || return 1 + cmdline_matches_current_profile_scope "${cmdline}" + } + while read -r pid cmd; do [[ -n "${pid:-}" ]] || continue for node in "${nodes[@]}"; do port="$(ssh_port_for_node "${node}")" runtime_path="$(runtime_dir "${node}")/${node}.qcow2" - if [[ "${cmd}" == *"qemu-system"* ]] && { - [[ "${cmd}" == *"file=${runtime_path}"* ]] || - [[ "${cmd}" == *"hostfwd=tcp::${port}-:22"* ]]; - }; then + runtime_root="$(runtime_dir "${node}")" + if [[ "${cmd}" == *"qemu-system"* ]] \ + && cmdline_matches_current_profile_scope "${cmd}" \ + && { + [[ "${cmd}" == *"file=${runtime_path}"* ]] || + [[ "${cmd}" == *"${runtime_root}/"* ]]; + }; then seen["${pid}"]=1 fi done @@ -926,6 +1120,11 @@ stale_vm_pids_for_nodes_current_profile() { | sed -n 's/.*pid=\([0-9]\+\).*/\1/p' \ | sort -u ) + for pid in "${!seen[@]}"; do + if ! 
pid_matches_current_profile_scope "${pid}"; then + unset 'seen[$pid]' + fi + done done printf '%s\n' "${!seen[@]}" | sort -n @@ -992,9 +1191,7 @@ build_vm() { target="$(build_target_for_node "${node}")" log "Building ${node} VM derivation (${target})" - out="$(NIX_BUILD_CORES="${CLUSTER_NIX_BUILD_CORES}" nix build -L \ - --max-jobs "${CLUSTER_NIX_MAX_JOBS}" \ - --extra-experimental-features 'nix-command flakes' \ + out="$(nix_build_local \ "${CLUSTER_FLAKE_REF}#nixosConfigurations.${target}.config.system.build.vm" \ --no-link --print-out-paths | tail -n1)" [[ -n "${out}" ]] || die "failed to resolve VM output for ${node}" @@ -1026,9 +1223,7 @@ build_vms() { log "Building VM derivations in one Nix invocation: ${nodes[*]}" mapfile -t outputs < <( - NIX_BUILD_CORES="${CLUSTER_NIX_BUILD_CORES}" nix build -L \ - --max-jobs "${CLUSTER_NIX_MAX_JOBS}" \ - --extra-experimental-features 'nix-command flakes' \ + nix_build_local \ "${targets[@]}" \ --no-link --print-out-paths ) @@ -1049,9 +1244,7 @@ build_guest_image() { fi log "Building bootable VM guest image on the host" - out="$(NIX_BUILD_CORES="${CLUSTER_NIX_BUILD_CORES}" nix build -L \ - --max-jobs "${CLUSTER_NIX_MAX_JOBS}" \ - --extra-experimental-features 'nix-command flakes' \ + out="$(nix_build_local \ "${CLUSTER_FLAKE_REF}#vmGuestImage" \ --no-link --print-out-paths | tail -n1)" [[ -n "${out}" ]] || die "failed to resolve VM guest image output" @@ -1067,9 +1260,7 @@ build_guest_bench_image() { fi log "Building VM benchmark guest image on the host" - out="$(NIX_BUILD_CORES="${CLUSTER_NIX_BUILD_CORES}" nix build -L \ - --max-jobs "${CLUSTER_NIX_MAX_JOBS}" \ - --extra-experimental-features 'nix-command flakes' \ + out="$(nix_build_local \ "${CLUSTER_FLAKE_REF}#vmBenchGuestImage" \ --no-link --print-out-paths | tail -n1)" [[ -n "${out}" ]] || die "failed to resolve VM benchmark guest image output" @@ -1108,6 +1299,7 @@ start_vm() { pid_path="$(pid_file "${node}")" vm_log="$(log_file "${node}")" mkdir -p "${node_runtime}" + runvm="$(prepared_runvm_path "${node}" "${runvm}" "${node_runtime}")" if is_running "${node}"; then log "${node} already running (PID $(<"${pid_path}"))" @@ -2464,9 +2656,9 @@ wait_for_plasmavmc_workers_registered() { -d '{}' \ 127.0.0.1:"${vm_port}" plasmavmc.v1.NodeService/ListNodes \ | jq -e ' - ([.nodes[] | select(.state == "NODE_STATE_READY") | .id] | index("node04")) != null + ([.nodes[] | select(.state == "NODE_STATE_READY" and (.id == "node04" or .id == "node05"))] | length) == 2 and - ([.nodes[] | select(.state == "NODE_STATE_READY") | .id] | index("node05")) != null + ([.nodes[] | select(.state == "NODE_STATE_READY" and (.id == "node04" or .id == "node05")) | (.hypervisors == ["HYPERVISOR_TYPE_KVM"])] | all) ' >/dev/null 2>&1; do if (( SECONDS >= deadline )); then die "timed out waiting for PlasmaVMC workers to register" @@ -2574,7 +2766,7 @@ wait_for_http() { local deadline=$((SECONDS + timeout)) log "Waiting for HTTP endpoint on ${node}: ${url}" - until ssh_node "${node}" "curl -fsS '${url}' >/dev/null" >/dev/null 2>&1; do + until ssh_node "${node}" "curl --connect-timeout 2 --max-time 5 -fsS '${url}' >/dev/null" >/dev/null 2>&1; do if (( SECONDS >= deadline )); then die "timed out waiting for ${url} on ${node}" fi @@ -2590,7 +2782,7 @@ wait_for_http_status() { local deadline=$((SECONDS + timeout)) log "Waiting for HTTP status on ${node}: ${url} (${expected_codes})" - until ssh_node "${node}" "code=\$(curl -sS -o /dev/null -w '%{http_code}' '${url}' || true); case \" ${expected_codes} \" in *\" \${code} \"*) exit 
0 ;; *) exit 1 ;; esac" >/dev/null 2>&1; do + until ssh_node "${node}" "code=\$(curl --connect-timeout 2 --max-time 5 -sS -o /dev/null -w '%{http_code}' '${url}' || true); case \" ${expected_codes} \" in *\" \${code} \"*) exit 0 ;; *) exit 1 ;; esac" >/dev/null 2>&1; do if (( SECONDS >= deadline )); then die "timed out waiting for HTTP status ${expected_codes} from ${url} on ${node}" fi @@ -2610,7 +2802,7 @@ wait_for_http_body() { set -euo pipefail url="$1" expected="$2" -body="$(curl -fsS "${url}")" +body="$(curl --connect-timeout 2 --max-time 5 -fsS "${url}")" [[ "${body}" == "${expected}" ]] EOF do @@ -2664,14 +2856,15 @@ vm_demo_create_todo_json() { attachment_name_b64="$(printf '%s' "${attachment_name}" | base64 -w0)" attachment_body_b64="$(printf '%s' "${attachment_body}" | base64 -w0)" - ssh_node_script "${node}" "$(vm_demo_url "${ip}" "/api/todos")" "${title_b64}" "${details_b64}" "${attachment_name_b64}" "${attachment_body_b64}" <<'EOF' +ssh_node_script "${node}" "$(vm_demo_url "${ip}" "/api/todos")" "${title_b64}" "${details_b64}" "${attachment_name_b64}" "${attachment_body_b64}" <<'EOF' set -euo pipefail url="$1" title="$(printf '%s' "$2" | base64 -d)" details="$(printf '%s' "$3" | base64 -d)" attachment_name="$(printf '%s' "$4" | base64 -d)" attachment_body="$(printf '%s' "$5" | base64 -d)" -attachment_file="$(mktemp)" +tmp_root="${TMPDIR:-/tmp}" +attachment_file="$(mktemp -p "${tmp_root}" photon-demo-vm-attachment-XXXXXX)" trap 'rm -f "${attachment_file}"' EXIT printf '%s' "${attachment_body}" >"${attachment_file}" curl -fsS -X POST \ @@ -2840,7 +3033,7 @@ assert_vm_demo_backend_artifacts() { lightningstor_head_object_json 15086 "${token}" "${bucket}" "${latest_attachment_object_key}" \ | jq -e '(.object.size | tonumber) > 0' >/dev/null \ || die "VM demo attachment object missing from LightningStor: ${bucket}/${latest_attachment_object_key}" - latest_attachment_file="$(mktemp)" + latest_attachment_file="$(mktemp -p "${TMPDIR}" photon-demo-vm-latest-attachment-XXXXXX)" download_lightningstor_object_to_file 15086 "${token}" "${bucket}" "${latest_attachment_object_key}" "${latest_attachment_file}" [[ "$(<"${latest_attachment_file}")" == "${expected_latest_attachment_body}" ]] || die "unexpected VM demo attachment body in LightningStor: ${bucket}/${latest_attachment_object_key}" rm -f "${latest_attachment_file}" @@ -2849,7 +3042,7 @@ assert_vm_demo_backend_artifacts() { [[ -z "${latest_attachment_filename}" ]] || die "VM demo unexpectedly returned attachment name ${latest_attachment_filename}" fi - snapshot_file="$(mktemp)" + snapshot_file="$(mktemp -p "${TMPDIR}" photon-demo-vm-snapshot-XXXXXX)" download_lightningstor_object_to_file 15086 "${token}" "${bucket}" "${latest_object_key}" "${snapshot_file}" jq -e \ --argjson todo_count "${expected_todo_count}" \ @@ -3154,11 +3347,24 @@ vm_console_path() { printf '%s/console.log\n' "$(vm_runtime_dir_path "$1")" } +vm_console_wait_timeout() { + if [[ -n "${PHOTON_VM_CONSOLE_WAIT_TIMEOUT:-}" ]]; then + printf '%s\n' "${PHOTON_VM_CONSOLE_WAIT_TIMEOUT}" + return 0 + fi + + if [[ "${PHOTON_VM_FORCE_TCG}" == "1" ]]; then + printf '%s\n' 1200 + else + printf '%s\n' "${HTTP_WAIT_TIMEOUT}" + fi +} + wait_for_vm_console_pattern() { local node="$1" local vm_id="$2" local pattern="$3" - local timeout="${4:-${HTTP_WAIT_TIMEOUT}}" + local timeout="${4:-$(vm_console_wait_timeout)}" local deadline=$((SECONDS + timeout)) local console_path console_q pattern_q prefix prefix_q target_count @@ -3496,12 +3702,6 @@ validate_storage_units() { wait_for_unit 
"${node}" iam done - for unit in plasmavmc lightningstor coronafs; do - wait_for_unit node01 "${unit}" - done - assert_unit_clean_boot node01 plasmavmc - assert_unit_clean_boot node01 lightningstor - for node in node04 node05; do for unit in ${NODE_UNITS[${node}]}; do wait_for_unit "${node}" "${unit}" @@ -3509,6 +3709,12 @@ validate_storage_units() { assert_unit_clean_boot "${node}" plasmavmc assert_unit_clean_boot "${node}" lightningstor done + + for unit in plasmavmc lightningstor coronafs; do + wait_for_unit node01 "${unit}" + done + assert_unit_clean_boot node01 plasmavmc + assert_unit_clean_boot node01 lightningstor } validate_storage_control_plane() { @@ -4210,6 +4416,90 @@ validate_flashdns_flow() { stop_ssh_tunnel node01 "${iam_tunnel}" } +write_fiberlb_test_tls_material() { + local work_dir="$1" + + cat >"${work_dir}/fiberlb-test-cert.pem" <<'EOF' +-----BEGIN CERTIFICATE----- +MIIDHTCCAgWgAwIBAgIUXbGpybONHAWOAaVKue4TBfyKrlcwDQYJKoZIhvcNAQEL +BQAwHjEcMBoGA1UEAwwTZmliZXJsYi5tYXRyaXgudGVzdDAeFw0yNjA0MDgyMjQ0 +MjVaFw0zNjA0MDUyMjQ0MjVaMB4xHDAaBgNVBAMME2ZpYmVybGIubWF0cml4LnRl +c3QwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC0QFMdJbe/uKJ0lG/+ +EwowGOSsZ7eor/YSqsj565LuhAW9WtwypvB2Q5FCPaeRSYlayRNhEMzKd9fMiGeh +KsUJQdaN/2/OVHl1u0TNl2ib7d8Y0KUBswoCSL5kiiQN0qHOn3bwOi7NHWopM5GF +tU85ADL/rgCrSm9GyRuDq0xJ0WQsTbNQsXWmFng3bC7OoxTVup7pin0Mmkt0CQTo +OZRQsPtOQxoU8fQehasEybrfylR0txfh1NkX6lrtsbdbnmbL0WYh612z2TRePlbL +P3JW9bIjylKlu7YCsu8tnkj4IMLdlVGZxeSApqRnM4JxB3n8TeS4dkuOhkA6y4PJ +tTbhAgMBAAGjUzBRMB0GA1UdDgQWBBQEv7n2rAV2YxD64AcaNxNT1jV3jDAfBgNV +HSMEGDAWgBQEv7n2rAV2YxD64AcaNxNT1jV3jDAPBgNVHRMBAf8EBTADAQH/MA0G +CSqGSIb3DQEBCwUAA4IBAQBGmNFaZ+B9WBuJ66qOoqWOcvb2Pnt9+ul2gFis9MHk +srJ/wIxwNQEDS1N/7tT2hOU2m2hoZl6oyNzMnSa6L3x8m55+gQ84wUSu1G464XO/ +nAeLh7zXYfX+aPQhqygNpsLmO7vjlKhM+EwVAPKrOoKl1PSJnEmgjwmrpymKLP62 +fQ/t9tHreN7GzBKAjNRP6mdNj4xef/rjNf4g3qOPAO5OKs2Mn//fALqetlAlPiSV +jOLN60bDf3Or/04Tfpg2XNlhx1d6TQKMytQ3J6AJpPWWKnhGcPownXniIdWAhCNX +x8xmJ49pZ5qtUt2nYKQhu6ubgMkk7uM/dy1jnmJQbydX +-----END CERTIFICATE----- +EOF + + cat >"${work_dir}/fiberlb-test-key.pem" <<'EOF' +-----BEGIN PRIVATE KEY----- +MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC0QFMdJbe/uKJ0 +lG/+EwowGOSsZ7eor/YSqsj565LuhAW9WtwypvB2Q5FCPaeRSYlayRNhEMzKd9fM +iGehKsUJQdaN/2/OVHl1u0TNl2ib7d8Y0KUBswoCSL5kiiQN0qHOn3bwOi7NHWop +M5GFtU85ADL/rgCrSm9GyRuDq0xJ0WQsTbNQsXWmFng3bC7OoxTVup7pin0Mmkt0 +CQToOZRQsPtOQxoU8fQehasEybrfylR0txfh1NkX6lrtsbdbnmbL0WYh612z2TRe +PlbLP3JW9bIjylKlu7YCsu8tnkj4IMLdlVGZxeSApqRnM4JxB3n8TeS4dkuOhkA6 +y4PJtTbhAgMBAAECggEACde9HiFnErTbqKVqJefu4YxbcNNnUNKee5uYNsb7jpMW +n+XaMjkh1nhnMkR8/erTwXmLSYyt/SSdC+f7Li6gZKpz44xCNARyfzh9F+OroH+l +VAThLMoEuvYTUPjG4hsZTzfPmiu4LmbLuzEb4j7xwvdhAYuUMWZStB0jV63bACkT +n8Tse6+LLt2hbqVwK151f9sZdgQpk0cJ8wlbl1kt7+Ccx5JVl4poB9owtz2lFarZ +rzu1o7+/pU913jdVhIN0UeJnSXVAOTgINJ9rsoxRRxnzML2MkgcNOsliLF02IprY +SfBxu+1ukXJl8XYlkblYU8uvxtUPVdVpeaWbiWdssQKBgQDow+IAfZo3R4ZYWVQs +cSVlAUn01Oh+C8JEhKCYSfcuT4rXhvFV4cyZycoGq/UgQt/GmMW0hFlYRp45MJ+c +q4j2Ve8pn1N89RzHnvUPiHjh1+x1rECFppk8foz5Xvmm+pfiflLaMXbF+HIVKenV +Ut1VeEFLt7KS4JQZ3Tvz0PcIcQKBgQDGPn5XkeO5yntXGpAuxCbd3LjbG/qG7GB1 +ajzYmlLQKGQgbhywfxkPypMs454aRDfMvI9qzH1iUBJL9UqZkLsyaCRd5l4wrZ74 +qrFw7pZYPXVUuLBt+IgFofzt06ikqw1e4O0fSBIi5l156zDg3CmFoJzlSxUyPypy +pyVmnWjNcQKBgQCpC3nbOV0CwKNFDrse+51Y2bLIaMfgotGXyWqIr08hBwuis5IZ ++T8KgY6DgecoYuj55oHPND9e6QqIJlQFCF8T6B0lcVzE547bOwpnGYoRGyvT2iGR +7whi8tWXVe6h4mTkTTIG3N2RsXnI/QOhteig5hEl2GeMXCWzEwUzcZO3QQKBgHJn +BgoLCtvg2aS+k44pbDSEd7kaQE00g8W+dTaaeEffx0hfwvanohLW2HADOu31iC4C 
+ctEzCPh/NM/Y0iAkwaKpLrNxidGVoDPobVW6mdB6RweJPWd5ESYPASNODfhZLfIe +4zg2IyE4x6Df9OkOpHeU5HX+ClFg2mInyxGYrS5BAoGBAIHcHVX+M/SktpfHFAGD +WexqE3rv2d8rJR/bppA3ViLr0zIGDrlfXQ5IGQwSAh35mpammnQAwXnKwoYmb545 +c5mGkRpXiksMfALGxrbxYP4YKJBLtoasx41xL6oQquWbXqdZxp5g9xSXLvgDdY0h +OM9mkrXFzK5u6RX48W+EjtwU +-----END PRIVATE KEY----- +EOF +} + +create_fiberlb_test_certificate() { + local token="$1" + local lb_id="$2" + local cert_name="$3" + local tmp_dir cert_id + + tmp_dir="$(mktemp -d)" + write_fiberlb_test_tls_material "${tmp_dir}" + cert_id="$( + grpcurl -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" \ + -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn \ + --arg lb "${lb_id}" \ + --arg name "${cert_name}" \ + --rawfile cert "${tmp_dir}/fiberlb-test-cert.pem" \ + --rawfile key "${tmp_dir}/fiberlb-test-key.pem" \ + '{loadbalancerId:$lb, name:$name, certificate:$cert, privateKey:$key, certType:"CERTIFICATE_TYPE_SERVER"}')" \ + 127.0.0.1:15085 fiberlb.v1.CertificateService/CreateCertificate \ + | jq -r '.certificate.id' + )" + rm -rf "${tmp_dir}" + + printf '%s' "${cert_id}" +} + validate_fiberlb_flow() { log "Validating FiberLB management API, runtime listeners, and backend failover behavior" @@ -4222,6 +4512,8 @@ validate_fiberlb_flow() { local project_id="fiberlb-smoke-project" local principal_id="fiberlb-smoke-$(date +%s)" local token lb_id pool_id health_check_id backend_id listener_id listener_port + local https_pool_id https_backend_id certificate_id https_listener_id terminated_listener_id + local https_listener_port terminated_listener_port token="$(issue_project_admin_token 15080 "${org_id}" "${project_id}" "${principal_id}")" listener_port=$((18080 + (RANDOM % 100))) @@ -4356,6 +4648,83 @@ validate_fiberlb_flow() { | jq -e '.backend.adminState == "BACKEND_ADMIN_STATE_ENABLED"' >/dev/null wait_for_http node01 "http://127.0.0.1:${listener_port}/health" + log "Validating FiberLB HTTPS and TerminatedHttps listener runtimes" + certificate_id="$(create_fiberlb_test_certificate "${token}" "${lb_id}" "fiberlb-smoke-cert")" + [[ -n "${certificate_id}" && "${certificate_id}" != "null" ]] || die "FiberLB CreateCertificate did not return an ID" + + https_pool_id="$(grpcurl -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" \ + -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg name "fiberlb-smoke-http-pool" --arg lb "${lb_id}" '{name:$name, loadbalancerId:$lb, algorithm:"POOL_ALGORITHM_ROUND_ROBIN", protocol:"POOL_PROTOCOL_HTTP"}')" \ + 127.0.0.1:15085 fiberlb.v1.PoolService/CreatePool \ + | jq -r '.pool.id')" + [[ -n "${https_pool_id}" && "${https_pool_id}" != "null" ]] || die "FiberLB HTTPS CreatePool did not return an ID" + + https_backend_id="$(grpcurl -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" \ + -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg name "fiberlb-smoke-http-backend" --arg pool "${https_pool_id}" '{name:$name, poolId:$pool, address:"10.100.0.11", port:8081, weight:1}')" \ + 127.0.0.1:15085 fiberlb.v1.BackendService/CreateBackend \ + | jq -r '.backend.id')" + [[ -n "${https_backend_id}" && "${https_backend_id}" != "null" ]] || die "FiberLB HTTPS CreateBackend did not return an ID" + + https_listener_port="$(allocate_free_listener_port node01 19000 19149)" || die "failed to allocate FiberLB HTTPS listener port" + https_listener_id="$(grpcurl -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" \ + -proto "${FIBERLB_PROTO}" \ + -d 
"$(jq -cn --arg name "fiberlb-smoke-https-listener" --arg lb "${lb_id}" --arg pool "${https_pool_id}" --arg cert "${certificate_id}" --argjson port "${https_listener_port}" '{name:$name, loadbalancerId:$lb, protocol:"LISTENER_PROTOCOL_HTTPS", port:$port, defaultPoolId:$pool, tlsConfig:{certificateId:$cert, minVersion:"TLS_VERSION_TLS_1_2"}, connectionLimit:0}')" \ + 127.0.0.1:15085 fiberlb.v1.ListenerService/CreateListener \ + | jq -r '.listener.id')" + [[ -n "${https_listener_id}" && "${https_listener_id}" != "null" ]] || die "FiberLB HTTPS CreateListener did not return an ID" + + terminated_listener_port="$(allocate_free_listener_port node01 19150 19299)" || die "failed to allocate FiberLB TerminatedHttps listener port" + terminated_listener_id="$(grpcurl -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" \ + -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg name "fiberlb-smoke-terminated-listener" --arg lb "${lb_id}" --arg pool "${https_pool_id}" --arg cert "${certificate_id}" --argjson port "${terminated_listener_port}" '{name:$name, loadbalancerId:$lb, protocol:"LISTENER_PROTOCOL_TERMINATED_HTTPS", port:$port, defaultPoolId:$pool, tlsConfig:{certificateId:$cert, minVersion:"TLS_VERSION_TLS_1_2"}, connectionLimit:0}')" \ + 127.0.0.1:15085 fiberlb.v1.ListenerService/CreateListener \ + | jq -r '.listener.id')" + [[ -n "${terminated_listener_id}" && "${terminated_listener_id}" != "null" ]] || die "FiberLB TerminatedHttps CreateListener did not return an ID" + + wait_for_tcp_port node01 "${https_listener_port}" + wait_for_tcp_port node01 "${terminated_listener_port}" + ssh_node node01 "curl -kfsS --max-time 5 https://127.0.0.1:${https_listener_port}/health >/dev/null" + ssh_node node01 "curl -kfsS --max-time 5 https://127.0.0.1:${terminated_listener_port}/health >/dev/null" + + grpcurl -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" \ + -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg id "${terminated_listener_id}" '{id:$id}')" \ + 127.0.0.1:15085 fiberlb.v1.ListenerService/DeleteListener >/dev/null + grpcurl -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" \ + -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg id "${https_listener_id}" '{id:$id}')" \ + 127.0.0.1:15085 fiberlb.v1.ListenerService/DeleteListener >/dev/null + grpcurl -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" \ + -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg id "${https_backend_id}" '{id:$id}')" \ + 127.0.0.1:15085 fiberlb.v1.BackendService/DeleteBackend >/dev/null + grpcurl -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" \ + -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg id "${https_pool_id}" '{id:$id}')" \ + 127.0.0.1:15085 fiberlb.v1.PoolService/DeletePool >/dev/null + grpcurl -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" \ + -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg id "${certificate_id}" '{id:$id}')" \ + 127.0.0.1:15085 fiberlb.v1.CertificateService/DeleteCertificate >/dev/null grpcurl -plaintext \ -H "authorization: Bearer ${token}" \ -import-path "${FIBERLB_PROTO_DIR}" \ @@ -4521,12 +4890,30 @@ validate_k8shost_flow() { sleep 2 done - grpcurl -plaintext \ + local pod_create_json pod_resource_version + pod_create_json="$(grpcurl_capture -plaintext \ -H "authorization: Bearer ${token}" \ -import-path "${K8SHOST_PROTO_DIR}" \ -proto "${K8SHOST_PROTO}" \ -d 
"$(jq -cn --arg name "${pod_name}" --arg org "${org_id}" --arg project "${project_id}" '{pod:{metadata:{name:$name, namespace:"default", orgId:$org, projectId:$project, labels:{app:"k8shost-smoke"}}, spec:{containers:[{name:"backend", image:"smoke", ports:[{containerPort:8081, protocol:"TCP"}]}]}, status:{phase:"Running", podIp:"10.100.0.11", hostIp:"10.100.0.11"}}}')" \ - 127.0.0.1:15087 k8shost.PodService/CreatePod >/dev/null + 127.0.0.1:15087 k8shost.PodService/CreatePod)" \ + || die "failed to create K8sHost pod ${pod_name}: ${pod_create_json}" + pod_resource_version="$(printf '%s\n' "${pod_create_json}" | jq -r '.pod.metadata.resourceVersion')" + [[ -n "${pod_resource_version}" && "${pod_resource_version}" != "null" ]] \ + || die "K8sHost pod creation did not return a resource version: ${pod_create_json}" + + log "Matrix case: K8sHost watch snapshot stream" + local watch_snapshot_json + watch_snapshot_json="$(grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${K8SHOST_PROTO_DIR}" \ + -proto "${K8SHOST_PROTO}" \ + -d "$(jq -cn '{namespace:"default"}')" \ + 127.0.0.1:15087 k8shost.PodService/WatchPods)" \ + || die "failed to watch K8sHost pods in namespace default: ${watch_snapshot_json}" + printf '%s\n' "${watch_snapshot_json}" | jq -s -e --arg name "${pod_name}" ' + any(.[]; .type == "ADDED" and .object.metadata.name == $name)' >/dev/null \ + || die "K8sHost pod watch snapshot did not include ${pod_name}: ${watch_snapshot_json}" log "Matrix case: K8sHost + PrismNet" local pools_json @@ -4740,6 +5127,11 @@ validate_workers() { } validate_nested_kvm_workers() { + if [[ "${PHOTON_VM_SKIP_NESTED_KVM_VALIDATE}" == "1" ]]; then + warn "Skipping nested KVM validation because PHOTON_VM_SKIP_NESTED_KVM_VALIDATE=1" + return 0 + fi + log "Validating nested KVM inside worker VMs" for node in node04 node05; do ssh_node_script "${node}" <<'EOS' @@ -4887,6 +5279,189 @@ validate_lightningstor_distributed_storage() { || die "failed to decode copied LightningStor object ${bucket}/${copy_key}: ${output}" [[ "${copied_body}" == "${body}" ]] || die "copied LightningStor object payload did not match source" + log "Matrix case: LightningStor bucket config + object version listing" + local version_before_node01 version_before_node04 version_before_node05 + read -r version_before_node01 version_before_node04 version_before_node05 < <(lightningstor_count_triplet) + + output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "$(jq -cn --arg bucket "${bucket}" --arg status "Enabled" '{bucket:$bucket, status:$status}')" \ + 127.0.0.1:15086 lightningstor.v1.BucketService/PutBucketVersioning + )" || die "failed to enable LightningStor bucket versioning for ${bucket}: ${output}" + output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "$(jq -cn --arg bucket "${bucket}" '{bucket:$bucket}')" \ + 127.0.0.1:15086 lightningstor.v1.BucketService/GetBucketVersioning + )" || die "failed to read LightningStor bucket versioning for ${bucket}: ${output}" + printf '%s\n' "${output}" | jq -e '.status == "Enabled"' >/dev/null \ + || die "LightningStor bucket versioning did not persist: ${output}" + + local policy_document + policy_document="$(jq -cn --arg bucket "${bucket}" '{ + Version:"2012-10-17", + Statement:[ + { + Sid:"PublicReadProbe", + Effect:"Allow", + 
Principal:"*", + Action:["s3:GetObject"], + Resource:["arn:aws:s3:::\($bucket)/*"] + } + ] + }')" + output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "$(jq -cn --arg bucket "${bucket}" --arg policy "${policy_document}" '{bucket:$bucket, policy:$policy}')" \ + 127.0.0.1:15086 lightningstor.v1.BucketService/PutBucketPolicy + )" || die "failed to set LightningStor bucket policy for ${bucket}: ${output}" + output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "$(jq -cn --arg bucket "${bucket}" '{bucket:$bucket}')" \ + 127.0.0.1:15086 lightningstor.v1.BucketService/GetBucketPolicy + )" || die "failed to read LightningStor bucket policy for ${bucket}: ${output}" + printf '%s\n' "${output}" | jq -e --arg policy "${policy_document}" '.policy == $policy' >/dev/null \ + || die "LightningStor bucket policy round-trip did not match: ${output}" + + output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "$(jq -cn --arg bucket "${bucket}" '{bucket:$bucket, tags:[{key:"suite", value:"fresh-matrix"}, {key:"mode", value:"distributed"}]}')" \ + 127.0.0.1:15086 lightningstor.v1.BucketService/PutBucketTagging + )" || die "failed to set LightningStor bucket tags for ${bucket}: ${output}" + output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "$(jq -cn --arg bucket "${bucket}" '{bucket:$bucket}')" \ + 127.0.0.1:15086 lightningstor.v1.BucketService/GetBucketTagging + )" || die "failed to read LightningStor bucket tags for ${bucket}: ${output}" + printf '%s\n' "${output}" | jq -e ' + (.tags | length) == 2 and + any(.tags[]; .key == "suite" and .value == "fresh-matrix") and + any(.tags[]; .key == "mode" and .value == "distributed")' >/dev/null \ + || die "LightningStor bucket tagging round-trip did not match: ${output}" + + local versioned_key version_body_v1 version_body_v2 version_body_v1_b64 version_body_v2_b64 + local version_output version_list_output version_head_output first_version_id second_version_id + versioned_key="${key}-versions" + version_body_v1="version-one-${key}" + version_body_v2="version-two-${key}" + version_body_v1_b64="$(printf '%s' "${version_body_v1}" | base64 -w0)" + version_body_v2_b64="$(printf '%s' "${version_body_v2}" | base64 -w0)" + + version_output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "$(jq -cn --arg bucket "${bucket}" --arg key "${versioned_key}" --arg body "${version_body_v1_b64}" '{bucket:$bucket, key:$key, body:$body, contentMd5:"", ifNoneMatch:""}')" \ + 127.0.0.1:15086 lightningstor.v1.ObjectService/PutObject + )" || die "failed to store LightningStor versioned object ${bucket}/${versioned_key} v1: ${version_output}" + first_version_id="$(printf '%s\n' "${version_output}" | jq -r '.versionId')" + [[ -n "${first_version_id}" && "${first_version_id}" != "null" && "${first_version_id}" != "null" ]] \ + || die "LightningStor versioned write did not return a version ID: ${version_output}" + + version_output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path 
"${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "$(jq -cn --arg bucket "${bucket}" --arg key "${versioned_key}" --arg body "${version_body_v2_b64}" '{bucket:$bucket, key:$key, body:$body, contentMd5:"", ifNoneMatch:""}')" \ + 127.0.0.1:15086 lightningstor.v1.ObjectService/PutObject + )" || die "failed to store LightningStor versioned object ${bucket}/${versioned_key} v2: ${version_output}" + second_version_id="$(printf '%s\n' "${version_output}" | jq -r '.versionId')" + [[ -n "${second_version_id}" && "${second_version_id}" != "null" && "${second_version_id}" != "${first_version_id}" ]] \ + || die "LightningStor second versioned write returned an invalid version ID: ${version_output}" + + wait_for_lightningstor_counts_greater_than "${version_before_node01}" "${version_before_node04}" "${version_before_node05}" "LightningStor versioned object replication" + + version_list_output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "$(jq -cn --arg bucket "${bucket}" --arg prefix "${versioned_key}" '{bucket:$bucket, prefix:$prefix, maxKeys:10}')" \ + 127.0.0.1:15086 lightningstor.v1.ObjectService/ListObjectVersions + )" || die "failed to list LightningStor object versions for ${bucket}/${versioned_key}: ${version_list_output}" + printf '%s\n' "${version_list_output}" | jq -e \ + --arg key "${versioned_key}" \ + --arg first "${first_version_id}" \ + --arg second "${second_version_id}" ' + (.versions | length) >= 2 and + any(.versions[]; .key == $key and .versionId == $first and (.isLatest // false) == false) and + any(.versions[]; .key == $key and .versionId == $second and .isLatest == true)' >/dev/null \ + || die "LightningStor object version listing did not expose the expected versions: ${version_list_output}" + + version_head_output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "$(jq -cn --arg bucket "${bucket}" --arg key "${versioned_key}" --arg version_id "${second_version_id}" '{bucket:$bucket, key:$key, versionId:$version_id}')" \ + 127.0.0.1:15086 lightningstor.v1.ObjectService/HeadObject + )" || die "failed to head the latest LightningStor object version ${bucket}/${versioned_key}@${second_version_id}: ${version_head_output}" + printf '%s\n' "${version_head_output}" | jq -e --arg version_id "${second_version_id}" '.object.versionId == $version_id and .object.isLatest == true' >/dev/null \ + || die "LightningStor explicit version lookup returned unexpected metadata: ${version_head_output}" + + output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "$(jq -cn --arg bucket "${bucket}" '{bucket:$bucket}')" \ + 127.0.0.1:15086 lightningstor.v1.BucketService/DeleteBucketTagging + )" || die "failed to clear LightningStor bucket tags for ${bucket}: ${output}" + output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "$(jq -cn --arg bucket "${bucket}" '{bucket:$bucket}')" \ + 127.0.0.1:15086 lightningstor.v1.BucketService/DeleteBucketPolicy + )" || die "failed to clear LightningStor bucket policy for ${bucket}: ${output}" + output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path 
"${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "$(jq -cn --arg bucket "${bucket}" '{bucket:$bucket, status:""}')" \ + 127.0.0.1:15086 lightningstor.v1.BucketService/PutBucketVersioning + )" || die "failed to disable LightningStor bucket versioning for ${bucket}: ${output}" + + output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "$(jq -cn --arg bucket "${bucket}" --arg key "${versioned_key}" --arg version_id "${first_version_id}" '{bucket:$bucket, key:$key, versionId:$version_id}')" \ + 127.0.0.1:15086 lightningstor.v1.ObjectService/DeleteObject + )" || die "failed to delete LightningStor object version ${bucket}/${versioned_key}@${first_version_id}: ${output}" + output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "$(jq -cn --arg bucket "${bucket}" --arg key "${versioned_key}" --arg version_id "${second_version_id}" '{bucket:$bucket, key:$key, versionId:$version_id}')" \ + 127.0.0.1:15086 lightningstor.v1.ObjectService/DeleteObject + )" || die "failed to delete LightningStor object version ${bucket}/${versioned_key}@${second_version_id}: ${output}" + + wait_for_lightningstor_counts_equal "${version_before_node01}" "${version_before_node04}" "${version_before_node05}" "LightningStor versioned object cleanup" + delete_json="$(jq -cn --arg bucket "${bucket}" --arg key "${key}" '{bucket:$bucket, key:$key}')" copy_delete_json="$(jq -cn --arg bucket "${bucket}" --arg key "${copy_key}" '{bucket:$bucket, key:$key}')" log "LightningStor distributed replication: DELETE ${bucket}/${key}" @@ -4920,6 +5495,7 @@ validate_vm_storage_flow() { log "Validating PlasmaVMC image import, shared-volume execution, and cross-node migration" local iam_tunnel="" prism_tunnel="" ls_tunnel="" vm_tunnel="" coronafs_tunnel="" gateway_tunnel="" + local provider_vm_hosting_proof_dir="" local node04_coronafs_tunnel="" node05_coronafs_tunnel="" local demo_access_tunnel="" demo_access_node="" demo_access_port="" local current_worker_coronafs_port="" peer_worker_coronafs_port="" @@ -4928,6 +5504,17 @@ validate_vm_storage_flow() { local preserve_vm_demo_gateway_proxies=0 local keep_running="${PHOTON_VM_DEMO_KEEP_RUNNING:-0}" local vm_port=15082 + + if [[ -n "${ULTRACLOUD_PROVIDER_VM_PROOF_DIR:-}" ]]; then + provider_vm_hosting_proof_dir="$(provider_vm_proof_subdir "vm-hosting")" + jq -n \ + --arg command "nix run ./nix/test-cluster#cluster -- provider-vm-reality-proof" \ + --arg component_bundle "plasmavmc+prismnet+coronafs+lightningstor" \ + --arg artifact_dir "${provider_vm_hosting_proof_dir}" \ + '{command:$command, component_bundle:$component_bundle, artifact_dir:$artifact_dir}' \ + >"${provider_vm_hosting_proof_dir}/meta.json" + fi + iam_tunnel="$(start_ssh_tunnel node01 15080 50080)" prism_tunnel="$(start_ssh_tunnel node01 15081 50081)" ls_tunnel="$(start_ssh_tunnel node01 15086 50086)" @@ -5190,6 +5777,9 @@ EOS local create_response vm_id create_response="$(api_gateway_request POST "${token}" "/api/v1/vms" "${create_vm_rest_json}")" + if [[ -n "${provider_vm_hosting_proof_dir}" ]]; then + printf '%s\n' "${create_response}" >"${provider_vm_hosting_proof_dir}/vm-create-response.json" + fi vm_id="$(printf '%s' "${create_response}" | jq -r '.data.id')" [[ -n "${vm_id}" && "${vm_id}" != "null" ]] || die "failed to create VM through PlasmaVMC" port_id="$(printf '%s' 
"${create_response}" | jq -r '.data.network[0].port_id // empty')" @@ -5237,6 +5827,9 @@ EOS fi local vm_spec_json volume_id data_volume_id vm_spec_json="$(wait_for_vm_network_spec "${token}" "${get_vm_json}" "${port_id}" "${subnet_id}" "${port_mac}" "${port_ip}" "${vm_port}")" + if [[ -n "${provider_vm_hosting_proof_dir}" ]]; then + printf '%s\n' "${vm_spec_json}" >"${provider_vm_hosting_proof_dir}/vm-spec-initial.json" + fi volume_id="$(vm_disk_volume_id_from_json "${vm_spec_json}" "root")" data_volume_id="$(vm_disk_volume_id_from_json "${vm_spec_json}" "data")" [[ -n "${volume_id}" ]] || die "failed to resolve root volume ID from VM spec" @@ -5356,6 +5949,10 @@ EOS )" root_volume_state_json="$(try_get_volume_json "${token}" "${get_root_volume_json}")" data_volume_state_json="$(try_get_volume_json "${token}" "${get_data_volume_json}")" + if [[ -n "${provider_vm_hosting_proof_dir}" ]]; then + printf '%s\n' "${root_volume_state_json}" >"${provider_vm_hosting_proof_dir}/root-volume-before-migration.json" + printf '%s\n' "${data_volume_state_json}" >"${provider_vm_hosting_proof_dir}/data-volume-before-migration.json" + fi [[ "$(printf '%s' "${root_volume_state_json}" | jq -r '.attachedToVm // empty')" == "${vm_id}" ]] || die "root volume ${volume_id} is not attached to VM ${vm_id}" [[ "$(printf '%s' "${root_volume_state_json}" | jq -r '.attachedToNode // empty')" == "${node_id}" ]] || die "root volume ${volume_id} is not owned by node ${node_id}" [[ "$(printf '%s' "${data_volume_state_json}" | jq -r '.attachedToVm // empty')" == "${vm_id}" ]] || die "data volume ${data_volume_id} is not attached to VM ${vm_id}" @@ -5545,6 +6142,9 @@ EOS wait_for_vm_console_pattern "${node_id}" "${vm_id}" "PHOTON_VM_SMOKE_HEARTBEAT count=3" wait_for_vm_demo_http "${node_id}" "${port_ip}" demo_state_json="$(vm_demo_request_json "${node_id}" GET "${port_ip}" "/state")" + if [[ -n "${provider_vm_hosting_proof_dir}" ]]; then + printf '%s\n' "${demo_state_json}" >"${provider_vm_hosting_proof_dir}/demo-state-after-migration.json" + fi assert_vm_demo_state "${demo_state_json}" 2 2 3 3 "${todo2_title}" "${todo2_attachment_name}" assert_vm_demo_backend_artifacts "${token}" "${demo_state_json}" 2 2 3 3 "${todo2_attachment_body}" assert_vm_demo_attachment_body "${node_id}" "${port_ip}" 2 "${todo2_attachment_body}" @@ -5562,6 +6162,20 @@ EOS ) root_volume_state_json="$(try_get_volume_json "${token}" "${get_root_volume_json}")" data_volume_state_json="$(try_get_volume_json "${token}" "${get_data_volume_json}")" + if [[ -n "${provider_vm_hosting_proof_dir}" ]]; then + printf '%s\n' "${root_volume_state_json}" >"${provider_vm_hosting_proof_dir}/root-volume-after-migration.json" + printf '%s\n' "${data_volume_state_json}" >"${provider_vm_hosting_proof_dir}/data-volume-after-migration.json" + jq -n \ + --arg source_node "${source_node}" \ + --arg destination_node "${destination_node}" \ + --arg vm_id "${vm_id}" \ + --arg root_volume_id "${volume_id}" \ + --arg data_volume_id "${data_volume_id}" \ + '{source_node:$source_node, destination_node:$destination_node, vm_id:$vm_id, root_volume_id:$root_volume_id, data_volume_id:$data_volume_id}' \ + >"${provider_vm_hosting_proof_dir}/migration-summary.json" + prismnet_get_port_json "${token}" "${org_id}" "${project_id}" "${subnet_id}" "${port_id}" \ + >"${provider_vm_hosting_proof_dir}/prismnet-port-after-migration.json" + fi [[ "$(printf '%s' "${root_volume_state_json}" | jq -r '.attachedToNode // empty')" == "${node_id}" ]] || die "root volume ${volume_id} is not owned by migrated 
node ${node_id}" [[ "$(printf '%s' "${data_volume_state_json}" | jq -r '.attachedToNode // empty')" == "${node_id}" ]] || die "data volume ${data_volume_id} is not owned by migrated node ${node_id}" local next_root_attachment_generation next_data_attachment_generation @@ -5646,6 +6260,9 @@ EOS wait_for_vm_console_pattern "${node_id}" "${vm_id}" "PHOTON_VM_DEMO_WEB_READY count=4" wait_for_vm_demo_http "${node_id}" "${port_ip}" demo_state_json="$(vm_demo_request_json "${node_id}" GET "${port_ip}" "/state")" + if [[ -n "${provider_vm_hosting_proof_dir}" ]]; then + printf '%s\n' "${demo_state_json}" >"${provider_vm_hosting_proof_dir}/demo-state-after-post-migration-restart.json" + fi assert_vm_demo_state "${demo_state_json}" 3 3 4 4 "${todo3_title}" "${todo3_attachment_name}" assert_vm_demo_backend_artifacts "${token}" "${demo_state_json}" 3 3 4 4 "${todo3_attachment_body}" assert_vm_demo_attachment_body "${node_id}" "${port_ip}" 3 "${todo3_attachment_body}" @@ -5657,6 +6274,10 @@ EOS ) root_volume_state_json="$(try_get_volume_json "${token}" "${get_root_volume_json}")" data_volume_state_json="$(try_get_volume_json "${token}" "${get_data_volume_json}")" + if [[ -n "${provider_vm_hosting_proof_dir}" ]]; then + printf '%s\n' "${root_volume_state_json}" >"${provider_vm_hosting_proof_dir}/root-volume-after-post-migration-restart.json" + printf '%s\n' "${data_volume_state_json}" >"${provider_vm_hosting_proof_dir}/data-volume-after-post-migration-restart.json" + fi [[ "$(printf '%s' "${root_volume_state_json}" | jq -r '.attachedToNode // empty')" == "${node_id}" ]] || die "root volume ${volume_id} drifted away from migrated node ${node_id} after restart" [[ "$(printf '%s' "${data_volume_state_json}" | jq -r '.attachedToNode // empty')" == "${node_id}" ]] || die "data volume ${data_volume_id} drifted away from migrated node ${node_id} after restart" [[ "$(printf '%s' "${root_volume_state_json}" | jq -r '.attachmentGeneration // 0')" == "${root_attachment_generation}" ]] || die "root volume ${volume_id} attachment generation changed across migrated-node restart" @@ -5739,6 +6360,9 @@ EOS done wait_for_plasmavmc_vm_watch_completion node01 "${vm_watch_output}" 60 assert_plasmavmc_vm_watch_events node01 "${vm_watch_output}" "${vm_id}" + if [[ -n "${provider_vm_hosting_proof_dir}" ]]; then + ssh_node node01 "cat '${vm_watch_output}'" >"${provider_vm_hosting_proof_dir}/vm-watch-output.json" + fi wait_for_prismnet_port_absent "${token}" "${org_id}" "${project_id}" "${subnet_id}" "${port_id}" >/dev/null port_id="" api_gateway_request DELETE "${token}" "/api/v1/security-groups/${demo_http_sg_id}" >/dev/null @@ -5809,6 +6433,14 @@ EOS die "image object still present after ImageService/DeleteImage" fi wait_for_lightningstor_counts_equal "${image_before_node01}" "${image_before_node04}" "${image_before_node05}" "PlasmaVMC image cleanup" + if [[ -n "${provider_vm_hosting_proof_dir}" ]]; then + ssh_node node01 "journalctl -u plasmavmc -b --no-pager" >"${provider_vm_hosting_proof_dir}/node01-plasmavmc-journal.log" + ssh_node node04 "journalctl -u plasmavmc -b --no-pager" >"${provider_vm_hosting_proof_dir}/node04-plasmavmc-journal.log" + ssh_node node05 "journalctl -u plasmavmc -b --no-pager" >"${provider_vm_hosting_proof_dir}/node05-plasmavmc-journal.log" + ssh_node node01 "journalctl -u coronafs -b --no-pager" >"${provider_vm_hosting_proof_dir}/node01-coronafs-journal.log" + ssh_node node04 "journalctl -u coronafs -b --no-pager" >"${provider_vm_hosting_proof_dir}/node04-coronafs-journal.log" + ssh_node node05 
"journalctl -u coronafs -b --no-pager" >"${provider_vm_hosting_proof_dir}/node05-coronafs-journal.log" + fi trap - RETURN cleanup_vm_storage_flow @@ -5900,7 +6532,7 @@ nightlight_remote_write_sample() { shift 4 local -a cmd=( - python3 "${REPO_ROOT}/nix/test-cluster/nightlight_remote_write.py" + "${CLUSTER_PYTHON}" "${REPO_ROOT}/nix/test-cluster/nightlight_remote_write.py" --url "${base_url}/write" --metric "${metric_name}" --value "${metric_value}" @@ -6095,7 +6727,7 @@ validate_nightlight_grpc_and_persistence() { grpc_tunnel="$(start_ssh_tunnel node06 15090 50088)" trap 'stop_ssh_tunnel node06 "${grpc_tunnel}"' RETURN - python3 "${REPO_ROOT}/nix/test-cluster/nightlight_remote_write.py" \ + "${CLUSTER_PYTHON}" "${REPO_ROOT}/nix/test-cluster/nightlight_remote_write.py" \ --url "${base_url}/write" \ --metric "${metric_name}" \ --value "${metric_value}" \ @@ -6850,19 +7482,34 @@ validate_network_provider_matrix() { log "Validating component matrix: PrismNet, FlashDNS, and FiberLB in composed tenant scenarios" local iam_tunnel="" prism_tunnel="" dns_tunnel="" lb_tunnel="" + local provider_network_proof_dir="" iam_tunnel="$(start_ssh_tunnel node01 15080 50080)" prism_tunnel="$(start_ssh_tunnel node01 15081 50081)" dns_tunnel="$(start_ssh_tunnel node01 15084 50084)" lb_tunnel="$(start_ssh_tunnel node01 15085 50085)" + if [[ -n "${ULTRACLOUD_PROVIDER_VM_PROOF_DIR:-}" ]]; then + provider_network_proof_dir="$(provider_vm_proof_subdir "network-provider")" + jq -n \ + --arg command "nix run ./nix/test-cluster#cluster -- provider-vm-reality-proof" \ + --arg component_bundle "prismnet+flashdns+fiberlb" \ + --arg artifact_dir "${provider_network_proof_dir}" \ + '{command:$command, component_bundle:$component_bundle, artifact_dir:$artifact_dir}' \ + >"${provider_network_proof_dir}/meta.json" + fi + local org_id="matrix-net-org" local project_id="matrix-net-project" local principal_id="matrix-net-$(date +%s)" local token="" local vpc_id="" subnet_id="" port_id="" port_ip="" + local security_group_id="" security_group_rule_id="" local zone_id="" zone_name="matrix-$(date +%s).cluster.test" local workload_record_id="" service_record_id="" local lb_id="" pool_id="" backend_id="" listener_id="" listener_port="" + local http_pool_id="" http_backend_id="" certificate_id="" + local https_listener_id="" https_listener_port="" + local terminated_listener_id="" terminated_listener_port="" local workload_fqdn="" service_fqdn="" cleanup_network_provider_matrix() { @@ -6884,18 +7531,48 @@ validate_network_provider_matrix() { -d "$(jq -cn --arg id "${listener_id}" '{id:$id}')" \ 127.0.0.1:15085 fiberlb.v1.ListenerService/DeleteListener >/dev/null 2>&1 || true fi + if [[ -n "${terminated_listener_id:-}" ]]; then + grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg id "${terminated_listener_id}" '{id:$id}')" \ + 127.0.0.1:15085 fiberlb.v1.ListenerService/DeleteListener >/dev/null 2>&1 || true + fi + if [[ -n "${https_listener_id:-}" ]]; then + grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg id "${https_listener_id}" '{id:$id}')" \ + 127.0.0.1:15085 fiberlb.v1.ListenerService/DeleteListener >/dev/null 2>&1 || true + fi if [[ -n "${backend_id:-}" ]]; then grpcurl -plaintext -H "authorization: Bearer ${token}" \ -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ -d "$(jq -cn --arg id "${backend_id}" '{id:$id}')" \ 
127.0.0.1:15085 fiberlb.v1.BackendService/DeleteBackend >/dev/null 2>&1 || true fi + if [[ -n "${http_backend_id:-}" ]]; then + grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg id "${http_backend_id}" '{id:$id}')" \ + 127.0.0.1:15085 fiberlb.v1.BackendService/DeleteBackend >/dev/null 2>&1 || true + fi if [[ -n "${pool_id:-}" ]]; then grpcurl -plaintext -H "authorization: Bearer ${token}" \ -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ -d "$(jq -cn --arg id "${pool_id}" '{id:$id}')" \ 127.0.0.1:15085 fiberlb.v1.PoolService/DeletePool >/dev/null 2>&1 || true fi + if [[ -n "${http_pool_id:-}" ]]; then + grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg id "${http_pool_id}" '{id:$id}')" \ + 127.0.0.1:15085 fiberlb.v1.PoolService/DeletePool >/dev/null 2>&1 || true + fi + if [[ -n "${certificate_id:-}" ]]; then + grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg id "${certificate_id}" '{id:$id}')" \ + 127.0.0.1:15085 fiberlb.v1.CertificateService/DeleteCertificate >/dev/null 2>&1 || true + fi if [[ -n "${lb_id:-}" ]]; then grpcurl -plaintext -H "authorization: Bearer ${token}" \ -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ @@ -6920,6 +7597,12 @@ validate_network_provider_matrix() { -d "$(jq -cn --arg org "${org_id}" --arg project "${project_id}" --arg id "${vpc_id}" '{orgId:$org, projectId:$project, id:$id}')" \ 127.0.0.1:15081 prismnet.VpcService/DeleteVpc >/dev/null 2>&1 || true fi + if [[ -n "${security_group_id:-}" ]]; then + grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${PRISMNET_PROTO_DIR}" -proto "${PRISMNET_PROTO}" \ + -d "$(jq -cn --arg org "${org_id}" --arg project "${project_id}" --arg id "${security_group_id}" '{orgId:$org, projectId:$project, id:$id}')" \ + 127.0.0.1:15081 prismnet.SecurityGroupService/DeleteSecurityGroup >/dev/null 2>&1 || true + fi if [[ -n "${zone_id:-}" ]]; then grpcurl -plaintext -H "authorization: Bearer ${token}" \ -import-path "${FLASHDNS_PROTO_DIR}" -proto "${FLASHDNS_PROTO}" \ @@ -6958,10 +7641,62 @@ validate_network_provider_matrix() { -import-path "${PRISMNET_PROTO_DIR}" -proto "${PRISMNET_PROTO}" \ -d "$(jq -cn --arg org "${org_id}" --arg project "${project_id}" --arg subnet "${subnet_id}" '{orgId:$org, projectId:$project, subnetId:$subnet, name:"matrix-port", description:"component matrix", ipAddress:""}')" \ 127.0.0.1:15081 prismnet.PortService/CreatePort)" + if [[ -n "${provider_network_proof_dir}" ]]; then + printf '%s\n' "${port_response}" >"${provider_network_proof_dir}/prismnet-port-create.json" + fi port_id="$(printf '%s' "${port_response}" | jq -r '.port.id')" port_ip="$(printf '%s' "${port_response}" | jq -r '.port.ipAddress')" [[ -n "${port_id}" && "${port_id}" != "null" && -n "${port_ip}" && "${port_ip}" != "null" ]] || die "component matrix PrismNet port creation failed" + if [[ -n "${provider_network_proof_dir}" ]]; then + grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${PRISMNET_PROTO_DIR}" -proto "${PRISMNET_PROTO}" \ + -d "$(jq -cn --arg org "${org_id}" --arg project "${project_id}" --arg subnet "${subnet_id}" --arg id "${port_id}" '{orgId:$org, projectId:$project, subnetId:$subnet, id:$id}')" \ + 127.0.0.1:15081 prismnet.PortService/GetPort \ + 
>"${provider_network_proof_dir}/prismnet-port-get.json" + fi + + log "Matrix case: PrismNet security-group ACL lifecycle" + security_group_id="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${PRISMNET_PROTO_DIR}" -proto "${PRISMNET_PROTO}" \ + -d "$(jq -cn --arg org "${org_id}" --arg project "${project_id}" '{orgId:$org, projectId:$project, name:"matrix-sg", description:"component matrix ACL lifecycle"}')" \ + 127.0.0.1:15081 prismnet.SecurityGroupService/CreateSecurityGroup | jq -r '.securityGroup.id')" + [[ -n "${security_group_id}" && "${security_group_id}" != "null" ]] || die "component matrix PrismNet security group creation failed" + + security_group_rule_id="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${PRISMNET_PROTO_DIR}" -proto "${PRISMNET_PROTO}" \ + -d "$(jq -cn --arg org "${org_id}" --arg project "${project_id}" --arg sg "${security_group_id}" '{orgId:$org, projectId:$project, securityGroupId:$sg, direction:"RULE_DIRECTION_INGRESS", protocol:"IP_PROTOCOL_TCP", portRangeMin:443, portRangeMax:443, remoteCidr:"0.0.0.0/0", description:"matrix https ingress"}')" \ + 127.0.0.1:15081 prismnet.SecurityGroupService/AddRule | jq -r '.rule.id')" + [[ -n "${security_group_rule_id}" && "${security_group_rule_id}" != "null" ]] || die "component matrix PrismNet security group rule creation failed" + + local security_group_after_add_json + security_group_after_add_json="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${PRISMNET_PROTO_DIR}" -proto "${PRISMNET_PROTO}" \ + -d "$(jq -cn --arg org "${org_id}" --arg project "${project_id}" --arg id "${security_group_id}" '{orgId:$org, projectId:$project, id:$id}')" \ + 127.0.0.1:15081 prismnet.SecurityGroupService/GetSecurityGroup)" + if [[ -n "${provider_network_proof_dir}" ]]; then + printf '%s\n' "${security_group_after_add_json}" >"${provider_network_proof_dir}/prismnet-security-group-after-add.json" + fi + printf '%s' "${security_group_after_add_json}" \ + | jq -e --arg rule "${security_group_rule_id}" '.securityGroup.rules | any(.id == $rule and .portRangeMin == 443 and .remoteCidr == "0.0.0.0/0")' >/dev/null + + grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${PRISMNET_PROTO_DIR}" -proto "${PRISMNET_PROTO}" \ + -d "$(jq -cn --arg org "${org_id}" --arg project "${project_id}" --arg sg "${security_group_id}" --arg rule "${security_group_rule_id}" '{orgId:$org, projectId:$project, securityGroupId:$sg, ruleId:$rule}')" \ + 127.0.0.1:15081 prismnet.SecurityGroupService/RemoveRule >/dev/null + + local security_group_after_remove_json + security_group_after_remove_json="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${PRISMNET_PROTO_DIR}" -proto "${PRISMNET_PROTO}" \ + -d "$(jq -cn --arg org "${org_id}" --arg project "${project_id}" --arg id "${security_group_id}" '{orgId:$org, projectId:$project, id:$id}')" \ + 127.0.0.1:15081 prismnet.SecurityGroupService/GetSecurityGroup)" + if [[ -n "${provider_network_proof_dir}" ]]; then + printf '%s\n' "${security_group_after_remove_json}" >"${provider_network_proof_dir}/prismnet-security-group-after-remove.json" + fi + printf '%s' "${security_group_after_remove_json}" \ + | jq -e --arg rule "${security_group_rule_id}" '.securityGroup.rules | map(.id) | index($rule) == null' >/dev/null + security_group_rule_id="" + log "Matrix case: PrismNet + FlashDNS" zone_id="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ -import-path "${FLASHDNS_PROTO_DIR}" -proto 
"${FLASHDNS_PROTO}" \ @@ -6986,33 +7721,125 @@ validate_network_provider_matrix() { fi sleep 2 done + if [[ -n "${provider_network_proof_dir}" ]]; then + ssh_node node01 "dig @10.100.0.11 -p 5353 +short ${workload_fqdn} A" \ + >"${provider_network_proof_dir}/flashdns-workload-authoritative-answer.txt" + fi log "Matrix case: PrismNet + FiberLB" listener_port="$(allocate_free_listener_port node01 18180 18999)" || die "failed to allocate a free FiberLB listener port for component matrix" - lb_id="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ + local lb_create_response + lb_create_response="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ -d "$(jq -cn --arg org "${org_id}" --arg project "${project_id}" '{name:"matrix-lb", orgId:$org, projectId:$project, description:"component matrix"}')" \ - 127.0.0.1:15085 fiberlb.v1.LoadBalancerService/CreateLoadBalancer | jq -r '.loadbalancer.id')" + 127.0.0.1:15085 fiberlb.v1.LoadBalancerService/CreateLoadBalancer)" + if [[ -n "${provider_network_proof_dir}" ]]; then + printf '%s\n' "${lb_create_response}" >"${provider_network_proof_dir}/fiberlb-loadbalancer-create.json" + fi + lb_id="$(printf '%s' "${lb_create_response}" | jq -r '.loadbalancer.id')" [[ -n "${lb_id}" && "${lb_id}" != "null" ]] || die "component matrix FiberLB creation failed" - pool_id="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ + local pool_create_response + pool_create_response="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ -d "$(jq -cn --arg lb "${lb_id}" '{name:"matrix-pool", loadbalancerId:$lb, algorithm:"POOL_ALGORITHM_ROUND_ROBIN", protocol:"POOL_PROTOCOL_TCP"}')" \ - 127.0.0.1:15085 fiberlb.v1.PoolService/CreatePool | jq -r '.pool.id')" + 127.0.0.1:15085 fiberlb.v1.PoolService/CreatePool)" + if [[ -n "${provider_network_proof_dir}" ]]; then + printf '%s\n' "${pool_create_response}" >"${provider_network_proof_dir}/fiberlb-pool-create.json" + fi + pool_id="$(printf '%s' "${pool_create_response}" | jq -r '.pool.id')" [[ -n "${pool_id}" && "${pool_id}" != "null" ]] || die "component matrix FiberLB pool creation failed" - backend_id="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ + local backend_create_response + backend_create_response="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ -d "$(jq -cn --arg pool "${pool_id}" '{name:"matrix-backend", poolId:$pool, address:"10.100.0.11", port:8081, weight:1}')" \ - 127.0.0.1:15085 fiberlb.v1.BackendService/CreateBackend | jq -r '.backend.id')" + 127.0.0.1:15085 fiberlb.v1.BackendService/CreateBackend)" + if [[ -n "${provider_network_proof_dir}" ]]; then + printf '%s\n' "${backend_create_response}" >"${provider_network_proof_dir}/fiberlb-backend-create.json" + fi + backend_id="$(printf '%s' "${backend_create_response}" | jq -r '.backend.id')" [[ -n "${backend_id}" && "${backend_id}" != "null" ]] || die "component matrix FiberLB backend creation failed" - listener_id="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ + local listener_create_response + listener_create_response="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ -d "$(jq -cn --arg lb "${lb_id}" --arg pool "${pool_id}" --argjson port "${listener_port}" '{name:"matrix-listener", loadbalancerId:$lb, protocol:"LISTENER_PROTOCOL_TCP", 
port:$port, defaultPoolId:$pool, connectionLimit:0}')" \ - 127.0.0.1:15085 fiberlb.v1.ListenerService/CreateListener | jq -r '.listener.id')" + 127.0.0.1:15085 fiberlb.v1.ListenerService/CreateListener)" + if [[ -n "${provider_network_proof_dir}" ]]; then + printf '%s\n' "${listener_create_response}" >"${provider_network_proof_dir}/fiberlb-listener-create.json" + fi + listener_id="$(printf '%s' "${listener_create_response}" | jq -r '.listener.id')" [[ -n "${listener_id}" && "${listener_id}" != "null" ]] || die "component matrix FiberLB listener creation failed" wait_for_http node01 "http://127.0.0.1:${listener_port}/health" + if [[ -n "${provider_network_proof_dir}" ]]; then + grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn '{pageSize:100, pageToken:""}')" \ + 127.0.0.1:15085 fiberlb.v1.LoadBalancerService/ListLoadBalancers \ + >"${provider_network_proof_dir}/fiberlb-loadbalancers.json" + ssh_node node01 "curl -fsS --max-time 5 http://127.0.0.1:${listener_port}/health" \ + >"${provider_network_proof_dir}/fiberlb-tcp-health-before-drain.txt" + grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg id "${backend_id}" '{id:$id, adminState:"BACKEND_ADMIN_STATE_DISABLED"}')" \ + 127.0.0.1:15085 fiberlb.v1.BackendService/UpdateBackend \ + >"${provider_network_proof_dir}/fiberlb-backend-disabled.json" + deadline=$((SECONDS + HTTP_WAIT_TIMEOUT)) + while true; do + if ! ssh_node node01 "curl -fsS --max-time 2 http://127.0.0.1:${listener_port}/health >/dev/null" >/dev/null 2>&1; then + printf 'listener drained after backend disable\n' >"${provider_network_proof_dir}/fiberlb-drain-summary.txt" + break + fi + if (( SECONDS >= deadline )); then + die "timed out waiting for disabled FiberLB backend to stop serving traffic in provider/vm proof" + fi + sleep 2 + done + grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg id "${backend_id}" '{id:$id, adminState:"BACKEND_ADMIN_STATE_ENABLED"}')" \ + 127.0.0.1:15085 fiberlb.v1.BackendService/UpdateBackend \ + >"${provider_network_proof_dir}/fiberlb-backend-restored.json" + wait_for_http node01 "http://127.0.0.1:${listener_port}/health" + ssh_node node01 "curl -fsS --max-time 5 http://127.0.0.1:${listener_port}/health" \ + >"${provider_network_proof_dir}/fiberlb-tcp-health-after-restore.txt" + fi + + log "Matrix case: FiberLB HTTPS termination" + certificate_id="$(create_fiberlb_test_certificate "${token}" "${lb_id}" "matrix-fiberlb-cert")" + [[ -n "${certificate_id}" && "${certificate_id}" != "null" ]] || die "component matrix FiberLB certificate creation failed" + + http_pool_id="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg lb "${lb_id}" '{name:"matrix-http-pool", loadbalancerId:$lb, algorithm:"POOL_ALGORITHM_ROUND_ROBIN", protocol:"POOL_PROTOCOL_HTTP"}')" \ + 127.0.0.1:15085 fiberlb.v1.PoolService/CreatePool | jq -r '.pool.id')" + [[ -n "${http_pool_id}" && "${http_pool_id}" != "null" ]] || die "component matrix FiberLB HTTP pool creation failed" + + http_backend_id="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg pool "${http_pool_id}" '{name:"matrix-http-backend", poolId:$pool, 
address:"10.100.0.11", port:8081, weight:1}')" \ + 127.0.0.1:15085 fiberlb.v1.BackendService/CreateBackend | jq -r '.backend.id')" + [[ -n "${http_backend_id}" && "${http_backend_id}" != "null" ]] || die "component matrix FiberLB HTTP backend creation failed" + + https_listener_port="$(allocate_free_listener_port node01 19000 19149)" || die "failed to allocate a free FiberLB HTTPS listener port for component matrix" + https_listener_id="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg lb "${lb_id}" --arg pool "${http_pool_id}" --arg cert "${certificate_id}" --argjson port "${https_listener_port}" '{name:"matrix-https-listener", loadbalancerId:$lb, protocol:"LISTENER_PROTOCOL_HTTPS", port:$port, defaultPoolId:$pool, tlsConfig:{certificateId:$cert, minVersion:"TLS_VERSION_TLS_1_2"}, connectionLimit:0}')" \ + 127.0.0.1:15085 fiberlb.v1.ListenerService/CreateListener | jq -r '.listener.id')" + [[ -n "${https_listener_id}" && "${https_listener_id}" != "null" ]] || die "component matrix FiberLB HTTPS listener creation failed" + + terminated_listener_port="$(allocate_free_listener_port node01 19150 19299)" || die "failed to allocate a free FiberLB TerminatedHttps listener port for component matrix" + terminated_listener_id="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ + -import-path "${FIBERLB_PROTO_DIR}" -proto "${FIBERLB_PROTO}" \ + -d "$(jq -cn --arg lb "${lb_id}" --arg pool "${http_pool_id}" --arg cert "${certificate_id}" --argjson port "${terminated_listener_port}" '{name:"matrix-terminated-https-listener", loadbalancerId:$lb, protocol:"LISTENER_PROTOCOL_TERMINATED_HTTPS", port:$port, defaultPoolId:$pool, tlsConfig:{certificateId:$cert, minVersion:"TLS_VERSION_TLS_1_2"}, connectionLimit:0}')" \ + 127.0.0.1:15085 fiberlb.v1.ListenerService/CreateListener | jq -r '.listener.id')" + [[ -n "${terminated_listener_id}" && "${terminated_listener_id}" != "null" ]] || die "component matrix FiberLB TerminatedHttps listener creation failed" + + wait_for_tcp_port node01 "${https_listener_port}" + wait_for_tcp_port node01 "${terminated_listener_port}" + ssh_node node01 "curl -kfsS --max-time 5 https://127.0.0.1:${https_listener_port}/health >/dev/null" + ssh_node node01 "curl -kfsS --max-time 5 https://127.0.0.1:${terminated_listener_port}/health >/dev/null" log "Matrix case: PrismNet + FlashDNS + FiberLB" service_record_id="$(grpcurl -plaintext -H "authorization: Bearer ${token}" \ @@ -7034,6 +7861,15 @@ validate_network_provider_matrix() { done ssh_node node01 "curl -fsS --max-time 5 --resolve ${service_fqdn}:${listener_port}:10.100.0.11 http://${service_fqdn}:${listener_port}/health >/dev/null" + if [[ -n "${provider_network_proof_dir}" ]]; then + ssh_node node01 "dig @10.100.0.11 -p 5353 +short ${service_fqdn} A" \ + >"${provider_network_proof_dir}/flashdns-service-authoritative-answer.txt" + ssh_node node01 "curl -fsS --max-time 5 --resolve ${service_fqdn}:${listener_port}:10.100.0.11 http://${service_fqdn}:${listener_port}/health" \ + >"${provider_network_proof_dir}/fiberlb-service-curl.txt" + ssh_node node01 "journalctl -u prismnet -b --no-pager" >"${provider_network_proof_dir}/prismnet-journal.log" + ssh_node node01 "journalctl -u flashdns -b --no-pager" >"${provider_network_proof_dir}/flashdns-journal.log" + ssh_node node01 "journalctl -u fiberlb -b --no-pager" >"${provider_network_proof_dir}/fiberlb-journal.log" + fi trap - RETURN cleanup_network_provider_matrix @@ -8271,7 +9107,7 @@ 
validate_worker_fault_injection() { key="survive-${RANDOM}.txt" ensure_lightningstor_bucket 15086 "${token}" "${bucket}" "${org_id}" "${project_id}" - tmpfile="$(mktemp)" + tmpfile="$(mktemp -p "${TMPDIR}" photon-worker-fault-XXXXXX)" trap 'rm -f "${tmpfile}"; start_vm node04 >/dev/null 2>&1 || true; wait_for_ssh node04 || true; stop_ssh_tunnel node01 "${vm_tunnel}"; stop_ssh_tunnel node01 "${ls_tunnel}"; stop_ssh_tunnel node01 "${iam_tunnel}"' RETURN printf 'worker-fault-check-%s\n' "${RANDOM}" >"${tmpfile}" @@ -8323,6 +9159,864 @@ validate_fault_injection() { validate_worker_fault_injection } +run_durability_proof() { + local proof_dir started_at finished_at + local chainfire_rest_tunnel="" flaredb_rest_tunnel="" + local deployer_tunnel="" iam_tunnel="" ls_tunnel="" coronafs_controller_tunnel="" coronafs_node04_tunnel="" + local bucket="" token="" coronafs_controller_volume="" coronafs_node04_volume="" + + proof_dir="$(prepare_durability_proof_dir)" + started_at="$(date -Iseconds)" + + cleanup_durability_proof() { + set +e + if [[ -n "${bucket:-}" && -n "${token:-}" && -n "${ls_tunnel:-}" ]]; then + delete_lightningstor_bucket_recursive 15086 "${token}" "${bucket}" >/dev/null 2>&1 || true + fi + if [[ -n "${coronafs_node04_volume:-}" && -n "${coronafs_node04_tunnel:-}" ]]; then + coronafs_delete_volume 25088 "${coronafs_node04_volume}" >/dev/null 2>&1 || true + fi + if [[ -n "${coronafs_controller_volume:-}" && -n "${coronafs_controller_tunnel:-}" ]]; then + coronafs_delete_volume 15088 "${coronafs_controller_volume}" >/dev/null 2>&1 || true + fi + ssh_node node05 "systemctl start lightningstor.service >/dev/null 2>&1 || true" >/dev/null 2>&1 || true + ssh_node node01 "systemctl start coronafs.service >/dev/null 2>&1 || true" >/dev/null 2>&1 || true + if [[ -n "${coronafs_node04_tunnel:-}" ]]; then + stop_ssh_tunnel node04 "${coronafs_node04_tunnel}" >/dev/null 2>&1 || true + fi + if [[ -n "${coronafs_controller_tunnel:-}" ]]; then + stop_ssh_tunnel node01 "${coronafs_controller_tunnel}" >/dev/null 2>&1 || true + fi + if [[ -n "${ls_tunnel:-}" ]]; then + stop_ssh_tunnel node01 "${ls_tunnel}" >/dev/null 2>&1 || true + fi + if [[ -n "${iam_tunnel:-}" ]]; then + stop_ssh_tunnel node01 "${iam_tunnel}" >/dev/null 2>&1 || true + fi + if [[ -n "${deployer_tunnel:-}" ]]; then + stop_ssh_tunnel node06 "${deployer_tunnel}" >/dev/null 2>&1 || true + fi + if [[ -n "${flaredb_rest_tunnel:-}" ]]; then + stop_ssh_tunnel node01 "${flaredb_rest_tunnel}" >/dev/null 2>&1 || true + fi + if [[ -n "${chainfire_rest_tunnel:-}" ]]; then + stop_ssh_tunnel node01 "${chainfire_rest_tunnel}" >/dev/null 2>&1 || true + fi + } + + trap cleanup_durability_proof RETURN + + jq -n \ + --arg command "nix run ./nix/test-cluster#cluster -- durability-proof" \ + --arg proof_dir "${proof_dir}" \ + --arg started_at "${started_at}" \ + --arg ultracloud_work_root "${ULTRACLOUD_WORK_ROOT}" \ + --arg photon_cluster_work_root "${WORK_ROOT}" \ + --arg build_profile "${BUILD_PROFILE}" \ + '{command:$command, proof_dir:$proof_dir, started_at:$started_at, ultracloud_work_root:$ultracloud_work_root, photon_cluster_work_root:$photon_cluster_work_root, build_profile:$build_profile}' \ + >"${proof_dir}/meta.json" + + log "Running durability proof; artifacts will be written to ${proof_dir}" + + validate_control_plane + validate_workers + wait_for_http node06 "http://127.0.0.1:8088/health" + + chainfire_rest_tunnel="$(start_ssh_tunnel node01 18081 8081)" + flaredb_rest_tunnel="$(start_ssh_tunnel node01 18082 8082)" + 
deployer_tunnel="$(start_ssh_tunnel node06 13012 8088)" + iam_tunnel="$(start_ssh_tunnel node01 15080 50080)" + ls_tunnel="$(start_ssh_tunnel node01 15086 50086)" + coronafs_controller_tunnel="$(start_ssh_tunnel node01 15088 "${CORONAFS_API_PORT}")" + coronafs_node04_tunnel="$(start_ssh_tunnel node04 25088 "${CORONAFS_API_PORT}")" + + jq -n \ + --arg chainfire_rest "http://127.0.0.1:18081" \ + --arg flaredb_rest "http://127.0.0.1:18082" \ + --arg deployer_http "http://127.0.0.1:13012" \ + --arg iam_grpc "127.0.0.1:15080" \ + --arg lightningstor_grpc "127.0.0.1:15086" \ + --arg coronafs_controller "http://127.0.0.1:15088" \ + --arg coronafs_node04 "http://127.0.0.1:25088" \ + '{chainfire_rest:$chainfire_rest, flaredb_rest:$flaredb_rest, deployer_http:$deployer_http, iam_grpc:$iam_grpc, lightningstor_grpc:$lightningstor_grpc, coronafs_controller:$coronafs_controller, coronafs_node04:$coronafs_node04}' \ + >"${proof_dir}/environment.json" + + log "Durability proof: ChainFire logical backup/restore" + local chainfire_key chainfire_value_json chainfire_backup_value chainfire_missing_status + chainfire_key="durability-chainfire-$(date +%s)-$RANDOM" + chainfire_value_json="$(jq -cn --arg proof "chainfire-backup-restore" --arg key "${chainfire_key}" --arg ts "${started_at}" '{proof:$proof, key:$key, timestamp:$ts}')" + curl -fsS \ + -X PUT \ + -H 'content-type: application/json' \ + -d "$(jq -cn --arg value "${chainfire_value_json}" '{value:$value}')" \ + "http://127.0.0.1:18081/api/v1/kv/${chainfire_key}" \ + >"${proof_dir}/chainfire-put.json" + curl -fsS "http://127.0.0.1:18081/api/v1/kv/${chainfire_key}" >"${proof_dir}/chainfire-backup-response.json" + jq -e --argjson expected "${chainfire_value_json}" '.data.value | fromjson == $expected' \ + "${proof_dir}/chainfire-backup-response.json" >/dev/null \ + || die "ChainFire backup response did not match the durability proof payload" + chainfire_backup_value="$(jq -r '.data.value' "${proof_dir}/chainfire-backup-response.json")" + printf '%s\n' "${chainfire_backup_value}" >"${proof_dir}/chainfire-backup-value.json" + curl -fsS -X DELETE "http://127.0.0.1:18081/api/v1/kv/${chainfire_key}" >"${proof_dir}/chainfire-delete.json" + chainfire_missing_status="$(curl -sS -o "${proof_dir}/chainfire-after-delete.out" -w '%{http_code}' "http://127.0.0.1:18081/api/v1/kv/${chainfire_key}" || true)" + [[ "${chainfire_missing_status}" == "404" ]] || die "expected ChainFire durability key deletion to return 404, got ${chainfire_missing_status}" + curl -fsS \ + -X PUT \ + -H 'content-type: application/json' \ + -d "$(jq -cn --arg value "${chainfire_backup_value}" '{value:$value}')" \ + "http://127.0.0.1:18081/api/v1/kv/${chainfire_key}" \ + >"${proof_dir}/chainfire-restore-put.json" + curl -fsS "http://127.0.0.1:18081/api/v1/kv/${chainfire_key}" >"${proof_dir}/chainfire-restored-response.json" + jq -e --argjson expected "${chainfire_value_json}" '.data.value | fromjson == $expected' \ + "${proof_dir}/chainfire-restored-response.json" >/dev/null \ + || die "ChainFire restore did not reproduce the backed-up payload" + curl -fsS -X DELETE "http://127.0.0.1:18081/api/v1/kv/${chainfire_key}" >"${proof_dir}/chainfire-cleanup.json" + + log "Durability proof: FlareDB logical SQL backup/restore" + local flaredb_namespace flaredb_table flaredb_payload flaredb_backup_id flaredb_backup_payload + flaredb_namespace="durability_sql_$(date +%s)_$RANDOM" + flaredb_table="restore_rows" + flaredb_payload="payload-${RANDOM}" + curl -fsS \ + -H 'content-type: application/json' \ + -d 
"$(jq -cn --arg namespace "${flaredb_namespace}" --arg query "CREATE TABLE ${flaredb_table} (id INTEGER PRIMARY KEY, payload TEXT NOT NULL)" '{namespace:$namespace, query:$query}')" \ + http://127.0.0.1:18082/api/v1/sql \ + >"${proof_dir}/flaredb-create-table.json" + curl -fsS \ + -H 'content-type: application/json' \ + -d "$(jq -cn --arg namespace "${flaredb_namespace}" --arg query "INSERT INTO ${flaredb_table} (id, payload) VALUES (1, '${flaredb_payload}')" '{namespace:$namespace, query:$query}')" \ + http://127.0.0.1:18082/api/v1/sql \ + >"${proof_dir}/flaredb-insert.json" + curl -fsS \ + -H 'content-type: application/json' \ + -d "$(jq -cn --arg namespace "${flaredb_namespace}" --arg query "SELECT id, payload FROM ${flaredb_table} ORDER BY id" '{namespace:$namespace, query:$query}')" \ + http://127.0.0.1:18082/api/v1/sql \ + >"${proof_dir}/flaredb-backup.json" + jq -e --arg payload "${flaredb_payload}" ' + (.data.rows | length) == 1 and + .data.rows[0].id == 1 and + .data.rows[0].payload == $payload + ' "${proof_dir}/flaredb-backup.json" >/dev/null \ + || die "FlareDB backup query did not return the expected row" + flaredb_backup_id="$(jq -r '.data.rows[0].id' "${proof_dir}/flaredb-backup.json")" + flaredb_backup_payload="$(jq -r '.data.rows[0].payload' "${proof_dir}/flaredb-backup.json")" + curl -fsS \ + -H 'content-type: application/json' \ + -d "$(jq -cn --arg namespace "${flaredb_namespace}" --arg query "DELETE FROM ${flaredb_table} WHERE id = ${flaredb_backup_id}" '{namespace:$namespace, query:$query}')" \ + http://127.0.0.1:18082/api/v1/sql \ + >"${proof_dir}/flaredb-delete.json" + curl -fsS \ + -H 'content-type: application/json' \ + -d "$(jq -cn --arg namespace "${flaredb_namespace}" --arg query "SELECT id, payload FROM ${flaredb_table} ORDER BY id" '{namespace:$namespace, query:$query}')" \ + http://127.0.0.1:18082/api/v1/sql \ + >"${proof_dir}/flaredb-after-delete.json" + jq -e '(.data.rows | length) == 0' "${proof_dir}/flaredb-after-delete.json" >/dev/null \ + || die "FlareDB delete step did not remove the backed-up row" + curl -fsS \ + -H 'content-type: application/json' \ + -d "$(jq -cn --arg namespace "${flaredb_namespace}" --arg query "INSERT INTO ${flaredb_table} (id, payload) VALUES (${flaredb_backup_id}, '${flaredb_backup_payload}')" '{namespace:$namespace, query:$query}')" \ + http://127.0.0.1:18082/api/v1/sql \ + >"${proof_dir}/flaredb-restore-insert.json" + curl -fsS \ + -H 'content-type: application/json' \ + -d "$(jq -cn --arg namespace "${flaredb_namespace}" --arg query "SELECT id, payload FROM ${flaredb_table} ORDER BY id" '{namespace:$namespace, query:$query}')" \ + http://127.0.0.1:18082/api/v1/sql \ + >"${proof_dir}/flaredb-restored.json" + jq -e --arg payload "${flaredb_backup_payload}" ' + (.data.rows | length) == 1 and + .data.rows[0].id == 1 and + .data.rows[0].payload == $payload + ' "${proof_dir}/flaredb-restored.json" >/dev/null \ + || die "FlareDB restore did not replay the backed-up row" + log "Durability proof: Deployer bootstrap-state backup plus idempotent restore replay" + local deployer_machine_id deployer_node_id deployer_request_json + deployer_machine_id="durability-machine-$(date +%s)-$RANDOM" + deployer_node_id="durability-node-$(date +%s)-$RANDOM" + deployer_request_json="$( + jq -cn \ + --arg machine "${deployer_machine_id}" \ + --arg node "${deployer_node_id}" \ + '{ + machine_id:$machine, + node_config:{ + assignment:{ + node_id:$node, + hostname:$node, + role:"worker", + ip:"10.100.0.251", + labels:{purpose:"durability-proof"}, + 
pool:"durability", + node_class:"durability" + }, + bootstrap_plan:{ + services:["nix-agent"], + nix_profile:"durability-proof", + install_plan:{ + nixos_configuration:"single-node-quickstart", + disko_config_path:"nix/iso/disko-single-disk.nix", + target_disk_by_id:"/dev/disk/by-id/virtio-durability-proof" + } + }, + bootstrap_secrets:{ + ssh_authorized_keys:["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMockDurabilityProofKey ultracloud-durability-proof"] + } + } + }' + )" + printf '%s\n' "${deployer_request_json}" >"${proof_dir}/deployer-pre-register-request.json" + curl -fsS \ + -H 'content-type: application/json' \ + -H 'x-deployer-token: test-admin-token' \ + -d "${deployer_request_json}" \ + http://127.0.0.1:13012/api/v1/admin/nodes \ + >"${proof_dir}/deployer-pre-register-response.json" + jq -e --arg machine "${deployer_machine_id}" --arg node "${deployer_node_id}" '.success == true and .machine_id == $machine and .node_id == $node' \ + "${proof_dir}/deployer-pre-register-response.json" >/dev/null \ + || die "Deployer pre-registration did not accept the durability proof node config" + curl -fsS \ + -H 'x-deployer-token: test-admin-token' \ + http://127.0.0.1:13012/api/v1/admin/nodes \ + >"${proof_dir}/deployer-backup-list.json" + jq -e --arg node "${deployer_node_id}" ' + any(.nodes[]?; .node_id == $node and .state == "pre-registered") + ' "${proof_dir}/deployer-backup-list.json" >/dev/null \ + || die "Deployer backup listing did not include the pre-registered durability proof node" + ssh_node node06 "systemctl restart deployer.service" + wait_for_unit node06 deployer + wait_for_http node06 "http://127.0.0.1:8088/health" + curl -fsS \ + -H 'x-deployer-token: test-admin-token' \ + http://127.0.0.1:13012/api/v1/admin/nodes \ + >"${proof_dir}/deployer-post-restart-list.json" + jq -e --arg node "${deployer_node_id}" ' + any(.nodes[]?; .node_id == $node and .state == "pre-registered") + ' "${proof_dir}/deployer-post-restart-list.json" >/dev/null \ + || die "Deployer pre-registered node did not survive a deployer restart" + curl -fsS \ + -H 'content-type: application/json' \ + -H 'x-deployer-token: test-admin-token' \ + -d "${deployer_request_json}" \ + http://127.0.0.1:13012/api/v1/admin/nodes \ + >"${proof_dir}/deployer-replay-response.json" + jq -e --arg machine "${deployer_machine_id}" --arg node "${deployer_node_id}" '.success == true and .machine_id == $machine and .node_id == $node' \ + "${proof_dir}/deployer-replay-response.json" >/dev/null \ + || die "Deployer restore replay did not preserve the registered node config" + curl -fsS \ + -H 'x-deployer-token: test-admin-token' \ + http://127.0.0.1:13012/api/v1/admin/nodes \ + >"${proof_dir}/deployer-replayed-list.json" + jq -s -e --arg node "${deployer_node_id}" ' + [ .[] | .nodes[]? 
| select(.node_id == $node) ] as $entries + | ($entries | length) == 3 + and ($entries[0] == $entries[1]) + and ($entries[1] == $entries[2]) + and $entries[0].state == "pre-registered" + ' \ + "${proof_dir}/deployer-backup-list.json" \ + "${proof_dir}/deployer-post-restart-list.json" \ + "${proof_dir}/deployer-replayed-list.json" >/dev/null \ + || die "Deployer restore replay changed the backed-up registration summary unexpectedly" + + log "Durability proof: CoronaFS controller/node split failure injection" + local coronafs_controller_export_uri coronafs_node04_export_uri + coronafs_controller_volume="durability-coronafs-$(date +%s)-$RANDOM" + coronafs_node04_volume="${coronafs_controller_volume}-node04" + coronafs_create_volume 15088 "${coronafs_controller_volume}" $((64 * 1024 * 1024)) >"${proof_dir}/coronafs-create.json" + coronafs_export_volume_json 15088 "${coronafs_controller_volume}" >"${proof_dir}/coronafs-controller-export.json" + coronafs_controller_export_uri="$(jq -r '.export.uri' "${proof_dir}/coronafs-controller-export.json")" + [[ -n "${coronafs_controller_export_uri}" && "${coronafs_controller_export_uri}" != "null" ]] \ + || die "CoronaFS controller export did not return a usable URI" + run_remote_nbd_fio_json node04 "${coronafs_controller_export_uri}" write 1M 32 >"${proof_dir}/coronafs-controller-write.json" + coronafs_materialize_volume 25088 "${coronafs_node04_volume}" "${coronafs_controller_export_uri}" $((64 * 1024 * 1024)) >"${proof_dir}/coronafs-materialize-node04.json" + assert_coronafs_materialized_volume 25088 "${coronafs_node04_volume}" + coronafs_export_volume_json 25088 "${coronafs_node04_volume}" >"${proof_dir}/coronafs-node04-export.json" + coronafs_node04_export_uri="$(jq -r '.export.uri' "${proof_dir}/coronafs-node04-export.json")" + [[ -n "${coronafs_node04_export_uri}" && "${coronafs_node04_export_uri}" != "null" ]] \ + || die "CoronaFS node-local export did not return a usable URI" + run_remote_nbd_dd_read_json node05 "${coronafs_node04_export_uri}" 32 >"${proof_dir}/coronafs-node04-prefault-read.json" + ssh_node node01 "systemctl stop coronafs.service" + ssh_node node01 "! 
systemctl is-active --quiet coronafs.service" + run_remote_nbd_dd_read_json node05 "${coronafs_node04_export_uri}" 32 >"${proof_dir}/coronafs-node04-during-controller-outage.json" + coronafs_get_volume_json 25088 "${coronafs_node04_volume}" >"${proof_dir}/coronafs-node04-local-state.json" + jq -e '.node_local == true and .path != null' "${proof_dir}/coronafs-node04-local-state.json" >/dev/null \ + || die "CoronaFS node-local state was not preserved while the controller was stopped" + ssh_node_script node04 <<'EOS' >"${proof_dir}/coronafs-node04-capabilities.json" +set -euo pipefail +curl -fsS http://127.0.0.1:50088/v1/capabilities +EOS + jq -e '.mode == "node" and .supports_node_api == true and .supports_controller_api == false' \ + "${proof_dir}/coronafs-node04-capabilities.json" >/dev/null \ + || die "CoronaFS node04 capability contract drifted during controller failure injection" + ssh_node node01 "systemctl start coronafs.service" + wait_for_http node01 "http://127.0.0.1:${CORONAFS_API_PORT}/healthz" + coronafs_get_volume_json 15088 "${coronafs_controller_volume}" >"${proof_dir}/coronafs-controller-restored.json" + coronafs_export_volume_json 15088 "${coronafs_controller_volume}" >"${proof_dir}/coronafs-controller-reexport.json" + + log "Durability proof: LightningStor distributed-backend failure injection" + local before_node01 before_node04 before_node05 lightningstor_key lightningstor_body lightningstor_body_b64 + token="$(issue_project_admin_token 15080 "durability-org" "durability-project" "durability-$(date +%s)")" + bucket="durability-lightningstor-$(date +%s)-$RANDOM" + ensure_lightningstor_bucket 15086 "${token}" "${bucket}" "durability-org" "durability-project" + wait_for_lightningstor_write_quorum 15086 "${token}" "${bucket}" "durability proof" + read -r before_node01 before_node04 before_node05 < <(lightningstor_count_triplet) + ssh_node node05 "systemctl stop lightningstor.service" + ssh_node node05 "! 
systemctl is-active --quiet lightningstor.service" + lightningstor_key="repair-probe-$(date +%s)-$RANDOM" + lightningstor_body="durability-object-${lightningstor_key}" + lightningstor_body_b64="$(printf '%s' "${lightningstor_body}" | base64 -w0)" + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "$(jq -cn --arg bucket "${bucket}" --arg key "${lightningstor_key}" --arg body "${lightningstor_body_b64}" '{bucket:$bucket, key:$key, body:$body, contentMd5:"", ifNoneMatch:""}')" \ + 127.0.0.1:15086 lightningstor.v1.ObjectService/PutObject \ + >"${proof_dir}/lightningstor-put-during-node05-outage.json" \ + || die "LightningStor write failed during node05 outage" + lightningstor_head_object_json 15086 "${token}" "${bucket}" "${lightningstor_key}" >"${proof_dir}/lightningstor-head-during-node05-outage.json" + download_lightningstor_object_to_file 15086 "${token}" "${bucket}" "${lightningstor_key}" "${proof_dir}/lightningstor-object-during-node05-outage.txt" + grep -Fx "${lightningstor_body}" "${proof_dir}/lightningstor-object-during-node05-outage.txt" >/dev/null \ + || die "LightningStor returned an unexpected payload during node05 outage" + ssh_node node05 "systemctl start lightningstor.service" + wait_for_unit node05 lightningstor + wait_for_tcp_port node05 50086 + wait_for_lightningstor_counts_greater_than "${before_node01}" "${before_node04}" "${before_node05}" "LightningStor durability repair" + download_lightningstor_object_to_file 15086 "${token}" "${bucket}" "${lightningstor_key}" "${proof_dir}/lightningstor-object-after-repair.txt" + grep -Fx "${lightningstor_body}" "${proof_dir}/lightningstor-object-after-repair.txt" >/dev/null \ + || die "LightningStor returned an unexpected payload after node05 repair" + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "$(jq -cn --arg bucket "${bucket}" --arg key "${lightningstor_key}" '{bucket:$bucket, key:$key}')" \ + 127.0.0.1:15086 lightningstor.v1.ObjectService/DeleteObject \ + >"${proof_dir}/lightningstor-delete.json" \ + || die "LightningStor delete failed during durability cleanup" + delete_lightningstor_bucket_recursive 15086 "${token}" "${bucket}" + wait_for_lightningstor_counts_equal "${before_node01}" "${before_node04}" "${before_node05}" "LightningStor durability cleanup" + bucket="" + + finished_at="$(date -Iseconds)" + jq -n \ + --arg started_at "${started_at}" \ + --arg finished_at "${finished_at}" \ + --arg artifact_root "${proof_dir}" \ + --arg deployer_restore_mode "admin pre-register request replay with pre/post-restart list verification" \ + '{started_at:$started_at, finished_at:$finished_at, artifact_root:$artifact_root, deployer_restore_mode:$deployer_restore_mode, success:true}' \ + >"${proof_dir}/result.json" + + log "Durability proof succeeded; artifacts are in ${proof_dir}" +} + +run_rollout_soak() { + local proof_dir started_at finished_at soak_hold_secs + local deployer_supported_writer_mode fleet_supported_native_runtime_nodes + local validated_maintenance_cycles validated_power_loss_cycles + local chainfire_tunnel_node01="" chainfire_tunnel_node02="" chainfire_tunnel_node03="" + local chainfire_rest_tunnel="" flaredb_rest_tunnel="" deployer_tunnel="" + local chainfire_endpoint="http://127.0.0.1:12379,http://127.0.0.1:12380,http://127.0.0.1:12381" + local native_fresh_healthy_map_expr 
native_fresh_healthy_count_expr + + proof_dir="$(prepare_rollout_soak_dir)" + started_at="$(date -Iseconds)" + soak_hold_secs="${ULTRACLOUD_ROLLOUT_SOAK_HOLD_SECS:-30}" + deployer_supported_writer_mode="single-writer restart-in-place or cold-standby restore" + fleet_supported_native_runtime_nodes="2" + validated_maintenance_cycles="1" + validated_power_loss_cycles="1" + + cleanup_rollout_soak() { + set +e + set +u + run_deployer_ctl \ + --chainfire-endpoint "${chainfire_endpoint}" \ + --cluster-id "test-cluster" \ + --cluster-namespace "ultracloud" \ + --deployer-namespace "deployer" \ + node set-state --node-id "node04" --state "active" >/dev/null 2>&1 || true + start_vm node05 >/dev/null 2>&1 || true + wait_for_ssh node05 >/dev/null 2>&1 || true + stop_ssh_tunnel node06 "${deployer_tunnel}" >/dev/null 2>&1 || true + stop_ssh_tunnel node01 "${flaredb_rest_tunnel}" >/dev/null 2>&1 || true + stop_ssh_tunnel node01 "${chainfire_rest_tunnel}" >/dev/null 2>&1 || true + stop_ssh_tunnel node03 "${chainfire_tunnel_node03}" >/dev/null 2>&1 || true + stop_ssh_tunnel node02 "${chainfire_tunnel_node02}" >/dev/null 2>&1 || true + stop_ssh_tunnel node01 "${chainfire_tunnel_node01}" >/dev/null 2>&1 || true + } + + trap cleanup_rollout_soak RETURN + + jq -n \ + --arg command "nix run ./nix/test-cluster#cluster -- rollout-soak" \ + --arg proof_dir "${proof_dir}" \ + --arg started_at "${started_at}" \ + --arg ultracloud_work_root "${ULTRACLOUD_WORK_ROOT}" \ + --arg photon_cluster_work_root "${WORK_ROOT}" \ + --arg build_profile "${BUILD_PROFILE}" \ + --arg deployer_supported_writer_mode "${deployer_supported_writer_mode}" \ + --argjson fleet_supported_native_runtime_nodes "${fleet_supported_native_runtime_nodes}" \ + --argjson validated_maintenance_cycles "${validated_maintenance_cycles}" \ + --argjson validated_power_loss_cycles "${validated_power_loss_cycles}" \ + --argjson soak_hold_secs "${soak_hold_secs}" \ + '{command:$command, proof_dir:$proof_dir, started_at:$started_at, ultracloud_work_root:$ultracloud_work_root, photon_cluster_work_root:$photon_cluster_work_root, build_profile:$build_profile, deployer_supported_writer_mode:$deployer_supported_writer_mode, fleet_supported_native_runtime_nodes:$fleet_supported_native_runtime_nodes, validated_maintenance_cycles:$validated_maintenance_cycles, validated_power_loss_cycles:$validated_power_loss_cycles, soak_hold_secs:$soak_hold_secs}' \ + >"${proof_dir}/meta.json" + + log "Running long-run rollout soak; artifacts will be written to ${proof_dir}" + + validate_control_plane + validate_workers + validate_native_runtime_flow + + chainfire_tunnel_node01="$(start_ssh_tunnel node01 12379 2379 "${NODE_IPS[node01]}")" + chainfire_tunnel_node02="$(start_ssh_tunnel node02 12380 2379 "${NODE_IPS[node02]}")" + chainfire_tunnel_node03="$(start_ssh_tunnel node03 12381 2379 "${NODE_IPS[node03]}")" + chainfire_rest_tunnel="$(start_ssh_tunnel node01 18081 8081)" + flaredb_rest_tunnel="$(start_ssh_tunnel node01 18082 8082)" + deployer_tunnel="$(start_ssh_tunnel node06 13012 8088)" + + native_fresh_healthy_map_expr='map(select(.state == "healthy" and (((((.last_heartbeat // .observed_at) // "") | sub("\\.[0-9]+"; "") | sub("\\+00:00$"; "Z") | fromdateiso8601?) 
// 0) >= (now - 300))))' + native_fresh_healthy_count_expr="${native_fresh_healthy_map_expr} | length" + + jq -n \ + --arg artifact_root "${proof_dir}" \ + --arg started_at "${started_at}" \ + --arg deployer_supported_writer_mode "${deployer_supported_writer_mode}" \ + --argjson fleet_supported_native_runtime_nodes "${fleet_supported_native_runtime_nodes}" \ + --argjson validated_maintenance_cycles "${validated_maintenance_cycles}" \ + --argjson validated_power_loss_cycles "${validated_power_loss_cycles}" \ + --argjson soak_hold_secs "${soak_hold_secs}" \ + '{ + artifact_root:$artifact_root, + started_at:$started_at, + deployer:{ + supported_writer_mode:$deployer_supported_writer_mode, + automatic_chainfire_backed_multi_instance_failover_supported:false, + supported_recovery_path:"restart-in-place or cold-standby restore with ultracloud.cluster state re-apply and preserved admin request replay" + }, + fleet_scheduler:{ + supported_lab_shape:{ + native_runtime_nodes:$fleet_supported_native_runtime_nodes, + validated_maintenance_cycles:$validated_maintenance_cycles, + validated_fail_stop_cycles:$validated_power_loss_cycles, + held_degraded_state_secs:$soak_hold_secs + }, + multi_hour_maintenance_supported:false, + larger_cluster_soak_supported:false + } + }' \ + >"${proof_dir}/scope-fixed-contract.json" + + printf '%s\n' \ + "Deployer release boundary: ${deployer_supported_writer_mode}; automatic ChainFire-backed multi-instance failover is outside the supported product contract for this release." \ + >"${proof_dir}/deployer-scope-fixed.txt" + printf '%s\n' \ + "Fleet-scheduler release boundary: the current proof covers ${fleet_supported_native_runtime_nodes} native-runtime workers, ${validated_maintenance_cycles} planned drain cycle, ${validated_power_loss_cycles} fail-stop worker-loss cycle, and ${soak_hold_secs}-second held degraded states; multi-hour maintenance and larger-cluster drain choreography are outside the supported product contract for this release." \ + >"${proof_dir}/fleet-scheduler-scope-fixed.txt" + + soak_native_dump_values() { + local prefix="$1" + run_deployer_ctl \ + --chainfire-endpoint "${chainfire_endpoint}" \ + --cluster-id "test-cluster" \ + --cluster-namespace "ultracloud" \ + --deployer-namespace "deployer" \ + dump --prefix "${prefix}" --format json \ + | jq -rc '.value' + } + + soak_native_dump_array() { + local prefix="$1" + soak_native_dump_values "${prefix}" | sed '/^$/d' | jq -s '.' 
+  }
+
+  # Poll a deployer-ctl dump prefix until the supplied jq expression evaluates to the expected count.
+  soak_wait_for_native_dump_count() {
+    local prefix="$1"
+    local jq_expr="$2"
+    local expected="$3"
+    local timeout="${4:-${HTTP_WAIT_TIMEOUT}}"
+    local deadline=$((SECONDS + timeout))
+
+    while true; do
+      local count
+      count="$(
+        soak_native_dump_values "${prefix}" \
+          | sed '/^$/d' \
+          | jq -sr "${jq_expr}" 2>/dev/null \
+          || printf '0'
+      )"
+      if [[ "${count}" == "${expected}" ]]; then
+        return 0
+      fi
+      if (( SECONDS >= deadline )); then
+        die "timed out waiting for prefix ${prefix} to satisfy ${jq_expr} == ${expected} (got ${count})"
+      fi
+      sleep 2
+    done
+  }
+
+  # Wait until a fresh, healthy instance of the service is reported on the expected node, then print its record.
+  soak_wait_for_native_instance_node() {
+    local service="$1"
+    local expected_node="$2"
+    local timeout="${3:-${HTTP_WAIT_TIMEOUT}}"
+    local deadline=$((SECONDS + timeout))
+    local instance_value="" node_id=""
+
+    while true; do
+      instance_value="$(
+        soak_native_dump_values "ultracloud/clusters/test-cluster/instances/${service}/" \
+          | sed '/^$/d' \
+          | jq -sr \
+            --arg node "${expected_node}" \
+            "${native_fresh_healthy_map_expr} | map(select(.node_id == \$node)) | sort_by(.instance_id) | first"
+      )"
+      node_id="$(printf '%s' "${instance_value}" | jq -r '.node_id // empty')"
+      if [[ "${node_id}" == "${expected_node}" ]]; then
+        printf '%s' "${instance_value}"
+        return 0
+      fi
+      if (( SECONDS >= deadline )); then
+        die "timed out waiting for ${service} to run on ${expected_node}"
+      fi
+      sleep 2
+    done
+  }
+
+  # Same as above, but accept the instance in any reported state (used while placements are still converging).
+  soak_wait_for_native_instance_node_any_state() {
+    local service="$1"
+    local expected_node="$2"
+    local timeout="${3:-${HTTP_WAIT_TIMEOUT}}"
+    local deadline=$((SECONDS + timeout))
+    local instance_value="" node_id=""
+
+    while true; do
+      instance_value="$(
+        soak_native_dump_values "ultracloud/clusters/test-cluster/instances/${service}/" \
+          | sed '/^$/d' \
+          | jq -sr --arg node "${expected_node}" 'map(select(.node_id == $node)) | sort_by(.instance_id) | first'
+      )"
+      node_id="$(printf '%s' "${instance_value}" | jq -r '.node_id // empty')"
+      if [[ "${node_id}" == "${expected_node}" ]]; then
+        printf '%s' "${instance_value}"
+        return 0
+      fi
+      if (( SECONDS >= deadline )); then
+        die "timed out waiting for ${service} to exist on ${expected_node}"
+      fi
+      sleep 2
+    done
+  }
+
+  # Flip a node's desired state (for example active or draining) through deployer-ctl.
+  soak_set_native_node_state() {
+    local node_id="$1"
+    local state="$2"
+    run_deployer_ctl \
+      --chainfire-endpoint "${chainfire_endpoint}" \
+      --cluster-id "test-cluster" \
+      --cluster-namespace "ultracloud" \
+      --deployer-namespace "deployer" \
+      node set-state --node-id "${node_id}" --state "${state}"
+  }
+
+  # Restart nix-agent where it is installed; otherwise record why this lane skips it.
+  soak_restart_optional_nix_agent() {
+    local node="$1"
+    local journal_path="$2"
+    local scope_path="$3"
+
+    if ssh_node "${node}" "systemctl list-unit-files 'nix-agent.service' --no-legend --no-pager 2>/dev/null | grep -q '^nix-agent\\.service'"; then
+      ssh_node "${node}" "systemctl restart nix-agent.service"
+      wait_for_unit "${node}" nix-agent
+      ssh_node "${node}" "journalctl -u nix-agent -b --since '${started_at}' --no-pager" \
+        >"${proof_dir}/${journal_path}"
+      return 0
+    fi
+
+    printf '%s\n' \
+      "nix-agent.service is not installed on ${node} in the current KVM test-cluster profile; long-run nix-agent proof remains in the deployer-vm-rollback and baremetal ISO lanes rather than in this live rollout soak."
\ + >"${proof_dir}/${scope_path}" + } + + soak_capture_native_snapshot() { + local relative_path="$1" + jq -n \ + --arg captured_at "$(date -Iseconds)" \ + --argjson nodes "$(soak_native_dump_array "ultracloud/clusters/test-cluster/nodes/")" \ + --argjson services "$(soak_native_dump_array "ultracloud/clusters/test-cluster/services/")" \ + --argjson native_web "$(soak_native_dump_array "ultracloud/clusters/test-cluster/instances/native-web/")" \ + --argjson native_container "$(soak_native_dump_array "ultracloud/clusters/test-cluster/instances/native-container/")" \ + --argjson native_daemon "$(soak_native_dump_array "ultracloud/clusters/test-cluster/instances/native-daemon/")" \ + --argjson publications "$(soak_native_dump_array "ultracloud/clusters/test-cluster/publications/")" \ + --argjson admin_nodes "$(curl -fsS -H 'x-deployer-token: test-admin-token' http://127.0.0.1:13012/api/v1/admin/nodes)" \ + '{captured_at:$captured_at, nodes:$nodes, services:$services, native_web:$native_web, native_container:$native_container, native_daemon:$native_daemon, publications:$publications, admin_nodes:$admin_nodes}' \ + >"${proof_dir}/${relative_path}" + } + + log "Rollout soak: capturing baseline runtime state" + soak_capture_native_snapshot "baseline.json" + + log "Rollout soak: repeated planned maintenance on node04" + soak_set_native_node_state "node04" "draining" + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/nodes/" \ + 'map(select(.node_id == "node04" and .state == "draining")) | length' \ + "1" \ + 120 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-web/" \ + "${native_fresh_healthy_count_expr}" \ + "1" \ + 240 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-container/" \ + 'length' \ + "1" \ + 240 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-daemon/" \ + "${native_fresh_healthy_count_expr}" \ + "1" \ + 240 + soak_wait_for_native_instance_node "native-web" "node05" 240 >/dev/null + soak_wait_for_native_instance_node_any_state "native-container" "node05" 240 >/dev/null + soak_wait_for_native_instance_node "native-daemon" "node05" 240 >/dev/null + soak_capture_native_snapshot "maintenance-during.json" + sleep "${soak_hold_secs}" + soak_capture_native_snapshot "maintenance-held.json" + soak_set_native_node_state "node04" "active" + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/nodes/" \ + 'map(select(.node_id == "node04" and .state == "active")) | length' \ + "1" \ + 120 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-web/" \ + "${native_fresh_healthy_count_expr}" \ + "2" \ + 240 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-container/" \ + 'length' \ + "1" \ + 240 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-daemon/" \ + "${native_fresh_healthy_count_expr}" \ + "2" \ + 240 + soak_wait_for_native_instance_node "native-web" "node04" 240 >/dev/null + soak_wait_for_native_instance_node "native-web" "node05" 240 >/dev/null + soak_wait_for_native_instance_node "native-daemon" "node04" 240 >/dev/null + soak_wait_for_native_instance_node "native-daemon" "node05" 240 >/dev/null + soak_wait_for_native_instance_node_any_state "native-container" "node05" 240 >/dev/null + soak_capture_native_snapshot "maintenance-restored.json" + + log "Rollout soak: simulating repeated worker power loss on node05" + stop_vm 
node05 + wait_for_ssh_down node05 120 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-web/" \ + "${native_fresh_healthy_count_expr}" \ + "1" \ + 240 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-container/" \ + 'length' \ + "1" \ + 240 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-daemon/" \ + "${native_fresh_healthy_count_expr}" \ + "1" \ + 240 + soak_wait_for_native_instance_node "native-web" "node04" 240 >/dev/null + soak_wait_for_native_instance_node_any_state "native-container" "node04" 240 >/dev/null + soak_wait_for_native_instance_node "native-daemon" "node04" 240 >/dev/null + soak_capture_native_snapshot "power-loss-during.json" + sleep "${soak_hold_secs}" + soak_capture_native_snapshot "power-loss-held.json" + start_vm node05 + wait_for_ssh node05 + wait_for_unit node05 plasmavmc + wait_for_unit node05 lightningstor + wait_for_unit node05 node-agent + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/nodes/" \ + 'map(select(.labels.runtime == "native" and .state == "active")) | length' \ + "2" \ + 240 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-web/" \ + "${native_fresh_healthy_count_expr}" \ + "2" \ + 240 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-container/" \ + 'length' \ + "1" \ + 240 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-daemon/" \ + "${native_fresh_healthy_count_expr}" \ + "2" \ + 240 + soak_wait_for_native_instance_node "native-web" "node04" 240 >/dev/null + soak_wait_for_native_instance_node "native-web" "node05" 240 >/dev/null + soak_wait_for_native_instance_node "native-daemon" "node04" 240 >/dev/null + soak_wait_for_native_instance_node "native-daemon" "node05" 240 >/dev/null + soak_wait_for_native_instance_node_any_state "native-container" "node04" 240 >/dev/null + soak_capture_native_snapshot "power-loss-restored.json" + + log "Rollout soak: restarting deployer, fleet-scheduler, nix-agent, and node-agent services" + ssh_node node06 "systemctl restart deployer.service" + wait_for_unit node06 deployer + wait_for_http node06 "http://127.0.0.1:8088/health" + curl -fsS -H 'x-deployer-token: test-admin-token' \ + http://127.0.0.1:13012/api/v1/admin/nodes \ + >"${proof_dir}/deployer-post-restart-nodes.json" + ssh_node node06 "journalctl -u deployer -b --since '${started_at}' --no-pager" \ + >"${proof_dir}/deployer-journal.log" + + ssh_node node06 "systemctl restart fleet-scheduler.service" + wait_for_unit node06 fleet-scheduler + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-web/" \ + "${native_fresh_healthy_count_expr}" \ + "2" \ + 240 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-container/" \ + 'length' \ + "1" \ + 240 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-daemon/" \ + "${native_fresh_healthy_count_expr}" \ + "2" \ + 240 + soak_wait_for_native_instance_node_any_state "native-container" "node04" 240 >/dev/null + soak_capture_native_snapshot "fleet-scheduler-post-restart.json" + ssh_node node06 "journalctl -u fleet-scheduler -b --since '${started_at}' --no-pager" \ + >"${proof_dir}/fleet-scheduler-journal.log" + + soak_restart_optional_nix_agent node01 "node01-nix-agent-journal.log" "node01-nix-agent-scope.txt" + soak_restart_optional_nix_agent node04 
"node04-nix-agent-journal.log" "node04-nix-agent-scope.txt" + + ssh_node node04 "systemctl restart node-agent.service" + wait_for_unit node04 node-agent + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-web/" \ + "${native_fresh_healthy_count_expr}" \ + "2" \ + 240 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-container/" \ + 'length' \ + "1" \ + 240 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-daemon/" \ + "${native_fresh_healthy_count_expr}" \ + "2" \ + 240 + soak_wait_for_native_instance_node_any_state "native-container" "node04" 240 >/dev/null + soak_capture_native_snapshot "node04-node-agent-post-restart.json" + ssh_node node04 "journalctl -u node-agent -b --since '${started_at}' --no-pager" \ + >"${proof_dir}/node04-node-agent-journal.log" + + ssh_node node05 "systemctl restart node-agent.service" + wait_for_unit node05 node-agent + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-web/" \ + "${native_fresh_healthy_count_expr}" \ + "2" \ + 240 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-container/" \ + 'length' \ + "1" \ + 240 + soak_wait_for_native_dump_count \ + "ultracloud/clusters/test-cluster/instances/native-daemon/" \ + "${native_fresh_healthy_count_expr}" \ + "2" \ + 240 + soak_wait_for_native_instance_node_any_state "native-container" "node04" 240 >/dev/null + soak_capture_native_snapshot "node05-node-agent-post-restart.json" + ssh_node node05 "journalctl -u node-agent -b --since '${started_at}' --no-pager" \ + >"${proof_dir}/node05-node-agent-journal.log" + + log "Rollout soak: restarting fixed-membership ChainFire and FlareDB members" + ssh_node node02 "systemctl restart chainfire.service" + wait_for_unit node02 chainfire + wait_for_http node02 "http://127.0.0.1:8081/health" + local chainfire_key chainfire_value + chainfire_key="rollout-soak-chainfire-$(date +%s)-$RANDOM" + chainfire_value="rollout-soak-${RANDOM}" + curl -fsS \ + -X PUT \ + -H 'content-type: application/json' \ + -d "$(jq -cn --arg value "${chainfire_value}" '{value:$value}')" \ + "http://127.0.0.1:18081/api/v1/kv/${chainfire_key}" \ + >"${proof_dir}/chainfire-post-restart-put.json" + curl -fsS "http://127.0.0.1:18081/api/v1/kv/${chainfire_key}" \ + >"${proof_dir}/chainfire-post-restart.json" + jq -e --arg expected "${chainfire_value}" '.data.value == $expected' \ + "${proof_dir}/chainfire-post-restart.json" >/dev/null \ + || die "ChainFire fixed-membership restart proof did not reproduce the expected value" + ssh_node node02 "journalctl -u chainfire -b --since '${started_at}' --no-pager" \ + >"${proof_dir}/chainfire-node02-journal.log" + + ssh_node node02 "systemctl restart flaredb.service" + wait_for_unit node02 flaredb + wait_for_http node02 "http://127.0.0.1:8082/health" + wait_for_flaredb_region node02 + wait_for_flaredb_route_metadata node01 + local flaredb_namespace flaredb_table flaredb_payload + flaredb_namespace="rollout_soak_sql_$(date +%s)_$RANDOM" + flaredb_table="restart_rows" + flaredb_payload="payload-${RANDOM}" + curl -fsS \ + -H 'content-type: application/json' \ + -d "$(jq -cn --arg namespace "${flaredb_namespace}" --arg query "CREATE TABLE ${flaredb_table} (id INTEGER PRIMARY KEY, payload TEXT NOT NULL)" '{namespace:$namespace, query:$query}')" \ + http://127.0.0.1:18082/api/v1/sql \ + >"${proof_dir}/flaredb-post-restart-create.json" + curl -fsS \ + -H 'content-type: 
application/json' \ + -d "$(jq -cn --arg namespace "${flaredb_namespace}" --arg query "INSERT INTO ${flaredb_table} (id, payload) VALUES (1, '${flaredb_payload}')" '{namespace:$namespace, query:$query}')" \ + http://127.0.0.1:18082/api/v1/sql \ + >"${proof_dir}/flaredb-post-restart-insert.json" + curl -fsS \ + -H 'content-type: application/json' \ + -d "$(jq -cn --arg namespace "${flaredb_namespace}" --arg query "SELECT id, payload FROM ${flaredb_table} ORDER BY id" '{namespace:$namespace, query:$query}')" \ + http://127.0.0.1:18082/api/v1/sql \ + >"${proof_dir}/flaredb-post-restart.json" + jq -e --arg payload "${flaredb_payload}" ' + (.data.rows | length) == 1 and + .data.rows[0].id == 1 and + .data.rows[0].payload == $payload + ' "${proof_dir}/flaredb-post-restart.json" >/dev/null \ + || die "FlareDB post-restart SQL proof did not return the expected row" + ssh_node node02 "journalctl -u flaredb -b --since '${started_at}' --no-pager" \ + >"${proof_dir}/flaredb-node02-journal.log" + + validate_control_plane + soak_capture_native_snapshot "post-control-plane-restarts.json" + + finished_at="$(date -Iseconds)" + jq -n \ + --arg started_at "${started_at}" \ + --arg finished_at "${finished_at}" \ + --arg artifact_root "${proof_dir}" \ + --arg deployer_supported_writer_mode "${deployer_supported_writer_mode}" \ + --argjson fleet_supported_native_runtime_nodes "${fleet_supported_native_runtime_nodes}" \ + --argjson validated_maintenance_cycles "${validated_maintenance_cycles}" \ + --argjson validated_power_loss_cycles "${validated_power_loss_cycles}" \ + --argjson soak_hold_secs "${soak_hold_secs}" \ + --arg summary "validated one planned drain cycle and one fail-stop worker-loss cycle on the two-node native-runtime lab, held each degraded state for the configured soak window, restarted deployer or scheduler or agent services, and revalidated fixed-membership control-plane restarts while keeping deployer HA scope-fixed to single-writer recovery" \ + '{started_at:$started_at, finished_at:$finished_at, artifact_root:$artifact_root, deployer_supported_writer_mode:$deployer_supported_writer_mode, fleet_supported_native_runtime_nodes:$fleet_supported_native_runtime_nodes, validated_maintenance_cycles:$validated_maintenance_cycles, validated_power_loss_cycles:$validated_power_loss_cycles, soak_hold_secs:$soak_hold_secs, summary:$summary, success:true}' \ + >"${proof_dir}/result.json" + + log "Long-run rollout soak succeeded; artifacts are in ${proof_dir}" +} + validate_cluster() { preflight wait_requested @@ -8369,6 +10063,10 @@ fresh_smoke_requested() { smoke_requested "$@" } +baremetal_iso_requested() { + bash "${REPO_ROOT}/nix/test-cluster/verify-baremetal-iso.sh" "$@" +} + storage_smoke_requested() { BUILD_PROFILE="storage" start_requested "${STORAGE_NODES[@]}" @@ -8411,6 +10109,59 @@ fresh_matrix_requested() { matrix_requested "$@" } +run_provider_vm_reality_proof() { + local proof_dir started_at finished_at + + proof_dir="$(prepare_provider_vm_reality_proof_dir)" + started_at="$(date -Iseconds)" + export ULTRACLOUD_PROVIDER_VM_PROOF_DIR="${proof_dir}" + trap 'unset ULTRACLOUD_PROVIDER_VM_PROOF_DIR' RETURN + + jq -n \ + --arg command "nix run ./nix/test-cluster#cluster -- provider-vm-reality-proof" \ + --arg proof_dir "${proof_dir}" \ + --arg started_at "${started_at}" \ + --arg ultracloud_work_root "${ULTRACLOUD_WORK_ROOT}" \ + --arg photon_cluster_work_root "${WORK_ROOT}" \ + '{command:$command, proof_dir:$proof_dir, started_at:$started_at, ultracloud_work_root:$ultracloud_work_root, 
photon_cluster_work_root:$photon_cluster_work_root}' \ + >"${proof_dir}/meta.json" + + log "Running provider and VM-hosting reality proof; artifacts will be written to ${proof_dir}" + + validate_control_plane + validate_workers + validate_network_provider_matrix + validate_vm_storage_flow + + finished_at="$(date -Iseconds)" + jq -n \ + --arg started_at "${started_at}" \ + --arg finished_at "${finished_at}" \ + --arg artifact_root "${proof_dir}" \ + --arg network_provider_artifacts "${proof_dir}/network-provider" \ + --arg vm_hosting_artifacts "${proof_dir}/vm-hosting" \ + '{started_at:$started_at, finished_at:$finished_at, artifact_root:$artifact_root, network_provider_artifacts:$network_provider_artifacts, vm_hosting_artifacts:$vm_hosting_artifacts, success:true}' \ + >"${proof_dir}/result.json" + + log "Provider and VM-hosting reality proof succeeded; artifacts are in ${proof_dir}" +} + +provider_vm_reality_proof_requested() { + start_requested "$@" + run_provider_vm_reality_proof +} + +rollout_soak_requested() { + clean_requested "$@" + start_requested "$@" + run_rollout_soak +} + +durability_proof_requested() { + start_requested "$@" + run_durability_proof +} + bench_storage_requested() { STORAGE_BENCHMARK_COMMAND="${STORAGE_BENCHMARK_COMMAND:-bench-storage}" LIGHTNINGSTOR_BENCH_CLIENT_NODE="node03" @@ -8686,6 +10437,7 @@ Commands: validate Run the cluster smoke validation smoke start + validate fresh-smoke clean local runtime state, rebuild on the host, start, and validate + baremetal-iso verify the canonical ISO bootstrap path from phone-home through desired-system convergence storage-smoke start the storage lab (node01-05) and validate CoronaFS/LightningStor/PlasmaVMC fresh-storage-smoke clean local runtime state, rebuild node01-05 on the host, start, and validate the storage lab demo-vm-webapp start the cluster and run the VM web app demo backed by FlareDB and LightningStor @@ -8694,6 +10446,9 @@ Commands: fresh-serve-vm-webapp clean local runtime state, rebuild on the host, start, run the VM web app demo, and leave it reachable on localhost matrix Start the cluster and validate composed service configurations against the current running VMs fresh-matrix clean local runtime state, rebuild on the host, start, and validate composed service configurations + provider-vm-reality-proof start the cluster if needed, then persist provider and VM-hosting interop artifacts under ./work/provider-vm-reality-proof + rollout-soak clean local runtime state, rebuild on the host, start, and persist a longer-run control-plane and rollout soak under ./work/rollout-soak + durability-proof start the cluster if needed, then persist durability and restore artifacts under ./work/durability-proof bench-storage start the cluster and benchmark CoronaFS plus LightningStor against the current running VMs fresh-bench-storage clean local runtime state, rebuild on the host, start, and benchmark CoronaFS plus LightningStor bench-coronafs start the storage lab and benchmark CoronaFS against the current running VMs @@ -8716,6 +10471,7 @@ Commands: Examples: $0 smoke $0 fresh-smoke + $0 baremetal-iso $0 storage-smoke $0 fresh-storage-smoke $0 demo-vm-webapp @@ -8724,6 +10480,9 @@ Examples: $0 fresh-serve-vm-webapp $0 matrix $0 fresh-matrix + $0 provider-vm-reality-proof + $0 rollout-soak + $0 durability-proof $0 bench-storage $0 fresh-bench-storage $0 bench-coronafs @@ -8756,6 +10515,7 @@ main() { validate) validate_cluster ;; smoke) smoke_requested "$@" ;; fresh-smoke) fresh_smoke_requested "$@" ;; + baremetal-iso) 
baremetal_iso_requested "$@" ;; storage-smoke) storage_smoke_requested ;; fresh-storage-smoke) fresh_storage_smoke_requested ;; demo-vm-webapp) demo_vm_webapp_requested "$@" ;; @@ -8764,6 +10524,9 @@ main() { fresh-serve-vm-webapp) fresh_serve_vm_webapp_requested "$@" ;; matrix) matrix_requested "$@" ;; fresh-matrix) fresh_matrix_requested "$@" ;; + provider-vm-reality-proof) provider_vm_reality_proof_requested "$@" ;; + rollout-soak) rollout_soak_requested "$@" ;; + durability-proof) durability_proof_requested "$@" ;; bench-storage) bench_storage_requested "$@" ;; fresh-bench-storage) fresh_bench_storage_requested "$@" ;; bench-coronafs) coronafs_bench_requested ;; diff --git a/nix/test-cluster/run-core-control-plane-ops-proof.sh b/nix/test-cluster/run-core-control-plane-ops-proof.sh new file mode 100755 index 0000000..b397a10 --- /dev/null +++ b/nix/test-cluster/run-core-control-plane-ops-proof.sh @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +set -euo pipefail + +export PATH="/run/current-system/sw/bin:/usr/bin:/bin:${PATH}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" +WORK_ROOT="${ULTRACLOUD_WORK_ROOT:-${REPO_ROOT}/work}" + +timestamp() { + date +%Y%m%dT%H%M%S%:z +} + +DEFAULT_LOG_ROOT="${WORK_ROOT}/core-control-plane-ops-proof/$(timestamp)" +LOG_ROOT="${1:-${DEFAULT_LOG_ROOT}}" + +mkdir -p "${LOG_ROOT}" + +log() { + printf '[core-control-plane-ops-proof] %s\n' "$*" +} + +run_case() { + local name="$1" + shift + local logfile="${LOG_ROOT}/${name}.log" + local metafile="${LOG_ROOT}/${name}.meta" + local started_at ended_at rc + + started_at="$(date -Is)" + printf 'command=' >"${metafile}" + printf '%q ' "$@" >>"${metafile}" + printf '\nstarted_at=%s\n' "${started_at}" >>"${metafile}" + + log "running ${name}: $*" + set +e + ( + cd "${REPO_ROOT}" + "$@" + ) >"${logfile}" 2>&1 + rc=$? 
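+  # errexit is paused only around the proof command so a failing case still gets its
+  # exit code and log path recorded before the function decides whether to propagate it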
+ set -e + + ended_at="$(date -Is)" + printf 'ended_at=%s\n' "${ended_at}" >>"${metafile}" + printf 'exit_code=%s\n' "${rc}" >>"${metafile}" + + if (( rc != 0 )); then + log "${name} failed; see ${logfile}" + return "${rc}" + fi + + log "${name} passed" +} + +write_environment() { + { + printf 'started_at=%s\n' "$(date -Is)" + printf 'repo_root=%s\n' "${REPO_ROOT}" + printf 'log_root=%s\n' "${LOG_ROOT}" + printf 'branch=%s\n' "$(git -C "${REPO_ROOT}" branch --show-current)" + printf 'commit=%s\n' "$(git -C "${REPO_ROOT}" rev-parse HEAD)" + } >"${LOG_ROOT}/environment.txt" +} + +write_scope_summary() { + cat >"${LOG_ROOT}/scope-fixed-contract.json" <"${LOG_ROOT}/result.json" < 0 )); then + build_cores="$(( cpu_count / max_jobs ))" + fi + if (( build_cores < 1 )); then + build_cores=1 + fi + printf '%s\n' "${build_cores}" +} + +append_nix_config_line() { + local line="$1" + if [[ -n "${NIX_CONFIG:-}" ]]; then + NIX_CONFIG+=$'\n' + fi + NIX_CONFIG+="${line}" +} + +host_nested_param_path() { + if [[ -f /sys/module/kvm_intel/parameters/nested ]]; then + printf '%s\n' /sys/module/kvm_intel/parameters/nested + elif [[ -f /sys/module/kvm_amd/parameters/nested ]]; then + printf '%s\n' /sys/module/kvm_amd/parameters/nested + fi +} + +prepare_runtime_dirs() { + local cpu_count default_max_jobs default_build_cores + + cpu_count="$(host_cpu_count)" + default_max_jobs="$(default_local_nix_max_jobs "${cpu_count}")" + default_build_cores="$(default_local_nix_build_cores "${cpu_count}" "${default_max_jobs}")" + + export ULTRACLOUD_WORK_ROOT="${WORK_ROOT}" + export TMPDIR="${TMPDIR:-${WORK_ROOT}/tmp}" + export XDG_CACHE_HOME="${XDG_CACHE_HOME:-${WORK_ROOT}/xdg-cache}" + export PHOTON_CLUSTER_WORK_ROOT="${PHOTON_CLUSTER_WORK_ROOT:-${WORK_ROOT}/test-cluster}" + export PHOTON_VM_DIR="${PHOTON_VM_DIR:-${PHOTON_CLUSTER_WORK_ROOT}/state}" + export PHOTON_CLUSTER_VDE_SWITCH_DIR="${PHOTON_CLUSTER_VDE_SWITCH_DIR:-${PHOTON_CLUSTER_WORK_ROOT}/vde-switch}" + export ULTRACLOUD_LOCAL_NIX_MAX_JOBS="${ULTRACLOUD_LOCAL_NIX_MAX_JOBS:-${default_max_jobs}}" + export ULTRACLOUD_LOCAL_NIX_BUILD_CORES="${ULTRACLOUD_LOCAL_NIX_BUILD_CORES:-${default_build_cores}}" + export PHOTON_CLUSTER_NIX_MAX_JOBS="${PHOTON_CLUSTER_NIX_MAX_JOBS:-${ULTRACLOUD_LOCAL_NIX_MAX_JOBS}}" + export PHOTON_CLUSTER_NIX_BUILD_CORES="${PHOTON_CLUSTER_NIX_BUILD_CORES:-${ULTRACLOUD_LOCAL_NIX_BUILD_CORES}}" + + append_nix_config_line "builders =" + append_nix_config_line "max-jobs = ${ULTRACLOUD_LOCAL_NIX_MAX_JOBS}" + append_nix_config_line "cores = ${ULTRACLOUD_LOCAL_NIX_BUILD_CORES}" + append_nix_config_line "experimental-features = nix-command flakes" + append_nix_config_line "warn-dirty = false" + export NIX_CONFIG + + mkdir -p \ + "${TMPDIR}" \ + "${XDG_CACHE_HOME}" \ + "${PHOTON_CLUSTER_WORK_ROOT}" \ + "${PHOTON_CLUSTER_VDE_SWITCH_DIR}" \ + "${LOG_ROOT}" +} + +capture_environment() { + { + printf 'task_id=%s\n' "${TASK_ID}" + printf 'started_at=%s\n' "$(date -Is)" + printf 'repo_root=%s\n' "${REPO_ROOT}" + printf 'work_root=%s\n' "${WORK_ROOT}" + printf 'log_root=%s\n' "${LOG_ROOT}" + printf 'branch=%s\n' "$(git -C "${REPO_ROOT}" branch --show-current)" + printf 'commit=%s\n' "$(git -C "${REPO_ROOT}" rev-parse HEAD)" + printf 'host_cpu_count=%s\n' "$(host_cpu_count)" + printf 'ultracloud_local_nix_max_jobs=%s\n' "${ULTRACLOUD_LOCAL_NIX_MAX_JOBS}" + printf 'ultracloud_local_nix_build_cores=%s\n' "${ULTRACLOUD_LOCAL_NIX_BUILD_CORES}" + printf 'photon_cluster_nix_max_jobs=%s\n' "${PHOTON_CLUSTER_NIX_MAX_JOBS}" + printf 'photon_cluster_nix_build_cores=%s\n' 
"${PHOTON_CLUSTER_NIX_BUILD_CORES}" + printf 'tmpdir=%s\n' "${TMPDIR}" + printf 'xdg_cache_home=%s\n' "${XDG_CACHE_HOME}" + printf 'photon_cluster_work_root=%s\n' "${PHOTON_CLUSTER_WORK_ROOT}" + printf 'photon_vm_dir=%s\n' "${PHOTON_VM_DIR}" + printf 'photon_cluster_vde_switch_dir=%s\n' "${PHOTON_CLUSTER_VDE_SWITCH_DIR}" + printf 'nix_version=%s\n' "$(nix --version)" + printf 'nix_builders=%s\n' "$(nix config show builders 2>/dev/null | awk -F' = ' 'NR==1 { print $2 }')" + printf 'kvm_present=%s\n' "$([[ -e /dev/kvm ]] && echo yes || echo no)" + printf 'kvm_access=%s\n' "$([[ -r /dev/kvm && -w /dev/kvm ]] && echo rw || echo no)" + if [[ -e /dev/kvm ]]; then + printf 'kvm_stat=%s\n' "$(stat -c '%A %U %G %t:%T' /dev/kvm)" + fi + local nested_path + nested_path="$(host_nested_param_path || true)" + if [[ -n "${nested_path}" ]]; then + printf 'nested_param_path=%s\n' "${nested_path}" + printf 'nested_param_value=%s\n' "$(<"${nested_path}")" + fi + } >"${LOG_ROOT}/environment.txt" +} + +run_case() { + local name="$1" + local timeout_secs="$2" + shift 2 + + local logfile="${LOG_ROOT}/${name}.log" + local metafile="${LOG_ROOT}/${name}.meta" + local started_at ended_at rc + + started_at="$(date -Is)" + printf 'name=%s\n' "${name}" >"${metafile}" + printf 'started_at=%s\n' "${started_at}" >>"${metafile}" + printf 'timeout_secs=%s\n' "${timeout_secs}" >>"${metafile}" + printf 'command=' >>"${metafile}" + printf '%q ' "$@" >>"${metafile}" + printf '\n' >>"${metafile}" + + log "running ${name}: $*" + set +e + ( + cd "${REPO_ROOT}" + timeout --signal=TERM --kill-after=60 "${timeout_secs}" "$@" + ) 2>&1 | tee "${logfile}" + rc=${PIPESTATUS[0]} + set -e + + ended_at="$(date -Is)" + printf 'ended_at=%s\n' "${ended_at}" >>"${metafile}" + printf 'exit_code=%s\n' "${rc}" >>"${metafile}" + + if (( rc == 124 )); then + log "${name} timed out after ${timeout_secs}s" + elif (( rc == 0 )); then + log "${name} passed" + else + log "${name} failed with exit ${rc}" + fi + + return 0 +} + +main() { + prepare_runtime_dirs + capture_environment + + run_case canonical-profile-eval-guards 3600 \ + nix build .#checks.x86_64-linux.canonical-profile-eval-guards --no-link + run_case supported-surface-guard 3600 \ + nix build .#checks.x86_64-linux.supported-surface-guard --no-link + run_case portable-control-plane-regressions 10800 \ + nix build .#checks.x86_64-linux.portable-control-plane-regressions + run_case deployer-bootstrap-e2e 10800 \ + nix build .#checks.x86_64-linux.deployer-bootstrap-e2e + run_case host-lifecycle-e2e 10800 \ + nix build .#checks.x86_64-linux.host-lifecycle-e2e + run_case fleet-scheduler-e2e 10800 \ + nix build .#checks.x86_64-linux.fleet-scheduler-e2e + run_case single-node-quickstart 14400 \ + nix run .#single-node-quickstart + run_case baremetal-iso 21600 \ + nix run ./nix/test-cluster#cluster -- baremetal-iso + run_case fresh-smoke 28800 \ + nix run ./nix/test-cluster#cluster -- fresh-smoke +} + +main "$@" diff --git a/nix/test-cluster/run-publishable-kvm-suite.sh b/nix/test-cluster/run-publishable-kvm-suite.sh new file mode 100755 index 0000000..af4a947 --- /dev/null +++ b/nix/test-cluster/run-publishable-kvm-suite.sh @@ -0,0 +1,231 @@ +#!/usr/bin/env bash +set -euo pipefail + +export PATH="/run/current-system/sw/bin:/usr/bin:/bin:${PATH}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" +WORK_ROOT="${ULTRACLOUD_WORK_ROOT:-${REPO_ROOT}/work}" +LOG_DIR="${1:-${ULTRACLOUD_KVM_PUBLISHABLE_LOG_DIR:-${WORK_ROOT}/publishable-kvm-suite}}" + +mkdir -p "${LOG_DIR}" + +log() { + printf '[publishable-kvm-suite] %s\n' "$*" +} + +die() { + printf '[publishable-kvm-suite] ERROR: %s\n' "$*" >&2 + exit 1 +} + +host_cpu_count() { + local count + count="$(getconf _NPROCESSORS_ONLN 2>/dev/null || nproc 2>/dev/null || echo 1)" + if [[ ! "${count}" =~ ^[0-9]+$ ]] || (( count < 1 )); then + count=1 + fi + printf '%s\n' "${count}" +} + +default_local_nix_max_jobs() { + local cpu_count="$1" + if (( cpu_count <= 2 )); then + printf '1\n' + return 0 + fi + + printf '%s\n' "$(( (cpu_count + 1) / 2 ))" +} + +default_local_nix_build_cores() { + local cpu_count="$1" + local max_jobs="$2" + local build_cores=1 + + if (( max_jobs > 0 )); then + build_cores="$(( cpu_count / max_jobs ))" + fi + if (( build_cores < 1 )); then + build_cores=1 + fi + + printf '%s\n' "${build_cores}" +} + +append_nix_config_line() { + local line="$1" + if [[ -n "${NIX_CONFIG:-}" ]]; then + NIX_CONFIG+=$'\n' + fi + NIX_CONFIG+="${line}" +} + +host_nested_param_path() { + if [[ -f /sys/module/kvm_intel/parameters/nested ]]; then + printf '%s\n' /sys/module/kvm_intel/parameters/nested + elif [[ -f /sys/module/kvm_amd/parameters/nested ]]; then + printf '%s\n' /sys/module/kvm_amd/parameters/nested + fi +} + +require_publishable_kvm_host() { + [[ -e /dev/kvm ]] || die "/dev/kvm is missing" + [[ -r /dev/kvm && -w /dev/kvm ]] || die "/dev/kvm is not readable and writable for $(id -un)" + + local nested_path nested_value + nested_path="$(host_nested_param_path || true)" + if [[ -z "${nested_path}" ]]; then + return 0 + fi + + nested_value="$(<"${nested_path}")" + case "${nested_value}" in + 1|Y|y) + ;; + *) + die "nested virtualization is disabled on the host (${nested_path}=${nested_value})" + ;; + esac +} + +choose_runtime_root() { + if [[ -n "${ULTRACLOUD_KVM_RUNTIME_ROOT:-}" ]]; then + printf '%s\n' "${ULTRACLOUD_KVM_RUNTIME_ROOT}" + return 0 + fi + + printf '%s\n' "${WORK_ROOT}/publishable-kvm-runtime" +} + +get_hostname() { + if command -v hostname >/dev/null 2>&1; then + hostname + else + uname -n + fi +} + +prepare_runtime_dirs() { + local runtime_root cpu_count default_max_jobs default_build_cores + + runtime_root="$(choose_runtime_root)" + cpu_count="$(host_cpu_count)" + default_max_jobs="$(default_local_nix_max_jobs "${cpu_count}")" + default_build_cores="$(default_local_nix_build_cores "${cpu_count}" "${default_max_jobs}")" + export ULTRACLOUD_KVM_RUNTIME_ROOT="${runtime_root}" + export TMPDIR="${TMPDIR:-${WORK_ROOT}/tmp}" + export XDG_CACHE_HOME="${XDG_CACHE_HOME:-${runtime_root}/xdg-cache}" + export PHOTON_CLUSTER_WORK_ROOT="${PHOTON_CLUSTER_WORK_ROOT:-${WORK_ROOT}/test-cluster}" + export PHOTON_VM_DIR="${PHOTON_VM_DIR:-${PHOTON_CLUSTER_WORK_ROOT}/state}" + export PHOTON_CLUSTER_VDE_SWITCH_DIR="${PHOTON_CLUSTER_VDE_SWITCH_DIR:-${PHOTON_CLUSTER_WORK_ROOT}/vde-switch}" + export ULTRACLOUD_LOCAL_NIX_MAX_JOBS="${ULTRACLOUD_LOCAL_NIX_MAX_JOBS:-${default_max_jobs}}" + export ULTRACLOUD_LOCAL_NIX_BUILD_CORES="${ULTRACLOUD_LOCAL_NIX_BUILD_CORES:-${default_build_cores}}" + export PHOTON_CLUSTER_NIX_MAX_JOBS="${PHOTON_CLUSTER_NIX_MAX_JOBS:-${ULTRACLOUD_LOCAL_NIX_MAX_JOBS}}" + export PHOTON_CLUSTER_NIX_BUILD_CORES="${PHOTON_CLUSTER_NIX_BUILD_CORES:-${ULTRACLOUD_LOCAL_NIX_BUILD_CORES}}" + export PHOTON_VM_SKIP_NESTED_KVM_VALIDATE=0 + export PHOTON_VM_FORCE_TCG=0 + append_nix_config_line "builders =" + append_nix_config_line 
"max-jobs = ${ULTRACLOUD_LOCAL_NIX_MAX_JOBS}" + append_nix_config_line "cores = ${ULTRACLOUD_LOCAL_NIX_BUILD_CORES}" + append_nix_config_line "experimental-features = nix-command flakes" + append_nix_config_line "warn-dirty = false" + export NIX_CONFIG + mkdir -p "${TMPDIR}" "${XDG_CACHE_HOME}" "${PHOTON_CLUSTER_WORK_ROOT}" "${PHOTON_CLUSTER_VDE_SWITCH_DIR}" "${runtime_root}" +} + +capture_environment() { + { + printf 'started_at=%s\n' "$(date -Is)" + printf 'hostname=%s\n' "$(get_hostname)" + printf 'kernel=%s\n' "$(uname -a)" + printf 'pwd=%s\n' "$(pwd)" + printf 'user=%s\n' "$(id -un)" + printf 'uid=%s\n' "$(id -u)" + printf 'gid=%s\n' "$(id -g)" + printf 'branch=%s\n' "$(git -C "${REPO_ROOT}" branch --show-current)" + printf 'commit=%s\n' "$(git -C "${REPO_ROOT}" rev-parse HEAD)" + printf 'nix_version=%s\n' "$(nix --version)" + printf 'runtime_root=%s\n' "${ULTRACLOUD_KVM_RUNTIME_ROOT:-}" + printf 'tmpdir=%s\n' "${TMPDIR:-}" + printf 'xdg_cache_home=%s\n' "${XDG_CACHE_HOME:-}" + printf 'photon_cluster_work_root=%s\n' "${PHOTON_CLUSTER_WORK_ROOT:-}" + printf 'photon_vm_dir=%s\n' "${PHOTON_VM_DIR:-}" + printf 'photon_cluster_vde_switch_dir=%s\n' "${PHOTON_CLUSTER_VDE_SWITCH_DIR:-}" + printf 'host_cpu_count=%s\n' "$(host_cpu_count)" + printf 'local_nix_max_jobs=%s\n' "${ULTRACLOUD_LOCAL_NIX_MAX_JOBS:-}" + printf 'local_nix_build_cores=%s\n' "${ULTRACLOUD_LOCAL_NIX_BUILD_CORES:-}" + printf 'photon_cluster_nix_max_jobs=%s\n' "${PHOTON_CLUSTER_NIX_MAX_JOBS:-}" + printf 'photon_cluster_nix_build_cores=%s\n' "${PHOTON_CLUSTER_NIX_BUILD_CORES:-}" + printf 'nested_kvm_validate_skipped=%s\n' "${PHOTON_VM_SKIP_NESTED_KVM_VALIDATE:-0}" + printf 'vm_accelerator_mode=%s\n' "$([[ "${PHOTON_VM_FORCE_TCG:-0}" == "1" ]] && echo tcg || echo kvm)" + printf 'photon_vm_console_wait_timeout=%s\n' "${PHOTON_VM_CONSOLE_WAIT_TIMEOUT:-}" + printf 'kvm_present=%s\n' "$([[ -e /dev/kvm ]] && echo yes || echo no)" + printf 'kvm_access=%s\n' "$([[ -r /dev/kvm && -w /dev/kvm ]] && echo rw || echo no)" + if [[ -e /dev/kvm ]]; then + printf 'kvm_stat=%s\n' "$(stat -c '%A %U %G %t:%T' /dev/kvm)" + fi + if [[ -f /sys/module/kvm_intel/parameters/nested ]]; then + printf 'kvm_intel_nested=%s\n' "$(cat /sys/module/kvm_intel/parameters/nested)" + fi + if [[ -f /sys/module/kvm_amd/parameters/nested ]]; then + printf 'kvm_amd_nested=%s\n' "$(cat /sys/module/kvm_amd/parameters/nested)" + fi + printf 'nix_builders=%s\n' "$(nix config show builders 2>/dev/null | awk -F' = ' 'NR==1 { print $2 }')" + } >"${LOG_DIR}/environment.txt" +} + +finish_environment() { + local rc="${1:-0}" + { + printf 'finished_at=%s\n' "$(date -Is)" + printf 'exit_status=%s\n' "${rc}" + } >>"${LOG_DIR}/environment.txt" +} + +run_case() { + local name="$1" + shift + local logfile="${LOG_DIR}/${name}.log" + local metafile="${LOG_DIR}/${name}.meta" + local started_at ended_at rc + + started_at="$(date -Is)" + printf 'command=%s\n' "$*" >"${metafile}" + printf 'started_at=%s\n' "${started_at}" >>"${metafile}" + + log "running ${name}: $*" + set +e + ( + cd "${REPO_ROOT}" + "$@" + ) 2>&1 | tee "${logfile}" + rc=${PIPESTATUS[0]} + set -e + + ended_at="$(date -Is)" + printf 'ended_at=%s\n' "${ended_at}" >>"${metafile}" + printf 'exit_code=%s\n' "${rc}" >>"${metafile}" + + if (( rc != 0 )); then + log "${name} failed; see ${logfile}" + return "${rc}" + fi + + log "${name} passed" +} + +main() { + trap 'finish_environment "$?"' EXIT + prepare_runtime_dirs + require_publishable_kvm_host + capture_environment + + run_case fresh-smoke nix run ./nix/test-cluster#cluster 
-- fresh-smoke + run_case fresh-demo-vm-webapp nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp + run_case fresh-matrix nix run ./nix/test-cluster#cluster -- fresh-matrix + + log "publishable KVM suite passed; logs in ${LOG_DIR}" +} + +main "$@" diff --git a/nix/test-cluster/run-supported-surface-final-proof.sh b/nix/test-cluster/run-supported-surface-final-proof.sh new file mode 100755 index 0000000..35d60a0 --- /dev/null +++ b/nix/test-cluster/run-supported-surface-final-proof.sh @@ -0,0 +1,196 @@ +#!/usr/bin/env bash +set -euo pipefail + +export PATH="/run/current-system/sw/bin:/usr/bin:/bin:${PATH}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" +TASK_ID="32f64c10-1b74-4d8a-8d7d-b2cc6bf6b4f0" +WORK_ROOT="${ULTRACLOUD_WORK_ROOT:-${REPO_ROOT}/work}" +LOG_ROOT="${1:-${WORK_ROOT}/final-proofs/${TASK_ID}}" + +mkdir -p "${LOG_ROOT}" + +log() { + printf '[supported-surface-final-proof] %s\n' "$*" +} + +host_cpu_count() { + local count + count="$(getconf _NPROCESSORS_ONLN 2>/dev/null || nproc 2>/dev/null || echo 1)" + if [[ ! "${count}" =~ ^[0-9]+$ ]] || (( count < 1 )); then + count=1 + fi + printf '%s\n' "${count}" +} + +default_local_nix_max_jobs() { + local cpu_count="$1" + if (( cpu_count <= 2 )); then + printf '1\n' + return 0 + fi + printf '%s\n' "$(( (cpu_count + 1) / 2 ))" +} + +default_local_nix_build_cores() { + local cpu_count="$1" + local max_jobs="$2" + local build_cores=1 + if (( max_jobs > 0 )); then + build_cores="$(( cpu_count / max_jobs ))" + fi + if (( build_cores < 1 )); then + build_cores=1 + fi + printf '%s\n' "${build_cores}" +} + +append_nix_config_line() { + local line="$1" + if [[ -n "${NIX_CONFIG:-}" ]]; then + NIX_CONFIG+=$'\n' + fi + NIX_CONFIG+="${line}" +} + +host_nested_param_path() { + if [[ -f /sys/module/kvm_intel/parameters/nested ]]; then + printf '%s\n' /sys/module/kvm_intel/parameters/nested + elif [[ -f /sys/module/kvm_amd/parameters/nested ]]; then + printf '%s\n' /sys/module/kvm_amd/parameters/nested + fi +} + +prepare_runtime_dirs() { + local cpu_count default_max_jobs default_build_cores + + cpu_count="$(host_cpu_count)" + default_max_jobs="$(default_local_nix_max_jobs "${cpu_count}")" + default_build_cores="$(default_local_nix_build_cores "${cpu_count}" "${default_max_jobs}")" + + export ULTRACLOUD_WORK_ROOT="${WORK_ROOT}" + export TMPDIR="${TMPDIR:-${WORK_ROOT}/tmp}" + export XDG_CACHE_HOME="${XDG_CACHE_HOME:-${WORK_ROOT}/xdg-cache}" + export PHOTON_CLUSTER_WORK_ROOT="${PHOTON_CLUSTER_WORK_ROOT:-${WORK_ROOT}/test-cluster}" + export PHOTON_VM_DIR="${PHOTON_VM_DIR:-${PHOTON_CLUSTER_WORK_ROOT}/state}" + export PHOTON_CLUSTER_VDE_SWITCH_DIR="${PHOTON_CLUSTER_VDE_SWITCH_DIR:-${PHOTON_CLUSTER_WORK_ROOT}/vde-switch}" + export ULTRACLOUD_LOCAL_NIX_MAX_JOBS="${ULTRACLOUD_LOCAL_NIX_MAX_JOBS:-${default_max_jobs}}" + export ULTRACLOUD_LOCAL_NIX_BUILD_CORES="${ULTRACLOUD_LOCAL_NIX_BUILD_CORES:-${default_build_cores}}" + export PHOTON_CLUSTER_NIX_MAX_JOBS="${PHOTON_CLUSTER_NIX_MAX_JOBS:-${ULTRACLOUD_LOCAL_NIX_MAX_JOBS}}" + export PHOTON_CLUSTER_NIX_BUILD_CORES="${PHOTON_CLUSTER_NIX_BUILD_CORES:-${ULTRACLOUD_LOCAL_NIX_BUILD_CORES}}" + + append_nix_config_line "builders =" + append_nix_config_line "max-jobs = ${ULTRACLOUD_LOCAL_NIX_MAX_JOBS}" + append_nix_config_line "cores = ${ULTRACLOUD_LOCAL_NIX_BUILD_CORES}" + append_nix_config_line "experimental-features = nix-command flakes" + append_nix_config_line "warn-dirty = false" + export NIX_CONFIG + + mkdir -p \ + "${TMPDIR}" \ + 
"${XDG_CACHE_HOME}" \ + "${PHOTON_CLUSTER_WORK_ROOT}" \ + "${PHOTON_CLUSTER_VDE_SWITCH_DIR}" \ + "${LOG_ROOT}" +} + +capture_environment() { + { + printf 'task_id=%s\n' "${TASK_ID}" + printf 'started_at=%s\n' "$(date -Is)" + printf 'repo_root=%s\n' "${REPO_ROOT}" + printf 'work_root=%s\n' "${WORK_ROOT}" + printf 'log_root=%s\n' "${LOG_ROOT}" + printf 'branch=%s\n' "$(git -C "${REPO_ROOT}" branch --show-current)" + printf 'commit=%s\n' "$(git -C "${REPO_ROOT}" rev-parse HEAD)" + printf 'host_cpu_count=%s\n' "$(host_cpu_count)" + printf 'ultracloud_local_nix_max_jobs=%s\n' "${ULTRACLOUD_LOCAL_NIX_MAX_JOBS}" + printf 'ultracloud_local_nix_build_cores=%s\n' "${ULTRACLOUD_LOCAL_NIX_BUILD_CORES}" + printf 'photon_cluster_nix_max_jobs=%s\n' "${PHOTON_CLUSTER_NIX_MAX_JOBS}" + printf 'photon_cluster_nix_build_cores=%s\n' "${PHOTON_CLUSTER_NIX_BUILD_CORES}" + printf 'tmpdir=%s\n' "${TMPDIR}" + printf 'xdg_cache_home=%s\n' "${XDG_CACHE_HOME}" + printf 'photon_cluster_work_root=%s\n' "${PHOTON_CLUSTER_WORK_ROOT}" + printf 'photon_vm_dir=%s\n' "${PHOTON_VM_DIR}" + printf 'photon_cluster_vde_switch_dir=%s\n' "${PHOTON_CLUSTER_VDE_SWITCH_DIR}" + printf 'nix_version=%s\n' "$(nix --version)" + printf 'nix_builders=%s\n' "$(nix config show builders 2>/dev/null | awk -F' = ' 'NR==1 { print $2 }')" + printf 'kvm_present=%s\n' "$([[ -e /dev/kvm ]] && echo yes || echo no)" + printf 'kvm_access=%s\n' "$([[ -r /dev/kvm && -w /dev/kvm ]] && echo rw || echo no)" + if [[ -e /dev/kvm ]]; then + printf 'kvm_stat=%s\n' "$(stat -c '%A %U %G %t:%T' /dev/kvm)" + fi + local nested_path + nested_path="$(host_nested_param_path || true)" + if [[ -n "${nested_path}" ]]; then + printf 'nested_param_path=%s\n' "${nested_path}" + printf 'nested_param_value=%s\n' "$(<"${nested_path}")" + fi + } >"${LOG_ROOT}/environment.txt" +} + +run_case() { + local name="$1" + local timeout_secs="$2" + shift 2 + + local logfile="${LOG_ROOT}/${name}.log" + local metafile="${LOG_ROOT}/${name}.meta" + local started_at ended_at rc + + started_at="$(date -Is)" + printf 'name=%s\n' "${name}" >"${metafile}" + printf 'started_at=%s\n' "${started_at}" >>"${metafile}" + printf 'timeout_secs=%s\n' "${timeout_secs}" >>"${metafile}" + printf 'command=' >>"${metafile}" + printf '%q ' "$@" >>"${metafile}" + printf '\n' >>"${metafile}" + + log "running ${name}: $*" + set +e + ( + cd "${REPO_ROOT}" + timeout --signal=TERM --kill-after=120 "${timeout_secs}" "$@" + ) 2>&1 | tee "${logfile}" + rc=${PIPESTATUS[0]} + set -e + + ended_at="$(date -Is)" + printf 'ended_at=%s\n' "${ended_at}" >>"${metafile}" + printf 'exit_code=%s\n' "${rc}" >>"${metafile}" + + if (( rc == 124 )); then + log "${name} timed out after ${timeout_secs}s" + elif (( rc == 0 )); then + log "${name} passed" + else + log "${name} failed with exit ${rc}" + fi + + return "${rc}" +} + +main() { + prepare_runtime_dirs + capture_environment + + run_case supported-surface-guard 3600 \ + nix build .#checks.x86_64-linux.supported-surface-guard --no-link + run_case single-node-trial-vm 14400 \ + nix build .#single-node-trial-vm --no-link --print-out-paths + run_case single-node-quickstart 14400 \ + nix run .#single-node-quickstart + run_case fresh-smoke 28800 \ + nix run ./nix/test-cluster#cluster -- fresh-smoke + run_case fresh-demo-vm-webapp 43200 \ + nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp + run_case fresh-matrix 43200 \ + nix run ./nix/test-cluster#cluster -- fresh-matrix + run_case publishable-kvm-suite 86400 \ + ./nix/test-cluster/run-publishable-kvm-suite.sh 
"${LOG_ROOT}/publishable-kvm-suite" + + log "supported surface final proof passed; logs in ${LOG_ROOT}" +} + +main "$@" diff --git a/nix/test-cluster/verify-baremetal-iso.sh b/nix/test-cluster/verify-baremetal-iso.sh new file mode 100644 index 0000000..05b09a8 --- /dev/null +++ b/nix/test-cluster/verify-baremetal-iso.sh @@ -0,0 +1,1098 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="${ULTRACLOUD_REPO_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)}" +DEFAULT_WORK_ROOT="${ULTRACLOUD_WORK_ROOT:-}" + +CLUSTER_ID="${ULTRACLOUD_BAREMETAL_CLUSTER_ID:-baremetal-iso-canonical}" +CHAINFIRE_ENDPOINT="http://127.0.0.1:2379" +DEPLOYER_ENDPOINT="http://127.0.0.1:8088" +BINARY_CACHE_ENDPOINT="http://127.0.0.1:8090" +BOOTSTRAP_TOKEN="${ULTRACLOUD_BAREMETAL_BOOTSTRAP_TOKEN:-baremetal-iso-bootstrap-token}" +CONTROL_NODE_CLASS="${ULTRACLOUD_BAREMETAL_CONTROL_NODE_CLASS:-iso-control-plane}" +WORKER_NODE_CLASS="${ULTRACLOUD_BAREMETAL_WORKER_NODE_CLASS:-iso-worker}" +CONTROL_NIXOS_CONFIGURATION="${ULTRACLOUD_BAREMETAL_CONTROL_CONFIGURATION:-baremetal-qemu-control-plane}" +WORKER_NIXOS_CONFIGURATION="${ULTRACLOUD_BAREMETAL_WORKER_CONFIGURATION:-baremetal-qemu-worker}" +CONTROL_DISKO_CONFIG_PATH="${ULTRACLOUD_BAREMETAL_CONTROL_DISKO_CONFIG_PATH:-nix/nodes/baremetal-qemu/control-plane/disko.nix}" +WORKER_DISKO_CONFIG_PATH="${ULTRACLOUD_BAREMETAL_WORKER_DISKO_CONFIG_PATH:-nix/nodes/baremetal-qemu/worker/disko.nix}" +CONTROL_TARGET_DISK_BY_ID="${ULTRACLOUD_BAREMETAL_CONTROL_TARGET_DISK_BY_ID:-/dev/disk/by-id/virtio-uc-control-root}" +WORKER_TARGET_DISK_BY_ID="${ULTRACLOUD_BAREMETAL_WORKER_TARGET_DISK_BY_ID:-/dev/disk/by-id/virtio-uc-worker-root}" +CONTROL_DISK_SERIAL="${ULTRACLOUD_BAREMETAL_CONTROL_DISK_SERIAL:-uc-control-root}" +WORKER_DISK_SERIAL="${ULTRACLOUD_BAREMETAL_WORKER_DISK_SERIAL:-uc-worker-root}" +CONTROL_HEALTH_CHECK_PATH="/etc/ultracloud-role-control-plane" +WORKER_HEALTH_CHECK_PATH="/etc/ultracloud-role-worker" +CONTROL_NODE_ID="${ULTRACLOUD_BAREMETAL_CONTROL_NODE_ID:-iso-control-plane-01}" +WORKER_NODE_ID="${ULTRACLOUD_BAREMETAL_WORKER_NODE_ID:-iso-worker-01}" +CONTROL_SSH_PORT="${ULTRACLOUD_BAREMETAL_CONTROL_SSH_PORT:-22231}" +WORKER_SSH_PORT="${ULTRACLOUD_BAREMETAL_WORKER_SSH_PORT:-22232}" +CONTROL_DHCP_START="${ULTRACLOUD_BAREMETAL_CONTROL_DHCP_START:-10.0.2.15}" +WORKER_DHCP_START="${ULTRACLOUD_BAREMETAL_WORKER_DHCP_START:-10.0.2.16}" +CONTROL_DISK_GIB="${ULTRACLOUD_BAREMETAL_CONTROL_DISK_GIB:-18G}" +WORKER_DISK_GIB="${ULTRACLOUD_BAREMETAL_WORKER_DISK_GIB:-18G}" + +log() { + printf '[baremetal-iso-e2e] %s\n' "$*" +} + +marker() { + printf 'ULTRACLOUD_MARKER %s\n' "$*" +} + +die() { + echo "[baremetal-iso-e2e] ERROR: $*" >&2 + exit 1 +} + +require_cmd() { + command -v "$1" >/dev/null 2>&1 || die "required command not found: $1" +} + +host_cpu_count() { + local count + count="$(getconf _NPROCESSORS_ONLN 2>/dev/null || nproc 2>/dev/null || echo 1)" + if [[ ! 
"${count}" =~ ^[0-9]+$ ]] || (( count < 1 )); then + count=1 + fi + printf '%s\n' "${count}" +} + +default_local_nix_max_jobs() { + local cpu_count="$1" + if (( cpu_count <= 2 )); then + printf '1\n' + return 0 + fi + + printf '%s\n' "$(( (cpu_count + 1) / 2 ))" +} + +default_local_nix_build_cores() { + local cpu_count="$1" + local max_jobs="$2" + local build_cores=1 + + if (( max_jobs > 0 )); then + build_cores="$(( cpu_count / max_jobs ))" + fi + if (( build_cores < 1 )); then + build_cores=1 + fi + + printf '%s\n' "${build_cores}" +} + +default_baremetal_vm_vcpus() { + local cpu_count="$1" + if (( cpu_count >= 8 )); then + printf '4\n' + elif (( cpu_count >= 4 )); then + printf '2\n' + else + printf '1\n' + fi +} + +default_baremetal_vm_memory_mib() { + local cpu_count="$1" + if (( cpu_count >= 8 )); then + printf '3072\n' + else + printf '2048\n' + fi +} + +append_nix_config_line() { + local line="$1" + if [[ -n "${NIX_CONFIG:-}" ]]; then + NIX_CONFIG+=$'\n' + fi + NIX_CONFIG+="${line}" +} + +configure_local_nix_execution() { + append_nix_config_line "builders =" + append_nix_config_line "max-jobs = ${LOCAL_NIX_MAX_JOBS}" + append_nix_config_line "cores = ${LOCAL_NIX_BUILD_CORES}" + append_nix_config_line "experimental-features = nix-command flakes" + append_nix_config_line "warn-dirty = false" + export NIX_CONFIG +} + +host_kvm_access() { + [[ -r /dev/kvm && -w /dev/kvm ]] +} + +qemu_machine_args() { + if [[ "${BAREMETAL_VM_ACCELERATOR_MODE}" == "kvm" ]]; then + printf '%s\n' \ + "-machine" "pc,accel=kvm:tcg" \ + "-enable-kvm" \ + "-cpu" "host" + return 0 + fi + + printf '%s\n' \ + "-machine" "pc" \ + "-accel" "tcg,thread=multi" \ + "-cpu" "max" +} + +nix_build_local() { + NIX_BUILD_CORES="${LOCAL_NIX_BUILD_CORES}" nix \ + --option builders '' \ + --option warn-dirty false \ + --max-jobs "${LOCAL_NIX_MAX_JOBS}" \ + build "$@" +} + +resolve_default_work_root() { + if [[ -n "${DEFAULT_WORK_ROOT}" ]]; then + printf '%s\n' "${DEFAULT_WORK_ROOT}" + return 0 + fi + + if [[ -w "${ROOT}" ]]; then + printf '%s\n' "${ROOT}/work" + return 0 + fi + + if [[ -n "${TMPDIR:-}" ]]; then + printf '%s\n' "${TMPDIR}/ultracloud" + return 0 + fi + + printf '%s\n' "/tmp/ultracloud" +} + +resolve_store_path() { + local env_name="$1" + local attr="$2" + if [[ -n "${!env_name:-}" ]]; then + printf '%s\n' "${!env_name}" + return 0 + fi + nix_build_local "$ROOT#$attr" --no-link --print-out-paths +} + +resolve_binary() { + local env_name="$1" + local bin_name="$2" + local attr="$3" + if [[ -n "${!env_name:-}" ]]; then + printf '%s\n' "${!env_name}" + return 0 + fi + if command -v "$bin_name" >/dev/null 2>&1; then + command -v "$bin_name" + return 0 + fi + local out + out="$(nix_build_local "$ROOT#$attr" --no-link --print-out-paths)" + printf '%s/bin/%s\n' "$out" "$bin_name" +} + +resolve_iso_image() { + local candidate="$1" + if [[ -f "$candidate" ]]; then + printf '%s\n' "$candidate" + return 0 + fi + + local iso_dir="$candidate/iso" + if [[ -d "$iso_dir" ]]; then + local iso_path + iso_path="$(find "$iso_dir" -maxdepth 1 -type f -name '*.iso' | head -n 1)" + if [[ -n "$iso_path" ]]; then + printf '%s\n' "$iso_path" + return 0 + fi + fi + + die "unable to resolve a bootable ISO file from $candidate" +} + +resolve_ovmf_firmware() { + local env_name="$1" + local relative_path="$2" + if [[ -n "${!env_name:-}" ]]; then + printf '%s\n' "${!env_name}" + return 0 + fi + + local ovmf_dir + ovmf_dir="$(nix_build_local nixpkgs#OVMF.fd --no-link --print-out-paths)" + printf '%s/%s\n' "$ovmf_dir" "$relative_path" +} + 
+capture_environment() { + { + printf 'started_at=%s\n' "$(date -Is)" + printf 'pwd=%s\n' "$PWD" + printf 'user=%s\n' "$(id -un)" + printf 'uid=%s\n' "$(id -u)" + printf 'gid=%s\n' "$(id -g)" + printf 'work_root=%s\n' "${DEFAULT_WORK_ROOT}" + printf 'state_dir=%s\n' "$TMP_DIR" + printf 'iso_image=%s\n' "$ISO_IMAGE" + printf 'flake_bundle=%s\n' "$FLAKE_BUNDLE" + printf 'bootstrap_token_set=%s\n' "$([[ -n "${BOOTSTRAP_TOKEN}" ]] && echo yes || echo no)" + printf 'control_node_class=%s\n' "$CONTROL_NODE_CLASS" + printf 'worker_node_class=%s\n' "$WORKER_NODE_CLASS" + printf 'control_nixos_configuration=%s\n' "$CONTROL_NIXOS_CONFIGURATION" + printf 'worker_nixos_configuration=%s\n' "$WORKER_NIXOS_CONFIGURATION" + printf 'control_disko_config_path=%s\n' "$CONTROL_DISKO_CONFIG_PATH" + printf 'worker_disko_config_path=%s\n' "$WORKER_DISKO_CONFIG_PATH" + printf 'control_target_disk_by_id=%s\n' "$CONTROL_TARGET_DISK_BY_ID" + printf 'worker_target_disk_by_id=%s\n' "$WORKER_TARGET_DISK_BY_ID" + printf 'control_target=%s\n' "$CONTROL_TARGET_SYSTEM" + printf 'worker_target=%s\n' "$WORKER_TARGET_SYSTEM" + printf 'tmpdir=%s\n' "${TMPDIR:-}" + printf 'host_cpu_count=%s\n' "${HOST_CPU_COUNT}" + printf 'local_nix_max_jobs=%s\n' "${LOCAL_NIX_MAX_JOBS}" + printf 'local_nix_build_cores=%s\n' "${LOCAL_NIX_BUILD_CORES}" + printf 'vm_accelerator_mode=%s\n' "${BAREMETAL_VM_ACCELERATOR_MODE}" + printf 'vm_vcpus=%s\n' "${BAREMETAL_VM_VCPUS}" + printf 'vm_memory_mib=%s\n' "${BAREMETAL_VM_MEMORY_MIB}" + printf 'kvm_present=%s\n' "$([[ -e /dev/kvm ]] && echo yes || echo no)" + printf 'kvm_access=%s\n' "$([[ -r /dev/kvm && -w /dev/kvm ]] && echo rw || echo no)" + printf 'nix_builders=%s\n' "$(nix config show builders 2>/dev/null | awk -F' = ' 'NR==1 { print $2 }')" + } >"$TMP_DIR/environment.txt" +} + +wait_for_http() { + local url="$1" + local timeout_secs="$2" + local deadline=$((SECONDS + timeout_secs)) + while (( SECONDS < deadline )); do + if curl -fsS "$url" >/dev/null 2>&1; then + return 0 + fi + sleep 1 + done + return 1 +} + +wait_for_log_marker() { + local label="$1" + local log_file="$2" + local needle="$3" + local timeout_secs="$4" + local deadline=$((SECONDS + timeout_secs)) + while (( SECONDS < deadline )); do + if [[ -f "$log_file" ]] && grep -Eq "$needle" "$log_file"; then + log "${label}: observed ${needle}" + return 0 + fi + sleep 2 + done + return 1 +} + +ssh_base() { + local port="$1" + shift + ssh \ + -F /dev/null \ + -i "$SSH_KEY" \ + -o BatchMode=yes \ + -o ConnectTimeout=5 \ + -o ConnectionAttempts=1 \ + -o StrictHostKeyChecking=no \ + -o UserKnownHostsFile=/dev/null \ + -o LogLevel=ERROR \ + -p "$port" \ + root@127.0.0.1 "$@" +} + +wait_for_ssh() { + local label="$1" + local port="$2" + local timeout_secs="$3" + local deadline=$((SECONDS + timeout_secs)) + while (( SECONDS < deadline )); do + if ssh_base "$port" true >/dev/null 2>&1; then + log "${label}: SSH is reachable on port ${port}" + return 0 + fi + sleep 2 + done + return 1 +} + +ssh_shell() { + local port="$1" + local script="$2" + local quoted + printf -v quoted '%q' "$script" + ssh_base "$port" "bash -lc $quoted" +} + +current_system_path() { + local port="$1" + ssh_shell "$port" 'readlink -f /run/current-system' +} + +remote_boot_id() { + local port="$1" + ssh_shell "$port" 'cat /proc/sys/kernel/random/boot_id' +} + +remote_journal_has_marker() { + local port="$1" + local needle="$2" + shift 2 + + local remote_cmd="journalctl -b -o cat --no-pager" + local unit + for unit in "$@"; do + printf -v remote_cmd '%s -u %q' "$remote_cmd" 
"$unit" + done + printf -v remote_cmd '%s | grep -Fq %q' "$remote_cmd" "$needle" + + ssh_shell "$port" "$remote_cmd" +} + +wait_for_remote_journal_marker() { + local label="$1" + local port="$2" + local needle="$3" + local timeout_secs="$4" + shift 4 + + local deadline=$((SECONDS + timeout_secs)) + while (( SECONDS < deadline )); do + if remote_journal_has_marker "$port" "$needle" "$@" >/dev/null 2>&1; then + log "${label}: observed ${needle} via remote journal" + return 0 + fi + sleep 2 + done + return 1 +} + +wait_for_remote_unit_active() { + local label="$1" + local port="$2" + local unit_name="$3" + local timeout_secs="$4" + local deadline=$((SECONDS + timeout_secs)) + + while (( SECONDS < deadline )); do + if ssh_shell "$port" "systemctl is-active ${unit_name} >/dev/null" >/dev/null 2>&1; then + log "${label}: ${unit_name} is active" + return 0 + fi + sleep 2 + done + return 1 +} + +wait_for_reboot_transition() { + local label="$1" + local port="$2" + local previous_boot_id="$3" + local timeout_secs="$4" + local deadline=$((SECONDS + timeout_secs)) + + while (( SECONDS < deadline )); do + local current_boot_id + if current_boot_id="$(remote_boot_id "$port" 2>/dev/null)"; then + if [[ -n "$current_boot_id" && "$current_boot_id" != "$previous_boot_id" ]]; then + log "${label}: reboot completed with boot_id=${current_boot_id}" + return 0 + fi + fi + sleep 2 + done + return 1 +} + +observed_status() { + local node_id="$1" + local payload + if ! payload="$( + "$DEPLOYER_CTL_BIN" \ + --chainfire-endpoint "$CHAINFIRE_ENDPOINT" \ + --cluster-id "$CLUSTER_ID" \ + --cluster-namespace ultracloud \ + --deployer-namespace deployer \ + node inspect \ + --node-id "$node_id" \ + --include-observed-system \ + --format json 2>/dev/null + )"; then + printf 'missing\n' + return 0 + fi + + jq -r '.observed_system.status // "missing"' <<<"$payload" +} + +wait_for_observed_active() { + local node_id="$1" + local timeout_secs="$2" + local deadline=$((SECONDS + timeout_secs)) + while (( SECONDS < deadline )); do + if [[ "$(observed_status "$node_id")" == "active" ]]; then + log "${node_id}: observed-system reached active" + return 0 + fi + sleep 5 + done + return 1 +} + +inspect_node_payload() { + local node_id="$1" + + "$DEPLOYER_CTL_BIN" \ + --chainfire-endpoint "$CHAINFIRE_ENDPOINT" \ + --cluster-id "$CLUSTER_ID" \ + --cluster-namespace ultracloud \ + --deployer-namespace deployer \ + node inspect \ + --node-id "$node_id" \ + --include-desired-system \ + --format json +} + +assert_node_contract() { + local node_id="$1" + local expected_node_class="$2" + local expected_nixos_configuration="$3" + local expected_disko_config_path="$4" + local expected_target_disk_by_id="$5" + local expected_health_check_path="$6" + local expected_target_system="$7" + local payload + + payload="$(inspect_node_payload "$node_id")" \ + || die "${node_id} install contract is not inspectable through deployer-ctl" + + jq -e \ + --arg node_id "$node_id" \ + --arg node_class "$expected_node_class" \ + --arg nixos_configuration "$expected_nixos_configuration" \ + --arg disko_config_path "$expected_disko_config_path" \ + --arg target_disk_by_id "$expected_target_disk_by_id" \ + --arg health_check_path "$expected_health_check_path" \ + --arg target_system "$expected_target_system" \ + ' + .node.node_id == $node_id + and .node.node_class == $node_class + and .node.install_plan.nixos_configuration == $nixos_configuration + and .node.install_plan.disko_config_path == $disko_config_path + and (.node.install_plan.target_disk_by_id // "") == 
$target_disk_by_id + and (.node.install_plan.target_disk // "") == "" + and .desired_system.nixos_configuration == $nixos_configuration + and (.desired_system.target_system // "") == $target_system + and (.desired_system.switch_action // "switch") == "switch" + and (.desired_system.rollback_on_failure // true) == true + and ((.desired_system.health_check_command | if length == 0 then "" else .[-1] end) == $health_check_path) + ' <<<"$payload" >/dev/null \ + || die "${node_id} install contract did not resolve to the expected class/profile defaults" + + log "${node_id}: install contract resolved via node class ${expected_node_class}" +} + +assert_port_free() { + local port="$1" + if ss -ltn "( sport = :$port )" | grep -Fq ":$port"; then + die "port $port is already in use" + fi +} + +start_host_services() { + cat >"$TMP_DIR/chainfire.toml" <"$TMP_DIR/deployer.toml" <"$CHAINFIRE_LOG" 2>&1 & + CHAINFIRE_PID="$!" + + wait_for_http "http://127.0.0.1:8081/health" 120 \ + || die "host Chainfire did not become healthy" + + log "Starting host-side Deployer" + NO_COLOR=1 CLICOLOR=0 RUST_LOG_STYLE=never \ + "$DEPLOYER_SERVER_BIN" --config "$TMP_DIR/deployer.toml" >"$DEPLOYER_LOG" 2>&1 & + DEPLOYER_PID="$!" + + wait_for_http "http://127.0.0.1:8088/health" 120 \ + || die "host Deployer did not become healthy" +} + +seed_binary_cache() { + local path + local nar_rel + local nar_path + local store_base + local store_hash + local nar_hash + local nar_size + local refs + local deriver + + mkdir -p "$NIX_CACHE_DIR/nar" + cat >"$NIX_CACHE_DIR/nix-cache-info" <<'EOF' +StoreDir: /nix/store +WantMassQuery: 1 +Priority: 30 +EOF + + log "Seeding host-local Nix binary cache" + if [[ -n "${ULTRACLOUD_BAREMETAL_CACHE_REGISTRATION:-}" && -f "${ULTRACLOUD_BAREMETAL_CACHE_REGISTRATION}/registration" ]]; then + nix-store --load-db <"${ULTRACLOUD_BAREMETAL_CACHE_REGISTRATION}/registration" + fi + while IFS= read -r path; do + [[ -n "$path" ]] || continue + + store_base="$(basename "$path")" + store_hash="${store_base%%-*}" + nar_rel="nar/${store_base}.nar" + nar_path="$NIX_CACHE_DIR/$nar_rel" + + if [[ ! -f "$nar_path" ]]; then + nix-store --dump "$path" >"$nar_path" + fi + + nar_size="$(stat -c%s "$nar_path")" + nar_hash="$(nix hash file --type sha256 --base32 "$nar_path")" + refs="$(nix-store --query --references "$path" | xargs -r -n1 basename | tr '\n' ' ' | sed 's/ $//')" + deriver="$(nix-store --query --deriver "$path" 2>/dev/null || true)" + deriver="$(basename "$deriver" 2>/dev/null || true)" + + { + echo "StorePath: $path" + echo "URL: $nar_rel" + echo "Compression: none" + echo "FileHash: sha256:$nar_hash" + echo "FileSize: $nar_size" + echo "NarHash: sha256:$nar_hash" + echo "NarSize: $nar_size" + echo "References: $refs" + if [[ -n "$deriver" && "$deriver" != "unknown-deriver" ]]; then + echo "Deriver: $deriver" + fi + } >"$NIX_CACHE_DIR/${store_hash}.narinfo" + done < <( + nix-store --query --requisites \ + "$CONTROL_TARGET_SYSTEM" \ + "$WORKER_TARGET_SYSTEM" \ + "$CONTROL_DISKO_SCRIPT" \ + "$WORKER_DISKO_SCRIPT" \ + | sort -u + ) +} + +start_binary_cache() { + seed_binary_cache + + log "Starting host-local Nix binary cache" + python3 -m http.server 8090 --bind 0.0.0.0 --directory "$NIX_CACHE_DIR" \ + >"$NIX_CACHE_LOG" 2>&1 & + NIX_CACHE_PID="$!" 
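+  # readiness is gated on the /nix-cache-info probe below so installer VMs never race
+  # a cache server that has not started listening yet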
+ + wait_for_http "${BINARY_CACHE_ENDPOINT}/nix-cache-info" 120 \ + || die "host-local Nix binary cache did not become reachable" +} + +apply_cluster_state() { + cat >"$TMP_DIR/cluster-state.yaml" </dev/null + rm -f "$ovmf_vars_path" + cp "$OVMF_VARS_TEMPLATE" "$ovmf_vars_path" + chmod u+w "$ovmf_vars_path" + + nohup "$QEMU_BIN" \ + -name "$label" \ + -smp "${BAREMETAL_VM_VCPUS}" \ + -m "${BAREMETAL_VM_MEMORY_MIB}" \ + -nographic \ + -no-reboot \ + -boot order=dc,once=d,menu=off \ + $(qemu_machine_args) \ + -drive if=pflash,format=raw,readonly=on,file="$OVMF_CODE_FD" \ + -drive if=pflash,format=raw,file="$ovmf_vars_path" \ + -drive id=systemdisk,if=none,file="$disk_path",format=qcow2 \ + -device virtio-blk-pci,bootindex=1,drive=systemdisk,serial="$disk_serial" \ + -cdrom "$ISO_IMAGE" \ + -netdev user,id=user0,hostfwd=tcp:127.0.0.1:${ssh_port}-:22,dhcpstart=${dhcp_start} \ + -device virtio-net-pci,netdev=user0,mac="${mac}" \ + -smbios type=1,product=UltraCloudQEMUBaremetal,serial="${node_id}" \ + >"$log_path" 2>&1 & + echo "$!" >"${log_path}.pid" +} + +launch_installed_vm() { + local label="$1" + local ssh_port="$2" + local dhcp_start="$3" + local mac="$4" + local disk_serial="$5" + local disk_path="$6" + local log_path="$7" + local ovmf_vars_path="${disk_path}.ovmf-vars.fd" + + [[ -f "$ovmf_vars_path" ]] || die "missing OVMF vars file for relaunch: $ovmf_vars_path" + + nohup "$QEMU_BIN" \ + -name "$label" \ + -smp "${BAREMETAL_VM_VCPUS}" \ + -m "${BAREMETAL_VM_MEMORY_MIB}" \ + -nographic \ + $(qemu_machine_args) \ + -drive if=pflash,format=raw,readonly=on,file="$OVMF_CODE_FD" \ + -drive if=pflash,format=raw,file="$ovmf_vars_path" \ + -drive id=systemdisk,if=none,file="$disk_path",format=qcow2 \ + -device virtio-blk-pci,bootindex=1,drive=systemdisk,serial="$disk_serial" \ + -netdev user,id=user0,hostfwd=tcp:127.0.0.1:${ssh_port}-:22,dhcpstart=${dhcp_start} \ + -device virtio-net-pci,netdev=user0,mac="${mac}" \ + >>"$log_path" 2>&1 & + echo "$!" >"${log_path}.pid" +} + +wait_for_pid_exit() { + local label="$1" + local pid_file="$2" + local timeout_secs="$3" + local deadline=$((SECONDS + timeout_secs)) + local pid + + [[ -f "$pid_file" ]] || die "${label} is missing pid file $pid_file" + pid="$(cat "$pid_file")" + while (( SECONDS < deadline )); do + if ! 
kill -0 "$pid" >/dev/null 2>&1; then + log "${label}: QEMU exited after installer-triggered reboot" + return 0 + fi + sleep 2 + done + return 1 +} + +verify_node() { + local node_id="$1" + local ssh_port="$2" + local disk_path="$3" + local log_path="$4" + local expected_role="$5" + local expected_system="$6" + local expected_nixos_configuration="$7" + local expected_node_class="$8" + local expected_disko_config_path="$9" + local expected_target_disk_by_id="${10}" + local expected_health_check_path="${11}" + local dhcp_start="${12}" + local mac="${13}" + local disk_serial="${14}" + + wait_for_log_marker "$node_id" "$TMP_DIR/deployer.log" "Node registered successfully.*node_id=${node_id}" 900 \ + || die "${node_id} never completed /api/v1/phone-home registration" + assert_node_contract \ + "$node_id" \ + "$expected_node_class" \ + "$expected_nixos_configuration" \ + "$expected_disko_config_path" \ + "$expected_target_disk_by_id" \ + "$expected_health_check_path" \ + "$expected_system" + wait_for_ssh "$node_id" "$ssh_port" 900 \ + || die "${node_id} never exposed SSH during the installer boot" + wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER pre-install.boot.${node_id}" 120 \ + ultracloud-bootstrap.service ultracloud-install.service \ + || die "${node_id} never recorded the pre-install boot marker" + wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER pre-install.phone-home.complete.${node_id}" 120 \ + ultracloud-bootstrap.service ultracloud-install.service \ + || die "${node_id} never recorded the phone-home completion marker" + marker "pre-install.${node_id}" + + wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER install.bundle-downloaded.${node_id}" 1200 \ + ultracloud-install.service \ + || die "${node_id} never downloaded the flake bundle" + wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER install.disko.complete.${node_id}" 2400 \ + ultracloud-install.service \ + || die "${node_id} never completed disko" + wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER install.nixos-install.complete.${node_id}" 3600 \ + ultracloud-install.service \ + || die "${node_id} never finished nixos-install" + marker "install.${node_id}" + + wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER reboot.${node_id}" 3600 \ + ultracloud-install.service \ + || die "${node_id} never emitted reboot marker" + marker "reboot.${node_id}" + + wait_for_pid_exit "$node_id" "${log_path}.pid" 300 \ + || die "${node_id} installer VM did not exit after the reboot marker" + launch_installed_vm \ + "ultracloud-baremetal-${node_id}-installed" \ + "$ssh_port" \ + "$dhcp_start" \ + "$mac" \ + "$disk_serial" \ + "$disk_path" \ + "$log_path" + wait_for_ssh "$node_id" "$ssh_port" 1800 \ + || die "${node_id} did not come back over SSH after reboot" + wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER post-install.boot.${node_id}.${expected_role}" 1800 \ + ultracloud-baremetal-postinstall-marker.service \ + || die "${node_id} never emitted post-install marker" + marker "post-install.${node_id}" + + ssh_shell "$ssh_port" 'test -f /etc/ultracloud/node-config.json' + ssh_shell "$ssh_port" 'test -d /var/lib/photon-src/.bundle-inputs/nixpkgs' + wait_for_remote_unit_active "$node_id" "$ssh_port" "nix-agent.service" 180 \ + || die "${node_id} never started nix-agent.service after install" + ssh_shell "$ssh_port" "grep -Fx '${expected_role}' /etc/ultracloud-role" + ssh_shell "$ssh_port" 
"test -b '${expected_target_disk_by_id}'" + if [[ "$expected_role" == "control-plane" ]]; then + wait_for_remote_unit_active "$node_id" "$ssh_port" "chainfire.service" 180 \ + || die "${node_id} never started chainfire.service after install" + fi + + wait_for_observed_active "$node_id" 1200 \ + || die "${node_id} never reached observed-system active" + [[ "$(current_system_path "$ssh_port")" == "$expected_system" ]] \ + || die "${node_id} current system does not match expected target" + marker "desired-system-active.${node_id}" +} + +cleanup() { + local status="$?" + set +e + + if [[ -n "${TMP_DIR:-}" && -d "${TMP_DIR}" ]]; then + { + printf 'finished_at=%s\n' "$(date -Is)" + printf 'exit_status=%s\n' "$status" + } >>"$TMP_DIR/environment.txt" + fi + + for pid_file in "$CONTROL_LOG.pid" "$WORKER_LOG.pid"; do + if [[ -f "$pid_file" ]]; then + pid="$(cat "$pid_file")" + kill "$pid" 2>/dev/null || true + wait "$pid" 2>/dev/null || true + fi + done + + if [[ -n "${DEPLOYER_PID:-}" ]]; then + kill "$DEPLOYER_PID" 2>/dev/null || true + wait "$DEPLOYER_PID" 2>/dev/null || true + fi + if [[ -n "${CHAINFIRE_PID:-}" ]]; then + kill "$CHAINFIRE_PID" 2>/dev/null || true + wait "$CHAINFIRE_PID" 2>/dev/null || true + fi + if [[ -n "${NIX_CACHE_PID:-}" ]]; then + kill "$NIX_CACHE_PID" 2>/dev/null || true + wait "$NIX_CACHE_PID" 2>/dev/null || true + fi + + if (( status != 0 )); then + log "control-plane serial log tail:" + tail -n 120 "$CONTROL_LOG" 2>/dev/null || true + log "worker serial log tail:" + tail -n 120 "$WORKER_LOG" 2>/dev/null || true + log "deployer log tail:" + tail -n 120 "$DEPLOYER_LOG" 2>/dev/null || true + log "chainfire log tail:" + tail -n 120 "$CHAINFIRE_LOG" 2>/dev/null || true + log "binary cache log tail:" + tail -n 120 "$NIX_CACHE_LOG" 2>/dev/null || true + fi + + if [[ "${KEEP_STATE_DIR:-0}" != "1" ]]; then + rm -rf "$TMP_DIR" + fi + exit "$status" +} + +main() { + DEFAULT_WORK_ROOT="$(resolve_default_work_root)" + HOST_CPU_COUNT="$(host_cpu_count)" + LOCAL_NIX_MAX_JOBS="${ULTRACLOUD_BAREMETAL_NIX_MAX_JOBS:-${ULTRACLOUD_LOCAL_NIX_MAX_JOBS:-$(default_local_nix_max_jobs "${HOST_CPU_COUNT}")}}" + LOCAL_NIX_BUILD_CORES="${ULTRACLOUD_BAREMETAL_NIX_BUILD_CORES:-${ULTRACLOUD_LOCAL_NIX_BUILD_CORES:-$(default_local_nix_build_cores "${HOST_CPU_COUNT}" "${LOCAL_NIX_MAX_JOBS}")}}" + BAREMETAL_VM_VCPUS="${ULTRACLOUD_BAREMETAL_VM_VCPUS:-$(default_baremetal_vm_vcpus "${HOST_CPU_COUNT}")}" + BAREMETAL_VM_MEMORY_MIB="${ULTRACLOUD_BAREMETAL_VM_MEMORY_MIB:-$(default_baremetal_vm_memory_mib "${HOST_CPU_COUNT}")}" + if [[ "${ULTRACLOUD_BAREMETAL_FORCE_TCG:-0}" == "1" ]]; then + BAREMETAL_VM_ACCELERATOR_MODE="tcg" + elif host_kvm_access; then + BAREMETAL_VM_ACCELERATOR_MODE="kvm" + else + BAREMETAL_VM_ACCELERATOR_MODE="tcg" + fi + configure_local_nix_execution + + require_cmd curl + require_cmd jq + require_cmd nix + require_cmd python3 + require_cmd qemu-img + require_cmd qemu-system-x86_64 + require_cmd ssh + require_cmd ssh-keygen + require_cmd ss + + ISO_IMAGE="$(resolve_iso_image "$(resolve_store_path ULTRACLOUD_BAREMETAL_ISO_IMAGE 'nixosConfigurations.ultracloud-iso.config.system.build.isoImage')")" + FLAKE_BUNDLE="$(resolve_store_path ULTRACLOUD_BAREMETAL_FLAKE_BUNDLE 'packages.x86_64-linux.ultracloudFlakeBundle')" + CONTROL_TARGET_SYSTEM="$(resolve_store_path ULTRACLOUD_BAREMETAL_CONTROL_TARGET 'nixosConfigurations.baremetal-qemu-control-plane.config.system.build.toplevel')" + WORKER_TARGET_SYSTEM="$(resolve_store_path ULTRACLOUD_BAREMETAL_WORKER_TARGET 
'nixosConfigurations.baremetal-qemu-worker.config.system.build.toplevel')" + CONTROL_DISKO_SCRIPT="$(resolve_store_path ULTRACLOUD_BAREMETAL_CONTROL_DISKO_SCRIPT 'nixosConfigurations.baremetal-qemu-control-plane.config.system.build.formatMount')" + WORKER_DISKO_SCRIPT="$(resolve_store_path ULTRACLOUD_BAREMETAL_WORKER_DISKO_SCRIPT 'nixosConfigurations.baremetal-qemu-worker.config.system.build.formatMount')" + CHAINFIRE_BIN="$(resolve_binary ULTRACLOUD_CHAINFIRE_SERVER_BIN chainfire 'packages.x86_64-linux.chainfire-server')" + DEPLOYER_SERVER_BIN="$(resolve_binary ULTRACLOUD_DEPLOYER_SERVER_BIN deployer-server 'packages.x86_64-linux.deployer-server')" + DEPLOYER_CTL_BIN="$(resolve_binary ULTRACLOUD_DEPLOYER_CTL_BIN deployer-ctl 'packages.x86_64-linux.deployer-ctl')" + OVMF_CODE_FD="$(resolve_ovmf_firmware ULTRACLOUD_OVMF_CODE 'FV/OVMF_CODE.fd')" + OVMF_VARS_TEMPLATE="$(resolve_ovmf_firmware ULTRACLOUD_OVMF_VARS 'FV/OVMF_VARS.fd')" + QEMU_BIN="${ULTRACLOUD_QEMU_BIN:-$(command -v qemu-system-x86_64)}" + QEMU_IMG_BIN="${ULTRACLOUD_QEMU_IMG_BIN:-$(command -v qemu-img)}" + + if [[ -n "${ULTRACLOUD_BAREMETAL_STATE_DIR:-}" ]]; then + TMP_DIR="$ULTRACLOUD_BAREMETAL_STATE_DIR" + KEEP_STATE_DIR=1 + mkdir -p "$TMP_DIR" + find "$TMP_DIR" -mindepth 1 -maxdepth 1 \ + ! -name nix-cache \ + -exec rm -rf {} + + else + TMP_DIR="${DEFAULT_WORK_ROOT}/baremetal-iso" + KEEP_STATE_DIR=1 + mkdir -p "$TMP_DIR" + find "$TMP_DIR" -mindepth 1 -maxdepth 1 \ + ! -name nix-cache \ + -exec rm -rf {} + + fi + export TMPDIR="${TMPDIR:-${DEFAULT_WORK_ROOT}/tmp}" + export XDG_CACHE_HOME="${XDG_CACHE_HOME:-${DEFAULT_WORK_ROOT}/xdg-cache}" + mkdir -p "$TMPDIR" + mkdir -p "$XDG_CACHE_HOME" + NIX_CACHE_DIR="$TMP_DIR/nix-cache" + CONTROL_LOG="$TMP_DIR/control-plane.serial.log" + WORKER_LOG="$TMP_DIR/worker.serial.log" + DEPLOYER_LOG="$TMP_DIR/deployer.log" + CHAINFIRE_LOG="$TMP_DIR/chainfire.log" + NIX_CACHE_LOG="$TMP_DIR/nix-cache.log" + trap cleanup EXIT + + SSH_KEY="$TMP_DIR/id_ed25519" + ssh-keygen -q -t ed25519 -N "" -f "$SSH_KEY" >/dev/null + SSH_PUBKEY="$(tr -d '\n' <"$SSH_KEY.pub")" + capture_environment + + assert_port_free 2379 + assert_port_free 8081 + assert_port_free 8088 + assert_port_free 8090 + assert_port_free "$CONTROL_SSH_PORT" + assert_port_free "$WORKER_SSH_PORT" + + start_binary_cache + start_host_services + apply_cluster_state + + launch_iso_vm \ + "ultracloud-baremetal-control-plane" \ + "$CONTROL_NODE_ID" \ + "$CONTROL_SSH_PORT" \ + "$CONTROL_DHCP_START" \ + "52:54:00:11:22:31" \ + "$CONTROL_DISK_SERIAL" \ + "$CONTROL_DISK_GIB" \ + "$TMP_DIR/control-plane.qcow2" \ + "$CONTROL_LOG" + + verify_node \ + "$CONTROL_NODE_ID" \ + "$CONTROL_SSH_PORT" \ + "$TMP_DIR/control-plane.qcow2" \ + "$CONTROL_LOG" \ + "control-plane" \ + "$CONTROL_TARGET_SYSTEM" \ + "$CONTROL_NIXOS_CONFIGURATION" \ + "$CONTROL_NODE_CLASS" \ + "$CONTROL_DISKO_CONFIG_PATH" \ + "$CONTROL_TARGET_DISK_BY_ID" \ + "$CONTROL_HEALTH_CHECK_PATH" \ + "$CONTROL_DHCP_START" \ + "52:54:00:11:22:31" \ + "$CONTROL_DISK_SERIAL" + + launch_iso_vm \ + "ultracloud-baremetal-worker" \ + "$WORKER_NODE_ID" \ + "$WORKER_SSH_PORT" \ + "$WORKER_DHCP_START" \ + "52:54:00:11:22:32" \ + "$WORKER_DISK_SERIAL" \ + "$WORKER_DISK_GIB" \ + "$TMP_DIR/worker.qcow2" \ + "$WORKER_LOG" + + verify_node \ + "$WORKER_NODE_ID" \ + "$WORKER_SSH_PORT" \ + "$TMP_DIR/worker.qcow2" \ + "$WORKER_LOG" \ + "worker" \ + "$WORKER_TARGET_SYSTEM" \ + "$WORKER_NIXOS_CONFIGURATION" \ + "$WORKER_NODE_CLASS" \ + "$WORKER_DISKO_CONFIG_PATH" \ + "$WORKER_TARGET_DISK_BY_ID" \ + 
"$WORKER_HEALTH_CHECK_PATH" \ + "$WORKER_DHCP_START" \ + "52:54:00:11:22:32" \ + "$WORKER_DISK_SERIAL" + + log "Canonical ISO bare-metal QEMU verification succeeded" +} + +main "$@" diff --git a/nix/test-cluster/vm-guest-image.nix b/nix/test-cluster/vm-guest-image.nix index 9f83ea0..48dfeac 100644 --- a/nix/test-cluster/vm-guest-image.nix +++ b/nix/test-cluster/vm-guest-image.nix @@ -376,11 +376,11 @@ let
- +
- +
diff --git a/nix/test-cluster/work-root-budget.sh b/nix/test-cluster/work-root-budget.sh new file mode 100755 index 0000000..35317ff --- /dev/null +++ b/nix/test-cluster/work-root-budget.sh @@ -0,0 +1,238 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" +WORK_ROOT="${ULTRACLOUD_WORK_ROOT:-${REPO_ROOT}/work}" +PHOTON_CLUSTER_WORK_ROOT="${PHOTON_CLUSTER_WORK_ROOT:-${WORK_ROOT}/test-cluster}" + +usage() { + cat <<'EOF' +Usage: + ./nix/test-cluster/work-root-budget.sh status + ./nix/test-cluster/work-root-budget.sh enforce + ./nix/test-cluster/work-root-budget.sh cleanup-advice + ./nix/test-cluster/work-root-budget.sh prune-proof-logs [keep-count] [--apply] +EOF +} + +size_bytes() { + local path="$1" + if [[ -e "${path}" ]]; then + du -sb "${path}" | awk '{print $1}' + else + printf '0\n' + fi +} + +human_size() { + numfmt --to=iec --suffix=B "$1" +} + +proof_roots() { + printf '%s\n' \ + "${WORK_ROOT}/publishable-kvm-suite" \ + "${WORK_ROOT}/final-proofs" \ + "${WORK_ROOT}/durability-proof" \ + "${WORK_ROOT}/rollout-soak" \ + "${WORK_ROOT}/provider-vm-reality-proof" \ + "${WORK_ROOT}/baremetal-iso-e2e" \ + "${WORK_ROOT}/core-control-plane-ops-proof" \ + "${WORK_ROOT}/hardware-smoke" +} + +report_path() { + local label="$1" + local path="$2" + local budget_bytes="$3" + local size + + size="$(size_bytes "${path}")" + printf '%-28s %10s %s' "${label}" "$(human_size "${size}")" "${path}" + if (( budget_bytes > 0 )); then + printf ' [soft budget %s' "$(human_size "${budget_bytes}")" + if (( size > budget_bytes )); then + printf ', over budget' + fi + printf ']' + fi + printf '\n' +} + +status() { + local work_budget=$((60 * 1024 * 1024 * 1024)) + local state_budget=$((35 * 1024 * 1024 * 1024)) + local transient_budget=$((10 * 1024 * 1024 * 1024)) + local proof_budget=$((20 * 1024 * 1024 * 1024)) + + echo "UltraCloud work-root disk budget status" + echo "repo_root=${REPO_ROOT}" + echo "work_root=${WORK_ROOT}" + echo "photon_cluster_work_root=${PHOTON_CLUSTER_WORK_ROOT}" + echo + + report_path "work root" "${WORK_ROOT}" "${work_budget}" + report_path "cluster state" "${PHOTON_CLUSTER_WORK_ROOT}/state" "${state_budget}" + report_path "tmp" "${WORK_ROOT}/tmp" 0 + report_path "publishable runtime" "${WORK_ROOT}/publishable-kvm-runtime" 0 + report_path "publishable logs" "${WORK_ROOT}/publishable-kvm-suite" 0 + report_path "final proofs" "${WORK_ROOT}/final-proofs" 0 + report_path "durability proof" "${WORK_ROOT}/durability-proof" 0 + report_path "rollout soak" "${WORK_ROOT}/rollout-soak" 0 + report_path "provider or vm proof" "${WORK_ROOT}/provider-vm-reality-proof" 0 + report_path "baremetal exact proof" "${WORK_ROOT}/baremetal-iso-e2e" 0 + report_path "control-plane proof" "${WORK_ROOT}/core-control-plane-ops-proof" 0 + report_path "hardware smoke" "${WORK_ROOT}/hardware-smoke" 0 + echo + + local transient_size proof_size + transient_size=$(( $(size_bytes "${WORK_ROOT}/tmp") + $(size_bytes "${WORK_ROOT}/publishable-kvm-runtime") )) + proof_size=$(( $(size_bytes "${WORK_ROOT}/publishable-kvm-suite") + $(size_bytes "${WORK_ROOT}/final-proofs") + $(size_bytes "${WORK_ROOT}/durability-proof") + $(size_bytes "${WORK_ROOT}/rollout-soak") + $(size_bytes "${WORK_ROOT}/provider-vm-reality-proof") + $(size_bytes "${WORK_ROOT}/baremetal-iso-e2e") + $(size_bytes "${WORK_ROOT}/core-control-plane-ops-proof") + $(size_bytes "${WORK_ROOT}/hardware-smoke") )) + + printf 'transient total: %s (soft budget %s)\n' 
"$(human_size "${transient_size}")" "$(human_size "${transient_budget}")" + printf 'proof logs total: %s (soft budget %s)\n' "$(human_size "${proof_size}")" "$(human_size "${proof_budget}")" +} + +budget_overages() { + local work_budget=$((60 * 1024 * 1024 * 1024)) + local state_budget=$((35 * 1024 * 1024 * 1024)) + local transient_budget=$((10 * 1024 * 1024 * 1024)) + local proof_budget=$((20 * 1024 * 1024 * 1024)) + local work_size state_size transient_size proof_size overages=0 + + work_size="$(size_bytes "${WORK_ROOT}")" + state_size="$(size_bytes "${PHOTON_CLUSTER_WORK_ROOT}/state")" + transient_size=$(( $(size_bytes "${WORK_ROOT}/tmp") + $(size_bytes "${WORK_ROOT}/publishable-kvm-runtime") )) + proof_size=$(( $(size_bytes "${WORK_ROOT}/publishable-kvm-suite") + $(size_bytes "${WORK_ROOT}/final-proofs") + $(size_bytes "${WORK_ROOT}/durability-proof") + $(size_bytes "${WORK_ROOT}/rollout-soak") + $(size_bytes "${WORK_ROOT}/provider-vm-reality-proof") + $(size_bytes "${WORK_ROOT}/baremetal-iso-e2e") + $(size_bytes "${WORK_ROOT}/core-control-plane-ops-proof") + $(size_bytes "${WORK_ROOT}/hardware-smoke") )) + + (( work_size > work_budget )) && ((overages += 1)) + (( state_size > state_budget )) && ((overages += 1)) + (( transient_size > transient_budget )) && ((overages += 1)) + (( proof_size > proof_budget )) && ((overages += 1)) + + printf '%s\n' "${overages}" +} + +enforce() { + local overages + overages="$(budget_overages)" + + status + echo + + if (( overages > 0 )); then + echo "Budget enforcement failed: one or more tracked work-root areas are over the configured soft budget." + echo "Use cleanup-advice for the safe runtime cleanup sequence, or use prune-proof-logs for dated proof roots." + echo + cleanup_advice + echo + echo "Safer dated-proof cleanup dry-run:" + echo " ./nix/test-cluster/work-root-budget.sh prune-proof-logs 2" + return 1 + fi + + echo "Budget enforcement passed: all tracked work-root areas are within the configured soft budgets." +} + +cleanup_advice() { + cat < + rm -rf ${WORK_ROOT}/final-proofs/ + rm -rf ${WORK_ROOT}/durability-proof/ + rm -rf ${WORK_ROOT}/rollout-soak/ + rm -rf ${WORK_ROOT}/provider-vm-reality-proof/ + rm -rf ${WORK_ROOT}/core-control-plane-ops-proof/ + rm -rf ${WORK_ROOT}/hardware-smoke/ + +4. Run a Nix store GC after old result symlinks are gone: + nix store gc +EOF +} + +prune_proof_logs() { + local keep="${1:-2}" + local apply="${2:-}" + local mode="dry-run" + + if ! [[ "${keep}" =~ ^[0-9]+$ ]]; then + echo "keep-count must be a non-negative integer" >&2 + exit 1 + fi + + if [[ "${apply}" == "--apply" ]]; then + mode="apply" + if [[ "${ULTRACLOUD_WORK_ROOT_PRUNE_ACK:-}" != "YES" ]]; then + echo "Refusing to delete proof logs without ULTRACLOUD_WORK_ROOT_PRUNE_ACK=YES" >&2 + exit 1 + fi + elif [[ -n "${apply}" ]]; then + echo "unknown prune-proof-logs flag: ${apply}" >&2 + exit 1 + fi + + local root + while IFS= read -r root; do + [[ -d "${root}" ]] || continue + + local -a dated_dirs=() + mapfile -t dated_dirs < <(find "${root}" -mindepth 1 -maxdepth 1 -type d -printf '%P\n' | sort -r) + if (( ${#dated_dirs[@]} <= keep )); then + continue + fi + + echo "${root}:" + local idx candidate + for (( idx = keep; idx < ${#dated_dirs[@]}; idx += 1 )); do + candidate="${root}/${dated_dirs[$idx]}" + if [[ "${mode}" == "apply" ]]; then + rm -rf -- "${candidate}" + echo " deleted ${candidate}" + else + echo " would delete ${candidate}" + fi + done + done < <(proof_roots) + + if [[ "${mode}" == "dry-run" ]]; then + echo + echo "Dry-run only. 
Re-run with:" + echo " ULTRACLOUD_WORK_ROOT_PRUNE_ACK=YES ./nix/test-cluster/work-root-budget.sh prune-proof-logs ${keep} --apply" + fi +} + +main() { + local cmd="${1:-status}" + case "${cmd}" in + status) + status + ;; + enforce) + enforce + ;; + cleanup-advice) + cleanup_advice + ;; + prune-proof-logs) + prune_proof_logs "${2:-2}" "${3:-}" + ;; + -h|--help|help) + usage + ;; + *) + usage >&2 + exit 1 + ;; + esac +} + +main "$@" diff --git a/nix/tests/verify-fleet-scheduler-e2e-stable.sh b/nix/tests/verify-fleet-scheduler-e2e-stable.sh new file mode 100644 index 0000000..e6ddaa2 --- /dev/null +++ b/nix/tests/verify-fleet-scheduler-e2e-stable.sh @@ -0,0 +1,284 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="${ULTRACLOUD_FLEET_E2E_REPO_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)}" +ORIGINAL_SCRIPT="${ROOT}/deployer/scripts/verify-fleet-scheduler-e2e.sh" +PATCHED_SCRIPT="$(mktemp "${TMPDIR:-/tmp}/verify-fleet-scheduler-e2e-stable.XXXXXX.sh")" + +cleanup() { + rm -f "${PATCHED_SCRIPT}" +} + +trap cleanup EXIT + +python3 - "${ORIGINAL_SCRIPT}" "${PATCHED_SCRIPT}" "${ROOT}" <<'PATCHPY' +from __future__ import annotations + +import sys +from pathlib import Path + +source_path = Path(sys.argv[1]) +patched_path = Path(sys.argv[2]) +repo_root = sys.argv[3] +source = source_path.read_text() + +replacements = [ + ( + 'ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"', + f'ROOT="{repo_root}"', + ), + ( + """wait_for_endpoint_convergence() { + local timeout_secs="${1:-60}" + local deadline=$((SECONDS + timeout_secs)) + + while (( SECONDS < deadline )); do + if python3 - <<'PY' +import socket +import urllib.request + +with urllib.request.urlopen("http://127.0.0.2:18080/", timeout=5) as response: + if response.status != 200: + raise SystemExit(f"node01 endpoint returned {response.status}") +with urllib.request.urlopen("http://127.0.0.2:18081/", timeout=5) as response: + if response.status != 200: + raise SystemExit(f"node01 worker endpoint returned {response.status}") + +for port, label in ((18080, "api"), (18081, "worker")): + sock = socket.socket() + sock.settimeout(1.5) + try: + sock.connect(("127.0.0.3", port)) + except OSError: + pass + else: + raise SystemExit(f"node02 {label} endpoint still accepts connections after scale-down") + finally: + sock.close() +PY + then + return 0 + fi + sleep 1 + done + + echo "timed out waiting for endpoint convergence after scale-down" >&2 + return 1 +}""", + """wait_for_endpoint_convergence() { + local api_node_file="$1" + local worker_node_file="$2" + local timeout_secs="${3:-60}" + local deadline=$((SECONDS + timeout_secs)) + + while (( SECONDS < deadline )); do + if python3 - "$api_node_file" "$worker_node_file" <<'PY' +import socket +import sys +import urllib.request + +NODE_IPS = { + "node01": "127.0.0.2", + "node02": "127.0.0.3", +} + + +def read_node(path): + with open(path, "r", encoding="utf-8") as handle: + node_id = handle.read().strip() + if node_id not in NODE_IPS: + raise SystemExit(f"unexpected scaled node id in {path}: {node_id!r}") + return node_id + + +def assert_http(node_id, port, label): + address = f"http://{NODE_IPS[node_id]}:{port}/" + with urllib.request.urlopen(address, timeout=5) as response: + if response.status != 200: + raise SystemExit(f"{label} endpoint on {node_id} returned {response.status}") + + +def assert_closed(node_id, port, label): + sock = socket.socket() + sock.settimeout(1.5) + try: + sock.connect((NODE_IPS[node_id], port)) + except OSError: + return + finally: + sock.close() + raise 
SystemExit(f"{label} endpoint still accepts connections on {node_id} after scale-down") + + +api_node = read_node(sys.argv[1]) +worker_node = read_node(sys.argv[2]) + +assert_http(api_node, 18080, "api") +assert_http(worker_node, 18081, "worker") + +for node_id in NODE_IPS: + if node_id != api_node: + assert_closed(node_id, 18080, "api") + if node_id != worker_node: + assert_closed(node_id, 18081, "worker") +PY + then + return 0 + fi + sleep 1 + done + + echo "timed out waiting for endpoint convergence after scale-down" >&2 + return 1 +}""", + ), + ( + """run_deployer_ctl dump --prefix "ultracloud/clusters/test-cluster/instances/api/" >"$tmp_dir/instances-scaled.dump" +python3 - "$tmp_dir/instances-scaled.dump" <<'PY' +import json +import sys + +path = sys.argv[1] +instances = [] + +with open(path, "r", encoding="utf-8") as handle: + for line in handle: + line = line.strip() + if not line: + continue + marker = " value=" + if marker not in line: + continue + value = line.split(marker, 1)[1] + instances.append(json.loads(value)) + +if len(instances) != 1: + raise SystemExit(f"expected 1 scheduled instance after scale-down, found {len(instances)}") + +instance = instances[0] +if instance["node_id"] != "node01": + raise SystemExit(f"expected remaining instance on node01, found {instance['node_id']}") +if instance.get("state") != "healthy": + raise SystemExit(f"expected remaining instance to be healthy, found {instance.get('state')}") + +print("Observed one healthy scheduled instance on node01 after scale-down") +PY""", + """run_deployer_ctl dump --prefix "ultracloud/clusters/test-cluster/instances/api/" >"$tmp_dir/instances-scaled.dump" +python3 - "$tmp_dir/instances-scaled.dump" "$tmp_dir/api-scaled-node.txt" <<'PY' +import json +import sys + +path = sys.argv[1] +node_path = sys.argv[2] +instances = [] + +with open(path, "r", encoding="utf-8") as handle: + for line in handle: + line = line.strip() + if not line: + continue + marker = " value=" + if marker not in line: + continue + value = line.split(marker, 1)[1] + instances.append(json.loads(value)) + +if len(instances) != 1: + raise SystemExit(f"expected 1 scheduled instance after scale-down, found {len(instances)}") + +instance = instances[0] +node_id = instance["node_id"] +if node_id not in {"node01", "node02"}: + raise SystemExit(f"unexpected remaining api instance node {node_id}") +if instance.get("state") != "healthy": + raise SystemExit(f"expected remaining instance to be healthy, found {instance.get('state')}") + +with open(node_path, "w", encoding="utf-8") as handle: + handle.write(node_id + "\\n") + +print(f"Observed one healthy scheduled instance on {node_id} after scale-down") +PY""", + ), + ( + """run_deployer_ctl dump --prefix "ultracloud/clusters/test-cluster/instances/worker/" >"$tmp_dir/worker-instances-scaled.dump" +python3 - "$tmp_dir/worker-instances-scaled.dump" <<'PY' +import json +import sys + +path = sys.argv[1] +instances = [] + +with open(path, "r", encoding="utf-8") as handle: + for line in handle: + line = line.strip() + if not line: + continue + marker = " value=" + if marker not in line: + continue + value = line.split(marker, 1)[1] + instances.append(json.loads(value)) + +if len(instances) != 1: + raise SystemExit(f"expected 1 worker instance after scale-down, found {len(instances)}") + +instance = instances[0] +if instance["node_id"] != "node01": + raise SystemExit(f"expected remaining worker instance on node01, found {instance['node_id']}") +if instance.get("state") != "healthy": + raise 
SystemExit(f"expected remaining worker instance to be healthy, found {instance.get('state')}") + +print("Observed one healthy dependent worker instance on node01 after scale-down") +PY""", + """run_deployer_ctl dump --prefix "ultracloud/clusters/test-cluster/instances/worker/" >"$tmp_dir/worker-instances-scaled.dump" +python3 - "$tmp_dir/worker-instances-scaled.dump" "$tmp_dir/worker-scaled-node.txt" <<'PY' +import json +import sys + +path = sys.argv[1] +node_path = sys.argv[2] +instances = [] + +with open(path, "r", encoding="utf-8") as handle: + for line in handle: + line = line.strip() + if not line: + continue + marker = " value=" + if marker not in line: + continue + value = line.split(marker, 1)[1] + instances.append(json.loads(value)) + +if len(instances) != 1: + raise SystemExit(f"expected 1 worker instance after scale-down, found {len(instances)}") + +instance = instances[0] +node_id = instance["node_id"] +if node_id not in {"node01", "node02"}: + raise SystemExit(f"unexpected remaining worker instance node {node_id}") +if instance.get("state") != "healthy": + raise SystemExit(f"expected remaining worker instance to be healthy, found {instance.get('state')}") + +with open(node_path, "w", encoding="utf-8") as handle: + handle.write(node_id + "\\n") + +print(f"Observed one healthy dependent worker instance on {node_id} after scale-down") +PY""", + ), + ( + 'wait_for_endpoint_convergence 60', + 'wait_for_endpoint_convergence "$tmp_dir/api-scaled-node.txt" "$tmp_dir/worker-scaled-node.txt" 60', + ), +] + +for old, new in replacements: + if old not in source: + raise SystemExit(f"expected snippet not found while patching {source_path}") + source = source.replace(old, new, 1) + +patched_path.write_text(source) +PATCHPY + +chmod +x "${PATCHED_SCRIPT}" +exec bash "${PATCHED_SCRIPT}" "$@" diff --git a/plans/baselines/main-reaggregation-2026-04-06.md b/plans/baselines/main-reaggregation-2026-04-06.md new file mode 100644 index 0000000..fd301ca --- /dev/null +++ b/plans/baselines/main-reaggregation-2026-04-06.md @@ -0,0 +1,43 @@ +# Main Reaggregation Baseline 2026-04-06 + +Task: `343c8c57-be11-4097-8fd5-1d30817bc2da` + +## Branch Setup + +```bash +git fetch origin --prune +git switch -c task/343c8c57-main-reaggregate origin/main +``` + +The re-aggregated branch is rooted at `origin/main` and keeps existing untracked local files untouched. 
+ +## Inventory Summary + +Diff reviewed: + +```bash +git diff --stat origin/main..task/f5c70db0-baseline-profiles +``` + +Selected changes imported from `task/f5c70db0-baseline-profiles`: + +- `flake.nix`, `nix/ci/flake.nix`: canonical-profile eval/build guards, `single-node-quickstart`, `baremetal-iso-e2e`, and `portable-control-plane-regressions` +- `nix/single-node/*`: one-command single-node quickstart profile and VM launcher +- `nix/iso/ultracloud-iso.nix`, `nix/nodes/baremetal-qemu/*`, `nix/test-cluster/verify-baremetal-iso.sh`: canonical ISO bootstrap path for QEMU-as-bare-metal validation +- `nix/images/netboot-all-in-one.nix`, `nix/nodes/vm-cluster/common-disko.nix`, `nix/modules/default.nix`: support fixes needed by canonical helper images and optional services +- `.github/workflows/nix.yml`: portable regression coverage that matches the re-aggregated local validation surface +- `README.md`, `docs/*`, `nix/test-cluster/README.md`: public documentation for the supported quickstart, portable regression lane, and ISO bootstrap proof + +Intentionally excluded from the new baseline: + +- `.github/workflows/kvm-publishable-selfhosted.yml` +- runner-label pinning and manual remote checkout changes aimed at constrained Forgejo runners +- `plans/baselines/kvm-publishable-lane-2026-04-05.md` +- historical `plans/baselines/logs/*.meta` capture files + +## Policy + +- Keep local and branch-independent validation paths in-tree: `single-node-quickstart`, `baremetal-iso`, `baremetal-iso-e2e`, and `portable-control-plane-regressions` +- Keep helper images evaluable when they back those canonical paths +- Treat `netboot-all-in-one` and `netboot-control-plane` as companion images of supported profiles; keep `netboot-worker` helper-only but still covered by the canonical profile guards +- Do not carry runner-specific workflow tuning into the baseline branch unless it is required for local QEMU/KVM or flake evaluation diff --git a/plasmavmc/Cargo.lock b/plasmavmc/Cargo.lock index 8de1f77..236be10 100644 --- a/plasmavmc/Cargo.lock +++ b/plasmavmc/Cargo.lock @@ -2055,7 +2055,6 @@ dependencies = [ "lightningstor-api", "metrics-exporter-prometheus", "plasmavmc-api", - "plasmavmc-firecracker", "plasmavmc-hypervisor", "plasmavmc-kvm", "plasmavmc-types", diff --git a/plasmavmc/Cargo.toml b/plasmavmc/Cargo.toml index 9085f68..0324866 100644 --- a/plasmavmc/Cargo.toml +++ b/plasmavmc/Cargo.toml @@ -8,6 +8,13 @@ members = [ "crates/plasmavmc-firecracker", "crates/plasmavmc-server", ] +default-members = [ + "crates/plasmavmc-types", + "crates/plasmavmc-api", + "crates/plasmavmc-hypervisor", + "crates/plasmavmc-kvm", + "crates/plasmavmc-server", +] [workspace.package] version = "0.1.0" @@ -23,7 +30,6 @@ plasmavmc-types = { path = "crates/plasmavmc-types" } plasmavmc-api = { path = "crates/plasmavmc-api" } plasmavmc-hypervisor = { path = "crates/plasmavmc-hypervisor" } plasmavmc-kvm = { path = "crates/plasmavmc-kvm" } -plasmavmc-firecracker = { path = "crates/plasmavmc-firecracker" } plasmavmc-server = { path = "crates/plasmavmc-server" } # Async runtime diff --git a/plasmavmc/crates/plasmavmc-firecracker/src/lib.rs b/plasmavmc/crates/plasmavmc-firecracker/src/lib.rs index 580cb85..2f47f5d 100644 --- a/plasmavmc/crates/plasmavmc-firecracker/src/lib.rs +++ b/plasmavmc/crates/plasmavmc-firecracker/src/lib.rs @@ -24,6 +24,11 @@ use std::time::Duration; use tokio::process::Command; use tokio::time::Instant; +fn firecracker_host_dev_name(vm: &VirtualMachine, nic_index: usize) -> String { + let vm_hex = 
vm.id.as_uuid().simple().to_string(); + format!("fc{nic_index:02x}{}", &vm_hex[..8]) +} + /// FireCracker hypervisor backend pub struct FireCrackerBackend { /// Path to FireCracker binary @@ -334,10 +339,9 @@ impl HypervisorBackend for FireCrackerBackend { let mac = nic.mac_address.clone().unwrap_or_else(|| { format!("AA:FC:00:00:{:02X}:{:02X}", (vm.id.as_uuid().as_u128() >> 8) as u8, vm.id.as_uuid().as_u128() as u8) }); - // Note: host_dev_name should be set up externally (TAP interface) - // For now, we'll use a placeholder + let host_dev_name = firecracker_host_dev_name(vm, idx); client - .put_network_interface(&iface_id, &mac, "tap0") + .put_network_interface(&iface_id, &mac, &host_dev_name) .await?; } @@ -573,6 +577,7 @@ impl HypervisorBackend for FireCrackerBackend { #[cfg(test)] mod tests { use super::*; + use plasmavmc_types::DiskSpec; #[test] fn test_firecracker_backend_creation() { @@ -641,4 +646,18 @@ mod tests { assert!(backend.supports(&spec).is_err()); } + + #[test] + fn test_firecracker_host_dev_name_is_stable_and_ifname_safe() { + let vm = VirtualMachine::new( + "fc-test".to_string(), + "org-1".to_string(), + "proj-1".to_string(), + VmSpec::default(), + ); + let host_dev_name = firecracker_host_dev_name(&vm, 1); + + assert!(host_dev_name.starts_with("fc01")); + assert!(host_dev_name.len() <= 15); + } } diff --git a/plasmavmc/crates/plasmavmc-kvm/src/lib.rs b/plasmavmc/crates/plasmavmc-kvm/src/lib.rs index 5618643..09738ba 100644 --- a/plasmavmc/crates/plasmavmc-kvm/src/lib.rs +++ b/plasmavmc/crates/plasmavmc-kvm/src/lib.rs @@ -431,7 +431,7 @@ fn build_qemu_args( ) -> Result> { let mut args = vec![ "-machine".into(), - "q35,accel=kvm".into(), + "q35,accel=kvm:tcg".into(), "-name".into(), vm.name.clone(), "-m".into(), @@ -443,8 +443,7 @@ fn build_qemu_args( .cpu .cpu_model .clone() - .unwrap_or_else(|| "host".into()), - "-enable-kvm".into(), + .unwrap_or_else(|| "max".into()), "-nographic".into(), "-display".into(), "none".into(), @@ -1288,6 +1287,9 @@ mod tests { assert!(args_joined.contains("512")); // default memory MiB assert!(args_joined.contains("image.qcow2")); assert!(args_joined.contains("console.log")); + assert!(args_joined.contains("q35,accel=kvm:tcg")); + assert!(args_joined.contains("-cpu max")); + assert!(!args_joined.contains("-enable-kvm")); std::env::remove_var(env::ENV_QCOW2_PATH); } diff --git a/plasmavmc/crates/plasmavmc-server/Cargo.toml b/plasmavmc/crates/plasmavmc-server/Cargo.toml index 31e8b53..31a3d0d 100644 --- a/plasmavmc/crates/plasmavmc-server/Cargo.toml +++ b/plasmavmc/crates/plasmavmc-server/Cargo.toml @@ -15,7 +15,6 @@ plasmavmc-types = { workspace = true } plasmavmc-api = { workspace = true } plasmavmc-hypervisor = { workspace = true } plasmavmc-kvm = { workspace = true } -plasmavmc-firecracker = { workspace = true } iam-service-auth = { path = "../../../iam/crates/iam-service-auth" } tonic = { workspace = true } tonic-health = { workspace = true } diff --git a/plasmavmc/crates/plasmavmc-server/src/main.rs b/plasmavmc/crates/plasmavmc-server/src/main.rs index f175fde..881dcbd 100644 --- a/plasmavmc/crates/plasmavmc-server/src/main.rs +++ b/plasmavmc/crates/plasmavmc-server/src/main.rs @@ -12,7 +12,6 @@ use plasmavmc_api::proto::{ HeartbeatNodeRequest, HypervisorType as ProtoHypervisorType, NodeCapacity, NodeState as ProtoNodeState, VolumeDriverKind as ProtoVolumeDriverKind, }; -use plasmavmc_firecracker::FireCrackerBackend; use plasmavmc_hypervisor::HypervisorRegistry; use plasmavmc_kvm::KvmBackend; use 
plasmavmc_server::config::{AgentRuntimeConfig, ServerConfig}; @@ -44,14 +43,6 @@ struct Args { #[arg(short, long)] log_level: Option, - /// Path to the Firecracker kernel image (overrides config) - #[arg(long)] - firecracker_kernel_path: Option, - - /// Path to the Firecracker rootfs image (overrides config) - #[arg(long)] - firecracker_rootfs_path: Option, - /// Metrics port for Prometheus scraping #[arg(long, default_value = "9102")] metrics_port: u16, @@ -192,13 +183,6 @@ async fn main() -> Result<(), Box> { if let Some(log_level) = args.log_level { config.log_level = log_level; } - if let Some(kernel_path) = args.firecracker_kernel_path { - config.firecracker.kernel_path = Some(kernel_path); - } - if let Some(rootfs_path) = args.firecracker_rootfs_path { - config.firecracker.rootfs_path = Some(rootfs_path); - } - // Initialize tracing tracing_subscriber::fmt() .with_env_filter( @@ -239,28 +223,18 @@ async fn main() -> Result<(), Box> { )); registry.register(kvm_backend); - // Register FireCracker backend if kernel/rootfs paths are configured (config or env) + // FireCracker stays outside the supported public PlasmaVMC surface for now. let has_kernel = config.firecracker.kernel_path.is_some() || std::env::var_os("PLASMAVMC_FIRECRACKER_KERNEL_PATH").is_some(); let has_rootfs = config.firecracker.rootfs_path.is_some() || std::env::var_os("PLASMAVMC_FIRECRACKER_ROOTFS_PATH").is_some(); - if has_kernel && has_rootfs { - match FireCrackerBackend::from_config(&config.firecracker) { - Ok(firecracker_backend) => { - registry.register(Arc::new(firecracker_backend)); - tracing::info!("Registered FireCracker backend"); - } - Err(err) => { - tracing::warn!("Failed to initialize FireCracker backend: {}", err); - } - } - } else if has_kernel || has_rootfs { + if has_kernel || has_rootfs { tracing::warn!( - "FireCracker backend configuration incomplete: kernel_path/rootfs_path must both be set (config or env)" + "FireCracker backend inputs were provided, but the supported PlasmaVMC public backend contract is KVM-only; ignoring FireCracker configuration" ); } else { - tracing::debug!("FireCracker backend not available (missing kernel/rootfs paths)"); + tracing::debug!("FireCracker backend remains outside the supported public surface"); } tracing::info!("Registered hypervisors: {:?}", registry.available()); diff --git a/plasmavmc/crates/plasmavmc-server/src/rest.rs b/plasmavmc/crates/plasmavmc-server/src/rest.rs index 167d408..b274352 100644 --- a/plasmavmc/crates/plasmavmc-server/src/rest.rs +++ b/plasmavmc/crates/plasmavmc-server/src/rest.rs @@ -175,6 +175,9 @@ pub struct VmNetworkResponse { pub security_groups: Vec, } +const PUBLIC_KVM_ONLY_MESSAGE: &str = + "PlasmaVMC public VM APIs support only the KVM backend"; + fn nic_model_to_string(model: i32) -> String { match plasmavmc_api::proto::NicModel::try_from(model) .unwrap_or(plasmavmc_api::proto::NicModel::Unspecified) @@ -190,9 +193,28 @@ fn hypervisor_to_string(hypervisor: i32) -> String { .unwrap_or(plasmavmc_api::proto::HypervisorType::Unspecified) { plasmavmc_api::proto::HypervisorType::Kvm => "kvm".to_string(), - plasmavmc_api::proto::HypervisorType::Firecracker => "firecracker".to_string(), - plasmavmc_api::proto::HypervisorType::Mvisor => "mvisor".to_string(), - plasmavmc_api::proto::HypervisorType::Unspecified => "unspecified".to_string(), + plasmavmc_api::proto::HypervisorType::Firecracker => { + "legacy-unsupported-firecracker".to_string() + } + plasmavmc_api::proto::HypervisorType::Mvisor => "legacy-unsupported-mvisor".to_string(), + 
plasmavmc_api::proto::HypervisorType::Unspecified => "kvm".to_string(), + } +} + +fn parse_supported_public_hypervisor( + hypervisor: Option<&str>, +) -> Result { + match hypervisor.map(str::trim).filter(|value| !value.is_empty()) { + None | Some("kvm") => Ok(plasmavmc_api::proto::HypervisorType::Kvm), + Some("firecracker") => Err(format!( + "{PUBLIC_KVM_ONLY_MESSAGE}; firecracker remains outside the supported surface" + )), + Some("mvisor") => Err(format!( + "{PUBLIC_KVM_ONLY_MESSAGE}; mvisor remains outside the supported surface" + )), + Some(other) => Err(format!( + "{PUBLIC_KVM_ONLY_MESSAGE}; unsupported value `{other}`" + )), } } @@ -322,7 +344,7 @@ async fn create_vm( Json(req): Json, ) -> Result<(StatusCode, Json>), (StatusCode, Json)> { use plasmavmc_api::proto::{ - disk_source, CpuSpec, DiskBus, DiskCache, DiskSource, DiskSpec, HypervisorType, MemorySpec, + disk_source, CpuSpec, DiskBus, DiskCache, DiskSource, DiskSpec, MemorySpec, NicModel as ProtoNicModel, }; @@ -337,12 +359,8 @@ async fn create_vm( network, } = req; - let hypervisor_type = match hypervisor.as_deref() { - Some("kvm") => HypervisorType::Kvm, - Some("firecracker") => HypervisorType::Firecracker, - Some("mvisor") => HypervisorType::Mvisor, - _ => HypervisorType::Unspecified, - }; + let hypervisor_type = parse_supported_public_hypervisor(hypervisor.as_deref()) + .map_err(|message| error_response(StatusCode::BAD_REQUEST, "INVALID_ARGUMENT", &message))?; let disks = disks .into_iter() @@ -730,4 +748,38 @@ mod tests { ); assert!(response.network[0].dhcp_enabled); } + + #[test] + fn parse_supported_public_hypervisor_defaults_to_kvm() { + assert_eq!( + parse_supported_public_hypervisor(None).unwrap(), + HypervisorType::Kvm + ); + assert_eq!( + parse_supported_public_hypervisor(Some("kvm")).unwrap(), + HypervisorType::Kvm + ); + } + + #[test] + fn parse_supported_public_hypervisor_rejects_non_kvm_backends() { + assert!(parse_supported_public_hypervisor(Some("firecracker")) + .unwrap_err() + .contains(PUBLIC_KVM_ONLY_MESSAGE)); + assert!(parse_supported_public_hypervisor(Some("mvisor")) + .unwrap_err() + .contains(PUBLIC_KVM_ONLY_MESSAGE)); + } + + #[test] + fn hypervisor_to_string_marks_legacy_backends_as_unsupported() { + assert_eq!( + hypervisor_to_string(HypervisorType::Firecracker as i32), + "legacy-unsupported-firecracker" + ); + assert_eq!( + hypervisor_to_string(HypervisorType::Mvisor as i32), + "legacy-unsupported-mvisor" + ); + } } diff --git a/plasmavmc/crates/plasmavmc-server/src/vm_service.rs b/plasmavmc/crates/plasmavmc-server/src/vm_service.rs index f50cce7..cc6aa3e 100644 --- a/plasmavmc/crates/plasmavmc-server/src/vm_service.rs +++ b/plasmavmc/crates/plasmavmc-server/src/vm_service.rs @@ -305,6 +305,20 @@ impl VmServiceImpl { } } + fn require_supported_public_hypervisor( + typ: ProtoHypervisorType, + ) -> Result { + match typ { + ProtoHypervisorType::Unspecified | ProtoHypervisorType::Kvm => Ok(HypervisorType::Kvm), + ProtoHypervisorType::Firecracker => Err(Status::invalid_argument( + "PlasmaVMC public VM APIs support only HYPERVISOR_TYPE_KVM; HYPERVISOR_TYPE_FIRECRACKER remains outside the supported surface", + )), + ProtoHypervisorType::Mvisor => Err(Status::invalid_argument( + "PlasmaVMC public VM APIs support only HYPERVISOR_TYPE_KVM; HYPERVISOR_TYPE_MVISOR remains outside the supported surface", + )), + } + } + fn map_hv_proto(typ: HypervisorType) -> ProtoHypervisorType { match typ { HypervisorType::Kvm => ProtoHypervisorType::Kvm, @@ -2706,6 +2720,20 @@ mod tests { 
assert!(backend.last_created_vm().is_none()); } + #[tokio::test] + async fn create_vm_rejects_firecracker_on_supported_public_surface() { + let iam_endpoint = start_test_iam_server().await; + let (_tempdir, service, backend) = new_test_vm_service(&iam_endpoint, None).await; + let mut request = Request::new(test_vm_request(vec![])); + request.get_mut().hypervisor = ProtoHypervisorType::Firecracker as i32; + request.extensions_mut().insert(test_tenant()); + + let error = service.create_vm(request).await.unwrap_err(); + assert_eq!(error.code(), tonic::Code::InvalidArgument); + assert!(error.message().contains("HYPERVISOR_TYPE_KVM")); + assert!(backend.last_created_vm().is_none()); + } + #[tokio::test(flavor = "multi_thread")] async fn create_vm_rejects_unknown_security_group_reference() { let iam_endpoint = start_test_iam_server().await; @@ -2965,9 +2993,10 @@ impl VmService for VmServiceImpl { "CreateVm request" ); - let hv = self.map_hv( - ProtoHypervisorType::try_from(req.hypervisor).unwrap_or(ProtoHypervisorType::Kvm), - ); + let hv = Self::require_supported_public_hypervisor( + ProtoHypervisorType::try_from(req.hypervisor) + .map_err(|_| Status::invalid_argument("hypervisor must be a known HypervisorType"))?, + )?; if req.spec.is_none() { return Err(Status::invalid_argument("spec is required")); } @@ -4265,9 +4294,10 @@ impl VmService for VmServiceImpl { let vm_uuid = Uuid::parse_str(&req.vm_id) .map_err(|_| Status::invalid_argument("vm_id must be a UUID"))?; - let hv = self.map_hv( - ProtoHypervisorType::try_from(req.hypervisor).unwrap_or(ProtoHypervisorType::Kvm), - ); + let hv = Self::require_supported_public_hypervisor( + ProtoHypervisorType::try_from(req.hypervisor) + .map_err(|_| Status::invalid_argument("hypervisor must be a known HypervisorType"))?, + )?; let backend = self .hypervisor_registry .get(hv) @@ -4352,9 +4382,10 @@ impl VmService for VmServiceImpl { let vm_uuid = Uuid::parse_str(&req.vm_id) .map_err(|_| Status::invalid_argument("vm_id must be a UUID"))?; - let hv = self.map_hv( - ProtoHypervisorType::try_from(req.hypervisor).unwrap_or(ProtoHypervisorType::Kvm), - ); + let hv = Self::require_supported_public_hypervisor( + ProtoHypervisorType::try_from(req.hypervisor) + .map_err(|_| Status::invalid_argument("hypervisor must be a known HypervisorType"))?, + )?; let backend = self .hypervisor_registry .get(hv) diff --git a/prismnet/crates/prismnet-server/src/ovn/client.rs b/prismnet/crates/prismnet-server/src/ovn/client.rs index 33975e0..6927daa 100644 --- a/prismnet/crates/prismnet-server/src/ovn/client.rs +++ b/prismnet/crates/prismnet-server/src/ovn/client.rs @@ -122,16 +122,8 @@ impl OvnClient { OvnMode::Real { .. } => { let name = Self::logical_switch_name(vpc_id); self.run_nbctl(vec!["ls-add".into(), name.clone()]).await?; - // Store CIDR for reference (best-effort; ignore errors) - let _ = self - .run_nbctl(vec![ - "set".into(), - "Logical_Switch".into(), - name, - format!("other_config:subnet={}", cidr), - ]) - .await; - Ok(()) + self.run_nbctl(Self::logical_switch_subnet_args(&name, cidr)) + .await } } } @@ -240,7 +232,14 @@ impl OvnClient { } } - pub async fn delete_acl(&self, rule_id: &SecurityGroupRuleId) -> OvnResult<()> { + pub async fn delete_acl( + &self, + rule_id: &SecurityGroupRuleId, + logical_switch: &VpcId, + rule: &SecurityGroupRule, + match_expr: &str, + priority: u16, + ) -> OvnResult<()> { let key = Self::acl_key(rule_id); match &self.mode { OvnMode::Mock(state) => { @@ -249,23 +248,36 @@ impl OvnClient { Ok(()) } OvnMode::Real { .. 
} => { - // Best-effort deletion by external-id match (placeholder) - let _ = self - .run_nbctl(vec![ - "--".into(), - "find".into(), - "ACL".into(), - format!("name={}", key), - "--delete".into(), - "ACL".into(), - "uuid".into(), - ]) - .await; - Ok(()) + self.run_nbctl(Self::acl_delete_args(logical_switch, rule, match_expr, priority)) + .await } } } + fn acl_delete_args( + logical_switch: &VpcId, + rule: &SecurityGroupRule, + match_expr: &str, + priority: u16, + ) -> Vec { + vec![ + "acl-del".into(), + Self::logical_switch_name(logical_switch), + direction_to_ovn(rule), + priority.to_string(), + match_expr.to_string(), + ] + } + + fn logical_switch_subnet_args(name: &str, cidr: &str) -> Vec { + vec![ + "set".into(), + "Logical_Switch".into(), + name.to_string(), + format!("other_config:subnet={cidr}"), + ] + } + /// Create DHCP options in OVN for a subnet pub async fn create_dhcp_options( &self, @@ -586,6 +598,72 @@ mod tests { assert!(guard.acl_exists(&key)); } + #[tokio::test] + async fn mock_acl_delete_removes_rule() { + let client = OvnClient::new_mock(); + let vpc = Vpc::new("test", "org", "proj", "10.0.0.0/16"); + client + .create_logical_switch(&vpc.id, &vpc.cidr_block) + .await + .unwrap(); + + let mut rule = SecurityGroupRule::new( + SecurityGroupId::new(), + RuleDirection::Ingress, + Default::default(), + ); + rule.remote_cidr = Some("0.0.0.0/0".to_string()); + let match_expr = "ip4 && ip4.src == 0.0.0.0/0"; + let key = client + .create_acl(&rule.security_group_id, &rule, &vpc.id, match_expr, 1000) + .await + .unwrap(); + + client + .delete_acl(&rule.id, &vpc.id, &rule, match_expr, 1000) + .await + .unwrap(); + + let state = client.mock_state().unwrap(); + let guard = state.lock().await; + assert!(!guard.acl_exists(&key)); + } + + #[test] + fn real_acl_delete_uses_deterministic_match_tuple() { + let vpc = Vpc::new("test", "org", "proj", "10.0.0.0/16"); + let mut rule = SecurityGroupRule::new( + SecurityGroupId::new(), + RuleDirection::Egress, + Default::default(), + ); + rule.remote_cidr = Some("10.0.0.0/8".to_string()); + + assert_eq!( + OvnClient::acl_delete_args(&vpc.id, &rule, "ip4 && ip4.dst == 10.0.0.0/8", 800), + vec![ + "acl-del".to_string(), + format!("vpc-{}", vpc.id), + "from-lport".to_string(), + "800".to_string(), + "ip4 && ip4.dst == 10.0.0.0/8".to_string(), + ] + ); + } + + #[test] + fn real_logical_switch_subnet_args_are_deterministic() { + assert_eq!( + OvnClient::logical_switch_subnet_args("vpc-test", "10.0.0.0/16"), + vec![ + "set".to_string(), + "Logical_Switch".to_string(), + "vpc-test".to_string(), + "other_config:subnet=10.0.0.0/16".to_string(), + ] + ); + } + #[tokio::test] async fn mock_deletions_remove_state() { let client = OvnClient::new_mock(); diff --git a/prismnet/crates/prismnet-server/src/services/security_group.rs b/prismnet/crates/prismnet-server/src/services/security_group.rs index 7c2a033..597e74d 100644 --- a/prismnet/crates/prismnet-server/src/services/security_group.rs +++ b/prismnet/crates/prismnet-server/src/services/security_group.rs @@ -508,17 +508,27 @@ impl SecurityGroupService for SecurityGroupServiceImpl { ) .await?; - let _removed = self + let removed = self .metadata .remove_security_group_rule(&org_id, &project_id, &sg_id, &rule_id) .await .map_err(|e| Status::internal(e.to_string()))? 
.ok_or_else(|| Status::not_found("SecurityGroup or Rule not found"))?; - self.ovn - .delete_acl(&rule_id) + let vpcs = self + .metadata + .list_vpcs(&org_id, &project_id) .await .map_err(|e| Status::internal(e.to_string()))?; + let match_expr = build_acl_match(&removed, None); + let priority = calculate_priority(&removed); + + for vpc in vpcs { + self.ovn + .delete_acl(&rule_id, &vpc.id, &removed, &match_expr, priority) + .await + .map_err(|e| Status::internal(e.to_string()))?; + } Ok(Response::new(RemoveRuleResponse {})) }
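For reference, the deterministic argument tuple that `acl_delete_args` builds corresponds to an `ovn-nbctl acl-del` invocation of the following shape (a sketch only; the logical switch name, priority, and match expression below are illustrative values, not taken from a real deployment):

```bash
# Deleting one security-group ACL by its full (direction, priority, match) tuple
# on the VPC's logical switch; "from-lport" is the direction the tests expect for
# an egress rule.
ovn-nbctl acl-del vpc-3f2a9c10 from-lport 800 'ip4 && ip4.dst == 10.0.0.0/8'
```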