diff --git a/.github/workflows/nix.yml b/.github/workflows/nix.yml index 1770d79..3b96a4d 100644 --- a/.github/workflows/nix.yml +++ b/.github/workflows/nix.yml @@ -59,7 +59,7 @@ jobs: --github-output "$GITHUB_OUTPUT" # Run CI gates for changed workspaces - # Uses the provider-agnostic 'photoncloud-gate' defined in nix/ci/flake.nix + # Uses the provider-agnostic 'ultracloud-gate' defined in nix/ci/flake.nix gate: needs: filter if: ${{ needs.filter.outputs.any_changed == 'true' }} @@ -74,7 +74,7 @@ jobs: - uses: DeterminateSystems/nix-installer-action@v11 - uses: DeterminateSystems/magic-nix-cache-action@v8 - - name: Run PhotonCloud Gate + - name: Run UltraCloud Gate run: | nix run ./nix/ci#gate-ci -- --workspace ${{ matrix.workspace }} --tier 0 --no-logs diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c701f62..955467a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ # Contributing -PhotonCloud uses Nix as the primary development and validation entrypoint. +UltraCloud uses Nix as the primary development and validation entrypoint. ## Setup diff --git a/Makefile b/Makefile index a880eba..ff3ab67 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# PhotonCloud Makefile +# UltraCloud Makefile # Unifies build and test commands .PHONY: all build cluster-up cluster-down cluster-status cluster-validate cluster-smoke cluster-matrix cluster-bench-storage clean diff --git a/README.md b/README.md index c219b79..1a2f63a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# PhotonCloud +# UltraCloud -PhotonCloud is a Nix-first cloud platform workspace that assembles a small control plane, network services, VM hosting, shared storage, object storage, and gateway services into one reproducible repository. +UltraCloud is a Nix-first cloud platform workspace that assembles a small control plane, network services, VM hosting, shared storage, object storage, and gateway services into one reproducible repository. 
The canonical local proof path is the six-node VM cluster under [`nix/test-cluster`](/home/centra/cloud/nix/test-cluster/README.md). It builds all guest images on the host, boots them as hardware-like QEMU nodes, and validates real multi-node behavior. @@ -47,6 +47,6 @@ nix run ./nix/test-cluster#cluster -- fresh-smoke ## Scope -PhotonCloud is centered on reproducible infrastructure behavior rather than polished end-user product surfaces. Some services, such as `creditservice`, are intentionally minimal reference implementations that prove integration points rather than full products. +UltraCloud is centered on reproducible infrastructure behavior rather than polished end-user product surfaces. Some services, such as `creditservice`, are intentionally minimal reference implementations that prove integration points rather than full products. Host-level NixOS rollout validation is also expected to stay reproducible: the `deployer-vm-smoke` VM test now proves that `nix-agent` can activate a prebuilt target system closure directly, without recompiling the stack inside the guest. 
diff --git a/apigateway/Cargo.toml b/apigateway/Cargo.toml index 6a59fbb..15283e9 100644 --- a/apigateway/Cargo.toml +++ b/apigateway/Cargo.toml @@ -10,8 +10,8 @@ version = "0.1.0" edition = "2021" license = "MIT OR Apache-2.0" rust-version = "1.75" -authors = ["PlasmaCloud Contributors"] -repository = "https://github.com/yourorg/plasmacloud" +authors = ["UltraCloud Contributors"] +repository = "https://github.com/yourorg/ultracloud" [workspace.dependencies] # Internal crates diff --git a/baremetal/image-builder/build-images.sh b/baremetal/image-builder/build-images.sh index 8359e85..dd004cb 100755 --- a/baremetal/image-builder/build-images.sh +++ b/baremetal/image-builder/build-images.sh @@ -1,8 +1,8 @@ #!/usr/bin/env bash # ============================================================================== -# PlasmaCloud NixOS Netboot Image Builder +# UltraCloud NixOS Netboot Image Builder # ============================================================================== -# This script builds netboot images for bare-metal provisioning of PlasmaCloud. +# This script builds netboot images for bare-metal provisioning of UltraCloud. # # Usage: # ./build-images.sh [--profile PROFILE] [--output-dir DIR] [--help] @@ -61,7 +61,7 @@ print_error() { print_banner() { echo "" echo "╔════════════════════════════════════════════════════════════════╗" - echo "║ PlasmaCloud NixOS Netboot Image Builder ║" + echo "║ UltraCloud NixOS Netboot Image Builder ║" echo "║ Building bare-metal provisioning images ║" echo "╚════════════════════════════════════════════════════════════════╝" echo "" @@ -72,11 +72,11 @@ print_usage() { cat << EOF Usage: $0 [OPTIONS] -Build NixOS netboot images for PlasmaCloud bare-metal provisioning. +Build NixOS netboot images for UltraCloud bare-metal provisioning. 
OPTIONS: --profile PROFILE Build specific profile: - - control-plane: All 8 PlasmaCloud services + - control-plane: All 8 UltraCloud services - worker: Compute-focused services (PlasmaVMC, PrismNET) - all-in-one: All services for single-node deployment - all: Build all profiles (default) @@ -107,9 +107,9 @@ OUTPUT: - netboot.ipxe iPXE boot script ENVIRONMENT: - PLASMACLOUD_DEPLOYER_URL Optional deployer endpoint embedded into generated netboot.ipxe - PLASMACLOUD_BOOTSTRAP_TOKEN Optional bootstrap token embedded into generated netboot.ipxe - PLASMACLOUD_CA_CERT_URL Optional CA certificate URL embedded into generated netboot.ipxe + ULTRACLOUD_DEPLOYER_URL Optional deployer endpoint embedded into generated netboot.ipxe + ULTRACLOUD_BOOTSTRAP_TOKEN Optional bootstrap token embedded into generated netboot.ipxe + ULTRACLOUD_CA_CERT_URL Optional CA certificate URL embedded into generated netboot.ipxe EOF } @@ -157,14 +157,14 @@ build_profile() { fi local deployer_kernel_args="" - if [ -n "${PLASMACLOUD_DEPLOYER_URL:-}" ]; then - deployer_kernel_args+=" plasmacloud.deployer_url=${PLASMACLOUD_DEPLOYER_URL}" + if [ -n "${ULTRACLOUD_DEPLOYER_URL:-}" ]; then + deployer_kernel_args+=" ultracloud.deployer_url=${ULTRACLOUD_DEPLOYER_URL}" fi - if [ -n "${PLASMACLOUD_BOOTSTRAP_TOKEN:-}" ]; then - deployer_kernel_args+=" plasmacloud.bootstrap_token=${PLASMACLOUD_BOOTSTRAP_TOKEN}" + if [ -n "${ULTRACLOUD_BOOTSTRAP_TOKEN:-}" ]; then + deployer_kernel_args+=" ultracloud.bootstrap_token=${ULTRACLOUD_BOOTSTRAP_TOKEN}" fi - if [ -n "${PLASMACLOUD_CA_CERT_URL:-}" ]; then - deployer_kernel_args+=" plasmacloud.ca_cert_url=${PLASMACLOUD_CA_CERT_URL}" + if [ -n "${ULTRACLOUD_CA_CERT_URL:-}" ]; then + deployer_kernel_args+=" ultracloud.ca_cert_url=${ULTRACLOUD_CA_CERT_URL}" fi # Generate iPXE boot script @@ -172,14 +172,14 @@ build_profile() { cat > "$profile_dir/netboot.ipxe" << EOF #!ipxe -# PlasmaCloud Netboot - $profile +# UltraCloud Netboot - $profile # Generated: $(date -u +"%Y-%m-%d 
%H:%M:%S UTC") # Set variables set boot-server \${boot-url} # Display info -echo Loading PlasmaCloud ($profile profile)... +echo Loading UltraCloud ($profile profile)... echo Kernel: bzImage echo Initrd: initrd echo diff --git a/baremetal/vm-cluster/legacy/alpine-ssh-setup.sh b/baremetal/vm-cluster/legacy/alpine-ssh-setup.sh index 568b06e..bf71d88 100755 --- a/baremetal/vm-cluster/legacy/alpine-ssh-setup.sh +++ b/baremetal/vm-cluster/legacy/alpine-ssh-setup.sh @@ -57,7 +57,7 @@ echo "" sleep 2 echo "rc-service sshd restart" # Restart with new config sleep 2 - echo "echo 'root:plasmacloud' | chpasswd" # Set root password + echo "echo 'root:ultracloud' | chpasswd" # Set root password sleep 2 echo "ip addr show" # Show network info sleep 2 @@ -72,7 +72,7 @@ echo "" echo "=== SSH Setup Complete ===" echo "SSH should now be accessible via:" echo " ssh -p 2202 root@localhost" -echo " Password: plasmacloud" +echo " Password: ultracloud" echo "" echo "Test with: ssh -o StrictHostKeyChecking=no -p 2202 root@localhost 'echo SSH_OK'" echo "" diff --git a/baremetal/vm-cluster/legacy/launch-node01-disk.sh b/baremetal/vm-cluster/legacy/launch-node01-disk.sh index a95cfa6..5058066 100755 --- a/baremetal/vm-cluster/legacy/launch-node01-disk.sh +++ b/baremetal/vm-cluster/legacy/launch-node01-disk.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 01 (Disk Boot) +# UltraCloud VM Cluster - Node 01 (Disk Boot) # Boots from installed NixOS on disk SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/baremetal/vm-cluster/legacy/launch-node01-dual.sh b/baremetal/vm-cluster/legacy/launch-node01-dual.sh index 396a73d..f2c9cc3 100755 --- a/baremetal/vm-cluster/legacy/launch-node01-dual.sh +++ b/baremetal/vm-cluster/legacy/launch-node01-dual.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 01 (ISO Boot + Dual Networking) +# UltraCloud VM Cluster - Node 01 (ISO Boot + Dual Networking) # 
Features: # - Multicast socket for inter-VM L2 communication (eth0) # - SLIRP with SSH port forward for host access (eth1) diff --git a/baremetal/vm-cluster/legacy/launch-node01-from-disk.sh b/baremetal/vm-cluster/legacy/launch-node01-from-disk.sh index 3d867b3..b2e45b7 100755 --- a/baremetal/vm-cluster/legacy/launch-node01-from-disk.sh +++ b/baremetal/vm-cluster/legacy/launch-node01-from-disk.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 01 (Boot from installed NixOS on disk) +# UltraCloud VM Cluster - Node 01 (Boot from installed NixOS on disk) # UEFI boot with OVMF firmware SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/baremetal/vm-cluster/legacy/launch-node01-iso.sh b/baremetal/vm-cluster/legacy/launch-node01-iso.sh index 6e06601..bd504fc 100755 --- a/baremetal/vm-cluster/legacy/launch-node01-iso.sh +++ b/baremetal/vm-cluster/legacy/launch-node01-iso.sh @@ -1,8 +1,8 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 01 (ISO Boot) -# Boots from PlasmaCloud ISO for manual NixOS installation +# UltraCloud VM Cluster - Node 01 (ISO Boot) +# Boots from UltraCloud ISO for manual NixOS installation SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" DISK="${SCRIPT_DIR}/node01.qcow2" diff --git a/baremetal/vm-cluster/legacy/launch-node01-netboot.sh b/baremetal/vm-cluster/legacy/launch-node01-netboot.sh index e8e5d53..915f707 100755 --- a/baremetal/vm-cluster/legacy/launch-node01-netboot.sh +++ b/baremetal/vm-cluster/legacy/launch-node01-netboot.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 01 (Netboot with SSH Key) +# UltraCloud VM Cluster - Node 01 (Netboot with SSH Key) # Features: # - Direct kernel/initrd boot (no ISO required) # - SSH key authentication baked in (no password setup needed) diff --git a/baremetal/vm-cluster/legacy/launch-node01-vde.sh b/baremetal/vm-cluster/legacy/launch-node01-vde.sh index b4c7553..e022c29 
100755 --- a/baremetal/vm-cluster/legacy/launch-node01-vde.sh +++ b/baremetal/vm-cluster/legacy/launch-node01-vde.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 01 (VDE Networking) +# UltraCloud VM Cluster - Node 01 (VDE Networking) # Uses VDE switch instead of multicast sockets SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/baremetal/vm-cluster/legacy/launch-node02-alpine.sh b/baremetal/vm-cluster/legacy/launch-node02-alpine.sh index 2d27a1a..c71ed8c 100755 --- a/baremetal/vm-cluster/legacy/launch-node02-alpine.sh +++ b/baremetal/vm-cluster/legacy/launch-node02-alpine.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 02 (Alpine Bootstrap) +# UltraCloud VM Cluster - Node 02 (Alpine Bootstrap) # Features: # - Alpine virt ISO for automated SSH setup # - Multicast socket for inter-VM L2 communication (eth0) diff --git a/baremetal/vm-cluster/legacy/launch-node02-disk.sh b/baremetal/vm-cluster/legacy/launch-node02-disk.sh index cbe51a5..ded4841 100755 --- a/baremetal/vm-cluster/legacy/launch-node02-disk.sh +++ b/baremetal/vm-cluster/legacy/launch-node02-disk.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 02 (Disk Boot) +# UltraCloud VM Cluster - Node 02 (Disk Boot) # Boots from installed NixOS on disk SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/baremetal/vm-cluster/legacy/launch-node02-from-disk.sh b/baremetal/vm-cluster/legacy/launch-node02-from-disk.sh index d848380..bc9c375 100755 --- a/baremetal/vm-cluster/legacy/launch-node02-from-disk.sh +++ b/baremetal/vm-cluster/legacy/launch-node02-from-disk.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 02 (Boot from installed NixOS on disk) +# UltraCloud VM Cluster - Node 02 (Boot from installed NixOS on disk) # Boots from the NixOS installation created by nixos-anywhere SCRIPT_DIR="$(cd "$(dirname 
"${BASH_SOURCE[0]}")" && pwd)" diff --git a/baremetal/vm-cluster/legacy/launch-node02-iso.sh b/baremetal/vm-cluster/legacy/launch-node02-iso.sh index 20423c2..0ac3e62 100755 --- a/baremetal/vm-cluster/legacy/launch-node02-iso.sh +++ b/baremetal/vm-cluster/legacy/launch-node02-iso.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 02 (ISO Boot) +# UltraCloud VM Cluster - Node 02 (ISO Boot) # Boots from NixOS ISO for provisioning via nixos-anywhere SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/baremetal/vm-cluster/legacy/launch-node02-netboot.sh b/baremetal/vm-cluster/legacy/launch-node02-netboot.sh index 76d4ddd..54b1245 100755 --- a/baremetal/vm-cluster/legacy/launch-node02-netboot.sh +++ b/baremetal/vm-cluster/legacy/launch-node02-netboot.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 01 (Netboot with SSH Key) +# UltraCloud VM Cluster - Node 02 (Netboot with SSH Key) # Features: # - Direct kernel/initrd boot (no ISO required) # - SSH key authentication baked in (no password setup needed) diff --git a/baremetal/vm-cluster/legacy/launch-node02-recovery.sh b/baremetal/vm-cluster/legacy/launch-node02-recovery.sh index 6d68c63..9b51b33 100755 --- a/baremetal/vm-cluster/legacy/launch-node02-recovery.sh +++ b/baremetal/vm-cluster/legacy/launch-node02-recovery.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 02 (Recovery Boot) +# UltraCloud VM Cluster - Node 02 (Recovery Boot) # Boots from disk using new kernel/initrd from nix store SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/baremetal/vm-cluster/legacy/launch-node02-vde.sh b/baremetal/vm-cluster/legacy/launch-node02-vde.sh index 766e612..a1aae9e 100755 --- a/baremetal/vm-cluster/legacy/launch-node02-vde.sh +++ b/baremetal/vm-cluster/legacy/launch-node02-vde.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node
02 (VDE Networking) +# UltraCloud VM Cluster - Node 02 (VDE Networking) SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" DISK="${SCRIPT_DIR}/node02.qcow2" diff --git a/baremetal/vm-cluster/legacy/launch-node03-disk.sh b/baremetal/vm-cluster/legacy/launch-node03-disk.sh index fff89da..99d6e8e 100755 --- a/baremetal/vm-cluster/legacy/launch-node03-disk.sh +++ b/baremetal/vm-cluster/legacy/launch-node03-disk.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 03 (Disk Boot) +# UltraCloud VM Cluster - Node 03 (Disk Boot) # Boots from installed NixOS on disk SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/baremetal/vm-cluster/legacy/launch-node03-from-disk.sh b/baremetal/vm-cluster/legacy/launch-node03-from-disk.sh index c3c0a47..6853c72 100755 --- a/baremetal/vm-cluster/legacy/launch-node03-from-disk.sh +++ b/baremetal/vm-cluster/legacy/launch-node03-from-disk.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 03 (Boot from installed NixOS on disk) +# UltraCloud VM Cluster - Node 03 (Boot from installed NixOS on disk) # Boots from the NixOS installation created by nixos-anywhere SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/baremetal/vm-cluster/legacy/launch-node03-iso.sh b/baremetal/vm-cluster/legacy/launch-node03-iso.sh index ba46d33..5652606 100755 --- a/baremetal/vm-cluster/legacy/launch-node03-iso.sh +++ b/baremetal/vm-cluster/legacy/launch-node03-iso.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 03 (ISO Boot) +# UltraCloud VM Cluster - Node 03 (ISO Boot) # Boots from NixOS ISO for provisioning via nixos-anywhere SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/baremetal/vm-cluster/legacy/launch-node03-netboot.sh b/baremetal/vm-cluster/legacy/launch-node03-netboot.sh index 801bf61..58263f8 100755 --- a/baremetal/vm-cluster/legacy/launch-node03-netboot.sh +++ 
b/baremetal/vm-cluster/legacy/launch-node03-netboot.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 01 (Netboot with SSH Key) +# UltraCloud VM Cluster - Node 03 (Netboot with SSH Key) # Features: # - Direct kernel/initrd boot (no ISO required) # - SSH key authentication baked in (no password setup needed) diff --git a/baremetal/vm-cluster/legacy/launch-node03-recovery.sh b/baremetal/vm-cluster/legacy/launch-node03-recovery.sh index 97198f9..5d605d7 100755 --- a/baremetal/vm-cluster/legacy/launch-node03-recovery.sh +++ b/baremetal/vm-cluster/legacy/launch-node03-recovery.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 03 (Recovery Boot) +# UltraCloud VM Cluster - Node 03 (Recovery Boot) # Boots from disk using new kernel/initrd from nix store SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" diff --git a/baremetal/vm-cluster/legacy/launch-node03-vde.sh b/baremetal/vm-cluster/legacy/launch-node03-vde.sh index 0683bc9..17e94ea 100755 --- a/baremetal/vm-cluster/legacy/launch-node03-vde.sh +++ b/baremetal/vm-cluster/legacy/launch-node03-vde.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# PlasmaCloud VM Cluster - Node 03 (VDE Networking) +# UltraCloud VM Cluster - Node 03 (VDE Networking) SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" DISK="${SCRIPT_DIR}/node03.qcow2" diff --git a/baremetal/vm-cluster/legacy/pxe-server-setup.sh b/baremetal/vm-cluster/legacy/pxe-server-setup.sh index 87c7a4c..5781758 100644 --- a/baremetal/vm-cluster/legacy/pxe-server-setup.sh +++ b/baremetal/vm-cluster/legacy/pxe-server-setup.sh @@ -5,7 +5,7 @@ set -e -echo "=== PlasmaCloud PXE Server Setup ===" +echo "=== UltraCloud PXE Server Setup ===" echo "This script will:" echo "1. Install Alpine Linux to disk" echo "2. Configure static networking (192.168.100.1)" @@ -61,7 +61,7 @@ chroot /mnt apk add --no-cache \ # 8.
Configure dnsmasq in the new system cat > /mnt/etc/dnsmasq.conf <<'EOF' -# PlasmaCloud PXE Server dnsmasq configuration +# UltraCloud PXE Server dnsmasq configuration # Interface to listen on (multicast network) interface=eth0 @@ -109,14 +109,14 @@ chroot /mnt rc-update add dnsmasq default chroot /mnt rc-update add sshd default # 13. Set root password (for SSH access) -echo "root:plasmacloud" | chroot /mnt chpasswd +echo "root:ultracloud" | chroot /mnt chpasswd echo "" echo "=== Installation Complete ===" echo "System will reboot from disk" echo "PXE server will be available at: 192.168.100.1" echo "DHCP range: 192.168.100.100-150" -echo "SSH: ssh root@192.168.100.1 (password: plasmacloud)" +echo "SSH: ssh root@192.168.100.1 (password: ultracloud)" echo "" echo "Press Enter to reboot..." read diff --git a/baremetal/vm-cluster/pxe-server/configuration.nix b/baremetal/vm-cluster/pxe-server/configuration.nix index 22d3e29..424f8e2 100644 --- a/baremetal/vm-cluster/pxe-server/configuration.nix +++ b/baremetal/vm-cluster/pxe-server/configuration.nix @@ -79,7 +79,7 @@ services.deployer = { enable = true; bindAddr = "0.0.0.0:8080"; - clusterId = "plasmacloud-vm-cluster"; + clusterId = "ultracloud-vm-cluster"; requireChainfire = false; allowUnauthenticated = true; allowUnknownNodes = true; @@ -87,7 +87,7 @@ }; # Root password (for SSH access) - users.users.root.password = "plasmacloud"; + users.users.root.password = "ultracloud"; # Packages environment.systemPackages = with pkgs; [ diff --git a/bin/cloud-cli b/bin/cloud-cli index e368204..1cb2b0a 100755 --- a/bin/cloud-cli +++ b/bin/cloud-cli @@ -86,7 +86,7 @@ def cmd_list_vms(args): def main(): global DEFAULT_API_URL - parser = argparse.ArgumentParser(description="PhotonCloud CLI") + parser = argparse.ArgumentParser(description="UltraCloud CLI") parser.add_argument("--token", help="Auth token", default=os.environ.get("CLOUD_TOKEN")) parser.add_argument("--url", help="API URL", default=DEFAULT_API_URL) diff --git 
a/chainfire/baremetal/pxe-server/assets/nixos/control-plane/netboot.ipxe b/chainfire/baremetal/pxe-server/assets/nixos/control-plane/netboot.ipxe index d2c5805..4cd821a 100644 --- a/chainfire/baremetal/pxe-server/assets/nixos/control-plane/netboot.ipxe +++ b/chainfire/baremetal/pxe-server/assets/nixos/control-plane/netboot.ipxe @@ -1,13 +1,13 @@ #!ipxe -# PlasmaCloud Netboot - control-plane +# UltraCloud Netboot - control-plane # Generated: 2025-12-10 21:58:15 UTC # Set variables set boot-server ${boot-url} # Display info -echo Loading PlasmaCloud (control-plane profile)... +echo Loading UltraCloud (control-plane profile)... echo Kernel: bzImage echo Initrd: initrd echo diff --git a/chainfire/baremetal/pxe-server/ipxe/boot.ipxe b/chainfire/baremetal/pxe-server/ipxe/boot.ipxe index 28aa1ad..397b6ea 100644 --- a/chainfire/baremetal/pxe-server/ipxe/boot.ipxe +++ b/chainfire/baremetal/pxe-server/ipxe/boot.ipxe @@ -182,7 +182,7 @@ set kernel-params ${kernel-params} centra.profile=${profile} set kernel-params ${kernel-params} centra.hostname=${hostname} set kernel-params ${kernel-params} centra.mac=${mac} set kernel-params ${kernel-params} centra.provisioning-server=${provisioning-server} -set kernel-params ${kernel-params} plasmacloud.deployer_url=${deployer-url} +set kernel-params ${kernel-params} ultracloud.deployer_url=${deployer-url} set kernel-params ${kernel-params} console=tty0 console=ttyS0,115200n8 # For debugging, enable these: diff --git a/chainfire/baremetal/pxe-server/nixos-module.nix b/chainfire/baremetal/pxe-server/nixos-module.nix index f45f210..8334e74 100644 --- a/chainfire/baremetal/pxe-server/nixos-module.nix +++ b/chainfire/baremetal/pxe-server/nixos-module.nix @@ -146,9 +146,9 @@ let set kernel-params ''${kernel-params} centra.hostname=''${hostname} set kernel-params ''${kernel-params} centra.mac=''${mac} set kernel-params ''${kernel-params} centra.provisioning-server=''${provisioning-server} - set kernel-params ''${kernel-params} 
plasmacloud.deployer_url=''${deployer-url} - ${optionalString (cfg.bootstrap.bootstrapToken != null) "set kernel-params ''${kernel-params} plasmacloud.bootstrap_token=${cfg.bootstrap.bootstrapToken}"} - ${optionalString (cfg.bootstrap.caCertUrl != null) "set kernel-params ''${kernel-params} plasmacloud.ca_cert_url=${cfg.bootstrap.caCertUrl}"} + set kernel-params ''${kernel-params} ultracloud.deployer_url=''${deployer-url} + ${optionalString (cfg.bootstrap.bootstrapToken != null) "set kernel-params ''${kernel-params} ultracloud.bootstrap_token=${cfg.bootstrap.bootstrapToken}"} + ${optionalString (cfg.bootstrap.caCertUrl != null) "set kernel-params ''${kernel-params} ultracloud.ca_cert_url=${cfg.bootstrap.caCertUrl}"} set kernel-params ''${kernel-params} console=tty0 console=ttyS0,115200n8 kernel ''${nixos-url}/bzImage ''${kernel-params} || goto failed diff --git a/chainfire/chainfire-client/src/metadata.rs b/chainfire/chainfire-client/src/metadata.rs index 8e1aad5..d160611 100644 --- a/chainfire/chainfire-client/src/metadata.rs +++ b/chainfire/chainfire-client/src/metadata.rs @@ -1,6 +1,6 @@ //! Metadata-oriented KV facade for Chainfire (and test backends). //! -//! This module exists to standardize how PhotonCloud services interact with +//! This module exists to standardize how UltraCloud services interact with //! control-plane metadata: versioned reads, CAS, prefix scans, etc. 
use async_trait::async_trait; diff --git a/client-common/Cargo.toml b/client-common/Cargo.toml index d4b0318..339d171 100644 --- a/client-common/Cargo.toml +++ b/client-common/Cargo.toml @@ -2,9 +2,9 @@ name = "photocloud-client-common" version = "0.1.0" edition = "2021" -authors = ["PhotonCloud"] +authors = ["UltraCloud"] license = "MIT OR Apache-2.0" -description = "Shared client config types (endpoint/auth/retry) for PhotonCloud SDKs" +description = "Shared client config types (endpoint/auth/retry) for UltraCloud SDKs" [dependencies] tonic = { version = "0.12", features = ["tls"] } diff --git a/client-common/src/lib.rs b/client-common/src/lib.rs index 8ca690f..f3901e3 100644 --- a/client-common/src/lib.rs +++ b/client-common/src/lib.rs @@ -1,4 +1,4 @@ -//! Shared client config types (endpoint/auth/retry) for PhotonCloud SDKs. +//! Shared client config types (endpoint/auth/retry) for UltraCloud SDKs. //! //! Lightweight, type-only helpers to keep SDK crates consistent without //! forcing a unified SDK dependency tree. diff --git a/coronafs/Cargo.toml b/coronafs/Cargo.toml index ae89bdd..6b93b47 100644 --- a/coronafs/Cargo.toml +++ b/coronafs/Cargo.toml @@ -9,8 +9,8 @@ version = "0.1.0" edition = "2021" license = "MIT OR Apache-2.0" rust-version = "1.75" -authors = ["PhotonCloud Contributors"] -repository = "https://github.com/photoncloud/photoncloud" +authors = ["UltraCloud Contributors"] +repository = "https://github.com/ultracloud/ultracloud" [workspace.dependencies] axum = "0.8" diff --git a/coronafs/README.md b/coronafs/README.md index cc9189e..e5f9d9c 100644 --- a/coronafs/README.md +++ b/coronafs/README.md @@ -1,6 +1,6 @@ # CoronaFS -CoronaFS is PhotonCloud's mutable VM-volume layer. +CoronaFS is UltraCloud's mutable VM-volume layer. 
Current implementation: diff --git a/crates/photon-auth-client/Cargo.toml b/crates/photon-auth-client/Cargo.toml index 5c6c416..0c048b8 100644 --- a/crates/photon-auth-client/Cargo.toml +++ b/crates/photon-auth-client/Cargo.toml @@ -3,7 +3,7 @@ name = "photon-auth-client" version = "0.1.0" edition = "2021" license = "MIT OR Apache-2.0" -description = "Shared IAM auth client wrapper for PhotonCloud services" +description = "Shared IAM auth client wrapper for UltraCloud services" [dependencies] anyhow = "1.0" diff --git a/crates/photon-config/Cargo.toml b/crates/photon-config/Cargo.toml index 61de74e..54cf55d 100644 --- a/crates/photon-config/Cargo.toml +++ b/crates/photon-config/Cargo.toml @@ -3,7 +3,7 @@ name = "photon-config" version = "0.1.0" edition = "2021" license = "MIT OR Apache-2.0" -description = "Shared configuration loading helpers for PhotonCloud" +description = "Shared configuration loading helpers for UltraCloud" [dependencies] anyhow = "1.0" diff --git a/crates/photon-runtime/Cargo.toml b/crates/photon-runtime/Cargo.toml index 6e0f9ab..229ec3f 100644 --- a/crates/photon-runtime/Cargo.toml +++ b/crates/photon-runtime/Cargo.toml @@ -3,7 +3,7 @@ name = "photon-runtime" version = "0.1.0" edition = "2021" license = "MIT OR Apache-2.0" -description = "Shared runtime helpers for PhotonCloud services" +description = "Shared runtime helpers for UltraCloud services" [dependencies] anyhow = "1.0" diff --git a/crates/photon-state/Cargo.toml b/crates/photon-state/Cargo.toml index 34ba1b9..c34c2bb 100644 --- a/crates/photon-state/Cargo.toml +++ b/crates/photon-state/Cargo.toml @@ -3,7 +3,7 @@ name = "photon-state" version = "0.1.0" edition = "2021" license = "MIT OR Apache-2.0" -description = "Shared state backend types and validation for PhotonCloud services" +description = "Shared state backend types and validation for UltraCloud services" [dependencies] anyhow = "1.0" diff --git a/creditservice/Cargo.toml b/creditservice/Cargo.toml index 59bad2b..7f9a769 100644 
--- a/creditservice/Cargo.toml +++ b/creditservice/Cargo.toml @@ -13,8 +13,8 @@ version = "0.1.0" edition = "2021" license = "MIT OR Apache-2.0" rust-version = "1.75" -authors = ["PhotonCloud Contributors"] -repository = "https://github.com/photoncloud/creditservice" +authors = ["UltraCloud Contributors"] +repository = "https://github.com/ultracloud/creditservice" [workspace.dependencies] # Internal crates @@ -27,7 +27,7 @@ photon-config = { path = "../crates/photon-config" } photon-runtime = { path = "../crates/photon-runtime" } photon-state = { path = "../crates/photon-state" } -# External dependencies (aligned with PhotonCloud stack) +# External dependencies (aligned with UltraCloud stack) tokio = { version = "1.40", features = ["full"] } tokio-stream = "0.1" futures = "0.3" diff --git a/creditservice/README.md b/creditservice/README.md index 9901ca3..ce33eec 100644 --- a/creditservice/README.md +++ b/creditservice/README.md @@ -1,6 +1,6 @@ # CreditService -`creditservice` is a minimal reference service that proves PhotonCloud can integrate vendor-specific quota and credit control with platform auth and gateway admission. +`creditservice` is a minimal reference service that proves UltraCloud can integrate vendor-specific quota and credit control with platform auth and gateway admission. It is intentionally not a full billing product. 
diff --git a/deployer/Cargo.lock b/deployer/Cargo.lock index f1c60de..181a381 100644 --- a/deployer/Cargo.lock +++ b/deployer/Cargo.lock @@ -2028,28 +2028,6 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" -[[package]] -name = "plasmacloud-reconciler" -version = "0.1.0" -dependencies = [ - "anyhow", - "chainfire-client", - "chrono", - "clap", - "deployer-types", - "fiberlb-api", - "flashdns-api", - "iam-client", - "iam-types", - "prismnet-api", - "serde", - "serde_json", - "tokio", - "tonic", - "tracing", - "tracing-subscriber", -] - [[package]] name = "polyval" version = "0.6.2" @@ -3404,6 +3382,28 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +[[package]] +name = "ultracloud-reconciler" +version = "0.1.0" +dependencies = [ + "anyhow", + "chainfire-client", + "chrono", + "clap", + "deployer-types", + "fiberlb-api", + "flashdns-api", + "iam-client", + "iam-types", + "prismnet-api", + "serde", + "serde_json", + "tokio", + "tonic", + "tracing", + "tracing-subscriber", +] + [[package]] name = "unicode-bidi" version = "0.3.18" diff --git a/deployer/Cargo.toml b/deployer/Cargo.toml index c35537b..27069bf 100644 --- a/deployer/Cargo.toml +++ b/deployer/Cargo.toml @@ -7,7 +7,7 @@ members = [ "crates/nix-agent", "crates/cert-authority", "crates/deployer-ctl", - "crates/plasmacloud-reconciler", + "crates/ultracloud-reconciler", "crates/fleet-scheduler", ] @@ -15,9 +15,9 @@ members = [ version = "0.1.0" edition = "2021" rust-version = "1.75" -authors = ["PhotonCloud Contributors"] +authors = ["UltraCloud Contributors"] license = "MIT OR Apache-2.0" -repository = "https://github.com/centra/plasmacloud" +repository = "https://github.com/centra/ultracloud" [workspace.dependencies] # Internal crates diff --git 
a/deployer/crates/cert-authority/src/main.rs b/deployer/crates/cert-authority/src/main.rs index f6215e8..5b87911 100644 --- a/deployer/crates/cert-authority/src/main.rs +++ b/deployer/crates/cert-authority/src/main.rs @@ -12,7 +12,7 @@ use serde::{Deserialize, Serialize}; use tracing::{info, warn}; use tracing_subscriber::EnvFilter; -const DEFAULT_CLUSTER_NAMESPACE: &str = "photoncloud"; +const DEFAULT_CLUSTER_NAMESPACE: &str = "ultracloud"; const CERT_TTL_DAYS: u64 = 90; const ROTATION_THRESHOLD_DAYS: u64 = 30; @@ -111,12 +111,12 @@ async fn init_ca(cert_path: &PathBuf, key_path: &PathBuf) -> Result<()> { .context("failed to generate CA key pair")?; // CA証明書パラメータを設定 - let mut params = CertificateParams::new(vec!["PhotonCloud CA".to_string()]) + let mut params = CertificateParams::new(vec!["UltraCloud CA".to_string()]) .context("failed to create certificate params")?; let mut distinguished_name = DistinguishedName::new(); - distinguished_name.push(DnType::OrganizationName, "PhotonCloud"); - distinguished_name.push(DnType::CommonName, "PhotonCloud CA"); + distinguished_name.push(DnType::OrganizationName, "UltraCloud"); + distinguished_name.push(DnType::CommonName, "UltraCloud CA"); params.distinguished_name = distinguished_name; params.is_ca = rcgen::IsCa::Ca(rcgen::BasicConstraints::Unconstrained); params.key_usages = vec![ @@ -215,7 +215,7 @@ async fn issue_certificate( ensure_dns_san(&mut csr_params.params, svc); } if csr_params.params.subject_alt_names.is_empty() { - ensure_dns_san(&mut csr_params.params, "photoncloud-service"); + ensure_dns_san(&mut csr_params.params, "ultracloud-service"); } // CA署名証明書を生成(CSRの公開鍵を利用) diff --git a/deployer/crates/deployer-ctl/src/chainfire.rs b/deployer/crates/deployer-ctl/src/chainfire.rs index 19ea1f3..6420487 100644 --- a/deployer/crates/deployer-ctl/src/chainfire.rs +++ b/deployer/crates/deployer-ctl/src/chainfire.rs @@ -6,9 +6,10 @@ use anyhow::{Context, Result}; use chainfire_client::{Client, ClientError}; use 
chrono::Utc; use deployer_types::{ - ClusterNodeRecord, ClusterStateSpec, CommissionState, DesiredSystemSpec, HostDeploymentSpec, - HostDeploymentStatus, InstallPlan, InstallState, NodeConfig, NodeSpec, ObservedSystemState, - PowerState, ServiceInstanceSpec, ServicePublicationState, ServiceSpec, ServiceStatusRecord, + BootstrapPlan, ClusterNodeRecord, ClusterStateSpec, CommissionState, DesiredSystemSpec, + HostDeploymentSpec, HostDeploymentStatus, InstallPlan, InstallState, NodeAssignment, + NodeConfig, NodeSpec, ObservedSystemState, PowerState, ServiceInstanceSpec, + ServicePublicationState, ServiceSpec, ServiceStatusRecord, }; use serde::de::DeserializeOwned; use serde_json::{json, Value}; @@ -32,10 +33,6 @@ fn deployer_node_config_key(deployer_namespace: &str, machine_id: &str) -> Vec Vec { - format!("{}/nodes/mapping/{}", deployer_namespace, machine_id).into_bytes() -} - fn key_node(cluster_namespace: &str, cluster_id: &str, node_id: &str) -> Vec { format!( "{}nodes/{}", @@ -217,7 +214,8 @@ fn merge_install_plan( } fn node_config_from_spec(node: &NodeSpec) -> NodeConfig { - NodeConfig { + let assignment = NodeAssignment { + node_id: node.node_id.clone(), hostname: node.hostname.clone(), role: node .roles @@ -225,15 +223,21 @@ fn node_config_from_spec(node: &NodeSpec) -> NodeConfig { .cloned() .unwrap_or_else(|| "worker".to_string()), ip: node.ip.clone(), - services: Vec::new(), - ssh_authorized_keys: Vec::new(), labels: node.labels.clone(), pool: node.pool.clone(), node_class: node.node_class.clone(), failure_domain: node.failure_domain.clone(), + }; + let bootstrap_plan = BootstrapPlan { + services: Vec::new(), nix_profile: node.nix_profile.clone(), install_plan: node.install_plan.clone(), - } + }; + NodeConfig::from_parts( + assignment, + bootstrap_plan, + deployer_types::BootstrapSecrets::default(), + ) } fn desired_system_from_spec(node: &NodeSpec) -> Option { @@ -340,7 +344,7 @@ fn resolve_nodes(spec: &ClusterStateSpec) -> Result> { .or_insert_with(|| 
pool.to_string()); resolved .labels - .entry("pool.photoncloud.io/name".to_string()) + .entry("pool.ultracloud.io/name".to_string()) .or_insert_with(|| pool.to_string()); } @@ -351,7 +355,7 @@ fn resolve_nodes(spec: &ClusterStateSpec) -> Result> { .or_insert_with(|| node_class.to_string()); resolved .labels - .entry("nodeclass.photoncloud.io/name".to_string()) + .entry("nodeclass.ultracloud.io/name".to_string()) .or_insert_with(|| node_class.to_string()); } @@ -590,13 +594,7 @@ pub async fn bootstrap_cluster( serde_json::to_vec(&config)?, ) .await?; - client - .put( - &deployer_node_mapping_key(deployer_namespace, machine_id), - node.node_id.as_bytes(), - ) - .await?; - info!(node_id = %node.node_id, machine_id = %machine_id, "seeded deployer bootstrap mapping"); + info!(node_id = %node.node_id, machine_id = %machine_id, "seeded deployer bootstrap config"); } } @@ -710,12 +708,6 @@ pub async fn apply_cluster_state( serde_json::to_vec(&config)?, ) .await?; - client - .put( - &deployer_node_mapping_key(deployer_namespace, machine_id), - node.node_id.as_bytes(), - ) - .await?; } } @@ -1460,10 +1452,6 @@ async fn prune_cluster_state( String::from_utf8_lossy(&deployer_node_config_key(deployer_namespace, machine_id)) .to_string(), ); - desired_deployer_keys.insert( - String::from_utf8_lossy(&deployer_node_mapping_key(deployer_namespace, machine_id)) - .to_string(), - ); } } @@ -1607,7 +1595,7 @@ mod tests { assert_eq!(node.labels.get("pool").map(String::as_str), Some("general")); assert_eq!( node.labels - .get("nodeclass.photoncloud.io/name") + .get("nodeclass.ultracloud.io/name") .map(String::as_str), Some("worker-linux") ); @@ -1665,7 +1653,7 @@ mod tests { #[test] fn test_is_prunable_key_keeps_observed_system() { - let prefix = cluster_prefix("photoncloud", "test-cluster"); + let prefix = cluster_prefix("ultracloud", "test-cluster"); assert!(is_prunable_key(&format!("{}nodes/node01", prefix), &prefix)); assert!(is_prunable_key( 
&format!("{}nodes/node01/desired-system", prefix), @@ -1707,5 +1695,4 @@ fn is_prunable_key(key: &str, prefix: &str) -> bool { fn is_prunable_deployer_key(key: &str, deployer_namespace: &str) -> bool { key.starts_with(&format!("{}/nodes/config/", deployer_namespace)) - || key.starts_with(&format!("{}/nodes/mapping/", deployer_namespace)) } diff --git a/deployer/crates/deployer-ctl/src/main.rs b/deployer/crates/deployer-ctl/src/main.rs index 0f870a4..40c4ca0 100644 --- a/deployer/crates/deployer-ctl/src/main.rs +++ b/deployer/crates/deployer-ctl/src/main.rs @@ -8,7 +8,7 @@ mod chainfire; mod power; mod remote; -/// Deployer control CLI for PhotonCloud. +/// Deployer control CLI for UltraCloud. /// /// - 初回ブートストラップ時に Chainfire 上の Cluster/Node/Service 定義を作成 /// - 既存の Deployer クラスタに対して宣言的な設定を apply する @@ -20,12 +20,12 @@ struct Cli { #[arg(long, global = true, default_value = "http://127.0.0.1:7000")] chainfire_endpoint: String, - /// PhotonCloud Cluster ID (論理名) + /// UltraCloud Cluster ID (論理名) #[arg(long, global = true)] cluster_id: Option, - /// PhotonCloud cluster namespace (default: photoncloud) - #[arg(long, global = true, default_value = "photoncloud")] + /// UltraCloud cluster namespace (default: ultracloud) + #[arg(long, global = true, default_value = "ultracloud")] cluster_namespace: String, /// Deployer namespace used for machine_id -> NodeConfig bootstrap mappings @@ -49,7 +49,7 @@ enum Command { config: PathBuf, }, - /// 宣言的な PhotonCloud クラスタ設定を Chainfire に apply する (GitOps 的に利用可能) + /// 宣言的な UltraCloud クラスタ設定を Chainfire に apply する (GitOps 的に利用可能) Apply { /// Cluster/Node/Service/Instance/MTLSPolicy を含むJSON/YAML #[arg(long)] @@ -60,7 +60,7 @@ enum Command { prune: bool, }, - /// Chainfire 上の PhotonCloud 関連キーをダンプする (デバッグ用途) + /// Chainfire 上の UltraCloud 関連キーをダンプする (デバッグ用途) Dump { /// ダンプ対象の prefix (未指定の場合は cluster-namespace を使用) #[arg(long, default_value = "")] diff --git a/deployer/crates/deployer-server/src/admin.rs 
b/deployer/crates/deployer-server/src/admin.rs index c9cddfd..3a1510f 100644 --- a/deployer/crates/deployer-server/src/admin.rs +++ b/deployer/crates/deployer-server/src/admin.rs @@ -1,11 +1,8 @@ -//! Admin API endpoints for node management -//! -//! These endpoints allow administrators to pre-register nodes, -//! list registered nodes, and manage node configurations. +//! Admin API endpoints for node management. use axum::{extract::State, http::HeaderMap, http::StatusCode, Json}; use chrono::{DateTime, Utc}; -use deployer_types::{InstallPlan, NodeConfig}; +use deployer_types::NodeConfig; use serde::{Deserialize, Serialize}; use std::collections::HashSet; use std::sync::Arc; @@ -41,45 +38,16 @@ fn adjust_state_for_heartbeat( state } -/// Pre-registration request payload +/// Pre-registration request payload. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PreRegisterRequest { /// Machine ID (from /etc/machine-id) pub machine_id: String, - /// Assigned node identifier - pub node_id: String, - /// Node role (control-plane, worker, storage, etc.) - pub role: String, - /// Optional: Node IP address - #[serde(skip_serializing_if = "Option::is_none")] - pub ip: Option, - /// Optional: Services to run on this node - #[serde(default)] - pub services: Vec, - /// Optional: SSH authorized keys for bootstrap access - #[serde(default)] - pub ssh_authorized_keys: Vec, - /// Optional desired labels applied to the node - #[serde(default)] - pub labels: std::collections::HashMap, - /// Optional pool assignment - #[serde(default)] - pub pool: Option, - /// Optional node class assignment - #[serde(default)] - pub node_class: Option, - /// Optional failure domain - #[serde(default)] - pub failure_domain: Option, - /// Optional nix profile/flake attr - #[serde(default)] - pub nix_profile: Option, - /// Optional explicit install plan for bootstrap installers. 
- #[serde(default)] - pub install_plan: Option, + /// Canonical bootstrap configuration that should be served back during phone-home. + pub node_config: NodeConfig, } -/// Pre-registration response payload +/// Pre-registration response payload. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PreRegisterResponse { pub success: bool, @@ -89,14 +57,14 @@ pub struct PreRegisterResponse { pub node_id: String, } -/// List nodes response payload +/// List nodes response payload. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ListNodesResponse { pub nodes: Vec, pub total: usize, } -/// Node summary for listing +/// Node summary for listing. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct NodeSummary { pub node_id: String, @@ -107,9 +75,6 @@ pub struct NodeSummary { } /// POST /api/v1/admin/nodes -/// -/// Pre-register a machine mapping before it boots. -/// This allows administrators to configure node assignments in advance. pub async fn pre_register( State(state): State>, headers: HeaderMap, @@ -117,42 +82,28 @@ pub async fn pre_register( ) -> Result, (StatusCode, String)> { require_admin_auth(&state, &headers)?; validate_identifier(&request.machine_id, "machine_id")?; - validate_identifier(&request.node_id, "node_id")?; - if let Some(ref ip) = request.ip { - validate_ip(ip, "ip")?; + validate_identifier(&request.node_config.assignment.node_id, "node_id")?; + if !request.node_config.assignment.ip.is_empty() { + validate_ip(&request.node_config.assignment.ip, "ip")?; } + let node_id = request.node_config.assignment.node_id.clone(); info!( machine_id = %request.machine_id, - node_id = %request.node_id, - role = %request.role, + node_id = %node_id, + role = %request.node_config.assignment.role, "Pre-registration request" ); - let config = NodeConfig { - hostname: request.node_id.clone(), - role: request.role.clone(), - ip: request.ip.clone().unwrap_or_default(), - services: request.services.clone(), - ssh_authorized_keys: 
request.ssh_authorized_keys.clone(), - labels: request.labels.clone(), - pool: request.pool.clone(), - node_class: request.node_class.clone(), - failure_domain: request.failure_domain.clone(), - nix_profile: request.nix_profile.clone(), - install_plan: request.install_plan.clone(), - }; - - // Conflict detection across configured backends if let Some(local_storage) = &state.local_storage { let storage = local_storage.lock().await; - if let Some((existing_node, _)) = storage.get_node_config(&request.machine_id) { - if existing_node != request.node_id { + if let Some(existing) = storage.get_node_config(&request.machine_id) { + if existing.assignment.node_id != node_id { return Err(( StatusCode::CONFLICT, format!( "machine_id {} already mapped to {}", - request.machine_id, existing_node + request.machine_id, existing.assignment.node_id ), )); } @@ -161,8 +112,8 @@ pub async fn pre_register( if let Some(storage_mutex) = &state.storage { let mut storage = storage_mutex.lock().await; - if let Some(existing_node) = storage - .get_node_mapping(&request.machine_id) + if let Some(existing) = storage + .get_node_config(&request.machine_id) .await .map_err(|e| { ( @@ -171,12 +122,12 @@ pub async fn pre_register( ) })? 
{ - if existing_node != request.node_id { + if existing.assignment.node_id != node_id { return Err(( StatusCode::CONFLICT, format!( "machine_id {} already mapped to {}", - request.machine_id, existing_node + request.machine_id, existing.assignment.node_id ), )); } @@ -185,13 +136,13 @@ pub async fn pre_register( { let map = state.machine_configs.read().await; - if let Some((existing_node, _)) = map.get(&request.machine_id) { - if existing_node != &request.node_id { + if let Some(existing) = map.get(&request.machine_id) { + if existing.assignment.node_id != node_id { return Err(( StatusCode::CONFLICT, format!( "machine_id {} already mapped to {}", - request.machine_id, existing_node + request.machine_id, existing.assignment.node_id ), )); } @@ -201,7 +152,7 @@ pub async fn pre_register( let mut stored_locally = false; if let Some(local_storage) = &state.local_storage { let mut storage = local_storage.lock().await; - if let Err(e) = storage.register_node(&request.machine_id, &request.node_id, &config) { + if let Err(e) = storage.register_node(&request.machine_id, &request.node_config) { error!( machine_id = %request.machine_id, error = %e, @@ -211,30 +162,29 @@ pub async fn pre_register( stored_locally = true; info!( machine_id = %request.machine_id, - node_id = %request.node_id, + node_id = %node_id, "Node pre-registered in local storage" ); } } - // Try ChainFire storage if let Some(storage_mutex) = &state.storage { let mut storage = storage_mutex.lock().await; match storage - .register_node(&request.machine_id, &request.node_id, &config) + .register_node(&request.machine_id, &request.node_config) .await { Ok(_) => { info!( machine_id = %request.machine_id, - node_id = %request.node_id, + node_id = %node_id, "Node pre-registered in ChainFire" ); return Ok(Json(PreRegisterResponse { success: true, message: Some("Node pre-registered successfully".to_string()), machine_id: request.machine_id, - node_id: request.node_id, + node_id, })); } 
Err(StorageError::Conflict(msg)) => { @@ -256,15 +206,15 @@ pub async fn pre_register( } } - // Fallback to in-memory storage - state.machine_configs.write().await.insert( - request.machine_id.clone(), - (request.node_id.clone(), config), - ); + state + .machine_configs + .write() + .await + .insert(request.machine_id.clone(), request.node_config.clone()); debug!( machine_id = %request.machine_id, - node_id = %request.node_id, + node_id = %node_id, "Node pre-registered in-memory (ChainFire unavailable)" ); @@ -276,13 +226,11 @@ pub async fn pre_register( "Node pre-registered (in-memory)".to_string() }), machine_id: request.machine_id, - node_id: request.node_id, + node_id, })) } /// GET /api/v1/admin/nodes -/// -/// List all registered nodes. pub async fn list_nodes( State(state): State>, headers: HeaderMap, @@ -303,7 +251,6 @@ pub async fn list_nodes( let cluster_namespace = state.config.cluster_namespace.trim(); let cluster_enabled = cluster_id.is_some() && !cluster_namespace.is_empty(); - // Prefer cluster node state from ChainFire (kept fresh by node-agent) if cluster_enabled { if let Some(storage_mutex) = &state.storage { let mut storage = storage_mutex.lock().await; @@ -340,7 +287,6 @@ pub async fn list_nodes( } } - // Fallback to local cluster nodes if ChainFire data is unavailable or missing nodes if cluster_enabled { if let Some(local_storage) = &state.local_storage { let storage = local_storage.lock().await; @@ -371,7 +317,6 @@ pub async fn list_nodes( } } - // Try ChainFire storage first if let Some(storage_mutex) = &state.storage { let mut storage = storage_mutex.lock().await; match storage.list_nodes().await { @@ -402,7 +347,6 @@ pub async fn list_nodes( } Err(e) => { error!(error = %e, "Failed to list nodes from ChainFire"); - // Continue with in-memory fallback } } } @@ -434,7 +378,6 @@ pub async fn list_nodes( } } - // Also include in-memory nodes (may have duplicates if ChainFire is available) let in_memory = state.nodes.read().await; for info in 
in_memory.values() { if seen.contains(&info.id) { @@ -459,20 +402,20 @@ pub async fn list_nodes( seen.insert(info.id.clone()); } - // Include pre-registered nodes that haven't phone-home yet (ChainFire) if let Some(storage_mutex) = &state.storage { let mut storage = storage_mutex.lock().await; match storage.list_machine_configs().await { Ok(configs) => { - for (_machine_id, node_id, config) in configs { + for (_machine_id, config) in configs { + let node_id = config.assignment.node_id.clone(); if seen.contains(&node_id) { continue; } nodes.push(NodeSummary { node_id: node_id.clone(), - hostname: config.hostname.clone(), - ip: config.ip.clone(), - role: config.role.clone(), + hostname: config.assignment.hostname.clone(), + ip: config.assignment.ip.clone(), + role: config.assignment.role.clone(), state: "pre-registered".to_string(), }); seen.insert(node_id); @@ -484,38 +427,38 @@ pub async fn list_nodes( } } - // Include pre-registered nodes from local storage if let Some(local_storage) = &state.local_storage { let storage = local_storage.lock().await; - for (_machine_id, node_id, config) in storage.list_machine_configs() { + for (_machine_id, config) in storage.list_machine_configs() { + let node_id = config.assignment.node_id.clone(); if seen.contains(&node_id) { continue; } nodes.push(NodeSummary { node_id: node_id.clone(), - hostname: config.hostname.clone(), - ip: config.ip.clone(), - role: config.role.clone(), + hostname: config.assignment.hostname.clone(), + ip: config.assignment.ip.clone(), + role: config.assignment.role.clone(), state: "pre-registered".to_string(), }); seen.insert(node_id); } } - // Include pre-registered nodes from in-memory config map let configs = state.machine_configs.read().await; - for (_machine_id, (node_id, config)) in configs.iter() { - if seen.contains(node_id) { + for config in configs.values() { + let node_id = config.assignment.node_id.clone(); + if seen.contains(&node_id) { continue; } nodes.push(NodeSummary { node_id: 
node_id.clone(), - hostname: config.hostname.clone(), - ip: config.ip.clone(), - role: config.role.clone(), + hostname: config.assignment.hostname.clone(), + ip: config.assignment.ip.clone(), + role: config.assignment.role.clone(), state: "pre-registered".to_string(), }); - seen.insert(node_id.clone()); + seen.insert(node_id); } let total = nodes.len(); @@ -528,6 +471,7 @@ mod tests { use crate::config::Config; use crate::state::AppState; use axum::http::HeaderMap; + use deployer_types::{BootstrapPlan, BootstrapSecrets, NodeAssignment}; fn test_headers() -> HeaderMap { let mut headers = HeaderMap::new(); @@ -542,23 +486,39 @@ mod tests { Arc::new(AppState::with_config(config)) } + fn test_node_config() -> NodeConfig { + NodeConfig::from_parts( + NodeAssignment { + node_id: "node-test".to_string(), + hostname: "node-test".to_string(), + role: "worker".to_string(), + ip: "10.0.1.50".to_string(), + labels: std::collections::HashMap::new(), + pool: None, + node_class: None, + failure_domain: None, + }, + BootstrapPlan { + services: vec!["chainfire".to_string()], + nix_profile: None, + install_plan: None, + }, + BootstrapSecrets { + ssh_authorized_keys: vec!["ssh-ed25519 AAAA... test".to_string()], + ssh_host_key: None, + tls_cert: None, + tls_key: None, + }, + ) + } + #[tokio::test] async fn test_pre_register() { let state = test_state(); let request = PreRegisterRequest { machine_id: "new-machine-abc".to_string(), - node_id: "node-test".to_string(), - role: "worker".to_string(), - ip: Some("10.0.1.50".to_string()), - services: vec!["chainfire".to_string()], - ssh_authorized_keys: vec!["ssh-ed25519 AAAA... 
test".to_string()], - labels: std::collections::HashMap::new(), - pool: None, - node_class: None, - failure_domain: None, - nix_profile: None, - install_plan: None, + node_config: test_node_config(), }; let result = @@ -570,12 +530,10 @@ mod tests { assert_eq!(response.machine_id, "new-machine-abc"); assert_eq!(response.node_id, "node-test"); - // Verify stored in machine_configs let configs = state.machine_configs.read().await; - assert!(configs.contains_key("new-machine-abc")); - let (node_id, config) = configs.get("new-machine-abc").unwrap(); - assert_eq!(node_id, "node-test"); - assert_eq!(config.role, "worker"); + let config = configs.get("new-machine-abc").expect("stored config"); + assert_eq!(config.assignment.node_id, "node-test"); + assert_eq!(config.assignment.role, "worker"); } #[tokio::test] diff --git a/deployer/crates/deployer-server/src/bootstrap_assets.rs b/deployer/crates/deployer-server/src/bootstrap_assets.rs index 1b6dc01..9f23f07 100644 --- a/deployer/crates/deployer-server/src/bootstrap_assets.rs +++ b/deployer/crates/deployer-server/src/bootstrap_assets.rs @@ -47,7 +47,7 @@ pub async fn flake_bundle( ), ( header::CONTENT_DISPOSITION, - HeaderValue::from_static("attachment; filename=\"plasmacloud-flake-bundle.tar.gz\""), + HeaderValue::from_static("attachment; filename=\"ultracloud-flake-bundle.tar.gz\""), ), ]; diff --git a/deployer/crates/deployer-server/src/cloud_init.rs b/deployer/crates/deployer-server/src/cloud_init.rs index 592e02f..04587a9 100644 --- a/deployer/crates/deployer-server/src/cloud_init.rs +++ b/deployer/crates/deployer-server/src/cloud_init.rs @@ -20,7 +20,7 @@ pub async fn meta_data( require_bootstrap_auth(&state, &headers)?; validate_identifier(&machine_id, "machine_id")?; - let Some((node_id, config)) = lookup_node_config(&state, &machine_id).await else { + let Some(config) = lookup_node_config(&state, &machine_id).await else { return Err(( StatusCode::NOT_FOUND, "machine-id not registered".to_string(), @@ -29,7 +29,7 
@@ pub async fn meta_data( let body = format!( "instance-id: {}\nlocal-hostname: {}\n", - node_id, config.hostname + config.assignment.node_id, config.assignment.hostname ); Ok(([(axum::http::header::CONTENT_TYPE, "text/plain")], body)) } @@ -43,14 +43,14 @@ pub async fn user_data( require_bootstrap_auth(&state, &headers)?; validate_identifier(&machine_id, "machine_id")?; - let Some((node_id, config)) = lookup_node_config(&state, &machine_id).await else { + let Some(config) = lookup_node_config(&state, &machine_id).await else { return Err(( StatusCode::NOT_FOUND, "machine-id not registered".to_string(), )); }; - let body = render_user_data(&node_id, &config) + let body = render_user_data(&config) .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; Ok(( [(axum::http::header::CONTENT_TYPE, "text/cloud-config")], @@ -80,9 +80,9 @@ fn indent_multiline(input: &str, indent: usize) -> String { .join("\n") } -fn render_user_data(node_id: &str, config: &NodeConfig) -> anyhow::Result { +fn render_user_data(config: &NodeConfig) -> anyhow::Result { let node_config_json = serde_json::to_string_pretty(config)?; - let ssh_keys = render_yaml_list(&config.ssh_authorized_keys, 2); + let ssh_keys = render_yaml_list(&config.bootstrap_secrets.ssh_authorized_keys, 2); Ok(format!( r#"#cloud-config @@ -92,18 +92,18 @@ manage_etc_hosts: true ssh_authorized_keys: {ssh_keys} write_files: - - path: /etc/plasmacloud/node-id + - path: /etc/ultracloud/node-id permissions: "0644" content: | {node_id_block} - - path: /etc/plasmacloud/node-config.json - permissions: "0644" + - path: /etc/ultracloud/node-config.json + permissions: "0600" content: | {node_config_block} "#, - hostname = config.hostname, + hostname = config.assignment.hostname, ssh_keys = ssh_keys, - node_id_block = indent_multiline(node_id, 6), + node_id_block = indent_multiline(&config.assignment.node_id, 6), node_config_block = indent_multiline(&node_config_json, 6), )) } @@ -115,38 +115,52 @@ mod tests { use 
crate::state::AppState; use axum::body::Body; use axum::http::Request; - use deployer_types::InstallPlan; + use deployer_types::{BootstrapPlan, BootstrapSecrets, InstallPlan, NodeAssignment}; use tower::ServiceExt; fn test_config() -> NodeConfig { - NodeConfig { - hostname: "node01".to_string(), - role: "worker".to_string(), - ip: "10.0.0.11".to_string(), - services: vec!["prismnet".to_string()], - ssh_authorized_keys: vec!["ssh-ed25519 AAAATEST test".to_string()], - labels: std::collections::HashMap::from([("tier".to_string(), "general".to_string())]), - pool: Some("general".to_string()), - node_class: Some("worker".to_string()), - failure_domain: Some("rack-a".to_string()), - nix_profile: Some("profiles/worker".to_string()), - install_plan: Some(InstallPlan { - nixos_configuration: Some("worker-golden".to_string()), - disko_config_path: Some("profiles/worker/disko.nix".to_string()), - target_disk: Some("/dev/vda".to_string()), - target_disk_by_id: None, - }), - } + NodeConfig::from_parts( + NodeAssignment { + node_id: "node01".to_string(), + hostname: "node01".to_string(), + role: "worker".to_string(), + ip: "10.0.0.11".to_string(), + labels: std::collections::HashMap::from([( + "tier".to_string(), + "general".to_string(), + )]), + pool: Some("general".to_string()), + node_class: Some("worker".to_string()), + failure_domain: Some("rack-a".to_string()), + }, + BootstrapPlan { + services: vec!["prismnet".to_string()], + nix_profile: Some("profiles/worker".to_string()), + install_plan: Some(InstallPlan { + nixos_configuration: Some("worker-golden".to_string()), + disko_config_path: Some("profiles/worker/disko.nix".to_string()), + target_disk: Some("/dev/vda".to_string()), + target_disk_by_id: None, + }), + }, + BootstrapSecrets { + ssh_authorized_keys: vec!["ssh-ed25519 AAAATEST test".to_string()], + ssh_host_key: None, + tls_cert: None, + tls_key: None, + }, + ) } #[test] fn test_render_user_data_contains_node_config() { - let rendered = render_user_data("node01", 
&test_config()).unwrap(); + let rendered = render_user_data(&test_config()).unwrap(); assert!(rendered.contains("#cloud-config")); assert!(rendered.contains("hostname: node01")); - assert!(rendered.contains("/etc/plasmacloud/node-config.json")); + assert!(rendered.contains("/etc/ultracloud/node-config.json")); assert!(rendered.contains("\"nix_profile\": \"profiles/worker\"")); assert!(rendered.contains("\"nixos_configuration\": \"worker-golden\"")); + assert!(rendered.contains("\"node_id\": \"node01\"")); } #[tokio::test] @@ -154,10 +168,11 @@ mod tests { let mut config = Config::default(); config.bootstrap_token = Some("test-token".to_string()); let state = Arc::new(AppState::with_config(config)); - state.machine_configs.write().await.insert( - "machine-1".to_string(), - ("node01".to_string(), test_config()), - ); + state + .machine_configs + .write() + .await + .insert("machine-1".to_string(), test_config()); let app = crate::build_router(state); let response = app diff --git a/deployer/crates/deployer-server/src/config.rs b/deployer/crates/deployer-server/src/config.rs index a79ddc4..f42fe21 100644 --- a/deployer/crates/deployer-server/src/config.rs +++ b/deployer/crates/deployer-server/src/config.rs @@ -14,11 +14,11 @@ pub struct Config { #[serde(default)] pub chainfire: ChainFireConfig, - /// PhotonCloud cluster ID (for writing desired state under photoncloud/clusters/...) + /// UltraCloud cluster ID (for writing desired state under ultracloud/clusters/...) 
#[serde(default)] pub cluster_id: Option, - /// Namespace prefix for PhotonCloud cluster state + /// Namespace prefix for UltraCloud cluster state #[serde(default = "default_cluster_namespace")] pub cluster_namespace: String, @@ -30,7 +30,7 @@ pub struct Config { #[serde(default = "default_local_state_path")] pub local_state_path: Option, - /// Optional tar.gz bundle containing the PhotonCloud flake source tree for bootstrap installs + /// Optional tar.gz bundle containing the UltraCloud flake source tree for bootstrap installs #[serde(default)] pub bootstrap_flake_bundle_path: Option, @@ -168,7 +168,7 @@ fn default_chainfire_namespace() -> String { } fn default_cluster_namespace() -> String { - "photoncloud".to_string() + "ultracloud".to_string() } fn default_heartbeat_timeout() -> u64 { @@ -222,7 +222,7 @@ mod tests { let config = Config::default(); assert_eq!(config.bind_addr.to_string(), "0.0.0.0:8080"); assert_eq!(config.chainfire.namespace, "deployer"); - assert_eq!(config.cluster_namespace, "photoncloud"); + assert_eq!(config.cluster_namespace, "ultracloud"); assert!(config.cluster_id.is_none()); assert_eq!(config.heartbeat_timeout_secs, 300); assert_eq!( @@ -259,7 +259,7 @@ mod tests { bind_addr = "127.0.0.1:18080" cluster_id = "cluster-a" allow_unauthenticated = true - bootstrap_flake_bundle_path = "/tmp/plasmacloud-flake-bundle.tar.gz" + bootstrap_flake_bundle_path = "/tmp/ultracloud-flake-bundle.tar.gz" [chainfire] endpoints = ["http://10.0.0.1:2379"] @@ -273,7 +273,7 @@ mod tests { assert_eq!(config.cluster_id.as_deref(), Some("cluster-a")); assert_eq!( config.bootstrap_flake_bundle_path, - Some(PathBuf::from("/tmp/plasmacloud-flake-bundle.tar.gz")) + Some(PathBuf::from("/tmp/ultracloud-flake-bundle.tar.gz")) ); assert!(config.allow_unauthenticated); assert_eq!(config.chainfire.namespace, "bootstrap"); diff --git a/deployer/crates/deployer-server/src/lib.rs b/deployer/crates/deployer-server/src/lib.rs index a93a58b..c72a5d4 100644 --- 
a/deployer/crates/deployer-server/src/lib.rs +++ b/deployer/crates/deployer-server/src/lib.rs @@ -79,7 +79,7 @@ pub async fn run(config: Config) -> anyhow::Result<()> { if state.config.cluster_id.is_none() { tracing::warn!( - "cluster_id not set; cluster node state won't be written to photoncloud/clusters" + "cluster_id not set; cluster node state won't be written to ultracloud/clusters" ); } diff --git a/deployer/crates/deployer-server/src/local_storage.rs b/deployer/crates/deployer-server/src/local_storage.rs index 4ae1cb4..4c17c6f 100644 --- a/deployer/crates/deployer-server/src/local_storage.rs +++ b/deployer/crates/deployer-server/src/local_storage.rs @@ -19,7 +19,7 @@ use deployer_types::{NodeConfig, NodeInfo}; #[derive(Debug, Clone, Serialize, Deserialize, Default)] struct LocalState { - machine_configs: HashMap, + machine_configs: HashMap, nodes: HashMap, cluster_nodes: HashMap, ssh_host_keys: HashMap, @@ -62,29 +62,23 @@ impl LocalStorage { Ok(Self { state_path, state }) } - pub fn register_node( - &mut self, - machine_id: &str, - node_id: &str, - config: &NodeConfig, - ) -> Result<()> { - if let Some((existing_id, _)) = self.state.machine_configs.get(machine_id) { - if existing_id != node_id { + pub fn register_node(&mut self, machine_id: &str, config: &NodeConfig) -> Result<()> { + if let Some(existing) = self.state.machine_configs.get(machine_id) { + if existing.assignment.node_id != config.assignment.node_id { anyhow::bail!( "machine_id {} already mapped to {}", machine_id, - existing_id + existing.assignment.node_id ); } } - self.state.machine_configs.insert( - machine_id.to_string(), - (node_id.to_string(), config.clone()), - ); + self.state + .machine_configs + .insert(machine_id.to_string(), config.clone()); self.save() } - pub fn get_node_config(&self, machine_id: &str) -> Option<(String, NodeConfig)> { + pub fn get_node_config(&self, machine_id: &str) -> Option { self.state.machine_configs.get(machine_id).cloned() } @@ -103,13 +97,11 @@ impl 
LocalStorage { self.state.nodes.values().cloned().collect() } - pub fn list_machine_configs(&self) -> Vec<(String, String, NodeConfig)> { + pub fn list_machine_configs(&self) -> Vec<(String, NodeConfig)> { self.state .machine_configs .iter() - .map(|(machine_id, (node_id, config))| { - (machine_id.clone(), node_id.clone(), config.clone()) - }) + .map(|(machine_id, config)| (machine_id.clone(), config.clone())) .collect() } @@ -297,6 +289,7 @@ fn generate_ssh_host_key(node_id: &str, parent: Option<&Path>) -> Result #[cfg(test)] mod tests { use super::*; + use deployer_types::{BootstrapPlan, BootstrapSecrets, NodeAssignment}; use std::collections::HashMap; use std::fs; @@ -315,22 +308,27 @@ mod tests { let dir = temp_state_dir(); let mut storage = LocalStorage::open(&dir).expect("open storage"); - let config = NodeConfig { - hostname: "node01".to_string(), - role: "control-plane".to_string(), - ip: "10.0.1.10".to_string(), - services: vec!["chainfire".to_string()], - ssh_authorized_keys: vec![], - labels: HashMap::new(), - pool: None, - node_class: None, - failure_domain: None, - nix_profile: None, - install_plan: None, - }; + let config = NodeConfig::from_parts( + NodeAssignment { + node_id: "node01".to_string(), + hostname: "node01".to_string(), + role: "control-plane".to_string(), + ip: "10.0.1.10".to_string(), + labels: HashMap::new(), + pool: None, + node_class: None, + failure_domain: None, + }, + BootstrapPlan { + services: vec!["chainfire".to_string()], + nix_profile: None, + install_plan: None, + }, + BootstrapSecrets::default(), + ); storage - .register_node("machine-1", "node01", &config) + .register_node("machine-1", &config) .expect("register node"); let node_info = NodeInfo { @@ -351,8 +349,8 @@ mod tests { let reopened = LocalStorage::open(&dir).expect("reopen storage"); let loaded = reopened.get_node_config("machine-1"); assert!(loaded.is_some()); - let (_, loaded_config) = loaded.unwrap(); - assert_eq!(loaded_config.hostname, "node01"); + let 
loaded_config = loaded.unwrap(); + assert_eq!(loaded_config.assignment.hostname, "node01"); let loaded_node = reopened.get_node_info("node01").expect("node info"); assert_eq!(loaded_node.hostname, "node01"); diff --git a/deployer/crates/deployer-server/src/phone_home.rs b/deployer/crates/deployer-server/src/phone_home.rs index 317eee7..9a83e78 100644 --- a/deployer/crates/deployer-server/src/phone_home.rs +++ b/deployer/crates/deployer-server/src/phone_home.rs @@ -1,11 +1,12 @@ use axum::{extract::State, http::HeaderMap, http::StatusCode, Json}; use chrono::Utc; use deployer_types::{ - CommissionState, EnrollmentRuleSpec, HardwareFacts, InstallPlan, InstallState, - NodeClassSpec, NodeConfig, NodeInfo, NodePoolSpec, NodeState, PhoneHomeRequest, - PhoneHomeResponse, PowerState, + BootstrapPlan, BootstrapSecrets, CommissionState, EnrollmentRuleSpec, HardwareFacts, + InstallPlan, InstallState, NodeAssignment, NodeClassSpec, NodeConfig, NodeInfo, NodePoolSpec, + NodeState, PhoneHomeRequest, PhoneHomeResponse, PowerState, }; use sha2::{Digest, Sha256}; +use std::collections::HashMap; use std::sync::Arc; use tracing::{debug, error, info, warn}; @@ -22,7 +23,7 @@ fn merge_install_plan( } fn merge_hardware_summary_metadata( - metadata: &mut std::collections::HashMap, + metadata: &mut HashMap, hardware_facts: Option<&HardwareFacts>, ) { let Some(hardware_facts) = hardware_facts else { @@ -36,7 +37,10 @@ fn merge_hardware_summary_metadata( metadata.insert("hardware.cpu_cores".to_string(), cpu_cores.to_string()); } if let Some(memory_bytes) = hardware_facts.memory_bytes { - metadata.insert("hardware.memory_bytes".to_string(), memory_bytes.to_string()); + metadata.insert( + "hardware.memory_bytes".to_string(), + memory_bytes.to_string(), + ); } metadata.insert( "hardware.disk_count".to_string(), @@ -47,7 +51,10 @@ fn merge_hardware_summary_metadata( hardware_facts.nics.len().to_string(), ); if let Some(architecture) = hardware_facts.architecture.as_deref() { - 
metadata.insert("hardware.architecture".to_string(), architecture.to_string()); + metadata.insert( + "hardware.architecture".to_string(), + architecture.to_string(), + ); } } @@ -60,14 +67,6 @@ fn inventory_hash(hardware_facts: Option<&HardwareFacts>) -> Option { } /// POST /api/v1/phone-home -/// -/// Handles node registration during first boot. -/// Nodes send their machine-id, and Deployer returns: -/// - Node configuration (hostname, role, IP, services) -/// - SSH host key -/// - TLS certificates (optional) -/// -/// Uses ChainFire storage when available, falls back to in-memory. pub async fn phone_home( State(state): State>, headers: HeaderMap, @@ -87,18 +86,18 @@ pub async fn phone_home( "Phone home request received" ); - // Lookup node configuration (ChainFire or fallback) - let (node_id, mut node_config) = match lookup_node_config(&state, &request.machine_id).await { - Some((id, config)) => (id, config), - None => { - if let Some((id, config)) = resolve_enrollment_config(&state, &request).await? { + let mut node_config = match lookup_node_config(&state, &request.machine_id).await { + Some(config) => config, + None => match resolve_enrollment_config(&state, &request).await? { + Some(config) => { info!( machine_id = %request.machine_id, - node_id = %id, + node_id = %config.assignment.node_id, "Resolved unknown machine through enrollment rules" ); - (id, config) - } else { + config + } + None => { if !state.config.allow_unknown_nodes { warn!( machine_id = %request.machine_id, @@ -114,121 +113,95 @@ pub async fn phone_home( machine_id = %request.machine_id, "Unknown machine-id, assigning default configuration (unsafe)" ); - // Assign default configuration for unknown machines (dev-only). - // Prefer explicit node_id, then DHCP-provided hostname, then machine-id suffix. 
- let node_id = request - .node_id - .as_ref() - .map(|v| v.trim()) - .filter(|v| !v.is_empty()) - .map(|v| v.to_string()) - .or_else(|| { - request - .hostname - .as_ref() - .map(|v| v.trim()) - .filter(|v| !v.is_empty()) - .map(|v| v.to_string()) - }) - .unwrap_or_else(|| { - let max_suffix_len = 128usize.saturating_sub("node-".len()); - let suffix_len = std::cmp::min(max_suffix_len, request.machine_id.len()); - format!("node-{}", &request.machine_id[..suffix_len]) - }); - let config = NodeConfig { - hostname: node_id.clone(), - role: "worker".to_string(), - ip: request.ip.clone().unwrap_or_default(), - services: vec![], - ssh_authorized_keys: vec![], - labels: std::collections::HashMap::new(), - pool: None, - node_class: None, - failure_domain: request.metadata.get("failure_domain").cloned(), - nix_profile: None, - install_plan: None, - }; - (node_id, config) + default_unknown_node_config(&request) } - } + }, }; - if let Some(request_ip) = request.ip.as_ref() { - if !node_config.ip.is_empty() && node_config.ip != *request_ip { - warn!( - machine_id = %request.machine_id, - requested_ip = %request_ip, - expected_ip = %node_config.ip, - "Node IP mismatch in phone-home" - ); - return Err((StatusCode::BAD_REQUEST, "node ip mismatch".to_string())); - } - } - if let Some(requested_id) = request.node_id.as_ref() { - if requested_id != &node_id { + if requested_id != &node_config.assignment.node_id { warn!( machine_id = %request.machine_id, requested_id = %requested_id, - expected_id = %node_id, + expected_id = %node_config.assignment.node_id, "Node ID mismatch in phone-home" ); return Err((StatusCode::BAD_REQUEST, "node_id mismatch".to_string())); } } - if node_config.hostname.is_empty() { - if let Some(hostname) = request.hostname.as_ref() { - node_config.hostname = hostname.clone(); - } else { - node_config.hostname = node_id.clone(); + if node_config.assignment.hostname.is_empty() { + node_config.assignment.hostname = request + .hostname + .clone() + .filter(|value| 
!value.trim().is_empty()) + .unwrap_or_else(|| node_config.assignment.node_id.clone()); + } + + if let Some(request_ip) = request.ip.as_ref() { + if !node_config.assignment.ip.is_empty() && node_config.assignment.ip != *request_ip { + warn!( + machine_id = %request.machine_id, + requested_ip = %request_ip, + expected_ip = %node_config.assignment.ip, + "Node IP mismatch in phone-home" + ); + return Err((StatusCode::BAD_REQUEST, "node ip mismatch".to_string())); } } - if node_config.ip.is_empty() { + if node_config.assignment.ip.is_empty() { if let Some(ip) = request.ip.clone() { - node_config.ip = ip; + node_config.assignment.ip = ip; } else { warn!( machine_id = %request.machine_id, - node_id = %node_id, + node_id = %node_config.assignment.node_id, "Node config missing IP; refusing registration" ); return Err((StatusCode::BAD_REQUEST, "node ip missing".to_string())); } } - validate_ip(&node_config.ip, "node_config.ip")?; + validate_ip(&node_config.assignment.ip, "node_config.assignment.ip")?; - // Ensure metadata contains authoritative role/service info let mut metadata = request.metadata.clone(); - metadata.insert("role".to_string(), node_config.role.clone()); - metadata.insert("services".to_string(), node_config.services.join(",")); + metadata.insert("role".to_string(), node_config.assignment.role.clone()); + metadata.insert( + "services".to_string(), + node_config.bootstrap_plan.services.join(","), + ); merge_hardware_summary_metadata(&mut metadata, request.hardware_facts.as_ref()); - // Create NodeInfo for tracking let node_info = NodeInfo { - id: node_id.clone(), + id: node_config.assignment.node_id.clone(), machine_id: Some(request.machine_id.clone()), - hostname: node_config.hostname.clone(), - ip: node_config.ip.clone(), + hostname: node_config.assignment.hostname.clone(), + ip: node_config.assignment.ip.clone(), state: NodeState::Provisioning, cluster_config_hash: request.cluster_config_hash.unwrap_or_default(), last_heartbeat: Utc::now(), metadata, }; - 
// Persist config mapping for this machine (best-effort) - if let Err(e) = persist_node_config(&state, &request.machine_id, &node_id, &node_config).await { + let mut response_config = node_config.clone(); + response_config.bootstrap_secrets.ssh_host_key = + get_or_issue_ssh_host_key(&state, &node_info.id).await; + + let (tls_cert, tls_key) = + get_or_issue_tls_material(&state, &node_info.id, &node_info.hostname, &node_info.ip).await; + response_config.bootstrap_secrets.tls_cert = tls_cert; + response_config.bootstrap_secrets.tls_key = tls_key; + + if let Err(e) = persist_node_config(&state, &request.machine_id, &response_config).await { warn!( machine_id = %request.machine_id, - node_id = %node_id, + node_id = %node_info.id, error = %e, "Failed to persist node configuration" ); } - // Store in ChainFire or in-memory match store_node_info(&state, &node_info).await { Ok(_) => { let storage = if state.has_local_storage() { @@ -241,7 +214,7 @@ pub async fn phone_home( info!( node_id = %node_info.id, hostname = %node_info.hostname, - role = %node_config.role, + role = %response_config.assignment.role, storage = storage, "Node registered successfully" ); @@ -249,7 +222,7 @@ pub async fn phone_home( if let Err(e) = store_cluster_node_if_configured( &state, &node_info, - &node_config, + &response_config, &request.machine_id, request.hardware_facts.as_ref(), ) @@ -262,66 +235,11 @@ pub async fn phone_home( ); } - let ssh_host_key = if let Some(local_storage) = &state.local_storage { - let mut storage = local_storage.lock().await; - match storage.get_or_generate_ssh_host_key(&node_info.id) { - Ok(key) => Some(key), - Err(e) => { - warn!(error = %e, "Failed to generate ssh host key"); - None - } - } - } else { - None - }; - - let (tls_cert, tls_key) = if state.config.tls_self_signed - || (state.config.tls_ca_cert_path.is_some() - && state.config.tls_ca_key_path.is_some()) - { - if let Some(local_storage) = &state.local_storage { - let mut storage = local_storage.lock().await; 
- match storage.get_or_generate_tls_cert( - &node_info.id, - &node_config.hostname, - &node_config.ip, - state.config.tls_ca_cert_path.as_deref(), - state.config.tls_ca_key_path.as_deref(), - ) { - Ok((cert, key)) => (Some(cert), Some(key)), - Err(e) => { - warn!(error = %e, "Failed to issue node TLS certificate"); - (None, None) - } - } - } else { - match crate::tls::issue_node_cert( - &node_info.id, - &node_config.hostname, - &node_config.ip, - state.config.tls_ca_cert_path.as_deref(), - state.config.tls_ca_key_path.as_deref(), - ) { - Ok((cert, key)) => (Some(cert), Some(key)), - Err(e) => { - warn!(error = %e, "Failed to issue node TLS certificate"); - (None, None) - } - } - } - } else { - (None, None) - }; - Ok(Json(PhoneHomeResponse { success: true, message: Some(format!("Node {} registered successfully", node_info.id)), - node_id: node_id.clone(), state: NodeState::Provisioning, - node_config: Some(node_config), - ssh_host_key, - tls_cert, - tls_key, + node_config: response_config, })) } Err(e) => { @@ -339,39 +257,132 @@ pub async fn phone_home( } } -/// Lookup node configuration by machine-id -/// -/// Tries ChainFire first, then falls back to in-memory storage. 
-pub(crate) async fn lookup_node_config( +fn default_unknown_node_config(request: &PhoneHomeRequest) -> NodeConfig { + let node_id = request + .node_id + .as_ref() + .map(|v| v.trim()) + .filter(|v| !v.is_empty()) + .map(|v| v.to_string()) + .or_else(|| { + request + .hostname + .as_ref() + .map(|v| v.trim()) + .filter(|v| !v.is_empty()) + .map(|v| v.to_string()) + }) + .unwrap_or_else(|| { + let max_suffix_len = 128usize.saturating_sub("node-".len()); + let suffix_len = std::cmp::min(max_suffix_len, request.machine_id.len()); + format!("node-{}", &request.machine_id[..suffix_len]) + }); + + NodeConfig::from_parts( + NodeAssignment { + node_id: node_id.clone(), + hostname: request + .hostname + .clone() + .filter(|value| !value.trim().is_empty()) + .unwrap_or_else(|| node_id.clone()), + role: "worker".to_string(), + ip: request.ip.clone().unwrap_or_default(), + labels: HashMap::new(), + pool: None, + node_class: None, + failure_domain: request.metadata.get("failure_domain").cloned(), + }, + BootstrapPlan::default(), + BootstrapSecrets::default(), + ) +} + +async fn get_or_issue_ssh_host_key(state: &AppState, node_id: &str) -> Option { + let Some(local_storage) = &state.local_storage else { + return None; + }; + + let mut storage = local_storage.lock().await; + match storage.get_or_generate_ssh_host_key(node_id) { + Ok(key) => Some(key), + Err(e) => { + warn!(error = %e, "Failed to generate ssh host key"); + None + } + } +} + +async fn get_or_issue_tls_material( state: &AppState, - machine_id: &str, -) -> Option<(String, NodeConfig)> { + node_id: &str, + hostname: &str, + ip: &str, +) -> (Option, Option) { + if !(state.config.tls_self_signed + || (state.config.tls_ca_cert_path.is_some() && state.config.tls_ca_key_path.is_some())) + { + return (None, None); + } + + if let Some(local_storage) = &state.local_storage { + let mut storage = local_storage.lock().await; + match storage.get_or_generate_tls_cert( + node_id, + hostname, + ip, + 
state.config.tls_ca_cert_path.as_deref(), + state.config.tls_ca_key_path.as_deref(), + ) { + Ok((cert, key)) => (Some(cert), Some(key)), + Err(e) => { + warn!(error = %e, "Failed to issue node TLS certificate"); + (None, None) + } + } + } else { + match crate::tls::issue_node_cert( + node_id, + hostname, + ip, + state.config.tls_ca_cert_path.as_deref(), + state.config.tls_ca_key_path.as_deref(), + ) { + Ok((cert, key)) => (Some(cert), Some(key)), + Err(e) => { + warn!(error = %e, "Failed to issue node TLS certificate"); + (None, None) + } + } + } +} + +/// Lookup node configuration by machine-id. +pub(crate) async fn lookup_node_config(state: &AppState, machine_id: &str) -> Option { debug!(machine_id = %machine_id, "Looking up node configuration"); - // Try local storage first if let Some(local_storage) = &state.local_storage { let storage = local_storage.lock().await; - if let Some((node_id, config)) = storage.get_node_config(machine_id) { + if let Some(config) = storage.get_node_config(machine_id) { debug!( machine_id = %machine_id, - node_id = %node_id, + node_id = %config.assignment.node_id, "Found config in local storage" ); - return Some((node_id, config)); + return Some(config); } } - // Try ChainFire storage first if let Some(storage_mutex) = &state.storage { let mut storage = storage_mutex.lock().await; match storage.get_node_config(machine_id).await { - Ok(Some((node_id, config))) => { + Ok(Some(config)) => { debug!( machine_id = %machine_id, - node_id = %node_id, + node_id = %config.assignment.node_id, "Found config in ChainFire" ); - return Some((node_id, config)); + return Some(config); } Ok(None) => { debug!(machine_id = %machine_id, "Not found in ChainFire"); @@ -386,54 +397,56 @@ pub(crate) async fn lookup_node_config( } } - // Fallback to in-memory storage let configs = state.machine_configs.read().await; - if let Some((node_id, config)) = configs.get(machine_id) { + if let Some(config) = configs.get(machine_id) { debug!( machine_id = %machine_id, 
- node_id = %node_id, + node_id = %config.assignment.node_id, "Found config in in-memory storage" ); - return Some((node_id.clone(), config.clone())); + return Some(config.clone()); } - // Hardcoded test mappings (for development/testing) if state.config.allow_test_mappings { match machine_id { "test-machine-01" => { - return Some(( - "node01".to_string(), - NodeConfig { + return Some(NodeConfig::from_parts( + NodeAssignment { + node_id: "node01".to_string(), hostname: "node01".to_string(), role: "control-plane".to_string(), ip: "10.0.1.10".to_string(), - services: vec!["chainfire".to_string(), "flaredb".to_string()], - ssh_authorized_keys: vec![], - labels: std::collections::HashMap::new(), + labels: HashMap::new(), pool: None, node_class: None, failure_domain: None, + }, + BootstrapPlan { + services: vec!["chainfire".to_string(), "flaredb".to_string()], nix_profile: None, install_plan: None, }, + BootstrapSecrets::default(), )); } "test-machine-02" => { - return Some(( - "node02".to_string(), - NodeConfig { + return Some(NodeConfig::from_parts( + NodeAssignment { + node_id: "node02".to_string(), hostname: "node02".to_string(), role: "worker".to_string(), ip: "10.0.1.11".to_string(), - services: vec!["chainfire".to_string()], - ssh_authorized_keys: vec![], - labels: std::collections::HashMap::new(), + labels: HashMap::new(), pool: None, node_class: None, failure_domain: None, + }, + BootstrapPlan { + services: vec!["chainfire".to_string()], nix_profile: None, install_plan: None, }, + BootstrapSecrets::default(), )); } _ => {} @@ -446,7 +459,7 @@ pub(crate) async fn lookup_node_config( async fn resolve_enrollment_config( state: &AppState, request: &PhoneHomeRequest, -) -> Result, (StatusCode, String)> { +) -> Result, (StatusCode, String)> { let Some(cluster_id) = state.config.cluster_id.as_deref() else { return Ok(None); }; @@ -547,7 +560,7 @@ fn build_node_config_from_rule( request: &PhoneHomeRequest, node_classes: &[NodeClassSpec], pools: &[NodePoolSpec], -) -> 
(String, NodeConfig) { +) -> NodeConfig { let requested_id = request .node_id .as_ref() @@ -589,7 +602,7 @@ fn build_node_config_from_rule( .or_else(|| node_class_spec.and_then(|node_class| node_class.roles.first().cloned())) .unwrap_or_else(|| "worker".to_string()); - let mut labels = std::collections::HashMap::new(); + let mut labels = HashMap::new(); if let Some(node_class) = node_class_spec { labels.extend(node_class.labels.clone()); } @@ -618,9 +631,9 @@ fn build_node_config_from_rule( .cloned() .or_else(|| request.metadata.get("topology.kubernetes.io/zone").cloned()); - ( - node_id.clone(), - NodeConfig { + NodeConfig::from_parts( + NodeAssignment { + node_id: node_id.clone(), hostname: request .hostname .clone() @@ -628,12 +641,13 @@ fn build_node_config_from_rule( .unwrap_or_else(|| node_id.clone()), role, ip: request.ip.clone().unwrap_or_default(), - services: rule.services.clone(), - ssh_authorized_keys: rule.ssh_authorized_keys.clone(), labels, pool, node_class, failure_domain, + }, + BootstrapPlan { + services: rule.services.clone(), nix_profile: rule .nix_profile .clone() @@ -643,14 +657,18 @@ fn build_node_config_from_rule( node_class_spec.and_then(|node_class| node_class.install_plan.as_ref()), ), }, + BootstrapSecrets { + ssh_authorized_keys: rule.ssh_authorized_keys.clone(), + ssh_host_key: None, + tls_cert: None, + tls_key: None, + }, ) } -/// Store NodeInfo in ChainFire or in-memory async fn store_node_info(state: &AppState, node_info: &NodeInfo) -> anyhow::Result<()> { let mut stored = false; - // Prefer local storage when configured. if let Some(local_storage) = &state.local_storage { let mut storage = local_storage.lock().await; match storage.store_node_info(node_info) { @@ -664,7 +682,6 @@ async fn store_node_info(state: &AppState, node_info: &NodeInfo) -> anyhow::Resu } } - // Also try ChainFire if available. 
if let Some(storage_mutex) = &state.storage { let mut chainfire = storage_mutex.lock().await; match chainfire.store_node_info(node_info).await { @@ -682,7 +699,6 @@ async fn store_node_info(state: &AppState, node_info: &NodeInfo) -> anyhow::Resu return Ok(()); } - // Fallback to in-memory storage when all configured backends fail. state .nodes .write() @@ -697,19 +713,17 @@ async fn store_node_info(state: &AppState, node_info: &NodeInfo) -> anyhow::Resu Ok(()) } -/// Persist node config mapping in ChainFire and in-memory fallback async fn persist_node_config( state: &AppState, machine_id: &str, - node_id: &str, config: &NodeConfig, ) -> anyhow::Result<()> { if let Some(local_storage) = &state.local_storage { let mut storage = local_storage.lock().await; - if let Err(e) = storage.register_node(machine_id, node_id, config) { + if let Err(e) = storage.register_node(machine_id, config) { warn!( machine_id = %machine_id, - node_id = %node_id, + node_id = %config.assignment.node_id, error = %e, "Failed to persist node config to local storage" ); @@ -718,35 +732,29 @@ async fn persist_node_config( if let Some(storage_mutex) = &state.storage { let mut storage = storage_mutex.lock().await; - if let Err(e) = storage.register_node(machine_id, node_id, config).await { + if let Err(e) = storage.register_node(machine_id, config).await { warn!( machine_id = %machine_id, - node_id = %node_id, + node_id = %config.assignment.node_id, error = %e, "Failed to persist node config to ChainFire" ); } } - // Keep in-memory mapping in sync as a fallback cache - { - let mut map = state.machine_configs.write().await; - if let Some((existing_node, _)) = map.get(machine_id) { - if existing_node != node_id { - warn!( - machine_id = %machine_id, - existing_node = %existing_node, - requested_node = %node_id, - "Skipping in-memory mapping update due to conflict" - ); - return Ok(()); - } + let mut map = state.machine_configs.write().await; + if let Some(existing) = map.get(machine_id) { + if 
existing.assignment.node_id != config.assignment.node_id { + warn!( + machine_id = %machine_id, + existing_node = %existing.assignment.node_id, + requested_node = %config.assignment.node_id, + "Skipping in-memory mapping update due to conflict" + ); + return Ok(()); } - map.insert( - machine_id.to_string(), - (node_id.to_string(), config.clone()), - ); } + map.insert(machine_id.to_string(), config.clone()); Ok(()) } @@ -774,7 +782,7 @@ async fn store_cluster_node_if_configured( return Ok(()); } - let mut labels = node_config.labels.clone(); + let mut labels = node_config.assignment.labels.clone(); for (key, value) in &node_info.metadata { labels.insert(key.clone(), value.clone()); } @@ -782,8 +790,8 @@ async fn store_cluster_node_if_configured( labels.remove("services"); let mut roles = Vec::new(); - if !node_config.role.trim().is_empty() { - roles.push(node_config.role.clone()); + if !node_config.assignment.role.trim().is_empty() { + roles.push(node_config.assignment.role.clone()); } else if let Some(role) = node_info.metadata.get("role") { if !role.trim().is_empty() { roles.push(role.clone()); @@ -797,15 +805,19 @@ async fn store_cluster_node_if_configured( hostname: node_info.hostname.clone(), roles, labels, - pool: node_config.pool.clone(), - node_class: node_config.node_class.clone(), - failure_domain: node_config.failure_domain.clone(), - nix_profile: node_config.nix_profile.clone(), - install_plan: node_config.install_plan.clone(), + pool: node_config.assignment.pool.clone(), + node_class: node_config.assignment.node_class.clone(), + failure_domain: node_config.assignment.failure_domain.clone(), + nix_profile: node_config.bootstrap_plan.nix_profile.clone(), + install_plan: node_config.bootstrap_plan.install_plan.clone(), hardware_facts: hardware_facts.cloned(), state: Some(format!("{:?}", node_info.state).to_lowercase()), commission_state: hardware_facts.map(|_| CommissionState::Discovered), - install_state: node_config.install_plan.as_ref().map(|_| 
InstallState::Pending), + install_state: node_config + .bootstrap_plan + .install_plan + .as_ref() + .map(|_| InstallState::Pending), commissioned_at: None, last_inventory_hash: inventory_hash(hardware_facts), power_state: node_info @@ -852,7 +864,6 @@ mod tests { use crate::config::Config; use crate::state::AppState; use axum::http::HeaderMap; - use std::collections::HashMap; fn test_headers() -> HeaderMap { let mut headers = HeaderMap::new(); @@ -866,27 +877,39 @@ mod tests { Arc::new(AppState::with_config(config)) } + fn test_node_config(node_id: &str, role: &str, ip: &str, services: Vec<&str>) -> NodeConfig { + NodeConfig::from_parts( + NodeAssignment { + node_id: node_id.to_string(), + hostname: node_id.to_string(), + role: role.to_string(), + ip: ip.to_string(), + labels: HashMap::new(), + pool: None, + node_class: None, + failure_domain: None, + }, + BootstrapPlan { + services: services.into_iter().map(str::to_string).collect(), + nix_profile: None, + install_plan: None, + }, + BootstrapSecrets::default(), + ) + } + #[tokio::test] async fn test_phone_home_known_machine() { let state = test_state(); - // Pre-register a machine - let config = NodeConfig { - hostname: "node01".to_string(), - role: "control-plane".to_string(), - ip: "10.0.1.10".to_string(), - services: vec!["chainfire".to_string(), "flaredb".to_string()], - ssh_authorized_keys: vec![], - labels: HashMap::new(), - pool: None, - node_class: None, - failure_domain: None, - nix_profile: None, - install_plan: None, - }; state.machine_configs.write().await.insert( "test-machine-01".to_string(), - ("node01".to_string(), config), + test_node_config( + "node01", + "control-plane", + "10.0.1.10", + vec!["chainfire", "flaredb"], + ), ); let request = PhoneHomeRequest { @@ -904,16 +927,14 @@ mod tests { let response = result.unwrap().0; assert!(response.success); - assert_eq!(response.node_id, "node01"); assert_eq!(response.state, NodeState::Provisioning); - assert!(response.node_config.is_some()); - 
assert!(response.ssh_host_key.is_none()); + assert_eq!(response.node_config.assignment.node_id, "node01"); + assert_eq!(response.node_config.assignment.role, "control-plane"); + assert_eq!( + response.node_config.bootstrap_plan.services, + vec!["chainfire".to_string(), "flaredb".to_string()] + ); - let config = response.node_config.unwrap(); - assert_eq!(config.hostname, "node01"); - assert_eq!(config.role, "control-plane"); - - // Verify node was stored let nodes = state.nodes.read().await; assert!(nodes.contains_key("node01")); } @@ -940,35 +961,18 @@ mod tests { let response = result.unwrap().0; assert!(response.success); - assert!(response.node_id.starts_with("node-")); assert_eq!(response.state, NodeState::Provisioning); - assert!(response.node_config.is_some()); - - let config = response.node_config.unwrap(); - assert_eq!(config.role, "worker"); // Default role + assert!(response.node_config.assignment.node_id.starts_with("node-")); + assert_eq!(response.node_config.assignment.role, "worker"); } #[tokio::test] async fn test_phone_home_with_preregistered_config() { let state = test_state(); - // Pre-register a machine - let config = NodeConfig { - hostname: "my-node".to_string(), - role: "storage".to_string(), - ip: "10.0.2.50".to_string(), - services: vec!["lightningstor".to_string()], - ssh_authorized_keys: vec![], - labels: HashMap::new(), - pool: None, - node_class: None, - failure_domain: None, - nix_profile: None, - install_plan: None, - }; state.machine_configs.write().await.insert( "preregistered-123".to_string(), - ("my-node".to_string(), config), + test_node_config("my-node", "storage", "10.0.2.50", vec!["lightningstor"]), ); let request = PhoneHomeRequest { @@ -986,11 +990,9 @@ mod tests { let response = result.unwrap().0; assert!(response.success); - assert_eq!(response.node_id, "my-node"); - - let config = response.node_config.unwrap(); - assert_eq!(config.role, "storage"); - assert_eq!(config.ip, "10.0.2.50"); + 
assert_eq!(response.node_config.assignment.node_id, "my-node"); + assert_eq!(response.node_config.assignment.role, "storage"); + assert_eq!(response.node_config.assignment.ip, "10.0.2.50"); } #[test] @@ -1077,14 +1079,18 @@ mod tests { labels: HashMap::from([("pool-kind".to_string(), "accelerated".to_string())]), }]; - let (node_id, config) = build_node_config_from_rule(&rule, &request, &node_classes, &pools); + let config = build_node_config_from_rule(&rule, &request, &node_classes, &pools); - assert_eq!(node_id, "gpu-dyn-01"); - assert_eq!(config.role, "worker"); - assert_eq!(config.pool.as_deref(), Some("gpu")); - assert_eq!(config.node_class.as_deref(), Some("gpu-worker")); - assert_eq!(config.nix_profile.as_deref(), Some("profiles/gpu-worker")); + assert_eq!(config.assignment.node_id, "gpu-dyn-01"); + assert_eq!(config.assignment.role, "worker"); + assert_eq!(config.assignment.pool.as_deref(), Some("gpu")); + assert_eq!(config.assignment.node_class.as_deref(), Some("gpu-worker")); + assert_eq!( + config.bootstrap_plan.nix_profile.as_deref(), + Some("profiles/gpu-worker") + ); let install_plan = config + .bootstrap_plan .install_plan .expect("install_plan should inherit from class"); assert_eq!( @@ -1095,15 +1101,30 @@ mod tests { install_plan.disko_config_path.as_deref(), Some("profiles/gpu-worker/disko.nix") ); - assert_eq!(config.labels.get("tier").map(String::as_str), Some("gpu")); assert_eq!( - config.labels.get("pool-kind").map(String::as_str), + config.assignment.labels.get("tier").map(String::as_str), + Some("gpu") + ); + assert_eq!( + config + .assignment + .labels + .get("pool-kind") + .map(String::as_str), Some("accelerated") ); assert_eq!( - config.labels.get("accelerator").map(String::as_str), + config + .assignment + .labels + .get("accelerator") + .map(String::as_str), Some("nvidia") ); - assert_eq!(config.failure_domain.as_deref(), Some("rack-z")); + assert_eq!(config.assignment.failure_domain.as_deref(), Some("rack-z")); + assert_eq!( + 
config.bootstrap_secrets.ssh_authorized_keys, + vec!["ssh-ed25519 test".to_string()] + ); } } diff --git a/deployer/crates/deployer-server/src/state.rs b/deployer/crates/deployer-server/src/state.rs index 6bafb01..320ce92 100644 --- a/deployer/crates/deployer-server/src/state.rs +++ b/deployer/crates/deployer-server/src/state.rs @@ -22,8 +22,8 @@ pub struct AppState { /// Key: node_id, Value: NodeInfo pub nodes: RwLock>, - /// Fallback in-memory machine_id → (node_id, NodeConfig) mapping - pub machine_configs: RwLock>, + /// Fallback in-memory machine_id → bootstrap node config mapping + pub machine_configs: RwLock>, } impl AppState { diff --git a/deployer/crates/deployer-server/src/storage.rs b/deployer/crates/deployer-server/src/storage.rs index 2253880..e6e8fb7 100644 --- a/deployer/crates/deployer-server/src/storage.rs +++ b/deployer/crates/deployer-server/src/storage.rs @@ -7,7 +7,6 @@ use chainfire_client::Client as ChainFireClient; use deployer_types::{EnrollmentRuleSpec, NodeClassSpec, NodeConfig, NodeInfo, NodePoolSpec}; use serde::de::DeserializeOwned; use serde::Serialize; -use std::collections::HashMap; use thiserror::Error; use tracing::{debug, error, warn}; @@ -63,11 +62,6 @@ impl NodeStorage { format!("{}/nodes/info/{}", self.namespace, node_id) } - /// Key for machine_id → node_id mapping - fn mapping_key(&self, machine_id: &str) -> String { - format!("{}/nodes/mapping/{}", self.namespace, machine_id) - } - fn cluster_node_key(&self, cluster_namespace: &str, cluster_id: &str, node_id: &str) -> String { format!( "{}/clusters/{}/nodes/{}", @@ -118,81 +112,49 @@ impl NodeStorage { pub async fn register_node( &mut self, machine_id: &str, - node_id: &str, config: &NodeConfig, ) -> Result<(), StorageError> { let config_key = self.config_key(machine_id); - let mapping_key = self.mapping_key(machine_id); let config_json = serde_json::to_vec(config)?; - if let Some(existing) = self.client.get(&mapping_key).await? 
{ - let existing_node = String::from_utf8_lossy(&existing).to_string(); - if existing_node != node_id { + if let Some(existing) = self.client.get(&config_key).await? { + let existing_config: NodeConfig = serde_json::from_slice(&existing)?; + if existing_config.assignment.node_id != config.assignment.node_id { return Err(StorageError::Conflict(format!( "machine_id {} already mapped to {}", - machine_id, existing_node + machine_id, existing_config.assignment.node_id ))); } } debug!( machine_id = %machine_id, - node_id = %node_id, + node_id = %config.assignment.node_id, key = %config_key, "Registering node config in ChainFire" ); - // Store config self.client.put(&config_key, &config_json).await?; - - // Store machine_id → node_id mapping - self.client.put(&mapping_key, node_id.as_bytes()).await?; - Ok(()) } - /// Lookup node_id mapping by machine_id - pub async fn get_node_mapping( - &mut self, - machine_id: &str, - ) -> Result, StorageError> { - let mapping_key = self.mapping_key(machine_id); - match self.client.get(&mapping_key).await? { - Some(bytes) => Ok(Some(String::from_utf8_lossy(&bytes).to_string())), - None => Ok(None), - } - } - /// Lookup node config by machine_id pub async fn get_node_config( &mut self, machine_id: &str, - ) -> Result, StorageError> { + ) -> Result, StorageError> { let config_key = self.config_key(machine_id); - let mapping_key = self.mapping_key(machine_id); debug!(machine_id = %machine_id, key = %config_key, "Looking up node config"); - // Get node_id mapping - let node_id = match self.client.get(&mapping_key).await? { - Some(bytes) => String::from_utf8_lossy(&bytes).to_string(), - None => { - debug!(machine_id = %machine_id, "No mapping found"); - return Ok(None); - } - }; - // Get config match self.client.get(&config_key).await? 
{ Some(bytes) => { let config: NodeConfig = serde_json::from_slice(&bytes)?; - Ok(Some((node_id, config))) + Ok(Some(config)) } None => { - warn!( - machine_id = %machine_id, - "Mapping exists but config not found" - ); + debug!(machine_id = %machine_id, "No config found"); Ok(None) } } @@ -213,7 +175,7 @@ impl NodeStorage { Ok(()) } - /// Store cluster node state under photoncloud/clusters/{cluster_id}/nodes/{node_id} + /// Store cluster node state under ultracloud/clusters/{cluster_id}/nodes/{node_id} pub async fn store_cluster_node( &mut self, cluster_namespace: &str, @@ -234,7 +196,7 @@ impl NodeStorage { Ok(()) } - /// List cluster nodes under photoncloud/clusters/{cluster_id}/nodes/ + /// List cluster nodes under ultracloud/clusters/{cluster_id}/nodes/ pub async fn list_cluster_nodes( &mut self, cluster_namespace: &str, @@ -298,43 +260,6 @@ impl NodeStorage { } } - /// Pre-register a machine mapping (admin API) - /// - /// This allows administrators to pre-configure node assignments - /// before machines boot and phone home. 
- pub async fn pre_register( - &mut self, - machine_id: &str, - node_id: &str, - role: &str, - ip: Option<&str>, - services: Vec, - ssh_authorized_keys: Vec, - ) -> Result<(), StorageError> { - let config = NodeConfig { - hostname: node_id.to_string(), - role: role.to_string(), - ip: ip.unwrap_or("").to_string(), - services, - ssh_authorized_keys, - labels: HashMap::new(), - pool: None, - node_class: None, - failure_domain: None, - nix_profile: None, - install_plan: None, - }; - - debug!( - machine_id = %machine_id, - node_id = %node_id, - role = %role, - "Pre-registering node" - ); - - self.register_node(machine_id, node_id, &config).await - } - /// List all registered nodes pub async fn list_nodes(&mut self) -> Result, StorageError> { let prefix = format!("{}/nodes/info/", self.namespace); @@ -354,44 +279,24 @@ impl NodeStorage { Ok(nodes) } - /// List all pre-registered machine configs (machine_id -> node_id, config) + /// List all pre-registered machine configs (machine_id -> config) pub async fn list_machine_configs( &mut self, - ) -> Result, StorageError> { + ) -> Result, StorageError> { let config_prefix = format!("{}/nodes/config/", self.namespace); - let mapping_prefix = format!("{}/nodes/mapping/", self.namespace); - let configs = self.client.get_prefix(&config_prefix).await?; - let mappings = self.client.get_prefix(&mapping_prefix).await?; - let mut config_map: HashMap = HashMap::new(); + let mut results = Vec::new(); for (key, value) in configs { let key_str = String::from_utf8_lossy(&key); if let Some(machine_id) = key_str.strip_prefix(&config_prefix) { if let Ok(config) = serde_json::from_slice::(&value) { - config_map.insert(machine_id.to_string(), config); + results.push((machine_id.to_string(), config)); } else { warn!(key = %key_str, "Failed to deserialize node config"); } } } - - let mut mappings_map: HashMap = HashMap::new(); - for (key, value) in mappings { - let key_str = String::from_utf8_lossy(&key); - if let Some(machine_id) = 
key_str.strip_prefix(&mapping_prefix) { - let node_id = String::from_utf8_lossy(&value).to_string(); - mappings_map.insert(machine_id.to_string(), node_id); - } - } - - let mut results = Vec::new(); - for (machine_id, node_id) in mappings_map { - if let Some(config) = config_map.get(&machine_id) { - results.push((machine_id.clone(), node_id.clone(), config.clone())); - } - } - Ok(results) } } @@ -399,6 +304,7 @@ impl NodeStorage { #[cfg(test)] mod tests { use super::*; + use deployer_types::{BootstrapPlan, BootstrapSecrets, NodeAssignment}; // Note: Integration tests require a running ChainFire instance. // These unit tests verify serialization and key generation. @@ -411,44 +317,50 @@ mod tests { let node_id = "node01"; let config_key = format!("{}/nodes/config/{}", namespace, machine_id); - let mapping_key = format!("{}/nodes/mapping/{}", namespace, machine_id); let info_key = format!("{}/nodes/info/{}", namespace, node_id); assert_eq!(config_key, "deployer/nodes/config/abc123"); - assert_eq!(mapping_key, "deployer/nodes/mapping/abc123"); assert_eq!(info_key, "deployer/nodes/info/node01"); - let cluster_namespace = "photoncloud"; + let cluster_namespace = "ultracloud"; let cluster_id = "cluster-a"; let cluster_key = format!( "{}/clusters/{}/nodes/{}", cluster_namespace, cluster_id, node_id ); - assert_eq!(cluster_key, "photoncloud/clusters/cluster-a/nodes/node01"); + assert_eq!(cluster_key, "ultracloud/clusters/cluster-a/nodes/node01"); } #[test] fn test_node_config_serialization() { - let config = NodeConfig { - hostname: "node01".to_string(), - role: "control-plane".to_string(), - ip: "10.0.1.10".to_string(), - services: vec!["chainfire".to_string(), "flaredb".to_string()], - ssh_authorized_keys: vec![], - labels: HashMap::new(), - pool: None, - node_class: None, - failure_domain: None, - nix_profile: None, - install_plan: None, - }; + let config = NodeConfig::from_parts( + NodeAssignment { + node_id: "node01".to_string(), + hostname: "node01".to_string(), + 
role: "control-plane".to_string(), + ip: "10.0.1.10".to_string(), + labels: std::collections::HashMap::new(), + pool: None, + node_class: None, + failure_domain: None, + }, + BootstrapPlan { + services: vec!["chainfire".to_string(), "flaredb".to_string()], + nix_profile: None, + install_plan: None, + }, + BootstrapSecrets::default(), + ); let json = serde_json::to_vec(&config).unwrap(); let deserialized: NodeConfig = serde_json::from_slice(&json).unwrap(); - assert_eq!(deserialized.hostname, "node01"); - assert_eq!(deserialized.role, "control-plane"); - assert_eq!(deserialized.services.len(), 2); - assert!(deserialized.ssh_authorized_keys.is_empty()); + assert_eq!(deserialized.assignment.hostname, "node01"); + assert_eq!(deserialized.assignment.role, "control-plane"); + assert_eq!(deserialized.bootstrap_plan.services.len(), 2); + assert!(deserialized + .bootstrap_secrets + .ssh_authorized_keys + .is_empty()); } } diff --git a/deployer/crates/deployer-server/src/tls.rs b/deployer/crates/deployer-server/src/tls.rs index 782b159..3edd745 100644 --- a/deployer/crates/deployer-server/src/tls.rs +++ b/deployer/crates/deployer-server/src/tls.rs @@ -18,7 +18,7 @@ pub fn issue_node_cert( dns_names.push(hostname.to_string()); } if dns_names.is_empty() { - dns_names.push("photoncloud-node".to_string()); + dns_names.push("ultracloud-node".to_string()); } let mut params = diff --git a/deployer/crates/deployer-types/src/lib.rs b/deployer/crates/deployer-types/src/lib.rs index 744ef5e..5404c37 100644 --- a/deployer/crates/deployer-types/src/lib.rs +++ b/deployer/crates/deployer-types/src/lib.rs @@ -149,6 +149,71 @@ impl InstallPlan { } } +/// Stable node assignment returned by bootstrap enrollment. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] +pub struct NodeAssignment { + pub node_id: String, + pub hostname: String, + pub role: String, + pub ip: String, + #[serde(default)] + pub labels: HashMap, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub pool: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub node_class: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub failure_domain: Option, +} + +/// Bootstrap plan describing how the installer should materialize the node. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] +pub struct BootstrapPlan { + #[serde(default)] + pub services: Vec, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub nix_profile: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub install_plan: Option, +} + +/// Bootstrap credentials and trust material issued for a node. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] +pub struct BootstrapSecrets { + #[serde(default)] + pub ssh_authorized_keys: Vec, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub ssh_host_key: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub tls_cert: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub tls_key: Option, +} + +/// Canonical bootstrap configuration for a node. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct NodeConfig { + pub assignment: NodeAssignment, + #[serde(default)] + pub bootstrap_plan: BootstrapPlan, + #[serde(default)] + pub bootstrap_secrets: BootstrapSecrets, +} + +impl NodeConfig { + pub fn from_parts( + assignment: NodeAssignment, + bootstrap_plan: BootstrapPlan, + bootstrap_secrets: BootstrapSecrets, + ) -> Self { + Self { + assignment, + bootstrap_plan, + bootstrap_secrets, + } + } +} + /// Basic inventory record for a physical disk observed during commissioning. 
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] pub struct DiskFact { @@ -209,41 +274,6 @@ pub struct HardwareFacts { pub dmi: Option, } -/// Node configuration returned by Deployer -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct NodeConfig { - /// Node hostname - pub hostname: String, - /// Node role (control-plane, worker) - pub role: String, - /// Node IP address - pub ip: String, - /// Services to run on this node - #[serde(default)] - pub services: Vec, - /// SSH authorized keys for bootstrap access - #[serde(default)] - pub ssh_authorized_keys: Vec, - /// Desired labels applied at enrollment time - #[serde(default)] - pub labels: HashMap, - /// Optional pool assignment - #[serde(default, skip_serializing_if = "Option::is_none")] - pub pool: Option, - /// Optional node class assignment - #[serde(default, skip_serializing_if = "Option::is_none")] - pub node_class: Option, - /// Optional failure domain - #[serde(default, skip_serializing_if = "Option::is_none")] - pub failure_domain: Option, - /// Optional Nix profile or flake attr to apply after bootstrap - #[serde(default, skip_serializing_if = "Option::is_none")] - pub nix_profile: Option, - /// Optional explicit install plan used by the bootstrap ISO/netboot path. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub install_plan: Option, -} - /// Phone Home request payload (machine-id based) #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PhoneHomeRequest { @@ -277,22 +307,10 @@ pub struct PhoneHomeResponse { /// Human-readable message #[serde(skip_serializing_if = "Option::is_none")] pub message: Option, - /// Assigned node identifier - pub node_id: String, /// Assigned node state pub state: NodeState, - /// Node configuration (topology, services, etc.) 
- #[serde(skip_serializing_if = "Option::is_none")] - pub node_config: Option, - /// SSH host private key (ed25519) - #[serde(skip_serializing_if = "Option::is_none")] - pub ssh_host_key: Option, - /// TLS certificate for node services - #[serde(skip_serializing_if = "Option::is_none")] - pub tls_cert: Option, - /// TLS private key for node services - #[serde(skip_serializing_if = "Option::is_none")] - pub tls_key: Option, + /// Canonical bootstrap configuration returned by the bootstrap API. + pub node_config: NodeConfig, } fn default_max_instances_per_node() -> u32 { @@ -537,7 +555,7 @@ pub struct LoadBalancerPublicationSpec { pub pool_protocol: Option, } -/// Desired service publication through PhotonCloud network components. +/// Desired service publication through UltraCloud network components. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] pub struct ServicePublicationSpec { #[serde(default)] @@ -581,7 +599,7 @@ impl Default for ServiceDependencySpec { } } -/// Cluster node record stored under photoncloud/clusters/{cluster_id}/nodes/{node_id}. +/// Cluster node record stored under ultracloud/clusters/{cluster_id}/nodes/{node_id}. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct ClusterNodeRecord { pub node_id: String, @@ -679,7 +697,7 @@ pub struct DesiredSystemSpec { pub drain_before_apply: Option, } -/// Cluster metadata (PhotonCloud scope). +/// Cluster metadata (UltraCloud scope). 
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct ClusterSpec { pub cluster_id: String, @@ -993,7 +1011,7 @@ pub fn cluster_node_pool(node: &ClusterNodeRecord) -> Option<&str> { .or_else(|| node.labels.get("pool").map(String::as_str)) .or_else(|| { node.labels - .get("pool.photoncloud.io/name") + .get("pool.ultracloud.io/name") .map(String::as_str) }) } @@ -1004,7 +1022,7 @@ pub fn cluster_node_class(node: &ClusterNodeRecord) -> Option<&str> { .or_else(|| node.labels.get("node_class").map(String::as_str)) .or_else(|| { node.labels - .get("nodeclass.photoncloud.io/name") + .get("nodeclass.ultracloud.io/name") .map(String::as_str) }) } @@ -1477,51 +1495,115 @@ mod tests { #[test] fn test_phone_home_response_with_secrets() { - let node_config = NodeConfig { - hostname: "node01".to_string(), - role: "control-plane".to_string(), - ip: "10.0.1.10".to_string(), - services: vec!["chainfire".to_string(), "flaredb".to_string()], - ssh_authorized_keys: vec![], - labels: HashMap::new(), - pool: None, - node_class: None, - failure_domain: None, - nix_profile: None, - install_plan: Some(InstallPlan { - nixos_configuration: Some("node01".to_string()), - disko_config_path: Some("nix/nodes/vm-cluster/node01/disko.nix".to_string()), - target_disk: Some("/dev/vda".to_string()), - target_disk_by_id: None, - }), - }; + let node_config = NodeConfig::from_parts( + NodeAssignment { + node_id: "node01".to_string(), + hostname: "node01".to_string(), + role: "control-plane".to_string(), + ip: "10.0.1.10".to_string(), + labels: HashMap::new(), + pool: None, + node_class: None, + failure_domain: None, + }, + BootstrapPlan { + services: vec!["chainfire".to_string(), "flaredb".to_string()], + nix_profile: None, + install_plan: Some(InstallPlan { + nixos_configuration: Some("node01".to_string()), + disko_config_path: Some("nix/nodes/vm-cluster/node01/disko.nix".to_string()), + target_disk: Some("/dev/vda".to_string()), + target_disk_by_id: None, + }), + }, + 
BootstrapSecrets { + ssh_authorized_keys: vec![], + ssh_host_key: Some("ssh-key-data".to_string()), + tls_cert: None, + tls_key: None, + }, + ); let response = PhoneHomeResponse { success: true, message: Some("Node registered".to_string()), - node_id: "node01".to_string(), state: NodeState::Provisioning, - node_config: Some(node_config), - ssh_host_key: Some("ssh-key-data".to_string()), - tls_cert: None, - tls_key: None, + node_config, }; let json = serde_json::to_string(&response).unwrap(); let deserialized: PhoneHomeResponse = serde_json::from_str(&json).unwrap(); - assert_eq!(deserialized.node_id, "node01"); assert_eq!(deserialized.state, NodeState::Provisioning); - assert!(deserialized.node_config.is_some()); - assert!(deserialized.ssh_host_key.is_some()); + assert_eq!(deserialized.node_config.assignment.node_id, "node01"); + assert_eq!( + deserialized + .node_config + .bootstrap_secrets + .ssh_host_key + .as_deref(), + Some("ssh-key-data") + ); let install_plan = deserialized .node_config + .bootstrap_plan + .install_plan .as_ref() - .and_then(|config| config.install_plan.as_ref()) .expect("install_plan should round-trip"); assert_eq!(install_plan.nixos_configuration.as_deref(), Some("node01")); assert_eq!(install_plan.target_disk.as_deref(), Some("/dev/vda")); } + #[test] + fn test_node_config_roundtrip() { + let config = NodeConfig::from_parts( + NodeAssignment { + node_id: "node02".to_string(), + hostname: "node02".to_string(), + role: "worker".to_string(), + ip: "10.0.1.12".to_string(), + labels: HashMap::from([("tier".to_string(), "general".to_string())]), + pool: Some("general".to_string()), + node_class: Some("worker-linux".to_string()), + failure_domain: Some("rack-b".to_string()), + }, + BootstrapPlan { + services: vec!["plasmavmc".to_string()], + nix_profile: Some("profiles/worker-linux".to_string()), + install_plan: Some(InstallPlan { + nixos_configuration: Some("worker-linux".to_string()), + disko_config_path: 
Some("profiles/worker-linux/disko.nix".to_string()), + target_disk: None, + target_disk_by_id: Some("/dev/disk/by-id/worker-default".to_string()), + }), + }, + BootstrapSecrets { + ssh_authorized_keys: vec!["ssh-ed25519 AAAATEST test".to_string()], + ssh_host_key: Some("ssh-host-key".to_string()), + tls_cert: None, + tls_key: None, + }, + ); + + let json = serde_json::to_string(&config).unwrap(); + let decoded: NodeConfig = serde_json::from_str(&json).unwrap(); + assert_eq!(decoded.assignment.hostname, "node02"); + assert_eq!(decoded.assignment.role, "worker"); + assert_eq!(decoded.assignment.pool.as_deref(), Some("general")); + assert_eq!( + decoded.bootstrap_plan.nix_profile.as_deref(), + Some("profiles/worker-linux") + ); + assert_eq!( + decoded + .bootstrap_plan + .install_plan + .as_ref() + .and_then(|plan| plan.target_disk_by_id.as_deref()), + Some("/dev/disk/by-id/worker-default") + ); + assert_eq!(decoded.bootstrap_secrets.ssh_authorized_keys.len(), 1); + } + #[test] fn test_service_schedule_defaults() { let schedule = ServiceScheduleSpec::default(); @@ -1844,7 +1926,7 @@ mod tests { let observed = ObservedSystemState { node_id: "node01".to_string(), nixos_configuration: Some("node01".to_string()), - flake_root: Some("/opt/plasmacloud-src".to_string()), + flake_root: Some("/opt/ultracloud-src".to_string()), target_system: Some("/nix/store/system-node01".to_string()), configured_system: Some("/nix/store/system-node01".to_string()), current_system: Some("/nix/store/system-old".to_string()), @@ -1872,7 +1954,7 @@ mod tests { deployment_id: Some("worker-rollout".to_string()), nixos_configuration: Some("node01".to_string()), target_system: Some("/nix/store/system-node01".to_string()), - flake_ref: Some("/opt/plasmacloud-src".to_string()), + flake_ref: Some("/opt/ultracloud-src".to_string()), switch_action: Some("switch".to_string()), health_check_command: vec!["systemctl".to_string(), "is-system-running".to_string()], rollback_on_failure: Some(true), @@ -1906,7 
+1988,7 @@ mod tests { }, nixos_configuration: Some("worker-golden".to_string()), target_system: Some("/nix/store/worker-golden".to_string()), - flake_ref: Some("/opt/plasmacloud-src".to_string()), + flake_ref: Some("/opt/ultracloud-src".to_string()), batch_size: Some(1), max_unavailable: Some(1), health_check_command: vec!["true".to_string()], diff --git a/deployer/crates/fleet-scheduler/src/main.rs b/deployer/crates/fleet-scheduler/src/main.rs index 39f4c27..31d4bfe 100644 --- a/deployer/crates/fleet-scheduler/src/main.rs +++ b/deployer/crates/fleet-scheduler/src/main.rs @@ -46,12 +46,12 @@ fn instances_prefix(cluster_namespace: &str, cluster_id: &str) -> Vec { } #[derive(Debug, Parser)] -#[command(author, version, about = "PhotonCloud non-Kubernetes fleet scheduler")] +#[command(author, version, about = "UltraCloud non-Kubernetes fleet scheduler")] struct Cli { #[arg(long, default_value = "http://127.0.0.1:7000")] chainfire_endpoint: String, - #[arg(long, default_value = "photoncloud")] + #[arg(long, default_value = "ultracloud")] cluster_namespace: String, #[arg(long)] @@ -1507,7 +1507,7 @@ mod tests { fn test_scheduler() -> Scheduler { Scheduler::new(Cli { chainfire_endpoint: "http://127.0.0.1:7000".to_string(), - cluster_namespace: "photoncloud".to_string(), + cluster_namespace: "ultracloud".to_string(), cluster_id: "test-cluster".to_string(), interval_secs: 1, heartbeat_timeout_secs: 300, diff --git a/deployer/crates/nix-agent/src/main.rs b/deployer/crates/nix-agent/src/main.rs index abca814..3c00e05 100644 --- a/deployer/crates/nix-agent/src/main.rs +++ b/deployer/crates/nix-agent/src/main.rs @@ -51,7 +51,7 @@ struct Cli { #[arg(long, default_value = "http://127.0.0.1:7000")] chainfire_endpoint: String, - #[arg(long, default_value = "photoncloud")] + #[arg(long, default_value = "ultracloud")] cluster_namespace: String, #[arg(long)] @@ -796,7 +796,7 @@ mod tests { let resolved = resolve_desired_system( &test_node(), None, - "/opt/plasmacloud-src", + 
"/opt/ultracloud-src", "switch", &[], true, @@ -804,7 +804,7 @@ mod tests { .expect("desired system should resolve"); assert_eq!(resolved.nixos_configuration.as_deref(), Some("node01")); assert_eq!(resolved.target_system, None); - assert_eq!(resolved.flake_ref, "/opt/plasmacloud-src"); + assert_eq!(resolved.flake_ref, "/opt/ultracloud-src"); assert_eq!(resolved.switch_action, "switch"); assert!(resolved.rollback_on_failure); } @@ -826,7 +826,7 @@ mod tests { let resolved = resolve_desired_system( &test_node(), Some(&desired), - "/opt/plasmacloud-src", + "/opt/ultracloud-src", "switch", &[], false, @@ -856,7 +856,7 @@ mod tests { let resolved = resolve_desired_system( &test_node(), Some(&desired), - "/opt/plasmacloud-src", + "/opt/ultracloud-src", "switch", &[], true, @@ -868,7 +868,7 @@ mod tests { resolved.target_system.as_deref(), Some("/nix/store/node01-next") ); - assert_eq!(resolved.flake_ref, "/opt/plasmacloud-src"); + assert_eq!(resolved.flake_ref, "/opt/ultracloud-src"); } #[test] @@ -888,14 +888,14 @@ mod tests { let resolved = resolve_desired_system( &test_node(), Some(&desired), - "/opt/plasmacloud-src", + "/opt/ultracloud-src", "switch", &["systemctl".to_string(), "is-system-running".to_string()], true, ) .expect("desired system should resolve"); - assert_eq!(resolved.flake_ref, "/opt/plasmacloud-src"); + assert_eq!(resolved.flake_ref, "/opt/ultracloud-src"); assert_eq!(resolved.switch_action, "switch"); assert_eq!( resolved.health_check_command, @@ -907,15 +907,15 @@ mod tests { #[test] fn target_flake_attr_is_rendered_from_root_and_configuration() { assert_eq!( - target_flake_attr("/opt/plasmacloud-src", "node01"), - "/opt/plasmacloud-src#nixosConfigurations.node01.config.system.build.toplevel" + target_flake_attr("/opt/ultracloud-src", "node01"), + "/opt/ultracloud-src#nixosConfigurations.node01.config.system.build.toplevel" ); } #[test] fn read_symlink_target_returns_none_for_missing_path() { assert_eq!( - 
read_symlink_target("/tmp/photoncloud-nix-agent-missing-link"), + read_symlink_target("/tmp/ultracloud-nix-agent-missing-link"), None ); } @@ -925,7 +925,7 @@ mod tests { let desired = ResolvedDesiredSystem { nixos_configuration: Some("node01".to_string()), target_system: None, - flake_ref: "/opt/plasmacloud-src".to_string(), + flake_ref: "/opt/ultracloud-src".to_string(), switch_action: "boot".to_string(), health_check_command: vec!["true".to_string()], rollback_on_failure: true, @@ -948,7 +948,7 @@ mod tests { let desired = ResolvedDesiredSystem { nixos_configuration: Some("node01".to_string()), target_system: None, - flake_ref: "/opt/plasmacloud-src".to_string(), + flake_ref: "/opt/ultracloud-src".to_string(), switch_action: "boot".to_string(), health_check_command: vec!["true".to_string()], rollback_on_failure: true, diff --git a/deployer/crates/node-agent/src/agent.rs b/deployer/crates/node-agent/src/agent.rs index 7a251df..4ebaf9c 100644 --- a/deployer/crates/node-agent/src/agent.rs +++ b/deployer/crates/node-agent/src/agent.rs @@ -316,7 +316,7 @@ impl Agent { warn!(error = %e, "failed to sync local service instances"); } } else { - info!("local instance upsert disabled; skipping /etc/photoncloud/instances.json"); + info!("local instance upsert disabled; skipping /etc/ultracloud/instances.json"); } if self.apply { @@ -619,10 +619,10 @@ impl Agent { Ok(()) } - /// ローカルファイル (/etc/photoncloud/instances.json) から ServiceInstance 定義を読み、 - /// Chainfire 上の `photoncloud/clusters/{cluster_id}/instances/{service}/{instance_id}` に upsert する。 + /// ローカルファイル (/etc/ultracloud/instances.json) から ServiceInstance 定義を読み、 + /// Chainfire 上の `ultracloud/clusters/{cluster_id}/instances/{service}/{instance_id}` に upsert する。 async fn sync_local_instances(&self, client: &mut Client) -> Result<()> { - let path = PathBuf::from("/etc/photoncloud/instances.json"); + let path = PathBuf::from("/etc/ultracloud/instances.json"); let contents = match fs::read_to_string(&path) { Ok(c) => c, 
Err(e) => { @@ -1139,14 +1139,14 @@ mod tests { fn test_agent() -> Agent { Agent::new( "http://127.0.0.1:7000".to_string(), - "photoncloud".to_string(), + "ultracloud".to_string(), "test-cluster".to_string(), "node01".to_string(), Duration::from_secs(1), 300, false, false, - PathBuf::from("/tmp/photoncloud-node-agent-tests"), + PathBuf::from("/tmp/ultracloud-node-agent-tests"), ) } diff --git a/deployer/crates/node-agent/src/main.rs b/deployer/crates/node-agent/src/main.rs index 6f42856..00ec9fd 100644 --- a/deployer/crates/node-agent/src/main.rs +++ b/deployer/crates/node-agent/src/main.rs @@ -9,9 +9,9 @@ mod agent; mod process; mod watcher; -/// PhotonCloud NodeAgent +/// UltraCloud NodeAgent /// -/// - Chainfire 上の `photoncloud/clusters/{cluster_id}/nodes/{node_id}` と +/// - Chainfire 上の `ultracloud/clusters/{cluster_id}/nodes/{node_id}` と /// `.../instances/*` を watch しつつ、周期 heartbeat/safety reconcile も行う。 /// - `--apply` が指定された場合のみプロセス起動/停止を行う(デフォルトは dry-run)。 #[derive(Parser, Debug)] @@ -21,11 +21,11 @@ struct Cli { #[arg(long, default_value = "http://127.0.0.1:7000")] chainfire_endpoint: String, - /// PhotonCloud cluster namespace (default: photoncloud) - #[arg(long, default_value = "photoncloud")] + /// UltraCloud cluster namespace (default: ultracloud) + #[arg(long, default_value = "ultracloud")] cluster_namespace: String, - /// PhotonCloud Cluster ID + /// UltraCloud Cluster ID #[arg(long)] cluster_id: String, @@ -42,7 +42,7 @@ struct Cli { heartbeat_timeout_secs: u64, /// PIDファイル出力ディレクトリ - #[arg(long, default_value = "/var/run/photoncloud")] + #[arg(long, default_value = "/var/run/ultracloud")] pid_dir: String, /// Desired State を実際に適用する(プロセス起動/停止、ヘルスチェック更新) diff --git a/deployer/crates/plasmacloud-reconciler/Cargo.toml b/deployer/crates/ultracloud-reconciler/Cargo.toml similarity index 95% rename from deployer/crates/plasmacloud-reconciler/Cargo.toml rename to deployer/crates/ultracloud-reconciler/Cargo.toml index f904416..ff1742d 100644 --- 
a/deployer/crates/plasmacloud-reconciler/Cargo.toml +++ b/deployer/crates/ultracloud-reconciler/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "plasmacloud-reconciler" +name = "ultracloud-reconciler" version.workspace = true edition.workspace = true rust-version.workspace = true diff --git a/deployer/crates/plasmacloud-reconciler/src/auth.rs b/deployer/crates/ultracloud-reconciler/src/auth.rs similarity index 98% rename from deployer/crates/plasmacloud-reconciler/src/auth.rs rename to deployer/crates/ultracloud-reconciler/src/auth.rs index cd13ad0..939b549 100644 --- a/deployer/crates/plasmacloud-reconciler/src/auth.rs +++ b/deployer/crates/ultracloud-reconciler/src/auth.rs @@ -73,7 +73,7 @@ async fn ensure_project_admin_binding( "roles/ProjectAdmin", scope, ) - .with_created_by("plasmacloud-reconciler"); + .with_created_by("ultracloud-reconciler"); client.create_binding(&binding).await?; Ok(()) } diff --git a/deployer/crates/plasmacloud-reconciler/src/hosts.rs b/deployer/crates/ultracloud-reconciler/src/hosts.rs similarity index 99% rename from deployer/crates/plasmacloud-reconciler/src/hosts.rs rename to deployer/crates/ultracloud-reconciler/src/hosts.rs index 313befe..7c37931 100644 --- a/deployer/crates/plasmacloud-reconciler/src/hosts.rs +++ b/deployer/crates/ultracloud-reconciler/src/hosts.rs @@ -72,7 +72,7 @@ pub struct HostsCommand { #[arg(long)] pub endpoint: String, - #[arg(long, default_value = "photoncloud")] + #[arg(long, default_value = "ultracloud")] pub cluster_namespace: String, #[arg(long)] @@ -1204,7 +1204,7 @@ mod tests { }, nixos_configuration: Some("worker-golden".to_string()), target_system: Some("/nix/store/worker-golden".to_string()), - flake_ref: Some("/opt/plasmacloud-src".to_string()), + flake_ref: Some("/opt/ultracloud-src".to_string()), batch_size: Some(1), max_unavailable: Some(1), health_check_command: vec!["true".to_string()], @@ -1219,7 +1219,7 @@ mod tests { fn test_controller() -> HostDeploymentController { 
HostDeploymentController::new(HostsCommand { endpoint: "http://127.0.0.1:7000".to_string(), - cluster_namespace: "photoncloud".to_string(), + cluster_namespace: "ultracloud".to_string(), cluster_id: "test-cluster".to_string(), interval_secs: 1, heartbeat_timeout_secs: 300, diff --git a/deployer/crates/plasmacloud-reconciler/src/main.rs b/deployer/crates/ultracloud-reconciler/src/main.rs similarity index 100% rename from deployer/crates/plasmacloud-reconciler/src/main.rs rename to deployer/crates/ultracloud-reconciler/src/main.rs diff --git a/deployer/crates/plasmacloud-reconciler/src/tenant_network.rs b/deployer/crates/ultracloud-reconciler/src/tenant_network.rs similarity index 100% rename from deployer/crates/plasmacloud-reconciler/src/tenant_network.rs rename to deployer/crates/ultracloud-reconciler/src/tenant_network.rs diff --git a/deployer/crates/plasmacloud-reconciler/src/watcher.rs b/deployer/crates/ultracloud-reconciler/src/watcher.rs similarity index 100% rename from deployer/crates/plasmacloud-reconciler/src/watcher.rs rename to deployer/crates/ultracloud-reconciler/src/watcher.rs diff --git a/deployer/scripts/verify-deployer-bootstrap-e2e.sh b/deployer/scripts/verify-deployer-bootstrap-e2e.sh index 0e851d0..bcebac2 100755 --- a/deployer/scripts/verify-deployer-bootstrap-e2e.sh +++ b/deployer/scripts/verify-deployer-bootstrap-e2e.sh @@ -3,29 +3,29 @@ set -euo pipefail ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" -if [[ -z "${PHOTONCLOUD_E2E_IN_NIX:-}" ]]; then - exec nix develop "$ROOT" -c env PHOTONCLOUD_E2E_IN_NIX=1 bash "$0" "$@" +if [[ -z "${ULTRACLOUD_E2E_IN_NIX:-}" ]]; then + exec nix develop "$ROOT" -c env ULTRACLOUD_E2E_IN_NIX=1 bash "$0" "$@" fi run_chainfire_server_bin() { - if [[ -n "${PHOTONCLOUD_CHAINFIRE_SERVER_BIN:-}" ]]; then - "$PHOTONCLOUD_CHAINFIRE_SERVER_BIN" "$@" + if [[ -n "${ULTRACLOUD_CHAINFIRE_SERVER_BIN:-}" ]]; then + "$ULTRACLOUD_CHAINFIRE_SERVER_BIN" "$@" else cargo run --manifest-path "$ROOT/chainfire/Cargo.toml" -p chainfire-server -- "$@" fi } run_deployer_server_bin() { - if [[ -n "${PHOTONCLOUD_DEPLOYER_SERVER_BIN:-}" ]]; then - "$PHOTONCLOUD_DEPLOYER_SERVER_BIN" "$@" + if [[ -n "${ULTRACLOUD_DEPLOYER_SERVER_BIN:-}" ]]; then + "$ULTRACLOUD_DEPLOYER_SERVER_BIN" "$@" else cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p deployer-server -- "$@" fi } run_deployer_ctl_bin() { - if [[ -n "${PHOTONCLOUD_DEPLOYER_CTL_BIN:-}" ]]; then - "$PHOTONCLOUD_DEPLOYER_CTL_BIN" "$@" + if [[ -n "${ULTRACLOUD_DEPLOYER_CTL_BIN:-}" ]]; then + "$ULTRACLOUD_DEPLOYER_CTL_BIN" "$@" else cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p deployer-ctl -- "$@" fi @@ -164,7 +164,7 @@ wait_for_port "127.0.0.1" "$api_port" 120 cat >"$tmp_dir/deployer.toml" <"$tmp_dir/nodes.dump" +run_deployer_ctl dump --prefix "ultracloud/clusters/test-cluster/nodes/" >"$tmp_dir/nodes.dump" python3 - "$tmp_dir/nodes.dump" "$dynamic_node_id" <<'PY' import json import sys @@ -479,7 +479,7 @@ print("Deployer bootstrap records validated") PY echo "Inspecting desired-system state" -run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/nodes/node-seeded/desired-system" >"$tmp_dir/desired-system.dump" +run_deployer_ctl dump --prefix "ultracloud/clusters/test-cluster/nodes/node-seeded/desired-system" >"$tmp_dir/desired-system.dump" python3 - "$tmp_dir/desired-system.dump" <<'PY' import json import sys diff --git 
a/deployer/scripts/verify-fleet-scheduler-e2e.sh b/deployer/scripts/verify-fleet-scheduler-e2e.sh index a899514..f570563 100755 --- a/deployer/scripts/verify-fleet-scheduler-e2e.sh +++ b/deployer/scripts/verify-fleet-scheduler-e2e.sh @@ -3,37 +3,37 @@ set -euo pipefail ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" -if [[ -z "${PHOTONCLOUD_E2E_IN_NIX:-}" ]]; then - exec nix develop "$ROOT" -c env PHOTONCLOUD_E2E_IN_NIX=1 bash "$0" "$@" +if [[ -z "${ULTRACLOUD_E2E_IN_NIX:-}" ]]; then + exec nix develop "$ROOT" -c env ULTRACLOUD_E2E_IN_NIX=1 bash "$0" "$@" fi run_chainfire_server_bin() { - if [[ -n "${PHOTONCLOUD_CHAINFIRE_SERVER_BIN:-}" ]]; then - "$PHOTONCLOUD_CHAINFIRE_SERVER_BIN" "$@" + if [[ -n "${ULTRACLOUD_CHAINFIRE_SERVER_BIN:-}" ]]; then + "$ULTRACLOUD_CHAINFIRE_SERVER_BIN" "$@" else cargo run --manifest-path "$ROOT/chainfire/Cargo.toml" -p chainfire-server -- "$@" fi } run_deployer_ctl_bin() { - if [[ -n "${PHOTONCLOUD_DEPLOYER_CTL_BIN:-}" ]]; then - "$PHOTONCLOUD_DEPLOYER_CTL_BIN" "$@" + if [[ -n "${ULTRACLOUD_DEPLOYER_CTL_BIN:-}" ]]; then + "$ULTRACLOUD_DEPLOYER_CTL_BIN" "$@" else cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p deployer-ctl -- "$@" fi } run_node_agent_bin() { - if [[ -n "${PHOTONCLOUD_NODE_AGENT_BIN:-}" ]]; then - "$PHOTONCLOUD_NODE_AGENT_BIN" "$@" + if [[ -n "${ULTRACLOUD_NODE_AGENT_BIN:-}" ]]; then + "$ULTRACLOUD_NODE_AGENT_BIN" "$@" else cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p node-agent -- "$@" fi } run_fleet_scheduler_bin() { - if [[ -n "${PHOTONCLOUD_FLEET_SCHEDULER_BIN:-}" ]]; then - "$PHOTONCLOUD_FLEET_SCHEDULER_BIN" "$@" + if [[ -n "${ULTRACLOUD_FLEET_SCHEDULER_BIN:-}" ]]; then + "$ULTRACLOUD_FLEET_SCHEDULER_BIN" "$@" else cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p fleet-scheduler -- "$@" fi @@ -63,7 +63,7 @@ cleanup() { wait "$cf_pid" 2>/dev/null || true fi - if [[ "${PHOTONCLOUD_KEEP_TMP:-}" == "1" ]]; then + if [[ "${ULTRACLOUD_KEEP_TMP:-}" == "1" ]]; 
then echo "Keeping temporary directory: $tmp_dir" >&2 else rm -rf "$tmp_dir" @@ -492,7 +492,7 @@ echo "Waiting for worker to remain blocked until api becomes healthy" wait_for_service_state worker blocked 0 - 120 echo "Validating dependency block before api is healthy" -run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/instances/worker/" >"$tmp_dir/worker-blocked.dump" +run_deployer_ctl dump --prefix "ultracloud/clusters/test-cluster/instances/worker/" >"$tmp_dir/worker-blocked.dump" python3 - "$tmp_dir/worker-blocked.dump" <<'PY' import sys @@ -503,7 +503,7 @@ if lines: print("worker instances correctly blocked before dependency becomes healthy") PY -run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/service-statuses/worker" >"$tmp_dir/worker-status-blocked.dump" +run_deployer_ctl dump --prefix "ultracloud/clusters/test-cluster/service-statuses/worker" >"$tmp_dir/worker-status-blocked.dump" python3 - "$tmp_dir/worker-status-blocked.dump" <<'PY' import json import sys @@ -596,7 +596,7 @@ print("HTTP endpoints are healthy") PY echo "Inspecting instance state in ChainFire" -run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/instances/api/" >"$tmp_dir/instances.dump" +run_deployer_ctl dump --prefix "ultracloud/clusters/test-cluster/instances/api/" >"$tmp_dir/instances.dump" python3 - "$tmp_dir/instances.dump" <<'PY' import json import sys @@ -629,7 +629,7 @@ if states != ["healthy", "healthy"]: print("Observed two healthy scheduled instances across node01 and node02") PY -run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/instances/worker/" >"$tmp_dir/worker-instances.dump" +run_deployer_ctl dump --prefix "ultracloud/clusters/test-cluster/instances/worker/" >"$tmp_dir/worker-instances.dump" python3 - "$tmp_dir/worker-instances.dump" <<'PY' import json import sys @@ -687,7 +687,7 @@ wait_for_service_state api healthy 1 healthy 120 wait_for_service_state worker healthy 1 healthy 120 echo "Inspecting scaled 
instance state in ChainFire" -run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/instances/api/" >"$tmp_dir/instances-scaled.dump" +run_deployer_ctl dump --prefix "ultracloud/clusters/test-cluster/instances/api/" >"$tmp_dir/instances-scaled.dump" python3 - "$tmp_dir/instances-scaled.dump" <<'PY' import json import sys @@ -718,7 +718,7 @@ if instance.get("state") != "healthy": print("Observed one healthy scheduled instance on node01 after scale-down") PY -run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/instances/worker/" >"$tmp_dir/worker-instances-scaled.dump" +run_deployer_ctl dump --prefix "ultracloud/clusters/test-cluster/instances/worker/" >"$tmp_dir/worker-instances-scaled.dump" python3 - "$tmp_dir/worker-instances-scaled.dump" <<'PY' import json import sys diff --git a/deployer/scripts/verify-host-lifecycle-e2e.sh b/deployer/scripts/verify-host-lifecycle-e2e.sh index 34b6f10..51dfab2 100644 --- a/deployer/scripts/verify-host-lifecycle-e2e.sh +++ b/deployer/scripts/verify-host-lifecycle-e2e.sh @@ -3,31 +3,31 @@ set -euo pipefail ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" -if [[ -z "${PHOTONCLOUD_E2E_IN_NIX:-}" ]]; then - exec nix develop "$ROOT" -c env PHOTONCLOUD_E2E_IN_NIX=1 bash "$0" "$@" +if [[ -z "${ULTRACLOUD_E2E_IN_NIX:-}" ]]; then + exec nix develop "$ROOT" -c env ULTRACLOUD_E2E_IN_NIX=1 bash "$0" "$@" fi run_chainfire_server_bin() { - if [[ -n "${PHOTONCLOUD_CHAINFIRE_SERVER_BIN:-}" ]]; then - "$PHOTONCLOUD_CHAINFIRE_SERVER_BIN" "$@" + if [[ -n "${ULTRACLOUD_CHAINFIRE_SERVER_BIN:-}" ]]; then + "$ULTRACLOUD_CHAINFIRE_SERVER_BIN" "$@" else cargo run --manifest-path "$ROOT/chainfire/Cargo.toml" -p chainfire-server -- "$@" fi } run_deployer_ctl_bin() { - if [[ -n "${PHOTONCLOUD_DEPLOYER_CTL_BIN:-}" ]]; then - "$PHOTONCLOUD_DEPLOYER_CTL_BIN" "$@" + if [[ -n "${ULTRACLOUD_DEPLOYER_CTL_BIN:-}" ]]; then + "$ULTRACLOUD_DEPLOYER_CTL_BIN" "$@" else cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p deployer-ctl -- "$@" fi } -run_plasmacloud_reconciler_bin() { - if [[ -n "${PHOTONCLOUD_PLASMACLOUD_RECONCILER_BIN:-}" ]]; then - "$PHOTONCLOUD_PLASMACLOUD_RECONCILER_BIN" "$@" +run_ultracloud_reconciler_bin() { + if [[ -n "${ULTRACLOUD_RECONCILER_BIN:-}" ]]; then + "$ULTRACLOUD_RECONCILER_BIN" "$@" else - cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p plasmacloud-reconciler -- "$@" + cargo run --quiet --manifest-path "$ROOT/deployer/Cargo.toml" -p ultracloud-reconciler -- "$@" fi } @@ -50,7 +50,7 @@ cleanup() { kill "$cf_pid" 2>/dev/null || true wait "$cf_pid" 2>/dev/null || true fi - if [[ "${PHOTONCLOUD_KEEP_TMP:-}" == "1" ]]; then + if [[ "${ULTRACLOUD_KEEP_TMP:-}" == "1" ]]; then echo "Keeping temporary directory: $tmp_dir" >&2 else rm -rf "$tmp_dir" @@ -253,16 +253,16 @@ run_deployer_ctl() { run_deployer_ctl_bin \ --chainfire-endpoint "$chainfire_endpoint" \ --cluster-id test-cluster \ - --cluster-namespace photoncloud \ + --cluster-namespace ultracloud \ --deployer-namespace deployer \ "$@" } run_hosts_bg() { - run_plasmacloud_reconciler_bin \ + run_ultracloud_reconciler_bin \ hosts \ 
--endpoint "$chainfire_endpoint" \ - --cluster-namespace photoncloud \ + --cluster-namespace ultracloud \ --cluster-id test-cluster \ --heartbeat-timeout-secs 300 \ --interval-secs 300 \ @@ -346,7 +346,7 @@ assert status["failed_nodes"] == [], payload print("initial rollout wave validated") PY -run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/nodes/" >"$tmp_dir/nodes-1.dump" +run_deployer_ctl dump --prefix "ultracloud/clusters/test-cluster/nodes/" >"$tmp_dir/nodes-1.dump" python3 - "$tmp_dir/nodes-1.dump" <<'PY' import json import sys @@ -454,7 +454,7 @@ assert any('"ResetType":"PowerCycle"' in line for line in lines), lines print("reinstall orchestration validated") PY -run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/nodes/node01" >"$tmp_dir/node01-post-reinstall.dump" +run_deployer_ctl dump --prefix "ultracloud/clusters/test-cluster/nodes/node01" >"$tmp_dir/node01-post-reinstall.dump" python3 - "$tmp_dir/node01-post-reinstall.dump" <<'PY' import sys @@ -478,7 +478,7 @@ PY wait_for_deployment_state aborted true - - - 120 -run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/nodes/" >"$tmp_dir/nodes-2.dump" +run_deployer_ctl dump --prefix "ultracloud/clusters/test-cluster/nodes/" >"$tmp_dir/nodes-2.dump" python3 - "$tmp_dir/nodes-2.dump" <<'PY' import json import sys diff --git a/docs/README.md b/docs/README.md index 6e83105..cca3840 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,6 +1,6 @@ # Docs -This directory is the public documentation entrypoint for PhotonCloud. +This directory is the public documentation entrypoint for UltraCloud. ## Read First diff --git a/docs/component-matrix.md b/docs/component-matrix.md index 955585d..ef48b89 100644 --- a/docs/component-matrix.md +++ b/docs/component-matrix.md @@ -1,6 +1,6 @@ # Component Matrix -PhotonCloud is intended to validate meaningful service combinations, not only a single all-on deployment. 
+UltraCloud is intended to validate meaningful service combinations, not only a single all-on deployment. This page summarizes the compositions that are exercised by the VM-cluster harness today. ## Validated Control Plane diff --git a/docs/testing.md b/docs/testing.md index fb3d1d6..fb73ad4 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -1,6 +1,6 @@ # Testing -PhotonCloud treats VM-first validation as the canonical local proof path. +UltraCloud treats VM-first validation as the canonical local proof path. ## Canonical Validation @@ -28,7 +28,7 @@ nix build .#checks.x86_64-linux.deployer-vm-smoke Use these commands as the release-facing local proof set: - `fresh-smoke`: whole-cluster readiness, core behavior, and fault injection -- `fresh-demo-vm-webapp`: focused VM demo showing a web app inside the guest with SQLite state persisted on the attached PhotonCloud volume across restart and migration +- `fresh-demo-vm-webapp`: focused VM demo showing a web app inside the guest with FlareDB-backed state and LightningStor object snapshots surviving restart and migration - `fresh-matrix`: composed service scenarios such as `prismnet + flashdns + fiberlb` and PrismNet-backed VM hosting bundles with `plasmavmc + coronafs + lightningstor` - `fresh-bench-storage`: CoronaFS local-vs-shared-volume throughput, cross-worker volume visibility, and LightningStor large/small-object throughput capture - `deployer-vm-smoke`: prebuilt NixOS system closure handoff into `nix-agent`, proving host rollout can activate a host-built target without guest-side compilation @@ -40,6 +40,7 @@ nix run ./nix/test-cluster#cluster -- status nix run ./nix/test-cluster#cluster -- logs node01 nix run ./nix/test-cluster#cluster -- ssh node04 nix run ./nix/test-cluster#cluster -- demo-vm-webapp +nix run ./nix/test-cluster#cluster -- serve-vm-webapp nix run ./nix/test-cluster#cluster -- matrix nix run ./nix/test-cluster#cluster -- bench-storage nix run ./nix/test-cluster#cluster -- fresh-matrix diff 
--git a/flake.lock b/flake.lock index eb7aa8f..e4178a2 100644 --- a/flake.lock +++ b/flake.lock @@ -38,22 +38,6 @@ "type": "github" } }, - "nix-nos": { - "inputs": { - "nixpkgs": [ - "nixpkgs" - ] - }, - "locked": { - "path": "./nix-nos", - "type": "path" - }, - "original": { - "path": "./nix-nos", - "type": "path" - }, - "parent": [] - }, "nixpkgs": { "locked": { "lastModified": 1765186076, @@ -74,7 +58,6 @@ "inputs": { "disko": "disko", "flake-utils": "flake-utils", - "nix-nos": "nix-nos", "nixpkgs": "nixpkgs", "rust-overlay": "rust-overlay", "systems": "systems_2" diff --git a/flake.nix b/flake.nix index 81d5e47..9166f57 100644 --- a/flake.nix +++ b/flake.nix @@ -1,5 +1,5 @@ { - description = "PhotonCloud - Japanese Cloud Platform"; + description = "UltraCloud - Japanese Cloud Platform"; # ============================================================================ # INPUTS: External dependencies @@ -23,247 +23,244 @@ inputs.nixpkgs.follows = "nixpkgs"; }; - # Nix-NOS generic network operating system modules - nix-nos = { - url = "path:./nix-nos"; - inputs.nixpkgs.follows = "nixpkgs"; - }; }; # ============================================================================ # OUTPUTS: What this flake provides # ============================================================================ - outputs = { self, nixpkgs, rust-overlay, flake-utils, disko, nix-nos, systems ? null }: - flake-utils.lib.eachDefaultSystem (system: - let - # Apply rust-overlay to get rust-bin attribute - overlays = [ (import rust-overlay) ]; + outputs = { self, nixpkgs, rust-overlay, flake-utils, disko, systems ? 
null }: + flake-utils.lib.eachDefaultSystem + (system: + let + # Apply rust-overlay to get rust-bin attribute + overlays = [ (import rust-overlay) ]; - pkgs = import nixpkgs { - inherit system overlays; - }; + pkgs = import nixpkgs { + inherit system overlays; + }; - # Rust toolchain configuration - # Using stable channel with rust-src (for rust-analyzer) and rust-analyzer - rustToolchain = pkgs.rust-bin.stable.latest.default.override { - extensions = [ "rust-src" "rust-analyzer" ]; - }; + # Rust toolchain configuration + # Using stable channel with rust-src (for rust-analyzer) and rust-analyzer + rustToolchain = pkgs.rust-bin.stable.latest.default.override { + extensions = [ "rust-src" "rust-analyzer" ]; + }; - # Common build inputs needed by all Rust packages - commonBuildInputs = with pkgs; [ - rocksdb # RocksDB storage engine - openssl # TLS/SSL support - ]; + # Common build inputs needed by all Rust packages + commonBuildInputs = with pkgs; [ + rocksdb # RocksDB storage engine + openssl # TLS/SSL support + ]; - # Common native build inputs (build-time only) - commonNativeBuildInputs = with pkgs; [ - pkg-config # For finding libraries - protobuf # Protocol Buffers compiler - rustToolchain - ]; + # Common native build inputs (build-time only) + commonNativeBuildInputs = with pkgs; [ + pkg-config # For finding libraries + protobuf # Protocol Buffers compiler + rustToolchain + ]; - # Common environment variables for building - commonEnvVars = { - LIBCLANG_PATH = "${pkgs.llvmPackages.libclang.lib}/lib"; - PROTOC = "${pkgs.protobuf}/bin/protoc"; - ROCKSDB_LIB_DIR = "${pkgs.rocksdb}/lib"; - }; + # Common environment variables for building + commonEnvVars = { + LIBCLANG_PATH = "${pkgs.llvmPackages.libclang.lib}/lib"; + PROTOC = "${pkgs.protobuf}/bin/protoc"; + ROCKSDB_LIB_DIR = "${pkgs.rocksdb}/lib"; + }; - clusterPython = pkgs.python3.withPackages (ps: [ ps.python-snappy ]); + clusterPython = pkgs.python3.withPackages (ps: [ ps.python-snappy ]); - # Keep Rust package 
builds stable without invalidating every package on - # unrelated workspace changes. - workspaceSourceRoots = { - chainfire = [ "chainfire" ]; - flaredb = [ "flaredb" ]; - iam = [ - "apigateway" - "chainfire" - "creditservice" - "crates/photon-auth-client" - "crates/photon-config" - "crates/photon-runtime" - "crates/photon-state" - "flaredb" - "iam" - ]; - coronafs = [ "coronafs" ]; - plasmavmc = [ - "apigateway" - "chainfire" - "creditservice" - "crates/photon-auth-client" - "crates/photon-config" - "crates/photon-runtime" - "crates/photon-state" - "flaredb" - "iam" - "lightningstor" - "plasmavmc" - "prismnet" - ]; - prismnet = [ - "apigateway" - "chainfire" - "creditservice" - "crates/photon-auth-client" - "crates/photon-config" - "crates/photon-runtime" - "crates/photon-state" - "flaredb" - "iam" - "prismnet" - ]; - flashdns = [ - "apigateway" - "chainfire" - "creditservice" - "crates/photon-auth-client" - "crates/photon-config" - "crates/photon-runtime" - "crates/photon-state" - "flashdns" - "flaredb" - "iam" - ]; - fiberlb = [ - "apigateway" - "chainfire" - "creditservice" - "crates/photon-auth-client" - "crates/photon-config" - "crates/photon-runtime" - "crates/photon-state" - "fiberlb" - "flaredb" - "iam" - ]; - lightningstor = [ - "apigateway" - "chainfire" - "creditservice" - "crates/photon-auth-client" - "crates/photon-config" - "crates/photon-runtime" - "crates/photon-state" - "flaredb" - "iam" - "lightningstor" - ]; - nightlight = [ "nightlight" ]; - creditservice = [ - "apigateway" - "chainfire" - "creditservice" - "crates/photon-auth-client" - "crates/photon-config" - "crates/photon-runtime" - "crates/photon-state" - "flaredb" - "iam" - ]; - apigateway = [ - "apigateway" - "chainfire" - "creditservice" - "crates/photon-auth-client" - "crates/photon-config" - "crates/photon-runtime" - "crates/photon-state" - "flaredb" - "iam" - ]; - k8shost = [ - "apigateway" - "chainfire" - "creditservice" - "crates/photon-auth-client" - "crates/photon-config" - 
"crates/photon-runtime" - "crates/photon-state" - "fiberlb" - "flaredb" - "flashdns" - "iam" - "k8shost" - "lightningstor" - "plasmavmc" - "prismnet" - ]; - deployer = [ - "apigateway" - "chainfire" - "creditservice" - "crates/photon-auth-client" - "crates/photon-config" - "crates/photon-runtime" - "crates/photon-state" - "deployer" - "fiberlb" - "flaredb" - "flashdns" - "iam" - "prismnet" - ]; - }; + # Keep Rust package builds stable without invalidating every package on + # unrelated workspace changes. + workspaceSourceRoots = { + chainfire = [ "chainfire" ]; + flaredb = [ "flaredb" ]; + iam = [ + "apigateway" + "chainfire" + "creditservice" + "crates/photon-auth-client" + "crates/photon-config" + "crates/photon-runtime" + "crates/photon-state" + "flaredb" + "iam" + ]; + coronafs = [ "coronafs" ]; + plasmavmc = [ + "apigateway" + "chainfire" + "creditservice" + "crates/photon-auth-client" + "crates/photon-config" + "crates/photon-runtime" + "crates/photon-state" + "flaredb" + "iam" + "lightningstor" + "plasmavmc" + "prismnet" + ]; + prismnet = [ + "apigateway" + "chainfire" + "creditservice" + "crates/photon-auth-client" + "crates/photon-config" + "crates/photon-runtime" + "crates/photon-state" + "flaredb" + "iam" + "prismnet" + ]; + flashdns = [ + "apigateway" + "chainfire" + "creditservice" + "crates/photon-auth-client" + "crates/photon-config" + "crates/photon-runtime" + "crates/photon-state" + "flashdns" + "flaredb" + "iam" + ]; + fiberlb = [ + "apigateway" + "chainfire" + "creditservice" + "crates/photon-auth-client" + "crates/photon-config" + "crates/photon-runtime" + "crates/photon-state" + "fiberlb" + "flaredb" + "iam" + ]; + lightningstor = [ + "apigateway" + "chainfire" + "creditservice" + "crates/photon-auth-client" + "crates/photon-config" + "crates/photon-runtime" + "crates/photon-state" + "flaredb" + "iam" + "lightningstor" + ]; + nightlight = [ "nightlight" ]; + creditservice = [ + "apigateway" + "chainfire" + "creditservice" + 
"crates/photon-auth-client" + "crates/photon-config" + "crates/photon-runtime" + "crates/photon-state" + "flaredb" + "iam" + ]; + apigateway = [ + "apigateway" + "chainfire" + "creditservice" + "crates/photon-auth-client" + "crates/photon-config" + "crates/photon-runtime" + "crates/photon-state" + "flaredb" + "iam" + ]; + k8shost = [ + "apigateway" + "chainfire" + "creditservice" + "crates/photon-auth-client" + "crates/photon-config" + "crates/photon-runtime" + "crates/photon-state" + "fiberlb" + "flaredb" + "flashdns" + "iam" + "k8shost" + "lightningstor" + "plasmavmc" + "prismnet" + ]; + deployer = [ + "apigateway" + "chainfire" + "creditservice" + "crates/photon-auth-client" + "crates/photon-config" + "crates/photon-runtime" + "crates/photon-state" + "deployer" + "fiberlb" + "flaredb" + "flashdns" + "iam" + "prismnet" + ]; + }; - mkWorkspaceSrc = workspaceSubdir: - let - sourceRoots = workspaceSourceRoots.${workspaceSubdir} or [ workspaceSubdir ]; - in + mkWorkspaceSrc = workspaceSubdir: + let + sourceRoots = workspaceSourceRoots.${workspaceSubdir} or [ workspaceSubdir ]; + in pkgs.lib.cleanSourceWith { src = ./.; filter = path: type: let - rel = pkgs.lib.removePrefix ((toString ./. ) + "/") (toString path); + rel = pkgs.lib.removePrefix ((toString ./.) + "/") (toString path); in - rel == "" - || builtins.elem rel [ "flake.nix" "flake.lock" ] - || builtins.any (root: + rel == "" + || builtins.elem rel [ "flake.nix" "flake.lock" ] + || builtins.any + (root: rel == root || pkgs.lib.hasPrefix "${root}/" rel || pkgs.lib.hasPrefix "${rel}/" root - ) sourceRoots; + ) + sourceRoots; }; - flakeBundleSrc = pkgs.lib.cleanSourceWith { - src = ./.; - filter = path: type: - let - rel = pkgs.lib.removePrefix ((toString ./. 
) + "/") (toString path); - topLevel = builtins.head (pkgs.lib.splitString "/" rel); - includedTopLevels = [ - "apigateway" - "baremetal" - "chainfire" - "coronafs" - "crates" - "creditservice" - "deployer" - "fiberlb" - "flashdns" - "flaredb" - "iam" - "k8shost" - "lightningstor" - "mtls-agent" - "nightlight" - "nix" - "nix-nos" - "plasmavmc" - "prismnet" - ]; - isTargetDir = builtins.match "(.*/)?target(/.*)?" rel != null; - in + flakeBundleSrc = pkgs.lib.cleanSourceWith { + src = ./.; + filter = path: type: + let + rel = pkgs.lib.removePrefix ((toString ./.) + "/") (toString path); + topLevel = builtins.head (pkgs.lib.splitString "/" rel); + includedTopLevels = [ + "apigateway" + "baremetal" + "chainfire" + "coronafs" + "crates" + "creditservice" + "deployer" + "fiberlb" + "flashdns" + "flaredb" + "iam" + "k8shost" + "lightningstor" + "mtls-agent" + "nightlight" + "nix" + "plasmavmc" + "prismnet" + ]; + isTargetDir = builtins.match "(.*/)?target(/.*)?" rel != null; + in !isTargetDir && ( rel == "" || builtins.elem rel [ "flake.nix" "flake.lock" ] || builtins.elem topLevel includedTopLevels ); - }; + }; - flakeInputsBlock = '' + flakeInputsBlock = '' inputs = { # Use unstable nixpkgs for latest packages nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; @@ -283,15 +280,10 @@ inputs.nixpkgs.follows = "nixpkgs"; }; - # Nix-NOS generic network operating system modules - nix-nos = { - url = "path:./nix-nos"; - inputs.nixpkgs.follows = "nixpkgs"; - }; }; - ''; + ''; - bundledInputsBlock = '' + bundledInputsBlock = '' inputs = { nixpkgs.url = "path:./.bundle-inputs/nixpkgs"; @@ -312,1214 +304,1140 @@ inputs.nixpkgs.follows = "nixpkgs"; }; - nix-nos = { - url = "path:./nix-nos"; + }; + ''; + + flakeHeaderBlock = '' + # ============================================================================ + # INPUTS: External dependencies + # ============================================================================ + inputs = { + # Use unstable nixpkgs for latest packages + 
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + + # Rust overlay for managing Rust toolchains + rust-overlay = { + url = "github:oxalica/rust-overlay"; inputs.nixpkgs.follows = "nixpkgs"; }; - }; - ''; - flakeHeaderBlock = '' - # ============================================================================ - # INPUTS: External dependencies - # ============================================================================ - inputs = { - # Use unstable nixpkgs for latest packages - nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + # Flake utilities for multi-system support + flake-utils.url = "github:numtide/flake-utils"; + + # Disko for declarative disk partitioning + disko = { + url = "github:nix-community/disko"; + inputs.nixpkgs.follows = "nixpkgs"; + }; - # Rust overlay for managing Rust toolchains - rust-overlay = { - url = "github:oxalica/rust-overlay"; - inputs.nixpkgs.follows = "nixpkgs"; }; - # Flake utilities for multi-system support - flake-utils.url = "github:numtide/flake-utils"; + # ============================================================================ + # OUTPUTS: What this flake provides + # ============================================================================ + outputs = { self, nixpkgs, rust-overlay, flake-utils, disko, systems ? 
null }: + ''; + + bundledHeaderBlock = '' + # ============================================================================ + # INPUTS: External dependencies + # ============================================================================ + inputs = { + nixpkgs.url = "path:./.bundle-inputs/nixpkgs"; + + rust-overlay = { + url = "path:./.bundle-inputs/rust-overlay"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + + flake-utils = { + url = "path:./.bundle-inputs/flake-utils"; + inputs.systems.follows = "systems"; + }; + + systems.url = "path:./.bundle-inputs/systems"; + + disko = { + url = "path:./.bundle-inputs/disko"; + inputs.nixpkgs.follows = "nixpkgs"; + }; - # Disko for declarative disk partitioning - disko = { - url = "github:nix-community/disko"; - inputs.nixpkgs.follows = "nixpkgs"; }; - # Nix-NOS generic network operating system modules - nix-nos = { - url = "path:./nix-nos"; - inputs.nixpkgs.follows = "nixpkgs"; - }; + # ============================================================================ + # OUTPUTS: What this flake provides + # ============================================================================ + outputs = { self, nixpkgs, rust-overlay, flake-utils, disko, systems ? 
null }: + ''; + + bundledFlakeNix = + pkgs.writeText + "ultracloud-bundled-flake.nix" + ( + builtins.replaceStrings + [ flakeHeaderBlock ] + [ bundledHeaderBlock ] + (builtins.readFile ./flake.nix) + ); + + bundledFlakeHeaderFile = + pkgs.writeText "ultracloud-bundled-flake-header" bundledHeaderBlock; + + baseFlakeLock = builtins.fromJSON (builtins.readFile ./flake.lock); + + bundleInputRelPaths = { + nixpkgs = "./.bundle-inputs/nixpkgs"; + "rust-overlay" = "./.bundle-inputs/rust-overlay"; + "flake-utils" = "./.bundle-inputs/flake-utils"; + disko = "./.bundle-inputs/disko"; + systems = "./.bundle-inputs/systems"; }; - # ============================================================================ - # OUTPUTS: What this flake provides - # ============================================================================ - outputs = { self, nixpkgs, rust-overlay, flake-utils, disko, nix-nos, systems ? null }: - ''; + fetchLockedInput = + nodeName: + let + tree = builtins.fetchTree baseFlakeLock.nodes.${nodeName}.locked; + in + if builtins.isAttrs tree && tree ? 
outPath then tree.outPath else tree; - bundledHeaderBlock = '' - # ============================================================================ - # INPUTS: External dependencies - # ============================================================================ - inputs = { - nixpkgs.url = "path:./.bundle-inputs/nixpkgs"; - - rust-overlay = { - url = "path:./.bundle-inputs/rust-overlay"; - inputs.nixpkgs.follows = "nixpkgs"; - }; - - flake-utils = { - url = "path:./.bundle-inputs/flake-utils"; - inputs.systems.follows = "systems"; - }; - - systems.url = "path:./.bundle-inputs/systems"; - - disko = { - url = "path:./.bundle-inputs/disko"; - inputs.nixpkgs.follows = "nixpkgs"; - }; - - nix-nos = { - url = "path:./nix-nos"; - inputs.nixpkgs.follows = "nixpkgs"; - }; + vendoredFlakeInputs = { + nixpkgs = fetchLockedInput "nixpkgs"; + "rust-overlay" = fetchLockedInput "rust-overlay"; + "flake-utils" = fetchLockedInput "flake-utils"; + disko = fetchLockedInput "disko"; + systems = fetchLockedInput "systems"; }; - # ============================================================================ - # OUTPUTS: What this flake provides - # ============================================================================ - outputs = { self, nixpkgs, rust-overlay, flake-utils, disko, nix-nos, systems ? 
null }: - ''; - - bundledFlakeNix = - pkgs.writeText - "plasmacloud-bundled-flake.nix" - ( - builtins.replaceStrings - [ flakeHeaderBlock ] - [ bundledHeaderBlock ] - (builtins.readFile ./flake.nix) - ); - - bundledFlakeHeaderFile = - pkgs.writeText "plasmacloud-bundled-flake-header" bundledHeaderBlock; - - baseFlakeLock = builtins.fromJSON (builtins.readFile ./flake.lock); - - bundleInputRelPaths = { - nixpkgs = "./.bundle-inputs/nixpkgs"; - "rust-overlay" = "./.bundle-inputs/rust-overlay"; - "flake-utils" = "./.bundle-inputs/flake-utils"; - disko = "./.bundle-inputs/disko"; - systems = "./.bundle-inputs/systems"; - }; - - fetchLockedInput = - nodeName: - let - tree = builtins.fetchTree baseFlakeLock.nodes.${nodeName}.locked; - in - if builtins.isAttrs tree && tree ? outPath then tree.outPath else tree; - - vendoredFlakeInputs = { - nixpkgs = fetchLockedInput "nixpkgs"; - "rust-overlay" = fetchLockedInput "rust-overlay"; - "flake-utils" = fetchLockedInput "flake-utils"; - disko = fetchLockedInput "disko"; - systems = fetchLockedInput "systems"; - }; - - makeBundledLockNode = - nodeName: relPath: - let - node = baseFlakeLock.nodes.${nodeName}; - in - node - // { - locked = { - type = "path"; - path = relPath; - }; - original = { - type = "path"; - path = relPath; - }; - }; - - bundledFlakeLock = baseFlakeLock // { - nodes = - baseFlakeLock.nodes + makeBundledLockNode = + nodeName: relPath: + let + node = baseFlakeLock.nodes.${nodeName}; + in + node // { - root = - baseFlakeLock.nodes.root - // { - inputs = - baseFlakeLock.nodes.root.inputs - // { - systems = "systems"; - }; - }; - nixpkgs = makeBundledLockNode "nixpkgs" bundleInputRelPaths.nixpkgs; - "rust-overlay" = makeBundledLockNode "rust-overlay" bundleInputRelPaths."rust-overlay"; - "flake-utils" = makeBundledLockNode "flake-utils" bundleInputRelPaths."flake-utils"; - disko = makeBundledLockNode "disko" bundleInputRelPaths.disko; - systems = makeBundledLockNode "systems" bundleInputRelPaths.systems; - }; - }; 
- - bundledFlakeLockFile = - pkgs.writeText "plasmacloud-bundled-flake.lock" (builtins.toJSON bundledFlakeLock); - - inBundledEval = builtins.pathExists ./.bundle-eval-marker; - - bundledFlakeRootDrv = pkgs.runCommand "plasmacloud-bundled-flake-root" { - nativeBuildInputs = [ - pkgs.coreutils - pkgs.python3 - ]; - } '' - mkdir -p "$out" - cp -a ${flakeBundleSrc}/. "$out"/ - chmod -R u+w "$out" - touch "$out/.bundle-eval-marker" - mkdir -p "$out/.bundle-inputs" - cp -a ${vendoredFlakeInputs.nixpkgs} "$out/.bundle-inputs/nixpkgs" - cp -a ${vendoredFlakeInputs."rust-overlay"} "$out/.bundle-inputs/rust-overlay" - cp -a ${vendoredFlakeInputs."flake-utils"} "$out/.bundle-inputs/flake-utils" - cp -a ${vendoredFlakeInputs.disko} "$out/.bundle-inputs/disko" - cp -a ${vendoredFlakeInputs.systems} "$out/.bundle-inputs/systems" - cp ${bundledFlakeLockFile} "$out/flake.lock" - python3 - <<'PY' "$out/flake.nix" ${bundledFlakeHeaderFile} - from pathlib import Path - import re - import sys - - flake_path = Path(sys.argv[1]) - header = Path(sys.argv[2]).read_text() - source = flake_path.read_text() - pattern = re.compile( - r" # ============================================================================\n" - r" # INPUTS: External dependencies\n" - r" # ============================================================================\n" - r" inputs = \{.*?\n" - r" # ============================================================================\n" - r" # OUTPUTS: What this flake provides\n" - r" # ============================================================================\n" - r" outputs = \{ self, nixpkgs, rust-overlay, flake-utils, disko, nix-nos, systems \? 
null \}:", - re.S, - ) - rewritten, count = pattern.subn(header.rstrip("\n"), source, count=1) - if count != 1: - raise SystemExit(f"expected to rewrite 1 flake header, rewrote {count}") - flake_path.write_text(rewritten) - PY - ''; - - bundledFlakeRoot = - if inBundledEval then - null - else - builtins.path { - path = bundledFlakeRootDrv; - name = "plasmacloud-bundled-flake-root-src"; + locked = { + type = "path"; + path = relPath; + }; + original = { + type = "path"; + path = relPath; + }; }; - bundledFlakeRootNarHashFile = - if inBundledEval then - null - else - pkgs.runCommand "plasmacloud-bundled-flake-root-narhash" { - nativeBuildInputs = [ pkgs.nix ]; + bundledFlakeLock = baseFlakeLock // { + nodes = + baseFlakeLock.nodes + // { + root = + baseFlakeLock.nodes.root + // { + inputs = + baseFlakeLock.nodes.root.inputs + // { + systems = "systems"; + }; + }; + nixpkgs = makeBundledLockNode "nixpkgs" bundleInputRelPaths.nixpkgs; + "rust-overlay" = makeBundledLockNode "rust-overlay" bundleInputRelPaths."rust-overlay"; + "flake-utils" = makeBundledLockNode "flake-utils" bundleInputRelPaths."flake-utils"; + disko = makeBundledLockNode "disko" bundleInputRelPaths.disko; + systems = makeBundledLockNode "systems" bundleInputRelPaths.systems; + }; + }; + + bundledFlakeLockFile = + pkgs.writeText "ultracloud-bundled-flake.lock" (builtins.toJSON bundledFlakeLock); + + inBundledEval = builtins.pathExists ./.bundle-eval-marker; + + bundledFlakeRootDrv = pkgs.runCommand "ultracloud-bundled-flake-root" + { + nativeBuildInputs = [ + pkgs.coreutils + pkgs.python3 + ]; } '' - ${pkgs.nix}/bin/nix \ - --extra-experimental-features nix-command \ - hash path --sri ${bundledFlakeRoot} \ - | tr -d '\n' > "$out" - ''; - - bundledFlakeRootNarHash = - if inBundledEval then - null - else - builtins.readFile bundledFlakeRootNarHashFile; - - bundledFlake = - if inBundledEval then - null - else - builtins.getFlake ( - builtins.unsafeDiscardStringContext - "path:${toString 
bundledFlakeRoot}?narHash=${bundledFlakeRootNarHash}" - ); - - bundledVmSmokeTargetToplevel = - if inBundledEval then - null - else - bundledFlake.nixosConfigurations.vm-smoke-target.config.system.build.toplevel; - - # Helper function to build a Rust workspace package - # Parameters: - # name: package name (e.g., "chainfire-server") - # workspaceSubdir: subdirectory containing Cargo.toml (e.g., "chainfire") - # mainCrate: optional main crate name if different from workspace - # description: package description for meta - # doCheck: whether to run tests during build (default: false) - buildRustWorkspace = { name, workspaceSubdir, mainCrate ? null, description ? "", doCheck ? false }: - pkgs.rustPlatform.buildRustPackage ({ - pname = name; - version = "0.1.0"; - src = mkWorkspaceSrc workspaceSubdir; - - cargoLock = { - lockFile = ./${workspaceSubdir}/Cargo.lock; - }; - - # Build from the workspace subdirectory - buildAndTestSubdir = workspaceSubdir; - - # Copy Cargo.lock to root for nix validation (expects it at src root) - postUnpack = '' - cp $sourceRoot/${workspaceSubdir}/Cargo.lock $sourceRoot/Cargo.lock - ''; - - nativeBuildInputs = commonNativeBuildInputs; - buildInputs = commonBuildInputs; - - # Set environment variables for build - inherit (commonEnvVars) LIBCLANG_PATH PROTOC ROCKSDB_LIB_DIR; - - # Enable cargo tests during build (can be overridden per-package) - inherit doCheck; - - # Test flags: run tests for the main crate only - cargoTestFlags = pkgs.lib.optionals (mainCrate != null) [ "-p" mainCrate ]; - - # Metadata for the package - meta = with pkgs.lib; { - description = description; - homepage = "https://github.com/yourorg/plasmacloud"; - license = licenses.asl20; # Apache 2.0 - maintainers = [ ]; - platforms = platforms.linux; - }; - - # Build only the server binary if mainCrate is specified - # This avoids building test binaries and examples - } // pkgs.lib.optionalAttrs (mainCrate != null) { - cargoBuildFlags = [ "-p" mainCrate ]; - }); - - # 
Helper function to build multiple binaries from the same workspace in - # one cargo invocation. This is mainly used by the VM cluster builds so - # a single host build can satisfy several services from the same - # workspace. - buildRustWorkspaceBundle = { name, workspaceSubdir, crates, description ? "", doCheck ? false }: - pkgs.rustPlatform.buildRustPackage { - pname = name; - version = "0.1.0"; - src = mkWorkspaceSrc workspaceSubdir; - - cargoLock = { - lockFile = ./${workspaceSubdir}/Cargo.lock; - }; - - buildAndTestSubdir = workspaceSubdir; - - postUnpack = '' - cp $sourceRoot/${workspaceSubdir}/Cargo.lock $sourceRoot/Cargo.lock - ''; - - nativeBuildInputs = commonNativeBuildInputs; - buildInputs = commonBuildInputs; - - inherit (commonEnvVars) LIBCLANG_PATH PROTOC ROCKSDB_LIB_DIR; - inherit doCheck; - - cargoBuildFlags = pkgs.lib.concatMap (crate: [ "-p" crate ]) crates; - - meta = with pkgs.lib; { - description = description; - homepage = "https://github.com/yourorg/plasmacloud"; - license = licenses.asl20; - maintainers = [ ]; - platforms = platforms.linux; - }; - }; - - in - { - # ====================================================================== - # DEVELOPMENT SHELL: Drop-in replacement for shell.nix - # ====================================================================== - devShells.default = pkgs.mkShell { - name = "cloud-dev"; - - buildInputs = with pkgs; [ - # Rust toolchain (replaces rustup/cargo/rustc from shell.nix) - rustToolchain - - # Protocol Buffers - protobuf - - # LLVM/Clang (for bindgen/clang-sys) - llvmPackages.libclang - llvmPackages.clang - - # Build essentials - pkg-config - openssl - - # Development tools - git - curl - jq - grpcurl - openssh - sshpass - clusterPython - qemu - vde2 - bind - - # For RocksDB (chainfire dependency) - rocksdb - ]; - - # Environment variables for clang-sys and other build tools - LIBCLANG_PATH = "${pkgs.llvmPackages.libclang.lib}/lib"; - PROTOC = "${pkgs.protobuf}/bin/protoc"; - ROCKSDB_LIB_DIR = 
"${pkgs.rocksdb}/lib"; - - shellHook = '' - echo "Cloud Platform Development Environment" - echo "=======================================" - echo "Rust: $(rustc --version)" - echo "Protoc: $(protoc --version)" - echo "Clang: $(clang --version | head -1)" - echo "" - echo "Environment variables set:" - echo " LIBCLANG_PATH=$LIBCLANG_PATH" - echo " PROTOC=$PROTOC" - echo " ROCKSDB_LIB_DIR=$ROCKSDB_LIB_DIR" - echo "" - echo "Available workspaces:" - echo " - chainfire (distributed cluster coordination store)" - echo " - flaredb (distributed SQL/KV database for metadata and tenant data)" - echo " - iam (identity & access management)" - echo " - plasmavmc (VM control plane)" - echo " - prismnet (SDN controller)" - echo " - flashdns (DNS server)" - echo " - fiberlb (load balancer)" - echo " - lightningstor (block storage)" - echo " - nightlight (metrics store)" - echo " - creditservice (quota & billing)" - echo " - k8shost (kubernetes hosting)" - ''; - }; - - # ====================================================================== - # PACKAGES: Buildable artifacts from each workspace - # ====================================================================== - packages = { - # -------------------------------------------------------------------- - # Chainfire: Distributed Cluster Coordination Store - # -------------------------------------------------------------------- - chainfire-server = buildRustWorkspace { - name = "chainfire-server"; - workspaceSubdir = "chainfire"; - mainCrate = "chainfire-server"; - description = "Distributed cluster coordination store with consensus, watches, and membership"; - }; - - # -------------------------------------------------------------------- - # FlareDB: Distributed SQL/KV Database - # -------------------------------------------------------------------- - flaredb-server = buildRustWorkspace { - name = "flaredb-server"; - workspaceSubdir = "flaredb"; - mainCrate = "flaredb-server"; - description = "Distributed Postgres-like SQL/KV 
database for service metadata, tenant data, and DBaaS"; - }; - - # -------------------------------------------------------------------- - # IAM: Identity and Access Management Service - # -------------------------------------------------------------------- - iam-server = buildRustWorkspace { - name = "iam-server"; - workspaceSubdir = "iam"; - mainCrate = "iam-server"; - description = "Identity and access management service with RBAC and multi-tenant support"; - }; - - # -------------------------------------------------------------------- - # CoronaFS: Shared Block Volume Service - # -------------------------------------------------------------------- - coronafs-server = buildRustWorkspace { - name = "coronafs-server"; - workspaceSubdir = "coronafs"; - mainCrate = "coronafs-server"; - description = "Shared block volume service exporting raw VM volumes over NBD"; - }; - - # -------------------------------------------------------------------- - # PlasmaVMC: Virtual Machine Control Plane - # -------------------------------------------------------------------- - plasmavmc-server = buildRustWorkspace { - name = "plasmavmc-server"; - workspaceSubdir = "plasmavmc"; - mainCrate = "plasmavmc-server"; - description = "Virtual machine control plane for managing compute instances"; - }; - - # -------------------------------------------------------------------- - # PrismNet: Software-Defined Networking Controller - # -------------------------------------------------------------------- - prismnet-server = buildRustWorkspace { - name = "prismnet-server"; - workspaceSubdir = "prismnet"; - mainCrate = "prismnet-server"; - description = "Software-defined networking controller with OVN integration"; - }; - - # -------------------------------------------------------------------- - # FlashDNS: High-Performance DNS Server - # -------------------------------------------------------------------- - flashdns-server = buildRustWorkspace { - name = "flashdns-server"; - workspaceSubdir = 
"flashdns"; - mainCrate = "flashdns-server"; - description = "High-performance DNS server with pattern-based reverse DNS"; - }; - - # -------------------------------------------------------------------- - # FiberLB: Layer 4/7 Load Balancer - # -------------------------------------------------------------------- - fiberlb-server = buildRustWorkspace { - name = "fiberlb-server"; - workspaceSubdir = "fiberlb"; - mainCrate = "fiberlb-server"; - description = "Layer 4/7 load balancer for distributing traffic across services"; - }; - - # -------------------------------------------------------------------- - # LightningStor: Block Storage Service - # -------------------------------------------------------------------- - lightningstor-server = buildRustWorkspace { - name = "lightningstor-server"; - workspaceSubdir = "lightningstor"; - mainCrate = "lightningstor-server"; - description = "Distributed block storage service for persistent volumes"; - }; - - lightningstor-node = buildRustWorkspace { - name = "lightningstor-node"; - workspaceSubdir = "lightningstor"; - mainCrate = "lightningstor-node"; - description = "LightningStor distributed storage node daemon"; - }; - - lightningstor-workspace = buildRustWorkspaceBundle { - name = "lightningstor-workspace"; - workspaceSubdir = "lightningstor"; - crates = [ - "lightningstor-server" - "lightningstor-node" - ]; - description = "Combined LightningStor server and node workspace build"; - }; - - # -------------------------------------------------------------------- - # NightLight: Prometheus-compatible Metrics Store - # -------------------------------------------------------------------- - nightlight-server = buildRustWorkspace { - name = "nightlight-server"; - workspaceSubdir = "nightlight"; - mainCrate = "nightlight-server"; - description = "Prometheus-compatible metrics storage (NightLight)"; - }; - - # -------------------------------------------------------------------- - # CreditService: Quota and Billing Controller - # 
-------------------------------------------------------------------- - creditservice-server = buildRustWorkspace { - name = "creditservice-server"; - workspaceSubdir = "creditservice"; - mainCrate = "creditservice-server"; - description = "Credit/quota management service with billing integration"; - }; - - # -------------------------------------------------------------------- - # APIGateway: API Gateway Service - # -------------------------------------------------------------------- - apigateway-server = buildRustWorkspace { - name = "apigateway-server"; - workspaceSubdir = "apigateway"; - mainCrate = "apigateway-server"; - description = "API Gateway for PlasmaCloud services"; - }; - - # -------------------------------------------------------------------- - # k8shost: Kubernetes Hosting Component - # -------------------------------------------------------------------- - k8shost-server = buildRustWorkspace { - name = "k8shost-server"; - workspaceSubdir = "k8shost"; - mainCrate = "k8shost-server"; - description = "Lightweight Kubernetes hosting with multi-tenant isolation"; - }; - - # -------------------------------------------------------------------- - # Deployer: Bare-metal bootstrap orchestration service - # -------------------------------------------------------------------- - deployer-server = buildRustWorkspace { - name = "deployer-server"; - workspaceSubdir = "deployer"; - mainCrate = "deployer-server"; - description = "Node bootstrap and phone-home orchestration service"; - }; - - deployer-ctl = buildRustWorkspace { - name = "deployer-ctl"; - workspaceSubdir = "deployer"; - mainCrate = "deployer-ctl"; - description = "Declarative control utility for PhotonCloud deployer state"; - }; - - node-agent = buildRustWorkspace { - name = "node-agent"; - workspaceSubdir = "deployer"; - mainCrate = "node-agent"; - description = "Node-local runtime agent for PhotonCloud scheduled services"; - }; - - nix-agent = buildRustWorkspace { - name = "nix-agent"; - 
workspaceSubdir = "deployer"; - mainCrate = "nix-agent"; - description = "Node-local NixOS reconciliation agent for PhotonCloud hosts"; - }; - - plasmacloud-reconciler = buildRustWorkspace { - name = "plasmacloud-reconciler"; - workspaceSubdir = "deployer"; - mainCrate = "plasmacloud-reconciler"; - description = "Declarative reconciler for host rollouts and published resources"; - }; - - plasmacloudFlakeBundle = pkgs.runCommand "plasmacloud-flake-bundle.tar.gz" { - nativeBuildInputs = [ - pkgs.coreutils - pkgs.gnutar - pkgs.gzip - ]; - } '' - bundle_root="$(mktemp -d)" - cp -a ${bundledFlakeRootDrv}/. "$bundle_root"/ - chmod -R u+w "$bundle_root" - - tar \ - --sort=name \ - --mtime='@1' \ - --owner=0 \ - --group=0 \ - --numeric-owner \ - -C "$bundle_root" \ - -cf - . \ - | gzip -n > "$out" - ''; - - # -------------------------------------------------------------------- - # Fleet Scheduler: Non-Kubernetes service scheduler for bare-metal nodes - # -------------------------------------------------------------------- - fleet-scheduler = buildRustWorkspace { - name = "fleet-scheduler"; - workspaceSubdir = "deployer"; - mainCrate = "fleet-scheduler"; - description = "Label-aware service scheduler for PhotonCloud bare-metal fleets"; - }; - - deployer-workspace = buildRustWorkspaceBundle { - name = "deployer-workspace"; - workspaceSubdir = "deployer"; - crates = [ - "deployer-server" - "deployer-ctl" - "node-agent" - "nix-agent" - "plasmacloud-reconciler" - "fleet-scheduler" - ]; - description = "Combined deployer workspace build for cluster images and checks"; - }; - - vmClusterDeployerState = - self.nixosConfigurations.node01.config.system.build.plasmacloudDeployerClusterState; - - vmClusterFlakeBundle = self.packages.${system}.plasmacloudFlakeBundle; - vmSmokeBundledTargetToplevel = bundledVmSmokeTargetToplevel; - - # -------------------------------------------------------------------- - # Default package: Build all servers - # 
-------------------------------------------------------------------- - default = pkgs.symlinkJoin { - name = "photoncloud-all"; - paths = [ - self.packages.${system}.chainfire-server - self.packages.${system}.flaredb-server - self.packages.${system}.iam-server - self.packages.${system}.plasmavmc-server - self.packages.${system}.prismnet-server - self.packages.${system}.flashdns-server - self.packages.${system}.fiberlb-server - self.packages.${system}.lightningstor-workspace - self.packages.${system}.nightlight-server - self.packages.${system}.creditservice-server - self.packages.${system}.apigateway-server - self.packages.${system}.k8shost-server - self.packages.${system}.deployer-workspace - self.packages.${system}.vmClusterDeployerState - ]; - }; - }; - - # ====================================================================== - # APPS: Runnable applications from packages - # ====================================================================== - apps = { - chainfire-server = flake-utils.lib.mkApp { - drv = self.packages.${system}.chainfire-server; - }; - - flaredb-server = flake-utils.lib.mkApp { - drv = self.packages.${system}.flaredb-server; - }; - - iam-server = flake-utils.lib.mkApp { - drv = self.packages.${system}.iam-server; - }; - - plasmavmc-server = flake-utils.lib.mkApp { - drv = self.packages.${system}.plasmavmc-server; - }; - - prismnet-server = flake-utils.lib.mkApp { - drv = self.packages.${system}.prismnet-server; - }; - - flashdns-server = flake-utils.lib.mkApp { - drv = self.packages.${system}.flashdns-server; - }; - - fiberlb-server = flake-utils.lib.mkApp { - drv = self.packages.${system}.fiberlb-server; - }; - - lightningstor-server = flake-utils.lib.mkApp { - drv = self.packages.${system}.lightningstor-server; - }; - - lightningstor-node = flake-utils.lib.mkApp { - drv = self.packages.${system}.lightningstor-node; - }; - - nightlight-server = flake-utils.lib.mkApp { - drv = self.packages.${system}.nightlight-server; - }; - - 
creditservice-server = flake-utils.lib.mkApp { - drv = self.packages.${system}.creditservice-server; - }; - - apigateway-server = flake-utils.lib.mkApp { - drv = self.packages.${system}.apigateway-server; - }; - - k8shost-server = flake-utils.lib.mkApp { - drv = self.packages.${system}.k8shost-server; - }; - - deployer-server = flake-utils.lib.mkApp { - drv = self.packages.${system}.deployer-server; - }; - - deployer-ctl = flake-utils.lib.mkApp { - drv = self.packages.${system}.deployer-ctl; - }; - - plasmacloud-reconciler = flake-utils.lib.mkApp { - drv = self.packages.${system}.plasmacloud-reconciler; - }; - - nix-agent = flake-utils.lib.mkApp { - drv = self.packages.${system}.nix-agent; - }; - - node-agent = flake-utils.lib.mkApp { - drv = self.packages.${system}.node-agent; - }; - - fleet-scheduler = flake-utils.lib.mkApp { - drv = self.packages.${system}.fleet-scheduler; - }; - }; - - checks = { - workspace-source-roots-audit = pkgs.runCommand "workspace-source-roots-audit" { - nativeBuildInputs = [ pkgs.python3 ]; - } '' - ${pkgs.python3}/bin/python - <<'PY' ${./.} - from __future__ import annotations - + mkdir -p "$out" + cp -a ${flakeBundleSrc}/. 
"$out"/ + chmod -R u+w "$out" + touch "$out/.bundle-eval-marker" + mkdir -p "$out/.bundle-inputs" + cp -a ${vendoredFlakeInputs.nixpkgs} "$out/.bundle-inputs/nixpkgs" + cp -a ${vendoredFlakeInputs."rust-overlay"} "$out/.bundle-inputs/rust-overlay" + cp -a ${vendoredFlakeInputs."flake-utils"} "$out/.bundle-inputs/flake-utils" + cp -a ${vendoredFlakeInputs.disko} "$out/.bundle-inputs/disko" + cp -a ${vendoredFlakeInputs.systems} "$out/.bundle-inputs/systems" + cp ${bundledFlakeLockFile} "$out/flake.lock" + python3 - <<'PY' "$out/flake.nix" ${bundledFlakeHeaderFile} + from pathlib import Path import re import sys - import tomllib - from pathlib import Path - from typing import Any - - def extract_workspace_source_roots(flake_path: Path) -> dict[str, list[str]]: - source = flake_path.read_text() - match = re.search(r"workspaceSourceRoots\s*=\s*\{(.*?)\n\s*\};", source, re.S) - if match is None: - raise ValueError(f"Could not find workspaceSourceRoots in {flake_path}") - - roots: dict[str, list[str]] = {} - for name, body in re.findall(r"\n\s*(\w+)\s*=\s*\[(.*?)\];", match.group(1), re.S): - roots[name] = re.findall(r'"([^"]+)"', body) - return roots - - - def collect_path_dependencies(value: Any) -> list[str]: - found: list[str] = [] - - if isinstance(value, dict): - path = value.get("path") - if isinstance(path, str): - found.append(path) - for nested in value.values(): - found.extend(collect_path_dependencies(nested)) - elif isinstance(value, list): - for nested in value: - found.extend(collect_path_dependencies(nested)) - - return found - - - def workspace_manifests(repo_root: Path, workspace_name: str) -> list[Path]: - workspace_manifest = repo_root / workspace_name / "Cargo.toml" - manifests = [workspace_manifest] - workspace_data = tomllib.loads(workspace_manifest.read_text()) - members = workspace_data.get("workspace", {}).get("members", []) - - for member in members: - for candidate in workspace_manifest.parent.glob(member): - manifest = candidate if 
candidate.name == "Cargo.toml" else candidate / "Cargo.toml" - if manifest.is_file(): - manifests.append(manifest) - - unique_manifests: list[Path] = [] - seen: set[Path] = set() - for manifest in manifests: - resolved = manifest.resolve() - if resolved in seen: - continue - seen.add(resolved) - unique_manifests.append(manifest) - return unique_manifests - - - def required_root(dep_rel: Path) -> str: - parts = dep_rel.parts - if not parts: - return "" - if parts[0] == "crates" and len(parts) >= 2: - return "/".join(parts[:2]) - return parts[0] - - - def is_covered(dep_rel: str, configured_roots: list[str]) -> bool: - return any(dep_rel == root or dep_rel.startswith(f"{root}/") for root in configured_roots) - - - def main() -> int: - repo_root = Path(sys.argv[1]).resolve() - workspace_roots = extract_workspace_source_roots(repo_root / "flake.nix") - failures: list[str] = [] - - for workspace_name, configured_roots in sorted(workspace_roots.items()): - workspace_manifest = repo_root / workspace_name / "Cargo.toml" - if not workspace_manifest.is_file(): - continue - - for manifest in workspace_manifests(repo_root, workspace_name): - manifest_data = tomllib.loads(manifest.read_text()) - for dep_path in collect_path_dependencies(manifest_data): - dependency_dir = (manifest.parent / dep_path).resolve() - try: - dep_rel = dependency_dir.relative_to(repo_root) - except ValueError: - continue - - dep_rel_str = dep_rel.as_posix() - if is_covered(dep_rel_str, configured_roots): - continue - - failures.append( - f"{workspace_name}: missing source root '{required_root(dep_rel)}' " - f"for dependency '{dep_rel_str}' referenced by " - f"{manifest.relative_to(repo_root).as_posix()}" - ) - - if failures: - print("workspaceSourceRoots is missing path dependencies:", file=sys.stderr) - for failure in failures: - print(f" - {failure}", file=sys.stderr) - return 1 - - print("workspaceSourceRoots covers all workspace path dependencies.") - return 0 - - - raise SystemExit(main()) + 
flake_path = Path(sys.argv[1]) + header = Path(sys.argv[2]).read_text() + source = flake_path.read_text() + pattern = re.compile( + r" # ============================================================================\n" + r" # INPUTS: External dependencies\n" + r" # ============================================================================\n" + r" inputs = \{.*?\n" + r" # ============================================================================\n" + r" # OUTPUTS: What this flake provides\n" + r" # ============================================================================\n" + r" outputs = \{ self, nixpkgs, rust-overlay, flake-utils, disko, systems \? null \}:", + re.S, + ) + rewritten, count = pattern.subn(header.rstrip("\n"), source, count=1) + if count != 1: + raise SystemExit(f"expected to rewrite 1 flake header, rewrote {count}") + flake_path.write_text(rewritten) PY - touch "$out" ''; - first-boot-topology-vm-smoke = pkgs.testers.runNixOSTest ( - import ./nix/tests/first-boot-topology-vm-smoke.nix { - inherit pkgs; - photoncloudPackages = self.packages.${system}; - photoncloudModule = self.nixosModules.default; - nixNosModule = nix-nos.nixosModules.default; - } - ); - - deployer-vm-smoke = pkgs.testers.runNixOSTest ( - import ./nix/tests/deployer-vm-smoke.nix { - inherit pkgs; - photoncloudPackages = self.packages.${system}; - smokeTargetToplevel = self.packages.${system}.vmSmokeBundledTargetToplevel; - } - ); - - deployer-vm-rollback = pkgs.testers.runNixOSTest ( - import ./nix/tests/deployer-vm-smoke.nix { - inherit pkgs; - photoncloudPackages = self.packages.${system}; - smokeTargetToplevel = self.packages.${system}.vmSmokeBundledTargetToplevel; - desiredSystemOverrides = { - health_check_command = [ "false" ]; - rollback_on_failure = true; + bundledFlakeRoot = + if inBundledEval then + null + else + builtins.path { + path = bundledFlakeRootDrv; + name = "ultracloud-bundled-flake-root-src"; }; - expectedStatus = "rolled-back"; - 
expectCurrentSystemMatchesTarget = false; - expectMarkerPresent = false; - } - ); - fiberlb-native-bgp-vm-smoke = pkgs.testers.runNixOSTest ( - import ./nix/tests/fiberlb-native-bgp-vm-smoke.nix { - inherit pkgs; - photoncloudPackages = self.packages.${system}; - photoncloudModule = self.nixosModules.default; - nixNosModule = nix-nos.nixosModules.default; - } - ); + bundledFlakeRootNarHashFile = + if inBundledEval then + null + else + pkgs.runCommand "ultracloud-bundled-flake-root-narhash" + { + nativeBuildInputs = [ pkgs.nix ]; + } '' + ${pkgs.nix}/bin/nix \ + --extra-experimental-features nix-command \ + hash path --sri ${bundledFlakeRoot} \ + | tr -d '\n' > "$out" + ''; - fiberlb-native-bgp-multipath-vm-smoke = pkgs.testers.runNixOSTest ( - import ./nix/tests/fiberlb-native-bgp-multipath-vm-smoke.nix { - inherit pkgs; - photoncloudPackages = self.packages.${system}; - photoncloudModule = self.nixosModules.default; - nixNosModule = nix-nos.nixosModules.default; - } - ); + bundledFlakeRootNarHash = + if inBundledEval then + null + else + builtins.readFile bundledFlakeRootNarHashFile; - fiberlb-native-bgp-interop-vm-smoke = pkgs.testers.runNixOSTest ( - import ./nix/tests/fiberlb-native-bgp-interop-vm-smoke.nix { - inherit pkgs; - photoncloudPackages = self.packages.${system}; - photoncloudModule = self.nixosModules.default; - nixNosModule = nix-nos.nixosModules.default; - } - ); + bundledFlake = + if inBundledEval then + null + else + builtins.getFlake ( + builtins.unsafeDiscardStringContext + "path:${toString bundledFlakeRoot}?narHash=${bundledFlakeRootNarHash}" + ); - fiberlb-native-bgp-ecmp-drain-vm-smoke = pkgs.testers.runNixOSTest ( - import ./nix/tests/fiberlb-native-bgp-ecmp-drain-vm-smoke.nix { - inherit pkgs; - photoncloudPackages = self.packages.${system}; - photoncloudModule = self.nixosModules.default; - nixNosModule = nix-nos.nixosModules.default; - } - ); + bundledVmSmokeTargetToplevel = + if inBundledEval then + null + else + 
bundledFlake.nixosConfigurations.vm-smoke-target.config.system.build.toplevel; - deployer-bootstrap-e2e = pkgs.runCommand "deployer-bootstrap-e2e" { - nativeBuildInputs = with pkgs; [ - bash - coreutils + # Helper function to build a Rust workspace package + # Parameters: + # name: package name (e.g., "chainfire-server") + # workspaceSubdir: subdirectory containing Cargo.toml (e.g., "chainfire") + # mainCrate: optional main crate name if different from workspace + # description: package description for meta + # doCheck: whether to run tests during build (default: false) + buildRustWorkspace = { name, workspaceSubdir, mainCrate ? null, description ? "", doCheck ? false }: + pkgs.rustPlatform.buildRustPackage ({ + pname = name; + version = "0.1.0"; + src = mkWorkspaceSrc workspaceSubdir; + + cargoLock = { + lockFile = ./${workspaceSubdir}/Cargo.lock; + }; + + # Build from the workspace subdirectory + buildAndTestSubdir = workspaceSubdir; + + # Copy Cargo.lock to root for nix validation (expects it at src root) + postUnpack = '' + cp $sourceRoot/${workspaceSubdir}/Cargo.lock $sourceRoot/Cargo.lock + ''; + + nativeBuildInputs = commonNativeBuildInputs; + buildInputs = commonBuildInputs; + + # Set environment variables for build + inherit (commonEnvVars) LIBCLANG_PATH PROTOC ROCKSDB_LIB_DIR; + + # Enable cargo tests during build (can be overridden per-package) + inherit doCheck; + + # Test flags: run tests for the main crate only + cargoTestFlags = pkgs.lib.optionals (mainCrate != null) [ "-p" mainCrate ]; + + # Metadata for the package + meta = with pkgs.lib; { + description = description; + homepage = "https://github.com/yourorg/ultracloud"; + license = licenses.asl20; # Apache 2.0 + maintainers = [ ]; + platforms = platforms.linux; + }; + + # Build only the server binary if mainCrate is specified + # This avoids building test binaries and examples + } // pkgs.lib.optionalAttrs (mainCrate != null) { + cargoBuildFlags = [ "-p" mainCrate ]; + }); + + # Helper function 
to build multiple binaries from the same workspace in + # one cargo invocation. This is mainly used by the VM cluster builds so + # a single host build can satisfy several services from the same + # workspace. + buildRustWorkspaceBundle = { name, workspaceSubdir, crates, description ? "", doCheck ? false }: + pkgs.rustPlatform.buildRustPackage { + pname = name; + version = "0.1.0"; + src = mkWorkspaceSrc workspaceSubdir; + + cargoLock = { + lockFile = ./${workspaceSubdir}/Cargo.lock; + }; + + buildAndTestSubdir = workspaceSubdir; + + postUnpack = '' + cp $sourceRoot/${workspaceSubdir}/Cargo.lock $sourceRoot/Cargo.lock + ''; + + nativeBuildInputs = commonNativeBuildInputs; + buildInputs = commonBuildInputs; + + inherit (commonEnvVars) LIBCLANG_PATH PROTOC ROCKSDB_LIB_DIR; + inherit doCheck; + + cargoBuildFlags = pkgs.lib.concatMap (crate: [ "-p" crate ]) crates; + + meta = with pkgs.lib; { + description = description; + homepage = "https://github.com/yourorg/ultracloud"; + license = licenses.asl20; + maintainers = [ ]; + platforms = platforms.linux; + }; + }; + + in + { + # ====================================================================== + # DEVELOPMENT SHELL: Drop-in replacement for shell.nix + # ====================================================================== + devShells.default = pkgs.mkShell { + name = "cloud-dev"; + + buildInputs = with pkgs; [ + # Rust toolchain (replaces rustup/cargo/rustc from shell.nix) + rustToolchain + + # Protocol Buffers + protobuf + + # LLVM/Clang (for bindgen/clang-sys) + llvmPackages.libclang + llvmPackages.clang + + # Build essentials + pkg-config + openssl + + # Development tools + git curl - findutils - gawk - gnugrep - gnused - procps - python3 - ]; - PHOTONCLOUD_E2E_IN_NIX = "1"; - PHOTONCLOUD_CHAINFIRE_SERVER_BIN = - "${self.packages.${system}.chainfire-server}/bin/chainfire"; - PHOTONCLOUD_DEPLOYER_SERVER_BIN = - "${self.packages.${system}.deployer-workspace}/bin/deployer-server"; - PHOTONCLOUD_DEPLOYER_CTL_BIN = - 
"${self.packages.${system}.deployer-workspace}/bin/deployer-ctl"; - } '' - export HOME="$TMPDIR/home" - mkdir -p "$HOME" - export PATH="${pkgs.lib.makeBinPath [ - pkgs.bash - pkgs.coreutils - pkgs.curl - pkgs.findutils - pkgs.gawk - pkgs.gnugrep - pkgs.gnused - pkgs.procps - pkgs.python3 - ]}" - bash ${./deployer/scripts/verify-deployer-bootstrap-e2e.sh} - touch "$out" - ''; + jq + grpcurl + openssh + sshpass + clusterPython + qemu + vde2 + bind - host-lifecycle-e2e = pkgs.runCommand "host-lifecycle-e2e" { - nativeBuildInputs = with pkgs; [ - bash - coreutils - curl - findutils - gawk - gnugrep - gnused - procps - python3 + # For RocksDB (chainfire dependency) + rocksdb ]; - PHOTONCLOUD_E2E_IN_NIX = "1"; - PHOTONCLOUD_CHAINFIRE_SERVER_BIN = - "${self.packages.${system}.chainfire-server}/bin/chainfire"; - PHOTONCLOUD_DEPLOYER_CTL_BIN = - "${self.packages.${system}.deployer-workspace}/bin/deployer-ctl"; - PHOTONCLOUD_PLASMACLOUD_RECONCILER_BIN = - "${self.packages.${system}.deployer-workspace}/bin/plasmacloud-reconciler"; - } '' - export HOME="$TMPDIR/home" - mkdir -p "$HOME" - export PATH="${pkgs.lib.makeBinPath [ - pkgs.bash - pkgs.coreutils - pkgs.curl - pkgs.findutils - pkgs.gawk - pkgs.gnugrep - pkgs.gnused - pkgs.procps - pkgs.python3 - ]}" - bash ${./deployer/scripts/verify-host-lifecycle-e2e.sh} - touch "$out" - ''; - fleet-scheduler-e2e = pkgs.runCommand "fleet-scheduler-e2e" { - nativeBuildInputs = with pkgs; [ - bash - coreutils - curl - findutils - gawk - gnugrep - gnused - procps - python3 - ]; - PHOTONCLOUD_E2E_IN_NIX = "1"; - PHOTONCLOUD_CHAINFIRE_SERVER_BIN = - "${self.packages.${system}.chainfire-server}/bin/chainfire"; - PHOTONCLOUD_DEPLOYER_CTL_BIN = - "${self.packages.${system}.deployer-workspace}/bin/deployer-ctl"; - PHOTONCLOUD_NODE_AGENT_BIN = - "${self.packages.${system}.deployer-workspace}/bin/node-agent"; - PHOTONCLOUD_FLEET_SCHEDULER_BIN = - "${self.packages.${system}.deployer-workspace}/bin/fleet-scheduler"; - } '' - export 
HOME="$TMPDIR/home" - mkdir -p "$HOME" - export PATH="${pkgs.lib.makeBinPath [ - pkgs.bash - pkgs.coreutils - pkgs.curl - pkgs.findutils - pkgs.gawk - pkgs.gnugrep - pkgs.gnused - pkgs.procps - pkgs.python3 - ]}" - bash ${./deployer/scripts/verify-fleet-scheduler-e2e.sh} - touch "$out" - ''; - }; - } - ) // { + # Environment variables for clang-sys and other build tools + LIBCLANG_PATH = "${pkgs.llvmPackages.libclang.lib}/lib"; + PROTOC = "${pkgs.protobuf}/bin/protoc"; + ROCKSDB_LIB_DIR = "${pkgs.rocksdb}/lib"; + + shellHook = '' + echo "Cloud Platform Development Environment" + echo "=======================================" + echo "Rust: $(rustc --version)" + echo "Protoc: $(protoc --version)" + echo "Clang: $(clang --version | head -1)" + echo "" + echo "Environment variables set:" + echo " LIBCLANG_PATH=$LIBCLANG_PATH" + echo " PROTOC=$PROTOC" + echo " ROCKSDB_LIB_DIR=$ROCKSDB_LIB_DIR" + echo "" + echo "Available workspaces:" + echo " - chainfire (distributed cluster coordination store)" + echo " - flaredb (distributed SQL/KV database for metadata and tenant data)" + echo " - iam (identity & access management)" + echo " - plasmavmc (VM control plane)" + echo " - prismnet (SDN controller)" + echo " - flashdns (DNS server)" + echo " - fiberlb (load balancer)" + echo " - lightningstor (block storage)" + echo " - nightlight (metrics store)" + echo " - creditservice (quota & billing)" + echo " - k8shost (kubernetes hosting)" + ''; + }; + + # ====================================================================== + # PACKAGES: Buildable artifacts from each workspace + # ====================================================================== + packages = { + # -------------------------------------------------------------------- + # Chainfire: Distributed Cluster Coordination Store + # -------------------------------------------------------------------- + chainfire-server = buildRustWorkspace { + name = "chainfire-server"; + workspaceSubdir = "chainfire"; + mainCrate = 
"chainfire-server"; + description = "Distributed cluster coordination store with consensus, watches, and membership"; + }; + + # -------------------------------------------------------------------- + # FlareDB: Distributed SQL/KV Database + # -------------------------------------------------------------------- + flaredb-server = buildRustWorkspace { + name = "flaredb-server"; + workspaceSubdir = "flaredb"; + mainCrate = "flaredb-server"; + description = "Distributed Postgres-like SQL/KV database for service metadata, tenant data, and DBaaS"; + }; + + # -------------------------------------------------------------------- + # IAM: Identity and Access Management Service + # -------------------------------------------------------------------- + iam-server = buildRustWorkspace { + name = "iam-server"; + workspaceSubdir = "iam"; + mainCrate = "iam-server"; + description = "Identity and access management service with RBAC and multi-tenant support"; + }; + + # -------------------------------------------------------------------- + # CoronaFS: Shared Block Volume Service + # -------------------------------------------------------------------- + coronafs-server = buildRustWorkspace { + name = "coronafs-server"; + workspaceSubdir = "coronafs"; + mainCrate = "coronafs-server"; + description = "Shared block volume service exporting raw VM volumes over NBD"; + }; + + # -------------------------------------------------------------------- + # PlasmaVMC: Virtual Machine Control Plane + # -------------------------------------------------------------------- + plasmavmc-server = buildRustWorkspace { + name = "plasmavmc-server"; + workspaceSubdir = "plasmavmc"; + mainCrate = "plasmavmc-server"; + description = "Virtual machine control plane for managing compute instances"; + }; + + # -------------------------------------------------------------------- + # PrismNet: Software-Defined Networking Controller + # -------------------------------------------------------------------- + 
prismnet-server = buildRustWorkspace { + name = "prismnet-server"; + workspaceSubdir = "prismnet"; + mainCrate = "prismnet-server"; + description = "Software-defined networking controller with OVN integration"; + }; + + # -------------------------------------------------------------------- + # FlashDNS: High-Performance DNS Server + # -------------------------------------------------------------------- + flashdns-server = buildRustWorkspace { + name = "flashdns-server"; + workspaceSubdir = "flashdns"; + mainCrate = "flashdns-server"; + description = "High-performance DNS server with pattern-based reverse DNS"; + }; + + # -------------------------------------------------------------------- + # FiberLB: Layer 4/7 Load Balancer + # -------------------------------------------------------------------- + fiberlb-server = buildRustWorkspace { + name = "fiberlb-server"; + workspaceSubdir = "fiberlb"; + mainCrate = "fiberlb-server"; + description = "Layer 4/7 load balancer for distributing traffic across services"; + }; + + # -------------------------------------------------------------------- + # LightningStor: Block Storage Service + # -------------------------------------------------------------------- + lightningstor-server = buildRustWorkspace { + name = "lightningstor-server"; + workspaceSubdir = "lightningstor"; + mainCrate = "lightningstor-server"; + description = "Distributed block storage service for persistent volumes"; + }; + + lightningstor-node = buildRustWorkspace { + name = "lightningstor-node"; + workspaceSubdir = "lightningstor"; + mainCrate = "lightningstor-node"; + description = "LightningStor distributed storage node daemon"; + }; + + lightningstor-workspace = buildRustWorkspaceBundle { + name = "lightningstor-workspace"; + workspaceSubdir = "lightningstor"; + crates = [ + "lightningstor-server" + "lightningstor-node" + ]; + description = "Combined LightningStor server and node workspace build"; + }; + + # 
-------------------------------------------------------------------- + # NightLight: Prometheus-compatible Metrics Store + # -------------------------------------------------------------------- + nightlight-server = buildRustWorkspace { + name = "nightlight-server"; + workspaceSubdir = "nightlight"; + mainCrate = "nightlight-server"; + description = "Prometheus-compatible metrics storage (NightLight)"; + }; + + # -------------------------------------------------------------------- + # CreditService: Quota and Billing Controller + # -------------------------------------------------------------------- + creditservice-server = buildRustWorkspace { + name = "creditservice-server"; + workspaceSubdir = "creditservice"; + mainCrate = "creditservice-server"; + description = "Credit/quota management service with billing integration"; + }; + + # -------------------------------------------------------------------- + # APIGateway: API Gateway Service + # -------------------------------------------------------------------- + apigateway-server = buildRustWorkspace { + name = "apigateway-server"; + workspaceSubdir = "apigateway"; + mainCrate = "apigateway-server"; + description = "API Gateway for UltraCloud services"; + }; + + # -------------------------------------------------------------------- + # k8shost: Kubernetes Hosting Component + # -------------------------------------------------------------------- + k8shost-server = buildRustWorkspace { + name = "k8shost-server"; + workspaceSubdir = "k8shost"; + mainCrate = "k8shost-server"; + description = "Lightweight Kubernetes hosting with multi-tenant isolation"; + }; + + # -------------------------------------------------------------------- + # Deployer: Bare-metal bootstrap orchestration service + # -------------------------------------------------------------------- + deployer-server = buildRustWorkspace { + name = "deployer-server"; + workspaceSubdir = "deployer"; + mainCrate = "deployer-server"; + description = "Node 
bootstrap and phone-home orchestration service"; + }; + + deployer-ctl = buildRustWorkspace { + name = "deployer-ctl"; + workspaceSubdir = "deployer"; + mainCrate = "deployer-ctl"; + description = "Declarative control utility for UltraCloud deployer state"; + }; + + node-agent = buildRustWorkspace { + name = "node-agent"; + workspaceSubdir = "deployer"; + mainCrate = "node-agent"; + description = "Node-local runtime agent for UltraCloud scheduled services"; + }; + + nix-agent = buildRustWorkspace { + name = "nix-agent"; + workspaceSubdir = "deployer"; + mainCrate = "nix-agent"; + description = "Node-local NixOS reconciliation agent for UltraCloud hosts"; + }; + + ultracloud-reconciler = buildRustWorkspace { + name = "ultracloud-reconciler"; + workspaceSubdir = "deployer"; + mainCrate = "ultracloud-reconciler"; + description = "Declarative reconciler for host rollouts and published resources"; + }; + + ultracloudFlakeBundle = pkgs.runCommand "ultracloud-flake-bundle.tar.gz" + { + nativeBuildInputs = [ + pkgs.coreutils + pkgs.gnutar + pkgs.gzip + ]; + } '' + bundle_root="$(mktemp -d)" + cp -a ${bundledFlakeRootDrv}/. "$bundle_root"/ + chmod -R u+w "$bundle_root" + + tar \ + --sort=name \ + --mtime='@1' \ + --owner=0 \ + --group=0 \ + --numeric-owner \ + -C "$bundle_root" \ + -cf - . 
\ + | gzip -n > "$out" + ''; + + # -------------------------------------------------------------------- + # Fleet Scheduler: Non-Kubernetes service scheduler for bare-metal nodes + # -------------------------------------------------------------------- + fleet-scheduler = buildRustWorkspace { + name = "fleet-scheduler"; + workspaceSubdir = "deployer"; + mainCrate = "fleet-scheduler"; + description = "Label-aware service scheduler for UltraCloud bare-metal fleets"; + }; + + deployer-workspace = buildRustWorkspaceBundle { + name = "deployer-workspace"; + workspaceSubdir = "deployer"; + crates = [ + "deployer-server" + "deployer-ctl" + "node-agent" + "nix-agent" + "ultracloud-reconciler" + "fleet-scheduler" + ]; + description = "Combined deployer workspace build for cluster images and checks"; + }; + + vmClusterDeployerState = + self.nixosConfigurations.node01.config.system.build.ultracloudDeployerClusterState; + + vmClusterFlakeBundle = self.packages.${system}.ultracloudFlakeBundle; + vmSmokeBundledTargetToplevel = bundledVmSmokeTargetToplevel; + + # -------------------------------------------------------------------- + # Default package: Build all servers + # -------------------------------------------------------------------- + default = pkgs.symlinkJoin { + name = "ultracloud-all"; + paths = [ + self.packages.${system}.chainfire-server + self.packages.${system}.flaredb-server + self.packages.${system}.iam-server + self.packages.${system}.plasmavmc-server + self.packages.${system}.prismnet-server + self.packages.${system}.flashdns-server + self.packages.${system}.fiberlb-server + self.packages.${system}.lightningstor-workspace + self.packages.${system}.nightlight-server + self.packages.${system}.creditservice-server + self.packages.${system}.apigateway-server + self.packages.${system}.k8shost-server + self.packages.${system}.deployer-workspace + self.packages.${system}.vmClusterDeployerState + ]; + }; + }; + + # 
====================================================================== + # APPS: Runnable applications from packages + # ====================================================================== + apps = { + chainfire-server = flake-utils.lib.mkApp { + drv = self.packages.${system}.chainfire-server; + }; + + flaredb-server = flake-utils.lib.mkApp { + drv = self.packages.${system}.flaredb-server; + }; + + iam-server = flake-utils.lib.mkApp { + drv = self.packages.${system}.iam-server; + }; + + plasmavmc-server = flake-utils.lib.mkApp { + drv = self.packages.${system}.plasmavmc-server; + }; + + prismnet-server = flake-utils.lib.mkApp { + drv = self.packages.${system}.prismnet-server; + }; + + flashdns-server = flake-utils.lib.mkApp { + drv = self.packages.${system}.flashdns-server; + }; + + fiberlb-server = flake-utils.lib.mkApp { + drv = self.packages.${system}.fiberlb-server; + }; + + lightningstor-server = flake-utils.lib.mkApp { + drv = self.packages.${system}.lightningstor-server; + }; + + lightningstor-node = flake-utils.lib.mkApp { + drv = self.packages.${system}.lightningstor-node; + }; + + nightlight-server = flake-utils.lib.mkApp { + drv = self.packages.${system}.nightlight-server; + }; + + creditservice-server = flake-utils.lib.mkApp { + drv = self.packages.${system}.creditservice-server; + }; + + apigateway-server = flake-utils.lib.mkApp { + drv = self.packages.${system}.apigateway-server; + }; + + k8shost-server = flake-utils.lib.mkApp { + drv = self.packages.${system}.k8shost-server; + }; + + deployer-server = flake-utils.lib.mkApp { + drv = self.packages.${system}.deployer-server; + }; + + deployer-ctl = flake-utils.lib.mkApp { + drv = self.packages.${system}.deployer-ctl; + }; + + ultracloud-reconciler = flake-utils.lib.mkApp { + drv = self.packages.${system}.ultracloud-reconciler; + }; + + nix-agent = flake-utils.lib.mkApp { + drv = self.packages.${system}.nix-agent; + }; + + node-agent = flake-utils.lib.mkApp { + drv = 
self.packages.${system}.node-agent; + }; + + fleet-scheduler = flake-utils.lib.mkApp { + drv = self.packages.${system}.fleet-scheduler; + }; + }; + + checks = { + workspace-source-roots-audit = pkgs.runCommand "workspace-source-roots-audit" + { + nativeBuildInputs = [ pkgs.python3 ]; + } '' + ${pkgs.python3}/bin/python - <<'PY' ${./.} + from __future__ import annotations + + import re + import sys + import tomllib + from pathlib import Path + from typing import Any + + + def extract_workspace_source_roots(flake_path: Path) -> dict[str, list[str]]: + source = flake_path.read_text() + match = re.search(r"workspaceSourceRoots\s*=\s*\{(.*?)\n\s*\};", source, re.S) + if match is None: + raise ValueError(f"Could not find workspaceSourceRoots in {flake_path}") + + roots: dict[str, list[str]] = {} + for name, body in re.findall(r"\n\s*(\w+)\s*=\s*\[(.*?)\];", match.group(1), re.S): + roots[name] = re.findall(r'"([^"]+)"', body) + return roots + + + def collect_path_dependencies(value: Any) -> list[str]: + found: list[str] = [] + + if isinstance(value, dict): + path = value.get("path") + if isinstance(path, str): + found.append(path) + for nested in value.values(): + found.extend(collect_path_dependencies(nested)) + elif isinstance(value, list): + for nested in value: + found.extend(collect_path_dependencies(nested)) + + return found + + + def workspace_manifests(repo_root: Path, workspace_name: str) -> list[Path]: + workspace_manifest = repo_root / workspace_name / "Cargo.toml" + manifests = [workspace_manifest] + workspace_data = tomllib.loads(workspace_manifest.read_text()) + members = workspace_data.get("workspace", {}).get("members", []) + + for member in members: + for candidate in workspace_manifest.parent.glob(member): + manifest = candidate if candidate.name == "Cargo.toml" else candidate / "Cargo.toml" + if manifest.is_file(): + manifests.append(manifest) + + unique_manifests: list[Path] = [] + seen: set[Path] = set() + for manifest in manifests: + resolved = 
manifest.resolve() + if resolved in seen: + continue + seen.add(resolved) + unique_manifests.append(manifest) + return unique_manifests + + + def required_root(dep_rel: Path) -> str: + parts = dep_rel.parts + if not parts: + return "" + if parts[0] == "crates" and len(parts) >= 2: + return "/".join(parts[:2]) + return parts[0] + + + def is_covered(dep_rel: str, configured_roots: list[str]) -> bool: + return any(dep_rel == root or dep_rel.startswith(f"{root}/") for root in configured_roots) + + + def main() -> int: + repo_root = Path(sys.argv[1]).resolve() + workspace_roots = extract_workspace_source_roots(repo_root / "flake.nix") + failures: list[str] = [] + + for workspace_name, configured_roots in sorted(workspace_roots.items()): + workspace_manifest = repo_root / workspace_name / "Cargo.toml" + if not workspace_manifest.is_file(): + continue + + for manifest in workspace_manifests(repo_root, workspace_name): + manifest_data = tomllib.loads(manifest.read_text()) + for dep_path in collect_path_dependencies(manifest_data): + dependency_dir = (manifest.parent / dep_path).resolve() + try: + dep_rel = dependency_dir.relative_to(repo_root) + except ValueError: + continue + + dep_rel_str = dep_rel.as_posix() + if is_covered(dep_rel_str, configured_roots): + continue + + failures.append( + f"{workspace_name}: missing source root '{required_root(dep_rel)}' " + f"for dependency '{dep_rel_str}' referenced by " + f"{manifest.relative_to(repo_root).as_posix()}" + ) + + if failures: + print("workspaceSourceRoots is missing path dependencies:", file=sys.stderr) + for failure in failures: + print(f" - {failure}", file=sys.stderr) + return 1 + + print("workspaceSourceRoots covers all workspace path dependencies.") + return 0 + + + raise SystemExit(main()) + PY + touch "$out" + ''; + + first-boot-topology-vm-smoke = pkgs.testers.runNixOSTest ( + import ./nix/tests/first-boot-topology-vm-smoke.nix { + inherit pkgs; + ultracloudPackages = self.packages.${system}; + ultracloudModule 
= self.nixosModules.default; + } + ); + + deployer-vm-smoke = pkgs.testers.runNixOSTest ( + import ./nix/tests/deployer-vm-smoke.nix { + inherit pkgs; + ultracloudPackages = self.packages.${system}; + smokeTargetToplevel = self.packages.${system}.vmSmokeBundledTargetToplevel; + } + ); + + deployer-vm-rollback = pkgs.testers.runNixOSTest ( + import ./nix/tests/deployer-vm-smoke.nix { + inherit pkgs; + ultracloudPackages = self.packages.${system}; + smokeTargetToplevel = self.packages.${system}.vmSmokeBundledTargetToplevel; + desiredSystemOverrides = { + health_check_command = [ "false" ]; + rollback_on_failure = true; + }; + expectedStatus = "rolled-back"; + expectCurrentSystemMatchesTarget = false; + expectMarkerPresent = false; + } + ); + + fiberlb-native-bgp-vm-smoke = pkgs.testers.runNixOSTest ( + import ./nix/tests/fiberlb-native-bgp-vm-smoke.nix { + inherit pkgs; + ultracloudPackages = self.packages.${system}; + ultracloudModule = self.nixosModules.default; + } + ); + + fiberlb-native-bgp-multipath-vm-smoke = pkgs.testers.runNixOSTest ( + import ./nix/tests/fiberlb-native-bgp-multipath-vm-smoke.nix { + inherit pkgs; + ultracloudPackages = self.packages.${system}; + ultracloudModule = self.nixosModules.default; + } + ); + + fiberlb-native-bgp-interop-vm-smoke = pkgs.testers.runNixOSTest ( + import ./nix/tests/fiberlb-native-bgp-interop-vm-smoke.nix { + inherit pkgs; + ultracloudPackages = self.packages.${system}; + ultracloudModule = self.nixosModules.default; + } + ); + + fiberlb-native-bgp-ecmp-drain-vm-smoke = pkgs.testers.runNixOSTest ( + import ./nix/tests/fiberlb-native-bgp-ecmp-drain-vm-smoke.nix { + inherit pkgs; + ultracloudPackages = self.packages.${system}; + ultracloudModule = self.nixosModules.default; + } + ); + + deployer-bootstrap-e2e = pkgs.runCommand "deployer-bootstrap-e2e" + { + nativeBuildInputs = with pkgs; [ + bash + coreutils + curl + findutils + gawk + gnugrep + gnused + procps + python3 + ]; + ULTRACLOUD_E2E_IN_NIX = "1"; + 
ULTRACLOUD_CHAINFIRE_SERVER_BIN = + "${self.packages.${system}.chainfire-server}/bin/chainfire"; + ULTRACLOUD_DEPLOYER_SERVER_BIN = + "${self.packages.${system}.deployer-workspace}/bin/deployer-server"; + ULTRACLOUD_DEPLOYER_CTL_BIN = + "${self.packages.${system}.deployer-workspace}/bin/deployer-ctl"; + } '' + export HOME="$TMPDIR/home" + mkdir -p "$HOME" + export PATH="${pkgs.lib.makeBinPath [ + pkgs.bash + pkgs.coreutils + pkgs.curl + pkgs.findutils + pkgs.gawk + pkgs.gnugrep + pkgs.gnused + pkgs.procps + pkgs.python3 + ]}" + bash ${./deployer/scripts/verify-deployer-bootstrap-e2e.sh} + touch "$out" + ''; + + host-lifecycle-e2e = pkgs.runCommand "host-lifecycle-e2e" + { + nativeBuildInputs = with pkgs; [ + bash + coreutils + curl + findutils + gawk + gnugrep + gnused + procps + python3 + ]; + ULTRACLOUD_E2E_IN_NIX = "1"; + ULTRACLOUD_CHAINFIRE_SERVER_BIN = + "${self.packages.${system}.chainfire-server}/bin/chainfire"; + ULTRACLOUD_DEPLOYER_CTL_BIN = + "${self.packages.${system}.deployer-workspace}/bin/deployer-ctl"; + ULTRACLOUD_RECONCILER_BIN = + "${self.packages.${system}.deployer-workspace}/bin/ultracloud-reconciler"; + } '' + export HOME="$TMPDIR/home" + mkdir -p "$HOME" + export PATH="${pkgs.lib.makeBinPath [ + pkgs.bash + pkgs.coreutils + pkgs.curl + pkgs.findutils + pkgs.gawk + pkgs.gnugrep + pkgs.gnused + pkgs.procps + pkgs.python3 + ]}" + bash ${./deployer/scripts/verify-host-lifecycle-e2e.sh} + touch "$out" + ''; + + fleet-scheduler-e2e = pkgs.runCommand "fleet-scheduler-e2e" + { + nativeBuildInputs = with pkgs; [ + bash + coreutils + curl + findutils + gawk + gnugrep + gnused + procps + python3 + ]; + ULTRACLOUD_E2E_IN_NIX = "1"; + ULTRACLOUD_CHAINFIRE_SERVER_BIN = + "${self.packages.${system}.chainfire-server}/bin/chainfire"; + ULTRACLOUD_DEPLOYER_CTL_BIN = + "${self.packages.${system}.deployer-workspace}/bin/deployer-ctl"; + ULTRACLOUD_NODE_AGENT_BIN = + "${self.packages.${system}.deployer-workspace}/bin/node-agent"; + ULTRACLOUD_FLEET_SCHEDULER_BIN 
= + "${self.packages.${system}.deployer-workspace}/bin/fleet-scheduler"; + } '' + export HOME="$TMPDIR/home" + mkdir -p "$HOME" + export PATH="${pkgs.lib.makeBinPath [ + pkgs.bash + pkgs.coreutils + pkgs.curl + pkgs.findutils + pkgs.gawk + pkgs.gnugrep + pkgs.gnused + pkgs.procps + pkgs.python3 + ]}" + bash ${./deployer/scripts/verify-fleet-scheduler-e2e.sh} + touch "$out" + ''; + }; + } + ) // { # ======================================================================== # NIXOS MODULES: System-level service modules (non-system-specific) # ======================================================================== nixosModules.default = import ./nix/modules; - nixosModules.photoncloud = import ./nix/modules; - nixosModules.plasmacloud = import ./nix/modules; # backwards compatibility + nixosModules.ultracloud = import ./nix/modules; # ======================================================================== # NIXOS CONFIGURATIONS: Netboot images for bare-metal provisioning # ======================================================================== - nixosConfigurations = { - # Control Plane netboot image (all 8 services) - netboot-control-plane = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ ./nix/images/netboot-control-plane.nix ]; - }; + nixosConfigurations = + let + vmClusterLib = import ./nix/nodes/vm-cluster/lib.nix { lib = nixpkgs.lib; }; + mkVmClusterSystem = nodeName: + nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ + disko.nixosModules.disko + vmClusterLib.nodeConfigurationPaths.${nodeName} + self.nixosModules.default + (vmClusterLib.mkBootstrapServicesModule { + inherit self nodeName; + enableDeployer = nodeName == vmClusterLib.bootstrapNodeName; + }) + { nixpkgs.overlays = [ self.overlays.default ]; } + ]; + }; + in + { + # Control Plane netboot image (all 8 services) + netboot-control-plane = nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ ./nix/images/netboot-control-plane.nix ]; + }; - # Worker 
netboot image (compute-focused services) - netboot-worker = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ ./nix/images/netboot-worker.nix ]; - }; + # Worker netboot image (compute-focused services) + netboot-worker = nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ ./nix/images/netboot-worker.nix ]; + }; - # All-in-One netboot image (single-node deployment) - netboot-all-in-one = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ ./nix/images/netboot-all-in-one.nix ]; - }; + # All-in-One netboot image (single-node deployment) + netboot-all-in-one = nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ ./nix/images/netboot-all-in-one.nix ]; + }; - # Base netboot image (minimal, for VM testing and provisioning) - netboot-base = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ ./nix/images/netboot-base.nix ]; - }; + # Base netboot image (minimal, for VM testing and provisioning) + netboot-base = nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ ./nix/images/netboot-base.nix ]; + }; - # Offline-friendly target used by deployer VM smoke tests. - vm-smoke-target = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ ./nix/images/deployer-vm-smoke-target.nix ]; - }; + # Offline-friendly target used by deployer VM smoke tests. 
+ vm-smoke-target = nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ ./nix/images/deployer-vm-smoke-target.nix ]; + }; - # PlasmaCloud ISO (T061.S5 - bootable ISO with cluster-config embedding) - plasmacloud-iso = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ - ./nix/iso/plasmacloud-iso.nix - nix-nos.nixosModules.default - self.nixosModules.default - { nixpkgs.overlays = [ self.overlays.default ]; } - ]; - }; + # UltraCloud ISO (T061.S5 - bootable ISO with cluster-config embedding) + ultracloud-iso = nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ + ./nix/iso/ultracloud-iso.nix + self.nixosModules.default + { nixpkgs.overlays = [ self.overlays.default ]; } + ]; + }; - # T036 VM Cluster Nodes (for nixos-anywhere deployment) - pxe-server = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ - disko.nixosModules.disko - ./baremetal/vm-cluster/pxe-server/configuration.nix - ./baremetal/vm-cluster/pxe-server/disko.nix - self.nixosModules.default - { nixpkgs.overlays = [ self.overlays.default ]; } - ]; - }; - - node01 = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ - disko.nixosModules.disko - nix-nos.nixosModules.default - ./nix/nodes/vm-cluster/node01/configuration.nix - self.nixosModules.default - ({ pkgs, ... 
}: { - services.deployer = { - enable = true; - bindAddr = "0.0.0.0:8088"; - chainfireEndpoints = [ "http://192.168.100.11:2379" ]; - clusterId = "plasmacloud-vm-cluster"; - requireChainfire = true; - allowUnknownNodes = false; - allowUnauthenticated = false; - bootstrapToken = "vm-cluster-bootstrap-token"; - adminToken = "vm-cluster-admin-token"; - bootstrapFlakeBundle = pkgs.plasmacloudFlakeBundle; - seedClusterState = true; - }; - - services.nix-agent = { - enable = true; - chainfireEndpoint = "http://192.168.100.11:2379"; - clusterId = "plasmacloud-vm-cluster"; - nodeId = "node01"; - flakeRoot = self.outPath; - intervalSecs = 30; - apply = true; - }; - }) - { nixpkgs.overlays = [ self.overlays.default ]; } - ]; - }; - - node02 = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ - disko.nixosModules.disko - nix-nos.nixosModules.default - ./nix/nodes/vm-cluster/node02/configuration.nix - self.nixosModules.default - { - services.nix-agent = { - enable = true; - chainfireEndpoint = "http://192.168.100.11:2379"; - clusterId = "plasmacloud-vm-cluster"; - nodeId = "node02"; - flakeRoot = self.outPath; - intervalSecs = 30; - apply = true; - }; - } - { nixpkgs.overlays = [ self.overlays.default ]; } - ]; - }; - - node03 = nixpkgs.lib.nixosSystem { - system = "x86_64-linux"; - modules = [ - disko.nixosModules.disko - nix-nos.nixosModules.default - ./nix/nodes/vm-cluster/node03/configuration.nix - self.nixosModules.default - { - services.nix-agent = { - enable = true; - chainfireEndpoint = "http://192.168.100.11:2379"; - clusterId = "plasmacloud-vm-cluster"; - nodeId = "node03"; - flakeRoot = self.outPath; - intervalSecs = 30; - apply = true; - }; - } - { nixpkgs.overlays = [ self.overlays.default ]; } - ]; - }; - }; + # T036 VM Cluster Nodes (for nixos-anywhere deployment) + pxe-server = nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ + disko.nixosModules.disko + ./baremetal/vm-cluster/pxe-server/configuration.nix + 
./baremetal/vm-cluster/pxe-server/disko.nix + self.nixosModules.default + { nixpkgs.overlays = [ self.overlays.default ]; } + ]; + }; + } + // nixpkgs.lib.genAttrs vmClusterLib.controlPlaneNodeNames mkVmClusterSystem; # ======================================================================== - # OVERLAY: Provides PhotonCloud packages to nixpkgs + # OVERLAY: Provides UltraCloud packages to nixpkgs # ======================================================================== # Usage in NixOS configuration: - # nixpkgs.overlays = [ inputs.photoncloud.overlays.default ]; + # nixpkgs.overlays = [ inputs.ultracloud.overlays.default ]; overlays.default = final: prev: { chainfire-server = self.packages.${final.system}.chainfire-server; flaredb-server = self.packages.${final.system}.flaredb-server; @@ -1539,8 +1457,8 @@ deployer-workspace = self.packages.${final.system}.deployer-workspace; deployer-server = self.packages.${final.system}.deployer-workspace; deployer-ctl = self.packages.${final.system}.deployer-workspace; - plasmacloud-reconciler = self.packages.${final.system}.deployer-workspace; - plasmacloudFlakeBundle = self.packages.${final.system}.plasmacloudFlakeBundle; + ultracloud-reconciler = self.packages.${final.system}.deployer-workspace; + ultracloudFlakeBundle = self.packages.${final.system}.ultracloudFlakeBundle; nix-agent = self.packages.${final.system}.deployer-workspace; node-agent = self.packages.${final.system}.deployer-workspace; fleet-scheduler = self.packages.${final.system}.deployer-workspace; diff --git a/k8shost/crates/k8shost-controllers/src/main.rs b/k8shost/crates/k8shost-controllers/src/main.rs index 0125010..269caab 100644 --- a/k8shost/crates/k8shost-controllers/src/main.rs +++ b/k8shost/crates/k8shost-controllers/src/main.rs @@ -1,13 +1,13 @@ //! k8shost Controllers //! -//! This binary runs the PlasmaCloud integration controllers for k8shost: +//! This binary runs the UltraCloud integration controllers for k8shost: //! 
- FiberLB Controller: Manages LoadBalancer services //! - FlashDNS Controller: Manages Service DNS records //! - IAM Webhook: Handles TokenReview authentication //! //! Each controller follows the watch-reconcile pattern: //! 1. Watch k8s API for resource changes -//! 2. Reconcile desired state with PlasmaCloud components +//! 2. Reconcile desired state with UltraCloud components //! 3. Update k8s resource status use anyhow::Result; diff --git a/k8shost/crates/k8shost-server/src/fiberlb_controller.rs b/k8shost/crates/k8shost-server/src/fiberlb_controller.rs index 6199fca..ac894b6 100644 --- a/k8shost/crates/k8shost-server/src/fiberlb_controller.rs +++ b/k8shost/crates/k8shost-server/src/fiberlb_controller.rs @@ -26,8 +26,8 @@ use tonic::{transport::Channel, Code}; use tracing::{debug, info, warn}; const CONTROLLER_PRINCIPAL_ID: &str = "k8shost-controller"; -const LB_ID_ANNOTATION: &str = "fiberlb.plasmacloud.io/lb-id"; -const POOL_ID_ANNOTATION: &str = "fiberlb.plasmacloud.io/pool-id"; +const LB_ID_ANNOTATION: &str = "fiberlb.ultracloud.io/lb-id"; +const POOL_ID_ANNOTATION: &str = "fiberlb.ultracloud.io/pool-id"; /// FiberLB controller for managing LoadBalancer service VIPs pub struct FiberLbController { diff --git a/k8shost/crates/k8shost-server/src/flashdns_controller.rs b/k8shost/crates/k8shost-server/src/flashdns_controller.rs index 9f77f56..699612d 100644 --- a/k8shost/crates/k8shost-server/src/flashdns_controller.rs +++ b/k8shost/crates/k8shost-server/src/flashdns_controller.rs @@ -25,8 +25,8 @@ use tracing::{debug, info, warn}; const CLUSTER_DOMAIN: &str = "cluster.local"; const DNS_RECORD_TTL: u32 = 60; const CONTROLLER_PRINCIPAL_ID: &str = "k8shost-controller"; -const RECORD_ID_ANNOTATION: &str = "flashdns.plasmacloud.io/record-id"; -const ZONE_ID_ANNOTATION: &str = "flashdns.plasmacloud.io/zone-id"; +const RECORD_ID_ANNOTATION: &str = "flashdns.ultracloud.io/record-id"; +const ZONE_ID_ANNOTATION: &str = "flashdns.ultracloud.io/zone-id"; /// FlashDNS 
controller for managing cluster.local DNS records pub struct FlashDnsController { @@ -365,8 +365,8 @@ impl FlashDnsController { name: CLUSTER_DOMAIN.to_string(), org_id: tenant.org_id.clone(), project_id: tenant.project_id.clone(), - primary_ns: "ns1.plasmacloud.io".to_string(), - admin_email: "admin@plasmacloud.io".to_string(), + primary_ns: "ns1.ultracloud.io".to_string(), + admin_email: "admin@ultracloud.io".to_string(), }, auth_token, )) diff --git a/k8shost/crates/k8shost-server/src/main.rs b/k8shost/crates/k8shost-server/src/main.rs index f3ecbb5..739e385 100644 --- a/k8shost/crates/k8shost-server/src/main.rs +++ b/k8shost/crates/k8shost-server/src/main.rs @@ -35,7 +35,7 @@ use tracing_subscriber::EnvFilter; /// k8shost API Server #[derive(Parser, Debug)] #[command(name = "k8shost-server")] -#[command(about = "Kubernetes API server for PlasmaCloud's k8shost component")] +#[command(about = "Kubernetes API server for UltraCloud's k8shost component")] struct Args { /// Configuration file path #[arg(short, long, default_value = "k8shost.toml")] diff --git a/k8shost/crates/k8shost-server/src/services/deployment.rs b/k8shost/crates/k8shost-server/src/services/deployment.rs index 667f8df..e434026 100644 --- a/k8shost/crates/k8shost-server/src/services/deployment.rs +++ b/k8shost/crates/k8shost-server/src/services/deployment.rs @@ -25,9 +25,9 @@ const ACTION_DEPLOYMENT_LIST: &str = "k8s:deployments:list"; const ACTION_DEPLOYMENT_UPDATE: &str = "k8s:deployments:update"; const ACTION_DEPLOYMENT_DELETE: &str = "k8s:deployments:delete"; -pub(crate) const DEPLOYMENT_NAME_ANNOTATION: &str = "k8shost.photoncloud.io/deployment-name"; -pub(crate) const DEPLOYMENT_UID_ANNOTATION: &str = "k8shost.photoncloud.io/deployment-uid"; -pub(crate) const TEMPLATE_HASH_ANNOTATION: &str = "k8shost.photoncloud.io/template-hash"; +pub(crate) const DEPLOYMENT_NAME_ANNOTATION: &str = "k8shost.ultracloud.io/deployment-name"; +pub(crate) const DEPLOYMENT_UID_ANNOTATION: &str = 
"k8shost.ultracloud.io/deployment-uid"; +pub(crate) const TEMPLATE_HASH_ANNOTATION: &str = "k8shost.ultracloud.io/template-hash"; #[derive(Clone)] pub struct DeploymentServiceImpl { diff --git a/k8shost/crates/k8shost-types/src/lib.rs b/k8shost/crates/k8shost-types/src/lib.rs index c1eedbe..b00c040 100644 --- a/k8shost/crates/k8shost-types/src/lib.rs +++ b/k8shost/crates/k8shost-types/src/lib.rs @@ -27,7 +27,7 @@ pub struct ObjectMeta { #[serde(default, skip_serializing_if = "HashMap::is_empty")] pub annotations: HashMap, - // Multi-tenant fields for PlasmaCloud integration + // Multi-tenant fields for UltraCloud integration #[serde(skip_serializing_if = "Option::is_none")] pub org_id: Option, #[serde(skip_serializing_if = "Option::is_none")] diff --git a/mtls-agent/src/discovery.rs b/mtls-agent/src/discovery.rs index fbbd6d6..287b3a3 100644 --- a/mtls-agent/src/discovery.rs +++ b/mtls-agent/src/discovery.rs @@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize}; use tokio::sync::RwLock; use tracing::{info, warn}; -const PHOTON_PREFIX: &str = "photoncloud"; +const PHOTON_PREFIX: &str = "ultracloud"; const CACHE_TTL: Duration = Duration::from_secs(30); const POLICY_CACHE_TTL: Duration = Duration::from_secs(30); diff --git a/mtls-agent/src/main.rs b/mtls-agent/src/main.rs index 20ce002..f908095 100644 --- a/mtls-agent/src/main.rs +++ b/mtls-agent/src/main.rs @@ -404,7 +404,7 @@ mod tests { "-days", "3650", "-subj", - "/CN=PhotonCloud Test CA", + "/CN=UltraCloud Test CA", "-out", ca_pem.to_string_lossy().as_ref(), ]); @@ -464,7 +464,7 @@ mod tests { "-key", client_key.to_string_lossy().as_ref(), "-subj", - "/CN=photoncloud-test-client", + "/CN=ultracloud-test-client", "-out", client_csr.to_string_lossy().as_ref(), ]); diff --git a/nix-nos/modules/default.nix b/nix-nos/modules/default.nix index 68a5862..e797eb1 100644 --- a/nix-nos/modules/default.nix +++ b/nix-nos/modules/default.nix @@ -2,7 +2,6 @@ { imports = [ - ./topology.nix ./network/interfaces.nix 
./network/vlans.nix ./bgp/default.nix @@ -10,7 +9,7 @@ ]; options.nix-nos = { - enable = lib.mkEnableOption "Nix-NOS network operating system modules"; + enable = lib.mkEnableOption "Nix-NOS network primitive modules"; version = lib.mkOption { type = lib.types.str; diff --git a/nix-nos/modules/topology.nix b/nix-nos/modules/topology.nix deleted file mode 100644 index 83b766b..0000000 --- a/nix-nos/modules/topology.nix +++ /dev/null @@ -1,68 +0,0 @@ -{ config, lib, pkgs, ... }: - -with lib; - -let - cfg = config.nix-nos; - clusterConfigLib = import ../lib/cluster-config-lib.nix { inherit lib; }; - nodeType = clusterConfigLib.mkNodeType types; - - # Cluster definition type - clusterType = types.submodule { - options = { - name = mkOption { - type = types.str; - default = "plasmacloud-cluster"; - description = "Cluster name"; - }; - - nodes = mkOption { - type = types.attrsOf nodeType; - default = {}; - description = "Map of node names to their configurations"; - example = literalExpression '' - { - "node01" = { - role = "control-plane"; - ip = "10.0.1.10"; - services = [ "chainfire" "flaredb" ]; - }; - } - ''; - }; - - bootstrapNode = mkOption { - type = types.nullOr types.str; - default = null; - description = "Name of the bootstrap node (first control-plane node if null)"; - }; - }; - }; - -in { - options.nix-nos = { - clusters = mkOption { - type = types.attrsOf clusterType; - default = {}; - description = "Map of cluster names to their configurations"; - }; - - # Helper function to generate cluster-config.json for a specific node - generateClusterConfig = mkOption { - type = types.functionTo types.attrs; - default = { hostname, clusterName ? 
"plasmacloud" }: - let - cluster = cfg.clusters.${clusterName} or (throw "Cluster ${clusterName} not found"); - in clusterConfigLib.mkClusterConfig { - inherit cluster hostname; - bootstrapNodeName = - if cluster.bootstrapNode != null - then cluster.bootstrapNode - else null; - }; - description = "Function to generate cluster-config.json for a specific hostname"; - }; - }; - - config = mkIf cfg.enable { }; -} diff --git a/nix/ci/flake.lock b/nix/ci/flake.lock index 5a9868a..81da7aa 100644 --- a/nix/ci/flake.lock +++ b/nix/ci/flake.lock @@ -3,7 +3,7 @@ "disko": { "inputs": { "nixpkgs": [ - "photoncloud", + "ultracloud", "nixpkgs" ] }, @@ -57,32 +57,13 @@ "type": "github" } }, - "nix-nos": { - "inputs": { - "nixpkgs": [ - "photoncloud", - "nixpkgs" - ] - }, - "locked": { - "path": "./nix-nos", - "type": "path" - }, - "original": { - "path": "./nix-nos", - "type": "path" - }, - "parent": [ - "photoncloud" - ] - }, "nixpkgs": { "locked": { - "lastModified": 1765186076, - "narHash": "sha256-hM20uyap1a0M9d344I692r+ik4gTMyj60cQWO+hAYP8=", + "lastModified": 1775036866, + "narHash": "sha256-ZojAnPuCdy657PbTq5V0Y+AHKhZAIwSIT2cb8UgAz/U=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "addf7cf5f383a3101ecfba091b98d0a1263dc9b8", + "rev": "6201e203d09599479a3b3450ed24fa81537ebc4e", "type": "github" }, "original": { @@ -108,46 +89,26 @@ "type": "github" } }, - "photoncloud": { - "inputs": { - "disko": "disko", - "flake-utils": "flake-utils_2", - "nix-nos": "nix-nos", - "nixpkgs": "nixpkgs_2", - "rust-overlay": "rust-overlay", - "systems": "systems_3" - }, - "locked": { - "path": "../..", - "type": "path" - }, - "original": { - "path": "../..", - "type": "path" - }, - "parent": [] - }, "root": { "inputs": { "flake-utils": "flake-utils", "nixpkgs": "nixpkgs", - "photoncloud": "photoncloud", - "rust-overlay": "rust-overlay_2" + "rust-overlay": "rust-overlay", + "ultracloud": "ultracloud" } }, "rust-overlay": { "inputs": { "nixpkgs": [ - "photoncloud", "nixpkgs" ] }, "locked": { - 
"lastModified": 1765465581, - "narHash": "sha256-fCXT0aZXmTalM3NPCTedVs9xb0egBG5BOZkcrYo5PGE=", + "lastModified": 1775272153, + "narHash": "sha256-FwYb64ysv8J2TxaqsYYcDyHAHBUEaQlriPMWPMi1K7M=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "99cc5667eece98bb35dcf35f7e511031a8b7a125", + "rev": "740fb0203b2852917b909a72b948d34d0b171ec0", "type": "github" }, "original": { @@ -159,15 +120,16 @@ "rust-overlay_2": { "inputs": { "nixpkgs": [ + "ultracloud", "nixpkgs" ] }, "locked": { - "lastModified": 1765507345, - "narHash": "sha256-fq34mBLvAgv93EuZjGp7cVV633pxnph9AVuB/Ql5y5Q=", + "lastModified": 1765465581, + "narHash": "sha256-fCXT0aZXmTalM3NPCTedVs9xb0egBG5BOZkcrYo5PGE=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "a9471b23bf656d69ceb2d5ddccdc5082d51fc0e3", + "rev": "99cc5667eece98bb35dcf35f7e511031a8b7a125", "type": "github" }, "original": { @@ -219,6 +181,24 @@ "id": "systems", "type": "indirect" } + }, + "ultracloud": { + "inputs": { + "disko": "disko", + "flake-utils": "flake-utils_2", + "nixpkgs": "nixpkgs_2", + "rust-overlay": "rust-overlay_2", + "systems": "systems_3" + }, + "locked": { + "path": "../..", + "type": "path" + }, + "original": { + "path": "../..", + "type": "path" + }, + "parent": [] } }, "root": "root", diff --git a/nix/ci/flake.nix b/nix/ci/flake.nix index f776de1..d797f50 100644 --- a/nix/ci/flake.nix +++ b/nix/ci/flake.nix @@ -1,11 +1,11 @@ { - description = "PhotonCloud local CI gates (Nix-first, CI-provider-agnostic)"; + description = "UltraCloud local CI gates (Nix-first, CI-provider-agnostic)"; inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; flake-utils.url = "github:numtide/flake-utils"; - photoncloud.url = "path:../.."; + ultracloud.url = "path:../.."; rust-overlay = { url = "github:oxalica/rust-overlay"; @@ -13,7 +13,7 @@ }; }; - outputs = { self, nixpkgs, flake-utils, photoncloud, rust-overlay }: + outputs = { self, nixpkgs, flake-utils, ultracloud, rust-overlay }: flake-utils.lib.eachDefaultSystem 
(system: let overlays = [ (import rust-overlay) ]; @@ -32,7 +32,7 @@ supportedWorkspaces = pkgs.lib.concatStringsSep ", " wsList; gate = pkgs.writeShellApplication { - name = "photoncloud-gate"; + name = "ultracloud-gate"; runtimeInputs = with pkgs; [ bash @@ -59,10 +59,10 @@ usage() { cat <<'USAGE' - PhotonCloud local CI gates (provider-agnostic) + UltraCloud local CI gates (provider-agnostic) Usage: - photoncloud-gate [--tier 0|1|2] [--workspace ] [--shared-crates] [--shared-crate ] [--no-logs] [--fix] + ultracloud-gate [--tier 0|1|2] [--workspace ] [--shared-crates] [--shared-crate ] [--no-logs] [--fix] Tiers: 0: fmt + clippy + unit tests (lib) (fast, stable default) @@ -374,19 +374,19 @@ packages.gate-ci = gate; # Checks are minimal and mirror tier0 (provider-agnostic). - checks.gate-tier0 = pkgs.runCommand "photoncloud-gate-tier0" { } '' + checks.gate-tier0 = pkgs.runCommand "ultracloud-gate-tier0" { } '' mkdir -p $out - ${gate}/bin/photoncloud-gate --tier 0 --no-logs + ${gate}/bin/ultracloud-gate --tier 0 --no-logs touch $out/ok ''; - checks.deployer-vm-smoke = photoncloud.checks.${system}.deployer-vm-smoke; - checks.deployer-vm-rollback = photoncloud.checks.${system}.deployer-vm-rollback; - checks.deployer-bootstrap-e2e = photoncloud.checks.${system}.deployer-bootstrap-e2e; - checks.host-lifecycle-e2e = photoncloud.checks.${system}.host-lifecycle-e2e; - checks.fleet-scheduler-e2e = photoncloud.checks.${system}.fleet-scheduler-e2e; + checks.deployer-vm-smoke = ultracloud.checks.${system}.deployer-vm-smoke; + checks.deployer-vm-rollback = ultracloud.checks.${system}.deployer-vm-rollback; + checks.deployer-bootstrap-e2e = ultracloud.checks.${system}.deployer-bootstrap-e2e; + checks.host-lifecycle-e2e = ultracloud.checks.${system}.host-lifecycle-e2e; + checks.fleet-scheduler-e2e = ultracloud.checks.${system}.fleet-scheduler-e2e; devShells.default = pkgs.mkShell { - name = "photoncloud-ci-dev"; + name = "ultracloud-ci-dev"; buildInputs = with pkgs; [ 
rustToolchain protobuf diff --git a/nix/ci/workspaces.json b/nix/ci/workspaces.json index 186125f..ebf140e 100644 --- a/nix/ci/workspaces.json +++ b/nix/ci/workspaces.json @@ -4,7 +4,6 @@ "flake.lock", "shell.nix", "nix/**", - "nix-nos/**", ".github/workflows/nix.yml", "Cargo.toml", "Cargo.lock", @@ -145,7 +144,7 @@ "deployer-ctl", "node-agent", "nix-agent", - "plasmacloud-reconciler", + "ultracloud-reconciler", "fleet-scheduler" ] }, diff --git a/nix/images/netboot-all-in-one.nix b/nix/images/netboot-all-in-one.nix index 7d64290..b829e3d 100644 --- a/nix/images/netboot-all-in-one.nix +++ b/nix/images/netboot-all-in-one.nix @@ -3,13 +3,13 @@ { imports = [ ./netboot-base.nix - ../modules # Import PlasmaCloud service modules + ../modules # Import UltraCloud service modules ]; # ============================================================================ # ALL-IN-ONE PROFILE # ============================================================================ - # This profile includes all 8 PlasmaCloud services for a single-node deployment: + # This profile includes all 8 UltraCloud services for a single-node deployment: # - Chainfire: Distributed configuration and coordination # - FlareDB: Time-series metrics and events database # - IAM: Identity and access management diff --git a/nix/images/netboot-control-plane.nix b/nix/images/netboot-control-plane.nix index fccc8e3..38be1a3 100644 --- a/nix/images/netboot-control-plane.nix +++ b/nix/images/netboot-control-plane.nix @@ -3,13 +3,13 @@ { imports = [ ./netboot-base.nix - ../modules # Import PlasmaCloud service modules + ../modules # Import UltraCloud service modules ]; # ============================================================================ # CONTROL PLANE PROFILE # ============================================================================ - # This profile includes all 8 PlasmaCloud services for a control plane node: + # This profile includes all 8 UltraCloud services for a control plane node: # - Chainfire: 
Distributed configuration and coordination # - FlareDB: Time-series metrics and events database # - IAM: Identity and access management diff --git a/nix/images/netboot-worker.nix b/nix/images/netboot-worker.nix index 7bd0f72..baa33b5 100644 --- a/nix/images/netboot-worker.nix +++ b/nix/images/netboot-worker.nix @@ -3,7 +3,7 @@ { imports = [ ./netboot-base.nix - ../modules # Import PlasmaCloud service modules + ../modules # Import UltraCloud service modules ]; # ============================================================================ diff --git a/nix/iso/plasmacloud-iso.nix b/nix/iso/ultracloud-iso.nix similarity index 51% rename from nix/iso/plasmacloud-iso.nix rename to nix/iso/ultracloud-iso.nix index 70eb23b..0c35c1f 100644 --- a/nix/iso/plasmacloud-iso.nix +++ b/nix/iso/ultracloud-iso.nix @@ -1,4 +1,4 @@ -# PlasmaCloud Bootstrap ISO +# UltraCloud Bootstrap ISO # Minimal ISO with DHCP + Phone Home to Deployer + Auto-Install # For VM cluster deployment: boots, phones home, partitions disk, installs NixOS @@ -10,15 +10,15 @@ ]; # ISO metadata + image.fileName = "ultracloud-bootstrap.iso"; isoImage = { - isoName = "plasmacloud-bootstrap.iso"; makeEfiBootable = true; makeUsbBootable = true; }; # Embed the repository into the ISO for offline flake install isoImage.contents = [ - { source = ../../.; target = "/opt/plasmacloud-src"; } + { source = ../../.; target = "/opt/ultracloud-src"; } ]; # Minimal network: DHCP on all interfaces @@ -30,8 +30,8 @@ }; # Phone Home service — fetches secrets from Deployer - systemd.services.plasmacloud-bootstrap = { - description = "PlasmaCloud Bootstrap via Phone Home"; + systemd.services.ultracloud-bootstrap = { + description = "UltraCloud Bootstrap via Phone Home"; wantedBy = [ "multi-user.target" ]; after = [ "network-online.target" ]; wants = [ "network-online.target" ]; @@ -58,12 +58,12 @@ return 1 } - mkdir -p /etc/plasmacloud + mkdir -p /etc/ultracloud # Discover Deployer via environment, kernel cmdline, or fallback. 
DEPLOYER_URL="''${DEPLOYER_URL:-}" if [ -z "$DEPLOYER_URL" ]; then - DEPLOYER_URL="$(cmdline_value plasmacloud.deployer_url || true)" + DEPLOYER_URL="$(cmdline_value ultracloud.deployer_url || true)" fi if [ -z "$DEPLOYER_URL" ]; then DEPLOYER_URL="http://192.168.100.1:8080" @@ -72,26 +72,26 @@ # Get machine identity MACHINE_ID=$(cat /etc/machine-id) - echo "PlasmaCloud Bootstrap starting..." + echo "UltraCloud Bootstrap starting..." echo "Machine ID: $MACHINE_ID" echo "Deployer URL: $DEPLOYER_URL" # Optional bootstrap token (from file or environment) - TOKEN_FILE="/etc/plasmacloud/bootstrap-token" + TOKEN_FILE="/etc/ultracloud/bootstrap-token" DEPLOYER_TOKEN="" if [ -s "$TOKEN_FILE" ]; then DEPLOYER_TOKEN=$(cat "$TOKEN_FILE") elif [ -n "''${DEPLOYER_BOOTSTRAP_TOKEN:-}" ]; then DEPLOYER_TOKEN="''${DEPLOYER_BOOTSTRAP_TOKEN}" else - DEPLOYER_TOKEN="$(cmdline_value plasmacloud.bootstrap_token || true)" + DEPLOYER_TOKEN="$(cmdline_value ultracloud.bootstrap_token || true)" fi DEPLOYER_CA_CERT_PATH="''${DEPLOYER_CA_CERT:-}" if [ -z "$DEPLOYER_CA_CERT_PATH" ]; then - DEPLOYER_CA_CERT_URL="$(cmdline_value plasmacloud.ca_cert_url || true)" + DEPLOYER_CA_CERT_URL="$(cmdline_value ultracloud.ca_cert_url || true)" if [ -n "$DEPLOYER_CA_CERT_URL" ]; then - DEPLOYER_CA_CERT_PATH="/etc/plasmacloud/bootstrap-ca.crt" + DEPLOYER_CA_CERT_PATH="/etc/ultracloud/bootstrap-ca.crt" ${pkgs.curl}/bin/curl -sfL --connect-timeout 5 --max-time 30 \ "$DEPLOYER_CA_CERT_URL" \ -o "$DEPLOYER_CA_CERT_PATH" @@ -197,7 +197,7 @@ echo "✓ Phone Home successful" # Create directories - mkdir -p /etc/ssh /etc/plasmacloud /root/.ssh + mkdir -p /etc/ssh /etc/ultracloud /root/.ssh # Validate success flag SUCCESS=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.success // false' || echo "false") @@ -208,18 +208,19 @@ continue fi - # Extract and apply secrets NODE_CONFIG=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -c '.node_config // empty' || true) if [ -z "$NODE_CONFIG" ] || [ "$NODE_CONFIG" = "null" ]; then echo "✗ 
Phone Home response missing node_config" sleep $((2 ** i)) continue fi - echo "$NODE_CONFIG" > /etc/plasmacloud/node-config.json - echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.node_config.ssh_authorized_keys[]?' > /root/.ssh/authorized_keys + echo "$NODE_CONFIG" > /etc/ultracloud/node-config.json + echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r ' + .node_config.bootstrap_secrets.ssh_authorized_keys[]? + ' > /root/.ssh/authorized_keys # Apply SSH host key if provided - SSH_HOST_KEY=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.ssh_host_key // empty') + SSH_HOST_KEY=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.node_config.bootstrap_secrets.ssh_host_key // empty') if [ -n "$SSH_HOST_KEY" ]; then umask 077 echo "$SSH_HOST_KEY" > /etc/ssh/ssh_host_ed25519_key @@ -227,13 +228,13 @@ fi # Apply TLS material if provided - TLS_CERT=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.tls_cert // empty') - TLS_KEY=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.tls_key // empty') + TLS_CERT=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.node_config.bootstrap_secrets.tls_cert // empty') + TLS_KEY=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.node_config.bootstrap_secrets.tls_key // empty') if [ -n "$TLS_CERT" ] && [ -n "$TLS_KEY" ]; then umask 077 - mkdir -p /etc/plasmacloud/tls - echo "$TLS_CERT" > /etc/plasmacloud/tls/node.crt - echo "$TLS_KEY" > /etc/plasmacloud/tls/node.key + mkdir -p /etc/ultracloud/tls + echo "$TLS_CERT" > /etc/ultracloud/tls/node.crt + echo "$TLS_KEY" > /etc/ultracloud/tls/node.key fi # Generate host keys locally if missing @@ -242,16 +243,16 @@ fi # Set permissions - chmod 644 /etc/plasmacloud/node-config.json 2>/dev/null || true + chmod 644 /etc/ultracloud/node-config.json 2>/dev/null || true chmod 700 /root/.ssh 2>/dev/null || true chmod 600 /root/.ssh/authorized_keys 2>/dev/null || true chmod 600 /etc/ssh/ssh_host_ed25519_key 2>/dev/null || true chmod 644 /etc/ssh/ssh_host_ed25519_key.pub 2>/dev/null || true - chmod 600 /etc/plasmacloud/tls/node.key 2>/dev/null || true - chmod 
644 /etc/plasmacloud/tls/node.crt 2>/dev/null || true + chmod 600 /etc/ultracloud/tls/node.key 2>/dev/null || true + chmod 644 /etc/ultracloud/tls/node.crt 2>/dev/null || true # Signal success - NODE_ID=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.node_id // "unknown"') + NODE_ID=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.node_config.assignment.node_id // "unknown"') echo "✓ Bootstrap complete: $NODE_ID" exit 0 else @@ -266,11 +267,11 @@ }; # Auto-install service - partitions disk and runs nixos-install - systemd.services.plasmacloud-install = { - description = "PlasmaCloud Auto-Install to Disk"; + systemd.services.ultracloud-install = { + description = "UltraCloud Auto-Install to Disk"; wantedBy = [ "multi-user.target" ]; - after = [ "plasmacloud-bootstrap.service" ]; - requires = [ "plasmacloud-bootstrap.service" ]; + after = [ "ultracloud-bootstrap.service" ]; + requires = [ "ultracloud-bootstrap.service" ]; serviceConfig = { Type = "oneshot"; @@ -280,183 +281,194 @@ }; script = '' - set -euo pipefail + set -euo pipefail - cmdline_value() { - local key="$1" - local arg - for arg in $(cat /proc/cmdline); do - case "$arg" in - "$key"=*) - echo "''${arg#*=}" - return 0 - ;; - esac - done - return 1 + cmdline_value() { + local key="$1" + local arg + for arg in $(cat /proc/cmdline); do + case "$arg" in + "$key"=*) + echo "''${arg#*=}" + return 0 + ;; + esac + done + return 1 + } + + if [ ! 
-s /etc/ultracloud/node-config.json ]; then + echo "ERROR: node-config.json missing (bootstrap not complete?)" + exit 1 + fi + + NODE_ID=$(${pkgs.jq}/bin/jq -r '.assignment.hostname // .assignment.node_id // empty' /etc/ultracloud/node-config.json) + NODE_IP=$(${pkgs.jq}/bin/jq -r '.assignment.ip // empty' /etc/ultracloud/node-config.json) + NIXOS_CONFIGURATION=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.nixos_configuration // .assignment.hostname // empty' /etc/ultracloud/node-config.json) + DISKO_PATH=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.disko_config_path // empty' /etc/ultracloud/node-config.json) + TARGET_DISK=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.target_disk // empty' /etc/ultracloud/node-config.json) + TARGET_DISK_BY_ID=$(${pkgs.jq}/bin/jq -r '.bootstrap_plan.install_plan.target_disk_by_id // empty' /etc/ultracloud/node-config.json) + DEPLOYER_URL="''${DEPLOYER_URL:-}" + if [ -z "$DEPLOYER_URL" ]; then + DEPLOYER_URL="$(cmdline_value ultracloud.deployer_url || true)" + fi + if [ -z "$DEPLOYER_URL" ]; then + DEPLOYER_URL="http://192.168.100.1:8080" + fi + SRC_ROOT="/opt/ultracloud-src" + + if [ -z "$NODE_ID" ] || [ -z "$NODE_IP" ]; then + echo "ERROR: node-config.json missing hostname/ip" + exit 1 + fi + + if [ -z "$NIXOS_CONFIGURATION" ]; then + echo "ERROR: node-config.json missing install_plan.nixos_configuration" + exit 1 + fi + + TOKEN_FILE="/etc/ultracloud/bootstrap-token" + DEPLOYER_TOKEN="" + if [ -s "$TOKEN_FILE" ]; then + DEPLOYER_TOKEN=$(cat "$TOKEN_FILE") + elif [ -n "''${DEPLOYER_BOOTSTRAP_TOKEN:-}" ]; then + DEPLOYER_TOKEN="''${DEPLOYER_BOOTSTRAP_TOKEN}" + else + DEPLOYER_TOKEN="$(cmdline_value ultracloud.bootstrap_token || true)" + fi + + DEPLOYER_CA_CERT_PATH="''${DEPLOYER_CA_CERT:-}" + if [ -z "$DEPLOYER_CA_CERT_PATH" ]; then + DEPLOYER_CA_CERT_URL="$(cmdline_value ultracloud.ca_cert_url || true)" + if [ -n "$DEPLOYER_CA_CERT_URL" ]; then + DEPLOYER_CA_CERT_PATH="/etc/ultracloud/bootstrap-ca.crt" + 
${pkgs.curl}/bin/curl -sfL --connect-timeout 5 --max-time 30 \ + "$DEPLOYER_CA_CERT_URL" \ + -o "$DEPLOYER_CA_CERT_PATH" + fi + fi + + CURL_ARGS=(-sfL --connect-timeout 5 --max-time 120) + if [ -n "$DEPLOYER_TOKEN" ]; then + CURL_ARGS+=(-H "X-Deployer-Token: $DEPLOYER_TOKEN") + fi + if [ -n "$DEPLOYER_CA_CERT_PATH" ] && [ -f "$DEPLOYER_CA_CERT_PATH" ]; then + CURL_ARGS+=(--cacert "$DEPLOYER_CA_CERT_PATH") + fi + + BUNDLE_PATH="/run/ultracloud/flake-bundle.tar.gz" + mkdir -p /run/ultracloud + if ${pkgs.curl}/bin/curl "''${CURL_ARGS[@]}" \ + "$DEPLOYER_URL/api/v1/bootstrap/flake-bundle" \ + -o "$BUNDLE_PATH"; then + echo "Downloaded bootstrap flake bundle from deployer" + rm -rf "$SRC_ROOT" + mkdir -p "$SRC_ROOT" + ${pkgs.gzip}/bin/gzip -dc "$BUNDLE_PATH" | ${pkgs.gnutar}/bin/tar -xf - -C "$SRC_ROOT" + else + echo "No deployer flake bundle available; using embedded source tree" + fi + + if [ -z "$DISKO_PATH" ]; then + CANDIDATE_DISKO="nix/nodes/vm-cluster/$NODE_ID/disko.nix" + if [ -f "$SRC_ROOT/$CANDIDATE_DISKO" ]; then + DISKO_PATH="$CANDIDATE_DISKO" + fi + fi + + if [ -z "$DISKO_PATH" ]; then + echo "ERROR: node-config.json missing install_plan.disko_config_path and no default Disko path exists for $NODE_ID" + exit 1 + fi + + if [ ! -f "$SRC_ROOT/$DISKO_PATH" ]; then + echo "ERROR: Disko config not found: $SRC_ROOT/$DISKO_PATH" + exit 1 + fi + + echo "UltraCloud install starting for $NODE_ID (ip=$NODE_IP, nixos_configuration=$NIXOS_CONFIGURATION, disko_path=$DISKO_PATH)" + + # Resolve installation target disk. + if [ -n "$TARGET_DISK_BY_ID" ]; then + if [ ! -b "$TARGET_DISK_BY_ID" ]; then + echo "ERROR: target_disk_by_id does not exist: $TARGET_DISK_BY_ID" + exit 1 + fi + DISK="$TARGET_DISK_BY_ID" + elif [ -n "$TARGET_DISK" ]; then + if [ ! 
-b "$TARGET_DISK" ]; then + echo "ERROR: target_disk does not exist: $TARGET_DISK" + exit 1 + fi + DISK="$TARGET_DISK" + else + DISK=$(${pkgs.util-linux}/bin/lsblk -dpno NAME,TYPE | ${pkgs.gawk}/bin/awk '$2=="disk"{print $1; exit}') + fi + if [ -z "$DISK" ]; then + echo "ERROR: No disk found" + exit 1 + fi + + ROOT_PART=$(${pkgs.util-linux}/bin/lsblk -lnpo NAME,TYPE "$DISK" 2>/dev/null | ${pkgs.gawk}/bin/awk '$2=="part"{print $1}' | sed -n '2p') + mkdir -p /mnt + + # Skip if already installed + if [ -n "$ROOT_PART" ] && ${pkgs.util-linux}/bin/lsblk -no FSTYPE "$ROOT_PART" 2>/dev/null | ${pkgs.gnugrep}/bin/grep -q '^ext4$'; then + mount "$ROOT_PART" /mnt 2>/dev/null || true + if [ -e /mnt/etc/NIXOS ]; then + echo "✓ Existing NixOS detected; skipping install" + umount /mnt || true + exit 0 + fi + umount /mnt || true + fi + + echo "Validating NixOS configuration output..." + nix eval --raw "$SRC_ROOT#nixosConfigurations.$NIXOS_CONFIGURATION.config.system.build.toplevel.drvPath" >/dev/null + + EFFECTIVE_DISKO_PATH="$SRC_ROOT/$DISKO_PATH" + if [ -n "$DISK" ]; then + cat > /run/ultracloud/disko-wrapper.nix </dev/null | ${pkgs.gawk}/bin/awk '$2=="part"{print $1}' | sed -n '2p') - mkdir -p /mnt - - # Skip if already installed - if [ -n "$ROOT_PART" ] && ${pkgs.util-linux}/bin/lsblk -no FSTYPE "$ROOT_PART" 2>/dev/null | ${pkgs.gnugrep}/bin/grep -q '^ext4$'; then - mount "$ROOT_PART" /mnt 2>/dev/null || true - if [ -e /mnt/etc/NIXOS ]; then - echo "✓ Existing NixOS detected; skipping install" - umount /mnt || true - exit 0 - fi - umount /mnt || true - fi - - echo "Validating NixOS configuration output..." 
- nix eval --raw "$SRC_ROOT#nixosConfigurations.$NIXOS_CONFIGURATION.config.system.build.toplevel.drvPath" >/dev/null - - EFFECTIVE_DISKO_PATH="$SRC_ROOT/$DISKO_PATH" - if [ -n "$DISK" ]; then - cat > /run/plasmacloud/disko-wrapper.nix < 0; - nixNOSClusterConfig = - if useNixNOS then - config.nix-nos.generateClusterConfig { - hostname = config.networking.hostName; - clusterName = resolvedNixNOSClusterName; - } - else - null; + hasUltraCloudManagedClusterConfig = + (config ? ultracloud) + && (config.ultracloud ? cluster) + && (config.ultracloud.cluster.generated.nodeClusterConfig or null) != null; # Helper function to create cluster join service mkClusterJoinService = { @@ -194,22 +165,10 @@ in options.services.first-boot-automation = { enable = lib.mkEnableOption "first-boot cluster join automation"; - useNixNOS = lib.mkOption { - type = lib.types.bool; - default = false; - description = "Use nix-nos topology for cluster configuration instead of cluster-config.json"; - }; - - nixnosClusterName = lib.mkOption { - type = lib.types.str; - default = "plasmacloud"; - description = "Name of the nix-nos cluster to use (only used when useNixNOS is true)"; - }; - configFile = lib.mkOption { type = lib.types.path; default = "/etc/nixos/secrets/cluster-config.json"; - description = "Path to cluster configuration JSON file (used when useNixNOS is false)"; + description = "Path to the cluster configuration JSON file consumed at first boot."; }; enableChainfire = lib.mkOption { @@ -258,32 +217,11 @@ in config = lib.mkIf cfg.enable { assertions = [ { - assertion = (!cfg.useNixNOS) || (config.nix-nos.enable or false); - message = "services.first-boot-automation.useNixNOS requires nix-nos.enable = true"; - } - { - assertion = (!cfg.useNixNOS) || ((builtins.length availableNixNOSClusters) > 0); - message = "services.first-boot-automation.useNixNOS requires at least one nix-nos.clusters entry"; - } - { - assertion = (!cfg.useNixNOS) || (configEtcPath != null); - message = 
"services.first-boot-automation.useNixNOS requires services.first-boot-automation.configFile to live under /etc"; - } - { - assertion = (!cfg.useNixNOS) || builtins.elem resolvedNixNOSClusterName availableNixNOSClusters; - message = "services.first-boot-automation.useNixNOS could not resolve nix-nos cluster '${cfg.nixnosClusterName}' (available: ${lib.concatStringsSep ", " availableNixNOSClusters})"; + assertion = (!hasUltraCloudManagedClusterConfig) || (configFilePath == "/etc/nixos/secrets/cluster-config.json"); + message = "services.first-boot-automation.configFile must remain /etc/nixos/secrets/cluster-config.json when ultracloud.cluster manages the node cluster config"; } ]; - environment.etc = lib.mkIf (useNixNOS && !hasPlasmacloudManagedClusterConfig) ( - lib.optionalAttrs (configEtcPath != null) { - "${configEtcPath}" = { - text = builtins.toJSON nixNOSClusterConfig; - mode = "0600"; - }; - } - ); - # Chainfire cluster join service systemd.services.chainfire-cluster-join = lib.mkIf cfg.enableChainfire ( mkClusterJoinService { diff --git a/nix/modules/fleet-scheduler.nix b/nix/modules/fleet-scheduler.nix index e6209a4..f119438 100644 --- a/nix/modules/fleet-scheduler.nix +++ b/nix/modules/fleet-scheduler.nix @@ -15,14 +15,14 @@ in clusterNamespace = lib.mkOption { type = lib.types.str; - default = "photoncloud"; + default = "ultracloud"; description = "Cluster namespace prefix"; }; clusterId = lib.mkOption { type = lib.types.str; description = "Cluster ID to reconcile"; - example = "plasmacloud-vm-cluster"; + example = "ultracloud-vm-cluster"; }; intervalSecs = lib.mkOption { @@ -103,7 +103,7 @@ in users.groups.fleet-scheduler = { }; systemd.services.fleet-scheduler = { - description = "PhotonCloud Fleet Scheduler"; + description = "UltraCloud Fleet Scheduler"; wantedBy = [ "multi-user.target" ]; after = [ "network-online.target" ]; wants = [ "network-online.target" ]; diff --git a/nix/modules/install-target.nix b/nix/modules/install-target.nix index 
0778ea5..f3cdb2d 100644 --- a/nix/modules/install-target.nix +++ b/nix/modules/install-target.nix @@ -2,10 +2,10 @@ let hostName = config.networking.hostName; - hasClusterModule = lib.hasAttrByPath [ "plasmacloud" "cluster" "enable" ] options; + hasClusterModule = lib.hasAttrByPath [ "ultracloud" "cluster" "enable" ] options; clusterNode = - if hasClusterModule && config.plasmacloud.cluster.enable && config.plasmacloud.cluster.nodes ? "${hostName}" then - config.plasmacloud.cluster.nodes.${hostName} + if hasClusterModule && config.ultracloud.cluster.enable && config.ultracloud.cluster.nodes ? "${hostName}" then + config.ultracloud.cluster.nodes.${hostName} else null; clusterInstallPlan = @@ -22,11 +22,11 @@ let null; in { - options.plasmacloud.install.diskDevice = lib.mkOption { + options.ultracloud.install.diskDevice = lib.mkOption { type = lib.types.nullOr lib.types.str; default = null; description = "Install target disk path used by Disko-enabled host configurations"; }; - config.plasmacloud.install.diskDevice = lib.mkDefault defaultDiskDevice; + config.ultracloud.install.diskDevice = lib.mkDefault defaultDiskDevice; } diff --git a/nix/modules/nix-agent.nix b/nix/modules/nix-agent.nix index 5decd88..8f78373 100644 --- a/nix/modules/nix-agent.nix +++ b/nix/modules/nix-agent.nix @@ -10,7 +10,7 @@ let in { options.services.nix-agent = { - enable = lib.mkEnableOption "PhotonCloud nix-agent service"; + enable = lib.mkEnableOption "UltraCloud nix-agent service"; chainfireEndpoint = lib.mkOption { type = lib.types.str; @@ -20,7 +20,7 @@ in clusterNamespace = lib.mkOption { type = lib.types.str; - default = "photoncloud"; + default = "ultracloud"; description = "Cluster namespace prefix"; }; @@ -80,7 +80,7 @@ in config = lib.mkIf cfg.enable { systemd.services.nix-agent = { - description = "PhotonCloud Nix Agent"; + description = "UltraCloud Nix Agent"; wantedBy = [ "multi-user.target" ]; after = [ "network-online.target" ]; wants = [ "network-online.target" ]; diff 
--git a/nix/modules/nix-nos/cluster-config-generator.nix b/nix/modules/nix-nos/cluster-config-generator.nix deleted file mode 100644 index 4b8f51e..0000000 --- a/nix/modules/nix-nos/cluster-config-generator.nix +++ /dev/null @@ -1,36 +0,0 @@ -# Standalone cluster-config.json generator -# Usage: nix-build cluster-config-generator.nix --argstr hostname node01 --argstr clusterName plasmacloud -{ pkgs ? import {} -, hostname ? "node01" -, clusterName ? "plasmacloud" -, topologyFile ? ./example-topology.nix -}: - -let - # Import topology module - lib = pkgs.lib; - clusterConfigLib = import ../cluster-config-lib.nix { inherit lib; }; - - # Evaluate the topology file - topologyEval = import topologyFile { inherit lib; }; - - # Get the cluster configuration - cluster = topologyEval.nix-nos.clusters.${clusterName} or (throw "Cluster ${clusterName} not found"); - - # Generate cluster config - clusterConfig = clusterConfigLib.mkClusterConfig { - inherit cluster hostname; - bootstrapNodeName = - if cluster ? bootstrapNode && cluster.bootstrapNode != null - then cluster.bootstrapNode - else null; - }; - - # Convert to JSON - configJson = builtins.toJSON clusterConfig; - -in pkgs.writeTextFile { - name = "cluster-config-${hostname}.json"; - text = configJson; - destination = "/cluster-config.json"; -} diff --git a/nix/modules/nix-nos/example-topology.nix b/nix/modules/nix-nos/example-topology.nix deleted file mode 100644 index bd96f74..0000000 --- a/nix/modules/nix-nos/example-topology.nix +++ /dev/null @@ -1,94 +0,0 @@ -# Example 3-node PlasmaCloud cluster topology -{ lib ? 
(import {}).lib }: - -{ - nix-nos = { - enable = true; - - clusters = { - plasmacloud = { - name = "plasmacloud-cluster"; - - # Bootstrap node (first control-plane node by default) - bootstrapNode = "node01"; - - nodes = { - # Control plane node 1 (bootstrap) - node01 = { - role = "control-plane"; - ip = "10.0.1.10"; - raftPort = 2380; - apiPort = 2379; - services = [ - "chainfire" - "flaredb" - "iam" - "creditservice" - "fiberlb" - "flashdns" - ]; - metadata = { - datacenter = "dc1"; - rack = "rack1"; - }; - }; - - # Control plane node 2 - node02 = { - role = "control-plane"; - ip = "10.0.1.11"; - raftPort = 2380; - apiPort = 2379; - services = [ - "chainfire" - "flaredb" - "iam" - "creditservice" - "fiberlb" - "flashdns" - ]; - metadata = { - datacenter = "dc1"; - rack = "rack2"; - }; - }; - - # Control plane node 3 - node03 = { - role = "control-plane"; - ip = "10.0.1.12"; - raftPort = 2380; - apiPort = 2379; - services = [ - "chainfire" - "flaredb" - "iam" - "creditservice" - "fiberlb" - "flashdns" - ]; - metadata = { - datacenter = "dc1"; - rack = "rack3"; - }; - }; - - # Worker node (optional - for workload separation) - # node04 = { - # role = "worker"; - # ip = "10.0.1.20"; - # services = [ - # "plasmavmc" - # "lightningstor" - # "k8shost" - # ]; - # metadata = { - # datacenter = "dc1"; - # rack = "rack1"; - # }; - # }; - }; - }; - }; - }; -} diff --git a/nix/modules/nix-nos/topology.nix b/nix/modules/nix-nos/topology.nix deleted file mode 100644 index def826e..0000000 --- a/nix/modules/nix-nos/topology.nix +++ /dev/null @@ -1,3 +0,0 @@ -{ config, lib, pkgs, ... 
}: - -import ../../../nix-nos/modules/topology.nix { inherit config lib pkgs; } diff --git a/nix/modules/node-agent.nix b/nix/modules/node-agent.nix index a60b92a..fb4ef96d 100644 --- a/nix/modules/node-agent.nix +++ b/nix/modules/node-agent.nix @@ -6,7 +6,7 @@ let in { options.services.node-agent = { - enable = lib.mkEnableOption "PhotonCloud node-agent service"; + enable = lib.mkEnableOption "UltraCloud node-agent service"; chainfireEndpoint = lib.mkOption { type = lib.types.str; @@ -16,7 +16,7 @@ in clusterNamespace = lib.mkOption { type = lib.types.str; - default = "photoncloud"; + default = "ultracloud"; description = "Cluster namespace prefix"; }; @@ -46,7 +46,7 @@ in allowLocalInstanceUpsert = lib.mkOption { type = lib.types.bool; default = false; - description = "Allow /etc/photoncloud/instances.json upserts into ChainFire"; + description = "Allow /etc/ultracloud/instances.json upserts into ChainFire"; }; enableContainers = lib.mkOption { @@ -86,7 +86,7 @@ in ]; systemd.services.node-agent = { - description = "PhotonCloud Node Agent"; + description = "UltraCloud Node Agent"; wantedBy = [ "multi-user.target" ]; after = [ "network-online.target" ]; wants = [ "network-online.target" ]; diff --git a/nix/modules/service-port-reservations.nix b/nix/modules/service-port-reservations.nix index 7165312..84d0067 100644 --- a/nix/modules/service-port-reservations.nix +++ b/nix/modules/service-port-reservations.nix @@ -2,7 +2,7 @@ { boot.kernel.sysctl = { - # PhotonCloud control-plane services bind within this band. Reserve it from the + # UltraCloud control-plane services bind within this band. Reserve it from the # ephemeral allocator so outbound peer/backend connections cannot steal a service # port during boot and block the later listener bind. 
"net.ipv4.ip_local_reserved_ports" = lib.mkDefault "50051-50090"; diff --git a/nix/modules/plasmacloud-cluster.nix b/nix/modules/ultracloud-cluster.nix similarity index 73% rename from nix/modules/plasmacloud-cluster.nix rename to nix/modules/ultracloud-cluster.nix index 168e870..6e937ac 100644 --- a/nix/modules/plasmacloud-cluster.nix +++ b/nix/modules/ultracloud-cluster.nix @@ -3,8 +3,8 @@ with lib; let - cfg = config.plasmacloud.cluster; - clusterConfigLib = import ../../nix-nos/lib/cluster-config-lib.nix { inherit lib; }; + cfg = config.ultracloud.cluster; + clusterConfigLib = import ../lib/cluster-schema.nix { inherit lib; }; nodeType = clusterConfigLib.mkNodeType types; nodeClassType = clusterConfigLib.mkNodeClassType types; nodePoolType = clusterConfigLib.mkNodePoolType types; @@ -28,22 +28,22 @@ let else null; - generatedNixNOSTopologyCluster = clusterConfigLib.mkNixNOSTopologyCluster cfg; generatedDeployerClusterState = clusterConfigLib.mkDeployerClusterState cfg; -in { - options.plasmacloud.cluster = { - enable = mkEnableOption "PlasmaCloud cluster configuration"; +in +{ + options.ultracloud.cluster = { + enable = mkEnableOption "UltraCloud cluster configuration"; name = mkOption { type = types.str; - default = "plasmacloud-cluster"; + default = "ultracloud-cluster"; description = "Cluster name"; }; nodes = mkOption { type = types.attrsOf nodeType; - default = {}; + default = { }; description = "Map of node names to their configurations"; example = literalExpression '' { @@ -144,11 +144,11 @@ in { assertions = [ { assertion = (length (attrNames cfg.nodes)) > 0; - message = "plasmacloud.cluster.nodes must contain at least one node"; + message = "ultracloud.cluster.nodes must contain at least one node"; } { assertion = (length cfg.bootstrap.initialPeers) > 0; - message = "plasmacloud.cluster.bootstrap.initialPeers must contain at least one node"; + message = "ultracloud.cluster.bootstrap.initialPeers must contain at least one node"; } { assertion = all 
(peer: cfg.nodes ? "${peer}") cfg.bootstrap.initialPeers; @@ -159,50 +159,60 @@ in { message = "BGP ASN must be between 1 and 4294967295"; } { - assertion = all (nodeName: - let - node = cfg.nodes.${nodeName}; - in + assertion = all + (nodeName: + let + node = cfg.nodes.${nodeName}; + in node.pool == null || cfg.deployer.pools ? "${node.pool}" - ) (attrNames cfg.nodes); - message = "All node pools referenced in plasmacloud.cluster.nodes must exist in plasmacloud.cluster.deployer.pools"; + ) + (attrNames cfg.nodes); + message = "All node pools referenced in ultracloud.cluster.nodes must exist in ultracloud.cluster.deployer.pools"; } { - assertion = all (nodeName: - let - node = cfg.nodes.${nodeName}; - in + assertion = all + (nodeName: + let + node = cfg.nodes.${nodeName}; + in node.nodeClass == null || cfg.deployer.nodeClasses ? "${node.nodeClass}" - ) (attrNames cfg.nodes); - message = "All node classes referenced in plasmacloud.cluster.nodes must exist in plasmacloud.cluster.deployer.nodeClasses"; + ) + (attrNames cfg.nodes); + message = "All node classes referenced in ultracloud.cluster.nodes must exist in ultracloud.cluster.deployer.nodeClasses"; } { - assertion = all (poolName: - let - pool = cfg.deployer.pools.${poolName}; - in + assertion = all + (poolName: + let + pool = cfg.deployer.pools.${poolName}; + in pool.nodeClass == null || cfg.deployer.nodeClasses ? "${pool.nodeClass}" - ) (attrNames cfg.deployer.pools); + ) + (attrNames cfg.deployer.pools); message = "All deployer pools must reference existing deployer node classes"; } { - assertion = all (ruleName: - let - rule = cfg.deployer.enrollmentRules.${ruleName}; - in + assertion = all + (ruleName: + let + rule = cfg.deployer.enrollmentRules.${ruleName}; + in (rule.pool == null || cfg.deployer.pools ? "${rule.pool}") && (rule.nodeClass == null || cfg.deployer.nodeClasses ? 
"${rule.nodeClass}") - ) (attrNames cfg.deployer.enrollmentRules); + ) + (attrNames cfg.deployer.enrollmentRules); message = "All deployer enrollment rules must reference existing pools and node classes"; } { - assertion = all (deploymentName: - let - deployment = cfg.deployer.hostDeployments.${deploymentName}; - in + assertion = all + (deploymentName: + let + deployment = cfg.deployer.hostDeployments.${deploymentName}; + in all (pool: cfg.deployer.pools ? "${pool}") deployment.selector.pools && all (nodeClass: cfg.deployer.nodeClasses ? "${nodeClass}") deployment.selector.nodeClasses - ) (attrNames cfg.deployer.hostDeployments); + ) + (attrNames cfg.deployer.hostDeployments); message = "All deployer host deployments must reference existing pools and node classes"; } ]; @@ -213,15 +223,10 @@ in { mode = "0600"; }; - nix-nos.enable = mkDefault true; - nix-nos.clusters = { - "${cfg.name}" = mkDefault generatedNixNOSTopologyCluster; - }; + ultracloud.cluster.generated.nodeClusterConfig = generatedNodeClusterConfig; + ultracloud.cluster.generated.deployerClusterState = generatedDeployerClusterState; - plasmacloud.cluster.generated.nodeClusterConfig = generatedNodeClusterConfig; - plasmacloud.cluster.generated.deployerClusterState = generatedDeployerClusterState; - - system.build.plasmacloudDeployerClusterState = - jsonFormat.generate "plasmacloud-deployer-cluster-state.json" generatedDeployerClusterState; + system.build.ultracloudDeployerClusterState = + jsonFormat.generate "ultracloud-deployer-cluster-state.json" generatedDeployerClusterState; }; } diff --git a/nix/modules/plasmacloud-network.nix b/nix/modules/ultracloud-network.nix similarity index 62% rename from nix/modules/plasmacloud-network.nix rename to nix/modules/ultracloud-network.nix index 49fb666..83513d3 100644 --- a/nix/modules/plasmacloud-network.nix +++ b/nix/modules/ultracloud-network.nix @@ -3,8 +3,17 @@ with lib; let - cfg = config.plasmacloud.network; - clusterCfg = config.plasmacloud.cluster; + 
cfg = config.ultracloud.network; + clusterCfg = config.ultracloud.cluster; + hostName = config.networking.hostName; + clusterNode = + if clusterCfg.enable or false && clusterCfg.nodes ? "${hostName}" + then clusterCfg.nodes.${hostName} + else null; + clusterNodeIp = + if clusterNode != null + then clusterNode.ip + else "127.0.0.1"; # BGP peer type for FiberLB bgpPeerType = types.submodule { @@ -30,21 +39,22 @@ let }; }; -in { - options.plasmacloud.network = { +in +{ + options.ultracloud.network = { fiberlbBgp = { enable = mkEnableOption "FiberLB BGP VIP advertisement"; vips = mkOption { type = types.listOf types.str; - default = []; + default = [ ]; description = "Legacy static VIP hints. FiberLB native BGP ignores this list and advertises active load balancer VIPs dynamically."; example = [ "203.0.113.10/32" "203.0.113.11/32" ]; }; peers = mkOption { type = types.listOf bgpPeerType; - default = []; + default = [ ]; description = "BGP peers (ToR switches, upstream routers)"; example = literalExpression '' [ @@ -63,6 +73,18 @@ in { prismnetIntegration = { enable = mkEnableOption "PrismNET OVN integration"; + + encapType = mkOption { + type = types.enum [ "geneve" "vxlan" ]; + default = "geneve"; + description = "Encapsulation type reserved for future OVN controller wiring."; + }; + + encapIp = mkOption { + type = types.nullOr types.str; + default = null; + description = "Explicit OVN encapsulation IP. 
Defaults to the node IP when future controller wiring is added."; + }; }; }; @@ -73,15 +95,15 @@ in { assertions = [ { assertion = clusterCfg.bgp.asn > 0; - message = "plasmacloud.cluster.bgp.asn must be configured for FiberLB BGP"; + message = "ultracloud.cluster.bgp.asn must be configured for FiberLB BGP"; } { assertion = (length cfg.fiberlbBgp.peers) > 0; - message = "plasmacloud.network.fiberlbBgp.peers must contain at least one BGP peer"; + message = "ultracloud.network.fiberlbBgp.peers must contain at least one BGP peer"; } { assertion = config.services.fiberlb.enable or false; - message = "plasmacloud.network.fiberlbBgp.enable requires services.fiberlb.enable"; + message = "ultracloud.network.fiberlbBgp.enable requires services.fiberlb.enable"; } ]; @@ -91,12 +113,7 @@ in { routerId = if cfg.fiberlbBgp.routerId != null then cfg.fiberlbBgp.routerId - else - let - hostname = config.networking.hostName; - node = clusterCfg.nodes.${hostname} or null; - in - if node != null then node.ip else "127.0.0.1"; + else clusterNodeIp; peers = cfg.fiberlbBgp.peers; }; @@ -105,23 +122,11 @@ in { # PrismNET OVN integration (mkIf cfg.prismnetIntegration.enable { - # Enable OVN Controller - virtualisation.switch.enable = true; - virtualisation.ovn = { - enable = true; - controller = { - enable = true; - # Use Geneve encapsulation to avoid VXLAN VNI limitations and allow richer metadata - encapType = "geneve"; - # Auto-detect IP from cluster config - encapIp = - let - hostname = config.networking.hostName; - node = clusterCfg.nodes.${hostname} or null; - in - if node != null then node.ip else "127.0.0.1"; - }; - }; + virtualisation.vswitch.enable = true; + environment.systemPackages = [ pkgs.ovn ]; + warnings = [ + "ultracloud.network.prismnetIntegration.enable currently enables Open vSwitch and installs OVN tooling only. Wire ovn-controller explicitly before expecting PrismNET dataplane automation." 
+ ]; }) ]; } diff --git a/nix/modules/plasmacloud-resources.nix b/nix/modules/ultracloud-resources.nix similarity index 91% rename from nix/modules/plasmacloud-resources.nix rename to nix/modules/ultracloud-resources.nix index 26aeea5..819ec8d 100644 --- a/nix/modules/plasmacloud-resources.nix +++ b/nix/modules/ultracloud-resources.nix @@ -488,16 +488,16 @@ let }; }; - lbCfg = config.plasmacloud.lb; - dnsCfg = config.plasmacloud.dns; + lbCfg = config.ultracloud.lb; + dnsCfg = config.ultracloud.dns; - lbConfigFile = jsonFormat.generate "plasmacloud-lb.json" { + lbConfigFile = jsonFormat.generate "ultracloud-lb.json" { load_balancers = lbCfg.loadBalancers; }; lbConfigPath = lbCfg.configPath; lbConfigRelative = removePrefix "/etc/" lbConfigPath; - dnsConfigFile = jsonFormat.generate "plasmacloud-dns.json" { + dnsConfigFile = jsonFormat.generate "ultracloud-dns.json" { zones = dnsCfg.zones; reverse_zones = dnsCfg.reverseZones; }; @@ -505,8 +505,8 @@ let dnsConfigRelative = removePrefix "/etc/" dnsConfigPath; in { - options.plasmacloud.lb = { - enable = mkEnableOption "PlasmaCloud load balancer declarations"; + options.ultracloud.lb = { + enable = mkEnableOption "UltraCloud load balancer declarations"; endpoint = mkOption { type = types.str; @@ -522,7 +522,7 @@ in { configPath = mkOption { type = types.str; - default = "/etc/plasmacloud/lb.json"; + default = "/etc/ultracloud/lb.json"; description = "Path for rendered load balancer config"; }; @@ -546,13 +546,13 @@ in { package = mkOption { type = types.package; - default = pkgs.plasmacloud-reconciler or (throw "plasmacloud-reconciler package not found"); + default = pkgs.ultracloud-reconciler or (throw "ultracloud-reconciler package not found"); description = "Reconciler package for load balancer declarations"; }; }; - options.plasmacloud.dns = { - enable = mkEnableOption "PlasmaCloud DNS declarations"; + options.ultracloud.dns = { + enable = mkEnableOption "UltraCloud DNS declarations"; endpoint = mkOption { type = 
types.str; @@ -574,7 +574,7 @@ in { configPath = mkOption { type = types.str; - default = "/etc/plasmacloud/dns.json"; + default = "/etc/ultracloud/dns.json"; description = "Path for rendered DNS config"; }; @@ -598,7 +598,7 @@ in { package = mkOption { type = types.package; - default = pkgs.plasmacloud-reconciler or (throw "plasmacloud-reconciler package not found"); + default = pkgs.ultracloud-reconciler or (throw "ultracloud-reconciler package not found"); description = "Reconciler package for DNS declarations"; }; }; @@ -608,25 +608,25 @@ in { assertions = [ { assertion = hasPrefix "/etc/" lbConfigPath; - message = "plasmacloud.lb.configPath must be under /etc"; + message = "ultracloud.lb.configPath must be under /etc"; } ]; environment.etc."${lbConfigRelative}".source = lbConfigFile; - systemd.services.plasmacloud-lb-apply = { - description = "Apply PlasmaCloud load balancer declarations"; + systemd.services.ultracloud-lb-apply = { + description = "Apply UltraCloud load balancer declarations"; after = [ "network-online.target" ] ++ (optional config.services.fiberlb.enable "fiberlb.service"); wants = [ "network-online.target" ] ++ (optional config.services.fiberlb.enable "fiberlb.service"); wantedBy = optional lbCfg.applyOnBoot "multi-user.target"; serviceConfig = { Type = "oneshot"; - ExecStart = "${lbCfg.package}/bin/plasmacloud-reconciler lb --config ${lbConfigPath} --endpoint ${lbCfg.endpoint}${optionalString lbCfg.prune " --prune"}"; + ExecStart = "${lbCfg.package}/bin/ultracloud-reconciler lb --config ${lbConfigPath} --endpoint ${lbCfg.endpoint}${optionalString lbCfg.prune " --prune"}"; }; }; - systemd.paths.plasmacloud-lb-apply = mkIf lbCfg.applyOnChange { + systemd.paths.ultracloud-lb-apply = mkIf lbCfg.applyOnChange { wantedBy = [ "multi-user.target" ]; pathConfig = { PathChanged = lbConfigPath; @@ -638,25 +638,25 @@ in { assertions = [ { assertion = hasPrefix "/etc/" dnsConfigPath; - message = "plasmacloud.dns.configPath must be under /etc"; + message 
= "ultracloud.dns.configPath must be under /etc"; } ]; environment.etc."${dnsConfigRelative}".source = dnsConfigFile; - systemd.services.plasmacloud-dns-apply = { - description = "Apply PlasmaCloud DNS declarations"; + systemd.services.ultracloud-dns-apply = { + description = "Apply UltraCloud DNS declarations"; after = [ "network-online.target" ] ++ (optional config.services.flashdns.enable "flashdns.service"); wants = [ "network-online.target" ] ++ (optional config.services.flashdns.enable "flashdns.service"); wantedBy = optional dnsCfg.applyOnBoot "multi-user.target"; serviceConfig = { Type = "oneshot"; - ExecStart = "${dnsCfg.package}/bin/plasmacloud-reconciler dns --config ${dnsConfigPath} --endpoint ${dnsCfg.endpoint}${optionalString dnsCfg.prune " --prune"}"; + ExecStart = "${dnsCfg.package}/bin/ultracloud-reconciler dns --config ${dnsConfigPath} --endpoint ${dnsCfg.endpoint}${optionalString dnsCfg.prune " --prune"}"; }; }; - systemd.paths.plasmacloud-dns-apply = mkIf dnsCfg.applyOnChange { + systemd.paths.ultracloud-dns-apply = mkIf dnsCfg.applyOnChange { wantedBy = [ "multi-user.target" ]; pathConfig = { PathChanged = dnsConfigPath; diff --git a/nix/modules/plasmacloud-tenant-networking.nix b/nix/modules/ultracloud-tenant-networking.nix similarity index 91% rename from nix/modules/plasmacloud-tenant-networking.nix rename to nix/modules/ultracloud-tenant-networking.nix index 3c17134..0be9079 100644 --- a/nix/modules/plasmacloud-tenant-networking.nix +++ b/nix/modules/ultracloud-tenant-networking.nix @@ -3,7 +3,7 @@ with lib; let - cfg = config.plasmacloud.tenantNetworking; + cfg = config.ultracloud.tenantNetworking; jsonFormat = pkgs.formats.json {}; serviceIpPoolType = types.submodule { @@ -263,14 +263,14 @@ let }; }; - configFile = jsonFormat.generate "plasmacloud-tenant-networking.json" { + configFile = jsonFormat.generate "ultracloud-tenant-networking.json" { inherit (cfg) tenants; }; configPath = cfg.configPath; configRelative = removePrefix "/etc/" 
configPath; in { - options.plasmacloud.tenantNetworking = { + options.ultracloud.tenantNetworking = { enable = mkEnableOption "tenant-scoped PrismNET declarations"; endpoint = mkOption { @@ -287,19 +287,19 @@ in { controllerPrincipalId = mkOption { type = types.str; - default = "plasmacloud-reconciler"; + default = "ultracloud-reconciler"; description = "Service account used by the reconciler when applying tenant declarations"; }; tenants = mkOption { type = types.listOf tenantType; default = []; - description = "Tenant-scoped network declarations. This is separate from platform networking under plasmacloud.network."; + description = "Tenant-scoped network declarations. This is separate from platform networking under ultracloud.network."; }; configPath = mkOption { type = types.str; - default = "/etc/plasmacloud/tenant-networking.json"; + default = "/etc/ultracloud/tenant-networking.json"; description = "Path for rendered tenant networking config"; }; @@ -323,7 +323,7 @@ in { package = mkOption { type = types.package; - default = pkgs.plasmacloud-reconciler or (throw "plasmacloud-reconciler package not found"); + default = pkgs.ultracloud-reconciler or (throw "ultracloud-reconciler package not found"); description = "Reconciler package for tenant networking declarations"; }; }; @@ -332,14 +332,14 @@ in { assertions = [ { assertion = hasPrefix "/etc/" configPath; - message = "plasmacloud.tenantNetworking.configPath must be under /etc"; + message = "ultracloud.tenantNetworking.configPath must be under /etc"; } ]; environment.etc."${configRelative}".source = configFile; - systemd.services.plasmacloud-tenant-networking-apply = { - description = "Apply PlasmaCloud tenant networking declarations"; + systemd.services.ultracloud-tenant-networking-apply = { + description = "Apply UltraCloud tenant networking declarations"; after = [ "network-online.target" ] ++ optional config.services.prismnet.enable "prismnet.service" @@ -354,7 +354,7 @@ in { Type = "oneshot"; 
RemainAfterExit = true; ExecStart = - "${cfg.package}/bin/plasmacloud-reconciler tenant-network" + "${cfg.package}/bin/ultracloud-reconciler tenant-network" + " --config ${configPath}" + " --endpoint ${cfg.endpoint}" + " --iam-endpoint ${cfg.iamEndpoint}" @@ -363,7 +363,7 @@ in { }; }; - systemd.paths.plasmacloud-tenant-networking-apply = mkIf cfg.applyOnChange { + systemd.paths.ultracloud-tenant-networking-apply = mkIf cfg.applyOnChange { wantedBy = [ "multi-user.target" ]; pathConfig = { PathChanged = configPath; diff --git a/nix/nodes/vm-cluster/cluster.nix b/nix/nodes/vm-cluster/cluster.nix index d16a79a..303bc44 100644 --- a/nix/nodes/vm-cluster/cluster.nix +++ b/nix/nodes/vm-cluster/cluster.nix @@ -1,129 +1,25 @@ -{ ... }: +{ lib, ... }: +let + vmCluster = import ./lib.nix { inherit lib; }; +in { - plasmacloud.cluster = { + ultracloud.cluster = { enable = true; - name = "plasmacloud-vm-cluster"; + name = vmCluster.clusterName; - nodes = { - node01 = { - role = "control-plane"; - ip = "192.168.100.11"; - services = [ "chainfire" "flaredb" "iam" ]; - labels = { - tier = "control-plane"; - platform = "vm-cluster"; - }; - pool = "control"; - nodeClass = "control-plane"; - failureDomain = "rack-a"; - nixProfile = "profiles/control-plane"; - installPlan = { - nixosConfiguration = "node01"; - diskoConfigPath = "nix/nodes/vm-cluster/node01/disko.nix"; - targetDisk = "/dev/vda"; - }; - desiredSystem = { - healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ]; - rollbackOnFailure = true; - }; - raftPort = 2380; - apiPort = 2379; - }; - - node02 = { - role = "control-plane"; - ip = "192.168.100.12"; - services = [ "chainfire" "flaredb" "iam" ]; - labels = { - tier = "control-plane"; - platform = "vm-cluster"; - }; - pool = "control"; - nodeClass = "control-plane"; - failureDomain = "rack-b"; - nixProfile = "profiles/control-plane"; - installPlan = { - nixosConfiguration = "node02"; - diskoConfigPath = "nix/nodes/vm-cluster/node02/disko.nix"; - targetDisk = 
"/dev/vda"; - }; - desiredSystem = { - healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ]; - rollbackOnFailure = true; - }; - raftPort = 2380; - apiPort = 2379; - }; - - node03 = { - role = "control-plane"; - ip = "192.168.100.13"; - services = [ "chainfire" "flaredb" "iam" ]; - labels = { - tier = "control-plane"; - platform = "vm-cluster"; - }; - pool = "control"; - nodeClass = "control-plane"; - failureDomain = "rack-c"; - nixProfile = "profiles/control-plane"; - installPlan = { - nixosConfiguration = "node03"; - diskoConfigPath = "nix/nodes/vm-cluster/node03/disko.nix"; - targetDisk = "/dev/vda"; - }; - desiredSystem = { - healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ]; - rollbackOnFailure = true; - }; - raftPort = 2380; - apiPort = 2379; - }; - }; + nodes = vmCluster.clusterNodes; deployer = { - clusterId = "plasmacloud-vm-cluster"; - environment = "dev"; + clusterId = vmCluster.clusterId; + environment = vmCluster.environment; - nodeClasses = { - control-plane = { - description = "Control-plane VM cluster nodes"; - nixProfile = "profiles/control-plane"; - roles = [ "control-plane" ]; - labels = { - tier = "control-plane"; - platform = "vm-cluster"; - }; - }; - }; - - pools = { - control = { - description = "VM cluster control-plane pool"; - nodeClass = "control-plane"; - labels = { - plane = "control"; - cluster = "vm-cluster"; - }; - }; - }; - - hostDeployments = { - control-plane-canary = { - selector.nodeIds = [ "node01" ]; - nixosConfiguration = "node01"; - flakeRef = "github:centra/cloud"; - batchSize = 1; - maxUnavailable = 1; - healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ]; - switchAction = "switch"; - rollbackOnFailure = true; - }; - }; + nodeClasses.control-plane = vmCluster.controlPlaneNodeClass; + pools.control = vmCluster.controlPlanePool; + hostDeployments = vmCluster.hostDeployments; }; - bootstrap.initialPeers = [ "node01" "node02" "node03" ]; - bgp.asn = 64512; + bootstrap.initialPeers = 
vmCluster.controlPlaneNodeNames; + bgp.asn = vmCluster.bgpAsn; }; } diff --git a/nix/nodes/vm-cluster/common-disko.nix b/nix/nodes/vm-cluster/common-disko.nix new file mode 100644 index 0000000..d7d773c --- /dev/null +++ b/nix/nodes/vm-cluster/common-disko.nix @@ -0,0 +1,33 @@ +{ config, ... }: + +{ + disko.devices = { + disk.main = { + type = "disk"; + device = config.ultracloud.install.diskDevice or "/dev/vda"; + content = { + type = "gpt"; + partitions = { + ESP = { + size = "512M"; + type = "EF00"; + content = { + type = "filesystem"; + format = "vfat"; + mountpoint = "/boot"; + mountOptions = [ "umask=0077" ]; + }; + }; + root = { + size = "100%"; + content = { + type = "filesystem"; + format = "ext4"; + mountpoint = "/"; + }; + }; + }; + }; + }; + }; +} diff --git a/nix/nodes/vm-cluster/lib.nix b/nix/nodes/vm-cluster/lib.nix new file mode 100644 index 0000000..5b14f28 --- /dev/null +++ b/nix/nodes/vm-cluster/lib.nix @@ -0,0 +1,240 @@ +{ lib }: + +let + inherit (lib) concatStringsSep genAttrs mkIf; + + clusterName = "ultracloud-vm-cluster"; + clusterId = clusterName; + environment = "dev"; + bgpAsn = 64512; + + bootstrapToken = "vm-cluster-bootstrap-token"; + adminToken = "vm-cluster-admin-token"; + + chainfireApiPort = 2379; + chainfireRaftPort = 2380; + flaredbApiPort = 2479; + flaredbRaftPort = 2480; + iamPort = 50080; + + controlPlaneLabels = { + tier = "control-plane"; + platform = "vm-cluster"; + }; + + controlPlaneNodes = { + node01 = { + ip = "192.168.100.11"; + failureDomain = "rack-a"; + }; + node02 = { + ip = "192.168.100.12"; + failureDomain = "rack-b"; + }; + node03 = { + ip = "192.168.100.13"; + failureDomain = "rack-c"; + }; + }; + + controlPlaneNodeNames = builtins.attrNames controlPlaneNodes; + bootstrapNodeName = builtins.head controlPlaneNodeNames; + + mkEndpoint = nodeName: port: + let + node = controlPlaneNodes.${nodeName}; + in + "${node.ip}:${toString port}"; + + chainfireInitialPeers = + map (nodeName: "${nodeName}=${mkEndpoint nodeName 
chainfireRaftPort}") controlPlaneNodeNames; + + flaredbInitialPeers = + map (nodeName: "${nodeName}=${mkEndpoint nodeName flaredbRaftPort}") controlPlaneNodeNames; + + chainfireControlPlaneAddrs = + concatStringsSep "," (map (nodeName: mkEndpoint nodeName chainfireApiPort) controlPlaneNodeNames); + + flaredbControlPlaneAddrs = + concatStringsSep "," (map (nodeName: mkEndpoint nodeName flaredbApiPort) controlPlaneNodeNames); + + sharedDesiredSystem = { + healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ]; + rollbackOnFailure = true; + }; + + mkInstallPlan = nodeName: { + nixosConfiguration = nodeName; + diskoConfigPath = "nix/nodes/vm-cluster/${nodeName}/disko.nix"; + targetDisk = "/dev/vda"; + }; + + mkClusterNode = nodeName: + let + node = controlPlaneNodes.${nodeName}; + in + { + role = "control-plane"; + ip = node.ip; + services = [ "chainfire" "flaredb" "iam" ]; + labels = controlPlaneLabels; + pool = "control"; + nodeClass = "control-plane"; + failureDomain = node.failureDomain; + nixProfile = "profiles/control-plane"; + installPlan = mkInstallPlan nodeName; + desiredSystem = sharedDesiredSystem; + raftPort = chainfireRaftPort; + apiPort = chainfireApiPort; + }; + + clusterNodes = genAttrs controlPlaneNodeNames mkClusterNode; + + nodeConfigurationPaths = { + node01 = ./node01/configuration.nix; + node02 = ./node02/configuration.nix; + node03 = ./node03/configuration.nix; + }; + + nodeDiskoPaths = { + node01 = ./node01/disko.nix; + node02 = ./node02/disko.nix; + node03 = ./node03/disko.nix; + }; +in +{ + inherit + adminToken + bgpAsn + bootstrapNodeName + bootstrapToken + chainfireApiPort + chainfireControlPlaneAddrs + chainfireInitialPeers + chainfireRaftPort + clusterId + clusterName + clusterNodes + controlPlaneLabels + controlPlaneNodeNames + controlPlaneNodes + environment + flaredbApiPort + flaredbControlPlaneAddrs + flaredbInitialPeers + flaredbRaftPort + iamPort + nodeConfigurationPaths + nodeDiskoPaths + sharedDesiredSystem + ; + + 
controlPlaneNodeClass = { + description = "Control-plane VM cluster nodes"; + nixProfile = "profiles/control-plane"; + roles = [ "control-plane" ]; + labels = controlPlaneLabels; + }; + + controlPlanePool = { + description = "VM cluster control-plane pool"; + nodeClass = "control-plane"; + labels = { + plane = "control"; + cluster = "vm-cluster"; + }; + }; + + hostDeployments = { + control-plane-canary = { + selector.nodeIds = [ bootstrapNodeName ]; + nixosConfiguration = bootstrapNodeName; + flakeRef = "github:centra/cloud"; + batchSize = 1; + maxUnavailable = 1; + healthCheckCommand = sharedDesiredSystem.healthCheckCommand; + switchAction = "switch"; + rollbackOnFailure = sharedDesiredSystem.rollbackOnFailure; + }; + }; + + mkControlPlaneNodeModule = nodeName: + { lib, ... }: + { + imports = [ + ./cluster.nix + nodeDiskoPaths.${nodeName} + ]; + + networking.hostName = nodeName; + networking.useDHCP = lib.mkDefault true; + + boot.loader.grub = { + enable = true; + devices = [ "/dev/vda" ]; + efiSupport = true; + efiInstallAsRemovable = true; + }; + + services.chainfire = { + enable = true; + nodeId = nodeName; + apiAddr = mkEndpoint nodeName chainfireApiPort; + raftAddr = mkEndpoint nodeName chainfireRaftPort; + initialPeers = chainfireInitialPeers; + }; + + services.flaredb = { + enable = true; + nodeId = nodeName; + apiAddr = mkEndpoint nodeName flaredbApiPort; + raftAddr = mkEndpoint nodeName flaredbRaftPort; + initialPeers = flaredbInitialPeers; + }; + + services.iam = { + enable = true; + port = iamPort; + chainfireAddr = chainfireControlPlaneAddrs; + flaredbAddr = flaredbControlPlaneAddrs; + }; + + services.openssh.enable = true; + users.users.root.openssh.authorizedKeys.keys = [ ]; + + system.stateVersion = "24.05"; + }; + + mkBootstrapServicesModule = + { self + , nodeName + , enableDeployer ? false + , + }: + { pkgs, lib, ... 
}: + { + services.nix-agent = { + enable = true; + chainfireEndpoint = "http://${mkEndpoint bootstrapNodeName chainfireApiPort}"; + clusterId = clusterId; + nodeId = nodeName; + flakeRoot = self.outPath; + intervalSecs = 30; + apply = true; + }; + + services.deployer = mkIf enableDeployer { + enable = true; + bindAddr = "0.0.0.0:8088"; + chainfireEndpoints = [ "http://${mkEndpoint bootstrapNodeName chainfireApiPort}" ]; + clusterId = clusterId; + requireChainfire = true; + allowUnknownNodes = false; + allowUnauthenticated = false; + bootstrapToken = bootstrapToken; + adminToken = adminToken; + bootstrapFlakeBundle = pkgs.ultracloudFlakeBundle; + seedClusterState = true; + }; + }; +} diff --git a/nix/nodes/vm-cluster/node01/configuration.nix b/nix/nodes/vm-cluster/node01/configuration.nix index efbf194..7a2b5c3 100644 --- a/nix/nodes/vm-cluster/node01/configuration.nix +++ b/nix/nodes/vm-cluster/node01/configuration.nix @@ -1,43 +1,6 @@ -{ config, lib, pkgs, ... }: +args@{ lib, ... }: -{ - imports = [ - ../cluster.nix - ./disko.nix - ]; - - networking.hostName = "node01"; - networking.useDHCP = lib.mkDefault true; - boot.loader.grub = { - enable = true; - devices = [ "/dev/vda" ]; - efiSupport = true; - efiInstallAsRemovable = true; - }; - - services.chainfire = { - enable = true; - nodeId = "node01"; - apiAddr = "192.168.100.11:2379"; - raftAddr = "192.168.100.11:2380"; - }; - - services.flaredb = { - enable = true; - nodeId = "node01"; - apiAddr = "192.168.100.11:2479"; - raftAddr = "192.168.100.11:2480"; - }; - - services.iam = { - enable = true; - port = 50080; - chainfireAddr = "192.168.100.11:2379,192.168.100.12:2379,192.168.100.13:2379"; - flaredbAddr = "192.168.100.11:2479,192.168.100.12:2479,192.168.100.13:2479"; - }; - - services.openssh.enable = true; - users.users.root.openssh.authorizedKeys.keys = [ ]; - - system.stateVersion = "24.05"; -} +let + vmCluster = import ../lib.nix { inherit lib; }; +in +(vmCluster.mkControlPlaneNodeModule "node01") args diff 
--git a/nix/nodes/vm-cluster/node01/disko.nix b/nix/nodes/vm-cluster/node01/disko.nix index 08109e1..27192fc 100644 --- a/nix/nodes/vm-cluster/node01/disko.nix +++ b/nix/nodes/vm-cluster/node01/disko.nix @@ -1,33 +1,5 @@ -{ config, lib, ... }: +{ ... }: { - disko.devices = { - disk.main = { - type = "disk"; - device = config.plasmacloud.install.diskDevice or "/dev/vda"; - content = { - type = "gpt"; - partitions = { - ESP = { - size = "512M"; - type = "EF00"; - content = { - type = "filesystem"; - format = "vfat"; - mountpoint = "/boot"; - mountOptions = [ "umask=0077" ]; - }; - }; - root = { - size = "100%"; - content = { - type = "filesystem"; - format = "ext4"; - mountpoint = "/"; - }; - }; - }; - }; - }; - }; + imports = [ ../common-disko.nix ]; } diff --git a/nix/nodes/vm-cluster/node02/configuration.nix b/nix/nodes/vm-cluster/node02/configuration.nix index 62aed95..1a21670 100644 --- a/nix/nodes/vm-cluster/node02/configuration.nix +++ b/nix/nodes/vm-cluster/node02/configuration.nix @@ -1,53 +1,6 @@ -{ config, lib, pkgs, ... }: +args@{ lib, ... 
}: -{ - imports = [ - ../cluster.nix - ./disko.nix - ]; - - networking.hostName = "node02"; - networking.useDHCP = lib.mkDefault true; - boot.loader.grub = { - enable = true; - devices = [ "/dev/vda" ]; - efiSupport = true; - efiInstallAsRemovable = true; - }; - - services.chainfire = { - enable = true; - nodeId = "node02"; - apiAddr = "192.168.100.12:2379"; - raftAddr = "192.168.100.12:2380"; - initialPeers = [ - "node01=192.168.100.11:2380" - "node02=192.168.100.12:2380" - "node03=192.168.100.13:2380" - ]; - }; - - services.flaredb = { - enable = true; - nodeId = "node02"; - apiAddr = "192.168.100.12:2479"; - raftAddr = "192.168.100.12:2480"; - initialPeers = [ - "node01=192.168.100.11:2480" - "node02=192.168.100.12:2480" - "node03=192.168.100.13:2480" - ]; - }; - - services.iam = { - enable = true; - port = 50080; - chainfireAddr = "192.168.100.11:2379,192.168.100.12:2379,192.168.100.13:2379"; - flaredbAddr = "192.168.100.11:2479,192.168.100.12:2479,192.168.100.13:2479"; - }; - - services.openssh.enable = true; - users.users.root.openssh.authorizedKeys.keys = [ ]; - - system.stateVersion = "24.05"; -} +let + vmCluster = import ../lib.nix { inherit lib; }; +in +(vmCluster.mkControlPlaneNodeModule "node02") args diff --git a/nix/nodes/vm-cluster/node02/disko.nix b/nix/nodes/vm-cluster/node02/disko.nix index 08109e1..27192fc 100644 --- a/nix/nodes/vm-cluster/node02/disko.nix +++ b/nix/nodes/vm-cluster/node02/disko.nix @@ -1,33 +1,5 @@ -{ config, lib, ... }: +{ ... 
}: { - disko.devices = { - disk.main = { - type = "disk"; - device = config.plasmacloud.install.diskDevice or "/dev/vda"; - content = { - type = "gpt"; - partitions = { - ESP = { - size = "512M"; - type = "EF00"; - content = { - type = "filesystem"; - format = "vfat"; - mountpoint = "/boot"; - mountOptions = [ "umask=0077" ]; - }; - }; - root = { - size = "100%"; - content = { - type = "filesystem"; - format = "ext4"; - mountpoint = "/"; - }; - }; - }; - }; - }; - }; + imports = [ ../common-disko.nix ]; } diff --git a/nix/nodes/vm-cluster/node03/configuration.nix b/nix/nodes/vm-cluster/node03/configuration.nix index f286e5f..c8eedce 100644 --- a/nix/nodes/vm-cluster/node03/configuration.nix +++ b/nix/nodes/vm-cluster/node03/configuration.nix @@ -1,53 +1,6 @@ -{ config, lib, pkgs, ... }: +args@{ lib, ... }: -{ - imports = [ - ../cluster.nix - ./disko.nix - ]; - - networking.hostName = "node03"; - networking.useDHCP = lib.mkDefault true; - boot.loader.grub = { - enable = true; - devices = [ "/dev/vda" ]; - efiSupport = true; - efiInstallAsRemovable = true; - }; - - services.chainfire = { - enable = true; - nodeId = "node03"; - apiAddr = "192.168.100.13:2379"; - raftAddr = "192.168.100.13:2380"; - initialPeers = [ - "node01=192.168.100.11:2380" - "node02=192.168.100.12:2380" - "node03=192.168.100.13:2380" - ]; - }; - - services.flaredb = { - enable = true; - nodeId = "node03"; - apiAddr = "192.168.100.13:2479"; - raftAddr = "192.168.100.13:2480"; - initialPeers = [ - "node01=192.168.100.11:2480" - "node02=192.168.100.12:2480" - "node03=192.168.100.13:2480" - ]; - }; - - services.iam = { - enable = true; - port = 50080; - chainfireAddr = "192.168.100.11:2379,192.168.100.12:2379,192.168.100.13:2379"; - flaredbAddr = "192.168.100.11:2479,192.168.100.12:2479,192.168.100.13:2479"; - }; - - services.openssh.enable = true; - users.users.root.openssh.authorizedKeys.keys = [ ]; - - system.stateVersion = "24.05"; -} +let + vmCluster = import ../lib.nix { inherit lib; }; +in 
+(vmCluster.mkControlPlaneNodeModule "node03") args diff --git a/nix/nodes/vm-cluster/node03/disko.nix b/nix/nodes/vm-cluster/node03/disko.nix index 08109e1..27192fc 100644 --- a/nix/nodes/vm-cluster/node03/disko.nix +++ b/nix/nodes/vm-cluster/node03/disko.nix @@ -1,33 +1,5 @@ -{ config, lib, ... }: +{ ... }: { - disko.devices = { - disk.main = { - type = "disk"; - device = config.plasmacloud.install.diskDevice or "/dev/vda"; - content = { - type = "gpt"; - partitions = { - ESP = { - size = "512M"; - type = "EF00"; - content = { - type = "filesystem"; - format = "vfat"; - mountpoint = "/boot"; - mountOptions = [ "umask=0077" ]; - }; - }; - root = { - size = "100%"; - content = { - type = "filesystem"; - format = "ext4"; - mountpoint = "/"; - }; - }; - }; - }; - }; - }; + imports = [ ../common-disko.nix ]; } diff --git a/nix/templates/iam-flaredb-minimal.nix b/nix/templates/iam-flaredb-minimal.nix index b2da606..d8fa278 100644 --- a/nix/templates/iam-flaredb-minimal.nix +++ b/nix/templates/iam-flaredb-minimal.nix @@ -1,9 +1,9 @@ { inputs, pkgs, config, ... }: { # Minimal footprint: chainfire + flaredb + iam only (for auth/metadata testing). - imports = [ inputs.self.nixosModules.plasmacloud ]; + imports = [ inputs.self.nixosModules.ultracloud ]; - networking.hostName = "plasmacloud-minimal"; + networking.hostName = "ultracloud-minimal"; networking.firewall.allowedTCPPorts = [ 8081 8082 9000 ]; services.chainfire.enable = true; diff --git a/nix/templates/plasmacloud-3node-ha.nix b/nix/templates/ultracloud-3node-ha.nix similarity index 95% rename from nix/templates/plasmacloud-3node-ha.nix rename to nix/templates/ultracloud-3node-ha.nix index c84adde..b653926 100644 --- a/nix/templates/plasmacloud-3node-ha.nix +++ b/nix/templates/ultracloud-3node-ha.nix @@ -1,9 +1,9 @@ { inputs, pkgs, lib, config, ... }: { # Example: 3-node HA control plane. Replace IPs/hostnames to match your cluster. 
- imports = [ inputs.self.nixosModules.plasmacloud ]; + imports = [ inputs.self.nixosModules.ultracloud ]; - networking.hostName = lib.mkDefault "plasmacloud-node01"; + networking.hostName = lib.mkDefault "ultracloud-node01"; networking.firewall.allowedTCPPorts = [ 8080 8081 8082 8083 8084 8085 8086 8087 9000 9001 9002 2379 2380 2381 2479 2480 ]; # Core data stores diff --git a/nix/templates/plasmacloud-single-node.nix b/nix/templates/ultracloud-single-node.nix similarity index 90% rename from nix/templates/plasmacloud-single-node.nix rename to nix/templates/ultracloud-single-node.nix index 4a1f604..2bbb702 100644 --- a/nix/templates/plasmacloud-single-node.nix +++ b/nix/templates/ultracloud-single-node.nix @@ -1,9 +1,9 @@ { inputs, pkgs, config, ... }: { - # Import all PlasmaCloud modules (chainfire, flaredb, iam, plasmavmc, prismnet, flashdns, fiberlb, lightningstor, creditservice). - imports = [ inputs.self.nixosModules.plasmacloud ]; + # Import all UltraCloud modules (chainfire, flaredb, iam, plasmavmc, prismnet, flashdns, fiberlb, lightningstor, creditservice). + imports = [ inputs.self.nixosModules.ultracloud ]; - networking.hostName = "plasmacloud-single"; + networking.hostName = "ultracloud-single"; networking.firewall.allowedTCPPorts = [ 8080 8081 8082 8083 8084 8085 8086 8087 9000 9001 9002 ]; # Enable all services with default ports and data dirs. diff --git a/nix/test-cluster/README.md b/nix/test-cluster/README.md index e63ddc3..c7b1000 100644 --- a/nix/test-cluster/README.md +++ b/nix/test-cluster/README.md @@ -1,6 +1,6 @@ -# PhotonCloud VM Test Cluster +# UltraCloud VM Test Cluster -`nix/test-cluster` is the canonical local validation path for PhotonCloud. +`nix/test-cluster` is the canonical local validation path for UltraCloud. It boots six QEMU VMs, treats them as hardware-like nodes, and validates representative control-plane, worker, and gateway behavior over SSH and service endpoints. 
All VM images are built on the host in a single Nix invocation and then booted as prebuilt artifacts. The guests do not compile the stack locally. @@ -47,6 +47,8 @@ nix run ./nix/test-cluster#cluster -- smoke nix run ./nix/test-cluster#cluster -- fresh-smoke nix run ./nix/test-cluster#cluster -- demo-vm-webapp nix run ./nix/test-cluster#cluster -- fresh-demo-vm-webapp +nix run ./nix/test-cluster#cluster -- serve-vm-webapp +nix run ./nix/test-cluster#cluster -- fresh-serve-vm-webapp nix run ./nix/test-cluster#cluster -- matrix nix run ./nix/test-cluster#cluster -- fresh-matrix nix run ./nix/test-cluster#cluster -- bench-storage @@ -63,7 +65,9 @@ Preferred entrypoint for publishable verification: `nix run ./nix/test-cluster#c `make cluster-smoke` is a convenience wrapper for the same clean host-build VM validation flow. -`nix run ./nix/test-cluster#cluster -- demo-vm-webapp` creates a PrismNet-attached VM, boots a tiny web app inside the guest, stores its state in SQLite on the attached data volume, and then proves that the counter survives guest restart plus cross-worker migration. +`nix run ./nix/test-cluster#cluster -- demo-vm-webapp` creates a PrismNet-attached VM, boots a tiny web app inside the guest, stores its counter in FlareDB, writes JSON snapshots to LightningStor object storage, and then proves that the state survives guest restart plus cross-worker migration. The attached data volume is still used by the guest for its local bootstrap config. + +`nix run ./nix/test-cluster#cluster -- serve-vm-webapp` runs the same VM web app flow but leaves the guest running and prints a `http://127.0.0.1:<port>/` URL that is forwarded from the host into the tenant network so you can inspect `/state` or send `POST /visit` yourself. 
`nix run ./nix/test-cluster#cluster -- matrix` reuses the current running cluster to exercise composed service scenarios such as `prismnet + flashdns + fiberlb`, PrismNet-backed VM hosting with `plasmavmc + prismnet + coronafs + lightningstor`, the Kubernetes-style hosting bundle, and API-gateway-mediated `nightlight` / `creditservice` flows. @@ -92,7 +96,7 @@ ## Runtime state -The harness stores build links and VM runtime state under `${PHOTON_VM_DIR:-$HOME/.photoncloud-test-cluster}` for the default profile and uses profile-suffixed siblings such as `${PHOTON_VM_DIR:-$HOME/.photoncloud-test-cluster}-storage` for alternate build profiles. +The harness stores build links and VM runtime state under `${PHOTON_VM_DIR:-$HOME/.ultracloud-test-cluster}` for the default profile and uses profile-suffixed siblings such as `${PHOTON_VM_DIR:-$HOME/.ultracloud-test-cluster}-storage` for alternate build profiles. Logs for each VM are written to `<state-dir>/<node>/vm.log`. 
## Scope note diff --git a/nix/test-cluster/common.nix b/nix/test-cluster/common.nix index ccb5a5b..d1fb637 100644 --- a/nix/test-cluster/common.nix +++ b/nix/test-cluster/common.nix @@ -1,4 +1,4 @@ -# PhotonCloud 6-Node Test Cluster +# UltraCloud 6-Node Test Cluster # # Common configuration shared by all nodes # @@ -12,8 +12,7 @@ in { imports = [ (modulesPath + "/virtualisation/qemu-vm.nix") - ../../nix-nos/modules/default.nix - ../modules/plasmacloud-cluster.nix + ../modules/ultracloud-cluster.nix ]; options.photonTestCluster = { @@ -25,7 +24,7 @@ in vdeSock = lib.mkOption { type = lib.types.str; - default = "/tmp/photoncloud-test-cluster-vde.sock"; + default = "/tmp/ultracloud-test-cluster-vde.sock"; description = "VDE control socket path used for the east-west cluster NIC."; }; @@ -82,7 +81,7 @@ in }; }; users.mutableUsers = false; - users.users.root.hashedPassword = "$6$photoncloud$aUJCEE5wm/b5O.9KIKGm84qUWdWXwnebsFEiMBF7u9Y7AOWodaMrjbbKGMOf0X59VJyJeMRsgbT7VWeqMHpUe."; + users.users.root.hashedPassword = "$6$ultracloud$aUJCEE5wm/b5O.9KIKGm84qUWdWXwnebsFEiMBF7u9Y7AOWodaMrjbbKGMOf0X59VJyJeMRsgbT7VWeqMHpUe."; # qemu-vm.nix provides the default SLiRP NIC as eth0. # The extra multicast NIC above becomes eth1 and carries intra-cluster traffic. 
@@ -148,9 +147,9 @@ in qemu ]; - plasmacloud.cluster = { + ultracloud.cluster = { enable = true; - name = "photoncloud-test"; + name = "ultracloud-test"; nodes = { node01 = { @@ -272,7 +271,7 @@ in description = "General-purpose native worker pool"; nodeClass = "worker-linux"; labels = { - "pool.photoncloud.io/name" = "general"; + "pool.ultracloud.io/name" = "general"; }; }; }; diff --git a/nix/test-cluster/flake.lock b/nix/test-cluster/flake.lock index 051a897..33e1d02 100644 --- a/nix/test-cluster/flake.lock +++ b/nix/test-cluster/flake.lock @@ -3,7 +3,7 @@ "disko": { "inputs": { "nixpkgs": [ - "photoncloud", + "ultracloud", "nixpkgs" ] }, @@ -39,32 +39,13 @@ "type": "github" } }, - "nix-nos": { - "inputs": { - "nixpkgs": [ - "photoncloud", - "nixpkgs" - ] - }, - "locked": { - "path": "./nix-nos", - "type": "path" - }, - "original": { - "path": "./nix-nos", - "type": "path" - }, - "parent": [ - "photoncloud" - ] - }, "nixpkgs": { "locked": { - "lastModified": 1769018530, - "narHash": "sha256-MJ27Cy2NtBEV5tsK+YraYr2g851f3Fl1LpNHDzDX15c=", + "lastModified": 1775036866, + "narHash": "sha256-ZojAnPuCdy657PbTq5V0Y+AHKhZAIwSIT2cb8UgAz/U=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "88d3861acdd3d2f0e361767018218e51810df8a1", + "rev": "6201e203d09599479a3b3450ed24fa81537ebc4e", "type": "github" }, "original": { @@ -90,34 +71,16 @@ "type": "github" } }, - "photoncloud": { - "inputs": { - "disko": "disko", - "flake-utils": "flake-utils", - "nix-nos": "nix-nos", - "nixpkgs": "nixpkgs_2", - "rust-overlay": "rust-overlay" - }, - "locked": { - "path": "../..", - "type": "path" - }, - "original": { - "path": "../..", - "type": "path" - }, - "parent": [] - }, "root": { "inputs": { "nixpkgs": "nixpkgs", - "photoncloud": "photoncloud" + "ultracloud": "ultracloud" } }, "rust-overlay": { "inputs": { "nixpkgs": [ - "photoncloud", + "ultracloud", "nixpkgs" ] }, @@ -149,6 +112,38 @@ "repo": "default", "type": "github" } + }, + "systems_2": { + "locked": { + "lastModified": 
1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "id": "systems", + "type": "indirect" + } + }, + "ultracloud": { + "inputs": { + "disko": "disko", + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs_2", + "rust-overlay": "rust-overlay", + "systems": "systems_2" + }, + "locked": { + "path": "../..", + "type": "path" + }, + "original": { + "path": "../..", + "type": "path" + }, + "parent": [] } }, "root": "root", diff --git a/nix/test-cluster/flake.nix b/nix/test-cluster/flake.nix index 88ab10f..ed040f4 100644 --- a/nix/test-cluster/flake.nix +++ b/nix/test-cluster/flake.nix @@ -1,12 +1,12 @@ { - description = "PhotonCloud Test Cluster"; + description = "UltraCloud Test Cluster"; inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; - photoncloud.url = "path:../.."; + ultracloud.url = "path:../.."; }; - outputs = { self, nixpkgs, photoncloud }: + outputs = { self, nixpkgs, ultracloud }: let system = "x86_64-linux"; pkgs = import nixpkgs { inherit system; }; @@ -41,18 +41,18 @@ modules = [ ./${nodeName}.nix { - nixpkgs.overlays = [ photoncloud.overlays.default testClusterOverlay ]; + nixpkgs.overlays = [ ultracloud.overlays.default testClusterOverlay ]; } ]; }; clusterHarness = pkgs.writeShellApplication { - name = "photoncloud-test-cluster"; + name = "ultracloud-test-cluster"; runtimeInputs = with pkgs; [ bash coreutils curl - photoncloud.packages.${system}.deployer-ctl + ultracloud.packages.${system}.deployer-ctl findutils gawk gitMinimal @@ -102,12 +102,12 @@ vmGuestImage = vmGuestImage; vmBenchGuestImage = vmBenchGuestImage; deployerClusterState = - self.nixosConfigurations.node06.config.system.build.plasmacloudDeployerClusterState; + self.nixosConfigurations.node06.config.system.build.ultracloudDeployerClusterState; }; apps.${system}.cluster = { type = "app"; - program = 
"${clusterHarness}/bin/photoncloud-test-cluster"; + program = "${clusterHarness}/bin/ultracloud-test-cluster"; }; devShells.${system}.default = pkgs.mkShell { diff --git a/nix/test-cluster/node01.nix b/nix/test-cluster/node01.nix index c04cbc3..a663985 100644 --- a/nix/test-cluster/node01.nix +++ b/nix/test-cluster/node01.nix @@ -11,7 +11,7 @@ ../modules/flaredb.nix ../modules/iam.nix ../modules/prismnet.nix - ../modules/plasmacloud-tenant-networking.nix + ../modules/ultracloud-tenant-networking.nix ../modules/flashdns.nix ../modules/fiberlb.nix ../modules/k8shost.nix @@ -165,14 +165,14 @@ flashdnsAddr = "http://10.100.0.11:50084"; }; - services.lightningstor.s3AccessKeyId = "photoncloud-test"; - services.lightningstor.s3SecretKey = "photoncloud-test-secret"; + services.lightningstor.s3AccessKeyId = "ultracloud-test"; + services.lightningstor.s3SecretKey = "ultracloud-test-secret"; - plasmacloud.tenantNetworking = { + ultracloud.tenantNetworking = { enable = true; endpoint = "http://127.0.0.1:50081"; iamEndpoint = "http://127.0.0.1:50080"; - controllerPrincipalId = "plasmacloud-reconciler"; + controllerPrincipalId = "ultracloud-reconciler"; prune = true; tenants = [ { diff --git a/nix/test-cluster/node06.nix b/nix/test-cluster/node06.nix index 0127616..38ce4f9 100644 --- a/nix/test-cluster/node06.nix +++ b/nix/test-cluster/node06.nix @@ -36,42 +36,42 @@ { name = "iam-auth"; pathPrefix = "/api/v1/auth"; - upstream = "http://${config.plasmacloud.cluster.nodes.node01.ip}:8083"; + upstream = "http://${config.ultracloud.cluster.nodes.node01.ip}:8083"; } { name = "prismnet-vpcs"; pathPrefix = "/api/v1/vpcs"; - upstream = "http://${config.plasmacloud.cluster.nodes.node01.ip}:8087"; + upstream = "http://${config.ultracloud.cluster.nodes.node01.ip}:8087"; } { name = "prismnet-subnets"; pathPrefix = "/api/v1/subnets"; - upstream = "http://${config.plasmacloud.cluster.nodes.node01.ip}:8087"; + upstream = "http://${config.ultracloud.cluster.nodes.node01.ip}:8087"; } { name = 
"prismnet-routers"; pathPrefix = "/api/v1/routers"; - upstream = "http://${config.plasmacloud.cluster.nodes.node01.ip}:8087"; + upstream = "http://${config.ultracloud.cluster.nodes.node01.ip}:8087"; } { name = "prismnet-security-groups"; pathPrefix = "/api/v1/security-groups"; - upstream = "http://${config.plasmacloud.cluster.nodes.node01.ip}:8087"; + upstream = "http://${config.ultracloud.cluster.nodes.node01.ip}:8087"; } { name = "prismnet-ports"; pathPrefix = "/api/v1/ports"; - upstream = "http://${config.plasmacloud.cluster.nodes.node01.ip}:8087"; + upstream = "http://${config.ultracloud.cluster.nodes.node01.ip}:8087"; } { name = "prismnet-service-ip-pools"; pathPrefix = "/api/v1/service-ip-pools"; - upstream = "http://${config.plasmacloud.cluster.nodes.node01.ip}:8087"; + upstream = "http://${config.ultracloud.cluster.nodes.node01.ip}:8087"; } { name = "plasmavmc-vms"; pathPrefix = "/api/v1/vms"; - upstream = "http://${config.plasmacloud.cluster.nodes.node01.ip}:8084"; + upstream = "http://${config.ultracloud.cluster.nodes.node01.ip}:8084"; timeoutMs = 1200000; } { @@ -117,7 +117,7 @@ requireChainfire = true; bootstrapToken = "test-bootstrap-token"; adminToken = "test-admin-token"; - bootstrapFlakeBundle = pkgs.plasmacloudFlakeBundle; + bootstrapFlakeBundle = pkgs.ultracloudFlakeBundle; seedClusterState = true; }; diff --git a/nix/test-cluster/run-cluster.sh b/nix/test-cluster/run-cluster.sh index 6d3ad09..2babaac 100755 --- a/nix/test-cluster/run-cluster.sh +++ b/nix/test-cluster/run-cluster.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# PhotonCloud VM test-cluster harness +# UltraCloud VM test-cluster harness # # Commands: # build Build one or more VM derivations @@ -27,8 +27,8 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" CLUSTER_DIR="${SCRIPT_DIR}" CLUSTER_FLAKE_REF="${PHOTON_CLUSTER_FLAKE:-${CLUSTER_DIR}}" -VM_DIR_BASE="${PHOTON_VM_DIR:-${HOME}/.photoncloud-test-cluster}" -VDE_SWITCH_DIR_BASE="${PHOTON_CLUSTER_VDE_SWITCH_DIR:-/tmp/photoncloud-test-cluster-vde.sock}" +VM_DIR_BASE="${PHOTON_VM_DIR:-${HOME}/.ultracloud-test-cluster}" +VDE_SWITCH_DIR_BASE="${PHOTON_CLUSTER_VDE_SWITCH_DIR:-/tmp/ultracloud-test-cluster-vde.sock}" CORONAFS_API_PORT="${PHOTON_CORONAFS_API_PORT:-50088}" CORONAFS_VOLUME_ROOT="/var/lib/coronafs/volumes" SSH_PASSWORD="${PHOTON_VM_ROOT_PASSWORD:-test}" @@ -38,6 +38,7 @@ UNIT_WAIT_TIMEOUT="${PHOTON_VM_UNIT_WAIT_TIMEOUT:-240}" UNIT_CHECK_TIMEOUT="${PHOTON_VM_UNIT_CHECK_TIMEOUT:-15}" HTTP_WAIT_TIMEOUT="${PHOTON_VM_HTTP_WAIT_TIMEOUT:-180}" VM_DEMO_HTTP_PORT="${PHOTON_VM_DEMO_HTTP_PORT:-8080}" +VM_DEMO_FORWARD_START_PORT="${PHOTON_VM_DEMO_FORWARD_START_PORT:-18280}" KVM_WAIT_TIMEOUT="${PHOTON_VM_KVM_WAIT_TIMEOUT:-180}" FLAREDB_WAIT_TIMEOUT="${PHOTON_VM_FLAREDB_WAIT_TIMEOUT:-180}" GRPCURL_MAX_MSG_SIZE="${PHOTON_VM_GRPCURL_MAX_MSG_SIZE:-1073741824}" @@ -90,6 +91,7 @@ MATRIX_TENANT_PROJECT_ID="matrix-tenant-project" MATRIX_TENANT_VPC_NAME="matrix-vpc" MATRIX_TENANT_SUBNET_NAME="matrix-subnet" MATRIX_TENANT_ROUTER_NAME="matrix-router" +MATRIX_TENANT_GATEWAY_IP="10.62.10.1" MATRIX_TENANT_DEFAULT_SG_NAME="vm-default" MATRIX_TENANT_WEB_SG_NAME="web" MATRIX_TENANT_CLUSTER_POOL_NAME="cluster-services" @@ -229,7 +231,7 @@ acquire_cluster_lock() { fi if [[ -n "${owner}" ]] && ! 
kill -0 "${owner}" >/dev/null 2>&1; then - warn "reclaiming stale PhotonCloud test-cluster lock from pid ${owner}" + warn "reclaiming stale UltraCloud test-cluster lock from pid ${owner}" rm -f "${lock_dir}/pid" rmdir "${lock_dir}" 2>/dev/null || true if mkdir "${lock_dir}" 2>/dev/null; then @@ -241,7 +243,7 @@ acquire_cluster_lock() { fi fi - die "another PhotonCloud test-cluster run is active${owner:+ (pid ${owner})}; lock: ${lock_dir}" + die "another UltraCloud test-cluster run is active${owner:+ (pid ${owner})}; lock: ${lock_dir}" } lightningstor_data_root() { @@ -376,6 +378,46 @@ capture_stable_lightningstor_count_triplet() { done } +capture_stable_equal_lightningstor_count_triplet() { + local min_node01="${1:-0}" + local min_node04="${2:-0}" + local min_node05="${3:-0}" + local settle_secs="${4:-6}" + local timeout="${5:-${HTTP_WAIT_TIMEOUT}}" + local deadline=$((SECONDS + timeout)) + local stable_since=0 + local last_count="" + local count_node01=0 + local count_node04=0 + local count_node05=0 + + while true; do + read -r count_node01 count_node04 count_node05 < <(lightningstor_count_triplet) + if (( count_node01 >= min_node01 )) && + (( count_node04 >= min_node04 )) && + (( count_node05 >= min_node05 )) && + (( count_node01 == count_node04 )) && + (( count_node01 == count_node05 )); then + if [[ "${count_node01}" == "${last_count}" ]]; then + if (( stable_since > 0 )) && (( SECONDS - stable_since >= settle_secs )); then + printf '%s %s %s\n' "${count_node01}" "${count_node04}" "${count_node05}" + return 0 + fi + else + last_count="${count_node01}" + stable_since="${SECONDS}" + fi + else + last_count="" + stable_since=0 + fi + if (( SECONDS >= deadline )); then + die "timed out waiting for distributed LightningStor counts to converge: minimum ${min_node01}/${min_node04}/${min_node05}, last ${count_node01:-?}/${count_node04:-?}/${count_node05:-?}" + fi + sleep 2 + done +} + wait_for_lightningstor_counts_greater_than() { local before_node01="$1" local 
before_node04="$2" @@ -1245,6 +1287,175 @@ stop_ssh_tunnel() { fi } +start_vm_demo_gateway_proxy() { + local node="$1" + local listen_port="$2" + local target_host="$3" + local target_port="$4" + + ssh_node_script "${node}" "${listen_port}" "${target_host}" "${target_port}" <<'EOF' +set -euo pipefail +listen_port="$1" +target_host="$2" +target_port="$3" +script_path="/run/photon-vm-demo-tcp-proxy.py" +pid_file="/run/photon-vm-demo-proxy-${listen_port}.pid" +log_file="/var/log/photon-vm-demo-proxy-${listen_port}.log" +proxy_match="python3 ${script_path} 0.0.0.0 ${listen_port} ${target_host} ${target_port}" + +cat >"${script_path}" <<'PY' +import socket +import sys +import threading + +LISTEN_HOST = sys.argv[1] +LISTEN_PORT = int(sys.argv[2]) +TARGET_HOST = sys.argv[3] +TARGET_PORT = int(sys.argv[4]) + + +def pump(source, destination): + try: + while True: + data = source.recv(65536) + if not data: + break + destination.sendall(data) + except OSError: + pass + finally: + try: + destination.shutdown(socket.SHUT_WR) + except OSError: + pass + + +def handle(client): + upstream = None + try: + upstream = socket.create_connection((TARGET_HOST, TARGET_PORT), timeout=10) + upstream.settimeout(None) + client.settimeout(None) + upstream_to_client = threading.Thread(target=pump, args=(upstream, client), daemon=True) + upstream_to_client.start() + pump(client, upstream) + upstream_to_client.join(timeout=1) + finally: + for conn in (client, upstream): + if conn is None: + continue + try: + conn.close() + except OSError: + pass + + +server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) +server.bind((LISTEN_HOST, LISTEN_PORT)) +server.listen(128) + +while True: + client, _ = server.accept() + threading.Thread(target=handle, args=(client,), daemon=True).start() +PY +chmod 0755 "${script_path}" + +if [[ -f "${pid_file}" ]] && kill -0 "$(cat "${pid_file}")" 2>/dev/null; then + exit 0 +fi + +if pgrep -f -- 
"${proxy_match}" >/dev/null 2>&1; then + pkill -f -- "${proxy_match}" >/dev/null 2>&1 || true + for _ in $(seq 1 20); do + if ! pgrep -f -- "${proxy_match}" >/dev/null 2>&1 && ! ss -H -ltn "( sport = :${listen_port} )" | grep -q .; then + break + fi + sleep 1 + done +fi + +if ss -H -ltn "( sport = :${listen_port} )" | grep -q .; then + echo "port ${listen_port} already in use on $(hostname)" >&2 + ss -H -ltnp "( sport = :${listen_port} )" >&2 || true + exit 1 +fi + +rm -f "${pid_file}" "${log_file}" +nohup python3 "${script_path}" 0.0.0.0 "${listen_port}" "${target_host}" "${target_port}" >"${log_file}" 2>&1 & +echo $! >"${pid_file}" + +for _ in $(seq 1 20); do + if kill -0 "$(cat "${pid_file}")" 2>/dev/null && ss -H -ltn "( sport = :${listen_port} )" | grep -q .; then + exit 0 + fi + sleep 1 +done + +cat "${log_file}" >&2 || true +exit 1 +EOF +} + +stop_vm_demo_gateway_proxy() { + local node="$1" + local listen_port="$2" + + ssh_node_script "${node}" "${listen_port}" <<'EOF' +set -euo pipefail +listen_port="$1" +pid_file="/run/photon-vm-demo-proxy-${listen_port}.pid" +script_path="/run/photon-vm-demo-tcp-proxy.py" + +if [[ -f "${pid_file}" ]]; then + pid="$(cat "${pid_file}")" + if kill -0 "${pid}" 2>/dev/null; then + kill "${pid}" || true + for _ in $(seq 1 10); do + if ! kill -0 "${pid}" 2>/dev/null; then + break + fi + sleep 1 + done + if kill -0 "${pid}" 2>/dev/null; then + kill -9 "${pid}" || true + fi + fi + rm -f "${pid_file}" +fi + +pkill -f -- "python3 ${script_path} 0.0.0.0 ${listen_port} " >/dev/null 2>&1 || true +for _ in $(seq 1 10); do + if ! 
ss -H -ltn "( sport = :${listen_port} )" | grep -q .; then + break + fi + sleep 1 +done +EOF +} + +start_vm_demo_gateway_proxies() { + local node + + for node in node04 node05; do + start_vm_demo_gateway_proxy "${node}" 50080 10.100.0.11 50080 + start_vm_demo_gateway_proxy "${node}" 2479 10.100.0.11 2479 + start_vm_demo_gateway_proxy "${node}" 8082 10.100.0.11 8082 + start_vm_demo_gateway_proxy "${node}" 9000 10.100.0.11 9000 + done +} + +stop_vm_demo_gateway_proxies() { + local node + + for node in node04 node05; do + stop_vm_demo_gateway_proxy "${node}" 50080 || true + stop_vm_demo_gateway_proxy "${node}" 2479 || true + stop_vm_demo_gateway_proxy "${node}" 8082 || true + stop_vm_demo_gateway_proxy "${node}" 9000 || true + done +} + issue_project_admin_token() { local iam_port="$1" local org_id="$2" @@ -1688,6 +1899,101 @@ download_lightningstor_object_to_file() { | base64 -d >"${output_path}" } +lightningstor_head_object_json() { + local ls_port="$1" + local token="$2" + local bucket="$3" + local key="$4" + local head_json output + + head_json="$(jq -cn --arg bucket "${bucket}" --arg key "${key}" '{bucket:$bucket, key:$key}')" + output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "${head_json}" \ + 127.0.0.1:"${ls_port}" lightningstor.v1.ObjectService/HeadObject + )" || die "failed to head LightningStor object ${bucket}/${key}: ${output}" + printf '%s\n' "${output}" +} + +lightningstor_list_object_keys() { + local ls_port="$1" + local token="$2" + local bucket="$3" + local list_json output + + list_json="$(jq -cn --arg bucket "${bucket}" '{bucket:$bucket, maxKeys:1000}')" + output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "${list_json}" \ + 127.0.0.1:"${ls_port}" lightningstor.v1.ObjectService/ListObjects + )" || die "failed to 
list LightningStor bucket ${bucket}: ${output}" + printf '%s\n' "${output}" | jq -r '.objects[]?.key // empty' +} + +delete_lightningstor_bucket_recursive() { + local ls_port="$1" + local token="$2" + local bucket="$3" + local deadline=$((SECONDS + HTTP_WAIT_TIMEOUT)) + local delete_bucket_json output + + delete_bucket_json="$(jq -cn --arg bucket "${bucket}" '{bucket:$bucket}')" + + while true; do + local -a object_keys=() + mapfile -t object_keys < <(lightningstor_list_object_keys "${ls_port}" "${token}" "${bucket}") + + if (( ${#object_keys[@]} == 0 )); then + output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "${delete_bucket_json}" \ + 127.0.0.1:"${ls_port}" lightningstor.v1.BucketService/DeleteBucket + )" && return 0 + + if grep -Eqi 'NotFound|NoSuchBucket|not found' <<<"${output}"; then + return 0 + fi + if (( SECONDS >= deadline )); then + die "timed out deleting LightningStor bucket ${bucket}: ${output}" + fi + sleep 2 + continue + fi + + local key delete_json + for key in "${object_keys[@]}"; do + delete_json="$(jq -cn --arg bucket "${bucket}" --arg key "${key}" '{bucket:$bucket, key:$key}')" + output="$( + grpcurl_capture -plaintext \ + -H "authorization: Bearer ${token}" \ + -import-path "${LIGHTNINGSTOR_PROTO_DIR}" \ + -proto "${LIGHTNINGSTOR_PROTO}" \ + -d "${delete_json}" \ + 127.0.0.1:"${ls_port}" lightningstor.v1.ObjectService/DeleteObject + )" && continue + + if grep -Eqi 'NotFound|NoSuchKey|not found' <<<"${output}"; then + continue + fi + die "failed to delete LightningStor object ${bucket}/${key}: ${output}" + done + + if (( SECONDS >= deadline )); then + die "timed out deleting LightningStor bucket ${bucket}" + fi + sleep 2 + done +} + calc_mib_per_s() { local bytes="$1" local elapsed_ns="$2" @@ -1761,6 +2067,21 @@ exit 1 EOS } +allocate_local_listener_port() { + local start_port="${1:-18280}" + local end_port="${2:-18999}" + 
local port + + for ((port=start_port; port<=end_port; port++)); do + if ! ss -ltnH "( sport = :${port} )" | grep -q .; then + printf '%s\n' "${port}" + return 0 + fi + done + + return 1 +} + run_remote_fio_json() { local node="$1" local target_path="$2" @@ -2329,27 +2650,234 @@ curl -fsS -X "${method}" "${url}" EOF } +vm_demo_create_todo_json() { + local node="$1" + local ip="$2" + local title="$3" + local details="$4" + local attachment_name="$5" + local attachment_body="$6" + local title_b64 details_b64 attachment_name_b64 attachment_body_b64 + + title_b64="$(printf '%s' "${title}" | base64 -w0)" + details_b64="$(printf '%s' "${details}" | base64 -w0)" + attachment_name_b64="$(printf '%s' "${attachment_name}" | base64 -w0)" + attachment_body_b64="$(printf '%s' "${attachment_body}" | base64 -w0)" + + ssh_node_script "${node}" "$(vm_demo_url "${ip}" "/api/todos")" "${title_b64}" "${details_b64}" "${attachment_name_b64}" "${attachment_body_b64}" <<'EOF' +set -euo pipefail +url="$1" +title="$(printf '%s' "$2" | base64 -d)" +details="$(printf '%s' "$3" | base64 -d)" +attachment_name="$(printf '%s' "$4" | base64 -d)" +attachment_body="$(printf '%s' "$5" | base64 -d)" +attachment_file="$(mktemp)" +trap 'rm -f "${attachment_file}"' EXIT +printf '%s' "${attachment_body}" >"${attachment_file}" +curl -fsS -X POST \ + -F "title=${title}" \ + -F "details=${details}" \ + -F "attachment=@${attachment_file};type=text/plain;filename=${attachment_name}" \ + "${url}" +EOF +} + +vm_demo_download_attachment() { + local node="$1" + local ip="$2" + local todo_id="$3" + + ssh_node_script "${node}" "$(vm_demo_url "${ip}" "/api/todos/${todo_id}/attachment")" <<'EOF' +set -euo pipefail +url="$1" +curl -fsS "${url}" +EOF +} + +assert_vm_demo_attachment_body() { + local node="$1" + local ip="$2" + local todo_id="$3" + local expected_body="$4" + local actual_body + + actual_body="$(vm_demo_download_attachment "${node}" "${ip}" "${todo_id}")" + [[ "${actual_body}" == "${expected_body}" ]] || 
die "unexpected attachment body for VM demo todo ${todo_id}: ${actual_body}" +} + assert_vm_demo_state() { local state_json="$1" - local expected_visits="$2" - local expected_root_boots="$3" - local expected_data_boots="$4" + local expected_todo_count="$2" + local expected_attachment_count="$3" + local expected_root_boots="$4" + local expected_data_boots="$5" + local expected_latest_todo_title="${6:-}" + local expected_latest_attachment_name="${7:-}" + local expected_latest_attachment_key="" + + if (( expected_todo_count > 0 )) && [[ -n "${expected_latest_attachment_name}" ]]; then + printf -v expected_latest_attachment_key 'attachments/%06d/%s' "${expected_todo_count}" "${expected_latest_attachment_name}" + fi printf '%s' "${state_json}" | jq -e \ - --argjson visits "${expected_visits}" \ + --argjson todo_count "${expected_todo_count}" \ + --argjson attachment_count "${expected_attachment_count}" \ --argjson root_boots "${expected_root_boots}" \ --argjson data_boots "${expected_data_boots}" \ --argjson listen_port "${VM_DEMO_HTTP_PORT}" \ - --arg db_path "/mnt/photon-vm-data/demo.sqlite3" ' + --arg state_backend "flaredb" \ + --arg state_endpoint "http://${MATRIX_TENANT_GATEWAY_IP}:8082" \ + --arg state_namespace "validation" \ + --arg object_store_backend "lightningstor" \ + --arg object_store_endpoint "http://${MATRIX_TENANT_GATEWAY_IP}:9000" \ + --arg latest_object_key "state.json" \ + --arg latest_todo_title "${expected_latest_todo_title}" \ + --arg latest_attachment_name "${expected_latest_attachment_name}" \ + --arg latest_attachment_key "${expected_latest_attachment_key}" ' .status == "ok" - and .visits == $visits + and (.hostname | type == "string" and length > 0) + and .todo_count == $todo_count + and .attachment_count == $attachment_count and .root_boot_count == $root_boots and .data_boot_count == $data_boots and .listen_port == $listen_port - and .db_path == $db_path + and .state_backend == $state_backend + and .state_endpoint == $state_endpoint + and 
.state_namespace == $state_namespace + and (.todo_prefix | type == "string" and startswith("vm-demo-") and endswith("/todos/")) + and (.next_id_key | type == "string" and startswith("vm-demo-") and endswith("/next-id")) + and .object_store_backend == $object_store_backend + and .object_store_endpoint == $object_store_endpoint + and (.bucket | type == "string" and length > 0) + and .latest_object_key == $latest_object_key + and .latest_object_todo_count == $todo_count + and (.todos | type == "array" and length == $todo_count) + and ( + if $todo_count == 0 then + .latest_todo_id == 0 + and .latest_todo_title == "" + and .latest_attachment_object_key == "" + and .latest_attachment_filename == "" + else + .latest_todo_id == $todo_count + and .latest_todo_title == $latest_todo_title + and .latest_attachment_filename == $latest_attachment_name + and .latest_attachment_object_key == $latest_attachment_key + and .todos[-1].id == $todo_count + and .todos[-1].title == $latest_todo_title + and .todos[-1].attachment.filename == $latest_attachment_name + and .todos[-1].attachment.object_key == $latest_attachment_key + and .todos[-1].attachment_url == ("/api/todos/" + ($todo_count | tostring) + "/attachment") + end + ) ' >/dev/null || die "unexpected VM demo payload: ${state_json}" } +vm_demo_flaredb_value() { + local namespace="$1" + local state_key="$2" + + ssh_node_script node01 "${namespace}" "${state_key}" <<'EOF' +set -euo pipefail +namespace="$1" +state_key="$2" + +curl -fsS --get http://127.0.0.1:8082/api/v1/scan \ + --data-urlencode "start=${state_key}" \ + --data-urlencode "end=${state_key}~" \ + --data-urlencode "namespace=${namespace}" \ + | jq -er '.data.items[0].value' +EOF +} + +vm_demo_flaredb_todo_count() { + local namespace="$1" + local todo_prefix="$2" + + ssh_node_script node01 "${namespace}" "${todo_prefix}" <<'EOF' +set -euo pipefail +namespace="$1" +todo_prefix="$2" + +curl -fsS --get http://127.0.0.1:8082/api/v1/scan \ + --data-urlencode 
"start=${todo_prefix}" \ + --data-urlencode "end=${todo_prefix}~" \ + --data-urlencode "namespace=${namespace}" \ + | jq -er '.data.items | length' +EOF +} + +assert_vm_demo_backend_artifacts() { + local token="$1" + local state_json="$2" + local expected_todo_count="$3" + local expected_attachment_count="$4" + local expected_root_boots="$5" + local expected_data_boots="$6" + local expected_latest_attachment_body="${7:-}" + local bucket state_namespace todo_prefix next_id_key latest_object_key latest_attachment_object_key latest_attachment_filename + local flaredb_todo_count flaredb_next_id snapshot_file latest_attachment_file + + bucket="$(printf '%s' "${state_json}" | jq -r '.bucket')" + state_namespace="$(printf '%s' "${state_json}" | jq -r '.state_namespace')" + todo_prefix="$(printf '%s' "${state_json}" | jq -r '.todo_prefix')" + next_id_key="$(printf '%s' "${state_json}" | jq -r '.next_id_key')" + latest_object_key="$(printf '%s' "${state_json}" | jq -r '.latest_object_key')" + latest_attachment_object_key="$(printf '%s' "${state_json}" | jq -r '.latest_attachment_object_key')" + latest_attachment_filename="$(printf '%s' "${state_json}" | jq -r '.latest_attachment_filename')" + + flaredb_todo_count="$(vm_demo_flaredb_todo_count "${state_namespace}" "${todo_prefix}")" + [[ "${flaredb_todo_count}" == "${expected_todo_count}" ]] || die "unexpected FlareDB todo count for ${state_namespace}/${todo_prefix}: expected ${expected_todo_count}, got ${flaredb_todo_count}" + flaredb_next_id="$(vm_demo_flaredb_value "${state_namespace}" "${next_id_key}")" + [[ "${flaredb_next_id}" == "$((expected_todo_count + 1))" ]] || die "unexpected FlareDB next-id for ${state_namespace}/${next_id_key}: expected $((expected_todo_count + 1)), got ${flaredb_next_id}" + + lightningstor_head_object_json 15086 "${token}" "${bucket}" "${latest_object_key}" \ + | jq -e '(.object.size | tonumber) > 0' >/dev/null \ + || die "VM demo state object missing from LightningStor: 
${bucket}/${latest_object_key}" + + if (( expected_attachment_count > 0 )); then + [[ -n "${latest_attachment_object_key}" ]] || die "VM demo expected an attachment object key but none was returned" + lightningstor_head_object_json 15086 "${token}" "${bucket}" "${latest_attachment_object_key}" \ + | jq -e '(.object.size | tonumber) > 0' >/dev/null \ + || die "VM demo attachment object missing from LightningStor: ${bucket}/${latest_attachment_object_key}" + latest_attachment_file="$(mktemp)" + download_lightningstor_object_to_file 15086 "${token}" "${bucket}" "${latest_attachment_object_key}" "${latest_attachment_file}" + [[ "$(<"${latest_attachment_file}")" == "${expected_latest_attachment_body}" ]] || die "unexpected VM demo attachment body in LightningStor: ${bucket}/${latest_attachment_object_key}" + rm -f "${latest_attachment_file}" + else + [[ -z "${latest_attachment_object_key}" ]] || die "VM demo unexpectedly returned attachment key ${latest_attachment_object_key}" + [[ -z "${latest_attachment_filename}" ]] || die "VM demo unexpectedly returned attachment name ${latest_attachment_filename}" + fi + + snapshot_file="$(mktemp)" + download_lightningstor_object_to_file 15086 "${token}" "${bucket}" "${latest_object_key}" "${snapshot_file}" + jq -e \ + --argjson todo_count "${expected_todo_count}" \ + --argjson attachment_count "${expected_attachment_count}" \ + --argjson root_boots "${expected_root_boots}" \ + --argjson data_boots "${expected_data_boots}" \ + --arg bucket "${bucket}" \ + --arg state_namespace "${state_namespace}" \ + --arg todo_prefix "${todo_prefix}" \ + --arg next_id_key "${next_id_key}" \ + --arg latest_object_key "${latest_object_key}" \ + --arg latest_attachment_object_key "${latest_attachment_object_key}" \ + --arg latest_attachment_filename "${latest_attachment_filename}" ' + .bucket == $bucket + and .todo_count == $todo_count + and .attachment_count == $attachment_count + and .root_boot_count == $root_boots + and .data_boot_count == 
$data_boots + and .state_namespace == $state_namespace + and .todo_prefix == $todo_prefix + and .next_id_key == $next_id_key + and .latest_object_key == $latest_object_key + and .latest_attachment_object_key == $latest_attachment_object_key + and .latest_attachment_filename == $latest_attachment_filename + ' "${snapshot_file}" >/dev/null || die "unexpected VM demo snapshot object payload in LightningStor" + rm -f "${snapshot_file}" +} + wait_for_host_http() { local url="$1" local timeout="${2:-${HTTP_WAIT_TIMEOUT}}" @@ -3459,11 +3987,11 @@ validate_tenant_networking_flow() { gateway_tunnel="$(start_ssh_tunnel node06 18080 8080)" trap 'stop_ssh_tunnel node06 "${gateway_tunnel}"; stop_ssh_tunnel node01 "${prism_tunnel}"; stop_ssh_tunnel node01 "${iam_tunnel}"' RETURN - wait_for_unit node01 plasmacloud-tenant-networking-apply 120 + wait_for_unit node01 ultracloud-tenant-networking-apply 120 wait_for_http node06 http://127.0.0.1:8080/health - ssh_node node01 "systemctl start plasmacloud-tenant-networking-apply.service" - wait_for_unit node01 plasmacloud-tenant-networking-apply 120 + ssh_node node01 "systemctl start ultracloud-tenant-networking-apply.service" + wait_for_unit node01 ultracloud-tenant-networking-apply 120 local org_id="${MATRIX_TENANT_ORG_ID}" local project_id="${MATRIX_TENANT_PROJECT_ID}" @@ -4036,8 +4564,8 @@ validate_k8shost_flow() { 127.0.0.1:15087 k8shost.ServiceService/GetService 2>/dev/null || true)" if [[ -n "${service_json}" ]] && printf '%s' "${service_json}" | jq -e ' .service.status.loadBalancer.ingress[0].ip != null and - .service.metadata.annotations["fiberlb.plasmacloud.io/lb-id"] != null and - .service.metadata.annotations["flashdns.plasmacloud.io/record-id"] != null' >/dev/null 2>&1; then + .service.metadata.annotations["fiberlb.ultracloud.io/lb-id"] != null and + .service.metadata.annotations["flashdns.ultracloud.io/record-id"] != null' >/dev/null 2>&1; then break fi if (( SECONDS >= deadline )); then @@ -4047,9 +4575,9 @@ 
validate_k8shost_flow() { done cluster_ip="$(printf '%s' "${service_json}" | jq -r '.service.spec.clusterIp')" - lb_id="$(printf '%s' "${service_json}" | jq -r '.service.metadata.annotations["fiberlb.plasmacloud.io/lb-id"]')" - record_id="$(printf '%s' "${service_json}" | jq -r '.service.metadata.annotations["flashdns.plasmacloud.io/record-id"]')" - zone_id="$(printf '%s' "${service_json}" | jq -r '.service.metadata.annotations["flashdns.plasmacloud.io/zone-id"]')" + lb_id="$(printf '%s' "${service_json}" | jq -r '.service.metadata.annotations["fiberlb.ultracloud.io/lb-id"]')" + record_id="$(printf '%s' "${service_json}" | jq -r '.service.metadata.annotations["flashdns.ultracloud.io/record-id"]')" + zone_id="$(printf '%s' "${service_json}" | jq -r '.service.metadata.annotations["flashdns.ultracloud.io/zone-id"]')" [[ -n "${cluster_ip}" && "${cluster_ip}" != "null" ]] || die "K8sHost service did not get a cluster IP" [[ -n "${lb_id}" && "${lb_id}" != "null" ]] || die "K8sHost service did not get a FiberLB load balancer" [[ -n "${record_id}" && "${record_id}" != "null" ]] || die "K8sHost service did not get a FlashDNS record" @@ -4098,8 +4626,8 @@ validate_k8shost_flow() { -d "$(jq -cn --arg ns "default" --arg name "${service_name}" '{namespace:$ns, name:$name}')" \ 127.0.0.1:15087 k8shost.ServiceService/GetService 2>/dev/null || true)" if [[ -n "${service_json}" ]] && printf '%s' "${service_json}" | jq -e --arg lb "${lb_id}" --arg record "${record_id}" ' - .service.metadata.annotations["fiberlb.plasmacloud.io/lb-id"] == $lb and - .service.metadata.annotations["flashdns.plasmacloud.io/record-id"] == $record' >/dev/null 2>&1; then + .service.metadata.annotations["fiberlb.ultracloud.io/lb-id"] == $lb and + .service.metadata.annotations["flashdns.ultracloud.io/record-id"] == $record' >/dev/null 2>&1; then break fi if (( SECONDS >= deadline )); then @@ -4393,8 +4921,12 @@ validate_vm_storage_flow() { local iam_tunnel="" prism_tunnel="" ls_tunnel="" vm_tunnel="" 
coronafs_tunnel="" gateway_tunnel="" local node04_coronafs_tunnel="" node05_coronafs_tunnel="" + local demo_access_tunnel="" demo_access_node="" demo_access_port="" local current_worker_coronafs_port="" peer_worker_coronafs_port="" local demo_http_sg_id="" + local demo_bucket="" + local preserve_vm_demo_gateway_proxies=0 + local keep_running="${PHOTON_VM_DEMO_KEEP_RUNNING:-0}" local vm_port=15082 iam_tunnel="$(start_ssh_tunnel node01 15080 50080)" prism_tunnel="$(start_ssh_tunnel node01 15081 50081)" @@ -4426,6 +4958,12 @@ validate_vm_storage_flow() { if [[ -n "${token:-}" && -n "${demo_http_sg_id:-}" ]]; then api_gateway_request DELETE "${token}" "/api/v1/security-groups/${demo_http_sg_id}" >/dev/null 2>&1 || true fi + if [[ -n "${demo_access_tunnel:-}" && -n "${demo_access_node:-}" ]]; then + stop_ssh_tunnel "${demo_access_node}" "${demo_access_tunnel}" + fi + if [[ "${preserve_vm_demo_gateway_proxies}" != "1" ]]; then + stop_vm_demo_gateway_proxies + fi stop_ssh_tunnel node05 "${node05_coronafs_tunnel}" stop_ssh_tunnel node04 "${node04_coronafs_tunnel}" stop_ssh_tunnel node01 "${coronafs_tunnel}" @@ -4444,8 +4982,10 @@ validate_vm_storage_flow() { local principal_id="plasmavmc-smoke-$(date +%s)" local token local demo_state_json="" - local demo_visit_json="" + local demo_todo_json="" token="$(issue_project_admin_token 15080 "${org_id}" "${project_id}" "${principal_id}")" + wait_for_http node06 http://127.0.0.1:8080/health + wait_for_host_http http://127.0.0.1:18080/health log "Matrix case: PlasmaVMC + declarative PrismNet tenant networking" vpc_id="$(api_gateway_request GET "${token}" "/api/v1/vpcs" \ @@ -4492,6 +5032,8 @@ validate_vm_storage_flow() { }' )" >/dev/null + start_vm_demo_gateway_proxies + ensure_lightningstor_bucket 15086 "${token}" "plasmavmc-images" "${org_id}" "${project_id}" wait_for_lightningstor_write_quorum 15086 "${token}" "plasmavmc-images" "PlasmaVMC image import" @@ -4756,15 +5298,45 @@ EOS [[ -n "${current_data_volume_qemu_ref}" ]] || 
die "worker ${node_id} did not expose an attachable local ref for ${data_volume_id}" wait_for_qemu_volume_present "${node_id}" "${volume_path}" "${current_volume_qemu_ref}" wait_for_qemu_volume_present "${node_id}" "${data_volume_path}" "${current_data_volume_qemu_ref}" - wait_for_lightningstor_counts_equal "${image_after_node01}" "${image_after_node04}" "${image_after_node05}" "shared-fs VM startup" + local demo_start_node01 demo_start_node04 demo_start_node05 + read -r demo_start_node01 demo_start_node04 demo_start_node05 < <( + capture_stable_equal_lightningstor_count_triplet \ + "${image_after_node01}" \ + "${image_after_node04}" \ + "${image_after_node05}" + ) + local todo1_title="Attach architecture sketch" + local todo1_details="Created before the first restart." + local todo1_attachment_name="task-1.txt" + local todo1_attachment_body="todo-1 attachment from boot 1" + local todo2_title="Verify restart persistence" + local todo2_details="Created after the same-node restart." + local todo2_attachment_name="task-2.txt" + local todo2_attachment_body="todo-2 attachment after restart" + local todo3_title="Confirm migrated worker" + local todo3_details="Created after moving to the other worker." 
+ local todo3_attachment_name="task-3.txt" + local todo3_attachment_body="todo-3 attachment after migration" wait_for_vm_console_pattern "${node_id}" "${vm_id}" "PHOTON_VM_SMOKE_READY count=1" wait_for_vm_console_pattern "${node_id}" "${vm_id}" "PHOTON_VM_SMOKE_DATA_READY count=1" wait_for_vm_console_pattern "${node_id}" "${vm_id}" "PHOTON_VM_DEMO_WEB_READY count=1" wait_for_vm_demo_http "${node_id}" "${port_ip}" demo_state_json="$(vm_demo_request_json "${node_id}" GET "${port_ip}" "/state")" - assert_vm_demo_state "${demo_state_json}" 0 1 1 - demo_visit_json="$(vm_demo_request_json "${node_id}" POST "${port_ip}" "/visit")" - assert_vm_demo_state "${demo_visit_json}" 1 1 1 + assert_vm_demo_state "${demo_state_json}" 0 0 1 1 + assert_vm_demo_backend_artifacts "${token}" "${demo_state_json}" 0 0 1 1 + demo_bucket="$(printf '%s' "${demo_state_json}" | jq -r '.bucket')" + demo_todo_json="$(vm_demo_create_todo_json "${node_id}" "${port_ip}" "${todo1_title}" "${todo1_details}" "${todo1_attachment_name}" "${todo1_attachment_body}")" + assert_vm_demo_state "${demo_todo_json}" 1 1 1 1 "${todo1_title}" "${todo1_attachment_name}" + assert_vm_demo_backend_artifacts "${token}" "${demo_todo_json}" 1 1 1 1 "${todo1_attachment_body}" + assert_vm_demo_attachment_body "${node_id}" "${port_ip}" 1 "${todo1_attachment_body}" + local demo_after_visit1_node01 demo_after_visit1_node04 demo_after_visit1_node05 + wait_for_lightningstor_counts_greater_than "${demo_start_node01}" "${demo_start_node04}" "${demo_start_node05}" "VM demo first todo attachment" + read -r demo_after_visit1_node01 demo_after_visit1_node04 demo_after_visit1_node05 < <( + capture_stable_equal_lightningstor_count_triplet \ + "$((demo_start_node01 + 1))" \ + "$((demo_start_node04 + 1))" \ + "$((demo_start_node05 + 1))" + ) local get_root_volume_json get_data_volume_json local root_volume_state_json data_volume_state_json local root_attachment_generation data_attachment_generation @@ -4874,10 +5446,21 @@ EOS 
wait_for_vm_console_pattern "${node_id}" "${vm_id}" "PHOTON_VM_DEMO_WEB_READY count=2" wait_for_vm_demo_http "${node_id}" "${port_ip}" demo_state_json="$(vm_demo_request_json "${node_id}" GET "${port_ip}" "/state")" - assert_vm_demo_state "${demo_state_json}" 1 2 2 - demo_visit_json="$(vm_demo_request_json "${node_id}" POST "${port_ip}" "/visit")" - assert_vm_demo_state "${demo_visit_json}" 2 2 2 - wait_for_lightningstor_counts_equal "${image_after_node01}" "${image_after_node04}" "${image_after_node05}" "shared-fs VM restart" + assert_vm_demo_state "${demo_state_json}" 1 1 2 2 "${todo1_title}" "${todo1_attachment_name}" + assert_vm_demo_backend_artifacts "${token}" "${demo_state_json}" 1 1 2 2 "${todo1_attachment_body}" + assert_vm_demo_attachment_body "${node_id}" "${port_ip}" 1 "${todo1_attachment_body}" + demo_todo_json="$(vm_demo_create_todo_json "${node_id}" "${port_ip}" "${todo2_title}" "${todo2_details}" "${todo2_attachment_name}" "${todo2_attachment_body}")" + assert_vm_demo_state "${demo_todo_json}" 2 2 2 2 "${todo2_title}" "${todo2_attachment_name}" + assert_vm_demo_backend_artifacts "${token}" "${demo_todo_json}" 2 2 2 2 "${todo2_attachment_body}" + assert_vm_demo_attachment_body "${node_id}" "${port_ip}" 2 "${todo2_attachment_body}" + local demo_after_visit2_node01 demo_after_visit2_node04 demo_after_visit2_node05 + wait_for_lightningstor_counts_greater_than "${demo_after_visit1_node01}" "${demo_after_visit1_node04}" "${demo_after_visit1_node05}" "VM demo second todo attachment" + read -r demo_after_visit2_node01 demo_after_visit2_node04 demo_after_visit2_node05 < <( + capture_stable_equal_lightningstor_count_triplet \ + "$((demo_after_visit1_node01 + 1))" \ + "$((demo_after_visit1_node04 + 1))" \ + "$((demo_after_visit1_node05 + 1))" + ) root_volume_state_json="$(try_get_volume_json "${token}" "${get_root_volume_json}")" data_volume_state_json="$(try_get_volume_json "${token}" "${get_data_volume_json}")" [[ "$(printf '%s' "${root_volume_state_json}" | 
jq -r '.attachedToNode // empty')" == "${node_id}" ]] || die "root volume ${volume_id} drifted away from node ${node_id} after restart" @@ -4962,9 +5545,21 @@ EOS wait_for_vm_console_pattern "${node_id}" "${vm_id}" "PHOTON_VM_SMOKE_HEARTBEAT count=3" wait_for_vm_demo_http "${node_id}" "${port_ip}" demo_state_json="$(vm_demo_request_json "${node_id}" GET "${port_ip}" "/state")" - assert_vm_demo_state "${demo_state_json}" 2 3 3 - demo_visit_json="$(vm_demo_request_json "${node_id}" POST "${port_ip}" "/visit")" - assert_vm_demo_state "${demo_visit_json}" 3 3 3 + assert_vm_demo_state "${demo_state_json}" 2 2 3 3 "${todo2_title}" "${todo2_attachment_name}" + assert_vm_demo_backend_artifacts "${token}" "${demo_state_json}" 2 2 3 3 "${todo2_attachment_body}" + assert_vm_demo_attachment_body "${node_id}" "${port_ip}" 2 "${todo2_attachment_body}" + demo_todo_json="$(vm_demo_create_todo_json "${node_id}" "${port_ip}" "${todo3_title}" "${todo3_details}" "${todo3_attachment_name}" "${todo3_attachment_body}")" + assert_vm_demo_state "${demo_todo_json}" 3 3 3 3 "${todo3_title}" "${todo3_attachment_name}" + assert_vm_demo_backend_artifacts "${token}" "${demo_todo_json}" 3 3 3 3 "${todo3_attachment_body}" + assert_vm_demo_attachment_body "${node_id}" "${port_ip}" 3 "${todo3_attachment_body}" + local demo_after_visit3_node01 demo_after_visit3_node04 demo_after_visit3_node05 + wait_for_lightningstor_counts_greater_than "${demo_after_visit2_node01}" "${demo_after_visit2_node04}" "${demo_after_visit2_node05}" "VM demo third todo attachment" + read -r demo_after_visit3_node01 demo_after_visit3_node04 demo_after_visit3_node05 < <( + capture_stable_equal_lightningstor_count_triplet \ + "$((demo_after_visit2_node01 + 1))" \ + "$((demo_after_visit2_node04 + 1))" \ + "$((demo_after_visit2_node05 + 1))" + ) root_volume_state_json="$(try_get_volume_json "${token}" "${get_root_volume_json}")" data_volume_state_json="$(try_get_volume_json "${token}" "${get_data_volume_json}")" [[ "$(printf '%s' 
"${root_volume_state_json}" | jq -r '.attachedToNode // empty')" == "${node_id}" ]] || die "root volume ${volume_id} is not owned by migrated node ${node_id}" @@ -5051,8 +5646,15 @@ EOS wait_for_vm_console_pattern "${node_id}" "${vm_id}" "PHOTON_VM_DEMO_WEB_READY count=4" wait_for_vm_demo_http "${node_id}" "${port_ip}" demo_state_json="$(vm_demo_request_json "${node_id}" GET "${port_ip}" "/state")" - assert_vm_demo_state "${demo_state_json}" 3 4 4 - wait_for_lightningstor_counts_equal "${image_after_node01}" "${image_after_node04}" "${image_after_node05}" "shared-fs VM post-migration restart" + assert_vm_demo_state "${demo_state_json}" 3 3 4 4 "${todo3_title}" "${todo3_attachment_name}" + assert_vm_demo_backend_artifacts "${token}" "${demo_state_json}" 3 3 4 4 "${todo3_attachment_body}" + assert_vm_demo_attachment_body "${node_id}" "${port_ip}" 3 "${todo3_attachment_body}" + read -r _ _ _ < <( + capture_stable_equal_lightningstor_count_triplet \ + "${demo_after_visit3_node01}" \ + "${demo_after_visit3_node04}" \ + "${demo_after_visit3_node05}" + ) root_volume_state_json="$(try_get_volume_json "${token}" "${get_root_volume_json}")" data_volume_state_json="$(try_get_volume_json "${token}" "${get_data_volume_json}")" [[ "$(printf '%s' "${root_volume_state_json}" | jq -r '.attachedToNode // empty')" == "${node_id}" ]] || die "root volume ${volume_id} drifted away from migrated node ${node_id} after restart" @@ -5062,6 +5664,31 @@ EOS [[ "$(printf '%s' "${root_volume_state_json}" | jq -r '.lastFlushedAttachmentGeneration // 0')" == "${root_attachment_generation}" ]] || die "root volume ${volume_id} was not flushed before migrated-node restart" [[ "$(printf '%s' "${data_volume_state_json}" | jq -r '.lastFlushedAttachmentGeneration // 0')" == "${data_attachment_generation}" ]] || die "data volume ${data_volume_id} was not flushed before migrated-node restart" + if [[ "${keep_running}" == "1" ]]; then + demo_access_port="$(allocate_local_listener_port 
"${VM_DEMO_FORWARD_START_PORT}" 18999)" || die "failed to allocate a local port for VM web app access" + demo_access_node="${node_id}" + demo_access_tunnel="$(start_ssh_tunnel "${demo_access_node}" "${demo_access_port}" "${VM_DEMO_HTTP_PORT}" "${port_ip}")" + wait_for_host_http "http://127.0.0.1:${demo_access_port}/health" 60 + preserve_vm_demo_gateway_proxies=1 + + trap - RETURN + stop_ssh_tunnel node05 "${node05_coronafs_tunnel}" + stop_ssh_tunnel node04 "${node04_coronafs_tunnel}" + stop_ssh_tunnel node01 "${coronafs_tunnel}" + stop_ssh_tunnel node01 "${vm_tunnel}" + stop_ssh_tunnel node01 "${ls_tunnel}" + stop_ssh_tunnel node01 "${prism_tunnel}" + stop_ssh_tunnel node01 "${iam_tunnel}" + stop_ssh_tunnel node06 "${gateway_tunnel}" + + log "VM web app left running for manual access" + log "Access URL: http://127.0.0.1:${demo_access_port}/" + log "State URL: http://127.0.0.1:${demo_access_port}/state" + log "Create TODO command: curl -X POST -F 'title=New task' -F 'details=Created from host' -F 'attachment=@/etc/hosts' http://127.0.0.1:${demo_access_port}/api/todos" + log "VM metadata: vm_id=${vm_id} node=${node_id} tenant_ip=${port_ip}" + return 0 + fi + grpcurl -plaintext \ -H "authorization: Bearer ${token}" \ -import-path "${PLASMAVMC_PROTO_DIR}" \ @@ -5138,6 +5765,9 @@ EOS if coronafs_get_volume_json 35088 "${data_volume_id}" >/dev/null 2>&1; then die "worker node05 retained mutable data volume metadata after VM deletion" fi + if [[ -n "${demo_bucket}" ]]; then + delete_lightningstor_bucket_recursive 15086 "${token}" "${demo_bucket}" + fi wait_for_lightningstor_counts_equal "${image_after_node01}" "${image_after_node04}" "${image_after_node05}" "shared-fs VM deletion" grpcurl -plaintext \ @@ -5250,7 +5880,7 @@ validate_nightlight_flow_with_base() { nightlight_remote_write_sample "${base_url}" "${metric_name}" "${metric_value}" auto \ --label source=smoke \ - --label cluster=photoncloud + --label cluster=ultracloud wait_for_nightlight_query_result 
"${base_url}" "${flow_name}" "${metric_name}" "${metric_value}" "source=\"smoke\"" @@ -5470,7 +6100,7 @@ validate_nightlight_grpc_and_persistence() { --metric "${metric_name}" \ --value "${metric_value}" \ --label source=grpc \ - --label cluster=photoncloud + --label cluster=ultracloud wait_for_nightlight_query_result "${base_url}" "NightLight persistence pre-restart" "${metric_name}" "${metric_value}" "source=\"grpc\"" @@ -5698,7 +6328,7 @@ validate_deployer_flow() { -d "$(jq -cn \ --arg machine "${machine_id}" \ --arg node "${node_id}" \ - '{machine_id:$machine, node_id:$node, role:"worker", ip:"10.100.0.250", services:["plasmavmc"], ssh_authorized_keys:["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFiberLBSmokeKey smoke@test"]}')" \ + '{machine_id:$machine, node_config:{assignment:{node_id:$node, hostname:$node, role:"worker", ip:"10.100.0.250"}, bootstrap_plan:{services:["plasmavmc"]}, bootstrap_secrets:{ssh_authorized_keys:["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFiberLBSmokeKey smoke@test"]}}}')" \ http://127.0.0.1:13012/api/v1/admin/nodes \ | jq -e --arg machine "${machine_id}" --arg node "${node_id}" '.success == true and .machine_id == $machine and .node_id == $node' >/dev/null @@ -5717,11 +6347,11 @@ validate_deployer_flow() { http://127.0.0.1:13012/api/v1/phone-home)" printf '%s' "${phone_home_json}" | jq -e --arg node "${node_id}" ' .success == true and - .node_id == $node and + .node_config.assignment.node_id == $node and .state == "provisioning" and - .node_config.hostname == $node and - .node_config.role == "worker" and - (.node_config.services | index("plasmavmc")) != null + .node_config.assignment.hostname == $node and + .node_config.assignment.role == "worker" and + (.node_config.bootstrap_plan.services | index("plasmavmc")) != null ' >/dev/null trap - RETURN @@ -5752,7 +6382,7 @@ validate_native_runtime_flow() { run_deployer_ctl \ --chainfire-endpoint "${chainfire_endpoint}" \ --cluster-id "test-cluster" \ - --cluster-namespace "photoncloud" \ + 
--cluster-namespace "ultracloud" \ --deployer-namespace "deployer" \ dump --prefix "${prefix}" --format json \ | jq -rc '.value' @@ -5785,7 +6415,7 @@ validate_native_runtime_flow() { native_first_healthy_instance() { local service="$1" - native_dump_values "photoncloud/clusters/test-cluster/instances/${service}/" \ + native_dump_values "ultracloud/clusters/test-cluster/instances/${service}/" \ | sed '/^$/d' \ | jq -sr "${native_fresh_healthy_map_expr} | sort_by(.instance_id) | first" } @@ -5799,7 +6429,7 @@ validate_native_runtime_flow() { while true; do instance_value="$( - native_dump_values "photoncloud/clusters/test-cluster/instances/${service}/" \ + native_dump_values "ultracloud/clusters/test-cluster/instances/${service}/" \ | sed '/^$/d' \ | jq -sr \ --arg node "${expected_node}" \ @@ -5819,7 +6449,7 @@ validate_native_runtime_flow() { native_publication_state() { local service="$1" - native_dump_values "photoncloud/clusters/test-cluster/publications/" \ + native_dump_values "ultracloud/clusters/test-cluster/publications/" \ | sed '/^$/d' \ | jq -sr --arg service "${service}" 'map(select(.service == $service)) | first' } @@ -5914,48 +6544,48 @@ validate_native_runtime_flow() { run_deployer_ctl \ --chainfire-endpoint "${chainfire_endpoint}" \ --cluster-id "test-cluster" \ - --cluster-namespace "photoncloud" \ + --cluster-namespace "ultracloud" \ --deployer-namespace "deployer" \ node set-state --node-id "${node_id}" --state "${state}" } wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/services/" \ + "ultracloud/clusters/test-cluster/services/" \ 'map(select(.name == "native-web" or .name == "native-container" or .name == "native-daemon")) | length' \ "3" \ 180 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/nodes/" \ + "ultracloud/clusters/test-cluster/nodes/" \ 'map(select(.labels.runtime == "native" and .state == "active")) | length' \ "2" \ 240 wait_for_native_dump_count \ - 
"photoncloud/clusters/test-cluster/instances/native-web/" \ + "ultracloud/clusters/test-cluster/instances/native-web/" \ 'length' \ "2" \ 300 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-web/" \ + "ultracloud/clusters/test-cluster/instances/native-web/" \ "${native_fresh_healthy_count_expr}" \ "2" \ 300 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-container/" \ + "ultracloud/clusters/test-cluster/instances/native-container/" \ 'length' \ "1" \ 360 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-container/" \ + "ultracloud/clusters/test-cluster/instances/native-container/" \ "${native_fresh_healthy_count_expr}" \ "1" \ 360 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-daemon/" \ + "ultracloud/clusters/test-cluster/instances/native-daemon/" \ 'length' \ "2" \ 300 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-daemon/" \ + "ultracloud/clusters/test-cluster/instances/native-daemon/" \ "${native_fresh_healthy_count_expr}" \ "2" \ 300 @@ -5974,7 +6604,7 @@ validate_native_runtime_flow() { wait_for_http node01 "http://127.0.0.1:18191/" 240 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/publications/" \ + "ultracloud/clusters/test-cluster/publications/" \ 'map(select(.service == "native-web" or .service == "native-daemon")) | length' \ "2" \ 180 @@ -6012,37 +6642,37 @@ validate_native_runtime_flow() { log "Draining node04 through deployer lifecycle state" set_native_node_state "node04" "draining" wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/nodes/" \ + "ultracloud/clusters/test-cluster/nodes/" \ 'map(select(.node_id == "node04" and .state == "draining")) | length' \ "1" \ 120 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-web/" \ + "ultracloud/clusters/test-cluster/instances/native-web/" \ 'length' \ "1" \ 240 
wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-web/" \ + "ultracloud/clusters/test-cluster/instances/native-web/" \ "${native_fresh_healthy_count_expr}" \ "1" \ 240 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-container/" \ + "ultracloud/clusters/test-cluster/instances/native-container/" \ 'length' \ "1" \ 240 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-container/" \ + "ultracloud/clusters/test-cluster/instances/native-container/" \ "${native_fresh_healthy_count_expr}" \ "1" \ 240 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-daemon/" \ + "ultracloud/clusters/test-cluster/instances/native-daemon/" \ 'length' \ "1" \ 240 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-daemon/" \ + "ultracloud/clusters/test-cluster/instances/native-daemon/" \ "${native_fresh_healthy_count_expr}" \ "1" \ 240 @@ -6069,32 +6699,32 @@ validate_native_runtime_flow() { log "Restoring node04 and ensuring capacity returns without moving healthy singleton work" set_native_node_state "node04" "active" wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/nodes/" \ + "ultracloud/clusters/test-cluster/nodes/" \ 'map(select(.node_id == "node04" and .state == "active")) | length' \ "1" \ 120 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-web/" \ + "ultracloud/clusters/test-cluster/instances/native-web/" \ 'length' \ "2" \ 240 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-web/" \ + "ultracloud/clusters/test-cluster/instances/native-web/" \ "${native_fresh_healthy_count_expr}" \ "2" \ 240 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-container/" \ + "ultracloud/clusters/test-cluster/instances/native-container/" \ "${native_fresh_healthy_count_expr}" \ "1" \ 240 wait_for_native_dump_count \ - 
"photoncloud/clusters/test-cluster/instances/native-daemon/" \ + "ultracloud/clusters/test-cluster/instances/native-daemon/" \ 'length' \ "2" \ 240 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-daemon/" \ + "ultracloud/clusters/test-cluster/instances/native-daemon/" \ "${native_fresh_healthy_count_expr}" \ "2" \ 240 @@ -6120,17 +6750,17 @@ validate_native_runtime_flow() { wait_for_ssh_down node05 120 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-web/" \ + "ultracloud/clusters/test-cluster/instances/native-web/" \ "${native_fresh_healthy_count_expr}" \ "1" \ 240 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-container/" \ + "ultracloud/clusters/test-cluster/instances/native-container/" \ "${native_fresh_healthy_count_expr}" \ "1" \ 240 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-daemon/" \ + "ultracloud/clusters/test-cluster/instances/native-daemon/" \ "${native_fresh_healthy_count_expr}" \ "1" \ 240 @@ -6162,32 +6792,32 @@ validate_native_runtime_flow() { wait_for_unit node05 lightningstor wait_for_unit node05 node-agent wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/nodes/" \ + "ultracloud/clusters/test-cluster/nodes/" \ 'map(select(.labels.runtime == "native" and .state == "active")) | length' \ "2" \ 240 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-web/" \ + "ultracloud/clusters/test-cluster/instances/native-web/" \ 'length' \ "2" \ 240 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-web/" \ + "ultracloud/clusters/test-cluster/instances/native-web/" \ "${native_fresh_healthy_count_expr}" \ "2" \ 240 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-container/" \ + "ultracloud/clusters/test-cluster/instances/native-container/" \ "${native_fresh_healthy_count_expr}" \ "1" \ 240 
wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-daemon/" \ + "ultracloud/clusters/test-cluster/instances/native-daemon/" \ 'length' \ "2" \ 240 wait_for_native_dump_count \ - "photoncloud/clusters/test-cluster/instances/native-daemon/" \ + "ultracloud/clusters/test-cluster/instances/native-daemon/" \ "${native_fresh_healthy_count_expr}" \ "2" \ 240 @@ -7761,6 +8391,16 @@ fresh_demo_vm_webapp_requested() { demo_vm_webapp_requested "$@" } +serve_vm_webapp_requested() { + start_requested "$@" + PHOTON_VM_DEMO_KEEP_RUNNING=1 validate_vm_storage_flow +} + +fresh_serve_vm_webapp_requested() { + clean_requested "$@" + serve_vm_webapp_requested "$@" +} + matrix_requested() { start_requested "$@" validate_component_matrix @@ -7973,7 +8613,7 @@ status_requested() { validate_nodes_exist "${nodes[@]}" local node pid_path - printf 'PhotonCloud test cluster status\n' + printf 'UltraCloud test cluster status\n' printf '===============================\n' for node in "${nodes[@]}"; do pid_path="$(pid_file "${node}")" @@ -8035,7 +8675,7 @@ logs_requested() { usage() { cat < [nodes...] 
@@ -8048,8 +8688,10 @@ Commands: fresh-smoke clean local runtime state, rebuild on the host, start, and validate storage-smoke start the storage lab (node01-05) and validate CoronaFS/LightningStor/PlasmaVMC fresh-storage-smoke clean local runtime state, rebuild node01-05 on the host, start, and validate the storage lab - demo-vm-webapp start the cluster and run the VM web app demo with persistent volume state - fresh-demo-vm-webapp clean local runtime state, rebuild on the host, start, and run the VM web app demo + demo-vm-webapp start the cluster and run the VM web app demo backed by FlareDB and LightningStor + fresh-demo-vm-webapp clean local runtime state, rebuild on the host, start, and run the VM web app demo backed by FlareDB and LightningStor + serve-vm-webapp start the cluster, run the VM web app demo, and leave the guest app reachable on localhost + fresh-serve-vm-webapp clean local runtime state, rebuild on the host, start, run the VM web app demo, and leave it reachable on localhost matrix Start the cluster and validate composed service configurations against the current running VMs fresh-matrix clean local runtime state, rebuild on the host, start, and validate composed service configurations bench-storage start the cluster and benchmark CoronaFS plus LightningStor against the current running VMs @@ -8078,6 +8720,8 @@ Examples: $0 fresh-storage-smoke $0 demo-vm-webapp $0 fresh-demo-vm-webapp + $0 serve-vm-webapp + $0 fresh-serve-vm-webapp $0 matrix $0 fresh-matrix $0 bench-storage @@ -8095,6 +8739,9 @@ Examples: $0 start node01 node02 node03 $0 validate $0 ssh node04 + +Environment: + PHOTON_VM_DEMO_FORWARD_START_PORT preferred local port for serve-vm-webapp (default: 18280) USAGE } @@ -8113,6 +8760,8 @@ main() { fresh-storage-smoke) fresh_storage_smoke_requested ;; demo-vm-webapp) demo_vm_webapp_requested "$@" ;; fresh-demo-vm-webapp) fresh_demo_vm_webapp_requested "$@" ;; + serve-vm-webapp) serve_vm_webapp_requested "$@" ;; + fresh-serve-vm-webapp) 
fresh_serve_vm_webapp_requested "$@" ;; matrix) matrix_requested "$@" ;; fresh-matrix) fresh_matrix_requested "$@" ;; bench-storage) bench_storage_requested "$@" ;; diff --git a/nix/test-cluster/storage-node01.nix b/nix/test-cluster/storage-node01.nix index d668308..a64051f 100644 --- a/nix/test-cluster/storage-node01.nix +++ b/nix/test-cluster/storage-node01.nix @@ -17,7 +17,7 @@ photonTestCluster = { sshBasePort = 2300; - vdeSock = "/tmp/photoncloud-test-cluster-vde.sock-storage"; + vdeSock = "/tmp/ultracloud-test-cluster-vde.sock-storage"; }; networking.hostName = "node01"; @@ -126,6 +126,6 @@ region = "test"; }; - services.lightningstor.s3AccessKeyId = "photoncloud-test"; - services.lightningstor.s3SecretKey = "photoncloud-test-secret"; + services.lightningstor.s3AccessKeyId = "ultracloud-test"; + services.lightningstor.s3SecretKey = "ultracloud-test-secret"; } diff --git a/nix/test-cluster/storage-node02.nix b/nix/test-cluster/storage-node02.nix index f3849d1..a38f454 100644 --- a/nix/test-cluster/storage-node02.nix +++ b/nix/test-cluster/storage-node02.nix @@ -14,7 +14,7 @@ photonTestCluster = { sshBasePort = 2300; - vdeSock = "/tmp/photoncloud-test-cluster-vde.sock-storage"; + vdeSock = "/tmp/ultracloud-test-cluster-vde.sock-storage"; }; networking.hostName = "node02"; diff --git a/nix/test-cluster/storage-node03.nix b/nix/test-cluster/storage-node03.nix index d070603..fa8f8bf 100644 --- a/nix/test-cluster/storage-node03.nix +++ b/nix/test-cluster/storage-node03.nix @@ -14,7 +14,7 @@ photonTestCluster = { sshBasePort = 2300; - vdeSock = "/tmp/photoncloud-test-cluster-vde.sock-storage"; + vdeSock = "/tmp/ultracloud-test-cluster-vde.sock-storage"; }; networking.hostName = "node03"; diff --git a/nix/test-cluster/storage-node04.nix b/nix/test-cluster/storage-node04.nix index db3caf6..967b493 100644 --- a/nix/test-cluster/storage-node04.nix +++ b/nix/test-cluster/storage-node04.nix @@ -15,7 +15,7 @@ photonTestCluster = { sshBasePort = 2300; - vdeSock = 
"/tmp/photoncloud-test-cluster-vde.sock-storage"; + vdeSock = "/tmp/ultracloud-test-cluster-vde.sock-storage"; }; networking.hostName = "node04"; diff --git a/nix/test-cluster/storage-node05.nix b/nix/test-cluster/storage-node05.nix index 3865057..8f15eeb 100644 --- a/nix/test-cluster/storage-node05.nix +++ b/nix/test-cluster/storage-node05.nix @@ -15,7 +15,7 @@ photonTestCluster = { sshBasePort = 2300; - vdeSock = "/tmp/photoncloud-test-cluster-vde.sock-storage"; + vdeSock = "/tmp/ultracloud-test-cluster-vde.sock-storage"; }; networking.hostName = "node05"; diff --git a/nix/test-cluster/vm-bench-guest-image.nix b/nix/test-cluster/vm-bench-guest-image.nix index 61e70a0..c2d2248 100644 --- a/nix/test-cluster/vm-bench-guest-image.nix +++ b/nix/test-cluster/vm-bench-guest-image.nix @@ -21,14 +21,14 @@ services.getty.autologinUser = "root"; users.mutableUsers = false; - users.users.root.hashedPassword = "$6$photoncloud$aUJCEE5wm/b5O.9KIKGm84qUWdWXwnebsFEiMBF7u9Y7AOWodaMrjbbKGMOf0X59VJyJeMRsgbT7VWeqMHpUe."; + users.users.root.hashedPassword = "$6$ultracloud$aUJCEE5wm/b5O.9KIKGm84qUWdWXwnebsFEiMBF7u9Y7AOWodaMrjbbKGMOf0X59VJyJeMRsgbT7VWeqMHpUe."; documentation.enable = false; services.openssh.enable = false; environment.systemPackages = with pkgs; [ e2fsprogs fio jq util-linux ]; systemd.services.photon-vm-bench = { - description = "PhotonCloud VM benchmark marker"; + description = "UltraCloud VM benchmark marker"; wantedBy = [ "multi-user.target" ]; wants = [ "systemd-udev-settle.service" ]; after = [ "local-fs.target" "systemd-udev-settle.service" ]; diff --git a/nix/test-cluster/vm-guest-image.nix b/nix/test-cluster/vm-guest-image.nix index e758fab..9f83ea0 100644 --- a/nix/test-cluster/vm-guest-image.nix +++ b/nix/test-cluster/vm-guest-image.nix @@ -1,21 +1,637 @@ { modulesPath, lib, pkgs, ... 
}: let + pythonWithBoto3 = pkgs.python3.withPackages (ps: [ ps.boto3 ps.python-multipart ]); + photonVmDemoIamProto = pkgs.writeText "photon-vm-demo-iam.proto" (builtins.readFile ../../iam/proto/iam.proto); photonVmDemoApi = pkgs.writeText "photon-vm-demo-api.py" '' + import html import json + import mimetypes import os import socket - import sqlite3 + import struct + import subprocess + import threading + import time + import traceback + import urllib.parse + import urllib.request + import uuid from http import HTTPStatus from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer + import boto3 + from botocore.config import Config + from botocore.exceptions import ClientError + from python_multipart import create_form_parser + DATA_MOUNT = "/mnt/photon-vm-data" - DB_PATH = os.path.join(DATA_MOUNT, "demo.sqlite3") + CONFIG_PATH = os.path.join(DATA_MOUNT, "demo-config.json") ROOT_BOOT_COUNT_PATH = "/var/lib/photon-vm-smoke/boot-count" DATA_BOOT_COUNT_PATH = os.path.join(DATA_MOUNT, "boot-count") CONSOLE_PATH = "/dev/ttyS0" LISTEN_HOST = "0.0.0.0" LISTEN_PORT = 8080 + GATEWAY_IP_FALLBACK = "10.62.10.1" + UPLOAD_TMP_DIR = os.path.join(DATA_MOUNT, ".upload-tmp") + MAX_IN_MEMORY_UPLOAD = 1024 * 1024 + AWS_REGION = "us-east-1" + ORG_ID = "matrix-tenant-org" + PROJECT_ID = "matrix-tenant-project" + FLAREDB_SHARED_NAMESPACE = "validation" + STATE_OBJECT_KEY = "state.json" + ATTACHMENT_PREFIX = "attachments" + GRPCURL_BIN = "${pkgs.grpcurl}/bin/grpcurl" + IAM_PROTO_PATH = "${photonVmDemoIamProto}" + IAM_PROTO_DIR = os.path.dirname(IAM_PROTO_PATH) + IAM_PROTO_FILE = os.path.basename(IAM_PROTO_PATH) + UI_HTML = """ + + + + + Photon Tasks + + + +
+
+

UltraCloud demo

+

Photon Tasks

+

+ TODO items live in FlareDB, attached files live in LightningStor, and the whole app stays alive across restart and migration. +

+
+ +
+
+
+
+
+

New Task

+

Add a title, optional notes, and a file attachment.

+
+ +
+ +
+
+ + +
+
+ + +
+
+ + +
+
+

Files are uploaded into LightningStor and served back through the guest app.

+ +
+
+
+
+
+ +
+
+

Cluster Snapshot

+

Useful while you are bouncing the VM around the cluster.

+
+
+ 0 + Tasks +
+
+ 0 + Attachments +
+
+ 0 + Root boots +
+
+ 0 + Data disk boots +
+
+
+
Bucket: -
+
FlareDB namespace: -
+
Last task: No tasks yet
+
+
+
+
+ +
+
+
+
+

Tasks

+

Mark tasks done or open the attached file straight from the VM.

+
+
+
No tasks yet. Add one with a file to exercise FlareDB and LightningStor together.
+
    +
    +
    +
    + + + + + """ + + + def detect_default_gateway() -> str: + try: + with open("/proc/net/route", "r", encoding="utf-8") as handle: + next(handle, None) + for line in handle: + fields = line.strip().split() + if len(fields) < 4: + continue + if fields[1] != "00000000" or fields[2] == "00000000": + continue + if not (int(fields[3], 16) & 0x2): + continue + return socket.inet_ntoa(struct.pack(" None: @@ -26,105 +642,727 @@ let pass + def sanitize(message: str) -> str: + return message.replace("\n", " ").replace("\r", " ")[:240] + + def read_int(path: str) -> int: try: with open(path, "r", encoding="utf-8") as handle: return int(handle.read().strip() or "0") - except (FileNotFoundError, ValueError, OSError): + except (FileNotFoundError, OSError, ValueError): return 0 - def init_db() -> None: - os.makedirs(DATA_MOUNT, exist_ok=True) - conn = sqlite3.connect(DB_PATH) - try: - conn.execute( - "CREATE TABLE IF NOT EXISTS counters (name TEXT PRIMARY KEY, value INTEGER NOT NULL)" - ) - conn.execute( - "INSERT INTO counters (name, value) VALUES ('visits', 0) " - "ON CONFLICT(name) DO NOTHING" - ) - conn.commit() - finally: - conn.close() + def write_json_atomic(path: str, payload: dict) -> None: + os.makedirs(os.path.dirname(path), exist_ok=True) + tmp_path = path + ".tmp" + with open(tmp_path, "w", encoding="utf-8") as handle: + json.dump(payload, handle, indent=2, sort_keys=True) + handle.write("\n") + os.replace(tmp_path, path) - def current_state(increment: bool = False) -> dict: - conn = sqlite3.connect(DB_PATH, timeout=30) + def now_iso() -> str: + return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + + + def safe_filename(name: str) -> str: + base = os.path.basename(name or "attachment.bin").strip() + cleaned = [] + for char in base: + if char.isalnum() or char in "._-": + cleaned.append(char) + else: + cleaned.append("-") + value = "".join(cleaned).strip(".-") + return value or "attachment.bin" + + + def load_or_create_config() -> dict: + if 
os.path.exists(CONFIG_PATH): + with open(CONFIG_PATH, "r", encoding="utf-8") as handle: + config = json.load(handle) + else: + suffix = uuid.uuid4().hex[:12] + prefix = "vm-demo-" + suffix + config = { + "principal_id": prefix, + "bucket": prefix, + "todo_prefix": prefix + "/todos/", + "next_id_key": prefix + "/next-id", + "state_namespace": FLAREDB_SHARED_NAMESPACE, + "access_key_id": "", + "secret_key": "", + } + write_json_atomic(CONFIG_PATH, config) + + if "todo_prefix" not in config: + legacy_prefix = config.get("principal_id") or ("vm-demo-" + uuid.uuid4().hex[:12]) + config["todo_prefix"] = legacy_prefix + "/todos/" + if "next_id_key" not in config: + config["next_id_key"] = config["todo_prefix"].rstrip("/") + "/next-id" + write_json_atomic(CONFIG_PATH, config) + return config + + + def grpcurl_json(endpoint: str, proto_dir: str, proto_file: str, service: str, payload: dict, ignore_errors=()) -> dict: + command = [ + GRPCURL_BIN, + "-plaintext", + "-import-path", + proto_dir, + "-proto", + proto_file, + "-d", + json.dumps(payload, sort_keys=True), + endpoint, + service, + ] + result = subprocess.run( + command, + capture_output=True, + text=True, + timeout=20, + check=False, + ) + combined = (result.stdout or "") + "\n" + (result.stderr or "") + if result.returncode != 0: + lowered = combined.lower() + for pattern in ignore_errors: + if pattern.lower() in lowered: + return {} + raise RuntimeError("grpcurl %s failed: %s" % (service, sanitize(combined))) + output = result.stdout.strip() + return json.loads(output) if output else {} + + + def http_json(method: str, url: str, payload=None, timeout: int = 10) -> dict: + body = None + headers = {} + if payload is not None: + body = json.dumps(payload, sort_keys=True).encode("utf-8") + headers["Content-Type"] = "application/json" + request = urllib.request.Request(url, data=body, headers=headers, method=method) + with urllib.request.urlopen(request, timeout=timeout) as response: + return 
json.loads(response.read().decode("utf-8")) + + + def ensure_service_account(config: dict) -> None: + grpcurl_json( + IAM_ENDPOINT, + IAM_PROTO_DIR, + IAM_PROTO_FILE, + "iam.v1.IamAdmin/CreatePrincipal", + { + "id": config["principal_id"], + "kind": "PRINCIPAL_KIND_SERVICE_ACCOUNT", + "name": config["principal_id"], + "orgId": ORG_ID, + "projectId": PROJECT_ID, + }, + ignore_errors=("already exists", "alreadyexists"), + ) + + + def ensure_s3_credentials(config: dict) -> None: + if config.get("access_key_id") and config.get("secret_key"): + return + + response = grpcurl_json( + IAM_ENDPOINT, + IAM_PROTO_DIR, + IAM_PROTO_FILE, + "iam.v1.IamCredential/CreateS3Credential", + { + "principalId": config["principal_id"], + "principalKind": "PRINCIPAL_KIND_SERVICE_ACCOUNT", + "orgId": ORG_ID, + "projectId": PROJECT_ID, + "description": "vm-demo", + }, + ) + config["access_key_id"] = response["accessKeyId"] + config["secret_key"] = response["secretKey"] + write_json_atomic(CONFIG_PATH, config) + + + def s3_client(config: dict): + return boto3.session.Session().client( + "s3", + endpoint_url=LIGHTNINGSTOR_S3_ENDPOINT, + region_name=AWS_REGION, + aws_access_key_id=config["access_key_id"], + aws_secret_access_key=config["secret_key"], + use_ssl=False, + verify=False, + config=Config( + retries={"max_attempts": 8, "mode": "standard"}, + s3={"addressing_style": "path"}, + signature_version="s3v4", + ), + ) + + + def ensure_bucket(config: dict, client) -> None: try: - conn.execute( - "CREATE TABLE IF NOT EXISTS counters (name TEXT PRIMARY KEY, value INTEGER NOT NULL)" - ) - conn.execute( - "INSERT INTO counters (name, value) VALUES ('visits', 0) " - "ON CONFLICT(name) DO NOTHING" - ) - if increment: - conn.execute( - "UPDATE counters SET value = value + 1 WHERE name = 'visits'" - ) - visits = conn.execute( - "SELECT value FROM counters WHERE name = 'visits'" - ).fetchone()[0] - conn.commit() - finally: - conn.close() + client.head_bucket(Bucket=config["bucket"]) + return + except 
ClientError as error: + code = str(error.response.get("Error", {}).get("Code", "")) + if code not in ("404", "NoSuchBucket", "NotFound", "400", "403"): + raise + client.create_bucket(Bucket=config["bucket"]) + + + def flaredb_scan(config: dict, start: str, end: str) -> list: + query = urllib.parse.urlencode( + { + "start": start, + "end": end, + "namespace": config["state_namespace"], + } + ) + payload = http_json("GET", FLAREDB_BASE_URL + "/api/v1/scan?" + query) + return payload.get("data", {}).get("items", []) + + + def flaredb_get_value(config: dict, key: str): + items = flaredb_scan(config, key, key + "~") + if not items: + return None + return items[0].get("value") + + + def flaredb_put_value(config: dict, key: str, value: str) -> None: + http_json( + "PUT", + FLAREDB_BASE_URL + "/api/v1/kv/" + urllib.parse.quote(key, safe=""), + { + "value": value, + "namespace": config["state_namespace"], + }, + ) + + + def flaredb_put_json(config: dict, key: str, payload: dict) -> None: + flaredb_put_value(config, key, json.dumps(payload, sort_keys=True)) + + + def flaredb_get_json(config: dict, key: str): + value = flaredb_get_value(config, key) + if value is None: + return None + return json.loads(value) + + + def flaredb_next_todo_id(config: dict) -> int: + value = flaredb_get_value(config, config["next_id_key"]) + if value is None: + return 1 + return int(value) + + + def todo_record_key(config: dict, todo_id: int) -> str: + return config["todo_prefix"] + ("%06d" % todo_id) + + + def todo_attachment_key(todo_id: int, filename: str) -> str: + return ATTACHMENT_PREFIX + "/" + ("%06d" % todo_id) + "/" + safe_filename(filename) + + + def todo_view(todo: dict) -> dict: + attachment = todo.get("attachment") + view = { + "id": int(todo["id"]), + "title": todo.get("title", ""), + "details": todo.get("details", ""), + "done": bool(todo.get("done", False)), + "created_at": todo.get("created_at", ""), + "updated_at": todo.get("updated_at", ""), + "attachment": None, + 
"attachment_url": "", + } + if isinstance(attachment, dict): + view["attachment"] = { + "filename": attachment.get("filename", ""), + "content_type": attachment.get("content_type", "application/octet-stream"), + "object_key": attachment.get("object_key", ""), + "size": int(attachment.get("size", 0)), + } + view["attachment_url"] = "/api/todos/%s/attachment" % int(todo["id"]) + return view + + + def list_todos(config: dict) -> list: + todos = [] + for item in flaredb_scan(config, config["todo_prefix"], config["todo_prefix"] + "~"): + try: + payload = json.loads(item.get("value", "{}")) + except json.JSONDecodeError: + continue + if isinstance(payload, dict) and "id" in payload: + todos.append(payload) + todos.sort(key=lambda todo: int(todo.get("id", 0))) + return todos + + + def snapshot_payload(config: dict, todos: list) -> dict: + latest = todos[-1] if todos else None + latest_attachment = latest.get("attachment") if isinstance(latest, dict) else None + return { + "bucket": config["bucket"], + "hostname": socket.gethostname(), + "latest_object_key": STATE_OBJECT_KEY, + "latest_todo_id": int(latest["id"]) if latest else 0, + "latest_todo_title": latest.get("title", "") if latest else "", + "latest_attachment_object_key": latest_attachment.get("object_key", "") if isinstance(latest_attachment, dict) else "", + "latest_attachment_filename": latest_attachment.get("filename", "") if isinstance(latest_attachment, dict) else "", + "object_store_backend": "lightningstor", + "object_store_endpoint": LIGHTNINGSTOR_S3_ENDPOINT, + "root_boot_count": read_int(ROOT_BOOT_COUNT_PATH), + "data_boot_count": read_int(DATA_BOOT_COUNT_PATH), + "state_backend": "flaredb", + "state_endpoint": FLAREDB_BASE_URL, + "state_namespace": config["state_namespace"], + "todo_prefix": config["todo_prefix"], + "next_id_key": config["next_id_key"], + "todo_count": len(todos), + "attachment_count": sum(1 for todo in todos if isinstance(todo.get("attachment"), dict)), + "todos": [todo_view(todo) for 
todo in todos], + } + + + def write_snapshot(config: dict, client, todos: list) -> dict: + payload = snapshot_payload(config, todos) + encoded = json.dumps(payload, sort_keys=True).encode("utf-8") + client.put_object( + Bucket=config["bucket"], + Key=STATE_OBJECT_KEY, + Body=encoded, + ContentType="application/json", + ) + return payload + + + def read_snapshot(config: dict, client) -> dict: + response = client.get_object(Bucket=config["bucket"], Key=STATE_OBJECT_KEY) + return json.loads(response["Body"].read().decode("utf-8")) + + + def response_payload(config: dict, todos: list, snapshot: dict) -> dict: return { "status": "ok", "hostname": socket.gethostname(), "listen_port": LISTEN_PORT, - "db_path": DB_PATH, - "visits": visits, "root_boot_count": read_int(ROOT_BOOT_COUNT_PATH), "data_boot_count": read_int(DATA_BOOT_COUNT_PATH), + "state_backend": "flaredb", + "state_endpoint": FLAREDB_BASE_URL, + "state_namespace": config["state_namespace"], + "todo_prefix": config["todo_prefix"], + "next_id_key": config["next_id_key"], + "object_store_backend": "lightningstor", + "object_store_endpoint": LIGHTNINGSTOR_S3_ENDPOINT, + "bucket": config["bucket"], + "latest_object_key": snapshot["latest_object_key"], + "latest_todo_id": int(snapshot["latest_todo_id"]), + "latest_todo_title": snapshot["latest_todo_title"], + "latest_attachment_object_key": snapshot["latest_attachment_object_key"], + "latest_attachment_filename": snapshot["latest_attachment_filename"], + "latest_object_todo_count": int(snapshot["todo_count"]), + "todo_count": len(todos), + "attachment_count": sum(1 for todo in todos if isinstance(todo.get("attachment"), dict)), + "todos": [todo_view(todo) for todo in todos], + } + + + def bootstrap() -> None: + deadline = time.time() + 120 + while True: + try: + config = load_or_create_config() + ensure_service_account(config) + ensure_s3_credentials(config) + client = s3_client(config) + ensure_bucket(config, client) + if flaredb_get_value(config, 
config["next_id_key"]) is None: + flaredb_put_value(config, config["next_id_key"], "1") + todos = list_todos(config) + write_snapshot(config, client, todos) + with BACKEND_LOCK: + BACKEND["ready"] = True + BACKEND["config"] = config + BACKEND["s3_client"] = client + BACKEND["last_error"] = "" + return + except Exception as error: + message = sanitize(str(error)) + with BACKEND_LOCK: + BACKEND["last_error"] = message + if time.time() >= deadline: + raise RuntimeError(message) + log_console("PHOTON_VM_DEMO_BOOTSTRAP_RETRY detail=%s" % message) + time.sleep(2) + + + def get_backend(): + with BACKEND_LOCK: + if not BACKEND["ready"]: + raise RuntimeError(BACKEND["last_error"] or "backend not ready") + return BACKEND["config"], BACKEND["s3_client"] + + + def current_state() -> dict: + config, client = get_backend() + todos = list_todos(config) + snapshot = write_snapshot(config, client, todos) + snapshot = read_snapshot(config, client) + return response_payload(config, todos, snapshot) + + + def create_todo(title: str, details: str, attachment_name: str, attachment_file, attachment_size: int, attachment_content_type: str) -> dict: + config, client = get_backend() + try: + with MUTATION_LOCK: + todo_id = flaredb_next_todo_id(config) + created_at = now_iso() + attachment = None + safe_name = safe_filename(attachment_name) + + if attachment_file is not None and attachment_size > 0: + object_key = todo_attachment_key(todo_id, safe_name) + attachment_file.file_object.seek(0) + client.put_object( + Bucket=config["bucket"], + Key=object_key, + Body=attachment_file.file_object, + ContentLength=attachment_size, + ContentType=attachment_content_type or "application/octet-stream", + ) + attachment = { + "filename": safe_name, + "content_type": attachment_content_type or "application/octet-stream", + "object_key": object_key, + "size": attachment_size, + } + + todo = { + "id": todo_id, + "title": title.strip() or "Untitled task", + "details": details.strip(), + "done": False, + 
"created_at": created_at, + "updated_at": created_at, + "attachment": attachment, + } + flaredb_put_json(config, todo_record_key(config, todo_id), todo) + flaredb_put_value(config, config["next_id_key"], str(todo_id + 1)) + + todos = list_todos(config) + snapshot = write_snapshot(config, client, todos) + snapshot = read_snapshot(config, client) + return response_payload(config, todos, snapshot) + finally: + if attachment_file is not None: + try: + attachment_file.close() + except Exception: + pass + + + def toggle_todo(todo_id: int) -> dict: + config, client = get_backend() + with MUTATION_LOCK: + todo = flaredb_get_json(config, todo_record_key(config, todo_id)) + if not isinstance(todo, dict): + raise KeyError("todo %s not found" % todo_id) + + todo["done"] = not bool(todo.get("done", False)) + todo["updated_at"] = now_iso() + flaredb_put_json(config, todo_record_key(config, todo_id), todo) + + todos = list_todos(config) + snapshot = write_snapshot(config, client, todos) + snapshot = read_snapshot(config, client) + return response_payload(config, todos, snapshot) + + + def attachment_response(todo_id: int): + config, client = get_backend() + todo = flaredb_get_json(config, todo_record_key(config, todo_id)) + if not isinstance(todo, dict): + raise KeyError("todo %s not found" % todo_id) + attachment = todo.get("attachment") + if not isinstance(attachment, dict): + raise FileNotFoundError("todo %s has no attachment" % todo_id) + + response = client.get_object(Bucket=config["bucket"], Key=attachment["object_key"]) + return { + "filename": attachment.get("filename", "attachment.bin"), + "content_type": attachment.get("content_type", "application/octet-stream"), + "body": response["Body"].read(), } class Handler(BaseHTTPRequestHandler): - server_version = "PhotonVMDemo/1.0" + server_version = "PhotonVMDemo/2.0" def log_message(self, format: str, *args) -> None: return - def _send_json(self, payload: dict, status: int = HTTPStatus.OK) -> None: - body = 
json.dumps(payload, sort_keys=True).encode("utf-8") + def _send_bytes(self, body: bytes, status: int, content_type: str, extra_headers=None) -> None: self.send_response(status) - self.send_header("Content-Type", "application/json") + self.send_header("Content-Type", content_type) self.send_header("Content-Length", str(len(body))) + if extra_headers: + for key, value in extra_headers.items(): + self.send_header(key, value) self.end_headers() self.wfile.write(body) + def _send_json(self, payload: dict, status: int = HTTPStatus.OK) -> None: + body = json.dumps(payload, sort_keys=True).encode("utf-8") + self._send_bytes(body, status, "application/json") + + def _send_html(self, body: str) -> None: + self._send_bytes(body.encode("utf-8"), HTTPStatus.OK, "text/html; charset=utf-8") + + def _parsed_path(self): + return urllib.parse.urlparse(self.path).path + + def _todo_id_from_path(self, path: str, suffix: str): + prefix = "/api/todos/" + if not path.startswith(prefix) or not path.endswith(suffix): + return None + value = path[len(prefix):len(path) - len(suffix)] + if not value.isdigit(): + return None + return int(value) + + def _parse_todo_submission(self): + content_type = self.headers.get("Content-Type", "") + content_length = int(self.headers.get("Content-Length", "0") or "0") + + if content_type.startswith("application/json"): + raw = self.rfile.read(content_length) + payload = json.loads(raw.decode("utf-8") or "{}") + return { + "title": str(payload.get("title", "")), + "details": str(payload.get("details", "")), + "attachment_name": "", + "attachment_file": None, + "attachment_size": 0, + "attachment_content_type": "", + } + + if content_type.startswith("multipart/form-data"): + os.makedirs(UPLOAD_TMP_DIR, exist_ok=True) + fields = {} + files = {} + headers = { + "Content-Type": content_type, + "Content-Length": str(content_length), + } + + def on_field(field) -> None: + field_name = (field.field_name or b"").decode("utf-8", errors="replace") + fields[field_name] 
= (field.value or b"").decode("utf-8", errors="replace") + + def on_file(file_obj) -> None: + field_name = (file_obj.field_name or b"").decode("utf-8", errors="replace") + files[field_name] = file_obj + + try: + parser = create_form_parser( + headers, + on_field, + on_file, + config={ + "UPLOAD_DIR": UPLOAD_TMP_DIR, + "UPLOAD_DELETE_TMP": True, + "MAX_MEMORY_FILE_SIZE": MAX_IN_MEMORY_UPLOAD, + }, + ) + bytes_read = 0 + while bytes_read < content_length: + chunk = self.rfile.read(min(262144, content_length - bytes_read)) + if not chunk: + break + parser.write(chunk) + bytes_read += len(chunk) + parser.finalize() + except Exception: + for file_obj in files.values(): + try: + file_obj.close() + except Exception: + pass + raise + + title = fields.get("title", "") + details = fields.get("details", "") + attachment_file = files.get("attachment") + attachment_name = "" + attachment_size = 0 + attachment_content_type = "" + + if attachment_file is not None and int(attachment_file.size or 0) > 0: + attachment_name = (attachment_file.file_name or b"").decode("utf-8", errors="replace") + attachment_size = int(attachment_file.size or 0) + attachment_content_type = mimetypes.guess_type(attachment_name)[0] or "application/octet-stream" + + return { + "title": title, + "details": details, + "attachment_name": attachment_name, + "attachment_file": attachment_file, + "attachment_size": attachment_size, + "attachment_content_type": attachment_content_type, + } + else: + raw = self.rfile.read(content_length) + fields = urllib.parse.parse_qs(raw.decode("utf-8", errors="replace"), keep_blank_values=True) + title = fields.get("title", [""])[0] + details = fields.get("details", [""])[0] + return { + "title": title, + "details": details, + "attachment_name": "", + "attachment_file": None, + "attachment_size": 0, + "attachment_content_type": "", + } + def do_GET(self) -> None: - if self.path == "/health": - self._send_json({"status": "ok"}) + path = self._parsed_path() + if path == "/": + 
self._send_html(UI_HTML) return - if self.path == "/state": - self._send_json(current_state()) + + if path == "/health": + with BACKEND_LOCK: + if BACKEND["ready"]: + self._send_json({"status": "ok"}) + else: + self._send_json( + {"status": "starting", "detail": BACKEND["last_error"]}, + HTTPStatus.SERVICE_UNAVAILABLE, + ) return + + if path in ("/state", "/api/todos"): + try: + self._send_json(current_state()) + except Exception as error: + self._send_json( + {"error": "backend_error", "detail": sanitize(str(error))}, + HTTPStatus.SERVICE_UNAVAILABLE, + ) + return + + attachment_todo_id = self._todo_id_from_path(path, "/attachment") + if attachment_todo_id is not None: + try: + payload = attachment_response(attachment_todo_id) + self._send_bytes( + payload["body"], + HTTPStatus.OK, + payload["content_type"], + { + "Content-Disposition": 'inline; filename="%s"' % payload["filename"], + }, + ) + except FileNotFoundError: + self._send_json({"error": "not_found"}, HTTPStatus.NOT_FOUND) + except KeyError: + self._send_json({"error": "not_found"}, HTTPStatus.NOT_FOUND) + except Exception as error: + self._send_json( + {"error": "backend_error", "detail": sanitize(str(error))}, + HTTPStatus.SERVICE_UNAVAILABLE, + ) + return + self._send_json({"error": "not_found"}, HTTPStatus.NOT_FOUND) def do_POST(self) -> None: - if self.path == "/visit": - payload = current_state(increment=True) - log_console("PHOTON_VM_DEMO_VISIT visits=%s" % payload["visits"]) - self._send_json(payload) + path = self._parsed_path() + + if path == "/api/todos": + try: + submission = self._parse_todo_submission() + payload = create_todo( + submission["title"], + submission["details"], + submission["attachment_name"], + submission["attachment_file"], + submission["attachment_size"], + submission["attachment_content_type"], + ) + log_console( + "PHOTON_VM_DEMO_TODO_CREATED count=%s title=%s attachment=%s" + % ( + payload["todo_count"], + sanitize(payload["latest_todo_title"]), + 
payload["latest_attachment_filename"] or "none", + ) + ) + self._send_json(payload, HTTPStatus.CREATED) + except Exception as error: + self._send_json( + {"error": "backend_error", "detail": sanitize(str(error))}, + HTTPStatus.SERVICE_UNAVAILABLE, + ) return + + toggle_todo_id = self._todo_id_from_path(path, "/toggle") + if toggle_todo_id is not None: + try: + payload = toggle_todo(toggle_todo_id) + log_console( + "PHOTON_VM_DEMO_TODO_TOGGLED id=%s latest=%s" + % (toggle_todo_id, sanitize(payload["latest_todo_title"])) + ) + self._send_json(payload) + except KeyError: + self._send_json({"error": "not_found"}, HTTPStatus.NOT_FOUND) + except Exception as error: + self._send_json( + {"error": "backend_error", "detail": sanitize(str(error))}, + HTTPStatus.SERVICE_UNAVAILABLE, + ) + return + self._send_json({"error": "not_found"}, HTTPStatus.NOT_FOUND) def main() -> None: - init_db() + os.makedirs(DATA_MOUNT, exist_ok=True) + bootstrap() + with BACKEND_LOCK: + config = BACKEND["config"] server = ThreadingHTTPServer((LISTEN_HOST, LISTEN_PORT), Handler) log_console( - "PHOTON_VM_DEMO_WEB_READY count=%s port=%s db=%s" - % (read_int(ROOT_BOOT_COUNT_PATH), LISTEN_PORT, DB_PATH) + "PHOTON_VM_DEMO_WEB_READY count=%s port=%s app=todo bucket=%s namespace=%s" + % ( + read_int(ROOT_BOOT_COUNT_PATH), + LISTEN_PORT, + config["bucket"], + config["state_namespace"], + ) ) server.serve_forever() if __name__ == "__main__": - main() + try: + main() + except Exception: + log_console("PHOTON_VM_DEMO_FATAL") + for line in traceback.format_exc().splitlines(): + log_console(sanitize(line)) + raise ''; in { imports = [ @@ -148,14 +1386,14 @@ in { services.getty.autologinUser = "root"; users.mutableUsers = false; - users.users.root.hashedPassword = "$6$photoncloud$aUJCEE5wm/b5O.9KIKGm84qUWdWXwnebsFEiMBF7u9Y7AOWodaMrjbbKGMOf0X59VJyJeMRsgbT7VWeqMHpUe."; + users.users.root.hashedPassword = "$6$ultracloud$aUJCEE5wm/b5O.9KIKGm84qUWdWXwnebsFEiMBF7u9Y7AOWodaMrjbbKGMOf0X59VJyJeMRsgbT7VWeqMHpUe."; 
documentation.enable = false; services.openssh.enable = false; environment.systemPackages = [ pkgs.e2fsprogs pkgs.util-linux ]; systemd.services.photon-vm-smoke = { - description = "PhotonCloud VM smoke marker"; + description = "UltraCloud VM smoke marker"; wantedBy = [ "multi-user.target" ]; wants = [ "systemd-udev-settle.service" ]; after = [ "local-fs.target" "systemd-udev-settle.service" ]; @@ -272,14 +1510,13 @@ in { }; systemd.services.photon-vm-demo-api = { - description = "PhotonCloud VM demo web app"; + description = "UltraCloud VM demo web app"; wantedBy = [ "multi-user.target" ]; wants = [ "network-online.target" "photon-vm-smoke.service" ]; after = [ "network-online.target" "photon-vm-smoke.service" ]; path = with pkgs; [ bash coreutils - python3 util-linux ]; serviceConfig = { @@ -297,7 +1534,7 @@ in { sleep 1 done - exec python3 ${photonVmDemoApi} + exec ${pythonWithBoto3}/bin/python3 ${photonVmDemoApi} >>/dev/ttyS0 2>&1 ''; }; diff --git a/nix/tests/deployer-vm-smoke.nix b/nix/tests/deployer-vm-smoke.nix index df7a6d1..3e3b923 100644 --- a/nix/tests/deployer-vm-smoke.nix +++ b/nix/tests/deployer-vm-smoke.nix @@ -1,11 +1,11 @@ -{ - pkgs, - photoncloudPackages, - smokeTargetToplevel, - desiredSystemOverrides ? { }, - expectedStatus ? "active", - expectCurrentSystemMatchesTarget ? true, - expectMarkerPresent ? true, +{ pkgs +, ultracloudPackages +, smokeTargetToplevel +, desiredSystemOverrides ? { } +, expectedStatus ? "active" +, expectCurrentSystemMatchesTarget ? true +, expectMarkerPresent ? 
true +, }: let @@ -35,13 +35,13 @@ in services.chainfire = { enable = true; nodeId = "deployer01"; - package = photoncloudPackages.chainfire-server; + package = ultracloudPackages.chainfire-server; }; services.deployer = { enable = true; - package = photoncloudPackages.deployer-server; - ctlPackage = photoncloudPackages.deployer-ctl; + package = ultracloudPackages.deployer-server; + ctlPackage = ultracloudPackages.deployer-ctl; bindAddr = "0.0.0.0:8088"; chainfireEndpoints = [ "http://127.0.0.1:2379" ]; clusterId = "vm-smoke"; @@ -50,7 +50,7 @@ in requireChainfire = true; allowUnknownNodes = false; allowUnauthenticated = false; - bootstrapFlakeBundle = photoncloudPackages.plasmacloudFlakeBundle; + bootstrapFlakeBundle = ultracloudPackages.ultracloudFlakeBundle; }; environment.systemPackages = with pkgs; [ @@ -58,7 +58,7 @@ in gnutar gzip jq - photoncloudPackages.deployer-ctl + ultracloudPackages.deployer-ctl ]; virtualisation.memorySize = 1536; @@ -86,8 +86,8 @@ in gnutar gzip jq - photoncloudPackages.deployer-ctl - photoncloudPackages.nix-agent + ultracloudPackages.deployer-ctl + ultracloudPackages.nix-agent ]; virtualisation.memorySize = 4096; @@ -174,7 +174,7 @@ in "description": "General-purpose worker pool", "node_class": "worker-linux", "labels": { - "pool.photoncloud.io/name": "general", + "pool.ultracloud.io/name": "general", }, } ], @@ -189,7 +189,7 @@ in "deployer-ctl " "--chainfire-endpoint http://127.0.0.1:2379 " "--cluster-id vm-smoke " - "--cluster-namespace photoncloud " + "--cluster-namespace ultracloud " "--deployer-namespace deployer " "apply --config /tmp/cluster-state.json --prune", timeout=120, @@ -200,14 +200,14 @@ in "curl -fsS " "-H 'x-deployer-token: vm-smoke-bootstrap-token' " "http://{deployer_ip}:8088/api/v1/bootstrap/flake-bundle " - "-o /tmp/plasmacloud-flake-bundle.tar.gz".format( + "-o /tmp/ultracloud-flake-bundle.tar.gz".format( deployer_ip=deployer_ip, ), timeout=120, ) print("bundle_downloaded") worker.succeed("mkdir -p 
/var/lib/photon-src", timeout=30) - worker.succeed("tar xzf /tmp/plasmacloud-flake-bundle.tar.gz -C /var/lib/photon-src", timeout=180) + worker.succeed("tar xzf /tmp/ultracloud-flake-bundle.tar.gz -C /var/lib/photon-src", timeout=180) print("bundle_extracted") worker.succeed("test -f /var/lib/photon-src/flake.nix") worker.succeed("test -d /var/lib/photon-src/nix") @@ -264,18 +264,18 @@ in timeout=120, ) phone_home_payload = json.loads(phone_home_response) - assert phone_home_payload["node_id"] == "worker" - assert phone_home_payload["node_config"]["install_plan"]["nixos_configuration"] == "vm-smoke-target" - assert phone_home_payload["node_config"]["install_plan"]["target_disk"] == "/dev/vda" + assert phone_home_payload["node_config"]["assignment"]["node_id"] == "worker" + assert phone_home_payload["node_config"]["bootstrap_plan"]["install_plan"]["nixos_configuration"] == "vm-smoke-target" + assert phone_home_payload["node_config"]["bootstrap_plan"]["install_plan"]["target_disk"] == "/dev/vda" print("phone_home_complete") node_dump_output = deployer.succeed( "deployer-ctl " "--chainfire-endpoint http://127.0.0.1:2379 " "--cluster-id vm-smoke " - "--cluster-namespace photoncloud " + "--cluster-namespace ultracloud " "--deployer-namespace deployer " - "dump --prefix photoncloud/clusters/vm-smoke/nodes/worker --format json" + "dump --prefix ultracloud/clusters/vm-smoke/nodes/worker --format json" ) node_entries = [json.loads(line) for line in node_dump_output.splitlines() if line.strip()] node_record = next(entry["value"] for entry in node_entries if entry["key"].endswith("/nodes/worker")) @@ -286,12 +286,12 @@ in assert node_record["labels"]["hardware.disk_count"] == "1" worker.succeed( - "${photoncloudPackages.deployer-ctl}/bin/deployer-ctl " + "${ultracloudPackages.deployer-ctl}/bin/deployer-ctl " "--chainfire-endpoint http://{deployer_ip}:2379 " "--cluster-id vm-smoke " - "--cluster-namespace photoncloud " + "--cluster-namespace ultracloud " "--deployer-namespace 
deployer " - "dump --prefix photoncloud/clusters/vm-smoke/nodes/worker --format json >/tmp/worker-chainfire-preflight.json".format( + "dump --prefix ultracloud/clusters/vm-smoke/nodes/worker --format json >/tmp/worker-chainfire-preflight.json".format( deployer_ip=deployer_ip, ), timeout=120, @@ -308,11 +308,11 @@ in "--setenv=PATH=/run/current-system/sw/bin " "--setenv=RUST_LOG=info " "-- " - "${photoncloudPackages.nix-agent}/bin/nix-agent " + "${ultracloudPackages.nix-agent}/bin/nix-agent " "--apply " "--once " "--chainfire-endpoint http://{deployer_ip}:2379 " - "--cluster-namespace photoncloud " + "--cluster-namespace ultracloud " "--cluster-id vm-smoke " "--node-id worker " "--flake-root /var/lib/photon-src".format( @@ -330,9 +330,9 @@ in "deployer-ctl " "--chainfire-endpoint http://127.0.0.1:2379 " "--cluster-id vm-smoke " - "--cluster-namespace photoncloud " + "--cluster-namespace ultracloud " "--deployer-namespace deployer " - "dump --prefix photoncloud/clusters/vm-smoke/nodes/worker/observed-system --format json" + "dump --prefix ultracloud/clusters/vm-smoke/nodes/worker/observed-system --format json" ) observed_entries = [json.loads(line) for line in observed_dump_output.splitlines() if line.strip()] if not observed_entries: diff --git a/nix/tests/fiberlb-native-bgp-ecmp-drain-vm-smoke.nix b/nix/tests/fiberlb-native-bgp-ecmp-drain-vm-smoke.nix index 9d5341f..6a536ed 100644 --- a/nix/tests/fiberlb-native-bgp-ecmp-drain-vm-smoke.nix +++ b/nix/tests/fiberlb-native-bgp-ecmp-drain-vm-smoke.nix @@ -1,8 +1,7 @@ { pkgs, - photoncloudPackages, - photoncloudModule, - nixNosModule, + ultracloudPackages, + ultracloudModule, }: let @@ -145,8 +144,7 @@ in { ... 
}: { imports = [ - nixNosModule - photoncloudModule + ultracloudModule ]; networking.hostName = "lb-a"; @@ -169,7 +167,7 @@ in services.iam = { enable = true; - package = photoncloudPackages.iam-server; + package = ultracloudPackages.iam-server; port = 50080; httpPort = 8083; storeBackend = "memory"; @@ -178,7 +176,7 @@ in services.fiberlb = { enable = true; - package = photoncloudPackages.fiberlb-server; + package = ultracloudPackages.fiberlb-server; port = 50085; iamAddr = "192.168.100.2:50080"; metadataBackend = "sqlite"; @@ -229,8 +227,7 @@ in { ... }: { imports = [ - nixNosModule - photoncloudModule + ultracloudModule ]; networking.hostName = "lb-b"; @@ -253,7 +250,7 @@ in services.iam = { enable = true; - package = photoncloudPackages.iam-server; + package = ultracloudPackages.iam-server; port = 50080; httpPort = 8083; storeBackend = "memory"; @@ -262,7 +259,7 @@ in services.fiberlb = { enable = true; - package = photoncloudPackages.fiberlb-server; + package = ultracloudPackages.fiberlb-server; port = 50085; iamAddr = "192.168.100.3:50080"; metadataBackend = "sqlite"; diff --git a/nix/tests/fiberlb-native-bgp-interop-vm-smoke.nix b/nix/tests/fiberlb-native-bgp-interop-vm-smoke.nix index 6bbe874..7d07298 100644 --- a/nix/tests/fiberlb-native-bgp-interop-vm-smoke.nix +++ b/nix/tests/fiberlb-native-bgp-interop-vm-smoke.nix @@ -1,8 +1,7 @@ { pkgs, - photoncloudPackages, - photoncloudModule, - nixNosModule, + ultracloudPackages, + ultracloudModule, }: let @@ -232,8 +231,7 @@ in { ... 
}: { imports = [ - nixNosModule - photoncloudModule + ultracloudModule ]; networking.hostName = "lb"; @@ -256,7 +254,7 @@ in services.iam = { enable = true; - package = photoncloudPackages.iam-server; + package = ultracloudPackages.iam-server; port = 50080; httpPort = 8083; storeBackend = "memory"; @@ -265,7 +263,7 @@ in services.fiberlb = { enable = true; - package = photoncloudPackages.fiberlb-server; + package = ultracloudPackages.fiberlb-server; port = 50085; iamAddr = "192.168.100.2:50080"; metadataBackend = "sqlite"; diff --git a/nix/tests/fiberlb-native-bgp-multipath-vm-smoke.nix b/nix/tests/fiberlb-native-bgp-multipath-vm-smoke.nix index d7ff6e1..a81e819 100644 --- a/nix/tests/fiberlb-native-bgp-multipath-vm-smoke.nix +++ b/nix/tests/fiberlb-native-bgp-multipath-vm-smoke.nix @@ -1,8 +1,7 @@ { pkgs, - photoncloudPackages, - photoncloudModule, - nixNosModule, + ultracloudPackages, + ultracloudModule, }: let @@ -137,8 +136,7 @@ in { ... }: { imports = [ - nixNosModule - photoncloudModule + ultracloudModule ]; networking.hostName = "lb"; @@ -161,7 +159,7 @@ in services.iam = { enable = true; - package = photoncloudPackages.iam-server; + package = ultracloudPackages.iam-server; port = 50080; httpPort = 8083; storeBackend = "memory"; @@ -170,7 +168,7 @@ in services.fiberlb = { enable = true; - package = photoncloudPackages.fiberlb-server; + package = ultracloudPackages.fiberlb-server; port = 50085; iamAddr = "192.168.100.2:50080"; metadataBackend = "sqlite"; diff --git a/nix/tests/fiberlb-native-bgp-vm-smoke.nix b/nix/tests/fiberlb-native-bgp-vm-smoke.nix index f13058c..3c0694c 100644 --- a/nix/tests/fiberlb-native-bgp-vm-smoke.nix +++ b/nix/tests/fiberlb-native-bgp-vm-smoke.nix @@ -1,8 +1,7 @@ { pkgs, - photoncloudPackages, - photoncloudModule, - nixNosModule, + ultracloudPackages, + ultracloudModule, }: let @@ -93,8 +92,7 @@ in { ... 
}: { imports = [ - nixNosModule - photoncloudModule + ultracloudModule ]; networking.hostName = "lb"; @@ -116,7 +114,7 @@ in services.iam = { enable = true; - package = photoncloudPackages.iam-server; + package = ultracloudPackages.iam-server; port = 50080; httpPort = 8083; storeBackend = "memory"; @@ -125,7 +123,7 @@ in services.fiberlb = { enable = true; - package = photoncloudPackages.fiberlb-server; + package = ultracloudPackages.fiberlb-server; port = 50085; iamAddr = "192.168.100.2:50080"; metadataBackend = "sqlite"; diff --git a/nix/tests/first-boot-topology-vm-smoke.nix b/nix/tests/first-boot-topology-vm-smoke.nix index c17ccd8..f649e48 100644 --- a/nix/tests/first-boot-topology-vm-smoke.nix +++ b/nix/tests/first-boot-topology-vm-smoke.nix @@ -1,8 +1,7 @@ { pkgs, - photoncloudPackages, - photoncloudModule, - nixNosModule, + ultracloudPackages, + ultracloudModule, }: { @@ -13,8 +12,7 @@ { ... }: { imports = [ - nixNosModule - photoncloudModule + ultracloudModule ]; networking.hostName = "bridge01"; @@ -26,7 +24,7 @@ services.chainfire = { enable = true; - package = photoncloudPackages.chainfire-server; + package = ultracloudPackages.chainfire-server; nodeId = "bridge01"; apiAddr = "127.0.0.1:2379"; raftAddr = "127.0.0.1:2380"; @@ -36,12 +34,11 @@ services.first-boot-automation = { enable = true; - useNixNOS = true; enableFlareDB = false; enableIAM = false; }; - plasmacloud.cluster = { + ultracloud.cluster = { enable = true; name = "bridge-cluster"; nodes.bridge01 = { @@ -55,57 +52,6 @@ bgp.asn = 64512; }; - system.stateVersion = "24.11"; - }; - - stand01 = - { ... 
}: - { - imports = [ - nixNosModule - photoncloudModule - ]; - - networking.hostName = "stand01"; - networking.firewall.enable = false; - - environment.systemPackages = with pkgs; [ - jq - ]; - - nix-nos = { - enable = true; - clusters.standalone = { - name = "standalone-cluster"; - bootstrapNode = "stand01"; - nodes.stand01 = { - role = "control-plane"; - ip = "127.0.0.1"; - services = [ "chainfire" ]; - raftPort = 2380; - apiPort = 2379; - }; - }; - }; - - services.chainfire = { - enable = true; - package = photoncloudPackages.chainfire-server; - nodeId = "stand01"; - apiAddr = "127.0.0.1:2379"; - raftAddr = "127.0.0.1:2380"; - initialPeers = [ "stand01=127.0.0.1:2380" ]; - }; - systemd.services.chainfire.environment.RUST_LOG = "error"; - - services.first-boot-automation = { - enable = true; - useNixNOS = true; - nixnosClusterName = "standalone"; - enableFlareDB = false; - enableIAM = false; - }; - system.stateVersion = "24.11"; }; }; @@ -114,29 +60,20 @@ start_all() serial_stdout_off() - scenarios = [ - (bridge01, "bridge01", "bridge-cluster"), - (stand01, "stand01", "standalone-cluster"), - ] + machine = bridge01 + machine.wait_for_unit("chainfire.service") + machine.wait_until_succeeds("test -f /etc/nixos/secrets/cluster-config.json") + machine.succeed( + "bash -lc 'systemctl restart chainfire-cluster-join.service " + "|| (systemctl status chainfire-cluster-join.service --no-pager; " + "journalctl -u chainfire-cluster-join.service --no-pager -n 200; exit 1)'" + ) + machine.wait_until_succeeds("test -f /var/lib/first-boot-automation/.chainfire-initialized") + machine.succeed("systemctl is-active chainfire-cluster-join.service") - for machine, node_id, cluster_name in scenarios: - print(f"validating {node_id}") - machine.wait_for_unit("chainfire.service") - print(f"{node_id}: chainfire up") - machine.wait_until_succeeds("test -f /etc/nixos/secrets/cluster-config.json") - print(f"{node_id}: config file present") - machine.succeed( - "bash -lc 'systemctl restart 
chainfire-cluster-join.service " - "|| (systemctl status chainfire-cluster-join.service --no-pager; " - "journalctl -u chainfire-cluster-join.service --no-pager -n 200; exit 1)'" - ) - machine.wait_until_succeeds("test -f /var/lib/first-boot-automation/.chainfire-initialized") - print(f"{node_id}: bootstrap marker present") - machine.succeed("systemctl is-active chainfire-cluster-join.service") - - machine.succeed(f"jq -r '.node_id' /etc/nixos/secrets/cluster-config.json | grep -x '{node_id}'") - machine.succeed("jq -r '.bootstrap' /etc/nixos/secrets/cluster-config.json | grep -x true") - machine.succeed(f"jq -r '.cluster_name' /etc/nixos/secrets/cluster-config.json | grep -x '{cluster_name}'") - machine.succeed("jq -r '.chainfire_leader_url' /etc/nixos/secrets/cluster-config.json | grep -x 'http://127.0.0.1:8081'") + machine.succeed("jq -r '.node_id' /etc/nixos/secrets/cluster-config.json | grep -x 'bridge01'") + machine.succeed("jq -r '.bootstrap' /etc/nixos/secrets/cluster-config.json | grep -x true") + machine.succeed("jq -r '.cluster_name' /etc/nixos/secrets/cluster-config.json | grep -x 'bridge-cluster'") + machine.succeed("jq -r '.chainfire_leader_url' /etc/nixos/secrets/cluster-config.json | grep -x 'http://127.0.0.1:8081'") ''; } diff --git a/plans/baremetal-maas-simplification-2026-04-04.md b/plans/baremetal-maas-simplification-2026-04-04.md new file mode 100644 index 0000000..142a1b9 --- /dev/null +++ b/plans/baremetal-maas-simplification-2026-04-04.md @@ -0,0 +1,571 @@ +# Bare Metal / MaaS-like Simplification Plan (2026-04-04) + +## Summary + +UltraCloud already has many of the right building blocks: + +- `Nix` modules and flake outputs for host configuration +- `deployer` for bootstrap, enrollment, and inventory +- `nix-agent` for host OS reconciliation +- `fleet-scheduler` and `node-agent` for native service placement/runtime + +The problem is not "missing everything". 
The problem is that the boundaries are still muddy: + +- source of truth is duplicated +- install-time and runtime configuration are mixed together +- registration, inventory, credential issuance, and install-plan rendering are coupled +- bootstrap and scheduling are conceptually separate but still feel entangled in the repo + +This document proposes a simpler target architecture for bare metal and MaaS-like provisioning, based on both the current repo and patterns used by existing systems. + +## What Existing Systems Consistently Separate + +### MAAS + +Useful pattern: + +- machine lifecycle is explicit +- commissioning, testing, deployment, release, rescue, and broken states are operator-visible +- registration/inventory is not the same thing as workload placement + +Relevant docs: + +- https://discourse.maas.io/t/about-maas/5511 +- https://discourse.maas.io/t/machines-do-the-heavy-lifting/5080 + +### Ironic and Metal3 + +Useful pattern: + +- enrollment, manageable, available, deploy, clean, rescue are explicit provisioning states +- inspection and cleaning are first-class lifecycle steps +- root device selection is modeled explicitly instead of relying on `/dev/sdX` + +Relevant docs: + +- https://docs.openstack.org/ironic/latest/install/enrollment.html +- https://book.metal3.io/bmo/automated_cleaning +- https://book.metal3.io/bmo/root_device_hints + +### Tinkerbell + +Useful pattern: + +- hardware inventory, workflow/template, and install worker are separate concepts +- the installer environment is generic +- the workflow engine is distinct from hardware registration + +Relevant docs: + +- https://tinkerbell.org/docs/services/tink-worker/ +- https://tinkerbell.org/docs/v0.22/services/tink-controller/ + +### Talos and Omni + +Useful pattern: + +- a minimal boot medium is used only to join management +- machine classes and labels drive config selection +- machine configuration is acquired over an API instead of being hard-coded into per-node install media + 
+Relevant docs: + +- https://omni.siderolabs.com/how-to-guides/registering-machines +- https://docs.siderolabs.com/talos/v1.10/overview/what-is-talos + +### NixOS deployment tools + +Useful pattern: + +- installation and host rollout are separate concerns +- unattended install should be repeatable from declarative config +- activation needs timeout, health gate, and rollback semantics + +Relevant docs: + +- https://github.com/nix-community/nixos-anywhere +- https://github.com/serokell/deploy-rs +- https://colmena.cli.rs/0.4/reference/cli.html + +## Current UltraCloud Pain Points + +### 1. Source of truth is duplicated + +Today the repo has overlapping schema and generation paths: + +- `ultracloud.cluster` generates per-node cluster config, `nix-nos` topology, and deployer cluster state +- `nix-nos` still has its own cluster schema and `generateClusterConfig` +- `deployer-types::ClusterStateSpec` is another whole-cluster model on the Rust side + +This makes it too easy to author the same concept twice. + +Current references: + +- `nix/modules/ultracloud-cluster.nix` +- `nix-nos/modules/topology.nix` +- `nix-nos/lib/cluster-config-lib.nix` +- `deployer/crates/deployer-types/src/lib.rs` + +### 2. Install-time and runtime configuration are mixed + +`NodeConfig` currently contains all of the following: + +- hostname and IP +- labels, pool, node class +- services +- Nix profile +- install plan + +That is too much for a single object. A bootstrap/install contract should not be the same object as runtime scheduling hints. + +Current references: + +- `deployer/crates/deployer-types/src/lib.rs` +- `deployer/crates/deployer-server/src/phone_home.rs` + +### 3. 
`phone_home` is carrying too much responsibility + +The current flow combines: + +- machine identity lookup +- enrollment-rule matching +- node assignment +- inventory summarization +- cluster node record persistence +- SSH/TLS issuance +- install-plan return + +This works, but it is difficult to reason about and difficult to evolve. + +### 4. ISO bootstrap is still node-path oriented + +The generic ISO still falls back to node-specific paths like: + +- `nix/nodes/vm-cluster/$NODE_ID/disko.nix` + +That prevents profile/class-based provisioning from becoming the main path. + +### 5. Host rollout and runtime scheduling are separated in code but not in the mental model + +The repo already has: + +- `nix-agent` for host OS state +- host deployment reconciliation for writing `desired-system` +- `fleet-scheduler` for native service placement +- `node-agent` for process/container reconcile + +These are the right components, but the naming and schema boundaries do not make the split obvious. + +## Design Goal + +The simplest viable target is not "build all of MAAS". + +The simplest viable target is: + +1. `Nix` is the only authoring surface for static cluster intent. +2. bootstrap deals only with discovery, assignment, credentials, and install plans. +3. host rollout is a separate controller/agent path. +4. service scheduling is entirely downstream of host rollout. +5. BMC and PXE are optional extensions, not required for the base design. + +For your current 6-machine, no-BMC environment, this is the right scope. + +## Proposed Target Model + +### Layer 1: Static model in Nix + +Create a single Nix library as the canonical schema. Do not create a fourth schema; promote the existing `cluster-config-lib` into the canonical one. 
+ +Recommended file: + +- `nix/lib/cluster-schema.nix` + +Practical migration: + +- move or copy `nix-nos/lib/cluster-config-lib.nix` to `nix/lib/cluster-schema.nix` +- make `ultracloud-cluster.nix` and `nix-nos/modules/topology.nix` thin wrappers over it +- stop adding new schema logic anywhere else + +This library should define only stable declarative objects: + +- cluster +- networks +- install profiles +- disk policies +- node classes +- pools +- enrollment rules +- nodes +- host deployments +- service policies + +From that one schema, generate these artifacts: + +- `nixosConfigurations.` +- bootstrap install-plan data +- deployer cluster-state JSON +- test-cluster topology + +## Recommended Nix Object Split + +### `installProfiles` + +Purpose: + +- reusable OS install targets +- used during discovery/bootstrap + +Fields: + +- flake attribute or system profile reference +- disk policy reference +- network policy reference +- bootstrap package set / image bundle reference + +### `diskPolicies` + +Purpose: + +- stable root-disk selection +- avoid hardcoding `/dev/sda` or node-specific Disko paths + +Fields: + +- root device hints +- partition layout +- wipe/cleaning policy + +Borrowed directly from Ironic/Metal3 thinking: disk choice must be modeled, not guessed. + +### `nodeClasses` + +Purpose: + +- describe intended hardware/software role + +Fields: + +- install profile +- default labels +- runtime capabilities +- minimum hardware traits + +### `enrollmentRules` + +Purpose: + +- match discovered machines to class/pool/labels + +Fields: + +- selectors on machine-id, MAC, DMI, disk traits, NIC traits +- assigned node class +- assigned pool +- optional hostname/node-id policy + +### `nodes` + +Purpose: + +- explicit identity for fixed nodes when you want them + +Use this for: + +- control plane seeds +- gateways +- special hardware + +Do not require this for every worker in the generic path. 
+ +### `hostDeployments` + +Purpose: + +- rollout desired host OS state to already-installed machines + +This is not bootstrap. + +### `servicePolicies` + +Purpose: + +- runtime placement intent for `fleet-scheduler` + +This is not host provisioning. + +## Proposed Rust/API Object Split + +Replace the current "fat" `NodeConfig` mental model with explicit smaller objects. + +### `MachineInventory` + +Owned by: + +- bootstrap discovery + +Contains: + +- machine identity +- hardware facts +- last inventory hash +- boot method support +- optional power capability metadata + +### `NodeAssignment` + +Owned by: + +- deployer enrollment logic + +Contains: + +- stable `node_id` +- hostname +- class +- pool +- labels +- failure domain + +### `BootstrapSecrets` + +Owned by: + +- deployer credential issuer + +Contains: + +- SSH host key +- TLS cert/key +- bootstrap token or short-lived install token + +### `InstallPlan` + +Owned by: + +- deployer plan renderer + +Contains: + +- node assignment reference +- install profile reference +- resolved flake attr or system reference +- resolved disk policy or root-device selection +- network bootstrap data +- image/bundle URL + +### `DesiredSystem` + +Owned by: + +- host rollout controller + +Contains: + +- target system +- activation strategy +- health check +- rollback policy + +### `ServiceSpec` + +Owned by: + +- runtime scheduler + +Contains: + +- service placement and instance policy only + +It should not be returned by bootstrap APIs. + +## Recommended Controller Split + +### 1. Deployer server + +Keep responsibility limited to: + +- discovery +- enrollment / assignment +- inventory storage +- credential issuance +- install-plan rendering + +Do not make it the host rollout engine and do not make it the runtime scheduler. + +### 2. Host deployment controller + +Make this an explicit first-class component. Today that logic exists in `ultracloud-reconciler hosts`. 
+ +Responsibility: + +- watch `HostDeployment` +- select nodes +- write `desired-system` +- respect rollout budget and drain policy + +Recommendation: + +- rename it conceptually to `host-controller` +- keep it separate from `fleet-scheduler` + +### 3. `nix-agent` + +This should borrow deploy-rs style semantics: + +- activation timeout +- confirmation/health gate +- rollback on failure +- staged reboot handling + +### 4. `fleet-scheduler` + +Responsibility: + +- service placement only + +Do not allow bootstrap/install concerns to leak here. + +## Recommended Bootstrap Flow + +Keep one generic installer image, but make the protocol explicit. + +### Step 1: discover + +Installer boots and sends: + +- machine identity +- hardware facts +- observed network facts + +### Step 2: assign + +Deployer resolves: + +- class +- pool +- hostname/node-id +- install profile + +### Step 3: fetch plan + +Installer receives: + +- `NodeAssignment` +- `BootstrapSecrets` +- `InstallPlan` + +### Step 4: install + +Installer: + +- fetches source bundle +- resolves disk policy +- runs Disko +- installs NixOS +- reports status + +### Step 5: first boot + +Installed system starts: + +- core static services +- `nix-agent` +- runtime agent only if needed for that class + +This is closer to Tinkerbell and Talos than to the current monolithic `node_config` flow, while remaining much smaller than MAAS or Ironic. + +## Recommended Lifecycle State Model + +Adopt a visible state machine. At minimum: + +- `discovered` +- `inspected` +- `commissioned` +- `install-pending` +- `installing` +- `installed` +- `active` +- `draining` +- `reprovisioning` +- `rescue` +- `failed` + +Keep these orthogonal to: + +- power state +- host rollout state +- runtime service health + +This separation is important. MAAS and Ironic both benefit from not collapsing every concern into one state field. + +## Concrete Repo Changes Recommended + +### Phase A: schema simplification + +1. 
Promote `nix-nos/lib/cluster-config-lib.nix` into `nix/lib/cluster-schema.nix`. +2. Remove duplicated schema logic from `nix-nos/modules/topology.nix`. +3. Keep `ultracloud-cluster.nix` as an exporter/generator module, not a second schema definition. + +### Phase B: bootstrap contract simplification + +1. Deprecate `NodeConfig` as the primary bootstrap payload. +2. Introduce separate Rust types for: + - assignment + - bootstrap secrets + - install plan +3. Keep `phone_home` endpoint if desired, but split the implementation internally into separate phases/functions. + +### Phase C: installer simplification + +1. Remove node-specific fallback logic from `nix/iso/ultracloud-iso.nix`. +2. Require a resolved install profile or disk policy in the returned install plan. +3. Resolve disk targets using stable hints or explicit by-id paths. + +### Phase D: controller clarification + +1. Make the host rollout controller a named subsystem. +2. Document `nix-agent` as host OS reconcile only. +3. Document `fleet-scheduler` and `node-agent` as runtime-only. + +### Phase E: operator UX + +1. Add an inventory/commission view to `deployer-ctl`. +2. Make lifecycle transitions explicit. +3. Add reinstall/rescue flows that work even without BMC. + +## What Not To Build Yet + +Do not start with: + +- a full MAAS clone +- full Ironic feature parity +- mandatory PXE +- mandatory BMC +- scheduler-driven bootstrap for all control-plane services + +For the current environment, that would add complexity faster than value. + +## Smallest Useful End State For The 6-PC Lab + +The smallest useful design is: + +- one generic ISO +- hardware discovery +- rule-based assignment to class/pool/profile +- explicit install plan +- stable disk policy +- first-boot `nix-agent` +- host rollout separate from runtime service scheduling + +That gives you a MaaS-like system for real hardware without forcing MAAS-scale complexity into the repo. + +## Immediate Next Design Tasks + +1. 
Write `nix/lib/cluster-schema.nix` by extracting and renaming the existing cluster library. +2. Redesign the Rust bootstrap payloads around `NodeAssignment`, `BootstrapSecrets`, and `InstallPlan`. +3. Update the ISO to consume only the new install-plan contract. +4. Write a short architecture doc that shows the four control loops: + - discovery/enrollment + - installation + - host rollout + - runtime scheduling diff --git a/plans/cluster-investigation-2026-03-02/component-clarity-findings.md b/plans/cluster-investigation-2026-03-02/component-clarity-findings.md index 4b2e933..4cf8a1b 100644 --- a/plans/cluster-investigation-2026-03-02/component-clarity-findings.md +++ b/plans/cluster-investigation-2026-03-02/component-clarity-findings.md @@ -164,7 +164,7 @@ - Deployer: - 実装(`deployer/*`)は存在するが、Nix 配線が薄く運用境界が不明瞭。 - Cluster config generator: - - `plasmacloud-cluster` と `nix-nos` の責務境界が重複。 + - `ultracloud-cluster` と `nix-nos` の責務境界が重複。 - first-boot: - 生成される `cluster-config` 契約と join 実装の API 契約が一致していない。 diff --git a/plans/cluster-investigation-2026-03-02/deployment-architecture-findings.md b/plans/cluster-investigation-2026-03-02/deployment-architecture-findings.md index 7d90164..bf3032f 100644 --- a/plans/cluster-investigation-2026-03-02/deployment-architecture-findings.md +++ b/plans/cluster-investigation-2026-03-02/deployment-architecture-findings.md @@ -28,8 +28,8 @@ ## 2. P0: ISO 自動インストールが欠損 `disko.nix` に依存 - ISO の自動インストールサービスは、リポジトリ同梱ソース内の以下を実行: - - `nix/iso/plasmacloud-iso.nix:236` - - `nix run github:nix-community/disko -- --mode disko /opt/plasmacloud-src/docs/por/T036-vm-cluster-deployment/$NODE_ID/disko.nix` + - `nix/iso/ultracloud-iso.nix:236` + - `nix run github:nix-community/disko -- --mode disko /opt/ultracloud-src/docs/por/T036-vm-cluster-deployment/$NODE_ID/disko.nix` - しかし当該 `disko.nix` 群も存在しない(node01/02/03 すべて missing)。 影響: @@ -41,9 +41,9 @@ ## 3. 
P0: ブートストラップの鶏卵問題(Deployer依存の経路未接続) - ISO は `DEPLOYER_URL` を既定で `http://deployer.local:8080` とする。 - - `nix/iso/plasmacloud-iso.nix:48` + - `nix/iso/ultracloud-iso.nix:48` - Phone Home は `POST $DEPLOYER_URL/api/v1/phone-home` 固定。 - - `nix/iso/plasmacloud-iso.nix:90` + - `nix/iso/ultracloud-iso.nix:90` - 一方で flake 出力/モジュール配線に deployer がない: - `rg "deployer" flake.nix` -> no matches - `rg "deployer" nix/modules` -> no matches @@ -68,8 +68,8 @@ - `nix/modules/first-boot-automation.nix:37` - `nix/modules/first-boot-automation.nix:46` - `nix/modules/first-boot-automation.nix:83` -- `plasmacloud-cluster` / `nix-nos topology` が生成する `leader_url` も `https://:`。 - - `nix/modules/plasmacloud-cluster.nix:63` +- `ultracloud-cluster` / `nix-nos topology` が生成する `leader_url` も `https://:`。 + - `nix/modules/ultracloud-cluster.nix:63` - `nix/modules/nix-nos/topology.nix:110` - しかし Chainfire の join 互換エンドポイントは HTTP REST 側 (`/admin/member/add`)。 - `chainfire/crates/chainfire-server/src/rest.rs:143` @@ -129,7 +129,7 @@ ## 6. 
P2: トポロジ生成ロジックの二重実装 - `cluster-config` 生成ロジックが以下2箇所に重複: - - `nix/modules/plasmacloud-cluster.nix:49-91` + - `nix/modules/ultracloud-cluster.nix:49-91` - `nix/modules/nix-nos/topology.nix:91-136` 影響: diff --git a/plans/cluster-investigation-2026-03-02/issue-register.md b/plans/cluster-investigation-2026-03-02/issue-register.md index 9679148..4b07643 100644 --- a/plans/cluster-investigation-2026-03-02/issue-register.md +++ b/plans/cluster-investigation-2026-03-02/issue-register.md @@ -5,18 +5,18 @@ | ID | Priority | Area | Finding | Evidence | Impact | Recommended Action | |---|---|---|---|---|---|---| | ARCH-001 | P0 | flake / deployment | 本流 `node01/02/03` が欠損 `docs/.../configuration.nix` を参照 | `flake.nix:486,498,510` / `nix eval .#nixosConfigurations.node01...` 失敗 | 本流デプロイ経路が評価時点で停止 | 参照先を復元 or `baremetal/*` 等の現存モジュールへ切替 | -| ARCH-002 | P0 | ISO install | ISO 自動インストールが欠損 `docs/.../disko.nix` 依存 | `nix/iso/plasmacloud-iso.nix:236` | 自動インストール実行不能 | ISO の disko 参照を実在パスに差し替え | -| ARCH-003 | P0 | bootstrap | `deployer.local` phone-home 前提だが Nix 配線なし(鶏卵) | `nix/iso/plasmacloud-iso.nix:48,90` / `rg deployer flake.nix` no match / `.github/workflows/nix.yml:49` | ゼロから起動手順が閉じない | Deployer の Nix package/module/nixosConfiguration を追加し、起動順を明文化 | +| ARCH-002 | P0 | ISO install | ISO 自動インストールが欠損 `docs/.../disko.nix` 依存 | `nix/iso/ultracloud-iso.nix:236` | 自動インストール実行不能 | ISO の disko 参照を実在パスに差し替え | +| ARCH-003 | P0 | bootstrap | `deployer.local` phone-home 前提だが Nix 配線なし(鶏卵) | `nix/iso/ultracloud-iso.nix:48,90` / `rg deployer flake.nix` no match / `.github/workflows/nix.yml:49` | ゼロから起動手順が閉じない | Deployer の Nix package/module/nixosConfiguration を追加し、起動順を明文化 | | TC-001 | P1 | test-cluster | `node02/03` で `services.chainfire.joinAddr` option 不存在 | `nix/test-cluster/node02.nix:33`, `node03.nix:33`, `nix/modules/chainfire.nix` | テストクラスタ評価失敗 | `joinAddr` を廃止し `initialPeers` 等へ統一 | | TC-002 | P1 | test-cluster | `node06` が `creditservice` 有効化時に `config.services.flaredb` 欠落 | 
`nix/modules/creditservice.nix:5` / `nix/test-cluster/node06.nix:8-13` / `nix eval ...node06...` 失敗 | gateway node 評価不能 | `creditservice` module を optional 化し flaredb 非存在時 fallback | -| ARCH-004 | P1 | first-boot | `leader_url` が `https://...:2379`、join は REST `/admin/member/add`(HTTP 側) | `nix/modules/first-boot-automation.nix:46,148,259` / `nix/modules/plasmacloud-cluster.nix:63` / `chainfire/.../rest.rs:143` / `chainfire/.../server.rs:175-178` | first-boot join 失敗の高リスク | `leader_url` を `http://` 契約へ改定 | +| ARCH-004 | P1 | first-boot | `leader_url` が `https://...:2379`、join は REST `/admin/member/add`(HTTP 側) | `nix/modules/first-boot-automation.nix:46,148,259` / `nix/modules/ultracloud-cluster.nix:63` / `chainfire/.../rest.rs:143` / `chainfire/.../server.rs:175-178` | first-boot join 失敗の高リスク | `leader_url` を `http://` 契約へ改定 | | ARCH-005 | P1 | first-boot / flaredb | first-boot が FlareDB `/admin/member/add` を叩くが実装なし | `nix/modules/first-boot-automation.nix:270` / `flaredb/.../rest.rs:149-157` | FlareDB join 自動化が成立しない | FlareDB に join API 追加 or first-boot ロジック改修 | | COMP-001 | P1 | config contract | PrismNet module `IAM_ENDPOINT` は binary で未消費 | `nix/modules/prismnet.nix:105` / `prismnet/.../config.rs:83-89` / `prismnet/.../main.rs:215-222` | IAM 接続先が意図どおり反映されない | module 側キーを binary 契約(`PRISMNET__AUTH__...` か CLI)へ統一 | | COMP-002 | P1 | config contract | PlasmaVMC module `IAM_ENDPOINT` は binary で未消費 | `nix/modules/plasmavmc.nix:97` / `plasmavmc/.../config.rs:54-61` / `plasmavmc/.../main.rs:137-141` | 同上 | 同上 | | COMP-003 | P1 | config contract | FiberLB/LightningStor の `*_IAM_ADDR` は binary で未消費 | `nix/modules/fiberlb.nix:123`, `lightningstor.nix:128` / 各 `config.rs` 既定 50051 | IAM 接続ミス | CLI arg or config file 生成を module に追加 | | COMP-004 | P1 | config contract | CreditService module が IAM addr を注入せず default 50051 に依存 | `nix/modules/creditservice.nix:80-96` / `creditservice/.../main.rs:61` | IAM 実配置 (50080) と不一致 | `iamAddr` option と `CREDITSERVICE_IAM_ADDR` 注入を追加 | 
| COMP-005 | P2 | config contract | `FIBERLB_PRISMNET_ADDR`, `PRISMNET_ENDPOINT`(flashdns), `FLAREDB_IAM_ENDPOINT` 未消費 | `nix/modules/fiberlb.nix:127`, `flashdns.nix:111`, `flaredb.nix:108` | 運用者に誤解を与える死設定 | 未使用設定を削除 or binary 実装追加 | -| ARCH-006 | P2 | architecture | `cluster-config` 生成ロジックが二重実装 | `plasmacloud-cluster.nix:49-91` / `nix-nos/topology.nix:91-136` | ドリフトリスク | 片方を正本化して他方を委譲 | +| ARCH-006 | P2 | architecture | `cluster-config` 生成ロジックが二重実装 | `ultracloud-cluster.nix:49-91` / `nix-nos/topology.nix:91-136` | ドリフトリスク | 片方を正本化して他方を委譲 | | QLT-001 | P2 | quality | 多数 package で `doCheck = false` | `flake.nix:206,240,263,276,290,306,329,345` | 回帰検知が弱い | 失敗テストの原因別 backlog を切り出し段階的復帰 | ## Suggested Remediation Order diff --git a/plans/nix-nos-simplification-2026-04-04.md b/plans/nix-nos-simplification-2026-04-04.md new file mode 100644 index 0000000..d600d5b --- /dev/null +++ b/plans/nix-nos-simplification-2026-04-04.md @@ -0,0 +1,133 @@ +# Nix-NOS Simplification Plan (2026-04-04) + +## Summary + +`nix-nos` should not remain a second cluster authoring surface. + +Status update: + +- `ultracloud.cluster` is now the only in-repo cluster authoring path +- `services.first-boot-automation` no longer has a `useNixNOS` mode +- root `flake.nix` no longer imports `nix-nos` +- topology-specific `nix-nos` files have been removed +- the remaining `nix-nos` tree is only network/BGP/routing primitives + +The right plan is: + +- keep `ultracloud.cluster` as the only cluster source of truth +- keep `nix-nos` only as a compatibility facade for older topology-driven flows +- eventually shrink `nix-nos` down to network primitives, or remove it entirely if those primitives are moved into the main Nix module tree + +## Current State + +Today the repo is already halfway through this transition. 
+ +- `nix/lib/cluster-schema.nix` is the actual schema/helper library +- `nix/modules/ultracloud-cluster.nix` generates: + - per-node `cluster-config.json` + - `nix-nos.clusters` + - deployer cluster state +- `nix-nos/modules/topology.nix` no longer owns its own schema logic; it delegates to `cluster-schema.nix` +- `services.first-boot-automation` still had a `useNixNOS` path at the time this plan was written (per the status update above, that mode has since been removed) and still treated `nix-nos.generateClusterConfig` as a real config source + +So the duplication is smaller than before, but the user-facing model is still confusing because there are still two apparent ways to describe a cluster. + +## Recommendation + +The recommended target is: + +1. `ultracloud.cluster` is the only supported cluster authoring API. +2. `nix-nos` is explicitly legacy-compatibility only for topology consumers that have not been migrated yet. +3. `nix-nos` should stop presenting itself as a general cluster definition layer. +4. `first-boot-automation` should stop depending on `nix-nos` as a primary provider. + +This keeps the repo simpler without forcing a big-bang removal. + +## What Nix-NOS Should Still Own + +Only keep the parts that are actually distinct: + +- interface/VLAN primitives +- BGP primitives +- static routing primitives +- any truly reusable NOS-style networking submodules + +These are valid low-level modules. + +What `nix-nos` should not own anymore: + +- whole-cluster source of truth +- bootstrap node selection rules +- cluster-config generation semantics +- host inventory / deployer state generation + +Those belong in `ultracloud.cluster` and `cluster-schema.nix`. 
+ +## Target Shape + +### Primary path + +- user writes `ultracloud.cluster` +- `cluster-schema.nix` derives: + - node cluster config + - deployer cluster state + - compatibility topology objects if needed + +### Compatibility path + +- `nix-nos` may still expose `clusters` and `generateClusterConfig` +- but they are documented and warned as legacy compatibility only +- ideally they become thin read-only views over `cluster-schema.nix`, not an authoring API + +### First boot + +`services.first-boot-automation` should eventually have only these modes: + +- use generated UltraCloud cluster config +- use an explicit file path + +It should not need a separate `useNixNOS` mode long-term. + +## Migration Plan + +### Phase 1: Freeze + +- do not add new functionality to `nix-nos.clusters` +- mark `nix-nos` topology usage as legacy in warnings/docs +- keep all schema changes in `cluster-schema.nix` + +### Phase 2: Move first-boot off Nix-NOS + +- make `services.first-boot-automation` prefer `ultracloud.cluster.generated.nodeClusterConfig` +- keep `nix-nos` only as fallback/compat, not as the preferred path +- stop using `useNixNOS` in normal tests/configurations + +### Phase 3: Remove topology authoring role + +- deprecate direct authoring of `nix-nos.clusters` +- remove `nix/modules/nix-nos/cluster-config-generator.nix` +- collapse any remaining direct topology generation onto `cluster-schema.nix` + +### Phase 4: Decide final fate + +Choose one: + +- keep `nix-nos` as a small network-primitives library +- or move those network primitives under `nix/modules/network/*` and delete `nix-nos` + +The first option is lower risk. The second is cleaner. 
+ +## Recommended Decision + +Recommended decision: + +- short term: keep `nix-nos`, but only as a compatibility/network-primitives layer +- medium term: remove `nix-nos` as a cluster authoring concept +- long term: either rename/rehome the remaining network modules, or delete `nix-nos` if nothing substantial remains + +## Immediate Next Steps + +1. Mark `nix-nos.clusters` and `services.first-boot-automation.useNixNOS` as legacy in evaluation warnings. +2. Reduce test usage so only one compatibility smoke test still exercises direct `nix-nos` authoring. +3. Change docs/examples to author clusters through `ultracloud.cluster` only. +4. After that, remove the standalone `cluster-config-generator.nix` path. diff --git a/plans/nixos-deployment-scheduler-roadmap-2026-03-20.md b/plans/nixos-deployment-scheduler-roadmap-2026-03-20.md index ffb66e9..9760fae 100644 --- a/plans/nixos-deployment-scheduler-roadmap-2026-03-20.md +++ b/plans/nixos-deployment-scheduler-roadmap-2026-03-20.md @@ -26,7 +26,7 @@ ### 1. `Nix` が単一正本になっていない -- `plasmacloud-cluster.nix` / `nix-nos` / `deployer-ctl` の `ClusterStateSpec` が並立している +- `ultracloud-cluster.nix` / `nix-nos` / `deployer-ctl` の `ClusterStateSpec` が並立している - static topology をどこで持つかが定まっていない - node class / pool / enrollment rule / service schedule が Nix から一気通貫で生成されていない @@ -76,7 +76,7 @@ MaaS を本当に置き換えるなら、少なくとも commission/inventory/re ### 原則 `Nix` を static desired state の単一正本にする。 -動的な reconcile は PhotonCloud の各 agent / control plane に任せる。 +動的な reconcile は UltraCloud の各 agent / control plane に任せる。 分け方は以下。 @@ -128,7 +128,7 @@ MaaS を本当に置き換えるなら、少なくとも commission/inventory/re - process/container の desired state を ChainFire に書く - node-agent が実行と health を担う -`Kubernetes Deployment/DaemonSet/Service` のうち、PhotonCloud 自前 native service 用の部分をここで置き換える。 +`Kubernetes Deployment/DaemonSet/Service` のうち、UltraCloud 自前 native service 用の部分をここで置き換える。 ### 5. 
Tenant workload layer: `plasmavmc` / `k8shost` @@ -147,7 +147,7 @@ MaaS を本当に置き換えるなら、少なくとも commission/inventory/re 1. `Nix` を cluster source of truth に固定する 2. `deployer-ctl` の YAML/JSON は hand-written ではなく `Nix` から生成する -3. `plasmacloud-cluster` と `nix-nos` の重複生成ロジックを統一する +3. `ultracloud-cluster` と `nix-nos` の重複生成ロジックを統一する 推奨: diff --git a/plans/photoncloud-design-patterns-analysis.md b/plans/ultracloud-design-patterns-analysis.md similarity index 98% rename from plans/photoncloud-design-patterns-analysis.md rename to plans/ultracloud-design-patterns-analysis.md index ed1a201..ff51858 100644 --- a/plans/photoncloud-design-patterns-analysis.md +++ b/plans/ultracloud-design-patterns-analysis.md @@ -1,4 +1,4 @@ -# PhotonCloudシステム 設計パターン分析と改善案 +# UltraCloudシステム 設計パターン分析と改善案 ## 1. 統合可能性マトリクス @@ -166,7 +166,7 @@ photon-common/ // crates/photon-error/src/lib.rs use thiserror::Error; -/// PhotonCloud共通エラー型 +/// UltraCloud共通エラー型 #[derive(Error, Debug, Clone)] pub enum PhotonError { #[error("storage error: {0}")] @@ -524,8 +524,8 @@ version = "0.1.0" edition = "2021" license = "MIT OR Apache-2.0" rust-version = "1.75" -authors = ["PhotonCloud Contributors"] -repository = "https://github.com/photoncloud/photoncloud" +authors = ["UltraCloud Contributors"] +repository = "https://github.com/ultracloud/ultracloud" [workspace.dependencies] # Photon共通ライブラリ @@ -589,7 +589,7 @@ metrics = "0.23" ```mermaid gantt - title PhotonCloud移行ロードマップ + title UltraCloud移行ロードマップ dateFormat YYYY-MM section Phase1: 共通基盤 photon-error開発 :done, p1e, 2026-01, 1M diff --git a/plans/photoncloud-radical-redesign.md b/plans/ultracloud-radical-redesign.md similarity index 98% rename from plans/photoncloud-radical-redesign.md rename to plans/ultracloud-radical-redesign.md index 9afe67a..27ba130 100644 --- a/plans/photoncloud-radical-redesign.md +++ b/plans/ultracloud-radical-redesign.md @@ -1,8 +1,8 @@ -# PhotonCloud 抜本的再設計案 - 詳細版 +# UltraCloud 抜本的再設計案 - 詳細版 ## 概要 
-本設計案は、PhotonCloudシステムを「リソース抽象化レイヤー」を中心とした統一的なアーキテクチャへと再設計するものです。特に、コンピュートリソース(VM、コンテナ、将来的にはサーバーレス)を統一的に扱えるようにし、コントロールプレーンとデータプレーンを完全に分離することを目標とします。 +本設計案は、UltraCloudシステムを「リソース抽象化レイヤー」を中心とした統一的なアーキテクチャへと再設計するものです。特に、コンピュートリソース(VM、コンテナ、将来的にはサーバーレス)を統一的に扱えるようにし、コントロールプレーンとデータプレーンを完全に分離することを目標とします。 **重要な設計原則**: 各ソフトウェアが単体で動作することを前提とし、OpenStackのように全てを動作させないといけない状況を避けます。 @@ -410,7 +410,7 @@ message FilesystemSpec { // photon-common/crates/photon-error/src/lib.rs use thiserror::Error; -/// PhotonCloud共通エラー型 +/// UltraCloud共通エラー型 #[derive(Error, Debug, Clone)] pub enum PhotonError { #[error("storage error: {0}")] @@ -564,7 +564,7 @@ pub trait MtlsManager { ```mermaid graph TB - subgraph PhotonCloud_Control_Plane [PhotonCloud Control Plane] + subgraph UltraCloud_Control_Plane [UltraCloud Control Plane] AG[API Gateway
    photon-api-gateway] RM[Resource Manager
    photon-resource-manager] SCH[Scheduler
    photon-scheduler] @@ -573,7 +573,7 @@ graph TB CH[chainfire
    State Store] end - subgraph PhotonCloud_Data_Plane [PhotonCloud Data Plane] + subgraph UltraCloud_Data_Plane [UltraCloud Data Plane] subgraph Unified_Agent_Framework [Unified Agent Framework] UA[Unified Agent
    photon-agent] PM[Plugin Manager] @@ -1150,7 +1150,7 @@ sequenceDiagram ```mermaid gantt - title PhotonCloud移行ロードマップ + title UltraCloud移行ロードマップ dateFormat YYYY-MM section Phase 0: 準備 diff --git a/plans/photoncloud-standalone-integration-balance.md b/plans/ultracloud-standalone-integration-balance.md similarity index 95% rename from plans/photoncloud-standalone-integration-balance.md rename to plans/ultracloud-standalone-integration-balance.md index 2c41623..d290863 100644 --- a/plans/photoncloud-standalone-integration-balance.md +++ b/plans/ultracloud-standalone-integration-balance.md @@ -1,8 +1,8 @@ -# PhotonCloud 単体運用と統合アーキテクチャのバランス設計 +# UltraCloud 単体運用と統合アーキテクチャのバランス設計 ## 概要 -本設計書は、PhotonCloudシステムにおいて「各コンポーネントが単体で完全に動作すること」を基本原則としつつ、必要に応じて統合機能を追加できるアーキテクチャを定義します。 +本設計書は、UltraCloudシステムにおいて「各コンポーネントが単体で完全に動作すること」を基本原則としつつ、必要に応じて統合機能を追加できるアーキテクチャを定義します。 **設計の核心:** - 単体運用時は外部依存なしで完全に動作 @@ -129,7 +129,7 @@ graph TB **統合運用時:** - 動作: マルチノードRaftクラスター -- 用途: PhotonCloud全体の状態保存基盤 +- 用途: UltraCloud全体の状態保存基盤 --- @@ -570,7 +570,7 @@ async fn initialize_integrated_mode(config: &IntegratedConfig) -> Result<()> { version: '3' services: lightningstor: - image: photoncloud/lightningstor:latest + image: ultracloud/lightningstor:latest environment: PHOTON_MODE: standalone volumes: @@ -585,7 +585,7 @@ services: ```yaml services: flashdns: - image: photoncloud/flashdns:latest + image: ultracloud/flashdns:latest environment: PHOTON_MODE: standalone volumes: @@ -599,7 +599,7 @@ services: ```yaml services: fiberlb: - image: photoncloud/fiberlb:latest + image: ultracloud/fiberlb:latest environment: PHOTON_MODE: standalone volumes: @@ -614,7 +614,7 @@ services: ```yaml services: lightningstor: - image: photoncloud/lightningstor:latest + image: ultracloud/lightningstor:latest environment: PHOTON_MODE: standalone volumes: @@ -623,7 +623,7 @@ services: - "9000:9000" flashdns: - image: photoncloud/flashdns:latest + image: ultracloud/flashdns:latest environment: PHOTON_MODE: standalone 
volumes: @@ -644,12 +644,12 @@ services: ```yaml services: chainfire: - image: photoncloud/chainfire:latest + image: ultracloud/chainfire:latest volumes: - chainfire-data:/var/lib/chainfire lightningstor: - image: photoncloud/lightningstor:latest + image: ultracloud/lightningstor:latest environment: PHOTON_MODE: standalone CHAINFIRE_ENDPOINT: http://chainfire:2379 @@ -657,7 +657,7 @@ services: - chainfire flashdns: - image: photoncloud/flashdns:latest + image: ultracloud/flashdns:latest environment: PHOTON_MODE: standalone CHAINFIRE_ENDPOINT: http://chainfire:2379 @@ -665,7 +665,7 @@ services: - chainfire fiberlb: - image: photoncloud/fiberlb:latest + image: ultracloud/fiberlb:latest environment: PHOTON_MODE: standalone network_mode: host @@ -682,24 +682,24 @@ services: ```yaml services: chainfire: - image: photoncloud/chainfire:latest + image: ultracloud/chainfire:latest command: --cluster-mode photonauth: - image: photoncloud/photonauth:latest + image: ultracloud/photonauth:latest environment: PHOTON_MODE: integrated CHAINFIRE_ENDPOINTS: http://chainfire:2379 photonnet: - image: photoncloud/photonnet:latest + image: ultracloud/photonnet:latest environment: PHOTON_MODE: integrated CHAINFIRE_ENDPOINTS: http://chainfire:2379 AUTH_ENDPOINT: http://photonauth:50051 lightningstor: - image: photoncloud/lightningstor:latest + image: ultracloud/lightningstor:latest environment: PHOTON_MODE: integrated CHAINFIRE_ENDPOINTS: http://chainfire:2379 @@ -713,39 +713,39 @@ services: ```yaml services: chainfire: - image: photoncloud/chainfire:latest + image: ultracloud/chainfire:latest command: --cluster-mode --bootstrap-cluster eventbus: image: nats:latest photonauth: - image: photoncloud/photonauth:latest + image: ultracloud/photonauth:latest environment: PHOTON_MODE: integrated CHAINFIRE_ENDPOINTS: http://chainfire:2379 photonnet: - image: photoncloud/photonnet:latest + image: ultracloud/photonnet:latest environment: PHOTON_MODE: integrated CHAINFIRE_ENDPOINTS: 
http://chainfire:2379 AUTH_ENDPOINT: http://photonauth:50051 resource-manager: - image: photoncloud/resource-manager:latest + image: ultracloud/resource-manager:latest environment: CHAINFIRE_ENDPOINTS: http://chainfire:2379 EVENT_BUS: http://eventbus:4222 scheduler: - image: photoncloud/scheduler:latest + image: ultracloud/scheduler:latest environment: CHAINFIRE_ENDPOINTS: http://chainfire:2379 RESOURCE_MANAGER: http://resource-manager:8080 api-gateway: - image: photoncloud/api-gateway:latest + image: ultracloud/api-gateway:latest environment: CHAINFIRE_ENDPOINTS: http://chainfire:2379 AUTH_ENDPOINT: http://photonauth:50051 @@ -1121,4 +1121,4 @@ graph LR --- -*本設計書はPhotonCloudアーキテクチャの「単体運用と統合のバランス」について定義したものです。実装時には本設計に従いつつ、実際のユースケースに応じた調整を行ってください。* +*本設計書はUltraCloudアーキテクチャの「単体運用と統合のバランス」について定義したものです。実装時には本設計に従いつつ、実際のユースケースに応じた調整を行ってください。* diff --git a/plasmavmc/crates/plasmavmc-server/src/volume_manager.rs b/plasmavmc/crates/plasmavmc-server/src/volume_manager.rs index 83f8b30..2634bd4 100644 --- a/plasmavmc/crates/plasmavmc-server/src/volume_manager.rs +++ b/plasmavmc/crates/plasmavmc-server/src/volume_manager.rs @@ -1758,7 +1758,7 @@ fn volume_has_pending_coronafs_image_seed(volume: &Volume) -> bool { } fn derived_volume_id(vm_id: &str, disk_id: &str) -> String { - let digest = Sha256::digest(format!("photoncloud-vm-disk:{vm_id}:{disk_id}").as_bytes()); + let digest = Sha256::digest(format!("ultracloud-vm-disk:{vm_id}:{disk_id}").as_bytes()); let mut bytes = [0u8; 16]; bytes.copy_from_slice(&digest[..16]); bytes[6] = (bytes[6] & 0x0f) | 0x50; diff --git a/scripts/ci_changed_workspaces.py b/scripts/ci_changed_workspaces.py index 3ed61ba..021e1da 100644 --- a/scripts/ci_changed_workspaces.py +++ b/scripts/ci_changed_workspaces.py @@ -217,7 +217,7 @@ def write_github_output(path: Path, result: dict[str, Any]) -> None: def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( - description="Map changed files to PhotonCloud CI workspaces." 
+ description="Map changed files to UltraCloud CI workspaces." ) parser.add_argument( "--config", diff --git a/scripts/rename_status.sh b/scripts/rename_status.sh index 189cf74..3a506e7 100755 --- a/scripts/rename_status.sh +++ b/scripts/rename_status.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash set -euo pipefail # Count legacy names to track rename progress +rg -c "PhotonCloud" || true rg -c "PlasmaCloud" || true rg -c "Nightlight" || true