diff --git a/flake.nix b/flake.nix index a403560..0b1f15a 100644 --- a/flake.nix +++ b/flake.nix @@ -886,6 +886,15 @@ }; checks = { + first-boot-topology-vm-smoke = pkgs.testers.runNixOSTest ( + import ./nix/tests/first-boot-topology-vm-smoke.nix { + inherit pkgs; + photoncloudPackages = self.packages.${system}; + photoncloudModule = self.nixosModules.default; + nixNosModule = nix-nos.nixosModules.default; + } + ); + deployer-vm-smoke = pkgs.testers.runNixOSTest ( import ./nix/tests/deployer-vm-smoke.nix { inherit pkgs; diff --git a/nix-nos/lib/cluster-config-lib.nix b/nix-nos/lib/cluster-config-lib.nix new file mode 100644 index 0000000..ea6bf68 --- /dev/null +++ b/nix-nos/lib/cluster-config-lib.nix @@ -0,0 +1,1483 @@ +{ lib }: + +with lib; + +let + mkInstallPlanType = types: types.submodule { + options = { + nixosConfiguration = mkOption { + type = types.nullOr types.str; + default = null; + description = "Name of the nixosConfigurations output to install"; + }; + + diskoConfigPath = mkOption { + type = types.nullOr types.str; + default = null; + description = "Repository-relative Disko file used for installation"; + }; + + targetDisk = mkOption { + type = types.nullOr types.str; + default = null; + description = "Explicit disk device path selected for installation"; + }; + + targetDiskById = mkOption { + type = types.nullOr types.str; + default = null; + description = "Stable /dev/disk/by-id path selected for installation"; + }; + }; + }; + + mkDesiredSystemType = types: types.submodule { + options = { + deploymentId = mkOption { + type = types.nullOr types.str; + default = null; + description = "Optional host deployment identifier owning this desired system"; + }; + + nixosConfiguration = mkOption { + type = types.nullOr types.str; + default = null; + description = "Name of the nixosConfigurations output to activate"; + }; + + targetSystem = mkOption { + type = types.nullOr types.str; + default = null; + description = "Optional prebuilt NixOS system closure 
path activated directly by nix-agent"; + }; + + flakeRef = mkOption { + type = types.nullOr types.str; + default = null; + description = "Explicit flake reference used by nix-agent"; + }; + + switchAction = mkOption { + type = types.nullOr types.str; + default = null; + description = "switch-to-configuration action for nix-agent"; + }; + + healthCheckCommand = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Command vector executed after activation to validate node health"; + }; + + rollbackOnFailure = mkOption { + type = types.nullOr types.bool; + default = null; + description = "Whether nix-agent should roll back when the health check fails"; + }; + + drainBeforeApply = mkOption { + type = types.nullOr types.bool; + default = null; + description = "Whether the controller should drain the node before issuing this desired system"; + }; + }; + }; + + mkHostDeploymentSelectorType = types: types.submodule { + options = { + nodeIds = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Explicit node IDs targeted by the deployment"; + }; + + roles = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Node roles targeted by the deployment"; + }; + + pools = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Node pools targeted by the deployment"; + }; + + nodeClasses = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Node classes targeted by the deployment"; + }; + + matchLabels = mkOption { + type = types.attrsOf types.str; + default = { }; + description = "Label selectors applied to target nodes"; + }; + }; + }; + + mkHostDeploymentType = types: + let + selectorType = mkHostDeploymentSelectorType types; + in types.submodule { + options = { + selector = mkOption { + type = selectorType; + default = { }; + description = "Node selector used by the host deployment"; + }; + + nixosConfiguration = mkOption { + type = types.nullOr types.str; 
+          default = null;
+          description = "Name of the nixosConfigurations output to roll out";
+        };
+
+        targetSystem = mkOption {
+          type = types.nullOr types.str;
+          default = null;
+          description = "Optional prebuilt NixOS system closure path handed directly to nix-agent";
+        };
+
+        flakeRef = mkOption {
+          type = types.nullOr types.str;
+          default = null;
+          description = "Explicit flake reference used during rollout";
+        };
+
+        batchSize = mkOption {
+          type = types.nullOr types.ints.unsigned; # a node count: negative values are rejected at evaluation time
+          default = null; # null means "not set"
+          description = "Maximum number of nodes started per reconciliation wave";
+        };
+
+        maxUnavailable = mkOption {
+          type = types.nullOr types.ints.unsigned; # same integer type as mkRolloutStrategyType.maxUnavailable
+          default = null; # null means "not set"
+          description = "Maximum number of unavailable nodes allowed during rollout";
+        };
+
+        healthCheckCommand = mkOption {
+          type = types.listOf types.str;
+          default = [ ];
+          description = "Health check command executed by nix-agent after activation";
+        };
+
+        switchAction = mkOption {
+          type = types.nullOr types.str;
+          default = null;
+          description = "switch-to-configuration action used by nix-agent";
+        };
+
+        rollbackOnFailure = mkOption {
+          type = types.nullOr types.bool;
+          default = null;
+          description = "Whether nodes should roll back when rollout health checks fail";
+        };
+
+        drainBeforeApply = mkOption {
+          type = types.nullOr types.bool;
+          default = null;
+          description = "Whether the controller should drain a node before applying the rollout";
+        };
+
+        rebootPolicy = mkOption {
+          type = types.nullOr types.str;
+          default = null;
+          description = "Operator-facing reboot policy associated with the rollout";
+        };
+
+        paused = mkOption {
+          type = types.nullOr types.bool;
+          default = null;
+          description = "Whether the rollout should start in a paused state";
+        };
+      };
+    };
+
+  mkServicePortsType = types: types.submodule { # submodule type with two optional port options, http and grpc
+    options = {
+      http = mkOption {
+        type = types.nullOr types.port;
+        default = null;
+        description = "Optional HTTP port exposed by the service";
+      };
+
+      grpc = mkOption {
+        type = types.nullOr
types.port; + default = null; + description = "Optional gRPC port exposed by the service"; + }; + }; + }; + + mkProcessType = types: types.submodule { + options = { + command = mkOption { + type = types.str; + description = "Executable invoked by node-agent"; + }; + + args = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Command-line arguments passed to the process"; + }; + + workingDir = mkOption { + type = types.nullOr types.str; + default = null; + description = "Optional working directory used when spawning the process"; + }; + + env = mkOption { + type = types.attrsOf types.str; + default = { }; + description = "Environment variables injected into the process"; + }; + }; + }; + + mkContainerPortType = types: types.submodule { + options = { + containerPort = mkOption { + type = types.port; + description = "Port exposed inside the container"; + }; + + hostPort = mkOption { + type = types.nullOr types.port; + default = null; + description = "Optional fixed host port published for this container port"; + }; + + protocol = mkOption { + type = types.nullOr types.str; + default = null; + description = "Optional transport protocol for the published port"; + }; + }; + }; + + mkContainerVolumeType = types: types.submodule { + options = { + source = mkOption { + type = types.str; + description = "Host-side volume source path"; + }; + + target = mkOption { + type = types.str; + description = "Container mount target path"; + }; + + readOnly = mkOption { + type = types.bool; + default = false; + description = "Whether the volume should be mounted read-only"; + }; + }; + }; + + mkContainerType = types: + let + containerPortType = mkContainerPortType types; + containerVolumeType = mkContainerVolumeType types; + in types.submodule { + options = { + image = mkOption { + type = types.str; + description = "Container image reference"; + }; + + runtime = mkOption { + type = types.nullOr types.str; + default = null; + description = "Container runtime 
invoked by node-agent"; + }; + + command = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Optional entrypoint override"; + }; + + args = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Container arguments appended after the image"; + }; + + env = mkOption { + type = types.attrsOf types.str; + default = { }; + description = "Environment variables passed to the container runtime"; + }; + + ports = mkOption { + type = types.listOf containerPortType; + default = [ ]; + description = "Published container ports"; + }; + + volumes = mkOption { + type = types.listOf containerVolumeType; + default = [ ]; + description = "Host volume mounts passed to the container runtime"; + }; + + networkMode = mkOption { + type = types.nullOr types.str; + default = null; + description = "Optional container network mode"; + }; + + pullPolicy = mkOption { + type = types.nullOr types.str; + default = null; + description = "Container image pull policy"; + }; + + workingDir = mkOption { + type = types.nullOr types.str; + default = null; + description = "Optional container working directory"; + }; + }; + }; + + mkHealthCheckType = types: types.submodule { + options = { + type = mkOption { + type = types.str; + description = "Health check type executed by node-agent"; + }; + + path = mkOption { + type = types.nullOr types.str; + default = null; + description = "Optional path used by HTTP health checks"; + }; + + intervalSecs = mkOption { + type = types.nullOr types.ints.positive; + default = null; + description = "Health check interval in seconds"; + }; + + timeoutSecs = mkOption { + type = types.nullOr types.ints.positive; + default = null; + description = "Health check timeout in seconds"; + }; + + startupGraceSecs = mkOption { + type = types.nullOr types.ints.positive; + default = null; + description = "Startup grace period before a service is considered unhealthy"; + }; + }; + }; + + mkPlacementPolicyType = types: types.submodule { 
+ options = { + roles = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Roles matched by the scheduler placement filter"; + }; + + pools = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Pools matched by the scheduler placement filter"; + }; + + nodeClasses = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Node classes matched by the scheduler placement filter"; + }; + + matchLabels = mkOption { + type = types.attrsOf types.str; + default = { }; + description = "Additional label selectors matched by the scheduler"; + }; + + spreadByLabel = mkOption { + type = types.nullOr types.str; + default = null; + description = "Optional spread key used when balancing replicas"; + }; + + maxInstancesPerNode = mkOption { + type = types.ints.positive; + default = 1; + description = "Maximum number of replicas the scheduler may place on one node"; + }; + }; + }; + + mkRolloutStrategyType = types: types.submodule { + options = { + maxUnavailable = mkOption { + type = types.ints.unsigned; + default = 1; + description = "Maximum unavailable instances allowed during a rollout"; + }; + + maxSurge = mkOption { + type = types.ints.unsigned; + default = 1; + description = "Maximum extra instances allowed during a rollout"; + }; + }; + }; + + mkServiceScheduleType = types: + let + placementPolicyType = mkPlacementPolicyType types; + rolloutStrategyType = mkRolloutStrategyType types; + processType = mkProcessType types; + containerType = mkContainerType types; + healthCheckType = mkHealthCheckType types; + in types.submodule { + options = { + replicas = mkOption { + type = types.ints.positive; + default = 1; + description = "Desired number of scheduler-managed replicas"; + }; + + placement = mkOption { + type = placementPolicyType; + default = { }; + description = "Scheduler placement rules for the service"; + }; + + rollout = mkOption { + type = rolloutStrategyType; + default = { }; + description = 
"Rollout budget used by the scheduler"; + }; + + instancePort = mkOption { + type = types.nullOr types.port; + default = null; + description = "Host port used when creating service instances"; + }; + + meshPort = mkOption { + type = types.nullOr types.port; + default = null; + description = "Optional service mesh port for the managed instances"; + }; + + process = mkOption { + type = types.nullOr processType; + default = null; + description = "Process-based runtime specification"; + }; + + container = mkOption { + type = types.nullOr containerType; + default = null; + description = "Container-based runtime specification"; + }; + + healthCheck = mkOption { + type = types.nullOr healthCheckType; + default = null; + description = "Health check performed by node-agent"; + }; + }; + }; + + mkDnsPublicationType = types: types.submodule { + options = { + zone = mkOption { + type = types.str; + description = "FlashDNS zone used for service publication"; + }; + + name = mkOption { + type = types.nullOr types.str; + default = null; + description = "Optional record name inside the published zone"; + }; + + ttl = mkOption { + type = types.ints.positive; + default = 30; + description = "DNS TTL for the published record"; + }; + + mode = mkOption { + type = types.enum [ "load_balancer" "direct" ]; + default = "load_balancer"; + description = "Whether DNS publishes the load balancer VIP or a direct instance address"; + }; + }; + }; + + mkLoadBalancerPublicationType = types: types.submodule { + options = { + orgId = mkOption { + type = types.nullOr types.str; + default = null; + description = "Optional organization used when provisioning FiberLB resources"; + }; + + projectId = mkOption { + type = types.nullOr types.str; + default = null; + description = "Optional project used when provisioning FiberLB resources"; + }; + + name = mkOption { + type = types.nullOr types.str; + default = null; + description = "Optional explicit load balancer name"; + }; + + listenerPort = mkOption { 
+ type = types.nullOr types.port; + default = null; + description = "Listener port exposed by the load balancer"; + }; + + protocol = mkOption { + type = types.nullOr types.str; + default = null; + description = "Listener protocol for the published load balancer"; + }; + + poolProtocol = mkOption { + type = types.nullOr types.str; + default = null; + description = "Backend pool protocol for the published load balancer"; + }; + }; + }; + + mkServicePublicationType = types: + let + dnsPublicationType = mkDnsPublicationType types; + loadBalancerPublicationType = mkLoadBalancerPublicationType types; + in types.submodule { + options = { + orgId = mkOption { + type = types.nullOr types.str; + default = null; + description = "Default organization used for service publication"; + }; + + projectId = mkOption { + type = types.nullOr types.str; + default = null; + description = "Default project used for service publication"; + }; + + dns = mkOption { + type = types.nullOr dnsPublicationType; + default = null; + description = "Optional FlashDNS publication target"; + }; + + loadBalancer = mkOption { + type = types.nullOr loadBalancerPublicationType; + default = null; + description = "Optional FiberLB publication target"; + }; + }; + }; + + mkServiceType = types: + let + servicePortsType = mkServicePortsType types; + serviceScheduleType = mkServiceScheduleType types; + servicePublicationType = mkServicePublicationType types; + in types.submodule { + options = { + ports = mkOption { + type = types.nullOr servicePortsType; + default = null; + description = "Optional logical service ports"; + }; + + protocol = mkOption { + type = types.nullOr types.str; + default = null; + description = "Optional service protocol"; + }; + + mtlsRequired = mkOption { + type = types.nullOr types.bool; + default = null; + description = "Whether service-to-service traffic requires mTLS"; + }; + + meshMode = mkOption { + type = types.nullOr types.str; + default = null; + description = "Optional mesh 
publication mode"; + }; + + schedule = mkOption { + type = types.nullOr serviceScheduleType; + default = null; + description = "Scheduler-managed runtime intent"; + }; + + publish = mkOption { + type = types.nullOr servicePublicationType; + default = null; + description = "Optional publication targets for the service"; + }; + }; + }; + + mkMtlsPolicyType = types: types.submodule { + options = { + environment = mkOption { + type = types.nullOr types.str; + default = null; + description = "Optional environment scope for the policy"; + }; + + sourceService = mkOption { + type = types.str; + description = "Source service matched by the policy"; + }; + + targetService = mkOption { + type = types.str; + description = "Target service matched by the policy"; + }; + + mtlsRequired = mkOption { + type = types.nullOr types.bool; + default = null; + description = "Whether the policy enforces mTLS"; + }; + + mode = mkOption { + type = types.nullOr types.str; + default = null; + description = "Optional policy mode"; + }; + }; + }; + + mkNodeType = types: + let + installPlanType = mkInstallPlanType types; + desiredSystemType = mkDesiredSystemType types; + in types.submodule { + options = { + role = mkOption { + type = types.enum [ "control-plane" "worker" ]; + default = "worker"; + description = "Node role in the cluster"; + }; + + ip = mkOption { + type = types.str; + description = "IP address of the node"; + }; + + services = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Services to run on this node"; + }; + + raftPort = mkOption { + type = types.port; + default = 2380; + description = "Raft port for consensus protocols"; + }; + + apiPort = mkOption { + type = types.port; + default = 2379; + description = "API port for cluster services"; + }; + + metadata = mkOption { + type = types.attrsOf types.anything; + default = { }; + description = "Additional metadata for the node"; + }; + + machineId = mkOption { + type = types.nullOr types.str; + 
default = null; + description = "Stable machine-id used to pre-register the node with deployer"; + }; + + labels = mkOption { + type = types.attrsOf types.str; + default = { }; + description = "User-defined labels exported into deployer cluster state"; + }; + + pool = mkOption { + type = types.nullOr types.str; + default = null; + description = "Logical node pool exported into deployer cluster state"; + }; + + nodeClass = mkOption { + type = types.nullOr types.str; + default = null; + description = "Reusable node class assigned to this node in deployer state"; + }; + + failureDomain = mkOption { + type = types.nullOr types.str; + default = null; + description = "Failure domain / zone label exported into deployer cluster state"; + }; + + nixProfile = mkOption { + type = types.nullOr types.str; + default = null; + description = "Desired Nix profile associated with the node"; + }; + + installPlan = mkOption { + type = types.nullOr installPlanType; + default = null; + description = "Explicit NixOS installation targets for bare-metal bootstrap"; + }; + + desiredSystem = mkOption { + type = types.nullOr desiredSystemType; + default = null; + description = "Desired NixOS reconciliation state exported for nix-agent"; + }; + + state = mkOption { + type = types.nullOr (types.enum [ "pending" "provisioning" "active" "failed" "draining" ]); + default = null; + description = "Desired deployer node lifecycle state"; + }; + + commissionState = mkOption { + type = types.nullOr (types.enum [ "discovered" "commissioning" "commissioned" ]); + default = null; + description = "Optional commissioning state exported into deployer cluster state"; + }; + + installState = mkOption { + type = types.nullOr (types.enum [ "pending" "installing" "installed" "failed" "reinstall_requested" ]); + default = null; + description = "Optional install lifecycle state exported into deployer cluster state"; + }; + + powerState = mkOption { + type = types.nullOr (types.enum [ "on" "off" "cycling" "unknown" 
]);
+          default = null;
+          description = "Optional external power-management state associated with the node";
+        };
+
+        bmcRef = mkOption {
+          type = types.nullOr types.str;
+          default = null;
+          description = "Optional BMC / Redfish reference associated with the node";
+        };
+      };
+    };
+
+  mkNodeClassType = types: # submodule type describing a reusable node class
+    let
+      installPlanType = mkInstallPlanType types;
+    in types.submodule {
+      options = {
+        description = mkOption {
+          type = types.nullOr types.str;
+          default = null;
+          description = "Human-readable description of the node class";
+        };
+
+        nixProfile = mkOption {
+          type = types.nullOr types.str;
+          default = null;
+          description = "Desired Nix profile inherited by nodes in this class";
+        };
+
+        installPlan = mkOption {
+          type = types.nullOr installPlanType;
+          default = null;
+          description = "Default install plan inherited by nodes in this class";
+        };
+
+        roles = mkOption {
+          type = types.listOf types.str;
+          default = [ ];
+          description = "Roles inherited by nodes in this class";
+        };
+
+        labels = mkOption {
+          type = types.attrsOf types.str;
+          default = { };
+          description = "Labels inherited by nodes in this class";
+        };
+      };
+    };
+
+  mkNodePoolType = types: types.submodule { # submodule type describing a logical node pool
+    options = {
+      description = mkOption {
+        type = types.nullOr types.str;
+        default = null;
+        description = "Human-readable description of the node pool";
+      };
+
+      nodeClass = mkOption {
+        type = types.nullOr types.str;
+        default = null;
+        description = "Default node class assigned to nodes in this pool";
+      };
+
+      minSize = mkOption {
+        type = types.nullOr types.ints.unsigned; # a pool size: negative values are rejected at evaluation time
+        default = null; # null means "not set"
+        description = "Minimum desired pool size";
+      };
+
+      maxSize = mkOption {
+        type = types.nullOr types.ints.unsigned; # a pool size: negative values are rejected at evaluation time
+        default = null; # null means "not set"
+        description = "Maximum desired pool size";
+      };
+
+      labels = mkOption {
+        type = types.attrsOf types.str;
+        default = { };
+        description = "Labels applied to nodes in this pool";
+      };
+    };
+  };
+
+  mkEnrollmentRuleType = types:
+    let
+      installPlanType = mkInstallPlanType types;
+    in
types.submodule { + options = { + priority = mkOption { + type = types.int; + default = 0; + description = "Higher priority rules win when multiple rules match"; + }; + + matchLabels = mkOption { + type = types.attrsOf types.str; + default = { }; + description = "Label selectors matched against phone-home metadata"; + }; + + matchHostnamePrefix = mkOption { + type = types.nullOr types.str; + default = null; + description = "Optional hostname prefix matched during enrollment"; + }; + + matchIpPrefixes = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Optional IP prefixes matched during enrollment"; + }; + + pool = mkOption { + type = types.nullOr types.str; + default = null; + description = "Pool assigned when the rule matches"; + }; + + nodeClass = mkOption { + type = types.nullOr types.str; + default = null; + description = "Node class assigned when the rule matches"; + }; + + role = mkOption { + type = types.nullOr types.str; + default = null; + description = "Primary role assigned when the rule matches"; + }; + + labels = mkOption { + type = types.attrsOf types.str; + default = { }; + description = "Labels attached when the rule matches"; + }; + + nixProfile = mkOption { + type = types.nullOr types.str; + default = null; + description = "Nix profile attached when the rule matches"; + }; + + installPlan = mkOption { + type = types.nullOr installPlanType; + default = null; + description = "Install plan attached when the rule matches"; + }; + + services = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "Services enabled for matching nodes"; + }; + + sshAuthorizedKeys = mkOption { + type = types.listOf types.str; + default = [ ]; + description = "SSH authorized keys installed for matching nodes"; + }; + + nodeIdPrefix = mkOption { + type = types.nullOr types.str; + default = null; + description = "Prefix used when synthesizing node IDs"; + }; + }; + }; + + mkInstallPlan = plan: + let + rendered = + 
optionalAttrs (plan != null && plan.nixosConfiguration != null) { + nixos_configuration = plan.nixosConfiguration; + } + // optionalAttrs (plan != null && plan.diskoConfigPath != null) { + disko_config_path = plan.diskoConfigPath; + } + // optionalAttrs (plan != null && plan.targetDisk != null) { + target_disk = plan.targetDisk; + } + // optionalAttrs (plan != null && plan.targetDiskById != null) { + target_disk_by_id = plan.targetDiskById; + }; + in + if plan == null || rendered == { } then null else rendered; + + mkDesiredSystem = nodeName: desiredSystem: + let + rendered = + optionalAttrs (desiredSystem != null && desiredSystem.deploymentId != null) { + deployment_id = desiredSystem.deploymentId; + } + // optionalAttrs (desiredSystem != null && desiredSystem.nixosConfiguration != null) { + nixos_configuration = desiredSystem.nixosConfiguration; + } + // optionalAttrs (desiredSystem != null && desiredSystem.targetSystem != null) { + target_system = desiredSystem.targetSystem; + } + // optionalAttrs (desiredSystem != null && desiredSystem.flakeRef != null) { + flake_ref = desiredSystem.flakeRef; + } + // optionalAttrs (desiredSystem != null && desiredSystem.switchAction != null) { + switch_action = desiredSystem.switchAction; + } + // optionalAttrs (desiredSystem != null && desiredSystem.healthCheckCommand != [ ]) { + health_check_command = desiredSystem.healthCheckCommand; + } + // optionalAttrs (desiredSystem != null && desiredSystem.rollbackOnFailure != null) { + rollback_on_failure = desiredSystem.rollbackOnFailure; + } + // optionalAttrs (desiredSystem != null && desiredSystem.drainBeforeApply != null) { + drain_before_apply = desiredSystem.drainBeforeApply; + }; + in + if desiredSystem == null || rendered == { } then null else { + node_id = nodeName; + } // rendered; + + mkHostDeploymentSelector = selector: + { + node_ids = selector.nodeIds or [ ]; + roles = selector.roles or [ ]; + pools = selector.pools or [ ]; + node_classes = selector.nodeClasses or [ ]; 
+ match_labels = selector.matchLabels or { }; + }; + + mkDeployerHostDeploymentSpec = name: deployment: + { + inherit name; + selector = mkHostDeploymentSelector deployment.selector; + } + // optionalAttrs (deployment.nixosConfiguration != null) { + nixos_configuration = deployment.nixosConfiguration; + } + // optionalAttrs (deployment.targetSystem != null) { + target_system = deployment.targetSystem; + } + // optionalAttrs (deployment.flakeRef != null) { + flake_ref = deployment.flakeRef; + } + // optionalAttrs (deployment.batchSize != null) { + batch_size = deployment.batchSize; + } + // optionalAttrs (deployment.maxUnavailable != null) { + max_unavailable = deployment.maxUnavailable; + } + // optionalAttrs (deployment.healthCheckCommand != [ ]) { + health_check_command = deployment.healthCheckCommand; + } + // optionalAttrs (deployment.switchAction != null) { + switch_action = deployment.switchAction; + } + // optionalAttrs (deployment.rollbackOnFailure != null) { + rollback_on_failure = deployment.rollbackOnFailure; + } + // optionalAttrs (deployment.drainBeforeApply != null) { + drain_before_apply = deployment.drainBeforeApply; + } + // optionalAttrs (deployment.rebootPolicy != null) { + reboot_policy = deployment.rebootPolicy; + } + // optionalAttrs (deployment.paused != null) { + paused = deployment.paused; + }; + + mkDeployerNodeSpec = nodeName: node: + { + node_id = nodeName; + hostname = nodeName; + ip = node.ip; + roles = unique [ node.role ]; + labels = node.labels; + } + // optionalAttrs (node.machineId != null) { + machine_id = node.machineId; + } + // optionalAttrs (node.pool != null) { + pool = node.pool; + } + // optionalAttrs (node.nodeClass != null) { + node_class = node.nodeClass; + } + // optionalAttrs (node.failureDomain != null) { + failure_domain = node.failureDomain; + } + // optionalAttrs (node.nixProfile != null) { + nix_profile = node.nixProfile; + } + // optionalAttrs (mkInstallPlan node.installPlan != null) { + install_plan = 
mkInstallPlan node.installPlan; + } + // optionalAttrs (mkDesiredSystem nodeName node.desiredSystem != null) { + desired_system = mkDesiredSystem nodeName node.desiredSystem; + } + // optionalAttrs (node.state != null) { + state = node.state; + } + // optionalAttrs (node.commissionState != null) { + commission_state = node.commissionState; + } + // optionalAttrs (node.installState != null) { + install_state = node.installState; + } + // optionalAttrs (node.powerState != null) { + power_state = node.powerState; + } + // optionalAttrs (node.bmcRef != null) { + bmc_ref = node.bmcRef; + }; + + mkDeployerNodeClassSpec = name: nodeClass: + { + inherit name; + roles = nodeClass.roles; + labels = nodeClass.labels; + } + // optionalAttrs (nodeClass.description != null) { + description = nodeClass.description; + } + // optionalAttrs (nodeClass.nixProfile != null) { + nix_profile = nodeClass.nixProfile; + } + // optionalAttrs (mkInstallPlan nodeClass.installPlan != null) { + install_plan = mkInstallPlan nodeClass.installPlan; + }; + + mkDeployerPoolSpec = name: pool: + { + inherit name; + labels = pool.labels; + } + // optionalAttrs (pool.description != null) { + description = pool.description; + } + // optionalAttrs (pool.nodeClass != null) { + node_class = pool.nodeClass; + } + // optionalAttrs (pool.minSize != null) { + min_size = pool.minSize; + } + // optionalAttrs (pool.maxSize != null) { + max_size = pool.maxSize; + }; + + mkDeployerEnrollmentRuleSpec = name: rule: + { + inherit name; + priority = rule.priority; + match_labels = rule.matchLabels; + match_ip_prefixes = rule.matchIpPrefixes; + labels = rule.labels; + services = rule.services; + ssh_authorized_keys = rule.sshAuthorizedKeys; + } + // optionalAttrs (rule.matchHostnamePrefix != null) { + match_hostname_prefix = rule.matchHostnamePrefix; + } + // optionalAttrs (rule.pool != null) { + pool = rule.pool; + } + // optionalAttrs (rule.nodeClass != null) { + node_class = rule.nodeClass; + } + // optionalAttrs 
(rule.role != null) { + role = rule.role; + } + // optionalAttrs (rule.nixProfile != null) { + nix_profile = rule.nixProfile; + } + // optionalAttrs (mkInstallPlan rule.installPlan != null) { + install_plan = mkInstallPlan rule.installPlan; + } + // optionalAttrs (rule.nodeIdPrefix != null) { + node_id_prefix = rule.nodeIdPrefix; + }; + + mkServicePorts = ports: + optionalAttrs (ports != null && ports.http != null) { + http = ports.http; + } + // optionalAttrs (ports != null && ports.grpc != null) { + grpc = ports.grpc; + }; + + mkProcessSpec = process: + { + command = process.command; + args = process.args; + env = process.env; + } + // optionalAttrs (process.workingDir != null) { + working_dir = process.workingDir; + }; + + mkContainerPortSpec = port: + { + container_port = port.containerPort; + } + // optionalAttrs (port.hostPort != null) { + host_port = port.hostPort; + } + // optionalAttrs (port.protocol != null) { + protocol = port.protocol; + }; + + mkContainerVolumeSpec = volume: + { + source = volume.source; + target = volume.target; + } + // optionalAttrs volume.readOnly { + read_only = true; + }; + + mkContainerSpec = container: + { + image = container.image; + command = container.command; + args = container.args; + env = container.env; + ports = map mkContainerPortSpec container.ports; + volumes = map mkContainerVolumeSpec container.volumes; + } + // optionalAttrs (container.runtime != null) { + runtime = container.runtime; + } + // optionalAttrs (container.networkMode != null) { + network_mode = container.networkMode; + } + // optionalAttrs (container.pullPolicy != null) { + pull_policy = container.pullPolicy; + } + // optionalAttrs (container.workingDir != null) { + working_dir = container.workingDir; + }; + + mkHealthCheckSpec = healthCheck: + { + type = healthCheck.type; + } + // optionalAttrs (healthCheck.path != null) { + path = healthCheck.path; + } + // optionalAttrs (healthCheck.intervalSecs != null) { + interval_secs = 
healthCheck.intervalSecs; + } + // optionalAttrs (healthCheck.timeoutSecs != null) { + timeout_secs = healthCheck.timeoutSecs; + } + // optionalAttrs (healthCheck.startupGraceSecs != null) { + startup_grace_secs = healthCheck.startupGraceSecs; + }; + + mkPlacementPolicySpec = placement: + { + roles = placement.roles; + pools = placement.pools; + node_classes = placement.nodeClasses; + match_labels = placement.matchLabels; + max_instances_per_node = placement.maxInstancesPerNode; + } + // optionalAttrs (placement.spreadByLabel != null) { + spread_by_label = placement.spreadByLabel; + }; + + mkRolloutStrategySpec = rollout: { + max_unavailable = rollout.maxUnavailable; + max_surge = rollout.maxSurge; + }; + + mkServiceScheduleSpec = schedule: + { + replicas = schedule.replicas; + placement = mkPlacementPolicySpec schedule.placement; + rollout = mkRolloutStrategySpec schedule.rollout; + } + // optionalAttrs (schedule.instancePort != null) { + instance_port = schedule.instancePort; + } + // optionalAttrs (schedule.meshPort != null) { + mesh_port = schedule.meshPort; + } + // optionalAttrs (schedule.process != null) { + process = mkProcessSpec schedule.process; + } + // optionalAttrs (schedule.container != null) { + container = mkContainerSpec schedule.container; + } + // optionalAttrs (schedule.healthCheck != null) { + health_check = mkHealthCheckSpec schedule.healthCheck; + }; + + mkDnsPublicationSpec = dns: + { + zone = dns.zone; + ttl = dns.ttl; + mode = dns.mode; + } + // optionalAttrs (dns.name != null) { + name = dns.name; + }; + + mkLoadBalancerPublicationSpec = loadBalancer: + optionalAttrs (loadBalancer.orgId != null) { + org_id = loadBalancer.orgId; + } + // optionalAttrs (loadBalancer.projectId != null) { + project_id = loadBalancer.projectId; + } + // optionalAttrs (loadBalancer.name != null) { + name = loadBalancer.name; + } + // optionalAttrs (loadBalancer.listenerPort != null) { + listener_port = loadBalancer.listenerPort; + } + // optionalAttrs 
(loadBalancer.protocol != null) {
+      protocol = loadBalancer.protocol;
+    }
+    // optionalAttrs (loadBalancer.poolProtocol != null) {
+      pool_protocol = loadBalancer.poolProtocol;
+    };
+
+  # Render the publication targets of a service; every field is optional and
+  # is omitted from the result when null.
+  mkServicePublicationSpec = publish:
+    optionalAttrs (publish.orgId != null) {
+      org_id = publish.orgId;
+    }
+    // optionalAttrs (publish.projectId != null) {
+      project_id = publish.projectId;
+    }
+    // optionalAttrs (publish.dns != null) {
+      dns = mkDnsPublicationSpec publish.dns;
+    }
+    // optionalAttrs (publish.loadBalancer != null) {
+      load_balancer = mkLoadBalancerPublicationSpec publish.loadBalancer;
+    };
+
+  # Render one deployer service entry named `name`.
+  # `ports` is emitted only when at least one port is actually set.
+  mkDeployerServiceSpec = name: service:
+    {
+      inherit name;
+    }
+    // optionalAttrs (service.ports != null && mkServicePorts service.ports != { }) {
+      ports = mkServicePorts service.ports;
+    }
+    // optionalAttrs (service.protocol != null) {
+      protocol = service.protocol;
+    }
+    // optionalAttrs (service.mtlsRequired != null) {
+      mtls_required = service.mtlsRequired;
+    }
+    // optionalAttrs (service.meshMode != null) {
+      mesh_mode = service.meshMode;
+    }
+    // optionalAttrs (service.schedule != null) {
+      schedule = mkServiceScheduleSpec service.schedule;
+    }
+    // optionalAttrs (service.publish != null) {
+      publish = mkServicePublicationSpec service.publish;
+    };
+
+  # Render one mTLS policy entry; the attribute name becomes policy_id.
+  mkDeployerMtlsPolicySpec = name: policy:
+    {
+      policy_id = name;
+      source_service = policy.sourceService;
+      target_service = policy.targetService;
+    }
+    // optionalAttrs (policy.environment != null) {
+      environment = policy.environment;
+    }
+    // optionalAttrs (policy.mtlsRequired != null) {
+      mtls_required = policy.mtlsRequired;
+    }
+    // optionalAttrs (policy.mode != null) {
+      mode = policy.mode;
+    };
+
+  # Names of all nodes whose role is "control-plane".  Nodes without a `role`
+  # attribute count as "worker".  The order is that of `attrNames`.
+  controlPlaneNodeNames = cluster:
+    filter (n: (cluster.nodes.${n}.role or "worker") == "control-plane")
+      (attrNames cluster.nodes);
+
+  # Pick the bootstrap node name for `cluster`.
+  #
+  # Precedence:
+  #   1. the explicit `bootstrapNodeName` argument, when not null
+  #   2. `cluster.bootstrapNode`, when present and not null
+  #   3. the first entry of `cluster.bootstrap.initialPeers`, when non-empty
+  #   4. the first control-plane node
+  #
+  # Throws a descriptive error when none of these yields a name, instead of
+  # failing inside `head` on an empty list.
+  resolveBootstrapNodeName = cluster: bootstrapNodeName:
+    let
+      controlPlaneNodes = controlPlaneNodeNames cluster;
+    in
+    if bootstrapNodeName != null then
+      bootstrapNodeName
+    else if cluster ? bootstrapNode && cluster.bootstrapNode != null then
+      cluster.bootstrapNode
+    else if cluster ? bootstrap && cluster.bootstrap ? initialPeers && cluster.bootstrap.initialPeers != [ ] then
+      head cluster.bootstrap.initialPeers
+    else if controlPlaneNodes != [ ] then
+      head controlPlaneNodes
+    else
+      throw "Cannot resolve a bootstrap node for cluster ${cluster.name or "<unnamed>"}: no bootstrapNode, no bootstrap.initialPeers and no control-plane node is defined";
+
+  # Reduce a cluster definition to the name/bootstrapNode/nodes triple, with
+  # the bootstrap node resolved by resolveBootstrapNodeName.
+  mkNixNOSTopologyCluster = cluster: {
+    name = cluster.name;
+    bootstrapNode = resolveBootstrapNodeName cluster null;
+    nodes = cluster.nodes;
+  };
+
+  # Build the per-node cluster configuration for `hostname`.
+  #
+  # Arguments:
+  #   cluster            cluster definition (uses `name`, `nodes`, optional `bgp`)
+  #   hostname           node whose configuration is generated
+  #   bootstrapNodeName  optional override; null defers to resolveBootstrapNodeName
+  #
+  # Throws when `hostname` or the resolved bootstrap node is not in
+  # `cluster.nodes`.
+  mkClusterConfig = {
+    cluster,
+    hostname,
+    bootstrapNodeName ? null,
+  }:
+    let
+      node = cluster.nodes.${hostname} or (throw "Node ${hostname} not found in cluster configuration");
+
+      controlPlaneNodes = controlPlaneNodeNames cluster;
+
+      resolvedBootstrapNodeName = resolveBootstrapNodeName cluster bootstrapNodeName;
+
+      bootstrapNode = cluster.nodes.${resolvedBootstrapNodeName}
+        or (throw "Bootstrap node ${resolvedBootstrapNodeName} not found in cluster configuration");
+
+      # One peer per control-plane node, addressed by its raft port.
+      initialPeers = map (nodeName: {
+        id = nodeName;
+        addr = "${cluster.nodes.${nodeName}.ip}:${toString cluster.nodes.${nodeName}.raftPort}";
+      }) controlPlaneNodes;
+
+      # NOTE(review): the peer port is derived as apiPort + 100; the reason for
+      # this offset is not visible here - confirm against the flaredb service.
+      flaredbPeers = map (nodeName:
+        "${cluster.nodes.${nodeName}.ip}:${toString (cluster.nodes.${nodeName}.apiPort + 100)}"
+      ) controlPlaneNodes;
+
+      # NOTE(review): ports 8081/8082 are hard-coded; presumably the chainfire
+      # and flaredb HTTP ports - confirm.
+      chainfireLeaderUrl = "http://${bootstrapNode.ip}:8081";
+      flaredbLeaderUrl = "http://${bootstrapNode.ip}:8082";
+    in {
+      node_id = hostname;
+      node_role = node.role;
+      bootstrap = hostname == resolvedBootstrapNodeName;
+      cluster_name = cluster.name;
+      leader_url = chainfireLeaderUrl;
+      chainfire_leader_url = chainfireLeaderUrl;
+      flaredb_leader_url = flaredbLeaderUrl;
+      raft_addr = "${node.ip}:${toString node.raftPort}";
+      initial_peers = initialPeers;
+      flaredb_peers = flaredbPeers;
+      services = node.services;
+      metadata = node.metadata;
+    } // optionalAttrs (cluster ? bgp && cluster.bgp ? asn) {
+      bgp_asn = cluster.bgp.asn;
+    };
+
+  # Render the whole deployer cluster state from `cluster` and its optional
+  # `deployer` attribute set.  cluster_id falls back to `cluster.name`, and
+  # every collection under `deployer` defaults to empty when absent.
+  mkDeployerClusterState = cluster:
+    let
+      deployer = cluster.deployer or { };
+      clusterId =
+        if deployer ? clusterId && deployer.clusterId != null then
+          deployer.clusterId
+        else
+          cluster.name;
+      nodeClasses = deployer.nodeClasses or { };
+      pools = deployer.pools or { };
+      enrollmentRules = deployer.enrollmentRules or { };
+      hostDeployments = deployer.hostDeployments or { };
+      services = deployer.services or { };
+      mtlsPolicies = deployer.mtlsPolicies or { };
+    in {
+      cluster = {
+        cluster_id = clusterId;
+      } // optionalAttrs (deployer ? environment && deployer.environment != null) {
+        environment = deployer.environment;
+      };
+      # mapAttrsToList visits attributes in attrNames order, exactly like the
+      # explicit map-over-attrNames form it replaces.
+      nodes = mapAttrsToList mkDeployerNodeSpec cluster.nodes;
+      node_classes = mapAttrsToList mkDeployerNodeClassSpec nodeClasses;
+      pools = mapAttrsToList mkDeployerPoolSpec pools;
+      enrollment_rules = mapAttrsToList mkDeployerEnrollmentRuleSpec enrollmentRules;
+      host_deployments = mapAttrsToList mkDeployerHostDeploymentSpec hostDeployments;
+      services = mapAttrsToList mkDeployerServiceSpec services;
+      instances = [ ];
+      mtls_policies = mapAttrsToList mkDeployerMtlsPolicySpec mtlsPolicies;
+    };
+in
+{
+  inherit
+    mkInstallPlanType
+    mkDesiredSystemType
+    mkHostDeploymentSelectorType
+    mkHostDeploymentType
+    mkServicePortsType
+    mkProcessType
+    mkContainerPortType
+    mkContainerVolumeType
+    mkContainerType
+    mkHealthCheckType
+    mkPlacementPolicyType
+    mkRolloutStrategyType
+    mkServiceScheduleType
+    mkDnsPublicationType
+    mkLoadBalancerPublicationType
+    mkServicePublicationType
+    mkServiceType
+    mkMtlsPolicyType
+    mkNodeType
+    mkNodeClassType
+    mkNodePoolType
+    mkEnrollmentRuleType
+    resolveBootstrapNodeName
+    mkNixNOSTopologyCluster
+    mkClusterConfig
+
mkDeployerClusterState; +} diff --git a/nix-nos/modules/default.nix b/nix-nos/modules/default.nix index f1b4be4..68a5862 100644 --- a/nix-nos/modules/default.nix +++ b/nix-nos/modules/default.nix @@ -2,6 +2,7 @@ { imports = [ + ./topology.nix ./network/interfaces.nix ./network/vlans.nix ./bgp/default.nix diff --git a/nix-nos/modules/topology.nix b/nix-nos/modules/topology.nix new file mode 100644 index 0000000..83b766b --- /dev/null +++ b/nix-nos/modules/topology.nix @@ -0,0 +1,68 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+  cfg = config.nix-nos;
+  clusterConfigLib = import ../lib/cluster-config-lib.nix { inherit lib; };
+  nodeType = clusterConfigLib.mkNodeType types;
+
+  # Submodule describing one cluster: name, nodes and optional bootstrap node
+  clusterType = types.submodule {
+    options = {
+      name = mkOption {
+        type = types.str;
+        default = "plasmacloud-cluster";
+        description = "Cluster name";
+      };
+
+      nodes = mkOption {
+        type = types.attrsOf nodeType;
+        default = {};
+        description = "Map of node names to their configurations";
+        example = literalExpression ''
+          {
+            "node01" = {
+              role = "control-plane";
+              ip = "10.0.1.10";
+              services = [ "chainfire" "flaredb" ];
+            };
+          }
+        '';
+      };
+
+      bootstrapNode = mkOption {
+        type = types.nullOr types.str;
+        default = null;
+        description = "Name of the bootstrap node (first control-plane node if null)";
+      };
+    };
+  };
+
+in {
+  options.nix-nos = {
+    clusters = mkOption {
+      type = types.attrsOf clusterType;
+      default = {};
+      description = "Map of cluster names to their configurations";
+    };
+
+    # Helper: looks up `clusterName` in nix-nos.clusters and renders the config for `hostname`
+    generateClusterConfig = mkOption {
+      type = types.functionTo types.attrs;
+      default = { hostname, clusterName ? "plasmacloud" }:
+        let
+          cluster = cfg.clusters.${clusterName} or (throw "Cluster ${clusterName} not found");
+        in clusterConfigLib.mkClusterConfig {
+          inherit cluster hostname;
+          # A null bootstrapNode is forwarded unchanged: mkClusterConfig
+          # treats null as "not specified" and resolves the bootstrap node
+          # itself.
+          bootstrapNodeName = cluster.bootstrapNode;
+        };
+      description = "Function to generate cluster-config.json for a specific hostname";
+    };
+  };
+
+  config = mkIf cfg.enable { };
+}
diff --git a/nix/modules/cluster-config-lib.nix b/nix/modules/cluster-config-lib.nix index 3a98fab..c6a8e71 100644 --- a/nix/modules/cluster-config-lib.nix +++ b/nix/modules/cluster-config-lib.nix @@ -1,1468 +1,3 @@ { lib }: -with lib; - -let - mkInstallPlanType = types: types.submodule { - options = { - nixosConfiguration = mkOption { - type = types.nullOr types.str; - default = null; - description = "Name of the nixosConfigurations output to install"; - }; - - diskoConfigPath = mkOption { - type = types.nullOr types.str; - default = null; - description = "Repository-relative Disko file used for installation"; - }; - - targetDisk = mkOption { - type = types.nullOr types.str; - default = null; - description = "Explicit disk device path selected for installation"; - }; - - targetDiskById = mkOption { - type = types.nullOr types.str; - default = null; - description = "Stable /dev/disk/by-id path selected for installation"; - }; - }; - }; - - mkDesiredSystemType = types: types.submodule { - options = { - deploymentId = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional host deployment identifier owning this desired system"; - }; - - nixosConfiguration = mkOption { - type = types.nullOr types.str; - default = null; - description = "Name of the nixosConfigurations output to activate"; - }; - - targetSystem = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional prebuilt NixOS system closure path activated directly by nix-agent"; - }; - - flakeRef = mkOption { - type = types.nullOr types.str; -
default = null; - description = "Explicit flake reference used by nix-agent"; - }; - - switchAction = mkOption { - type = types.nullOr types.str; - default = null; - description = "switch-to-configuration action for nix-agent"; - }; - - healthCheckCommand = mkOption { - type = types.listOf types.str; - default = [ ]; - description = "Command vector executed after activation to validate node health"; - }; - - rollbackOnFailure = mkOption { - type = types.nullOr types.bool; - default = null; - description = "Whether nix-agent should roll back when the health check fails"; - }; - - drainBeforeApply = mkOption { - type = types.nullOr types.bool; - default = null; - description = "Whether the controller should drain the node before issuing this desired system"; - }; - }; - }; - - mkHostDeploymentSelectorType = types: types.submodule { - options = { - nodeIds = mkOption { - type = types.listOf types.str; - default = [ ]; - description = "Explicit node IDs targeted by the deployment"; - }; - - roles = mkOption { - type = types.listOf types.str; - default = [ ]; - description = "Node roles targeted by the deployment"; - }; - - pools = mkOption { - type = types.listOf types.str; - default = [ ]; - description = "Node pools targeted by the deployment"; - }; - - nodeClasses = mkOption { - type = types.listOf types.str; - default = [ ]; - description = "Node classes targeted by the deployment"; - }; - - matchLabels = mkOption { - type = types.attrsOf types.str; - default = { }; - description = "Label selectors applied to target nodes"; - }; - }; - }; - - mkHostDeploymentType = types: - let - selectorType = mkHostDeploymentSelectorType types; - in types.submodule { - options = { - selector = mkOption { - type = selectorType; - default = { }; - description = "Node selector used by the host deployment"; - }; - - nixosConfiguration = mkOption { - type = types.nullOr types.str; - default = null; - description = "Name of the nixosConfigurations output to roll out"; - }; - - 
targetSystem = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional prebuilt NixOS system closure path handed directly to nix-agent"; - }; - - flakeRef = mkOption { - type = types.nullOr types.str; - default = null; - description = "Explicit flake reference used during rollout"; - }; - - batchSize = mkOption { - type = types.nullOr types.int; - default = null; - description = "Maximum number of nodes started per reconciliation wave"; - }; - - maxUnavailable = mkOption { - type = types.nullOr types.int; - default = null; - description = "Maximum number of unavailable nodes allowed during rollout"; - }; - - healthCheckCommand = mkOption { - type = types.listOf types.str; - default = [ ]; - description = "Health check command executed by nix-agent after activation"; - }; - - switchAction = mkOption { - type = types.nullOr types.str; - default = null; - description = "switch-to-configuration action used by nix-agent"; - }; - - rollbackOnFailure = mkOption { - type = types.nullOr types.bool; - default = null; - description = "Whether nodes should roll back when rollout health checks fail"; - }; - - drainBeforeApply = mkOption { - type = types.nullOr types.bool; - default = null; - description = "Whether the controller should drain a node before applying the rollout"; - }; - - rebootPolicy = mkOption { - type = types.nullOr types.str; - default = null; - description = "Operator-facing reboot policy associated with the rollout"; - }; - - paused = mkOption { - type = types.nullOr types.bool; - default = null; - description = "Whether the rollout should start in a paused state"; - }; - }; - }; - - mkServicePortsType = types: types.submodule { - options = { - http = mkOption { - type = types.nullOr types.port; - default = null; - description = "Optional HTTP port exposed by the service"; - }; - - grpc = mkOption { - type = types.nullOr types.port; - default = null; - description = "Optional gRPC port exposed by the service"; - }; - }; - }; 
- - mkProcessType = types: types.submodule { - options = { - command = mkOption { - type = types.str; - description = "Executable invoked by node-agent"; - }; - - args = mkOption { - type = types.listOf types.str; - default = [ ]; - description = "Command-line arguments passed to the process"; - }; - - workingDir = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional working directory used when spawning the process"; - }; - - env = mkOption { - type = types.attrsOf types.str; - default = { }; - description = "Environment variables injected into the process"; - }; - }; - }; - - mkContainerPortType = types: types.submodule { - options = { - containerPort = mkOption { - type = types.port; - description = "Port exposed inside the container"; - }; - - hostPort = mkOption { - type = types.nullOr types.port; - default = null; - description = "Optional fixed host port published for this container port"; - }; - - protocol = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional transport protocol for the published port"; - }; - }; - }; - - mkContainerVolumeType = types: types.submodule { - options = { - source = mkOption { - type = types.str; - description = "Host-side volume source path"; - }; - - target = mkOption { - type = types.str; - description = "Container mount target path"; - }; - - readOnly = mkOption { - type = types.bool; - default = false; - description = "Whether the volume should be mounted read-only"; - }; - }; - }; - - mkContainerType = types: - let - containerPortType = mkContainerPortType types; - containerVolumeType = mkContainerVolumeType types; - in types.submodule { - options = { - image = mkOption { - type = types.str; - description = "Container image reference"; - }; - - runtime = mkOption { - type = types.nullOr types.str; - default = null; - description = "Container runtime invoked by node-agent"; - }; - - command = mkOption { - type = types.listOf types.str; - default = [ ]; - 
description = "Optional entrypoint override"; - }; - - args = mkOption { - type = types.listOf types.str; - default = [ ]; - description = "Container arguments appended after the image"; - }; - - env = mkOption { - type = types.attrsOf types.str; - default = { }; - description = "Environment variables passed to the container runtime"; - }; - - ports = mkOption { - type = types.listOf containerPortType; - default = [ ]; - description = "Published container ports"; - }; - - volumes = mkOption { - type = types.listOf containerVolumeType; - default = [ ]; - description = "Host volume mounts passed to the container runtime"; - }; - - networkMode = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional container network mode"; - }; - - pullPolicy = mkOption { - type = types.nullOr types.str; - default = null; - description = "Container image pull policy"; - }; - - workingDir = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional container working directory"; - }; - }; - }; - - mkHealthCheckType = types: types.submodule { - options = { - type = mkOption { - type = types.str; - description = "Health check type executed by node-agent"; - }; - - path = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional path used by HTTP health checks"; - }; - - intervalSecs = mkOption { - type = types.nullOr types.ints.positive; - default = null; - description = "Health check interval in seconds"; - }; - - timeoutSecs = mkOption { - type = types.nullOr types.ints.positive; - default = null; - description = "Health check timeout in seconds"; - }; - - startupGraceSecs = mkOption { - type = types.nullOr types.ints.positive; - default = null; - description = "Startup grace period before a service is considered unhealthy"; - }; - }; - }; - - mkPlacementPolicyType = types: types.submodule { - options = { - roles = mkOption { - type = types.listOf types.str; - default = [ ]; - description = 
"Roles matched by the scheduler placement filter"; - }; - - pools = mkOption { - type = types.listOf types.str; - default = [ ]; - description = "Pools matched by the scheduler placement filter"; - }; - - nodeClasses = mkOption { - type = types.listOf types.str; - default = [ ]; - description = "Node classes matched by the scheduler placement filter"; - }; - - matchLabels = mkOption { - type = types.attrsOf types.str; - default = { }; - description = "Additional label selectors matched by the scheduler"; - }; - - spreadByLabel = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional spread key used when balancing replicas"; - }; - - maxInstancesPerNode = mkOption { - type = types.ints.positive; - default = 1; - description = "Maximum number of replicas the scheduler may place on one node"; - }; - }; - }; - - mkRolloutStrategyType = types: types.submodule { - options = { - maxUnavailable = mkOption { - type = types.ints.unsigned; - default = 1; - description = "Maximum unavailable instances allowed during a rollout"; - }; - - maxSurge = mkOption { - type = types.ints.unsigned; - default = 1; - description = "Maximum extra instances allowed during a rollout"; - }; - }; - }; - - mkServiceScheduleType = types: - let - placementPolicyType = mkPlacementPolicyType types; - rolloutStrategyType = mkRolloutStrategyType types; - processType = mkProcessType types; - containerType = mkContainerType types; - healthCheckType = mkHealthCheckType types; - in types.submodule { - options = { - replicas = mkOption { - type = types.ints.positive; - default = 1; - description = "Desired number of scheduler-managed replicas"; - }; - - placement = mkOption { - type = placementPolicyType; - default = { }; - description = "Scheduler placement rules for the service"; - }; - - rollout = mkOption { - type = rolloutStrategyType; - default = { }; - description = "Rollout budget used by the scheduler"; - }; - - instancePort = mkOption { - type = types.nullOr 
types.port; - default = null; - description = "Host port used when creating service instances"; - }; - - meshPort = mkOption { - type = types.nullOr types.port; - default = null; - description = "Optional service mesh port for the managed instances"; - }; - - process = mkOption { - type = types.nullOr processType; - default = null; - description = "Process-based runtime specification"; - }; - - container = mkOption { - type = types.nullOr containerType; - default = null; - description = "Container-based runtime specification"; - }; - - healthCheck = mkOption { - type = types.nullOr healthCheckType; - default = null; - description = "Health check performed by node-agent"; - }; - }; - }; - - mkDnsPublicationType = types: types.submodule { - options = { - zone = mkOption { - type = types.str; - description = "FlashDNS zone used for service publication"; - }; - - name = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional record name inside the published zone"; - }; - - ttl = mkOption { - type = types.ints.positive; - default = 30; - description = "DNS TTL for the published record"; - }; - - mode = mkOption { - type = types.enum [ "load_balancer" "direct" ]; - default = "load_balancer"; - description = "Whether DNS publishes the load balancer VIP or a direct instance address"; - }; - }; - }; - - mkLoadBalancerPublicationType = types: types.submodule { - options = { - orgId = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional organization used when provisioning FiberLB resources"; - }; - - projectId = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional project used when provisioning FiberLB resources"; - }; - - name = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional explicit load balancer name"; - }; - - listenerPort = mkOption { - type = types.nullOr types.port; - default = null; - description = "Listener port exposed by the 
load balancer"; - }; - - protocol = mkOption { - type = types.nullOr types.str; - default = null; - description = "Listener protocol for the published load balancer"; - }; - - poolProtocol = mkOption { - type = types.nullOr types.str; - default = null; - description = "Backend pool protocol for the published load balancer"; - }; - }; - }; - - mkServicePublicationType = types: - let - dnsPublicationType = mkDnsPublicationType types; - loadBalancerPublicationType = mkLoadBalancerPublicationType types; - in types.submodule { - options = { - orgId = mkOption { - type = types.nullOr types.str; - default = null; - description = "Default organization used for service publication"; - }; - - projectId = mkOption { - type = types.nullOr types.str; - default = null; - description = "Default project used for service publication"; - }; - - dns = mkOption { - type = types.nullOr dnsPublicationType; - default = null; - description = "Optional FlashDNS publication target"; - }; - - loadBalancer = mkOption { - type = types.nullOr loadBalancerPublicationType; - default = null; - description = "Optional FiberLB publication target"; - }; - }; - }; - - mkServiceType = types: - let - servicePortsType = mkServicePortsType types; - serviceScheduleType = mkServiceScheduleType types; - servicePublicationType = mkServicePublicationType types; - in types.submodule { - options = { - ports = mkOption { - type = types.nullOr servicePortsType; - default = null; - description = "Optional logical service ports"; - }; - - protocol = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional service protocol"; - }; - - mtlsRequired = mkOption { - type = types.nullOr types.bool; - default = null; - description = "Whether service-to-service traffic requires mTLS"; - }; - - meshMode = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional mesh publication mode"; - }; - - schedule = mkOption { - type = types.nullOr serviceScheduleType; - 
default = null; - description = "Scheduler-managed runtime intent"; - }; - - publish = mkOption { - type = types.nullOr servicePublicationType; - default = null; - description = "Optional publication targets for the service"; - }; - }; - }; - - mkMtlsPolicyType = types: types.submodule { - options = { - environment = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional environment scope for the policy"; - }; - - sourceService = mkOption { - type = types.str; - description = "Source service matched by the policy"; - }; - - targetService = mkOption { - type = types.str; - description = "Target service matched by the policy"; - }; - - mtlsRequired = mkOption { - type = types.nullOr types.bool; - default = null; - description = "Whether the policy enforces mTLS"; - }; - - mode = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional policy mode"; - }; - }; - }; - - mkNodeType = types: - let - installPlanType = mkInstallPlanType types; - desiredSystemType = mkDesiredSystemType types; - in types.submodule { - options = { - role = mkOption { - type = types.enum [ "control-plane" "worker" ]; - default = "worker"; - description = "Node role in the cluster"; - }; - - ip = mkOption { - type = types.str; - description = "IP address of the node"; - }; - - services = mkOption { - type = types.listOf types.str; - default = [ ]; - description = "Services to run on this node"; - }; - - raftPort = mkOption { - type = types.port; - default = 2380; - description = "Raft port for consensus protocols"; - }; - - apiPort = mkOption { - type = types.port; - default = 2379; - description = "API port for cluster services"; - }; - - metadata = mkOption { - type = types.attrsOf types.anything; - default = { }; - description = "Additional metadata for the node"; - }; - - machineId = mkOption { - type = types.nullOr types.str; - default = null; - description = "Stable machine-id used to pre-register the node with deployer"; - }; 
- - labels = mkOption { - type = types.attrsOf types.str; - default = { }; - description = "User-defined labels exported into deployer cluster state"; - }; - - pool = mkOption { - type = types.nullOr types.str; - default = null; - description = "Logical node pool exported into deployer cluster state"; - }; - - nodeClass = mkOption { - type = types.nullOr types.str; - default = null; - description = "Reusable node class assigned to this node in deployer state"; - }; - - failureDomain = mkOption { - type = types.nullOr types.str; - default = null; - description = "Failure domain / zone label exported into deployer cluster state"; - }; - - nixProfile = mkOption { - type = types.nullOr types.str; - default = null; - description = "Desired Nix profile associated with the node"; - }; - - installPlan = mkOption { - type = types.nullOr installPlanType; - default = null; - description = "Explicit NixOS installation targets for bare-metal bootstrap"; - }; - - desiredSystem = mkOption { - type = types.nullOr desiredSystemType; - default = null; - description = "Desired NixOS reconciliation state exported for nix-agent"; - }; - - state = mkOption { - type = types.nullOr (types.enum [ "pending" "provisioning" "active" "failed" "draining" ]); - default = null; - description = "Desired deployer node lifecycle state"; - }; - - commissionState = mkOption { - type = types.nullOr (types.enum [ "discovered" "commissioning" "commissioned" ]); - default = null; - description = "Optional commissioning state exported into deployer cluster state"; - }; - - installState = mkOption { - type = types.nullOr (types.enum [ "pending" "installing" "installed" "failed" "reinstall_requested" ]); - default = null; - description = "Optional install lifecycle state exported into deployer cluster state"; - }; - - powerState = mkOption { - type = types.nullOr (types.enum [ "on" "off" "cycling" "unknown" ]); - default = null; - description = "Optional external power-management state associated with the 
node"; - }; - - bmcRef = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional BMC / Redfish reference associated with the node"; - }; - }; - }; - - mkNodeClassType = types: - let - installPlanType = mkInstallPlanType types; - in types.submodule { - options = { - description = mkOption { - type = types.nullOr types.str; - default = null; - description = "Human-readable description of the node class"; - }; - - nixProfile = mkOption { - type = types.nullOr types.str; - default = null; - description = "Desired Nix profile inherited by nodes in this class"; - }; - - installPlan = mkOption { - type = types.nullOr installPlanType; - default = null; - description = "Default install plan inherited by nodes in this class"; - }; - - roles = mkOption { - type = types.listOf types.str; - default = [ ]; - description = "Roles inherited by nodes in this class"; - }; - - labels = mkOption { - type = types.attrsOf types.str; - default = { }; - description = "Labels inherited by nodes in this class"; - }; - }; - }; - - mkNodePoolType = types: types.submodule { - options = { - description = mkOption { - type = types.nullOr types.str; - default = null; - description = "Human-readable description of the node pool"; - }; - - nodeClass = mkOption { - type = types.nullOr types.str; - default = null; - description = "Default node class assigned to nodes in this pool"; - }; - - minSize = mkOption { - type = types.nullOr types.int; - default = null; - description = "Minimum desired pool size"; - }; - - maxSize = mkOption { - type = types.nullOr types.int; - default = null; - description = "Maximum desired pool size"; - }; - - labels = mkOption { - type = types.attrsOf types.str; - default = { }; - description = "Labels applied to nodes in this pool"; - }; - }; - }; - - mkEnrollmentRuleType = types: - let - installPlanType = mkInstallPlanType types; - in types.submodule { - options = { - priority = mkOption { - type = types.int; - default = 0; - description 
= "Higher priority rules win when multiple rules match"; - }; - - matchLabels = mkOption { - type = types.attrsOf types.str; - default = { }; - description = "Label selectors matched against phone-home metadata"; - }; - - matchHostnamePrefix = mkOption { - type = types.nullOr types.str; - default = null; - description = "Optional hostname prefix matched during enrollment"; - }; - - matchIpPrefixes = mkOption { - type = types.listOf types.str; - default = [ ]; - description = "Optional IP prefixes matched during enrollment"; - }; - - pool = mkOption { - type = types.nullOr types.str; - default = null; - description = "Pool assigned when the rule matches"; - }; - - nodeClass = mkOption { - type = types.nullOr types.str; - default = null; - description = "Node class assigned when the rule matches"; - }; - - role = mkOption { - type = types.nullOr types.str; - default = null; - description = "Primary role assigned when the rule matches"; - }; - - labels = mkOption { - type = types.attrsOf types.str; - default = { }; - description = "Labels attached when the rule matches"; - }; - - nixProfile = mkOption { - type = types.nullOr types.str; - default = null; - description = "Nix profile attached when the rule matches"; - }; - - installPlan = mkOption { - type = types.nullOr installPlanType; - default = null; - description = "Install plan attached when the rule matches"; - }; - - services = mkOption { - type = types.listOf types.str; - default = [ ]; - description = "Services enabled for matching nodes"; - }; - - sshAuthorizedKeys = mkOption { - type = types.listOf types.str; - default = [ ]; - description = "SSH authorized keys installed for matching nodes"; - }; - - nodeIdPrefix = mkOption { - type = types.nullOr types.str; - default = null; - description = "Prefix used when synthesizing node IDs"; - }; - }; - }; - - mkInstallPlan = plan: - let - rendered = - optionalAttrs (plan != null && plan.nixosConfiguration != null) { - nixos_configuration = plan.nixosConfiguration; 
- } - // optionalAttrs (plan != null && plan.diskoConfigPath != null) { - disko_config_path = plan.diskoConfigPath; - } - // optionalAttrs (plan != null && plan.targetDisk != null) { - target_disk = plan.targetDisk; - } - // optionalAttrs (plan != null && plan.targetDiskById != null) { - target_disk_by_id = plan.targetDiskById; - }; - in - if plan == null || rendered == { } then null else rendered; - - mkDesiredSystem = nodeName: desiredSystem: - let - rendered = - optionalAttrs (desiredSystem != null && desiredSystem.deploymentId != null) { - deployment_id = desiredSystem.deploymentId; - } - // optionalAttrs (desiredSystem != null && desiredSystem.nixosConfiguration != null) { - nixos_configuration = desiredSystem.nixosConfiguration; - } - // optionalAttrs (desiredSystem != null && desiredSystem.targetSystem != null) { - target_system = desiredSystem.targetSystem; - } - // optionalAttrs (desiredSystem != null && desiredSystem.flakeRef != null) { - flake_ref = desiredSystem.flakeRef; - } - // optionalAttrs (desiredSystem != null && desiredSystem.switchAction != null) { - switch_action = desiredSystem.switchAction; - } - // optionalAttrs (desiredSystem != null && desiredSystem.healthCheckCommand != [ ]) { - health_check_command = desiredSystem.healthCheckCommand; - } - // optionalAttrs (desiredSystem != null && desiredSystem.rollbackOnFailure != null) { - rollback_on_failure = desiredSystem.rollbackOnFailure; - } - // optionalAttrs (desiredSystem != null && desiredSystem.drainBeforeApply != null) { - drain_before_apply = desiredSystem.drainBeforeApply; - }; - in - if desiredSystem == null || rendered == { } then null else { - node_id = nodeName; - } // rendered; - - mkHostDeploymentSelector = selector: - { - node_ids = selector.nodeIds or [ ]; - roles = selector.roles or [ ]; - pools = selector.pools or [ ]; - node_classes = selector.nodeClasses or [ ]; - match_labels = selector.matchLabels or { }; - }; - - mkDeployerHostDeploymentSpec = name: deployment: - { - 
inherit name; - selector = mkHostDeploymentSelector deployment.selector; - } - // optionalAttrs (deployment.nixosConfiguration != null) { - nixos_configuration = deployment.nixosConfiguration; - } - // optionalAttrs (deployment.targetSystem != null) { - target_system = deployment.targetSystem; - } - // optionalAttrs (deployment.flakeRef != null) { - flake_ref = deployment.flakeRef; - } - // optionalAttrs (deployment.batchSize != null) { - batch_size = deployment.batchSize; - } - // optionalAttrs (deployment.maxUnavailable != null) { - max_unavailable = deployment.maxUnavailable; - } - // optionalAttrs (deployment.healthCheckCommand != [ ]) { - health_check_command = deployment.healthCheckCommand; - } - // optionalAttrs (deployment.switchAction != null) { - switch_action = deployment.switchAction; - } - // optionalAttrs (deployment.rollbackOnFailure != null) { - rollback_on_failure = deployment.rollbackOnFailure; - } - // optionalAttrs (deployment.drainBeforeApply != null) { - drain_before_apply = deployment.drainBeforeApply; - } - // optionalAttrs (deployment.rebootPolicy != null) { - reboot_policy = deployment.rebootPolicy; - } - // optionalAttrs (deployment.paused != null) { - paused = deployment.paused; - }; - - mkDeployerNodeSpec = nodeName: node: - { - node_id = nodeName; - hostname = nodeName; - ip = node.ip; - roles = unique [ node.role ]; - labels = node.labels; - } - // optionalAttrs (node.machineId != null) { - machine_id = node.machineId; - } - // optionalAttrs (node.pool != null) { - pool = node.pool; - } - // optionalAttrs (node.nodeClass != null) { - node_class = node.nodeClass; - } - // optionalAttrs (node.failureDomain != null) { - failure_domain = node.failureDomain; - } - // optionalAttrs (node.nixProfile != null) { - nix_profile = node.nixProfile; - } - // optionalAttrs (mkInstallPlan node.installPlan != null) { - install_plan = mkInstallPlan node.installPlan; - } - // optionalAttrs (mkDesiredSystem nodeName node.desiredSystem != null) { - 
desired_system = mkDesiredSystem nodeName node.desiredSystem; - } - // optionalAttrs (node.state != null) { - state = node.state; - } - // optionalAttrs (node.commissionState != null) { - commission_state = node.commissionState; - } - // optionalAttrs (node.installState != null) { - install_state = node.installState; - } - // optionalAttrs (node.powerState != null) { - power_state = node.powerState; - } - // optionalAttrs (node.bmcRef != null) { - bmc_ref = node.bmcRef; - }; - - mkDeployerNodeClassSpec = name: nodeClass: - { - inherit name; - roles = nodeClass.roles; - labels = nodeClass.labels; - } - // optionalAttrs (nodeClass.description != null) { - description = nodeClass.description; - } - // optionalAttrs (nodeClass.nixProfile != null) { - nix_profile = nodeClass.nixProfile; - } - // optionalAttrs (mkInstallPlan nodeClass.installPlan != null) { - install_plan = mkInstallPlan nodeClass.installPlan; - }; - - mkDeployerPoolSpec = name: pool: - { - inherit name; - labels = pool.labels; - } - // optionalAttrs (pool.description != null) { - description = pool.description; - } - // optionalAttrs (pool.nodeClass != null) { - node_class = pool.nodeClass; - } - // optionalAttrs (pool.minSize != null) { - min_size = pool.minSize; - } - // optionalAttrs (pool.maxSize != null) { - max_size = pool.maxSize; - }; - - mkDeployerEnrollmentRuleSpec = name: rule: - { - inherit name; - priority = rule.priority; - match_labels = rule.matchLabels; - match_ip_prefixes = rule.matchIpPrefixes; - labels = rule.labels; - services = rule.services; - ssh_authorized_keys = rule.sshAuthorizedKeys; - } - // optionalAttrs (rule.matchHostnamePrefix != null) { - match_hostname_prefix = rule.matchHostnamePrefix; - } - // optionalAttrs (rule.pool != null) { - pool = rule.pool; - } - // optionalAttrs (rule.nodeClass != null) { - node_class = rule.nodeClass; - } - // optionalAttrs (rule.role != null) { - role = rule.role; - } - // optionalAttrs (rule.nixProfile != null) { - nix_profile = 
rule.nixProfile; - } - // optionalAttrs (mkInstallPlan rule.installPlan != null) { - install_plan = mkInstallPlan rule.installPlan; - } - // optionalAttrs (rule.nodeIdPrefix != null) { - node_id_prefix = rule.nodeIdPrefix; - }; - - mkServicePorts = ports: - optionalAttrs (ports != null && ports.http != null) { - http = ports.http; - } - // optionalAttrs (ports != null && ports.grpc != null) { - grpc = ports.grpc; - }; - - mkProcessSpec = process: - { - command = process.command; - args = process.args; - env = process.env; - } - // optionalAttrs (process.workingDir != null) { - working_dir = process.workingDir; - }; - - mkContainerPortSpec = port: - { - container_port = port.containerPort; - } - // optionalAttrs (port.hostPort != null) { - host_port = port.hostPort; - } - // optionalAttrs (port.protocol != null) { - protocol = port.protocol; - }; - - mkContainerVolumeSpec = volume: - { - source = volume.source; - target = volume.target; - } - // optionalAttrs volume.readOnly { - read_only = true; - }; - - mkContainerSpec = container: - { - image = container.image; - command = container.command; - args = container.args; - env = container.env; - ports = map mkContainerPortSpec container.ports; - volumes = map mkContainerVolumeSpec container.volumes; - } - // optionalAttrs (container.runtime != null) { - runtime = container.runtime; - } - // optionalAttrs (container.networkMode != null) { - network_mode = container.networkMode; - } - // optionalAttrs (container.pullPolicy != null) { - pull_policy = container.pullPolicy; - } - // optionalAttrs (container.workingDir != null) { - working_dir = container.workingDir; - }; - - mkHealthCheckSpec = healthCheck: - { - type = healthCheck.type; - } - // optionalAttrs (healthCheck.path != null) { - path = healthCheck.path; - } - // optionalAttrs (healthCheck.intervalSecs != null) { - interval_secs = healthCheck.intervalSecs; - } - // optionalAttrs (healthCheck.timeoutSecs != null) { - timeout_secs = healthCheck.timeoutSecs; - } - 
// optionalAttrs (healthCheck.startupGraceSecs != null) { - startup_grace_secs = healthCheck.startupGraceSecs; - }; - - mkPlacementPolicySpec = placement: - { - roles = placement.roles; - pools = placement.pools; - node_classes = placement.nodeClasses; - match_labels = placement.matchLabels; - max_instances_per_node = placement.maxInstancesPerNode; - } - // optionalAttrs (placement.spreadByLabel != null) { - spread_by_label = placement.spreadByLabel; - }; - - mkRolloutStrategySpec = rollout: { - max_unavailable = rollout.maxUnavailable; - max_surge = rollout.maxSurge; - }; - - mkServiceScheduleSpec = schedule: - { - replicas = schedule.replicas; - placement = mkPlacementPolicySpec schedule.placement; - rollout = mkRolloutStrategySpec schedule.rollout; - } - // optionalAttrs (schedule.instancePort != null) { - instance_port = schedule.instancePort; - } - // optionalAttrs (schedule.meshPort != null) { - mesh_port = schedule.meshPort; - } - // optionalAttrs (schedule.process != null) { - process = mkProcessSpec schedule.process; - } - // optionalAttrs (schedule.container != null) { - container = mkContainerSpec schedule.container; - } - // optionalAttrs (schedule.healthCheck != null) { - health_check = mkHealthCheckSpec schedule.healthCheck; - }; - - mkDnsPublicationSpec = dns: - { - zone = dns.zone; - ttl = dns.ttl; - mode = dns.mode; - } - // optionalAttrs (dns.name != null) { - name = dns.name; - }; - - mkLoadBalancerPublicationSpec = loadBalancer: - optionalAttrs (loadBalancer.orgId != null) { - org_id = loadBalancer.orgId; - } - // optionalAttrs (loadBalancer.projectId != null) { - project_id = loadBalancer.projectId; - } - // optionalAttrs (loadBalancer.name != null) { - name = loadBalancer.name; - } - // optionalAttrs (loadBalancer.listenerPort != null) { - listener_port = loadBalancer.listenerPort; - } - // optionalAttrs (loadBalancer.protocol != null) { - protocol = loadBalancer.protocol; - } - // optionalAttrs (loadBalancer.poolProtocol != null) { - 
pool_protocol = loadBalancer.poolProtocol; - }; - - mkServicePublicationSpec = publish: - optionalAttrs (publish.orgId != null) { - org_id = publish.orgId; - } - // optionalAttrs (publish.projectId != null) { - project_id = publish.projectId; - } - // optionalAttrs (publish.dns != null) { - dns = mkDnsPublicationSpec publish.dns; - } - // optionalAttrs (publish.loadBalancer != null) { - load_balancer = mkLoadBalancerPublicationSpec publish.loadBalancer; - }; - - mkDeployerServiceSpec = name: service: - { - inherit name; - } - // optionalAttrs (service.ports != null && mkServicePorts service.ports != { }) { - ports = mkServicePorts service.ports; - } - // optionalAttrs (service.protocol != null) { - protocol = service.protocol; - } - // optionalAttrs (service.mtlsRequired != null) { - mtls_required = service.mtlsRequired; - } - // optionalAttrs (service.meshMode != null) { - mesh_mode = service.meshMode; - } - // optionalAttrs (service.schedule != null) { - schedule = mkServiceScheduleSpec service.schedule; - } - // optionalAttrs (service.publish != null) { - publish = mkServicePublicationSpec service.publish; - }; - - mkDeployerMtlsPolicySpec = name: policy: - { - policy_id = name; - source_service = policy.sourceService; - target_service = policy.targetService; - } - // optionalAttrs (policy.environment != null) { - environment = policy.environment; - } - // optionalAttrs (policy.mtlsRequired != null) { - mtls_required = policy.mtlsRequired; - } - // optionalAttrs (policy.mode != null) { - mode = policy.mode; - }; - - mkClusterConfig = { - cluster, - hostname, - bootstrapNodeName ? null, - }: - let - node = cluster.nodes.${hostname} or (throw "Node ${hostname} not found in cluster configuration"); - - controlPlaneNodes = - filter (n: (cluster.nodes.${n}.role or "worker") == "control-plane") - (attrNames cluster.nodes); - - resolvedBootstrapNodeName = - if bootstrapNodeName != null then - bootstrapNodeName - else if cluster ? 
bootstrapNode && cluster.bootstrapNode != null then - cluster.bootstrapNode - else if cluster ? bootstrap && cluster.bootstrap ? initialPeers && cluster.bootstrap.initialPeers != [ ] then - head cluster.bootstrap.initialPeers - else - head controlPlaneNodes; - - bootstrapNode = cluster.nodes.${resolvedBootstrapNodeName} - or (throw "Bootstrap node ${resolvedBootstrapNodeName} not found in cluster configuration"); - - initialPeers = map (nodeName: { - id = nodeName; - addr = "${cluster.nodes.${nodeName}.ip}:${toString cluster.nodes.${nodeName}.raftPort}"; - }) controlPlaneNodes; - - flaredbPeers = map (nodeName: - "${cluster.nodes.${nodeName}.ip}:${toString (cluster.nodes.${nodeName}.apiPort + 100)}" - ) controlPlaneNodes; - - chainfireLeaderUrl = "http://${bootstrapNode.ip}:8081"; - flaredbLeaderUrl = "http://${bootstrapNode.ip}:8082"; - in { - node_id = hostname; - node_role = node.role; - bootstrap = hostname == resolvedBootstrapNodeName; - cluster_name = cluster.name; - leader_url = chainfireLeaderUrl; - chainfire_leader_url = chainfireLeaderUrl; - flaredb_leader_url = flaredbLeaderUrl; - raft_addr = "${node.ip}:${toString node.raftPort}"; - initial_peers = initialPeers; - flaredb_peers = flaredbPeers; - services = node.services; - metadata = node.metadata; - } // optionalAttrs (cluster ? bgp && cluster.bgp ? asn) { - bgp_asn = cluster.bgp.asn; - }; - - mkDeployerClusterState = cluster: - let - deployer = cluster.deployer or { }; - clusterId = - if deployer ? clusterId && deployer.clusterId != null then - deployer.clusterId - else - cluster.name; - nodeClasses = deployer.nodeClasses or { }; - pools = deployer.pools or { }; - enrollmentRules = deployer.enrollmentRules or { }; - hostDeployments = deployer.hostDeployments or { }; - services = deployer.services or { }; - mtlsPolicies = deployer.mtlsPolicies or { }; - in { - cluster = { - cluster_id = clusterId; - } // optionalAttrs (deployer ? 
environment && deployer.environment != null) { - environment = deployer.environment; - }; - nodes = map (nodeName: mkDeployerNodeSpec nodeName cluster.nodes.${nodeName}) (attrNames cluster.nodes); - node_classes = map (name: mkDeployerNodeClassSpec name nodeClasses.${name}) (attrNames nodeClasses); - pools = map (name: mkDeployerPoolSpec name pools.${name}) (attrNames pools); - enrollment_rules = map (name: mkDeployerEnrollmentRuleSpec name enrollmentRules.${name}) (attrNames enrollmentRules); - host_deployments = map (name: mkDeployerHostDeploymentSpec name hostDeployments.${name}) (attrNames hostDeployments); - services = map (name: mkDeployerServiceSpec name services.${name}) (attrNames services); - instances = [ ]; - mtls_policies = map (name: mkDeployerMtlsPolicySpec name mtlsPolicies.${name}) (attrNames mtlsPolicies); - }; -in -{ - inherit - mkInstallPlanType - mkDesiredSystemType - mkHostDeploymentSelectorType - mkHostDeploymentType - mkServicePortsType - mkProcessType - mkContainerPortType - mkContainerVolumeType - mkContainerType - mkHealthCheckType - mkPlacementPolicyType - mkRolloutStrategyType - mkServiceScheduleType - mkDnsPublicationType - mkLoadBalancerPublicationType - mkServicePublicationType - mkServiceType - mkMtlsPolicyType - mkNodeType - mkNodeClassType - mkNodePoolType - mkEnrollmentRuleType - mkClusterConfig - mkDeployerClusterState; -} +import ../../nix-nos/lib/cluster-config-lib.nix { inherit lib; } diff --git a/nix/modules/first-boot-automation.nix b/nix/modules/first-boot-automation.nix index 8f1fb89..776b4b8 100644 --- a/nix/modules/first-boot-automation.nix +++ b/nix/modules/first-boot-automation.nix @@ -2,39 +2,40 @@ let cfg = config.services.first-boot-automation; - - # Read cluster config from nix-nos or file - # Priority: 1) nix-nos topology, 2) cluster-config.json file, 3) defaults - clusterConfigExists = builtins.pathExists cfg.configFile; - - # Check if nix-nos is available and enabled + configFilePath = toString cfg.configFile; 
+ configEtcPath = + if lib.hasPrefix "/etc/" configFilePath + then lib.removePrefix "/etc/" configFilePath + else null; + hasPlasmacloudManagedClusterConfig = + (config ? plasmacloud) + && (config.plasmacloud ? cluster) + && (config.plasmacloud.cluster.generated.nodeClusterConfig or null) != null; + availableNixNOSClusters = builtins.attrNames (config.nix-nos.clusters or {}); + resolvedNixNOSClusterName = + if builtins.elem cfg.nixnosClusterName availableNixNOSClusters then + cfg.nixnosClusterName + else if + (config ? plasmacloud) + && (config.plasmacloud ? cluster) + && (config.plasmacloud.cluster.enable or false) + && builtins.elem config.plasmacloud.cluster.name availableNixNOSClusters + then + config.plasmacloud.cluster.name + else if builtins.length availableNixNOSClusters == 1 then + builtins.head availableNixNOSClusters + else + cfg.nixnosClusterName; useNixNOS = cfg.useNixNOS && (config.nix-nos.enable or false) && - (builtins.length (builtins.attrNames (config.nix-nos.clusters or {}))) > 0; - - clusterConfig = + (builtins.length availableNixNOSClusters) > 0; + nixNOSClusterConfig = if useNixNOS then - # Generate config from nix-nos topology config.nix-nos.generateClusterConfig { hostname = config.networking.hostName; - clusterName = cfg.nixnosClusterName; + clusterName = resolvedNixNOSClusterName; } - else if clusterConfigExists && cfg.enable then - # Read from cluster-config.json file (legacy) - builtins.fromJSON (builtins.readFile cfg.configFile) else - # Fallback defaults - { - node_id = "unknown"; - node_role = "control-plane"; - bootstrap = false; - cluster_name = "default-cluster"; - leader_url = "http://localhost:8081"; - chainfire_leader_url = "http://localhost:8081"; - flaredb_leader_url = "http://localhost:8082"; - raft_addr = "127.0.0.1:2380"; - initial_peers = []; - flaredb_peers = []; - }; + null; # Helper function to create cluster join service mkClusterJoinService = { @@ -45,17 +46,7 @@ let joinPath ? null, port, description ? 
"" - }: - let - leaderUrl = - clusterConfig.${leaderUrlKey} - or clusterConfig.leader_url - or defaultLeaderUrl; - nodeId = clusterConfig.node_id or "unknown"; - raftAddr = clusterConfig.raft_addr or "127.0.0.1:${toString (port + 1)}"; - isBootstrap = clusterConfig.bootstrap or false; - in - { + }: { description = "Cluster Join for ${description}"; after = [ "network-online.target" "${serviceName}.service" ]; wants = [ "network-online.target" ]; @@ -265,6 +256,34 @@ in }; config = lib.mkIf cfg.enable { + assertions = [ + { + assertion = (!cfg.useNixNOS) || (config.nix-nos.enable or false); + message = "services.first-boot-automation.useNixNOS requires nix-nos.enable = true"; + } + { + assertion = (!cfg.useNixNOS) || ((builtins.length availableNixNOSClusters) > 0); + message = "services.first-boot-automation.useNixNOS requires at least one nix-nos.clusters entry"; + } + { + assertion = (!cfg.useNixNOS) || (configEtcPath != null); + message = "services.first-boot-automation.useNixNOS requires services.first-boot-automation.configFile to live under /etc"; + } + { + assertion = (!cfg.useNixNOS) || builtins.elem resolvedNixNOSClusterName availableNixNOSClusters; + message = "services.first-boot-automation.useNixNOS could not resolve nix-nos cluster '${cfg.nixnosClusterName}' (available: ${lib.concatStringsSep ", " availableNixNOSClusters})"; + } + ]; + + environment.etc = lib.mkIf (useNixNOS && !hasPlasmacloudManagedClusterConfig) ( + lib.optionalAttrs (configEtcPath != null) { + "${configEtcPath}" = { + text = builtins.toJSON nixNOSClusterConfig; + mode = "0600"; + }; + } + ); + # Chainfire cluster join service systemd.services.chainfire-cluster-join = lib.mkIf cfg.enableChainfire ( mkClusterJoinService { diff --git a/nix/modules/nix-nos/topology.nix b/nix/modules/nix-nos/topology.nix index c498470..def826e 100644 --- a/nix/modules/nix-nos/topology.nix +++ b/nix/modules/nix-nos/topology.nix @@ -1,78 +1,3 @@ { config, lib, pkgs, ... 
}: -with lib; - -let - cfg = config.nix-nos; - clusterConfigLib = import ../cluster-config-lib.nix { inherit lib; }; - nodeType = clusterConfigLib.mkNodeType types; - - # Cluster definition type - clusterType = types.submodule { - options = { - name = mkOption { - type = types.str; - default = "plasmacloud-cluster"; - description = "Cluster name"; - }; - - nodes = mkOption { - type = types.attrsOf nodeType; - default = {}; - description = "Map of node names to their configurations"; - example = literalExpression '' - { - "node01" = { - role = "control-plane"; - ip = "10.0.1.10"; - services = [ "chainfire" "flaredb" ]; - }; - } - ''; - }; - - bootstrapNode = mkOption { - type = types.nullOr types.str; - default = null; - description = "Name of the bootstrap node (first control-plane node if null)"; - }; - }; - }; - -in { - options.nix-nos = { - enable = mkEnableOption "Nix-NOS declarative cluster management"; - - clusters = mkOption { - type = types.attrsOf clusterType; - default = {}; - description = "Map of cluster names to their configurations"; - }; - - # Helper function to generate cluster-config.json for a specific node - generateClusterConfig = mkOption { - type = types.functionTo types.attrs; - default = { hostname, clusterName ? 
"plasmacloud" }: - let - cluster = cfg.clusters.${clusterName} or (throw "Cluster ${clusterName} not found"); - in clusterConfigLib.mkClusterConfig { - inherit cluster hostname; - bootstrapNodeName = - if cluster.bootstrapNode != null - then cluster.bootstrapNode - else null; - }; - description = "Function to generate cluster-config.json for a specific hostname"; - }; - }; - - config = mkIf cfg.enable { - # Ensure at least one cluster is defined - assertions = [ - { - assertion = (builtins.length (attrNames cfg.clusters)) > 0; - message = "nix-nos.clusters must contain at least one cluster definition"; - } - ]; - }; -} +import ../../../nix-nos/modules/topology.nix { inherit config lib pkgs; } diff --git a/nix/modules/plasmacloud-cluster.nix b/nix/modules/plasmacloud-cluster.nix index 3aa54d8..168e870 100644 --- a/nix/modules/plasmacloud-cluster.nix +++ b/nix/modules/plasmacloud-cluster.nix @@ -4,7 +4,7 @@ with lib; let cfg = config.plasmacloud.cluster; - clusterConfigLib = import ./cluster-config-lib.nix { inherit lib; }; + clusterConfigLib = import ../../nix-nos/lib/cluster-config-lib.nix { inherit lib; }; nodeType = clusterConfigLib.mkNodeType types; nodeClassType = clusterConfigLib.mkNodeClassType types; nodePoolType = clusterConfigLib.mkNodePoolType types; @@ -28,6 +28,7 @@ let else null; + generatedNixNOSTopologyCluster = clusterConfigLib.mkNixNOSTopologyCluster cfg; generatedDeployerClusterState = clusterConfigLib.mkDeployerClusterState cfg; in { @@ -212,6 +213,11 @@ in { mode = "0600"; }; + nix-nos.enable = mkDefault true; + nix-nos.clusters = { + "${cfg.name}" = mkDefault generatedNixNOSTopologyCluster; + }; + plasmacloud.cluster.generated.nodeClusterConfig = generatedNodeClusterConfig; plasmacloud.cluster.generated.deployerClusterState = generatedDeployerClusterState; diff --git a/nix/test-cluster/common.nix b/nix/test-cluster/common.nix index 817ecfd..f431eb5 100644 --- a/nix/test-cluster/common.nix +++ b/nix/test-cluster/common.nix @@ -12,6 +12,7 @@ in { 
imports = [ (modulesPath + "/virtualisation/qemu-vm.nix") + ../../nix-nos/modules/default.nix ../modules/plasmacloud-cluster.nix ]; diff --git a/nix/tests/first-boot-topology-vm-smoke.nix b/nix/tests/first-boot-topology-vm-smoke.nix new file mode 100644 index 0000000..c17ccd8 --- /dev/null +++ b/nix/tests/first-boot-topology-vm-smoke.nix @@ -0,0 +1,142 @@ +{ + pkgs, + photoncloudPackages, + photoncloudModule, + nixNosModule, +}: + +{ + name = "first-boot-topology-vm-smoke"; + + nodes = { + bridge01 = + { ... }: + { + imports = [ + nixNosModule + photoncloudModule + ]; + + networking.hostName = "bridge01"; + networking.firewall.enable = false; + + environment.systemPackages = with pkgs; [ + jq + ]; + + services.chainfire = { + enable = true; + package = photoncloudPackages.chainfire-server; + nodeId = "bridge01"; + apiAddr = "127.0.0.1:2379"; + raftAddr = "127.0.0.1:2380"; + initialPeers = [ "bridge01=127.0.0.1:2380" ]; + }; + systemd.services.chainfire.environment.RUST_LOG = "error"; + + services.first-boot-automation = { + enable = true; + useNixNOS = true; + enableFlareDB = false; + enableIAM = false; + }; + + plasmacloud.cluster = { + enable = true; + name = "bridge-cluster"; + nodes.bridge01 = { + role = "control-plane"; + ip = "127.0.0.1"; + services = [ "chainfire" ]; + raftPort = 2380; + apiPort = 2379; + }; + bootstrap.initialPeers = [ "bridge01" ]; + bgp.asn = 64512; + }; + + system.stateVersion = "24.11"; + }; + + stand01 = + { ... 
}: + { + imports = [ + nixNosModule + photoncloudModule + ]; + + networking.hostName = "stand01"; + networking.firewall.enable = false; + + environment.systemPackages = with pkgs; [ + jq + ]; + + nix-nos = { + enable = true; + clusters.standalone = { + name = "standalone-cluster"; + bootstrapNode = "stand01"; + nodes.stand01 = { + role = "control-plane"; + ip = "127.0.0.1"; + services = [ "chainfire" ]; + raftPort = 2380; + apiPort = 2379; + }; + }; + }; + + services.chainfire = { + enable = true; + package = photoncloudPackages.chainfire-server; + nodeId = "stand01"; + apiAddr = "127.0.0.1:2379"; + raftAddr = "127.0.0.1:2380"; + initialPeers = [ "stand01=127.0.0.1:2380" ]; + }; + systemd.services.chainfire.environment.RUST_LOG = "error"; + + services.first-boot-automation = { + enable = true; + useNixNOS = true; + nixnosClusterName = "standalone"; + enableFlareDB = false; + enableIAM = false; + }; + + system.stateVersion = "24.11"; + }; + }; + + testScript = '' + start_all() + serial_stdout_off() + + scenarios = [ + (bridge01, "bridge01", "bridge-cluster"), + (stand01, "stand01", "standalone-cluster"), + ] + + for machine, node_id, cluster_name in scenarios: + print(f"validating {node_id}") + machine.wait_for_unit("chainfire.service") + print(f"{node_id}: chainfire up") + machine.wait_until_succeeds("test -f /etc/nixos/secrets/cluster-config.json") + print(f"{node_id}: config file present") + machine.succeed( + "bash -lc 'systemctl restart chainfire-cluster-join.service " + "|| (systemctl status chainfire-cluster-join.service --no-pager; " + "journalctl -u chainfire-cluster-join.service --no-pager -n 200; exit 1)'" + ) + machine.wait_until_succeeds("test -f /var/lib/first-boot-automation/.chainfire-initialized") + print(f"{node_id}: bootstrap marker present") + machine.succeed("systemctl is-active chainfire-cluster-join.service") + + machine.succeed(f"jq -r '.node_id' /etc/nixos/secrets/cluster-config.json | grep -x '{node_id}'") + machine.succeed("jq -r 
'.bootstrap' /etc/nixos/secrets/cluster-config.json | grep -x true") + machine.succeed(f"jq -r '.cluster_name' /etc/nixos/secrets/cluster-config.json | grep -x '{cluster_name}'") + machine.succeed("jq -r '.chainfire_leader_url' /etc/nixos/secrets/cluster-config.json | grep -x 'http://127.0.0.1:8081'") + ''; +}