photoncloud-monorepo/nix/modules/cluster-config-lib.nix

550 lines
16 KiB
Nix

{ lib }:
with lib;
let
# Build the submodule type for an install plan.
#
# `types` is the option-type set supplied by the caller. Every field of the
# resulting submodule is an optional string that defaults to null.
mkInstallPlanType = types:
  let
    # All install-plan fields share one shape: nullable string, unset by default.
    optionalStr = description: mkOption {
      type = types.nullOr types.str;
      default = null;
      inherit description;
    };
  in
  types.submodule {
    options = {
      nixosConfiguration = optionalStr "Name of the nixosConfigurations output to install";
      diskoConfigPath = optionalStr "Repository-relative Disko file used for installation";
      targetDisk = optionalStr "Explicit disk device path selected for installation";
      targetDiskById = optionalStr "Stable /dev/disk/by-id path selected for installation";
    };
  };
# Build the submodule type for a node's desired system.
#
# `types` is the option-type set supplied by the caller. All fields are
# nullable and default to null, except `healthCheckCommand`, which is a list
# of strings defaulting to the empty list.
mkDesiredSystemType = types:
  let
    # Nullable option of the given inner type, unset by default.
    nullable = inner: description: mkOption {
      type = types.nullOr inner;
      default = null;
      inherit description;
    };
    nullableStr = nullable types.str;
  in
  types.submodule {
    options = {
      nixosConfiguration = nullableStr "Name of the nixosConfigurations output to activate";
      flakeRef = nullableStr "Explicit flake reference used by nix-agent";
      switchAction = nullableStr "switch-to-configuration action for nix-agent";
      healthCheckCommand = mkOption {
        type = types.listOf types.str;
        default = [ ];
        description = "Command vector executed after activation to validate node health";
      };
      rollbackOnFailure = nullable types.bool "Whether nix-agent should roll back when the health check fails";
    };
  };
# Build the submodule type describing a single cluster node.
#
# `types` is the option-type set supplied by the caller. `ip` is the only
# option without a default; `role` defaults to "worker"; the remaining
# options default to null, an empty list or an empty attribute set, apart
# from the two port options, which carry numeric defaults.
mkNodeType = types:
  let
    # Nullable option of the given inner type, unset by default.
    nullable = inner: description: mkOption {
      type = types.nullOr inner;
      default = null;
      inherit description;
    };
    nullableStr = nullable types.str;
    # Port option with the given default value.
    portOption = default: description: mkOption {
      type = types.port;
      inherit default description;
    };
  in
  types.submodule {
    options = {
      role = mkOption {
        type = types.enum [ "control-plane" "worker" ];
        default = "worker";
        description = "Node role in the cluster";
      };
      ip = mkOption {
        type = types.str;
        description = "IP address of the node";
      };
      services = mkOption {
        type = types.listOf types.str;
        default = [ ];
        description = "Services to run on this node";
      };
      raftPort = portOption 2380 "Raft port for consensus protocols";
      apiPort = portOption 2379 "API port for cluster services";
      metadata = mkOption {
        type = types.attrsOf types.anything;
        default = { };
        description = "Additional metadata for the node";
      };
      machineId = nullableStr "Stable machine-id used to pre-register the node with deployer";
      labels = mkOption {
        type = types.attrsOf types.str;
        default = { };
        description = "User-defined labels exported into deployer cluster state";
      };
      pool = nullableStr "Logical node pool exported into deployer cluster state";
      nodeClass = nullableStr "Reusable node class assigned to this node in deployer state";
      failureDomain = nullableStr "Failure domain / zone label exported into deployer cluster state";
      nixProfile = nullableStr "Desired Nix profile associated with the node";
      installPlan = nullable (mkInstallPlanType types) "Explicit NixOS installation targets for bare-metal bootstrap";
      desiredSystem = nullable (mkDesiredSystemType types) "Desired NixOS reconciliation state exported for nix-agent";
      state = nullable (types.enum [ "pending" "provisioning" "active" "failed" "draining" ]) "Desired deployer node lifecycle state";
    };
  };
# Build the submodule type describing a reusable node class.
#
# `types` is the option-type set supplied by the caller. A node class has an
# optional description, Nix profile and install plan, plus a list of roles
# and a map of labels that default to empty.
mkNodeClassType = types:
  let
    # Nullable option of the given inner type, unset by default.
    nullable = inner: description: mkOption {
      type = types.nullOr inner;
      default = null;
      inherit description;
    };
  in
  types.submodule {
    options = {
      description = nullable types.str "Human-readable description of the node class";
      nixProfile = nullable types.str "Desired Nix profile inherited by nodes in this class";
      installPlan = nullable (mkInstallPlanType types) "Default install plan inherited by nodes in this class";
      roles = mkOption {
        type = types.listOf types.str;
        default = [ ];
        description = "Roles inherited by nodes in this class";
      };
      labels = mkOption {
        type = types.attrsOf types.str;
        default = { };
        description = "Labels inherited by nodes in this class";
      };
    };
  };
# Build the submodule type describing a node pool.
#
# `types` is the option-type set supplied by the caller. Description, node
# class and both size bounds are nullable and default to null; labels
# default to an empty attribute set.
mkNodePoolType = types:
  let
    # Nullable option of the given inner type, unset by default.
    nullable = inner: description: mkOption {
      type = types.nullOr inner;
      default = null;
      inherit description;
    };
  in
  types.submodule {
    options = {
      description = nullable types.str "Human-readable description of the node pool";
      nodeClass = nullable types.str "Default node class assigned to nodes in this pool";
      minSize = nullable types.int "Minimum desired pool size";
      maxSize = nullable types.int "Maximum desired pool size";
      labels = mkOption {
        type = types.attrsOf types.str;
        default = { };
        description = "Labels applied to nodes in this pool";
      };
    };
  };
# Build the submodule type describing an enrollment rule.
#
# `types` is the option-type set supplied by the caller. A rule consists of
# a priority (default 0), match criteria (labels, hostname prefix, IP
# prefixes) and the attributes attached to a node when the rule matches.
mkEnrollmentRuleType = types:
  let
    # Nullable option of the given inner type, unset by default.
    nullable = inner: description: mkOption {
      type = types.nullOr inner;
      default = null;
      inherit description;
    };
    nullableStr = nullable types.str;
    # List-of-strings option defaulting to the empty list.
    strList = description: mkOption {
      type = types.listOf types.str;
      default = [ ];
      inherit description;
    };
    # String-to-string map option defaulting to the empty set.
    strMap = description: mkOption {
      type = types.attrsOf types.str;
      default = { };
      inherit description;
    };
  in
  types.submodule {
    options = {
      priority = mkOption {
        type = types.int;
        default = 0;
        description = "Higher priority rules win when multiple rules match";
      };
      matchLabels = strMap "Label selectors matched against phone-home metadata";
      matchHostnamePrefix = nullableStr "Optional hostname prefix matched during enrollment";
      matchIpPrefixes = strList "Optional IP prefixes matched during enrollment";
      pool = nullableStr "Pool assigned when the rule matches";
      nodeClass = nullableStr "Node class assigned when the rule matches";
      role = nullableStr "Primary role assigned when the rule matches";
      labels = strMap "Labels attached when the rule matches";
      nixProfile = nullableStr "Nix profile attached when the rule matches";
      installPlan = nullable (mkInstallPlanType types) "Install plan attached when the rule matches";
      services = strList "Services enabled for matching nodes";
      sshAuthorizedKeys = strList "SSH authorized keys installed for matching nodes";
      nodeIdPrefix = nullableStr "Prefix used when synthesizing node IDs";
    };
  };
# Render an install plan into its snake_case form.
#
# Returns null when `plan` is null or when every field of the plan is null;
# otherwise returns an attribute set containing only the non-null fields.
mkInstallPlan = plan:
  if plan == null then
    null
  else
    let
      # Map each camelCase field to its snake_case key and drop unset ones.
      populated = filterAttrs (_: value: value != null) {
        nixos_configuration = plan.nixosConfiguration;
        disko_config_path = plan.diskoConfigPath;
        target_disk = plan.targetDisk;
        target_disk_by_id = plan.targetDiskById;
      };
    in
    if populated == { } then null else populated;
# Render a node's desired system into its snake_case form.
#
# Returns null when `desiredSystem` is null or when none of its fields are
# set; otherwise returns the set fields together with `node_id = nodeName`.
mkDesiredSystem = nodeName: desiredSystem:
  if desiredSystem == null then
    null
  else
    let
      # Fields whose "unset" value is null.
      nullableFields = filterAttrs (_: value: value != null) {
        nixos_configuration = desiredSystem.nixosConfiguration;
        flake_ref = desiredSystem.flakeRef;
        switch_action = desiredSystem.switchAction;
        rollback_on_failure = desiredSystem.rollbackOnFailure;
      };
      # The health check is a list, so "unset" means the empty list.
      healthCheck = optionalAttrs (desiredSystem.healthCheckCommand != [ ]) {
        health_check_command = desiredSystem.healthCheckCommand;
      };
      populated = nullableFields // healthCheck;
    in
    if populated == { } then null else { node_id = nodeName; } // populated;
# Render one cluster node into the deployer node spec.
#
# `nodeName` is used as both `node_id` and `hostname`. `ip`, `roles` and
# `labels` are always emitted; every other key is emitted only when the
# corresponding node field (or rendered sub-structure) is non-null.
mkDeployerNodeSpec = nodeName: node:
  let
    # Render the nested structures once and reuse the result for both the
    # presence check and the emitted value.
    installPlan = mkInstallPlan node.installPlan;
    desiredSystem = mkDesiredSystem nodeName node.desiredSystem;
  in
  {
    node_id = nodeName;
    hostname = nodeName;
    ip = node.ip;
    # A node carries exactly one role, so the list needs no de-duplication.
    roles = [ node.role ];
    labels = node.labels;
  }
  // optionalAttrs (node.machineId != null) {
    machine_id = node.machineId;
  }
  // optionalAttrs (node.pool != null) {
    pool = node.pool;
  }
  // optionalAttrs (node.nodeClass != null) {
    node_class = node.nodeClass;
  }
  // optionalAttrs (node.failureDomain != null) {
    failure_domain = node.failureDomain;
  }
  // optionalAttrs (node.nixProfile != null) {
    nix_profile = node.nixProfile;
  }
  // optionalAttrs (installPlan != null) {
    install_plan = installPlan;
  }
  // optionalAttrs (desiredSystem != null) {
    desired_system = desiredSystem;
  }
  // optionalAttrs (node.state != null) {
    state = node.state;
  };
# Render one node class into the deployer node-class spec.
#
# `name`, `roles` and `labels` are always emitted; `description`,
# `nix_profile` and `install_plan` are emitted only when non-null.
mkDeployerNodeClassSpec = name: nodeClass:
  let
    always = {
      inherit name;
      inherit (nodeClass) roles labels;
    };
    # Candidate optional keys; null values are dropped below.
    maybe = {
      description = nodeClass.description;
      nix_profile = nodeClass.nixProfile;
      install_plan = mkInstallPlan nodeClass.installPlan;
    };
  in
  always // filterAttrs (_: value: value != null) maybe;
# Render one node pool into the deployer pool spec.
#
# `name` and `labels` are always emitted; `description`, `node_class`,
# `min_size` and `max_size` are emitted only when non-null.
mkDeployerPoolSpec = name: pool:
  let
    always = {
      inherit name;
      inherit (pool) labels;
    };
    # Candidate optional keys; null values are dropped below.
    maybe = {
      description = pool.description;
      node_class = pool.nodeClass;
      min_size = pool.minSize;
      max_size = pool.maxSize;
    };
  in
  always // filterAttrs (_: value: value != null) maybe;
# Render one enrollment rule into the deployer enrollment-rule spec.
#
# The name, priority, label/IP match criteria, labels, services and SSH keys
# are always emitted; the remaining keys are emitted only when non-null.
mkDeployerEnrollmentRuleSpec = name: rule:
  let
    always = {
      inherit name;
      inherit (rule) priority labels services;
      match_labels = rule.matchLabels;
      match_ip_prefixes = rule.matchIpPrefixes;
      ssh_authorized_keys = rule.sshAuthorizedKeys;
    };
    # Candidate optional keys; null values are dropped below.
    maybe = {
      match_hostname_prefix = rule.matchHostnamePrefix;
      pool = rule.pool;
      node_class = rule.nodeClass;
      role = rule.role;
      nix_profile = rule.nixProfile;
      install_plan = mkInstallPlan rule.installPlan;
      node_id_prefix = rule.nodeIdPrefix;
    };
  in
  always // filterAttrs (_: value: value != null) maybe;
# Render the per-node cluster configuration for `hostname`.
#
# Arguments:
#   cluster           - cluster definition; `cluster.nodes` and `cluster.name`
#                       are read, and `bootstrapNode`, `bootstrap.initialPeers`
#                       and `bgp.asn` are consulted when present.
#   hostname          - name of the node to render; must be a key of
#                       `cluster.nodes`.
#   bootstrapNodeName - optional explicit bootstrap node; overrides every
#                       other source.
#
# The bootstrap node is resolved in this order: `bootstrapNodeName`,
# `cluster.bootstrapNode`, the first entry of `cluster.bootstrap.initialPeers`,
# then the first control-plane node by attribute-name order.
#
# Throws when `hostname` or the resolved bootstrap node is not in
# `cluster.nodes`, or when no bootstrap node can be resolved at all.
mkClusterConfig = {
  cluster,
  hostname,
  bootstrapNodeName ? null,
}:
  let
    node = cluster.nodes.${hostname} or (throw "Node ${hostname} not found in cluster configuration");
    # Nodes without an explicit role count as workers.
    controlPlaneNodes =
      filter (n: (cluster.nodes.${n}.role or "worker") == "control-plane")
        (attrNames cluster.nodes);
    resolvedBootstrapNodeName =
      if bootstrapNodeName != null then
        bootstrapNodeName
      else if cluster ? bootstrapNode && cluster.bootstrapNode != null then
        cluster.bootstrapNode
      else if cluster ? bootstrap && cluster.bootstrap ? initialPeers && cluster.bootstrap.initialPeers != [ ] then
        head cluster.bootstrap.initialPeers
      else if controlPlaneNodes != [ ] then
        head controlPlaneNodes
      else
        # Fail with an actionable message instead of the builtin error that
        # `head` raises on an empty list.
        throw "Cannot determine the bootstrap node: set bootstrapNodeName, cluster.bootstrapNode or cluster.bootstrap.initialPeers, or declare at least one control-plane node";
    bootstrapNode = cluster.nodes.${resolvedBootstrapNodeName}
      or (throw "Bootstrap node ${resolvedBootstrapNodeName} not found in cluster configuration");
    # Raft peers are derived from the control-plane nodes.
    initialPeers = map (nodeName: {
      id = nodeName;
      addr = "${cluster.nodes.${nodeName}.ip}:${toString cluster.nodes.${nodeName}.raftPort}";
    }) controlPlaneNodes;
    # FlareDB peer addresses use each node's apiPort offset by 100.
    flaredbPeers = map (nodeName:
      "${cluster.nodes.${nodeName}.ip}:${toString (cluster.nodes.${nodeName}.apiPort + 100)}"
    ) controlPlaneNodes;
    # Leader URLs point at fixed ports on the bootstrap node.
    chainfireLeaderUrl = "http://${bootstrapNode.ip}:8081";
    flaredbLeaderUrl = "http://${bootstrapNode.ip}:8082";
  in
  {
    node_id = hostname;
    node_role = node.role;
    bootstrap = hostname == resolvedBootstrapNodeName;
    cluster_name = cluster.name;
    leader_url = chainfireLeaderUrl;
    chainfire_leader_url = chainfireLeaderUrl;
    flaredb_leader_url = flaredbLeaderUrl;
    raft_addr = "${node.ip}:${toString node.raftPort}";
    initial_peers = initialPeers;
    flaredb_peers = flaredbPeers;
    services = node.services;
    metadata = node.metadata;
  }
  // optionalAttrs (cluster ? bgp && cluster.bgp ? asn) {
    bgp_asn = cluster.bgp.asn;
  };
# Render the whole cluster definition into the deployer cluster state.
#
# `cluster.deployer` is optional. The cluster id falls back to `cluster.name`
# when `deployer.clusterId` is missing or null, and `environment` is emitted
# only when set. Nodes, node classes, pools and enrollment rules are rendered
# in attribute-name order; `services`, `instances` and `mtls_policies` are
# always emitted as empty lists.
mkDeployerClusterState = cluster:
  let
    deployerCfg = cluster.deployer or { };
    explicitClusterId = deployerCfg.clusterId or null;
    environment = deployerCfg.environment or null;
  in
  {
    cluster =
      {
        cluster_id = if explicitClusterId != null then explicitClusterId else cluster.name;
      }
      // optionalAttrs (environment != null) {
        inherit environment;
      };
    nodes = mapAttrsToList mkDeployerNodeSpec cluster.nodes;
    node_classes = mapAttrsToList mkDeployerNodeClassSpec (deployerCfg.nodeClasses or { });
    pools = mapAttrsToList mkDeployerPoolSpec (deployerCfg.pools or { });
    enrollment_rules = mapAttrsToList mkDeployerEnrollmentRuleSpec (deployerCfg.enrollmentRules or { });
    services = [ ];
    instances = [ ];
    mtls_policies = [ ];
  };
in
{
  # Option-type constructors; each takes the option-type set as its argument.
  inherit mkInstallPlanType mkDesiredSystemType mkNodeType;
  inherit mkNodeClassType mkNodePoolType mkEnrollmentRuleType;

  # Renderers that turn a cluster definition into emitted configuration.
  inherit mkClusterConfig mkDeployerClusterState;
}