Integrate topology-driven bootstrap into nix-nos
This commit is contained in:
parent
795b8ad70c
commit
96d46a3603
10 changed files with 1770 additions and 1581 deletions
|
|
@ -886,6 +886,15 @@
|
|||
};
|
||||
|
||||
checks = {
|
||||
# Flake check that runs the NixOS VM test defined in
# ./nix/tests/first-boot-topology-vm-smoke.nix via pkgs.testers.runNixOSTest.
first-boot-topology-vm-smoke = pkgs.testers.runNixOSTest (
|
||||
import ./nix/tests/first-boot-topology-vm-smoke.nix {
|
||||
inherit pkgs;
|
||||
# This flake's own package set for the current system.
photoncloudPackages = self.packages.${system};
|
||||
# This flake's default NixOS module.
photoncloudModule = self.nixosModules.default;
|
||||
# Default NixOS module of the nix-nos input.
nixNosModule = nix-nos.nixosModules.default;
|
||||
}
|
||||
);
|
||||
|
||||
deployer-vm-smoke = pkgs.testers.runNixOSTest (
|
||||
import ./nix/tests/deployer-vm-smoke.nix {
|
||||
inherit pkgs;
|
||||
|
|
|
|||
1483
nix-nos/lib/cluster-config-lib.nix
Normal file
1483
nix-nos/lib/cluster-config-lib.nix
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
{
|
||||
imports = [
|
||||
./topology.nix
|
||||
./network/interfaces.nix
|
||||
./network/vlans.nix
|
||||
./bgp/default.nix
|
||||
|
|
|
|||
68
nix-nos/modules/topology.nix
Normal file
68
nix-nos/modules/topology.nix
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
# nix-nos topology module: declares nix-nos.clusters and the
# nix-nos.generateClusterConfig helper. It sets no configuration of its own.
{ config, lib, pkgs, ... }:
|
||||
|
||||
with lib;
|
||||
|
||||
let
|
||||
# Shorthand for this module's option tree.
cfg = config.nix-nos;
|
||||
# Shared helper library; this file uses its mkNodeType and mkClusterConfig.
clusterConfigLib = import ../lib/cluster-config-lib.nix { inherit lib; };
|
||||
# Per-node option type, built by the shared library from lib.types.
nodeType = clusterConfigLib.mkNodeType types;
|
||||
|
||||
# Cluster definition type
# Submodule with three options: name, nodes and bootstrapNode.
|
||||
clusterType = types.submodule {
|
||||
options = {
|
||||
name = mkOption {
|
||||
type = types.str;
|
||||
default = "plasmacloud-cluster";
|
||||
description = "Cluster name";
|
||||
};
|
||||
|
||||
# Attribute set keyed by node name; each value is checked against nodeType.
nodes = mkOption {
|
||||
type = types.attrsOf nodeType;
|
||||
default = {};
|
||||
description = "Map of node names to their configurations";
|
||||
example = literalExpression ''
|
||||
{
|
||||
"node01" = {
|
||||
role = "control-plane";
|
||||
ip = "10.0.1.10";
|
||||
services = [ "chainfire" "flaredb" ];
|
||||
};
|
||||
}
|
||||
'';
|
||||
};
|
||||
|
||||
# Optional explicit bootstrap node. This module forwards the value (including
# null) unchanged; the "first control-plane node" fallback named in the
# description is presumably implemented in mkClusterConfig -- TODO confirm.
bootstrapNode = mkOption {
|
||||
type = types.nullOr types.str;
|
||||
default = null;
|
||||
description = "Name of the bootstrap node (first control-plane node if null)";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
in {
|
||||
# NOTE(review): nix-nos.enable is read below but not declared in this file;
# presumably a sibling nix-nos module declares it -- confirm.
options.nix-nos = {
|
||||
clusters = mkOption {
|
||||
type = types.attrsOf clusterType;
|
||||
default = {};
|
||||
description = "Map of cluster names to their configurations";
|
||||
};
|
||||
|
||||
# Helper function to generate cluster-config.json for a specific node
# Arguments: hostname (required) and clusterName (defaults to "plasmacloud").
# Evaluation throws "Cluster <name> not found" when clusterName is not a key
# of nix-nos.clusters. Note the lookup is by attribute key, not by the
# cluster's `name` option.
|
||||
generateClusterConfig = mkOption {
|
||||
type = types.functionTo types.attrs;
|
||||
default = { hostname, clusterName ? "plasmacloud" }:
|
||||
let
|
||||
cluster = cfg.clusters.${clusterName} or (throw "Cluster ${clusterName} not found");
|
||||
in clusterConfigLib.mkClusterConfig {
|
||||
inherit cluster hostname;
|
||||
# Both branches yield cluster.bootstrapNode as-is (null stays null).
bootstrapNodeName =
|
||||
if cluster.bootstrapNode != null
|
||||
then cluster.bootstrapNode
|
||||
else null;
|
||||
};
|
||||
description = "Function to generate cluster-config.json for a specific hostname";
|
||||
};
|
||||
};
|
||||
|
||||
# Intentionally empty: enabling nix-nos adds no settings from this module.
config = mkIf cfg.enable { };
|
||||
}
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -2,39 +2,40 @@
|
|||
|
||||
let
|
||||
cfg = config.services.first-boot-automation;
|
||||
|
||||
# Read cluster config from nix-nos or file
|
||||
# Priority: 1) nix-nos topology, 2) cluster-config.json file, 3) defaults
|
||||
clusterConfigExists = builtins.pathExists cfg.configFile;
|
||||
|
||||
# Check if nix-nos is available and enabled
|
||||
# String form of the configured cluster-config path.
configFilePath = toString cfg.configFile;
|
||||
# configFile path with the leading "/etc/" removed, or null when the path is
# not under /etc/. Used as the environment.etc key further down in this module.
configEtcPath =
|
||||
if lib.hasPrefix "/etc/" configFilePath
|
||||
then lib.removePrefix "/etc/" configFilePath
|
||||
else null;
|
||||
# True when config.plasmacloud.cluster exists and exposes a non-null
# generated.nodeClusterConfig. When true, this module does not emit its own
# environment.etc entry (see the mkIf condition on environment.etc).
hasPlasmacloudManagedClusterConfig =
|
||||
(config ? plasmacloud)
|
||||
&& (config.plasmacloud ? cluster)
|
||||
&& (config.plasmacloud.cluster.generated.nodeClusterConfig or null) != null;
|
||||
# Attribute names declared under nix-nos.clusters ([] when the option is absent).
availableNixNOSClusters = builtins.attrNames (config.nix-nos.clusters or {});
|
||||
# Cluster key to generate the config from, chosen in priority order:
#   1. cfg.nixnosClusterName, if it is a declared cluster;
#   2. plasmacloud.cluster.name, if that module is enabled and the name is declared;
#   3. the only declared cluster, if exactly one exists;
#   4. cfg.nixnosClusterName unchanged. This may name a cluster that does not
#      exist; the assertions in `config` check for that case.
resolvedNixNOSClusterName =
|
||||
if builtins.elem cfg.nixnosClusterName availableNixNOSClusters then
|
||||
cfg.nixnosClusterName
|
||||
else if
|
||||
(config ? plasmacloud)
|
||||
&& (config.plasmacloud ? cluster)
|
||||
&& (config.plasmacloud.cluster.enable or false)
|
||||
&& builtins.elem config.plasmacloud.cluster.name availableNixNOSClusters
|
||||
then
|
||||
config.plasmacloud.cluster.name
|
||||
else if builtins.length availableNixNOSClusters == 1 then
|
||||
builtins.head availableNixNOSClusters
|
||||
else
|
||||
cfg.nixnosClusterName;
|
||||
useNixNOS = cfg.useNixNOS && (config.nix-nos.enable or false) &&
|
||||
(builtins.length (builtins.attrNames (config.nix-nos.clusters or {}))) > 0;
|
||||
|
||||
clusterConfig =
|
||||
(builtins.length availableNixNOSClusters) > 0;
|
||||
nixNOSClusterConfig =
|
||||
if useNixNOS then
|
||||
# Generate config from nix-nos topology
|
||||
config.nix-nos.generateClusterConfig {
|
||||
hostname = config.networking.hostName;
|
||||
clusterName = cfg.nixnosClusterName;
|
||||
clusterName = resolvedNixNOSClusterName;
|
||||
}
|
||||
else if clusterConfigExists && cfg.enable then
|
||||
# Read from cluster-config.json file (legacy)
|
||||
builtins.fromJSON (builtins.readFile cfg.configFile)
|
||||
else
|
||||
# Fallback defaults
|
||||
{
|
||||
node_id = "unknown";
|
||||
node_role = "control-plane";
|
||||
bootstrap = false;
|
||||
cluster_name = "default-cluster";
|
||||
leader_url = "http://localhost:8081";
|
||||
chainfire_leader_url = "http://localhost:8081";
|
||||
flaredb_leader_url = "http://localhost:8082";
|
||||
raft_addr = "127.0.0.1:2380";
|
||||
initial_peers = [];
|
||||
flaredb_peers = [];
|
||||
};
|
||||
null;
|
||||
|
||||
# Helper function to create cluster join service
|
||||
mkClusterJoinService = {
|
||||
|
|
@ -45,17 +46,7 @@ let
|
|||
joinPath ? null,
|
||||
port,
|
||||
description ? ""
|
||||
}:
|
||||
let
|
||||
leaderUrl =
|
||||
clusterConfig.${leaderUrlKey}
|
||||
or clusterConfig.leader_url
|
||||
or defaultLeaderUrl;
|
||||
nodeId = clusterConfig.node_id or "unknown";
|
||||
raftAddr = clusterConfig.raft_addr or "127.0.0.1:${toString (port + 1)}";
|
||||
isBootstrap = clusterConfig.bootstrap or false;
|
||||
in
|
||||
{
|
||||
}: {
|
||||
description = "Cluster Join for ${description}";
|
||||
after = [ "network-online.target" "${serviceName}.service" ];
|
||||
wants = [ "network-online.target" ];
|
||||
|
|
@ -265,6 +256,34 @@ in
|
|||
};
|
||||
|
||||
config = lib.mkIf cfg.enable {
|
||||
# Guards for useNixNOS. Each assertion is trivially true when
# cfg.useNixNOS is false, so they only constrain the nix-nos path.
assertions = [
|
||||
{
|
||||
# nix-nos must be enabled (a missing option counts as disabled).
assertion = (!cfg.useNixNOS) || (config.nix-nos.enable or false);
|
||||
message = "services.first-boot-automation.useNixNOS requires nix-nos.enable = true";
|
||||
}
|
||||
{
|
||||
# At least one cluster must be declared under nix-nos.clusters.
assertion = (!cfg.useNixNOS) || ((builtins.length availableNixNOSClusters) > 0);
|
||||
message = "services.first-boot-automation.useNixNOS requires at least one nix-nos.clusters entry";
|
||||
}
|
||||
{
|
||||
# configEtcPath is null exactly when configFile does not start with "/etc/".
assertion = (!cfg.useNixNOS) || (configEtcPath != null);
|
||||
message = "services.first-boot-automation.useNixNOS requires services.first-boot-automation.configFile to live under /etc";
|
||||
}
|
||||
{
|
||||
# The resolved name must be a declared cluster. The message prints the
# requested cfg.nixnosClusterName, not the resolved value.
assertion = (!cfg.useNixNOS) || builtins.elem resolvedNixNOSClusterName availableNixNOSClusters;
|
||||
message = "services.first-boot-automation.useNixNOS could not resolve nix-nos cluster '${cfg.nixnosClusterName}' (available: ${lib.concatStringsSep ", " availableNixNOSClusters})";
|
||||
}
|
||||
];
|
||||
|
||||
# Write the nix-nos-derived cluster config as JSON under /etc, but only when
# the nix-nos path is active and plasmacloud.cluster does not already expose
# generated.nodeClusterConfig. optionalAttrs yields {} when configEtcPath is
# null, so no entry is produced in that case.
environment.etc = lib.mkIf (useNixNOS && !hasPlasmacloudManagedClusterConfig) (
|
||||
lib.optionalAttrs (configEtcPath != null) {
|
||||
"${configEtcPath}" = {
|
||||
text = builtins.toJSON nixNOSClusterConfig;
|
||||
mode = "0600";
|
||||
};
|
||||
}
|
||||
);
|
||||
|
||||
# Chainfire cluster join service
|
||||
systemd.services.chainfire-cluster-join = lib.mkIf cfg.enableChainfire (
|
||||
mkClusterJoinService {
|
||||
|
|
|
|||
|
|
@ -1,78 +1,3 @@
|
|||
{ config, lib, pkgs, ... }:
|
||||
|
||||
with lib;
|
||||
|
||||
let
|
||||
cfg = config.nix-nos;
|
||||
clusterConfigLib = import ../cluster-config-lib.nix { inherit lib; };
|
||||
nodeType = clusterConfigLib.mkNodeType types;
|
||||
|
||||
# Cluster definition type
|
||||
clusterType = types.submodule {
|
||||
options = {
|
||||
name = mkOption {
|
||||
type = types.str;
|
||||
default = "plasmacloud-cluster";
|
||||
description = "Cluster name";
|
||||
};
|
||||
|
||||
nodes = mkOption {
|
||||
type = types.attrsOf nodeType;
|
||||
default = {};
|
||||
description = "Map of node names to their configurations";
|
||||
example = literalExpression ''
|
||||
{
|
||||
"node01" = {
|
||||
role = "control-plane";
|
||||
ip = "10.0.1.10";
|
||||
services = [ "chainfire" "flaredb" ];
|
||||
};
|
||||
}
|
||||
'';
|
||||
};
|
||||
|
||||
bootstrapNode = mkOption {
|
||||
type = types.nullOr types.str;
|
||||
default = null;
|
||||
description = "Name of the bootstrap node (first control-plane node if null)";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
in {
|
||||
options.nix-nos = {
|
||||
enable = mkEnableOption "Nix-NOS declarative cluster management";
|
||||
|
||||
clusters = mkOption {
|
||||
type = types.attrsOf clusterType;
|
||||
default = {};
|
||||
description = "Map of cluster names to their configurations";
|
||||
};
|
||||
|
||||
# Helper function to generate cluster-config.json for a specific node
|
||||
generateClusterConfig = mkOption {
|
||||
type = types.functionTo types.attrs;
|
||||
default = { hostname, clusterName ? "plasmacloud" }:
|
||||
let
|
||||
cluster = cfg.clusters.${clusterName} or (throw "Cluster ${clusterName} not found");
|
||||
in clusterConfigLib.mkClusterConfig {
|
||||
inherit cluster hostname;
|
||||
bootstrapNodeName =
|
||||
if cluster.bootstrapNode != null
|
||||
then cluster.bootstrapNode
|
||||
else null;
|
||||
};
|
||||
description = "Function to generate cluster-config.json for a specific hostname";
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable {
|
||||
# Ensure at least one cluster is defined
|
||||
assertions = [
|
||||
{
|
||||
assertion = (builtins.length (attrNames cfg.clusters)) > 0;
|
||||
message = "nix-nos.clusters must contain at least one cluster definition";
|
||||
}
|
||||
];
|
||||
};
|
||||
}
|
||||
import ../../../nix-nos/modules/topology.nix { inherit config lib pkgs; }
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ with lib;
|
|||
|
||||
let
|
||||
cfg = config.plasmacloud.cluster;
|
||||
clusterConfigLib = import ./cluster-config-lib.nix { inherit lib; };
|
||||
clusterConfigLib = import ../../nix-nos/lib/cluster-config-lib.nix { inherit lib; };
|
||||
nodeType = clusterConfigLib.mkNodeType types;
|
||||
nodeClassType = clusterConfigLib.mkNodeClassType types;
|
||||
nodePoolType = clusterConfigLib.mkNodePoolType types;
|
||||
|
|
@ -28,6 +28,7 @@ let
|
|||
else
|
||||
null;
|
||||
|
||||
generatedNixNOSTopologyCluster = clusterConfigLib.mkNixNOSTopologyCluster cfg;
|
||||
generatedDeployerClusterState = clusterConfigLib.mkDeployerClusterState cfg;
|
||||
|
||||
in {
|
||||
|
|
@ -212,6 +213,11 @@ in {
|
|||
mode = "0600";
|
||||
};
|
||||
|
||||
nix-nos.enable = mkDefault true;
|
||||
nix-nos.clusters = {
|
||||
"${cfg.name}" = mkDefault generatedNixNOSTopologyCluster;
|
||||
};
|
||||
|
||||
plasmacloud.cluster.generated.nodeClusterConfig = generatedNodeClusterConfig;
|
||||
plasmacloud.cluster.generated.deployerClusterState = generatedDeployerClusterState;
|
||||
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ in
|
|||
{
|
||||
imports = [
|
||||
(modulesPath + "/virtualisation/qemu-vm.nix")
|
||||
../../nix-nos/modules/default.nix
|
||||
../modules/plasmacloud-cluster.nix
|
||||
];
|
||||
|
||||
|
|
|
|||
142
nix/tests/first-boot-topology-vm-smoke.nix
Normal file
142
nix/tests/first-boot-topology-vm-smoke.nix
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
{
|
||||
pkgs,
|
||||
photoncloudPackages,
|
||||
photoncloudModule,
|
||||
nixNosModule,
|
||||
}:
|
||||
|
||||
{
|
||||
name = "first-boot-topology-vm-smoke";
|
||||
|
||||
nodes = {
|
||||
# Scenario 1: the topology is declared through plasmacloud.cluster only.
# Nothing is set under nix-nos directly and nixnosClusterName is left at its
# default. NOTE(review): this presumably relies on the plasmacloud cluster
# module populating nix-nos.enable / nix-nos.clusters -- confirm.
bridge01 =
|
||||
{ ... }:
|
||||
{
|
||||
imports = [
|
||||
nixNosModule
|
||||
photoncloudModule
|
||||
];
|
||||
|
||||
networking.hostName = "bridge01";
|
||||
networking.firewall.enable = false;
|
||||
|
||||
# jq is used by the test script to inspect the generated JSON.
environment.systemPackages = with pkgs; [
|
||||
jq
|
||||
];
|
||||
|
||||
# Single-node chainfire bound to loopback, with itself as the only peer.
services.chainfire = {
|
||||
enable = true;
|
||||
package = photoncloudPackages.chainfire-server;
|
||||
nodeId = "bridge01";
|
||||
apiAddr = "127.0.0.1:2379";
|
||||
raftAddr = "127.0.0.1:2380";
|
||||
initialPeers = [ "bridge01=127.0.0.1:2380" ];
|
||||
};
|
||||
systemd.services.chainfire.environment.RUST_LOG = "error";
|
||||
|
||||
# FlareDB and IAM join handling are switched off for this test.
services.first-boot-automation = {
|
||||
enable = true;
|
||||
useNixNOS = true;
|
||||
enableFlareDB = false;
|
||||
enableIAM = false;
|
||||
};
|
||||
|
||||
# Ports match the chainfire service settings above.
plasmacloud.cluster = {
|
||||
enable = true;
|
||||
name = "bridge-cluster";
|
||||
nodes.bridge01 = {
|
||||
role = "control-plane";
|
||||
ip = "127.0.0.1";
|
||||
services = [ "chainfire" ];
|
||||
raftPort = 2380;
|
||||
apiPort = 2379;
|
||||
};
|
||||
bootstrap.initialPeers = [ "bridge01" ];
|
||||
bgp.asn = 64512;
|
||||
};
|
||||
|
||||
system.stateVersion = "24.11";
|
||||
};
|
||||
|
||||
# Scenario 2: the topology is declared directly under nix-nos.clusters and
# selected explicitly via nixnosClusterName; plasmacloud.cluster is not
# configured on this node.
stand01 =
|
||||
{ ... }:
|
||||
{
|
||||
imports = [
|
||||
nixNosModule
|
||||
photoncloudModule
|
||||
];
|
||||
|
||||
networking.hostName = "stand01";
|
||||
networking.firewall.enable = false;
|
||||
|
||||
# jq is used by the test script to inspect the generated JSON.
environment.systemPackages = with pkgs; [
|
||||
jq
|
||||
];
|
||||
|
||||
# The attribute key ("standalone") differs from the cluster's name option
# ("standalone-cluster"); the test script expects the latter in cluster_name.
nix-nos = {
|
||||
enable = true;
|
||||
clusters.standalone = {
|
||||
name = "standalone-cluster";
|
||||
bootstrapNode = "stand01";
|
||||
nodes.stand01 = {
|
||||
role = "control-plane";
|
||||
ip = "127.0.0.1";
|
||||
services = [ "chainfire" ];
|
||||
raftPort = 2380;
|
||||
apiPort = 2379;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# Single-node chainfire bound to loopback, with itself as the only peer.
services.chainfire = {
|
||||
enable = true;
|
||||
package = photoncloudPackages.chainfire-server;
|
||||
nodeId = "stand01";
|
||||
apiAddr = "127.0.0.1:2379";
|
||||
raftAddr = "127.0.0.1:2380";
|
||||
initialPeers = [ "stand01=127.0.0.1:2380" ];
|
||||
};
|
||||
systemd.services.chainfire.environment.RUST_LOG = "error";
|
||||
|
||||
# nixnosClusterName refers to the attribute key under nix-nos.clusters.
services.first-boot-automation = {
|
||||
enable = true;
|
||||
useNixNOS = true;
|
||||
nixnosClusterName = "standalone";
|
||||
enableFlareDB = false;
|
||||
enableIAM = false;
|
||||
};
|
||||
|
||||
system.stateVersion = "24.11";
|
||||
};
|
||||
};
|
||||
|
||||
# Python test driver. For each (machine, node_id, cluster_name) scenario it:
#   1. waits for chainfire.service and for /etc/nixos/secrets/cluster-config.json;
#   2. restarts chainfire-cluster-join.service, printing its status and the
#      last 200 journal lines before failing if the restart fails;
#   3. waits for /var/lib/first-boot-automation/.chainfire-initialized and
#      checks that the join unit is active;
#   4. checks node_id, bootstrap, cluster_name and chainfire_leader_url in the
#      generated JSON with jq.
testScript = ''
|
||||
start_all()
|
||||
serial_stdout_off()
|
||||
|
||||
scenarios = [
|
||||
(bridge01, "bridge01", "bridge-cluster"),
|
||||
(stand01, "stand01", "standalone-cluster"),
|
||||
]
|
||||
|
||||
for machine, node_id, cluster_name in scenarios:
|
||||
print(f"validating {node_id}")
|
||||
machine.wait_for_unit("chainfire.service")
|
||||
print(f"{node_id}: chainfire up")
|
||||
machine.wait_until_succeeds("test -f /etc/nixos/secrets/cluster-config.json")
|
||||
print(f"{node_id}: config file present")
|
||||
machine.succeed(
|
||||
"bash -lc 'systemctl restart chainfire-cluster-join.service "
|
||||
"|| (systemctl status chainfire-cluster-join.service --no-pager; "
|
||||
"journalctl -u chainfire-cluster-join.service --no-pager -n 200; exit 1)'"
|
||||
)
|
||||
machine.wait_until_succeeds("test -f /var/lib/first-boot-automation/.chainfire-initialized")
|
||||
print(f"{node_id}: bootstrap marker present")
|
||||
machine.succeed("systemctl is-active chainfire-cluster-join.service")
|
||||
|
||||
machine.succeed(f"jq -r '.node_id' /etc/nixos/secrets/cluster-config.json | grep -x '{node_id}'")
|
||||
machine.succeed("jq -r '.bootstrap' /etc/nixos/secrets/cluster-config.json | grep -x true")
|
||||
machine.succeed(f"jq -r '.cluster_name' /etc/nixos/secrets/cluster-config.json | grep -x '{cluster_name}'")
|
||||
machine.succeed("jq -r '.chainfire_leader_url' /etc/nixos/secrets/cluster-config.json | grep -x 'http://127.0.0.1:8081'")
|
||||
'';
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue