Add desired-system state and health-gated nix-agent rollback

This commit is contained in:
centra 2026-03-20 17:09:59 +09:00
parent de60f087d6
commit edd2442267
Signed by: centra
GPG key ID: 0C09689D20B25ACA
5 changed files with 373 additions and 33 deletions

View file

@ -4,7 +4,7 @@ use std::path::Path;
use anyhow::{Context, Result};
use chainfire_client::{Client, ClientError};
use deployer_types::{ClusterStateSpec, InstallPlan, NodeConfig, NodeSpec};
use deployer_types::{ClusterStateSpec, DesiredSystemSpec, InstallPlan, NodeConfig, NodeSpec};
use serde::de::DeserializeOwned;
use serde_json::{json, Value};
use tokio::fs;
@ -40,6 +40,15 @@ fn key_node(cluster_namespace: &str, cluster_id: &str, node_id: &str) -> Vec<u8>
.into_bytes()
}
/// Chainfire key for a node's desired-system record:
/// `<cluster-prefix>nodes/<node_id>/desired-system`.
fn key_desired_system(cluster_namespace: &str, cluster_id: &str, node_id: &str) -> Vec<u8> {
    let prefix = cluster_prefix(cluster_namespace, cluster_id);
    format!("{prefix}nodes/{node_id}/desired-system").into_bytes()
}
fn key_node_class(cluster_namespace: &str, cluster_id: &str, node_class: &str) -> Vec<u8> {
format!(
"{}node-classes/{}",
@ -154,6 +163,21 @@ fn node_config_from_spec(node: &NodeSpec) -> NodeConfig {
}
}
/// Derive the desired-system spec for a node from its install plan.
///
/// Returns `None` when the node has no install plan or the plan does not
/// name a NixOS configuration — in that case no desired-system record is
/// written for the node.
fn desired_system_from_spec(node: &NodeSpec) -> Option<DesiredSystemSpec> {
    // Resolve the configuration name first: the original built the whole
    // spec (cloning fields) only to discard it via `.filter(...)` when the
    // configuration was absent. Early-return avoids that wasted work.
    let nixos_configuration = node
        .install_plan
        .as_ref()
        .and_then(|plan| plan.nixos_configuration.clone())?;
    Some(DesiredSystemSpec {
        node_id: node.node_id.clone(),
        nixos_configuration: Some(nixos_configuration),
        // Flake ref and health-check policy are left unset here; the
        // nix-agent fills them from its local CLI defaults.
        flake_ref: None,
        switch_action: Some("switch".to_string()),
        health_check_command: Vec::new(),
        rollback_on_failure: Some(true),
    })
}
fn resolve_nodes(spec: &ClusterStateSpec) -> Result<Vec<NodeSpec>> {
let node_classes = spec
.node_classes
@ -441,6 +465,16 @@ pub async fn bootstrap_cluster(
client.put(&key, &value).await?;
info!(node_id = %node.node_id, "upserted node");
if let Some(desired_system) = desired_system_from_spec(node) {
client
.put(
&key_desired_system(cluster_namespace, cluster_id, &node.node_id),
&serde_json::to_vec(&desired_system)?,
)
.await?;
info!(node_id = %node.node_id, "upserted desired system");
}
if let Some(machine_id) = node.machine_id.as_deref() {
let config = node_config_from_spec(node);
client
@ -542,6 +576,15 @@ pub async fn apply_cluster_state(
let value = serde_json::to_vec(&merged)?;
client.put(&key, &value).await?;
if let Some(desired_system) = desired_system_from_spec(node) {
client
.put(
&key_desired_system(cluster_namespace, cluster_id, &node.node_id),
&serde_json::to_vec(&desired_system)?,
)
.await?;
}
if let Some(machine_id) = node.machine_id.as_deref() {
let config = node_config_from_spec(node);
client
@ -661,6 +704,12 @@ async fn prune_cluster_state(
for node in &resolved_nodes {
desired_keys.insert(String::from_utf8_lossy(&key_node(cluster_namespace, cluster_id, &node.node_id)).to_string());
if desired_system_from_spec(node).is_some() {
desired_keys.insert(
String::from_utf8_lossy(&key_desired_system(cluster_namespace, cluster_id, &node.node_id))
.to_string(),
);
}
}
for node_class in &spec.node_classes {
desired_keys.insert(
@ -849,14 +898,45 @@ mod tests {
Some("rack-a")
);
}
// A resolved node whose install plan names a configuration must yield a
// desired-system record carrying the default switch/rollback policy.
#[test]
fn test_desired_system_is_derived_from_install_plan() {
    let spec = test_spec();
    let resolved = resolve_nodes(&spec).unwrap();
    let desired = desired_system_from_spec(&resolved[0]).expect("desired system should exist");
    assert_eq!(desired.node_id, "node01");
    assert_eq!(desired.nixos_configuration.as_deref(), Some("worker-golden"));
    assert_eq!(desired.switch_action.as_deref(), Some("switch"));
    assert_eq!(desired.rollback_on_failure, Some(true));
}
// Pruning may delete node records and their desired-system subkeys, but the
// agent-owned observed-system subkey must never be considered prunable.
#[test]
fn test_is_prunable_key_keeps_observed_system() {
    let prefix = cluster_prefix("photoncloud", "test-cluster");
    assert!(is_prunable_key(&format!("{}nodes/node01", prefix), &prefix));
    assert!(is_prunable_key(
        &format!("{}nodes/node01/desired-system", prefix),
        &prefix
    ));
    assert!(!is_prunable_key(
        &format!("{}nodes/node01/observed-system", prefix),
        &prefix
    ));
}
}
fn is_prunable_key(key: &str, prefix: &str) -> bool {
if key == format!("{}meta", prefix) {
return true;
}
key.starts_with(&format!("{}nodes/", prefix))
|| key.starts_with(&format!("{}node-classes/", prefix))
if let Some(node_suffix) = key.strip_prefix(&format!("{}nodes/", prefix)) {
return !node_suffix.is_empty()
&& (!node_suffix.contains('/') || node_suffix.ends_with("/desired-system"));
}
key.starts_with(&format!("{}node-classes/", prefix))
|| key.starts_with(&format!("{}pools/", prefix))
|| key.starts_with(&format!("{}enrollment-rules/", prefix))
|| key.starts_with(&format!("{}services/", prefix))

View file

@ -443,6 +443,22 @@ pub struct ObservedSystemState {
pub last_error: Option<String>,
}
/// Desired NixOS system state for a specific node.
///
/// Stored under `…/nodes/<node_id>/desired-system`; the nix-agent reads it
/// and reconciles the running system toward it. Optional fields fall back
/// to the agent's local CLI defaults when absent.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct DesiredSystemSpec {
    /// Node this spec applies to.
    pub node_id: String,
    /// NixOS configuration name to build; `None` means nothing to reconcile
    /// from this spec (the agent may still fall back to the install plan).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub nixos_configuration: Option<String>,
    /// Flake reference to build from; defaults to the agent's --flake-root.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub flake_ref: Option<String>,
    /// `switch-to-configuration` action to run after building (e.g. "switch").
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub switch_action: Option<String>,
    /// Command vector run after activation to verify node health; an empty
    /// vector means "not specified" and the agent's local default applies.
    #[serde(default)]
    pub health_check_command: Vec<String>,
    /// Whether to revert to the previous system when the health check fails.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub rollback_on_failure: Option<bool>,
}
/// Cluster metadata (PhotonCloud scope).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ClusterSpec {
@ -930,4 +946,26 @@ mod tests {
assert_eq!(decoded.nixos_configuration.as_deref(), Some("node01"));
assert_eq!(decoded.status.as_deref(), Some("pending"));
}
// JSON round-trip: a fully-populated DesiredSystemSpec must survive
// serde encode/decode with every field intact.
#[test]
fn test_desired_system_spec_roundtrip() {
    let desired = DesiredSystemSpec {
        node_id: "node01".to_string(),
        nixos_configuration: Some("node01".to_string()),
        flake_ref: Some("/opt/plasmacloud-src".to_string()),
        switch_action: Some("switch".to_string()),
        health_check_command: vec![
            "systemctl".to_string(),
            "is-system-running".to_string(),
        ],
        rollback_on_failure: Some(true),
    };
    let json = serde_json::to_string(&desired).unwrap();
    let decoded: DesiredSystemSpec = serde_json::from_str(&json).unwrap();
    assert_eq!(decoded.node_id, "node01");
    assert_eq!(decoded.nixos_configuration.as_deref(), Some("node01"));
    assert_eq!(decoded.health_check_command.len(), 2);
    assert_eq!(decoded.rollback_on_failure, Some(true));
}
}

View file

@ -7,7 +7,7 @@ use anyhow::{anyhow, Context, Result};
use chainfire_client::Client;
use chrono::Utc;
use clap::Parser;
use deployer_types::{ClusterNodeRecord, ObservedSystemState};
use deployer_types::{ClusterNodeRecord, DesiredSystemSpec, ObservedSystemState};
use tokio::process::Command;
use tokio::time::sleep;
use tracing::{info, warn};
@ -26,6 +26,15 @@ fn key_node(cluster_namespace: &str, cluster_id: &str, node_id: &str) -> Vec<u8>
.into_bytes()
}
/// Chainfire key holding the desired-system spec for `node_id` within the
/// agent's cluster scope.
fn key_desired_system(cluster_namespace: &str, cluster_id: &str, node_id: &str) -> Vec<u8> {
    let prefix = cluster_prefix(cluster_namespace, cluster_id);
    format!("{prefix}nodes/{node_id}/desired-system").into_bytes()
}
fn key_observed_system(cluster_namespace: &str, cluster_id: &str, node_id: &str) -> Vec<u8> {
format!(
"{}nodes/{}/observed-system",
@ -59,6 +68,12 @@ struct Cli {
#[arg(long, default_value = "switch")]
switch_action: String,
#[arg(long, allow_hyphen_values = true)]
health_check_command: Vec<String>,
#[arg(long, default_value_t = false)]
rollback_on_failure: bool,
#[arg(long, default_value_t = false)]
apply: bool,
@ -74,9 +89,20 @@ struct Agent {
flake_root: String,
interval: Duration,
switch_action: String,
health_check_command: Vec<String>,
rollback_on_failure: bool,
apply: bool,
}
/// Fully-resolved desired system for this node: chainfire spec values
/// merged with agent-local CLI fallbacks (see `resolve_desired_system`).
#[derive(Debug, Clone, PartialEq, Eq)]
struct ResolvedDesiredSystem {
    /// NixOS configuration name to build.
    nixos_configuration: String,
    /// Flake reference the configuration is built from.
    flake_ref: String,
    /// `switch-to-configuration` action to execute (e.g. "switch", "boot").
    switch_action: String,
    /// Post-activation health check; an empty vector disables the check.
    health_check_command: Vec<String>,
    /// Whether to revert to the previous system if the health check fails.
    rollback_on_failure: bool,
}
impl Agent {
fn new(cli: Cli) -> Self {
Self {
@ -87,6 +113,8 @@ impl Agent {
flake_root: cli.flake_root,
interval: Duration::from_secs(cli.interval_secs),
switch_action: cli.switch_action,
health_check_command: cli.health_check_command,
rollback_on_failure: cli.rollback_on_failure,
apply: cli.apply,
}
}
@ -116,8 +144,21 @@ impl Agent {
let node: ClusterNodeRecord =
serde_json::from_slice(&node_bytes).context("failed to parse node record")?;
let desired = client
.get(key_desired_system(
&self.cluster_namespace,
&self.cluster_id,
&self.node_id,
))
.await?
.map(|bytes| serde_json::from_slice::<DesiredSystemSpec>(&bytes))
.transpose()
.context("failed to parse desired-system spec")?;
let mut observed = self.base_observed_state(&node);
let reconcile_result = self.reconcile_node(&node, &mut observed).await;
let reconcile_result = self
.reconcile_node(&node, desired.as_ref(), &mut observed)
.await;
if let Err(error) = reconcile_result {
observed.status = Some("failed".to_string());
observed.last_error = Some(error.to_string());
@ -136,8 +177,6 @@ impl Agent {
fn base_observed_state(&self, node: &ClusterNodeRecord) -> ObservedSystemState {
ObservedSystemState {
node_id: node.node_id.clone(),
nixos_configuration: desired_configuration(node),
flake_root: Some(self.flake_root.clone()),
current_system: read_symlink_target("/run/current-system"),
booted_system: read_symlink_target("/run/booted-system"),
..ObservedSystemState::default()
@ -147,6 +186,7 @@ impl Agent {
async fn reconcile_node(
&self,
node: &ClusterNodeRecord,
desired: Option<&DesiredSystemSpec>,
observed: &mut ObservedSystemState,
) -> Result<()> {
match node.state.as_deref() {
@ -157,15 +197,31 @@ impl Agent {
_ => {}
}
let Some(configuration) = desired_configuration(node) else {
let Some(desired) = resolve_desired_system(
node,
desired,
&self.flake_root,
&self.switch_action,
&self.health_check_command,
self.rollback_on_failure,
) else {
observed.status = Some("idle".to_string());
return Ok(());
};
observed.nixos_configuration = Some(desired.nixos_configuration.clone());
observed.flake_root = Some(desired.flake_ref.clone());
let previous_system = observed.current_system.clone();
let target_system = self
.build_target_system(&configuration)
.build_target_system(&desired.flake_ref, &desired.nixos_configuration)
.await
.with_context(|| format!("failed to build target system for {}", configuration))?;
.with_context(|| {
format!(
"failed to build target system for {}",
desired.nixos_configuration
)
})?;
observed.target_system = Some(target_system.clone());
if observed.current_system.as_deref() == Some(target_system.as_str()) {
@ -181,26 +237,31 @@ impl Agent {
observed.status = Some("reconciling".to_string());
observed.last_attempt = Some(Utc::now());
self.switch_to_target(&target_system).await?;
self.switch_to_target(&target_system, &desired.switch_action)
.await?;
observed.current_system = read_symlink_target("/run/current-system");
observed.booted_system = read_symlink_target("/run/booted-system");
if observed.current_system.as_deref() == Some(target_system.as_str()) {
observed.status = Some("active".to_string());
observed.last_success = Some(Utc::now());
observed.last_error = None;
return Ok(());
if observed.current_system.as_deref() != Some(target_system.as_str()) {
return Err(anyhow!(
"switch completed but /run/current-system does not match target {}",
target_system
));
}
Err(anyhow!(
"switch completed but /run/current-system does not match target {}",
target_system
))
self.run_health_check_and_maybe_rollback(&desired, previous_system.as_deref(), observed)
.await?;
observed.status = Some("active".to_string());
observed.last_success = Some(Utc::now());
observed.last_error = None;
Ok(())
}
async fn build_target_system(&self, configuration: &str) -> Result<String> {
let flake_attr = target_flake_attr(&self.flake_root, configuration);
async fn build_target_system(&self, flake_ref: &str, configuration: &str) -> Result<String> {
let flake_attr = target_flake_attr(flake_ref, configuration);
let output = run_command(
"nix",
&["build", "--no-link", "--print-out-paths", flake_attr.as_str()],
@ -214,7 +275,7 @@ impl Agent {
Ok(path.to_string())
}
async fn switch_to_target(&self, target_system: &str) -> Result<()> {
async fn switch_to_target(&self, target_system: &str, switch_action: &str) -> Result<()> {
let switch_bin = Path::new(target_system).join("bin/switch-to-configuration");
if !switch_bin.exists() {
return Err(anyhow!(
@ -227,17 +288,79 @@ impl Agent {
switch_bin
.to_str()
.ok_or_else(|| anyhow!("invalid switch path"))?,
&[self.switch_action.as_str()],
&[switch_action],
)
.await?;
Ok(())
}
/// Run the post-activation health check and, on failure, optionally roll
/// the node back to the previously active system.
///
/// Behavior:
/// - No health check configured: returns `Ok(())` immediately.
/// - Check passes: returns `Ok(())`.
/// - Check fails with rollback enabled: switches back to `previous_system`,
///   records "rolled-back" status plus the failure message in `observed`,
///   and returns `Ok(())` (the rollback itself succeeded).
/// - Check fails with rollback disabled: returns the failure as an error.
///
/// NOTE(review): the caller appears to set `observed.status = "active"` and
/// clear `last_error` after this returns Ok — that would overwrite the
/// "rolled-back" status recorded here. Verify the caller preserves it.
async fn run_health_check_and_maybe_rollback(
    &self,
    desired: &ResolvedDesiredSystem,
    previous_system: Option<&str>,
    observed: &mut ObservedSystemState,
) -> Result<()> {
    if desired.health_check_command.is_empty() {
        return Ok(());
    }
    if let Err(error) = run_vec_command(&desired.health_check_command).await {
        let error_message = format!("health check failed after activation: {error}");
        if desired.rollback_on_failure {
            self.rollback_to_previous(previous_system).await?;
            // Re-read the symlinks so the observed state reflects the
            // system that is actually active after the rollback.
            observed.current_system = read_symlink_target("/run/current-system");
            observed.booted_system = read_symlink_target("/run/booted-system");
            observed.status = Some("rolled-back".to_string());
            observed.last_error = Some(error_message);
            return Ok(());
        }
        return Err(anyhow!(error_message));
    }
    Ok(())
}
/// Switch back to `previous_system` using the plain "switch" action.
///
/// Fails when no usable previous system path is known (missing or empty).
async fn rollback_to_previous(&self, previous_system: Option<&str>) -> Result<()> {
    match previous_system {
        Some(path) if !path.is_empty() => self.switch_to_target(path, "switch").await,
        _ => Err(anyhow!("rollback requested but no previous system is known")),
    }
}
}
fn desired_configuration(node: &ClusterNodeRecord) -> Option<String> {
node.install_plan
.as_ref()
.and_then(|plan| plan.nixos_configuration.clone())
/// Merge the chainfire desired-system spec with the agent's local CLI
/// defaults into a single resolved target.
///
/// The configuration name is taken from the spec when present, otherwise
/// from the node's install plan; if neither names one, there is nothing
/// to reconcile and `None` is returned.
fn resolve_desired_system(
    node: &ClusterNodeRecord,
    desired: Option<&DesiredSystemSpec>,
    local_flake_root: &str,
    local_switch_action: &str,
    local_health_check_command: &[String],
    local_rollback_on_failure: bool,
) -> Option<ResolvedDesiredSystem> {
    let mut nixos_configuration = desired.and_then(|spec| spec.nixos_configuration.clone());
    if nixos_configuration.is_none() {
        nixos_configuration = node
            .install_plan
            .as_ref()
            .and_then(|plan| plan.nixos_configuration.clone());
    }
    let nixos_configuration = nixos_configuration?;

    let flake_ref = match desired.and_then(|spec| spec.flake_ref.clone()) {
        Some(value) => value,
        None => local_flake_root.to_string(),
    };
    let switch_action = match desired.and_then(|spec| spec.switch_action.clone()) {
        Some(value) => value,
        None => local_switch_action.to_string(),
    };
    // An empty command vector in the spec means "not specified": fall back
    // to the agent's locally configured health check.
    let health_check_command = match desired {
        Some(spec) if !spec.health_check_command.is_empty() => spec.health_check_command.clone(),
        _ => local_health_check_command.to_vec(),
    };
    let rollback_on_failure = desired
        .and_then(|spec| spec.rollback_on_failure)
        .unwrap_or(local_rollback_on_failure);

    Some(ResolvedDesiredSystem {
        nixos_configuration,
        flake_ref,
        switch_action,
        health_check_command,
        rollback_on_failure,
    })
}
fn target_flake_attr(flake_root: &str, configuration: &str) -> String {
@ -279,6 +402,14 @@ async fn run_command(program: &str, args: &[&str]) -> Result<String> {
}
}
/// Execute a command given as an argv-style vector; the first element is
/// the program, the rest are its arguments.
///
/// Errors when the vector is empty; otherwise delegates to `run_command`.
async fn run_vec_command(command: &[String]) -> Result<String> {
    match command {
        [] => Err(anyhow!("command vector is empty")),
        [program, rest @ ..] => {
            let args: Vec<&str> = rest.iter().map(|arg| arg.as_str()).collect();
            run_command(program, &args).await
        }
    }
}
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::fmt()
@ -309,7 +440,7 @@ async fn main() -> Result<()> {
#[cfg(test)]
mod tests {
use super::*;
use deployer_types::InstallPlan;
use deployer_types::{DesiredSystemSpec, InstallPlan};
fn test_node() -> ClusterNodeRecord {
ClusterNodeRecord {
@ -333,8 +464,77 @@ mod tests {
}
#[test]
fn desired_configuration_prefers_install_plan() {
assert_eq!(desired_configuration(&test_node()).as_deref(), Some("node01"));
fn resolve_desired_system_falls_back_to_install_plan() {
let resolved = resolve_desired_system(
&test_node(),
None,
"/opt/plasmacloud-src",
"switch",
&[],
true,
)
.expect("desired system should resolve");
assert_eq!(resolved.nixos_configuration, "node01");
assert_eq!(resolved.flake_ref, "/opt/plasmacloud-src");
assert_eq!(resolved.switch_action, "switch");
assert!(resolved.rollback_on_failure);
}
// When a chainfire spec is present, every populated field must take
// precedence over the agent-local CLI defaults passed alongside it.
#[test]
fn resolve_desired_system_prefers_chainfire_spec() {
    let desired = DesiredSystemSpec {
        node_id: "node01".to_string(),
        nixos_configuration: Some("node01-next".to_string()),
        flake_ref: Some("github:centra/cloud".to_string()),
        switch_action: Some("boot".to_string()),
        health_check_command: vec!["true".to_string()],
        rollback_on_failure: Some(true),
    };
    let resolved = resolve_desired_system(
        &test_node(),
        Some(&desired),
        "/opt/plasmacloud-src",
        "switch",
        &[],
        false,
    )
    .expect("desired system should resolve");
    assert_eq!(resolved.nixos_configuration, "node01-next");
    assert_eq!(resolved.flake_ref, "github:centra/cloud");
    assert_eq!(resolved.switch_action, "boot");
    assert_eq!(resolved.health_check_command, vec!["true".to_string()]);
    assert!(resolved.rollback_on_failure);
}
// A spec naming only the configuration must inherit flake ref, switch
// action, health-check command and rollback policy from the agent's
// local CLI values.
#[test]
fn resolve_desired_system_uses_local_health_check_defaults_when_spec_omits_them() {
    let desired = DesiredSystemSpec {
        node_id: "node01".to_string(),
        nixos_configuration: Some("node01-next".to_string()),
        flake_ref: None,
        switch_action: None,
        health_check_command: Vec::new(),
        rollback_on_failure: None,
    };
    let resolved = resolve_desired_system(
        &test_node(),
        Some(&desired),
        "/opt/plasmacloud-src",
        "switch",
        &["systemctl".to_string(), "is-system-running".to_string()],
        true,
    )
    .expect("desired system should resolve");
    assert_eq!(resolved.flake_ref, "/opt/plasmacloud-src");
    assert_eq!(resolved.switch_action, "switch");
    assert_eq!(
        resolved.health_check_command,
        vec!["systemctl".to_string(), "is-system-running".to_string()]
    );
    assert!(resolved.rollback_on_failure);
}
#[test]

View file

@ -604,6 +604,8 @@
nodeId = "node01";
flakeRoot = self.outPath;
intervalSecs = 30;
healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ];
rollbackOnFailure = true;
apply = true;
};
}
@ -626,6 +628,8 @@
nodeId = "node02";
flakeRoot = self.outPath;
intervalSecs = 30;
healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ];
rollbackOnFailure = true;
apply = true;
};
}
@ -648,6 +652,8 @@
nodeId = "node03";
flakeRoot = self.outPath;
intervalSecs = 30;
healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ];
rollbackOnFailure = true;
apply = true;
};
}

View file

@ -2,6 +2,11 @@
let
cfg = config.services.nix-agent;
extraArgs =
map (arg: "--health-check-command ${lib.escapeShellArg arg}") cfg.healthCheckCommand
++ lib.optionals cfg.rollbackOnFailure [ "--rollback-on-failure" ]
++ lib.optionals cfg.apply [ "--apply" ];
renderedExtraArgs = lib.concatStringsSep " \\\n " extraArgs;
in
{
options.services.nix-agent = {
@ -48,6 +53,18 @@ in
description = "switch-to-configuration action executed after building the target system";
};
healthCheckCommand = lib.mkOption {
type = lib.types.listOf lib.types.str;
default = [ ];
description = "Command vector executed after activation to verify node health";
};
rollbackOnFailure = lib.mkOption {
type = lib.types.bool;
default = true;
description = "Roll back to the previous system if the post-activation health check fails";
};
apply = lib.mkOption {
type = lib.types.bool;
default = true;
@ -81,8 +98,7 @@ in
--node-id ${lib.escapeShellArg cfg.nodeId} \
--flake-root ${lib.escapeShellArg cfg.flakeRoot} \
--interval-secs ${toString cfg.intervalSecs} \
--switch-action ${lib.escapeShellArg cfg.switchAction} \
${lib.optionalString cfg.apply "--apply"}
--switch-action ${lib.escapeShellArg cfg.switchAction}${lib.optionalString (renderedExtraArgs != "") " \\\n ${renderedExtraArgs}"}
'';
};
};