From 795b8ad70cc74e73f94e0d8d3f3e82c57b8be07a Mon Sep 17 00:00:00 2001
From: centra
Date: Mon, 30 Mar 2026 13:54:14 +0900
Subject: [PATCH] Add prebuilt system closure support for host rollouts
---
README.md | 2 +
deployer/crates/deployer-ctl/src/chainfire.rs | 21 ++-
deployer/crates/deployer-types/src/lib.rs | 32 +++-
deployer/crates/nix-agent/src/main.rs | 92 +++++++++---
.../plasmacloud-reconciler/src/hosts.rs | 137 +++++++++++-------
docs/testing.md | 2 +
nix/modules/cluster-config-lib.nix | 18 +++
nix/tests/deployer-vm-smoke.nix | 4 +-
8 files changed, 230 insertions(+), 78 deletions(-)
diff --git a/README.md b/README.md
index 577c535..c219b79 100644
--- a/README.md
+++ b/README.md
@@ -48,3 +48,5 @@ nix run ./nix/test-cluster#cluster -- fresh-smoke
## Scope
PhotonCloud is centered on reproducible infrastructure behavior rather than polished end-user product surfaces. Some services, such as `creditservice`, are intentionally minimal reference implementations that prove integration points rather than full products.
+
+Host-level NixOS rollout validation is also expected to stay reproducible: the `deployer-vm-smoke` VM test now proves that `nix-agent` can activate a prebuilt target system closure directly, without recompiling the stack inside the guest.
diff --git a/deployer/crates/deployer-ctl/src/chainfire.rs b/deployer/crates/deployer-ctl/src/chainfire.rs
index 7126737..fdc5878 100644
--- a/deployer/crates/deployer-ctl/src/chainfire.rs
+++ b/deployer/crates/deployer-ctl/src/chainfire.rs
@@ -236,7 +236,7 @@ fn desired_system_from_spec(node: &NodeSpec) -> Option<DesiredSystemSpec> {
if desired.drain_before_apply.is_none() {
desired.drain_before_apply = Some(false);
}
- if desired.nixos_configuration.is_some() {
+ if desired.nixos_configuration.is_some() || desired.target_system.is_some() {
Some(desired)
} else {
None
@@ -882,7 +882,9 @@ pub async fn inspect_node(
if let Some(observed_system) = observed_system {
println!(
"observed_status={}",
- observed_system.status.unwrap_or_else(|| "unknown".to_string())
+ observed_system
+ .status
+ .unwrap_or_else(|| "unknown".to_string())
);
}
}
@@ -1090,7 +1092,8 @@ pub async fn set_host_deployment_paused(
let deployment_name = deployment_name.to_string();
async move {
let mut client = Client::connect(endpoint).await?;
- let spec_key = key_host_deployment_spec(&cluster_namespace, &cluster_id, &deployment_name);
+ let spec_key =
+ key_host_deployment_spec(&cluster_namespace, &cluster_id, &deployment_name);
if client.get(&spec_key).await?.is_none() {
return Err(anyhow::anyhow!(
"host deployment {} not found",
@@ -1116,7 +1119,9 @@ pub async fn set_host_deployment_paused(
"resumed by operator".to_string()
});
status.updated_at = Some(Utc::now());
- client.put(&status_key, &serde_json::to_vec(&status)?).await?;
+ client
+ .put(&status_key, &serde_json::to_vec(&status)?)
+ .await?;
println!("{}", serde_json::to_string_pretty(&status)?);
Ok(())
}
@@ -1138,7 +1143,8 @@ pub async fn abort_host_deployment(
let deployment_name = deployment_name.to_string();
async move {
let mut client = Client::connect(endpoint).await?;
- let spec_key = key_host_deployment_spec(&cluster_namespace, &cluster_id, &deployment_name);
+ let spec_key =
+ key_host_deployment_spec(&cluster_namespace, &cluster_id, &deployment_name);
if client.get(&spec_key).await?.is_none() {
return Err(anyhow::anyhow!(
"host deployment {} not found",
@@ -1512,6 +1518,7 @@ mod tests {
node_id: String::new(),
deployment_id: None,
nixos_configuration: Some("node01-next".to_string()),
+ target_system: Some("/nix/store/node01-next".to_string()),
flake_ref: Some("github:centra/cloud".to_string()),
switch_action: Some("boot".to_string()),
health_check_command: vec!["true".to_string()],
@@ -1523,6 +1530,10 @@ mod tests {
let desired = desired_system_from_spec(&resolved[0]).expect("desired system should exist");
assert_eq!(desired.node_id, "node01");
assert_eq!(desired.nixos_configuration.as_deref(), Some("node01-next"));
+ assert_eq!(
+ desired.target_system.as_deref(),
+ Some("/nix/store/node01-next")
+ );
assert_eq!(desired.flake_ref.as_deref(), Some("github:centra/cloud"));
assert_eq!(desired.switch_action.as_deref(), Some("boot"));
assert_eq!(desired.health_check_command, vec!["true".to_string()]);
diff --git a/deployer/crates/deployer-types/src/lib.rs b/deployer/crates/deployer-types/src/lib.rs
index 2704f60..ee4ca4c 100644
--- a/deployer/crates/deployer-types/src/lib.rs
+++ b/deployer/crates/deployer-types/src/lib.rs
@@ -605,6 +605,9 @@ pub struct DesiredSystemSpec {
pub deployment_id: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub nixos_configuration: Option<String>,
+ /// Optional prebuilt NixOS system closure path to activate directly.
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub target_system: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub flake_ref: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
@@ -756,6 +759,9 @@ pub struct HostDeploymentSpec {
pub selector: HostDeploymentSelector,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub nixos_configuration: Option<String>,
+ /// Optional prebuilt NixOS system closure path handed directly to nix-agent.
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub target_system: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub flake_ref: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
@@ -1238,6 +1244,7 @@ mod tests {
node_id: "node01".to_string(),
deployment_id: Some("worker-rollout".to_string()),
nixos_configuration: Some("node01".to_string()),
+ target_system: Some("/nix/store/system-node01".to_string()),
flake_ref: Some("/opt/plasmacloud-src".to_string()),
switch_action: Some("switch".to_string()),
health_check_command: vec!["systemctl".to_string(), "is-system-running".to_string()],
@@ -1250,6 +1257,10 @@ mod tests {
assert_eq!(decoded.node_id, "node01");
assert_eq!(decoded.deployment_id.as_deref(), Some("worker-rollout"));
assert_eq!(decoded.nixos_configuration.as_deref(), Some("node01"));
+ assert_eq!(
+ decoded.target_system.as_deref(),
+ Some("/nix/store/system-node01")
+ );
assert_eq!(decoded.health_check_command.len(), 2);
assert_eq!(decoded.rollback_on_failure, Some(true));
assert_eq!(decoded.drain_before_apply, Some(true));
@@ -1267,6 +1278,7 @@ mod tests {
match_labels: HashMap::from([("tier".to_string(), "general".to_string())]),
},
nixos_configuration: Some("worker-golden".to_string()),
+ target_system: Some("/nix/store/worker-golden".to_string()),
flake_ref: Some("/opt/plasmacloud-src".to_string()),
batch_size: Some(1),
max_unavailable: Some(1),
@@ -1283,9 +1295,17 @@ mod tests {
assert_eq!(decoded.name, "worker-rollout");
assert_eq!(decoded.batch_size, Some(1));
assert_eq!(decoded.max_unavailable, Some(1));
+ assert_eq!(
+ decoded.target_system.as_deref(),
+ Some("/nix/store/worker-golden")
+ );
assert_eq!(decoded.selector.roles, vec!["worker".to_string()]);
assert_eq!(
- decoded.selector.match_labels.get("tier").map(String::as_str),
+ decoded
+ .selector
+ .match_labels
+ .get("tier")
+ .map(String::as_str),
Some("general")
);
assert_eq!(decoded.drain_before_apply, Some(true));
@@ -1318,10 +1338,16 @@ mod tests {
let json = serde_json::to_string(&node).unwrap();
let decoded: ClusterNodeRecord = serde_json::from_str(&json).unwrap();
- assert_eq!(decoded.commission_state, Some(CommissionState::Commissioned));
+ assert_eq!(
+ decoded.commission_state,
+ Some(CommissionState::Commissioned)
+ );
assert_eq!(decoded.install_state, Some(InstallState::Installed));
assert_eq!(decoded.power_state, Some(PowerState::On));
- assert_eq!(decoded.bmc_ref.as_deref(), Some("redfish://lab-rack-a/node01"));
+ assert_eq!(
+ decoded.bmc_ref.as_deref(),
+ Some("redfish://lab-rack-a/node01")
+ );
}
#[test]
diff --git a/deployer/crates/nix-agent/src/main.rs b/deployer/crates/nix-agent/src/main.rs
index e375eec..abca814 100644
--- a/deployer/crates/nix-agent/src/main.rs
+++ b/deployer/crates/nix-agent/src/main.rs
@@ -97,7 +97,8 @@ struct Agent {
#[derive(Debug, Clone, PartialEq, Eq)]
struct ResolvedDesiredSystem {
- nixos_configuration: String,
+ nixos_configuration: Option<String>,
+ target_system: Option<String>,
flake_ref: String,
switch_action: String,
health_check_command: Vec<String>,
@@ -298,8 +299,7 @@ impl Agent {
Some("draining")
if !desired
.map(|spec| {
- spec.deployment_id.is_some()
- && spec.drain_before_apply.unwrap_or(false)
+ spec.deployment_id.is_some() && spec.drain_before_apply.unwrap_or(false)
})
.unwrap_or(false) =>
{
@@ -321,7 +321,8 @@ impl Agent {
return Ok(());
};
info!(
- nixos_configuration = %desired.nixos_configuration,
+ nixos_configuration = desired.nixos_configuration.as_deref().unwrap_or(""),
+ target_system = desired.target_system.as_deref().unwrap_or(""),
flake_ref = %desired.flake_ref,
switch_action = %desired.switch_action,
rollback_on_failure = desired.rollback_on_failure,
@@ -329,7 +330,7 @@ impl Agent {
"resolved desired system"
);
- observed.nixos_configuration = Some(desired.nixos_configuration.clone());
+ observed.nixos_configuration = desired.nixos_configuration.clone();
observed.flake_root = Some(desired.flake_ref.clone());
observed.switch_action = Some(desired.switch_action.clone());
@@ -341,17 +342,25 @@ impl Agent {
previous_system = previous_system.as_deref().unwrap_or(""),
"selected rollback baseline"
);
- let target_system = self
- .build_target_system(&desired.flake_ref, &desired.nixos_configuration)
- .await
- .with_context(|| {
- format!(
- "failed to build target system for {}",
- desired.nixos_configuration
- )
- })?;
+ let target_system = match desired.target_system.as_deref() {
+ Some(target_system) => {
+ info!(target_system, "using prebuilt target system");
+ target_system.to_string()
+ }
+ None => {
+ let configuration = desired
+ .nixos_configuration
+ .as_deref()
+ .ok_or_else(|| anyhow!("desired system did not specify nixos_configuration"))?;
+ self.build_target_system(&desired.flake_ref, configuration)
+ .await
+ .with_context(|| {
+ format!("failed to build target system for {}", configuration)
+ })?
+ }
+ };
observed.target_system = Some(target_system.clone());
- info!(target_system = %target_system, "built target system");
+ info!(target_system = %target_system, "resolved target system");
if observed.current_system.as_deref() == Some(target_system.as_str()) {
info!("target system already active");
@@ -575,10 +584,16 @@ fn resolve_desired_system(
node.install_plan
.as_ref()
.and_then(|plan| plan.nixos_configuration.clone())
- })?;
+ });
+ let target_system = desired.and_then(|spec| spec.target_system.clone());
+
+ if nixos_configuration.is_none() && target_system.is_none() {
+ return None;
+ }
Some(ResolvedDesiredSystem {
nixos_configuration,
+ target_system,
flake_ref: desired
.and_then(|spec| spec.flake_ref.clone())
.unwrap_or_else(|| local_flake_root.to_string()),
@@ -787,7 +802,8 @@ mod tests {
true,
)
.expect("desired system should resolve");
- assert_eq!(resolved.nixos_configuration, "node01");
+ assert_eq!(resolved.nixos_configuration.as_deref(), Some("node01"));
+ assert_eq!(resolved.target_system, None);
assert_eq!(resolved.flake_ref, "/opt/plasmacloud-src");
assert_eq!(resolved.switch_action, "switch");
assert!(resolved.rollback_on_failure);
@@ -799,6 +815,7 @@ mod tests {
node_id: "node01".to_string(),
deployment_id: None,
nixos_configuration: Some("node01-next".to_string()),
+ target_system: None,
flake_ref: Some("github:centra/cloud".to_string()),
switch_action: Some("boot".to_string()),
health_check_command: vec!["true".to_string()],
@@ -815,19 +832,52 @@ mod tests {
false,
)
.expect("desired system should resolve");
- assert_eq!(resolved.nixos_configuration, "node01-next");
+ assert_eq!(resolved.nixos_configuration.as_deref(), Some("node01-next"));
assert_eq!(resolved.flake_ref, "github:centra/cloud");
assert_eq!(resolved.switch_action, "boot");
assert_eq!(resolved.health_check_command, vec!["true".to_string()]);
assert!(resolved.rollback_on_failure);
}
+ #[test]
+ fn resolve_desired_system_accepts_prebuilt_target_system() {
+ let desired = DesiredSystemSpec {
+ node_id: "node01".to_string(),
+ deployment_id: None,
+ nixos_configuration: Some("node01-next".to_string()),
+ target_system: Some("/nix/store/node01-next".to_string()),
+ flake_ref: None,
+ switch_action: Some("switch".to_string()),
+ health_check_command: Vec::new(),
+ rollback_on_failure: Some(true),
+ drain_before_apply: Some(false),
+ };
+
+ let resolved = resolve_desired_system(
+ &test_node(),
+ Some(&desired),
+ "/opt/plasmacloud-src",
+ "switch",
+ &[],
+ true,
+ )
+ .expect("desired system should resolve");
+
+ assert_eq!(resolved.nixos_configuration.as_deref(), Some("node01-next"));
+ assert_eq!(
+ resolved.target_system.as_deref(),
+ Some("/nix/store/node01-next")
+ );
+ assert_eq!(resolved.flake_ref, "/opt/plasmacloud-src");
+ }
+
#[test]
fn resolve_desired_system_uses_local_health_check_defaults_when_spec_omits_them() {
let desired = DesiredSystemSpec {
node_id: "node01".to_string(),
deployment_id: None,
nixos_configuration: Some("node01-next".to_string()),
+ target_system: None,
flake_ref: None,
switch_action: None,
health_check_command: Vec::new(),
@@ -873,7 +923,8 @@ mod tests {
#[test]
fn post_boot_health_check_is_requested_for_matching_staged_target() {
let desired = ResolvedDesiredSystem {
- nixos_configuration: "node01".to_string(),
+ nixos_configuration: Some("node01".to_string()),
+ target_system: None,
flake_ref: "/opt/plasmacloud-src".to_string(),
switch_action: "boot".to_string(),
health_check_command: vec!["true".to_string()],
@@ -895,7 +946,8 @@ mod tests {
#[test]
fn post_boot_health_check_is_skipped_for_non_matching_state() {
let desired = ResolvedDesiredSystem {
- nixos_configuration: "node01".to_string(),
+ nixos_configuration: Some("node01".to_string()),
+ target_system: None,
flake_ref: "/opt/plasmacloud-src".to_string(),
switch_action: "boot".to_string(),
health_check_command: vec!["true".to_string()],
diff --git a/deployer/crates/plasmacloud-reconciler/src/hosts.rs b/deployer/crates/plasmacloud-reconciler/src/hosts.rs
index 9afe4ec..a17803e 100644
--- a/deployer/crates/plasmacloud-reconciler/src/hosts.rs
+++ b/deployer/crates/plasmacloud-reconciler/src/hosts.rs
@@ -4,7 +4,8 @@ use chrono::Utc;
use clap::Args;
use deployer_types::{
ClusterNodeRecord, CommissionState, DesiredSystemSpec, HostDeploymentSelector,
- HostDeploymentSpec, HostDeploymentStatus, InstallState, ObservedSystemState, ServiceInstanceSpec,
+ HostDeploymentSpec, HostDeploymentStatus, InstallState, ObservedSystemState,
+ ServiceInstanceSpec,
};
use std::collections::{BTreeMap, HashMap, HashSet};
use std::time::Duration;
@@ -286,7 +287,9 @@ impl HostDeploymentController {
let key = String::from_utf8_lossy(&key);
match serde_json::from_slice::<ServiceInstanceSpec>(&value) {
Ok(instance) => instances.push(instance),
- Err(error) => warn!(error = %error, key = %key, "failed to decode service instance"),
+ Err(error) => {
+ warn!(error = %error, key = %key, "failed to decode service instance")
+ }
}
}
@@ -336,7 +339,9 @@ impl HostDeploymentController {
Ok(status) => {
statuses.insert(status.name.clone(), status);
}
- Err(error) => warn!(error = %error, key = %key, "failed to decode host deployment status"),
+ Err(error) => {
+ warn!(error = %error, key = %key, "failed to decode host deployment status")
+ }
}
}
@@ -393,17 +398,28 @@ fn plan_host_deployment(
for node in &selector_matches {
let desired = desired_systems.get(&node.node_id);
let observed = observed_systems.get(&node.node_id);
- let is_completed =
- is_node_completed(deployment, node, desired, observed, target_configuration.as_deref());
+ let is_completed = is_node_completed(
+ deployment,
+ node,
+ desired,
+ observed,
+ target_configuration.as_deref(),
+ );
let is_failed = is_node_failed(deployment, desired, observed);
- let is_in_progress = is_node_in_progress(deployment, desired, observed, is_completed, is_failed)
- || (deployment.drain_before_apply == Some(true)
- && node.state.as_deref() == Some("draining")
- && instance_counts.get(&node.node_id).copied().unwrap_or_default() > 0);
+ let is_in_progress =
+ is_node_in_progress(deployment, desired, observed, is_completed, is_failed)
+ || (deployment.drain_before_apply == Some(true)
+ && node.state.as_deref() == Some("draining")
+ && instance_counts
+ .get(&node.node_id)
+ .copied()
+ .unwrap_or_default()
+ > 0);
if is_completed {
completed.push(node.node_id.clone());
- if deployment.drain_before_apply == Some(true) && node.state.as_deref() == Some("draining")
+ if deployment.drain_before_apply == Some(true)
+ && node.state.as_deref() == Some("draining")
{
let mut updated = (*node).clone();
updated.state = Some("active".to_string());
@@ -431,11 +447,12 @@ fn plan_host_deployment(
let paused = operator_paused || spec_paused || !failed.is_empty();
let remaining_unavailable_budget = max_unavailable.saturating_sub(unavailable);
let remaining_batch_budget = batch_size.saturating_sub(in_progress.len());
- let max_starts = if deployment.nixos_configuration.is_some() {
- remaining_unavailable_budget.min(remaining_batch_budget)
- } else {
- 0
- };
+ let max_starts =
+ if deployment.nixos_configuration.is_some() || deployment.target_system.is_some() {
+ remaining_unavailable_budget.min(remaining_batch_budget)
+ } else {
+ 0
+ };
let mut planned = 0usize;
let mut newly_started = Vec::new();
@@ -445,7 +462,10 @@ fn plan_host_deployment(
break;
}
- let remaining_instances = instance_counts.get(&node.node_id).copied().unwrap_or_default();
+ let remaining_instances = instance_counts
+ .get(&node.node_id)
+ .copied()
+ .unwrap_or_default();
if deployment.drain_before_apply == Some(true) && remaining_instances > 0 {
let mut updated = node.clone();
updated.state = Some("draining".to_string());
@@ -460,8 +480,12 @@ fn plan_host_deployment(
node_id: node.node_id.clone(),
deployment_id: Some(deployment.name.clone()),
nixos_configuration: deployment.nixos_configuration.clone(),
+ target_system: deployment.target_system.clone(),
flake_ref: deployment.flake_ref.clone(),
- switch_action: deployment.switch_action.clone().or_else(|| Some("switch".to_string())),
+ switch_action: deployment
+ .switch_action
+ .clone()
+ .or_else(|| Some("switch".to_string())),
health_check_command: deployment.health_check_command.clone(),
rollback_on_failure: Some(deployment.rollback_on_failure.unwrap_or(true)),
drain_before_apply: Some(deployment.drain_before_apply.unwrap_or(false)),
@@ -469,7 +493,8 @@ fn plan_host_deployment(
newly_started.push(node.node_id.clone());
in_progress.push(node.node_id.clone());
planned += 1;
- if deployment.drain_before_apply == Some(true) && node.state.as_deref() != Some("draining")
+ if deployment.drain_before_apply == Some(true)
+ && node.state.as_deref() != Some("draining")
{
let mut updated = node.clone();
updated.state = Some("draining".to_string());
@@ -481,26 +506,31 @@ fn plan_host_deployment(
let mut status = existing_status.cloned().unwrap_or_default();
status.name = deployment.name.clone();
- status.selected_nodes = selector_matches.iter().map(|node| node.node_id.clone()).collect();
+ status.selected_nodes = selector_matches
+ .iter()
+ .map(|node| node.node_id.clone())
+ .collect();
status.completed_nodes = dedup_sorted(completed);
status.in_progress_nodes = dedup_sorted(in_progress);
status.failed_nodes = dedup_sorted(failed);
status.paused_by_operator = operator_paused;
status.paused = paused;
- status.phase = Some(if status.selected_nodes.is_empty() {
- "idle"
- } else if deployment.nixos_configuration.is_none() {
- "invalid"
- } else if status.paused {
- "paused"
- } else if status.completed_nodes.len() == status.selected_nodes.len() {
- "completed"
- } else if !newly_started.is_empty() || !status.in_progress_nodes.is_empty() {
- "running"
- } else {
- "ready"
- }
- .to_string());
+ status.phase = Some(
+ if status.selected_nodes.is_empty() {
+ "idle"
+ } else if deployment.nixos_configuration.is_none() && deployment.target_system.is_none() {
+ "invalid"
+ } else if status.paused {
+ "paused"
+ } else if status.completed_nodes.len() == status.selected_nodes.len() {
+ "completed"
+ } else if !newly_started.is_empty() || !status.in_progress_nodes.is_empty() {
+ "running"
+ } else {
+ "ready"
+ }
+ .to_string(),
+ );
status.message = Some(format!(
"selected={} completed={} in_progress={} failed={} newly_started={}",
status.selected_nodes.len(),
@@ -585,9 +615,7 @@ fn node_is_rollout_candidate(node: &ClusterNodeRecord, heartbeat_timeout_secs: u
}
if matches!(
node.install_state,
- Some(
- InstallState::Installing | InstallState::Failed | InstallState::ReinstallRequested
- )
+ Some(InstallState::Installing | InstallState::Failed | InstallState::ReinstallRequested)
) {
return false;
}
@@ -612,9 +640,17 @@ fn is_node_completed(
) -> bool {
observed
.filter(|observed| observed.status.as_deref() == Some("active"))
- .and_then(|observed| observed.nixos_configuration.as_deref())
- .zip(target_configuration)
- .map(|(observed_configuration, target)| observed_configuration == target)
+ .map(|observed| {
+ target_configuration
+ .map(|target| observed.nixos_configuration.as_deref() == Some(target))
+ .or_else(|| {
+ deployment
+ .target_system
+ .as_deref()
+ .map(|target| observed.target_system.as_deref() == Some(target))
+ })
+ .unwrap_or(false)
+ })
.unwrap_or(false)
&& desired
.and_then(|desired| desired.deployment_id.as_deref())
@@ -653,7 +689,12 @@ fn is_node_in_progress(
.unwrap_or(false)
|| observed
.and_then(|observed| observed.status.as_deref())
- .map(|status| matches!(status, "planning" | "pending" | "reconciling" | "verifying" | "staged"))
+ .map(|status| {
+ matches!(
+ status,
+ "planning" | "pending" | "reconciling" | "verifying" | "staged"
+ )
+ })
.unwrap_or(false)
}
@@ -706,6 +747,7 @@ mod tests {
match_labels: HashMap::from([("tier".to_string(), "general".to_string())]),
},
nixos_configuration: Some("worker-golden".to_string()),
+ target_system: Some("/nix/store/worker-golden".to_string()),
flake_ref: Some("/opt/plasmacloud-src".to_string()),
batch_size: Some(1),
max_unavailable: Some(1),
@@ -733,6 +775,10 @@ mod tests {
);
assert_eq!(plan.desired_upserts.len(), 1);
+ assert_eq!(
+ plan.desired_upserts[0].target_system.as_deref(),
+ Some("/nix/store/worker-golden")
+ );
assert_eq!(plan.status.in_progress_nodes, vec!["node01".to_string()]);
assert_eq!(plan.status.phase.as_deref(), Some("running"));
}
@@ -747,6 +793,7 @@ mod tests {
node_id: "node01".to_string(),
deployment_id: Some("worker-rollout".to_string()),
nixos_configuration: Some("worker-golden".to_string()),
+ target_system: Some("/nix/store/worker-golden".to_string()),
flake_ref: None,
switch_action: Some("switch".to_string()),
health_check_command: Vec::new(),
@@ -764,15 +811,7 @@ mod tests {
},
)]);
- let plan = plan_host_deployment(
- &deployment,
- None,
- &nodes,
- &desired,
- &observed,
- &[],
- 300,
- );
+ let plan = plan_host_deployment(&deployment, None, &nodes, &desired, &observed, &[], 300);
assert!(plan.desired_upserts.is_empty());
assert!(plan.status.paused);
diff --git a/docs/testing.md b/docs/testing.md
index c8ec165..4d820ea 100644
--- a/docs/testing.md
+++ b/docs/testing.md
@@ -21,6 +21,7 @@ This flow:
nix run ./nix/test-cluster#cluster -- fresh-smoke
nix run ./nix/test-cluster#cluster -- fresh-matrix
nix run ./nix/test-cluster#cluster -- fresh-bench-storage
+nix build .#checks.x86_64-linux.deployer-vm-smoke
```
-Use these three commands as the release-facing local proof set:
+Use these four commands as the release-facing local proof set:
@@ -28,6 +29,7 @@ Use these three commands as the release-facing local proof set:
- `fresh-smoke`: whole-cluster readiness, core behavior, and fault injection
- `fresh-matrix`: composed service scenarios such as `prismnet + flashdns + fiberlb` and PrismNet-backed VM hosting bundles with `plasmavmc + coronafs + lightningstor`
- `fresh-bench-storage`: CoronaFS local-vs-shared-volume throughput, cross-worker volume visibility, and LightningStor large/small-object throughput capture
+- `deployer-vm-smoke`: prebuilt NixOS system closure handoff into `nix-agent`, proving host rollout can activate a host-built target without guest-side compilation
## Operational Commands
diff --git a/nix/modules/cluster-config-lib.nix b/nix/modules/cluster-config-lib.nix
index 5702d0a..3a98fab 100644
--- a/nix/modules/cluster-config-lib.nix
+++ b/nix/modules/cluster-config-lib.nix
@@ -45,6 +45,12 @@ let
description = "Name of the nixosConfigurations output to activate";
};
+ targetSystem = mkOption {
+ type = types.nullOr types.str;
+ default = null;
+ description = "Optional prebuilt NixOS system closure path activated directly by nix-agent";
+ };
+
flakeRef = mkOption {
type = types.nullOr types.str;
default = null;
@@ -128,6 +134,12 @@ let
description = "Name of the nixosConfigurations output to roll out";
};
+ targetSystem = mkOption {
+ type = types.nullOr types.str;
+ default = null;
+ description = "Optional prebuilt NixOS system closure path handed directly to nix-agent";
+ };
+
flakeRef = mkOption {
type = types.nullOr types.str;
default = null;
@@ -963,6 +975,9 @@ let
// optionalAttrs (desiredSystem != null && desiredSystem.nixosConfiguration != null) {
nixos_configuration = desiredSystem.nixosConfiguration;
}
+ // optionalAttrs (desiredSystem != null && desiredSystem.targetSystem != null) {
+ target_system = desiredSystem.targetSystem;
+ }
// optionalAttrs (desiredSystem != null && desiredSystem.flakeRef != null) {
flake_ref = desiredSystem.flakeRef;
}
@@ -1000,6 +1015,9 @@ let
// optionalAttrs (deployment.nixosConfiguration != null) {
nixos_configuration = deployment.nixosConfiguration;
}
+ // optionalAttrs (deployment.targetSystem != null) {
+ target_system = deployment.targetSystem;
+ }
// optionalAttrs (deployment.flakeRef != null) {
flake_ref = deployment.flakeRef;
}
diff --git a/nix/tests/deployer-vm-smoke.nix b/nix/tests/deployer-vm-smoke.nix
index 0e19420..df7a6d1 100644
--- a/nix/tests/deployer-vm-smoke.nix
+++ b/nix/tests/deployer-vm-smoke.nix
@@ -103,6 +103,7 @@ in
import time
desired_system_overrides = json.loads("""${desiredSystemOverridesJson}""")
+ smoke_target_system = "${smokeTargetToplevel}"
def write_remote_json(machine, path, payload):
machine.succeed(
@@ -151,6 +152,7 @@ in
},
"desired_system": {
"nixos_configuration": "vm-smoke-target",
+ "target_system": smoke_target_system,
**desired_system_overrides,
},
"state": "pending",
@@ -387,7 +389,7 @@ in
assert observed["status"] == "${expectedStatus}", observed
assert observed["nixos_configuration"] == "vm-smoke-target"
assert observed["flake_root"] == "/var/lib/photon-src"
- assert observed["target_system"].startswith("/nix/store/")
+ assert observed["target_system"] == smoke_target_system
current_system = worker.succeed("readlink -f /run/current-system").strip()
print("worker_current_system=", current_system)
if ${if expectCurrentSystemMatchesTarget then "True" else "False"}: