Add bootstrap flake bundle delivery and Nix desired-system generation

This commit is contained in:
centra 2026-03-20 17:43:26 +09:00
parent edd2442267
commit fbcbb4e5dc
Signed by: centra
GPG key ID: 0C09689D20B25ACA
15 changed files with 666 additions and 188 deletions

View file

@ -164,18 +164,25 @@ fn node_config_from_spec(node: &NodeSpec) -> NodeConfig {
} }
fn desired_system_from_spec(node: &NodeSpec) -> Option<DesiredSystemSpec> { fn desired_system_from_spec(node: &NodeSpec) -> Option<DesiredSystemSpec> {
Some(DesiredSystemSpec { let mut desired = node.desired_system.clone().unwrap_or_default();
node_id: node.node_id.clone(), desired.node_id = node.node_id.clone();
nixos_configuration: node if desired.nixos_configuration.is_none() {
desired.nixos_configuration = node
.install_plan .install_plan
.as_ref() .as_ref()
.and_then(|plan| plan.nixos_configuration.clone()), .and_then(|plan| plan.nixos_configuration.clone());
flake_ref: None, }
switch_action: Some("switch".to_string()), if desired.switch_action.is_none() {
health_check_command: Vec::new(), desired.switch_action = Some("switch".to_string());
rollback_on_failure: Some(true), }
}) if desired.rollback_on_failure.is_none() {
.filter(|desired| desired.nixos_configuration.is_some()) desired.rollback_on_failure = Some(true);
}
if desired.nixos_configuration.is_some() {
Some(desired)
} else {
None
}
} }
fn resolve_nodes(spec: &ClusterStateSpec) -> Result<Vec<NodeSpec>> { fn resolve_nodes(spec: &ClusterStateSpec) -> Result<Vec<NodeSpec>> {
@ -196,12 +203,12 @@ fn resolve_nodes(spec: &ClusterStateSpec) -> Result<Vec<NodeSpec>> {
let mut resolved = node.clone(); let mut resolved = node.clone();
let pool_spec = match resolved.pool.as_deref() { let pool_spec = match resolved.pool.as_deref() {
Some(pool_name) => Some( Some(pool_name) => Some(pools.get(pool_name).copied().with_context(|| {
pools format!(
.get(pool_name) "node {} references unknown pool {}",
.copied() node.node_id, pool_name
.with_context(|| format!("node {} references unknown pool {}", node.node_id, pool_name))?, )
), })?),
None => None, None => None,
}; };
@ -557,94 +564,102 @@ pub async fn apply_cluster_state(
with_chainfire_endpoint_failover(&endpoints, "apply cluster state", |endpoint| { with_chainfire_endpoint_failover(&endpoints, "apply cluster state", |endpoint| {
let endpoint = endpoint.to_string(); let endpoint = endpoint.to_string();
async move { async move {
let spec: ClusterStateSpec = read_config_file(config_path).await?; let spec: ClusterStateSpec = read_config_file(config_path).await?;
let resolved_nodes = resolve_nodes(&spec)?; let resolved_nodes = resolve_nodes(&spec)?;
let cluster_id = cli_cluster_id.unwrap_or(&spec.cluster.cluster_id); let cluster_id = cli_cluster_id.unwrap_or(&spec.cluster.cluster_id);
info!(cluster_id, "applying cluster state to Chainfire at {}", endpoint); info!(
let mut client = Client::connect(endpoint.to_string()).await?;
// MVP としては bootstrap と同じく upsert のみ行う。
// 将来的に、既存一覧を取得して差分削除 (prune) を実装できる構造にしておく。
let meta_key = key_cluster_meta(cluster_namespace, cluster_id);
let meta_value = serde_json::to_vec(&spec.cluster)?;
client.put(&meta_key, &meta_value).await?;
for node in &resolved_nodes {
let key = key_node(cluster_namespace, cluster_id, &node.node_id);
let merged = merge_existing_node_observed_fields(&mut client, &key, node).await?;
let value = serde_json::to_vec(&merged)?;
client.put(&key, &value).await?;
if let Some(desired_system) = desired_system_from_spec(node) {
client
.put(
&key_desired_system(cluster_namespace, cluster_id, &node.node_id),
&serde_json::to_vec(&desired_system)?,
)
.await?;
}
if let Some(machine_id) = node.machine_id.as_deref() {
let config = node_config_from_spec(node);
client
.put(
&deployer_node_config_key(deployer_namespace, machine_id),
serde_json::to_vec(&config)?,
)
.await?;
client
.put(
&deployer_node_mapping_key(deployer_namespace, machine_id),
node.node_id.as_bytes(),
)
.await?;
}
}
for node_class in &spec.node_classes {
let key = key_node_class(cluster_namespace, cluster_id, &node_class.name);
let value = serde_json::to_vec(node_class)?;
client.put(&key, &value).await?;
}
for pool in &spec.pools {
let key = key_pool(cluster_namespace, cluster_id, &pool.name);
let value = serde_json::to_vec(pool)?;
client.put(&key, &value).await?;
}
for rule in &spec.enrollment_rules {
let key = key_enrollment_rule(cluster_namespace, cluster_id, &rule.name);
let value = serde_json::to_vec(rule)?;
client.put(&key, &value).await?;
}
for svc in &spec.services {
let key = key_service(cluster_namespace, cluster_id, &svc.name);
let value = serde_json::to_vec(svc)?;
client.put(&key, &value).await?;
}
for inst in &spec.instances {
let key = key_instance(cluster_namespace, cluster_id, &inst.service, &inst.instance_id);
let value = serde_json::to_vec(inst)?;
client.put(&key, &value).await?;
}
for policy in &spec.mtls_policies {
let key = key_mtls_policy(cluster_namespace, cluster_id, &policy.policy_id);
let value = serde_json::to_vec(policy)?;
client.put(&key, &value).await?;
}
if prune {
prune_cluster_state(
&mut client,
cluster_namespace,
deployer_namespace,
cluster_id, cluster_id,
&spec, "applying cluster state to Chainfire at {}", endpoint
) );
.await?; let mut client = Client::connect(endpoint.to_string()).await?;
}
Ok(()) // MVP としては bootstrap と同じく upsert のみ行う。
// 将来的に、既存一覧を取得して差分削除 (prune) を実装できる構造にしておく。
let meta_key = key_cluster_meta(cluster_namespace, cluster_id);
let meta_value = serde_json::to_vec(&spec.cluster)?;
client.put(&meta_key, &meta_value).await?;
for node in &resolved_nodes {
let key = key_node(cluster_namespace, cluster_id, &node.node_id);
let merged = merge_existing_node_observed_fields(&mut client, &key, node).await?;
let value = serde_json::to_vec(&merged)?;
client.put(&key, &value).await?;
if let Some(desired_system) = desired_system_from_spec(node) {
client
.put(
&key_desired_system(cluster_namespace, cluster_id, &node.node_id),
&serde_json::to_vec(&desired_system)?,
)
.await?;
}
if let Some(machine_id) = node.machine_id.as_deref() {
let config = node_config_from_spec(node);
client
.put(
&deployer_node_config_key(deployer_namespace, machine_id),
serde_json::to_vec(&config)?,
)
.await?;
client
.put(
&deployer_node_mapping_key(deployer_namespace, machine_id),
node.node_id.as_bytes(),
)
.await?;
}
}
for node_class in &spec.node_classes {
let key = key_node_class(cluster_namespace, cluster_id, &node_class.name);
let value = serde_json::to_vec(node_class)?;
client.put(&key, &value).await?;
}
for pool in &spec.pools {
let key = key_pool(cluster_namespace, cluster_id, &pool.name);
let value = serde_json::to_vec(pool)?;
client.put(&key, &value).await?;
}
for rule in &spec.enrollment_rules {
let key = key_enrollment_rule(cluster_namespace, cluster_id, &rule.name);
let value = serde_json::to_vec(rule)?;
client.put(&key, &value).await?;
}
for svc in &spec.services {
let key = key_service(cluster_namespace, cluster_id, &svc.name);
let value = serde_json::to_vec(svc)?;
client.put(&key, &value).await?;
}
for inst in &spec.instances {
let key = key_instance(
cluster_namespace,
cluster_id,
&inst.service,
&inst.instance_id,
);
let value = serde_json::to_vec(inst)?;
client.put(&key, &value).await?;
}
for policy in &spec.mtls_policies {
let key = key_mtls_policy(cluster_namespace, cluster_id, &policy.policy_id);
let value = serde_json::to_vec(policy)?;
client.put(&key, &value).await?;
}
if prune {
prune_cluster_state(
&mut client,
cluster_namespace,
deployer_namespace,
cluster_id,
&spec,
)
.await?;
}
Ok(())
} }
}) })
.await .await
@ -656,35 +671,36 @@ pub async fn dump_prefix(endpoint: &str, prefix: &str, json_output: bool) -> Res
with_chainfire_endpoint_failover(&endpoints, "dump Chainfire prefix", |endpoint| { with_chainfire_endpoint_failover(&endpoints, "dump Chainfire prefix", |endpoint| {
let endpoint = endpoint.to_string(); let endpoint = endpoint.to_string();
async move { async move {
let mut client = Client::connect(endpoint.to_string()).await?; let mut client = Client::connect(endpoint.to_string()).await?;
let start = prefix.as_bytes(); let start = prefix.as_bytes();
info!("dumping keys with prefix {:?}", prefix); info!("dumping keys with prefix {:?}", prefix);
let (kvs, _next) = client.scan_prefix(start, 0).await?; let (kvs, _next) = client.scan_prefix(start, 0).await?;
if kvs.is_empty() { if kvs.is_empty() {
warn!("no keys found under prefix {:?}", prefix); warn!("no keys found under prefix {:?}", prefix);
}
for (key, value, rev) in kvs {
let k = String::from_utf8_lossy(&key);
if json_output {
let value = serde_json::from_slice::<Value>(&value)
.unwrap_or_else(|_| Value::String(String::from_utf8_lossy(&value).into_owned()));
println!(
"{}",
serde_json::to_string(&json!({
"revision": rev,
"key": k.as_ref(),
"value": value,
}))?
);
} else {
let v = String::from_utf8_lossy(&value);
println!("rev={} key={} value={}", rev, k, v);
} }
}
Ok(()) for (key, value, rev) in kvs {
let k = String::from_utf8_lossy(&key);
if json_output {
let value = serde_json::from_slice::<Value>(&value).unwrap_or_else(|_| {
Value::String(String::from_utf8_lossy(&value).into_owned())
});
println!(
"{}",
serde_json::to_string(&json!({
"revision": rev,
"key": k.as_ref(),
"value": value,
}))?
);
} else {
let v = String::from_utf8_lossy(&value);
println!("rev={} key={} value={}", rev, k, v);
}
}
Ok(())
} }
}) })
.await .await
@ -698,42 +714,80 @@ async fn prune_cluster_state(
spec: &ClusterStateSpec, spec: &ClusterStateSpec,
) -> Result<()> { ) -> Result<()> {
let mut desired_keys = HashSet::new(); let mut desired_keys = HashSet::new();
desired_keys.insert(String::from_utf8_lossy(&key_cluster_meta(cluster_namespace, cluster_id)).to_string()); desired_keys.insert(
String::from_utf8_lossy(&key_cluster_meta(cluster_namespace, cluster_id)).to_string(),
);
let resolved_nodes = resolve_nodes(spec)?; let resolved_nodes = resolve_nodes(spec)?;
for node in &resolved_nodes { for node in &resolved_nodes {
desired_keys.insert(String::from_utf8_lossy(&key_node(cluster_namespace, cluster_id, &node.node_id)).to_string()); desired_keys.insert(
String::from_utf8_lossy(&key_node(cluster_namespace, cluster_id, &node.node_id))
.to_string(),
);
if desired_system_from_spec(node).is_some() { if desired_system_from_spec(node).is_some() {
desired_keys.insert( desired_keys.insert(
String::from_utf8_lossy(&key_desired_system(cluster_namespace, cluster_id, &node.node_id)) String::from_utf8_lossy(&key_desired_system(
.to_string(), cluster_namespace,
cluster_id,
&node.node_id,
))
.to_string(),
); );
} }
} }
for node_class in &spec.node_classes { for node_class in &spec.node_classes {
desired_keys.insert( desired_keys.insert(
String::from_utf8_lossy(&key_node_class(cluster_namespace, cluster_id, &node_class.name)) String::from_utf8_lossy(&key_node_class(
.to_string(), cluster_namespace,
cluster_id,
&node_class.name,
))
.to_string(),
); );
} }
for pool in &spec.pools { for pool in &spec.pools {
desired_keys.insert(String::from_utf8_lossy(&key_pool(cluster_namespace, cluster_id, &pool.name)).to_string());
}
for rule in &spec.enrollment_rules {
desired_keys.insert( desired_keys.insert(
String::from_utf8_lossy(&key_enrollment_rule(cluster_namespace, cluster_id, &rule.name)) String::from_utf8_lossy(&key_pool(cluster_namespace, cluster_id, &pool.name))
.to_string(), .to_string(),
); );
} }
for rule in &spec.enrollment_rules {
desired_keys.insert(
String::from_utf8_lossy(&key_enrollment_rule(
cluster_namespace,
cluster_id,
&rule.name,
))
.to_string(),
);
}
for svc in &spec.services { for svc in &spec.services {
desired_keys.insert(String::from_utf8_lossy(&key_service(cluster_namespace, cluster_id, &svc.name)).to_string()); desired_keys.insert(
String::from_utf8_lossy(&key_service(cluster_namespace, cluster_id, &svc.name))
.to_string(),
);
} }
for inst in &spec.instances { for inst in &spec.instances {
desired_keys.insert(String::from_utf8_lossy(&key_instance(cluster_namespace, cluster_id, &inst.service, &inst.instance_id)).to_string()); desired_keys.insert(
String::from_utf8_lossy(&key_instance(
cluster_namespace,
cluster_id,
&inst.service,
&inst.instance_id,
))
.to_string(),
);
} }
for policy in &spec.mtls_policies { for policy in &spec.mtls_policies {
desired_keys.insert(String::from_utf8_lossy(&key_mtls_policy(cluster_namespace, cluster_id, &policy.policy_id)).to_string()); desired_keys.insert(
String::from_utf8_lossy(&key_mtls_policy(
cluster_namespace,
cluster_id,
&policy.policy_id,
))
.to_string(),
);
} }
let prefix = cluster_prefix(cluster_namespace, cluster_id); let prefix = cluster_prefix(cluster_namespace, cluster_id);
@ -839,6 +893,7 @@ mod tests {
failure_domain: Some("rack-a".to_string()), failure_domain: Some("rack-a".to_string()),
nix_profile: None, nix_profile: None,
install_plan: None, install_plan: None,
desired_system: None,
state: Some(match NodeState::Pending { state: Some(match NodeState::Pending {
NodeState::Pending => "pending".to_string(), NodeState::Pending => "pending".to_string(),
_ => unreachable!(), _ => unreachable!(),
@ -879,7 +934,10 @@ mod tests {
assert_eq!(node.node_class.as_deref(), Some("worker-linux")); assert_eq!(node.node_class.as_deref(), Some("worker-linux"));
assert_eq!(node.nix_profile.as_deref(), Some("profiles/worker-linux")); assert_eq!(node.nix_profile.as_deref(), Some("profiles/worker-linux"));
let install_plan = node.install_plan.as_ref().expect("install plan should inherit"); let install_plan = node
.install_plan
.as_ref()
.expect("install plan should inherit");
assert_eq!( assert_eq!(
install_plan.nixos_configuration.as_deref(), install_plan.nixos_configuration.as_deref(),
Some("worker-golden") Some("worker-golden")
@ -890,11 +948,15 @@ mod tests {
assert_eq!(node.labels.get("env").map(String::as_str), Some("dev")); assert_eq!(node.labels.get("env").map(String::as_str), Some("dev"));
assert_eq!(node.labels.get("pool").map(String::as_str), Some("general")); assert_eq!(node.labels.get("pool").map(String::as_str), Some("general"));
assert_eq!( assert_eq!(
node.labels.get("nodeclass.photoncloud.io/name").map(String::as_str), node.labels
.get("nodeclass.photoncloud.io/name")
.map(String::as_str),
Some("worker-linux") Some("worker-linux")
); );
assert_eq!( assert_eq!(
node.labels.get("topology.kubernetes.io/zone").map(String::as_str), node.labels
.get("topology.kubernetes.io/zone")
.map(String::as_str),
Some("rack-a") Some("rack-a")
); );
} }
@ -906,11 +968,36 @@ mod tests {
let desired = desired_system_from_spec(&resolved[0]).expect("desired system should exist"); let desired = desired_system_from_spec(&resolved[0]).expect("desired system should exist");
assert_eq!(desired.node_id, "node01"); assert_eq!(desired.node_id, "node01");
assert_eq!(desired.nixos_configuration.as_deref(), Some("worker-golden")); assert_eq!(
desired.nixos_configuration.as_deref(),
Some("worker-golden")
);
assert_eq!(desired.switch_action.as_deref(), Some("switch")); assert_eq!(desired.switch_action.as_deref(), Some("switch"));
assert_eq!(desired.rollback_on_failure, Some(true)); assert_eq!(desired.rollback_on_failure, Some(true));
} }
#[test]
fn test_desired_system_keeps_explicit_node_overrides() {
    // Every optional field is set explicitly; `node_id` is deliberately left
    // empty so the test can observe it being filled in from the node itself.
    let explicit = DesiredSystemSpec {
        node_id: String::new(),
        nixos_configuration: Some("node01-next".to_string()),
        flake_ref: Some("github:centra/cloud".to_string()),
        switch_action: Some("boot".to_string()),
        health_check_command: vec!["true".to_string()],
        rollback_on_failure: Some(false),
    };

    let mut spec = test_spec();
    spec.nodes[0].desired_system = Some(explicit);

    let resolved = resolve_nodes(&spec).unwrap();
    let first_node = &resolved[0];
    let desired = desired_system_from_spec(first_node).expect("desired system should exist");

    // The node id comes from the node spec, not from the (empty) override.
    assert_eq!(desired.node_id, "node01");
    // None of the explicitly provided values may be replaced by defaults.
    assert_eq!(desired.nixos_configuration.as_deref(), Some("node01-next"));
    assert_eq!(desired.flake_ref.as_deref(), Some("github:centra/cloud"));
    assert_eq!(desired.switch_action.as_deref(), Some("boot"));
    assert_eq!(desired.health_check_command, vec!["true".to_string()]);
    assert_eq!(desired.rollback_on_failure, Some(false));
}
#[test] #[test]
fn test_is_prunable_key_keeps_observed_system() { fn test_is_prunable_key_keeps_observed_system() {
let prefix = cluster_prefix("photoncloud", "test-cluster"); let prefix = cluster_prefix("photoncloud", "test-cluster");

View file

@ -0,0 +1,133 @@
use std::sync::Arc;
use axum::{
body::Body,
extract::State,
http::{header, HeaderMap, HeaderValue, StatusCode},
response::IntoResponse,
};
use tokio::fs;
use crate::{auth::require_bootstrap_auth, state::AppState};
/// GET /api/v1/bootstrap/flake-bundle
///
/// Returns the file referenced by `config.bootstrap_flake_bundle_path` as an
/// `application/gzip` attachment named `plasmacloud-flake-bundle.tar.gz`.
/// The whole file is read into memory before the response is built.
///
/// # Errors
///
/// * Any error returned by `require_bootstrap_auth` is propagated unchanged.
/// * `503 Service Unavailable` when no bundle path is configured.
/// * `404 Not Found` when the configured file does not exist.
/// * `500 Internal Server Error` for any other read failure.
pub async fn flake_bundle(
    State(state): State<Arc<AppState>>,
    headers: HeaderMap,
) -> Result<impl IntoResponse, (StatusCode, String)> {
    require_bootstrap_auth(&state, &headers)?;

    let bundle_path = match state.config.bootstrap_flake_bundle_path.as_ref() {
        Some(configured) => configured,
        None => {
            return Err((
                StatusCode::SERVICE_UNAVAILABLE,
                "bootstrap flake bundle not configured".to_string(),
            ));
        }
    };

    let payload = match fs::read(bundle_path).await {
        Ok(contents) => contents,
        Err(error) => {
            // A missing file is reported as 404; every other I/O failure is a 500.
            let status = match error.kind() {
                std::io::ErrorKind::NotFound => StatusCode::NOT_FOUND,
                _ => StatusCode::INTERNAL_SERVER_ERROR,
            };
            let message = format!(
                "failed to read bootstrap flake bundle {}: {}",
                bundle_path.display(),
                error
            );
            return Err((status, message));
        }
    };

    let content_type = (
        header::CONTENT_TYPE,
        HeaderValue::from_static("application/gzip"),
    );
    let content_disposition = (
        header::CONTENT_DISPOSITION,
        HeaderValue::from_static("attachment; filename=\"plasmacloud-flake-bundle.tar.gz\""),
    );

    Ok(([content_type, content_disposition], Body::from(payload)))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{build_router, config::Config};
    use axum::{body::to_bytes, http::Request};
    use std::{
        fs,
        time::{SystemTime, UNIX_EPOCH},
    };
    use tower::ServiceExt;

    /// Builds a path in the system temp directory whose file name combines
    /// `name`, the current process id and the current time in nanoseconds.
    fn temp_path(name: &str) -> std::path::PathBuf {
        let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
        let file_name = format!(
            "{}-{}-{}",
            name,
            std::process::id(),
            since_epoch.as_nanos()
        );
        std::env::temp_dir().join(file_name)
    }

    /// Default configuration with only the bootstrap token set.
    fn config_with_token() -> Config {
        Config {
            bootstrap_token: Some("test-token".to_string()),
            ..Config::default()
        }
    }

    /// GET request for the flake bundle route carrying the test token.
    fn bundle_request() -> Request<Body> {
        Request::builder()
            .uri("/api/v1/bootstrap/flake-bundle")
            .header("x-deployer-token", "test-token")
            .body(Body::empty())
            .unwrap()
    }

    /// With a bundle configured, the route answers 200 with the gzip content
    /// type and the exact bytes stored on disk.
    #[tokio::test]
    async fn flake_bundle_route_serves_configured_bundle() {
        let bundle_path = temp_path("deployer-flake-bundle");
        fs::write(&bundle_path, b"bundle-bytes").unwrap();

        let mut config = config_with_token();
        config.bootstrap_flake_bundle_path = Some(bundle_path.clone());
        let app = build_router(Arc::new(AppState::with_config(config)));

        let response = app.oneshot(bundle_request()).await.unwrap();
        assert_eq!(response.status(), StatusCode::OK);

        let content_type = response
            .headers()
            .get(header::CONTENT_TYPE)
            .and_then(|value| value.to_str().ok());
        assert_eq!(content_type, Some("application/gzip"));

        let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
        assert_eq!(body.as_ref(), b"bundle-bytes");

        // Best-effort cleanup; a failure to remove the file is ignored.
        let _ = fs::remove_file(bundle_path);
    }

    /// Without a configured bundle path, the route answers 503.
    #[tokio::test]
    async fn flake_bundle_route_requires_configured_bundle() {
        let app = build_router(Arc::new(AppState::with_config(config_with_token())));

        let response = app.oneshot(bundle_request()).await.unwrap();

        assert_eq!(response.status(), StatusCode::SERVICE_UNAVAILABLE);
    }
}

View file

@ -7,9 +7,7 @@ use deployer_types::NodeConfig;
use std::sync::Arc; use std::sync::Arc;
use crate::{ use crate::{
auth::require_bootstrap_auth, auth::require_bootstrap_auth, phone_home::lookup_node_config, state::AppState,
phone_home::lookup_node_config,
state::AppState,
validation::validate_identifier, validation::validate_identifier,
}; };
@ -23,7 +21,10 @@ pub async fn meta_data(
validate_identifier(&machine_id, "machine_id")?; validate_identifier(&machine_id, "machine_id")?;
let Some((node_id, config)) = lookup_node_config(&state, &machine_id).await else { let Some((node_id, config)) = lookup_node_config(&state, &machine_id).await else {
return Err((StatusCode::NOT_FOUND, "machine-id not registered".to_string())); return Err((
StatusCode::NOT_FOUND,
"machine-id not registered".to_string(),
));
}; };
let body = format!( let body = format!(
@ -43,12 +44,18 @@ pub async fn user_data(
validate_identifier(&machine_id, "machine_id")?; validate_identifier(&machine_id, "machine_id")?;
let Some((node_id, config)) = lookup_node_config(&state, &machine_id).await else { let Some((node_id, config)) = lookup_node_config(&state, &machine_id).await else {
return Err((StatusCode::NOT_FOUND, "machine-id not registered".to_string())); return Err((
StatusCode::NOT_FOUND,
"machine-id not registered".to_string(),
));
}; };
let body = render_user_data(&node_id, &config) let body = render_user_data(&node_id, &config)
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
Ok(([(axum::http::header::CONTENT_TYPE, "text/cloud-config")], body)) Ok((
[(axum::http::header::CONTENT_TYPE, "text/cloud-config")],
body,
))
} }
fn render_yaml_list(items: &[String], indent: usize) -> String { fn render_yaml_list(items: &[String], indent: usize) -> String {

View file

@ -30,6 +30,10 @@ pub struct Config {
#[serde(default = "default_local_state_path")] #[serde(default = "default_local_state_path")]
pub local_state_path: Option<PathBuf>, pub local_state_path: Option<PathBuf>,
/// Optional tar.gz bundle containing the PhotonCloud flake source tree for bootstrap installs
#[serde(default)]
pub bootstrap_flake_bundle_path: Option<PathBuf>,
/// Shared bootstrap token required for phone-home/admin APIs /// Shared bootstrap token required for phone-home/admin APIs
#[serde(default)] #[serde(default)]
pub bootstrap_token: Option<String>, pub bootstrap_token: Option<String>,
@ -80,6 +84,7 @@ impl Default for Config {
cluster_namespace: default_cluster_namespace(), cluster_namespace: default_cluster_namespace(),
heartbeat_timeout_secs: default_heartbeat_timeout(), heartbeat_timeout_secs: default_heartbeat_timeout(),
local_state_path: default_local_state_path(), local_state_path: default_local_state_path(),
bootstrap_flake_bundle_path: None,
bootstrap_token: None, bootstrap_token: None,
admin_token: None, admin_token: None,
allow_admin_fallback: default_allow_admin_fallback(), allow_admin_fallback: default_allow_admin_fallback(),
@ -224,6 +229,7 @@ mod tests {
config.local_state_path, config.local_state_path,
Some(PathBuf::from("/var/lib/deployer/state")) Some(PathBuf::from("/var/lib/deployer/state"))
); );
assert!(config.bootstrap_flake_bundle_path.is_none());
assert!(config.bootstrap_token.is_none()); assert!(config.bootstrap_token.is_none());
assert!(config.admin_token.is_none()); assert!(config.admin_token.is_none());
assert!(!config.allow_admin_fallback); assert!(!config.allow_admin_fallback);
@ -253,6 +259,7 @@ mod tests {
bind_addr = "127.0.0.1:18080" bind_addr = "127.0.0.1:18080"
cluster_id = "cluster-a" cluster_id = "cluster-a"
allow_unauthenticated = true allow_unauthenticated = true
bootstrap_flake_bundle_path = "/tmp/plasmacloud-flake-bundle.tar.gz"
[chainfire] [chainfire]
endpoints = ["http://10.0.0.1:2379"] endpoints = ["http://10.0.0.1:2379"]
@ -264,6 +271,10 @@ mod tests {
let config = load_config(&path).unwrap(); let config = load_config(&path).unwrap();
assert_eq!(config.bind_addr.to_string(), "127.0.0.1:18080"); assert_eq!(config.bind_addr.to_string(), "127.0.0.1:18080");
assert_eq!(config.cluster_id.as_deref(), Some("cluster-a")); assert_eq!(config.cluster_id.as_deref(), Some("cluster-a"));
assert_eq!(
config.bootstrap_flake_bundle_path,
Some(PathBuf::from("/tmp/plasmacloud-flake-bundle.tar.gz"))
);
assert!(config.allow_unauthenticated); assert!(config.allow_unauthenticated);
assert_eq!(config.chainfire.namespace, "bootstrap"); assert_eq!(config.chainfire.namespace, "bootstrap");
assert_eq!(config.chainfire.endpoints, vec!["http://10.0.0.1:2379"]); assert_eq!(config.chainfire.endpoints, vec!["http://10.0.0.1:2379"]);

View file

@ -1,7 +1,8 @@
pub mod admin; pub mod admin;
pub mod auth; pub mod auth;
pub mod cluster; pub mod bootstrap_assets;
pub mod cloud_init; pub mod cloud_init;
pub mod cluster;
pub mod config; pub mod config;
pub mod local_storage; pub mod local_storage;
pub mod phone_home; pub mod phone_home;
@ -34,6 +35,10 @@ pub fn build_router(state: Arc<AppState>) -> Router {
"/api/v1/cloud-init/:machine_id/user-data", "/api/v1/cloud-init/:machine_id/user-data",
get(cloud_init::user_data), get(cloud_init::user_data),
) )
.route(
"/api/v1/bootstrap/flake-bundle",
get(bootstrap_assets::flake_bundle),
)
// Admin API (node management) // Admin API (node management)
.route("/api/v1/admin/nodes", post(admin::pre_register)) .route("/api/v1/admin/nodes", post(admin::pre_register))
.route("/api/v1/admin/nodes", get(admin::list_nodes)) .route("/api/v1/admin/nodes", get(admin::list_nodes))

View file

@ -440,12 +440,15 @@ async fn resolve_enrollment_config(
format!("failed to load node classes: {}", e), format!("failed to load node classes: {}", e),
) )
})?; })?;
let pools = storage.list_pools(cluster_namespace, cluster_id).await.map_err(|e| { let pools = storage
( .list_pools(cluster_namespace, cluster_id)
StatusCode::INTERNAL_SERVER_ERROR, .await
format!("failed to load pools: {}", e), .map_err(|e| {
) (
})?; StatusCode::INTERNAL_SERVER_ERROR,
format!("failed to load pools: {}", e),
)
})?;
drop(storage); drop(storage);
rules.sort_by(|lhs, rhs| { rules.sort_by(|lhs, rhs| {
@ -483,7 +486,11 @@ fn enrollment_rule_matches(rule: &EnrollmentRuleSpec, request: &PhoneHomeRequest
let Some(ip) = request.ip.as_deref() else { let Some(ip) = request.ip.as_deref() else {
return false; return false;
}; };
if !rule.match_ip_prefixes.iter().any(|prefix| ip.starts_with(prefix)) { if !rule
.match_ip_prefixes
.iter()
.any(|prefix| ip.starts_with(prefix))
{
return false; return false;
} }
} }
@ -528,16 +535,16 @@ fn build_node_config_from_rule(
.node_class .node_class
.clone() .clone()
.or_else(|| pool_spec.and_then(|pool| pool.node_class.clone())); .or_else(|| pool_spec.and_then(|pool| pool.node_class.clone()));
let node_class_spec = node_class let node_class_spec = node_class.as_deref().and_then(|name| {
.as_deref() node_classes
.and_then(|name| node_classes.iter().find(|node_class| node_class.name == name)); .iter()
.find(|node_class| node_class.name == name)
});
let role = rule let role = rule
.role .role
.clone() .clone()
.or_else(|| { .or_else(|| node_class_spec.and_then(|node_class| node_class.roles.first().cloned()))
node_class_spec.and_then(|node_class| node_class.roles.first().cloned())
})
.unwrap_or_else(|| "worker".to_string()); .unwrap_or_else(|| "worker".to_string());
let mut labels = std::collections::HashMap::new(); let mut labels = std::collections::HashMap::new();
@ -1011,7 +1018,9 @@ mod tests {
assert_eq!(config.pool.as_deref(), Some("gpu")); assert_eq!(config.pool.as_deref(), Some("gpu"));
assert_eq!(config.node_class.as_deref(), Some("gpu-worker")); assert_eq!(config.node_class.as_deref(), Some("gpu-worker"));
assert_eq!(config.nix_profile.as_deref(), Some("profiles/gpu-worker")); assert_eq!(config.nix_profile.as_deref(), Some("profiles/gpu-worker"));
let install_plan = config.install_plan.expect("install_plan should inherit from class"); let install_plan = config
.install_plan
.expect("install_plan should inherit from class");
assert_eq!( assert_eq!(
install_plan.nixos_configuration.as_deref(), install_plan.nixos_configuration.as_deref(),
Some("gpu-worker") Some("gpu-worker")

View file

@ -80,7 +80,10 @@ impl NodeStorage {
} }
fn cluster_node_classes_prefix(&self, cluster_namespace: &str, cluster_id: &str) -> String { fn cluster_node_classes_prefix(&self, cluster_namespace: &str, cluster_id: &str) -> String {
format!("{}/clusters/{}/node-classes/", cluster_namespace, cluster_id) format!(
"{}/clusters/{}/node-classes/",
cluster_namespace, cluster_id
)
} }
fn cluster_pools_prefix(&self, cluster_namespace: &str, cluster_id: &str) -> String { fn cluster_pools_prefix(&self, cluster_namespace: &str, cluster_id: &str) -> String {
@ -276,8 +279,10 @@ impl NodeStorage {
cluster_namespace: &str, cluster_namespace: &str,
cluster_id: &str, cluster_id: &str,
) -> Result<Vec<EnrollmentRuleSpec>, StorageError> { ) -> Result<Vec<EnrollmentRuleSpec>, StorageError> {
self.list_cluster_objects(self.cluster_enrollment_rules_prefix(cluster_namespace, cluster_id)) self.list_cluster_objects(
.await self.cluster_enrollment_rules_prefix(cluster_namespace, cluster_id),
)
.await
} }
/// Get node info by node_id /// Get node info by node_id

View file

@ -446,6 +446,7 @@ pub struct ObservedSystemState {
/// Desired NixOS system state for a specific node. /// Desired NixOS system state for a specific node.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct DesiredSystemSpec { pub struct DesiredSystemSpec {
#[serde(default)]
pub node_id: String, pub node_id: String,
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub nixos_configuration: Option<String>, pub nixos_configuration: Option<String>,
@ -490,6 +491,8 @@ pub struct NodeSpec {
#[serde(default)] #[serde(default)]
pub install_plan: Option<InstallPlan>, pub install_plan: Option<InstallPlan>,
#[serde(default)] #[serde(default)]
pub desired_system: Option<DesiredSystemSpec>,
#[serde(default)]
pub state: Option<String>, pub state: Option<String>,
#[serde(default)] #[serde(default)]
pub last_heartbeat: Option<DateTime<Utc>>, pub last_heartbeat: Option<DateTime<Utc>>,
@ -954,10 +957,7 @@ mod tests {
nixos_configuration: Some("node01".to_string()), nixos_configuration: Some("node01".to_string()),
flake_ref: Some("/opt/plasmacloud-src".to_string()), flake_ref: Some("/opt/plasmacloud-src".to_string()),
switch_action: Some("switch".to_string()), switch_action: Some("switch".to_string()),
health_check_command: vec![ health_check_command: vec!["systemctl".to_string(), "is-system-running".to_string()],
"systemctl".to_string(),
"is-system-running".to_string(),
],
rollback_on_failure: Some(true), rollback_on_failure: Some(true),
}; };

View file

@ -101,6 +101,7 @@ raft_port="$(free_port)"
gossip_port="$(free_port)" gossip_port="$(free_port)"
deployer_port="$(free_port)" deployer_port="$(free_port)"
bootstrap_token="bootstrap-secret" bootstrap_token="bootstrap-secret"
printf 'bundle-bytes' >"$tmp_dir/flake-bundle.tar.gz"
cat >"$tmp_dir/chainfire.toml" <<EOF cat >"$tmp_dir/chainfire.toml" <<EOF
[node] [node]
@ -140,6 +141,7 @@ cluster_id = "test-cluster"
cluster_namespace = "photoncloud" cluster_namespace = "photoncloud"
heartbeat_timeout_secs = 300 heartbeat_timeout_secs = 300
local_state_path = "$tmp_dir/deployer-state" local_state_path = "$tmp_dir/deployer-state"
bootstrap_flake_bundle_path = "$tmp_dir/flake-bundle.tar.gz"
bootstrap_token = "${bootstrap_token}" bootstrap_token = "${bootstrap_token}"
require_chainfire = true require_chainfire = true
allow_unknown_nodes = false allow_unknown_nodes = false
@ -206,6 +208,13 @@ nodes:
install_plan: install_plan:
nixos_configuration: node01 nixos_configuration: node01
disko_config_path: nix/nodes/vm-cluster/node01/disko.nix disko_config_path: nix/nodes/vm-cluster/node01/disko.nix
desired_system:
flake_ref: "github:centra/cloud"
health_check_command:
- systemctl
- is-system-running
- "--wait"
rollback_on_failure: true
state: pending state: pending
enrollment_rules: enrollment_rules:
@ -294,6 +303,23 @@ for path, expected in (
print("cloud-init endpoints validated") print("cloud-init endpoints validated")
PY PY
# Fetch /api/v1/bootstrap/flake-bundle from the deployer, authenticating with
# the bootstrap token in the X-Deployer-Token header, and require the response
# body to be exactly b"bundle-bytes". The embedded Python raises on a failed
# request or assertion, so a broken endpoint makes this step exit non-zero.
echo "Validating bootstrap flake bundle endpoint"
python3 - "$deployer_endpoint" "$bootstrap_token" <<'PY'
import sys
import urllib.request
endpoint, token = sys.argv[1], sys.argv[2]
request = urllib.request.Request(
endpoint + "/api/v1/bootstrap/flake-bundle",
headers={"X-Deployer-Token": token},
)
with urllib.request.urlopen(request, timeout=5) as response:
payload = response.read()
assert payload == b"bundle-bytes"
print("bootstrap flake bundle endpoint validated")
PY
echo "Validating enrollment-rule bootstrap path" echo "Validating enrollment-rule bootstrap path"
dynamic_node_id="$( dynamic_node_id="$(
python3 - "$deployer_endpoint" "$bootstrap_token" <<'PY' python3 - "$deployer_endpoint" "$bootstrap_token" <<'PY'
@ -350,7 +376,10 @@ records = {}
with open(path, "r", encoding="utf-8") as handle: with open(path, "r", encoding="utf-8") as handle:
for line in handle: for line in handle:
line = line.strip() line = line.strip()
if " value=" not in line: if " key=" not in line or " value=" not in line:
continue
key = line.split(" key=", 1)[1].split(" value=", 1)[0]
if key.endswith("/desired-system"):
continue continue
value = line.split(" value=", 1)[1] value = line.split(" value=", 1)[1]
record = json.loads(value) record = json.loads(value)
@ -375,4 +404,26 @@ if dynamic.get("labels", {}).get("lane") != "edge":
print("Deployer bootstrap records validated") print("Deployer bootstrap records validated")
PY PY
echo "Inspecting desired-system state"
run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/nodes/node-seeded/desired-system" >"$tmp_dir/desired-system.dump"
python3 - "$tmp_dir/desired-system.dump" <<'PY'
# Verify that the dump holds exactly one desired-system record for node-seeded
# and that it carries the expected fields. The seeded desired_system block does
# not set nixos_configuration, so "node01" is expected to come from install_plan.
import json
import sys

path = sys.argv[1]
with open(path, "r", encoding="utf-8") as handle:
    lines = [line.strip() for line in handle if " value=" in line]
if len(lines) != 1:
    raise SystemExit(f"unexpected desired-system dump: {lines}")

payload = json.loads(lines[0].split(" value=", 1)[1])
expected = {
    "node_id": "node-seeded",
    "nixos_configuration": "node01",
    "flake_ref": "github:centra/cloud",
    "health_check_command": ["systemctl", "is-system-running", "--wait"],
    "rollback_on_failure": True,
}

# Collect every mismatch so one failing run reports all wrong or missing
# fields. The type comparison keeps the boolean check strict (1 is not True).
mismatches = []
for field, want in expected.items():
    got = payload.get(field)
    if type(got) is not type(want) or got != want:
        mismatches.append(f"{field}: expected {want!r}, got {got!r}")
if mismatches:
    raise SystemExit("unexpected desired-system payload: " + "; ".join(mismatches))

print("desired-system state validated")
PY
echo "Deployer bootstrap E2E verification passed" echo "Deployer bootstrap E2E verification passed"

View file

@ -102,6 +102,43 @@
|| builtins.elem topLevel includedTopLevels; || builtins.elem topLevel includedTopLevels;
}; };
# Filtered copy of the flake root used as input for the bootstrap flake bundle.
# Keeps flake.nix, flake.lock and the top-level directories listed below, and
# drops every path that has a component named exactly "target"
# (presumably Cargo build output -- confirm).
flakeBundleSrc = pkgs.lib.cleanSourceWith {
src = ./.;
filter = path: type:
let
# Path relative to the flake root, e.g. "nix/nodes/vm-cluster/node01/disko.nix".
rel = pkgs.lib.removePrefix ((toString ./. ) + "/") (toString path);
# First component of rel; decides whether the entry belongs to an included tree.
topLevel = builtins.head (pkgs.lib.splitString "/" rel);
includedTopLevels = [
"apigateway"
"baremetal"
"chainfire"
"coronafs"
"crates"
"creditservice"
"deployer"
"fiberlb"
"flashdns"
"flaredb"
"iam"
"k8shost"
"lightningstor"
"mtls-agent"
"nightlight"
"nix"
"nix-nos"
"plasmavmc"
"prismnet"
];
# builtins.match must match the whole string, so this is true when rel is
# "target", ends in "/target", or has "target" as an inner path component.
isTargetDir = builtins.match "(.*/)?target(/.*)?" rel != null;
in
!isTargetDir
&& (
rel == ""
|| builtins.elem rel [ "flake.nix" "flake.lock" ]
|| builtins.elem topLevel includedTopLevels
);
};
# Helper function to build a Rust workspace package # Helper function to build a Rust workspace package
# Parameters: # Parameters:
# name: package name (e.g., "chainfire-server") # name: package name (e.g., "chainfire-server")
@ -397,6 +434,20 @@
description = "Node-local NixOS reconciliation agent for PhotonCloud hosts"; description = "Node-local NixOS reconciliation agent for PhotonCloud hosts";
}; };
# Gzip-compressed tarball of flakeBundleSrc, written directly to $out.
# The tar flags sort entries by name, pin the mtime and force numeric
# owner/group 0, and gzip -n leaves the original name and timestamp out of the
# gzip header; together these are intended to keep the archive reproducible.
plasmacloudFlakeBundle = pkgs.runCommand "plasmacloud-flake-bundle.tar.gz" {
nativeBuildInputs = [ pkgs.gnutar pkgs.gzip ];
} ''
tar \
--sort=name \
--mtime='@1' \
--owner=0 \
--group=0 \
--numeric-owner \
-C ${flakeBundleSrc} \
-cf - . \
| gzip -n > "$out"
'';
# -------------------------------------------------------------------- # --------------------------------------------------------------------
# Fleet Scheduler: Non-Kubernetes service scheduler for bare-metal nodes # Fleet Scheduler: Non-Kubernetes service scheduler for bare-metal nodes
# -------------------------------------------------------------------- # --------------------------------------------------------------------
@ -410,6 +461,8 @@
vmClusterDeployerState = vmClusterDeployerState =
self.nixosConfigurations.node01.config.system.build.plasmacloudDeployerClusterState; self.nixosConfigurations.node01.config.system.build.plasmacloudDeployerClusterState;
vmClusterFlakeBundle = self.packages.${system}.plasmacloudFlakeBundle;
# -------------------------------------------------------------------- # --------------------------------------------------------------------
# Default package: Build all servers # Default package: Build all servers
# -------------------------------------------------------------------- # --------------------------------------------------------------------
@ -583,7 +636,7 @@
nix-nos.nixosModules.default nix-nos.nixosModules.default
./nix/nodes/vm-cluster/node01/configuration.nix ./nix/nodes/vm-cluster/node01/configuration.nix
self.nixosModules.default self.nixosModules.default
{ ({ pkgs, ... }: {
services.deployer = { services.deployer = {
enable = true; enable = true;
bindAddr = "0.0.0.0:8088"; bindAddr = "0.0.0.0:8088";
@ -594,6 +647,7 @@
allowUnauthenticated = false; allowUnauthenticated = false;
bootstrapToken = "vm-cluster-bootstrap-token"; bootstrapToken = "vm-cluster-bootstrap-token";
adminToken = "vm-cluster-admin-token"; adminToken = "vm-cluster-admin-token";
bootstrapFlakeBundle = pkgs.plasmacloudFlakeBundle;
seedClusterState = true; seedClusterState = true;
}; };
@ -604,11 +658,9 @@
nodeId = "node01"; nodeId = "node01";
flakeRoot = self.outPath; flakeRoot = self.outPath;
intervalSecs = 30; intervalSecs = 30;
healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ];
rollbackOnFailure = true;
apply = true; apply = true;
}; };
} })
{ nixpkgs.overlays = [ self.overlays.default ]; } { nixpkgs.overlays = [ self.overlays.default ]; }
]; ];
}; };
@ -628,8 +680,6 @@
nodeId = "node02"; nodeId = "node02";
flakeRoot = self.outPath; flakeRoot = self.outPath;
intervalSecs = 30; intervalSecs = 30;
healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ];
rollbackOnFailure = true;
apply = true; apply = true;
}; };
} }
@ -652,8 +702,6 @@
nodeId = "node03"; nodeId = "node03";
flakeRoot = self.outPath; flakeRoot = self.outPath;
intervalSecs = 30; intervalSecs = 30;
healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ];
rollbackOnFailure = true;
apply = true; apply = true;
}; };
} }
@ -684,6 +732,7 @@
k8shost-server = self.packages.${final.system}.k8shost-server; k8shost-server = self.packages.${final.system}.k8shost-server;
deployer-server = self.packages.${final.system}.deployer-server; deployer-server = self.packages.${final.system}.deployer-server;
deployer-ctl = self.packages.${final.system}.deployer-ctl; deployer-ctl = self.packages.${final.system}.deployer-ctl;
plasmacloudFlakeBundle = self.packages.${final.system}.plasmacloudFlakeBundle;
nix-agent = self.packages.${final.system}.nix-agent; nix-agent = self.packages.${final.system}.nix-agent;
node-agent = self.packages.${final.system}.node-agent; node-agent = self.packages.${final.system}.node-agent;
fleet-scheduler = self.packages.${final.system}.fleet-scheduler; fleet-scheduler = self.packages.${final.system}.fleet-scheduler;

View file

@ -186,6 +186,8 @@
NODE_IP=$(${pkgs.jq}/bin/jq -r '.ip // empty' /etc/plasmacloud/node-config.json) NODE_IP=$(${pkgs.jq}/bin/jq -r '.ip // empty' /etc/plasmacloud/node-config.json)
NIXOS_CONFIGURATION=$(${pkgs.jq}/bin/jq -r '.install_plan.nixos_configuration // .hostname // empty' /etc/plasmacloud/node-config.json) NIXOS_CONFIGURATION=$(${pkgs.jq}/bin/jq -r '.install_plan.nixos_configuration // .hostname // empty' /etc/plasmacloud/node-config.json)
DISKO_PATH=$(${pkgs.jq}/bin/jq -r '.install_plan.disko_config_path // empty' /etc/plasmacloud/node-config.json) DISKO_PATH=$(${pkgs.jq}/bin/jq -r '.install_plan.disko_config_path // empty' /etc/plasmacloud/node-config.json)
DEPLOYER_URL="''${DEPLOYER_URL:-http://192.168.100.1:8080}"
SRC_ROOT="/opt/plasmacloud-src"
if [ -z "$NODE_ID" ] || [ -z "$NODE_IP" ]; then if [ -z "$NODE_ID" ] || [ -z "$NODE_IP" ]; then
echo "ERROR: node-config.json missing hostname/ip" echo "ERROR: node-config.json missing hostname/ip"
@ -197,9 +199,38 @@
exit 1 exit 1
fi fi
# Resolve the bootstrap token: a non-empty token file takes precedence over the
# DEPLOYER_BOOTSTRAP_TOKEN environment variable; with neither, no token is sent.
TOKEN_FILE="/etc/plasmacloud/bootstrap-token"
DEPLOYER_TOKEN=""
if [ -s "$TOKEN_FILE" ]; then
DEPLOYER_TOKEN=$(cat "$TOKEN_FILE")
elif [ -n "''${DEPLOYER_BOOTSTRAP_TOKEN:-}" ]; then
DEPLOYER_TOKEN="''${DEPLOYER_BOOTSTRAP_TOKEN}"
fi
# Base curl options: silent, fail on HTTP errors, follow redirects, and bound
# both the connect time and the total transfer time.
CURL_ARGS=(-sfL --connect-timeout 5 --max-time 120)
if [ -n "$DEPLOYER_TOKEN" ]; then
CURL_ARGS+=(-H "X-Deployer-Token: $DEPLOYER_TOKEN")
fi
# Add a custom CA certificate only when DEPLOYER_CA_CERT names an existing file.
if [ -n "''${DEPLOYER_CA_CERT:-}" ] && [ -f "''${DEPLOYER_CA_CERT}" ]; then
CURL_ARGS+=(--cacert "''${DEPLOYER_CA_CERT}")
fi
# Try to replace the source tree at SRC_ROOT with the flake bundle served by
# the deployer. A failed download is not fatal: the tree already present at
# SRC_ROOT stays in use.
BUNDLE_PATH="/run/plasmacloud/flake-bundle.tar.gz"
mkdir -p /run/plasmacloud
if ${pkgs.curl}/bin/curl "''${CURL_ARGS[@]}" \
  "$DEPLOYER_URL/api/v1/bootstrap/flake-bundle" \
  -o "$BUNDLE_PATH"; then
  echo "Downloaded bootstrap flake bundle from deployer"
  # Unpack into a staging directory and swap it in only after extraction
  # succeeded, so a corrupt or truncated bundle cannot wipe out SRC_ROOT.
  STAGING_ROOT="$SRC_ROOT.staging"
  rm -rf "$STAGING_ROOT"
  mkdir -p "$STAGING_ROOT"
  # pipefail inside the subshell makes a gzip failure visible as well;
  # without it only the exit status of tar would be checked.
  if ( set -o pipefail; ${pkgs.gzip}/bin/gzip -dc "$BUNDLE_PATH" | ${pkgs.gnutar}/bin/tar -xf - -C "$STAGING_ROOT" ); then
    rm -rf "$SRC_ROOT"
    mv "$STAGING_ROOT" "$SRC_ROOT"
  else
    echo "WARNING: failed to unpack deployer flake bundle; using embedded source tree"
    rm -rf "$STAGING_ROOT"
  fi
else
  echo "No deployer flake bundle available; using embedded source tree"
fi
if [ -z "$DISKO_PATH" ]; then if [ -z "$DISKO_PATH" ]; then
CANDIDATE_DISKO="nix/nodes/vm-cluster/$NODE_ID/disko.nix" CANDIDATE_DISKO="nix/nodes/vm-cluster/$NODE_ID/disko.nix"
if [ -f "/opt/plasmacloud-src/$CANDIDATE_DISKO" ]; then if [ -f "$SRC_ROOT/$CANDIDATE_DISKO" ]; then
DISKO_PATH="$CANDIDATE_DISKO" DISKO_PATH="$CANDIDATE_DISKO"
fi fi
fi fi
@ -209,8 +240,8 @@
exit 1 exit 1
fi fi
if [ ! -f "/opt/plasmacloud-src/$DISKO_PATH" ]; then if [ ! -f "$SRC_ROOT/$DISKO_PATH" ]; then
echo "ERROR: Disko config not found: /opt/plasmacloud-src/$DISKO_PATH" echo "ERROR: Disko config not found: $SRC_ROOT/$DISKO_PATH"
exit 1 exit 1
fi fi
@ -238,14 +269,14 @@
fi fi
echo "Validating NixOS configuration output..." echo "Validating NixOS configuration output..."
nix eval --raw "/opt/plasmacloud-src#nixosConfigurations.$NIXOS_CONFIGURATION.config.system.build.toplevel.drvPath" >/dev/null nix eval --raw "$SRC_ROOT#nixosConfigurations.$NIXOS_CONFIGURATION.config.system.build.toplevel.drvPath" >/dev/null
echo "Running disko to partition $DISK..." echo "Running disko to partition $DISK..."
export NIX_CONFIG="experimental-features = nix-command flakes" export NIX_CONFIG="experimental-features = nix-command flakes"
nix run github:nix-community/disko -- --mode disko "/opt/plasmacloud-src/$DISKO_PATH" nix run github:nix-community/disko -- --mode disko "$SRC_ROOT/$DISKO_PATH"
echo "Running nixos-install..." echo "Running nixos-install..."
nixos-install --flake "/opt/plasmacloud-src#$NIXOS_CONFIGURATION" --no-root-passwd nixos-install --flake "$SRC_ROOT#$NIXOS_CONFIGURATION" --no-root-passwd
sync sync
echo " Install complete; rebooting..." echo " Install complete; rebooting..."
@ -255,7 +286,7 @@
# Packages for bootstrap + install # Packages for bootstrap + install
environment.systemPackages = with pkgs; [ environment.systemPackages = with pkgs; [
curl jq vim htop gawk gnugrep util-linux parted dosfstools e2fsprogs curl jq vim htop gawk gnugrep util-linux parted dosfstools e2fsprogs gnutar gzip
]; ];
# SSH with key-based auth for non-interactive access # SSH with key-based auth for non-interactive access

View file

@ -19,9 +19,44 @@ let
}; };
}; };
# Submodule type describing the desired NixOS state of a node for nix-agent.
# Takes the module system's `types` attrset as its argument. Every field is
# optional: scalars default to null and healthCheckCommand to an empty list.
mkDesiredSystemType = types: types.submodule {
options = {
nixosConfiguration = mkOption {
type = types.nullOr types.str;
default = null;
description = "Name of the nixosConfigurations output to activate";
};
flakeRef = mkOption {
type = types.nullOr types.str;
default = null;
description = "Explicit flake reference used by nix-agent";
};
switchAction = mkOption {
type = types.nullOr types.str;
default = null;
description = "switch-to-configuration action for nix-agent";
};
# Command and arguments as separate list elements, not a shell string.
healthCheckCommand = mkOption {
type = types.listOf types.str;
default = [ ];
description = "Command vector executed after activation to validate node health";
};
rollbackOnFailure = mkOption {
type = types.nullOr types.bool;
default = null;
description = "Whether nix-agent should roll back when the health check fails";
};
};
};
mkNodeType = types: mkNodeType = types:
let let
installPlanType = mkInstallPlanType types; installPlanType = mkInstallPlanType types;
desiredSystemType = mkDesiredSystemType types;
in types.submodule { in types.submodule {
options = { options = {
role = mkOption { role = mkOption {
@ -101,6 +136,12 @@ let
description = "Explicit NixOS installation targets for bare-metal bootstrap"; description = "Explicit NixOS installation targets for bare-metal bootstrap";
}; };
# Optional per-node desired-system settings; null means none are declared.
# mkDeployerNodeSpec exports them as desired_system only when mkDesiredSystem
# yields a non-null result for this node.
desiredSystem = mkOption {
type = types.nullOr desiredSystemType;
default = null;
description = "Desired NixOS reconciliation state exported for nix-agent";
};
state = mkOption { state = mkOption {
type = types.nullOr (types.enum [ "pending" "provisioning" "active" "failed" "draining" ]); type = types.nullOr (types.enum [ "pending" "provisioning" "active" "failed" "draining" ]);
default = null; default = null;
@ -277,6 +318,29 @@ let
in in
if plan == null || rendered == { } then null else rendered; if plan == null || rendered == { } then null else rendered;
# Render a node's desiredSystem option value into the snake_case attrset
# exported to the deployer. Returns null when desiredSystem is null or when no
# field is set (scalars null, health check list empty); otherwise returns the
# set fields plus node_id.
mkDesiredSystem = nodeName: desiredSystem:
  if desiredSystem == null then null
  else
    let
      configurationAttrs = optionalAttrs (desiredSystem.nixosConfiguration != null) {
        nixos_configuration = desiredSystem.nixosConfiguration;
      };
      flakeRefAttrs = optionalAttrs (desiredSystem.flakeRef != null) {
        flake_ref = desiredSystem.flakeRef;
      };
      switchActionAttrs = optionalAttrs (desiredSystem.switchAction != null) {
        switch_action = desiredSystem.switchAction;
      };
      healthCheckAttrs = optionalAttrs (desiredSystem.healthCheckCommand != [ ]) {
        health_check_command = desiredSystem.healthCheckCommand;
      };
      rollbackAttrs = optionalAttrs (desiredSystem.rollbackOnFailure != null) {
        rollback_on_failure = desiredSystem.rollbackOnFailure;
      };
      # Only explicitly configured fields end up in the exported attrset.
      fields =
        configurationAttrs
        // flakeRefAttrs
        // switchActionAttrs
        // healthCheckAttrs
        // rollbackAttrs;
    in
    if fields == { } then null else { node_id = nodeName; } // fields;
mkDeployerNodeSpec = nodeName: node: mkDeployerNodeSpec = nodeName: node:
{ {
node_id = nodeName; node_id = nodeName;
@ -303,6 +367,9 @@ let
// optionalAttrs (mkInstallPlan node.installPlan != null) { // optionalAttrs (mkInstallPlan node.installPlan != null) {
install_plan = mkInstallPlan node.installPlan; install_plan = mkInstallPlan node.installPlan;
} }
// optionalAttrs (mkDesiredSystem nodeName node.desiredSystem != null) {
desired_system = mkDesiredSystem nodeName node.desiredSystem;
}
// optionalAttrs (node.state != null) { // optionalAttrs (node.state != null) {
state = node.state; state = node.state;
}; };
@ -455,6 +522,7 @@ in
{ {
inherit inherit
mkInstallPlanType mkInstallPlanType
mkDesiredSystemType
mkNodeType mkNodeType
mkNodeClassType mkNodeClassType
mkNodePoolType mkNodePoolType

View file

@ -22,6 +22,9 @@ let
// lib.optionalAttrs (cfg.clusterId != null) { // lib.optionalAttrs (cfg.clusterId != null) {
cluster_id = cfg.clusterId; cluster_id = cfg.clusterId;
} }
// lib.optionalAttrs (cfg.bootstrapFlakeBundle != null) {
bootstrap_flake_bundle_path = toString cfg.bootstrapFlakeBundle;
}
// lib.optionalAttrs (cfg.tlsCaCertPath != null) { // lib.optionalAttrs (cfg.tlsCaCertPath != null) {
tls_ca_cert_path = cfg.tlsCaCertPath; tls_ca_cert_path = cfg.tlsCaCertPath;
} }
@ -77,6 +80,12 @@ in
description = "Local storage path for deployer bootstrap state"; description = "Local storage path for deployer bootstrap state";
}; };
# Path of the flake bundle archive to serve; null (the default) leaves it unset.
# When set, it is written to the generated deployer config as
# bootstrap_flake_bundle_path.
bootstrapFlakeBundle = lib.mkOption {
type = lib.types.nullOr lib.types.path;
default = null;
description = "Optional tar.gz bundle served to bootstrap installers as the canonical PhotonCloud flake source";
};
requireChainfire = lib.mkOption { requireChainfire = lib.mkOption {
type = lib.types.bool; type = lib.types.bool;
default = false; default = false;

View file

@ -22,6 +22,10 @@
nixosConfiguration = "node01"; nixosConfiguration = "node01";
diskoConfigPath = "nix/nodes/vm-cluster/node01/disko.nix"; diskoConfigPath = "nix/nodes/vm-cluster/node01/disko.nix";
}; };
desiredSystem = {
healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ];
rollbackOnFailure = true;
};
raftPort = 2380; raftPort = 2380;
apiPort = 2379; apiPort = 2379;
}; };
@ -42,6 +46,10 @@
nixosConfiguration = "node02"; nixosConfiguration = "node02";
diskoConfigPath = "nix/nodes/vm-cluster/node02/disko.nix"; diskoConfigPath = "nix/nodes/vm-cluster/node02/disko.nix";
}; };
desiredSystem = {
healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ];
rollbackOnFailure = true;
};
raftPort = 2380; raftPort = 2380;
apiPort = 2379; apiPort = 2379;
}; };
@ -62,6 +70,10 @@
nixosConfiguration = "node03"; nixosConfiguration = "node03";
diskoConfigPath = "nix/nodes/vm-cluster/node03/disko.nix"; diskoConfigPath = "nix/nodes/vm-cluster/node03/disko.nix";
}; };
desiredSystem = {
healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ];
rollbackOnFailure = true;
};
raftPort = 2380; raftPort = 2380;
apiPort = 2379; apiPort = 2379;
}; };

View file

@ -81,6 +81,7 @@
requireChainfire = true; requireChainfire = true;
bootstrapToken = "test-bootstrap-token"; bootstrapToken = "test-bootstrap-token";
adminToken = "test-admin-token"; adminToken = "test-admin-token";
bootstrapFlakeBundle = pkgs.plasmacloudFlakeBundle;
seedClusterState = true; seedClusterState = true;
}; };