Add bootstrap flake bundle delivery and Nix desired-system generation
This commit is contained in:
parent
edd2442267
commit
fbcbb4e5dc
15 changed files with 666 additions and 188 deletions
|
|
@ -164,18 +164,25 @@ fn node_config_from_spec(node: &NodeSpec) -> NodeConfig {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn desired_system_from_spec(node: &NodeSpec) -> Option<DesiredSystemSpec> {
|
fn desired_system_from_spec(node: &NodeSpec) -> Option<DesiredSystemSpec> {
|
||||||
Some(DesiredSystemSpec {
|
let mut desired = node.desired_system.clone().unwrap_or_default();
|
||||||
node_id: node.node_id.clone(),
|
desired.node_id = node.node_id.clone();
|
||||||
nixos_configuration: node
|
if desired.nixos_configuration.is_none() {
|
||||||
|
desired.nixos_configuration = node
|
||||||
.install_plan
|
.install_plan
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.and_then(|plan| plan.nixos_configuration.clone()),
|
.and_then(|plan| plan.nixos_configuration.clone());
|
||||||
flake_ref: None,
|
}
|
||||||
switch_action: Some("switch".to_string()),
|
if desired.switch_action.is_none() {
|
||||||
health_check_command: Vec::new(),
|
desired.switch_action = Some("switch".to_string());
|
||||||
rollback_on_failure: Some(true),
|
}
|
||||||
})
|
if desired.rollback_on_failure.is_none() {
|
||||||
.filter(|desired| desired.nixos_configuration.is_some())
|
desired.rollback_on_failure = Some(true);
|
||||||
|
}
|
||||||
|
if desired.nixos_configuration.is_some() {
|
||||||
|
Some(desired)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn resolve_nodes(spec: &ClusterStateSpec) -> Result<Vec<NodeSpec>> {
|
fn resolve_nodes(spec: &ClusterStateSpec) -> Result<Vec<NodeSpec>> {
|
||||||
|
|
@ -196,12 +203,12 @@ fn resolve_nodes(spec: &ClusterStateSpec) -> Result<Vec<NodeSpec>> {
|
||||||
let mut resolved = node.clone();
|
let mut resolved = node.clone();
|
||||||
|
|
||||||
let pool_spec = match resolved.pool.as_deref() {
|
let pool_spec = match resolved.pool.as_deref() {
|
||||||
Some(pool_name) => Some(
|
Some(pool_name) => Some(pools.get(pool_name).copied().with_context(|| {
|
||||||
pools
|
format!(
|
||||||
.get(pool_name)
|
"node {} references unknown pool {}",
|
||||||
.copied()
|
node.node_id, pool_name
|
||||||
.with_context(|| format!("node {} references unknown pool {}", node.node_id, pool_name))?,
|
)
|
||||||
),
|
})?),
|
||||||
None => None,
|
None => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -561,7 +568,10 @@ pub async fn apply_cluster_state(
|
||||||
let resolved_nodes = resolve_nodes(&spec)?;
|
let resolved_nodes = resolve_nodes(&spec)?;
|
||||||
let cluster_id = cli_cluster_id.unwrap_or(&spec.cluster.cluster_id);
|
let cluster_id = cli_cluster_id.unwrap_or(&spec.cluster.cluster_id);
|
||||||
|
|
||||||
info!(cluster_id, "applying cluster state to Chainfire at {}", endpoint);
|
info!(
|
||||||
|
cluster_id,
|
||||||
|
"applying cluster state to Chainfire at {}", endpoint
|
||||||
|
);
|
||||||
let mut client = Client::connect(endpoint.to_string()).await?;
|
let mut client = Client::connect(endpoint.to_string()).await?;
|
||||||
|
|
||||||
// MVP としては bootstrap と同じく upsert のみ行う。
|
// MVP としては bootstrap と同じく upsert のみ行う。
|
||||||
|
|
@ -623,7 +633,12 @@ pub async fn apply_cluster_state(
|
||||||
client.put(&key, &value).await?;
|
client.put(&key, &value).await?;
|
||||||
}
|
}
|
||||||
for inst in &spec.instances {
|
for inst in &spec.instances {
|
||||||
let key = key_instance(cluster_namespace, cluster_id, &inst.service, &inst.instance_id);
|
let key = key_instance(
|
||||||
|
cluster_namespace,
|
||||||
|
cluster_id,
|
||||||
|
&inst.service,
|
||||||
|
&inst.instance_id,
|
||||||
|
);
|
||||||
let value = serde_json::to_vec(inst)?;
|
let value = serde_json::to_vec(inst)?;
|
||||||
client.put(&key, &value).await?;
|
client.put(&key, &value).await?;
|
||||||
}
|
}
|
||||||
|
|
@ -668,8 +683,9 @@ pub async fn dump_prefix(endpoint: &str, prefix: &str, json_output: bool) -> Res
|
||||||
for (key, value, rev) in kvs {
|
for (key, value, rev) in kvs {
|
||||||
let k = String::from_utf8_lossy(&key);
|
let k = String::from_utf8_lossy(&key);
|
||||||
if json_output {
|
if json_output {
|
||||||
let value = serde_json::from_slice::<Value>(&value)
|
let value = serde_json::from_slice::<Value>(&value).unwrap_or_else(|_| {
|
||||||
.unwrap_or_else(|_| Value::String(String::from_utf8_lossy(&value).into_owned()));
|
Value::String(String::from_utf8_lossy(&value).into_owned())
|
||||||
|
});
|
||||||
println!(
|
println!(
|
||||||
"{}",
|
"{}",
|
||||||
serde_json::to_string(&json!({
|
serde_json::to_string(&json!({
|
||||||
|
|
@ -698,42 +714,80 @@ async fn prune_cluster_state(
|
||||||
spec: &ClusterStateSpec,
|
spec: &ClusterStateSpec,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut desired_keys = HashSet::new();
|
let mut desired_keys = HashSet::new();
|
||||||
desired_keys.insert(String::from_utf8_lossy(&key_cluster_meta(cluster_namespace, cluster_id)).to_string());
|
desired_keys.insert(
|
||||||
|
String::from_utf8_lossy(&key_cluster_meta(cluster_namespace, cluster_id)).to_string(),
|
||||||
|
);
|
||||||
|
|
||||||
let resolved_nodes = resolve_nodes(spec)?;
|
let resolved_nodes = resolve_nodes(spec)?;
|
||||||
|
|
||||||
for node in &resolved_nodes {
|
for node in &resolved_nodes {
|
||||||
desired_keys.insert(String::from_utf8_lossy(&key_node(cluster_namespace, cluster_id, &node.node_id)).to_string());
|
desired_keys.insert(
|
||||||
|
String::from_utf8_lossy(&key_node(cluster_namespace, cluster_id, &node.node_id))
|
||||||
|
.to_string(),
|
||||||
|
);
|
||||||
if desired_system_from_spec(node).is_some() {
|
if desired_system_from_spec(node).is_some() {
|
||||||
desired_keys.insert(
|
desired_keys.insert(
|
||||||
String::from_utf8_lossy(&key_desired_system(cluster_namespace, cluster_id, &node.node_id))
|
String::from_utf8_lossy(&key_desired_system(
|
||||||
|
cluster_namespace,
|
||||||
|
cluster_id,
|
||||||
|
&node.node_id,
|
||||||
|
))
|
||||||
.to_string(),
|
.to_string(),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for node_class in &spec.node_classes {
|
for node_class in &spec.node_classes {
|
||||||
desired_keys.insert(
|
desired_keys.insert(
|
||||||
String::from_utf8_lossy(&key_node_class(cluster_namespace, cluster_id, &node_class.name))
|
String::from_utf8_lossy(&key_node_class(
|
||||||
|
cluster_namespace,
|
||||||
|
cluster_id,
|
||||||
|
&node_class.name,
|
||||||
|
))
|
||||||
.to_string(),
|
.to_string(),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
for pool in &spec.pools {
|
for pool in &spec.pools {
|
||||||
desired_keys.insert(String::from_utf8_lossy(&key_pool(cluster_namespace, cluster_id, &pool.name)).to_string());
|
desired_keys.insert(
|
||||||
|
String::from_utf8_lossy(&key_pool(cluster_namespace, cluster_id, &pool.name))
|
||||||
|
.to_string(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
for rule in &spec.enrollment_rules {
|
for rule in &spec.enrollment_rules {
|
||||||
desired_keys.insert(
|
desired_keys.insert(
|
||||||
String::from_utf8_lossy(&key_enrollment_rule(cluster_namespace, cluster_id, &rule.name))
|
String::from_utf8_lossy(&key_enrollment_rule(
|
||||||
|
cluster_namespace,
|
||||||
|
cluster_id,
|
||||||
|
&rule.name,
|
||||||
|
))
|
||||||
.to_string(),
|
.to_string(),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
for svc in &spec.services {
|
for svc in &spec.services {
|
||||||
desired_keys.insert(String::from_utf8_lossy(&key_service(cluster_namespace, cluster_id, &svc.name)).to_string());
|
desired_keys.insert(
|
||||||
|
String::from_utf8_lossy(&key_service(cluster_namespace, cluster_id, &svc.name))
|
||||||
|
.to_string(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
for inst in &spec.instances {
|
for inst in &spec.instances {
|
||||||
desired_keys.insert(String::from_utf8_lossy(&key_instance(cluster_namespace, cluster_id, &inst.service, &inst.instance_id)).to_string());
|
desired_keys.insert(
|
||||||
|
String::from_utf8_lossy(&key_instance(
|
||||||
|
cluster_namespace,
|
||||||
|
cluster_id,
|
||||||
|
&inst.service,
|
||||||
|
&inst.instance_id,
|
||||||
|
))
|
||||||
|
.to_string(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
for policy in &spec.mtls_policies {
|
for policy in &spec.mtls_policies {
|
||||||
desired_keys.insert(String::from_utf8_lossy(&key_mtls_policy(cluster_namespace, cluster_id, &policy.policy_id)).to_string());
|
desired_keys.insert(
|
||||||
|
String::from_utf8_lossy(&key_mtls_policy(
|
||||||
|
cluster_namespace,
|
||||||
|
cluster_id,
|
||||||
|
&policy.policy_id,
|
||||||
|
))
|
||||||
|
.to_string(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
let prefix = cluster_prefix(cluster_namespace, cluster_id);
|
let prefix = cluster_prefix(cluster_namespace, cluster_id);
|
||||||
|
|
@ -839,6 +893,7 @@ mod tests {
|
||||||
failure_domain: Some("rack-a".to_string()),
|
failure_domain: Some("rack-a".to_string()),
|
||||||
nix_profile: None,
|
nix_profile: None,
|
||||||
install_plan: None,
|
install_plan: None,
|
||||||
|
desired_system: None,
|
||||||
state: Some(match NodeState::Pending {
|
state: Some(match NodeState::Pending {
|
||||||
NodeState::Pending => "pending".to_string(),
|
NodeState::Pending => "pending".to_string(),
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
|
|
@ -879,7 +934,10 @@ mod tests {
|
||||||
|
|
||||||
assert_eq!(node.node_class.as_deref(), Some("worker-linux"));
|
assert_eq!(node.node_class.as_deref(), Some("worker-linux"));
|
||||||
assert_eq!(node.nix_profile.as_deref(), Some("profiles/worker-linux"));
|
assert_eq!(node.nix_profile.as_deref(), Some("profiles/worker-linux"));
|
||||||
let install_plan = node.install_plan.as_ref().expect("install plan should inherit");
|
let install_plan = node
|
||||||
|
.install_plan
|
||||||
|
.as_ref()
|
||||||
|
.expect("install plan should inherit");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
install_plan.nixos_configuration.as_deref(),
|
install_plan.nixos_configuration.as_deref(),
|
||||||
Some("worker-golden")
|
Some("worker-golden")
|
||||||
|
|
@ -890,11 +948,15 @@ mod tests {
|
||||||
assert_eq!(node.labels.get("env").map(String::as_str), Some("dev"));
|
assert_eq!(node.labels.get("env").map(String::as_str), Some("dev"));
|
||||||
assert_eq!(node.labels.get("pool").map(String::as_str), Some("general"));
|
assert_eq!(node.labels.get("pool").map(String::as_str), Some("general"));
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
node.labels.get("nodeclass.photoncloud.io/name").map(String::as_str),
|
node.labels
|
||||||
|
.get("nodeclass.photoncloud.io/name")
|
||||||
|
.map(String::as_str),
|
||||||
Some("worker-linux")
|
Some("worker-linux")
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
node.labels.get("topology.kubernetes.io/zone").map(String::as_str),
|
node.labels
|
||||||
|
.get("topology.kubernetes.io/zone")
|
||||||
|
.map(String::as_str),
|
||||||
Some("rack-a")
|
Some("rack-a")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
@ -906,11 +968,36 @@ mod tests {
|
||||||
let desired = desired_system_from_spec(&resolved[0]).expect("desired system should exist");
|
let desired = desired_system_from_spec(&resolved[0]).expect("desired system should exist");
|
||||||
|
|
||||||
assert_eq!(desired.node_id, "node01");
|
assert_eq!(desired.node_id, "node01");
|
||||||
assert_eq!(desired.nixos_configuration.as_deref(), Some("worker-golden"));
|
assert_eq!(
|
||||||
|
desired.nixos_configuration.as_deref(),
|
||||||
|
Some("worker-golden")
|
||||||
|
);
|
||||||
assert_eq!(desired.switch_action.as_deref(), Some("switch"));
|
assert_eq!(desired.switch_action.as_deref(), Some("switch"));
|
||||||
assert_eq!(desired.rollback_on_failure, Some(true));
|
assert_eq!(desired.rollback_on_failure, Some(true));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_desired_system_keeps_explicit_node_overrides() {
|
||||||
|
let mut spec = test_spec();
|
||||||
|
spec.nodes[0].desired_system = Some(DesiredSystemSpec {
|
||||||
|
node_id: String::new(),
|
||||||
|
nixos_configuration: Some("node01-next".to_string()),
|
||||||
|
flake_ref: Some("github:centra/cloud".to_string()),
|
||||||
|
switch_action: Some("boot".to_string()),
|
||||||
|
health_check_command: vec!["true".to_string()],
|
||||||
|
rollback_on_failure: Some(false),
|
||||||
|
});
|
||||||
|
|
||||||
|
let resolved = resolve_nodes(&spec).unwrap();
|
||||||
|
let desired = desired_system_from_spec(&resolved[0]).expect("desired system should exist");
|
||||||
|
assert_eq!(desired.node_id, "node01");
|
||||||
|
assert_eq!(desired.nixos_configuration.as_deref(), Some("node01-next"));
|
||||||
|
assert_eq!(desired.flake_ref.as_deref(), Some("github:centra/cloud"));
|
||||||
|
assert_eq!(desired.switch_action.as_deref(), Some("boot"));
|
||||||
|
assert_eq!(desired.health_check_command, vec!["true".to_string()]);
|
||||||
|
assert_eq!(desired.rollback_on_failure, Some(false));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_is_prunable_key_keeps_observed_system() {
|
fn test_is_prunable_key_keeps_observed_system() {
|
||||||
let prefix = cluster_prefix("photoncloud", "test-cluster");
|
let prefix = cluster_prefix("photoncloud", "test-cluster");
|
||||||
|
|
|
||||||
133
deployer/crates/deployer-server/src/bootstrap_assets.rs
Normal file
133
deployer/crates/deployer-server/src/bootstrap_assets.rs
Normal file
|
|
@ -0,0 +1,133 @@
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use axum::{
|
||||||
|
body::Body,
|
||||||
|
extract::State,
|
||||||
|
http::{header, HeaderMap, HeaderValue, StatusCode},
|
||||||
|
response::IntoResponse,
|
||||||
|
};
|
||||||
|
use tokio::fs;
|
||||||
|
|
||||||
|
use crate::{auth::require_bootstrap_auth, state::AppState};
|
||||||
|
|
||||||
|
/// GET /api/v1/bootstrap/flake-bundle
|
||||||
|
pub async fn flake_bundle(
|
||||||
|
State(state): State<Arc<AppState>>,
|
||||||
|
headers: HeaderMap,
|
||||||
|
) -> Result<impl IntoResponse, (StatusCode, String)> {
|
||||||
|
require_bootstrap_auth(&state, &headers)?;
|
||||||
|
|
||||||
|
let Some(path) = state.config.bootstrap_flake_bundle_path.as_ref() else {
|
||||||
|
return Err((
|
||||||
|
StatusCode::SERVICE_UNAVAILABLE,
|
||||||
|
"bootstrap flake bundle not configured".to_string(),
|
||||||
|
));
|
||||||
|
};
|
||||||
|
|
||||||
|
let bytes = fs::read(path).await.map_err(|error| {
|
||||||
|
let status = if error.kind() == std::io::ErrorKind::NotFound {
|
||||||
|
StatusCode::NOT_FOUND
|
||||||
|
} else {
|
||||||
|
StatusCode::INTERNAL_SERVER_ERROR
|
||||||
|
};
|
||||||
|
(
|
||||||
|
status,
|
||||||
|
format!(
|
||||||
|
"failed to read bootstrap flake bundle {}: {}",
|
||||||
|
path.display(),
|
||||||
|
error
|
||||||
|
),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let headers = [
|
||||||
|
(
|
||||||
|
header::CONTENT_TYPE,
|
||||||
|
HeaderValue::from_static("application/gzip"),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
header::CONTENT_DISPOSITION,
|
||||||
|
HeaderValue::from_static("attachment; filename=\"plasmacloud-flake-bundle.tar.gz\""),
|
||||||
|
),
|
||||||
|
];
|
||||||
|
|
||||||
|
Ok((headers, Body::from(bytes)))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use crate::{build_router, config::Config};
|
||||||
|
use axum::{body::to_bytes, http::Request};
|
||||||
|
use std::{
|
||||||
|
fs,
|
||||||
|
time::{SystemTime, UNIX_EPOCH},
|
||||||
|
};
|
||||||
|
use tower::ServiceExt;
|
||||||
|
|
||||||
|
fn temp_path(name: &str) -> std::path::PathBuf {
|
||||||
|
let nanos = SystemTime::now()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.unwrap()
|
||||||
|
.as_nanos();
|
||||||
|
std::env::temp_dir().join(format!("{}-{}-{}", name, std::process::id(), nanos))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn flake_bundle_route_serves_configured_bundle() {
|
||||||
|
let bundle_path = temp_path("deployer-flake-bundle");
|
||||||
|
fs::write(&bundle_path, b"bundle-bytes").unwrap();
|
||||||
|
|
||||||
|
let mut config = Config::default();
|
||||||
|
config.bootstrap_token = Some("test-token".to_string());
|
||||||
|
config.bootstrap_flake_bundle_path = Some(bundle_path.clone());
|
||||||
|
let state = Arc::new(AppState::with_config(config));
|
||||||
|
let app = build_router(state);
|
||||||
|
|
||||||
|
let response = app
|
||||||
|
.oneshot(
|
||||||
|
Request::builder()
|
||||||
|
.uri("/api/v1/bootstrap/flake-bundle")
|
||||||
|
.header("x-deployer-token", "test-token")
|
||||||
|
.body(Body::empty())
|
||||||
|
.unwrap(),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(response.status(), StatusCode::OK);
|
||||||
|
assert_eq!(
|
||||||
|
response
|
||||||
|
.headers()
|
||||||
|
.get(header::CONTENT_TYPE)
|
||||||
|
.and_then(|value| value.to_str().ok()),
|
||||||
|
Some("application/gzip")
|
||||||
|
);
|
||||||
|
|
||||||
|
let body = to_bytes(response.into_body(), usize::MAX).await.unwrap();
|
||||||
|
assert_eq!(body.as_ref(), b"bundle-bytes");
|
||||||
|
|
||||||
|
let _ = fs::remove_file(bundle_path);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn flake_bundle_route_requires_configured_bundle() {
|
||||||
|
let mut config = Config::default();
|
||||||
|
config.bootstrap_token = Some("test-token".to_string());
|
||||||
|
let state = Arc::new(AppState::with_config(config));
|
||||||
|
let app = build_router(state);
|
||||||
|
|
||||||
|
let response = app
|
||||||
|
.oneshot(
|
||||||
|
Request::builder()
|
||||||
|
.uri("/api/v1/bootstrap/flake-bundle")
|
||||||
|
.header("x-deployer-token", "test-token")
|
||||||
|
.body(Body::empty())
|
||||||
|
.unwrap(),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(response.status(), StatusCode::SERVICE_UNAVAILABLE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -7,9 +7,7 @@ use deployer_types::NodeConfig;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
auth::require_bootstrap_auth,
|
auth::require_bootstrap_auth, phone_home::lookup_node_config, state::AppState,
|
||||||
phone_home::lookup_node_config,
|
|
||||||
state::AppState,
|
|
||||||
validation::validate_identifier,
|
validation::validate_identifier,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -23,7 +21,10 @@ pub async fn meta_data(
|
||||||
validate_identifier(&machine_id, "machine_id")?;
|
validate_identifier(&machine_id, "machine_id")?;
|
||||||
|
|
||||||
let Some((node_id, config)) = lookup_node_config(&state, &machine_id).await else {
|
let Some((node_id, config)) = lookup_node_config(&state, &machine_id).await else {
|
||||||
return Err((StatusCode::NOT_FOUND, "machine-id not registered".to_string()));
|
return Err((
|
||||||
|
StatusCode::NOT_FOUND,
|
||||||
|
"machine-id not registered".to_string(),
|
||||||
|
));
|
||||||
};
|
};
|
||||||
|
|
||||||
let body = format!(
|
let body = format!(
|
||||||
|
|
@ -43,12 +44,18 @@ pub async fn user_data(
|
||||||
validate_identifier(&machine_id, "machine_id")?;
|
validate_identifier(&machine_id, "machine_id")?;
|
||||||
|
|
||||||
let Some((node_id, config)) = lookup_node_config(&state, &machine_id).await else {
|
let Some((node_id, config)) = lookup_node_config(&state, &machine_id).await else {
|
||||||
return Err((StatusCode::NOT_FOUND, "machine-id not registered".to_string()));
|
return Err((
|
||||||
|
StatusCode::NOT_FOUND,
|
||||||
|
"machine-id not registered".to_string(),
|
||||||
|
));
|
||||||
};
|
};
|
||||||
|
|
||||||
let body = render_user_data(&node_id, &config)
|
let body = render_user_data(&node_id, &config)
|
||||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
|
||||||
Ok(([(axum::http::header::CONTENT_TYPE, "text/cloud-config")], body))
|
Ok((
|
||||||
|
[(axum::http::header::CONTENT_TYPE, "text/cloud-config")],
|
||||||
|
body,
|
||||||
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn render_yaml_list(items: &[String], indent: usize) -> String {
|
fn render_yaml_list(items: &[String], indent: usize) -> String {
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,10 @@ pub struct Config {
|
||||||
#[serde(default = "default_local_state_path")]
|
#[serde(default = "default_local_state_path")]
|
||||||
pub local_state_path: Option<PathBuf>,
|
pub local_state_path: Option<PathBuf>,
|
||||||
|
|
||||||
|
/// Optional tar.gz bundle containing the PhotonCloud flake source tree for bootstrap installs
|
||||||
|
#[serde(default)]
|
||||||
|
pub bootstrap_flake_bundle_path: Option<PathBuf>,
|
||||||
|
|
||||||
/// Shared bootstrap token required for phone-home/admin APIs
|
/// Shared bootstrap token required for phone-home/admin APIs
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub bootstrap_token: Option<String>,
|
pub bootstrap_token: Option<String>,
|
||||||
|
|
@ -80,6 +84,7 @@ impl Default for Config {
|
||||||
cluster_namespace: default_cluster_namespace(),
|
cluster_namespace: default_cluster_namespace(),
|
||||||
heartbeat_timeout_secs: default_heartbeat_timeout(),
|
heartbeat_timeout_secs: default_heartbeat_timeout(),
|
||||||
local_state_path: default_local_state_path(),
|
local_state_path: default_local_state_path(),
|
||||||
|
bootstrap_flake_bundle_path: None,
|
||||||
bootstrap_token: None,
|
bootstrap_token: None,
|
||||||
admin_token: None,
|
admin_token: None,
|
||||||
allow_admin_fallback: default_allow_admin_fallback(),
|
allow_admin_fallback: default_allow_admin_fallback(),
|
||||||
|
|
@ -224,6 +229,7 @@ mod tests {
|
||||||
config.local_state_path,
|
config.local_state_path,
|
||||||
Some(PathBuf::from("/var/lib/deployer/state"))
|
Some(PathBuf::from("/var/lib/deployer/state"))
|
||||||
);
|
);
|
||||||
|
assert!(config.bootstrap_flake_bundle_path.is_none());
|
||||||
assert!(config.bootstrap_token.is_none());
|
assert!(config.bootstrap_token.is_none());
|
||||||
assert!(config.admin_token.is_none());
|
assert!(config.admin_token.is_none());
|
||||||
assert!(!config.allow_admin_fallback);
|
assert!(!config.allow_admin_fallback);
|
||||||
|
|
@ -253,6 +259,7 @@ mod tests {
|
||||||
bind_addr = "127.0.0.1:18080"
|
bind_addr = "127.0.0.1:18080"
|
||||||
cluster_id = "cluster-a"
|
cluster_id = "cluster-a"
|
||||||
allow_unauthenticated = true
|
allow_unauthenticated = true
|
||||||
|
bootstrap_flake_bundle_path = "/tmp/plasmacloud-flake-bundle.tar.gz"
|
||||||
|
|
||||||
[chainfire]
|
[chainfire]
|
||||||
endpoints = ["http://10.0.0.1:2379"]
|
endpoints = ["http://10.0.0.1:2379"]
|
||||||
|
|
@ -264,6 +271,10 @@ mod tests {
|
||||||
let config = load_config(&path).unwrap();
|
let config = load_config(&path).unwrap();
|
||||||
assert_eq!(config.bind_addr.to_string(), "127.0.0.1:18080");
|
assert_eq!(config.bind_addr.to_string(), "127.0.0.1:18080");
|
||||||
assert_eq!(config.cluster_id.as_deref(), Some("cluster-a"));
|
assert_eq!(config.cluster_id.as_deref(), Some("cluster-a"));
|
||||||
|
assert_eq!(
|
||||||
|
config.bootstrap_flake_bundle_path,
|
||||||
|
Some(PathBuf::from("/tmp/plasmacloud-flake-bundle.tar.gz"))
|
||||||
|
);
|
||||||
assert!(config.allow_unauthenticated);
|
assert!(config.allow_unauthenticated);
|
||||||
assert_eq!(config.chainfire.namespace, "bootstrap");
|
assert_eq!(config.chainfire.namespace, "bootstrap");
|
||||||
assert_eq!(config.chainfire.endpoints, vec!["http://10.0.0.1:2379"]);
|
assert_eq!(config.chainfire.endpoints, vec!["http://10.0.0.1:2379"]);
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,8 @@
|
||||||
pub mod admin;
|
pub mod admin;
|
||||||
pub mod auth;
|
pub mod auth;
|
||||||
pub mod cluster;
|
pub mod bootstrap_assets;
|
||||||
pub mod cloud_init;
|
pub mod cloud_init;
|
||||||
|
pub mod cluster;
|
||||||
pub mod config;
|
pub mod config;
|
||||||
pub mod local_storage;
|
pub mod local_storage;
|
||||||
pub mod phone_home;
|
pub mod phone_home;
|
||||||
|
|
@ -34,6 +35,10 @@ pub fn build_router(state: Arc<AppState>) -> Router {
|
||||||
"/api/v1/cloud-init/:machine_id/user-data",
|
"/api/v1/cloud-init/:machine_id/user-data",
|
||||||
get(cloud_init::user_data),
|
get(cloud_init::user_data),
|
||||||
)
|
)
|
||||||
|
.route(
|
||||||
|
"/api/v1/bootstrap/flake-bundle",
|
||||||
|
get(bootstrap_assets::flake_bundle),
|
||||||
|
)
|
||||||
// Admin API (node management)
|
// Admin API (node management)
|
||||||
.route("/api/v1/admin/nodes", post(admin::pre_register))
|
.route("/api/v1/admin/nodes", post(admin::pre_register))
|
||||||
.route("/api/v1/admin/nodes", get(admin::list_nodes))
|
.route("/api/v1/admin/nodes", get(admin::list_nodes))
|
||||||
|
|
|
||||||
|
|
@ -440,7 +440,10 @@ async fn resolve_enrollment_config(
|
||||||
format!("failed to load node classes: {}", e),
|
format!("failed to load node classes: {}", e),
|
||||||
)
|
)
|
||||||
})?;
|
})?;
|
||||||
let pools = storage.list_pools(cluster_namespace, cluster_id).await.map_err(|e| {
|
let pools = storage
|
||||||
|
.list_pools(cluster_namespace, cluster_id)
|
||||||
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
(
|
(
|
||||||
StatusCode::INTERNAL_SERVER_ERROR,
|
StatusCode::INTERNAL_SERVER_ERROR,
|
||||||
format!("failed to load pools: {}", e),
|
format!("failed to load pools: {}", e),
|
||||||
|
|
@ -483,7 +486,11 @@ fn enrollment_rule_matches(rule: &EnrollmentRuleSpec, request: &PhoneHomeRequest
|
||||||
let Some(ip) = request.ip.as_deref() else {
|
let Some(ip) = request.ip.as_deref() else {
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
if !rule.match_ip_prefixes.iter().any(|prefix| ip.starts_with(prefix)) {
|
if !rule
|
||||||
|
.match_ip_prefixes
|
||||||
|
.iter()
|
||||||
|
.any(|prefix| ip.starts_with(prefix))
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -528,16 +535,16 @@ fn build_node_config_from_rule(
|
||||||
.node_class
|
.node_class
|
||||||
.clone()
|
.clone()
|
||||||
.or_else(|| pool_spec.and_then(|pool| pool.node_class.clone()));
|
.or_else(|| pool_spec.and_then(|pool| pool.node_class.clone()));
|
||||||
let node_class_spec = node_class
|
let node_class_spec = node_class.as_deref().and_then(|name| {
|
||||||
.as_deref()
|
node_classes
|
||||||
.and_then(|name| node_classes.iter().find(|node_class| node_class.name == name));
|
.iter()
|
||||||
|
.find(|node_class| node_class.name == name)
|
||||||
|
});
|
||||||
|
|
||||||
let role = rule
|
let role = rule
|
||||||
.role
|
.role
|
||||||
.clone()
|
.clone()
|
||||||
.or_else(|| {
|
.or_else(|| node_class_spec.and_then(|node_class| node_class.roles.first().cloned()))
|
||||||
node_class_spec.and_then(|node_class| node_class.roles.first().cloned())
|
|
||||||
})
|
|
||||||
.unwrap_or_else(|| "worker".to_string());
|
.unwrap_or_else(|| "worker".to_string());
|
||||||
|
|
||||||
let mut labels = std::collections::HashMap::new();
|
let mut labels = std::collections::HashMap::new();
|
||||||
|
|
@ -1011,7 +1018,9 @@ mod tests {
|
||||||
assert_eq!(config.pool.as_deref(), Some("gpu"));
|
assert_eq!(config.pool.as_deref(), Some("gpu"));
|
||||||
assert_eq!(config.node_class.as_deref(), Some("gpu-worker"));
|
assert_eq!(config.node_class.as_deref(), Some("gpu-worker"));
|
||||||
assert_eq!(config.nix_profile.as_deref(), Some("profiles/gpu-worker"));
|
assert_eq!(config.nix_profile.as_deref(), Some("profiles/gpu-worker"));
|
||||||
let install_plan = config.install_plan.expect("install_plan should inherit from class");
|
let install_plan = config
|
||||||
|
.install_plan
|
||||||
|
.expect("install_plan should inherit from class");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
install_plan.nixos_configuration.as_deref(),
|
install_plan.nixos_configuration.as_deref(),
|
||||||
Some("gpu-worker")
|
Some("gpu-worker")
|
||||||
|
|
|
||||||
|
|
@ -80,7 +80,10 @@ impl NodeStorage {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cluster_node_classes_prefix(&self, cluster_namespace: &str, cluster_id: &str) -> String {
|
fn cluster_node_classes_prefix(&self, cluster_namespace: &str, cluster_id: &str) -> String {
|
||||||
format!("{}/clusters/{}/node-classes/", cluster_namespace, cluster_id)
|
format!(
|
||||||
|
"{}/clusters/{}/node-classes/",
|
||||||
|
cluster_namespace, cluster_id
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cluster_pools_prefix(&self, cluster_namespace: &str, cluster_id: &str) -> String {
|
fn cluster_pools_prefix(&self, cluster_namespace: &str, cluster_id: &str) -> String {
|
||||||
|
|
@ -276,7 +279,9 @@ impl NodeStorage {
|
||||||
cluster_namespace: &str,
|
cluster_namespace: &str,
|
||||||
cluster_id: &str,
|
cluster_id: &str,
|
||||||
) -> Result<Vec<EnrollmentRuleSpec>, StorageError> {
|
) -> Result<Vec<EnrollmentRuleSpec>, StorageError> {
|
||||||
self.list_cluster_objects(self.cluster_enrollment_rules_prefix(cluster_namespace, cluster_id))
|
self.list_cluster_objects(
|
||||||
|
self.cluster_enrollment_rules_prefix(cluster_namespace, cluster_id),
|
||||||
|
)
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -446,6 +446,7 @@ pub struct ObservedSystemState {
|
||||||
/// Desired NixOS system state for a specific node.
|
/// Desired NixOS system state for a specific node.
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
|
||||||
pub struct DesiredSystemSpec {
|
pub struct DesiredSystemSpec {
|
||||||
|
#[serde(default)]
|
||||||
pub node_id: String,
|
pub node_id: String,
|
||||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
pub nixos_configuration: Option<String>,
|
pub nixos_configuration: Option<String>,
|
||||||
|
|
@ -490,6 +491,8 @@ pub struct NodeSpec {
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub install_plan: Option<InstallPlan>,
|
pub install_plan: Option<InstallPlan>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
|
pub desired_system: Option<DesiredSystemSpec>,
|
||||||
|
#[serde(default)]
|
||||||
pub state: Option<String>,
|
pub state: Option<String>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub last_heartbeat: Option<DateTime<Utc>>,
|
pub last_heartbeat: Option<DateTime<Utc>>,
|
||||||
|
|
@ -954,10 +957,7 @@ mod tests {
|
||||||
nixos_configuration: Some("node01".to_string()),
|
nixos_configuration: Some("node01".to_string()),
|
||||||
flake_ref: Some("/opt/plasmacloud-src".to_string()),
|
flake_ref: Some("/opt/plasmacloud-src".to_string()),
|
||||||
switch_action: Some("switch".to_string()),
|
switch_action: Some("switch".to_string()),
|
||||||
health_check_command: vec![
|
health_check_command: vec!["systemctl".to_string(), "is-system-running".to_string()],
|
||||||
"systemctl".to_string(),
|
|
||||||
"is-system-running".to_string(),
|
|
||||||
],
|
|
||||||
rollback_on_failure: Some(true),
|
rollback_on_failure: Some(true),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -101,6 +101,7 @@ raft_port="$(free_port)"
|
||||||
gossip_port="$(free_port)"
|
gossip_port="$(free_port)"
|
||||||
deployer_port="$(free_port)"
|
deployer_port="$(free_port)"
|
||||||
bootstrap_token="bootstrap-secret"
|
bootstrap_token="bootstrap-secret"
|
||||||
|
printf 'bundle-bytes' >"$tmp_dir/flake-bundle.tar.gz"
|
||||||
|
|
||||||
cat >"$tmp_dir/chainfire.toml" <<EOF
|
cat >"$tmp_dir/chainfire.toml" <<EOF
|
||||||
[node]
|
[node]
|
||||||
|
|
@ -140,6 +141,7 @@ cluster_id = "test-cluster"
|
||||||
cluster_namespace = "photoncloud"
|
cluster_namespace = "photoncloud"
|
||||||
heartbeat_timeout_secs = 300
|
heartbeat_timeout_secs = 300
|
||||||
local_state_path = "$tmp_dir/deployer-state"
|
local_state_path = "$tmp_dir/deployer-state"
|
||||||
|
bootstrap_flake_bundle_path = "$tmp_dir/flake-bundle.tar.gz"
|
||||||
bootstrap_token = "${bootstrap_token}"
|
bootstrap_token = "${bootstrap_token}"
|
||||||
require_chainfire = true
|
require_chainfire = true
|
||||||
allow_unknown_nodes = false
|
allow_unknown_nodes = false
|
||||||
|
|
@ -206,6 +208,13 @@ nodes:
|
||||||
install_plan:
|
install_plan:
|
||||||
nixos_configuration: node01
|
nixos_configuration: node01
|
||||||
disko_config_path: nix/nodes/vm-cluster/node01/disko.nix
|
disko_config_path: nix/nodes/vm-cluster/node01/disko.nix
|
||||||
|
desired_system:
|
||||||
|
flake_ref: "github:centra/cloud"
|
||||||
|
health_check_command:
|
||||||
|
- systemctl
|
||||||
|
- is-system-running
|
||||||
|
- "--wait"
|
||||||
|
rollback_on_failure: true
|
||||||
state: pending
|
state: pending
|
||||||
|
|
||||||
enrollment_rules:
|
enrollment_rules:
|
||||||
|
|
@ -294,6 +303,23 @@ for path, expected in (
|
||||||
print("cloud-init endpoints validated")
|
print("cloud-init endpoints validated")
|
||||||
PY
|
PY
|
||||||
|
|
||||||
|
echo "Validating bootstrap flake bundle endpoint"
|
||||||
|
python3 - "$deployer_endpoint" "$bootstrap_token" <<'PY'
|
||||||
|
import sys
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
endpoint, token = sys.argv[1], sys.argv[2]
|
||||||
|
request = urllib.request.Request(
|
||||||
|
endpoint + "/api/v1/bootstrap/flake-bundle",
|
||||||
|
headers={"X-Deployer-Token": token},
|
||||||
|
)
|
||||||
|
with urllib.request.urlopen(request, timeout=5) as response:
|
||||||
|
payload = response.read()
|
||||||
|
|
||||||
|
assert payload == b"bundle-bytes"
|
||||||
|
print("bootstrap flake bundle endpoint validated")
|
||||||
|
PY
|
||||||
|
|
||||||
echo "Validating enrollment-rule bootstrap path"
|
echo "Validating enrollment-rule bootstrap path"
|
||||||
dynamic_node_id="$(
|
dynamic_node_id="$(
|
||||||
python3 - "$deployer_endpoint" "$bootstrap_token" <<'PY'
|
python3 - "$deployer_endpoint" "$bootstrap_token" <<'PY'
|
||||||
|
|
@ -350,7 +376,10 @@ records = {}
|
||||||
with open(path, "r", encoding="utf-8") as handle:
|
with open(path, "r", encoding="utf-8") as handle:
|
||||||
for line in handle:
|
for line in handle:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if " value=" not in line:
|
if " key=" not in line or " value=" not in line:
|
||||||
|
continue
|
||||||
|
key = line.split(" key=", 1)[1].split(" value=", 1)[0]
|
||||||
|
if key.endswith("/desired-system"):
|
||||||
continue
|
continue
|
||||||
value = line.split(" value=", 1)[1]
|
value = line.split(" value=", 1)[1]
|
||||||
record = json.loads(value)
|
record = json.loads(value)
|
||||||
|
|
@ -375,4 +404,26 @@ if dynamic.get("labels", {}).get("lane") != "edge":
|
||||||
print("Deployer bootstrap records validated")
|
print("Deployer bootstrap records validated")
|
||||||
PY
|
PY
|
||||||
|
|
||||||
|
echo "Inspecting desired-system state"
|
||||||
|
run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/nodes/node-seeded/desired-system" >"$tmp_dir/desired-system.dump"
|
||||||
|
python3 - "$tmp_dir/desired-system.dump" <<'PY'
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
path = sys.argv[1]
|
||||||
|
with open(path, "r", encoding="utf-8") as handle:
|
||||||
|
lines = [line.strip() for line in handle if " value=" in line]
|
||||||
|
|
||||||
|
if len(lines) != 1:
|
||||||
|
raise SystemExit(f"unexpected desired-system dump: {lines}")
|
||||||
|
|
||||||
|
payload = json.loads(lines[0].split(" value=", 1)[1])
|
||||||
|
assert payload["node_id"] == "node-seeded"
|
||||||
|
assert payload["nixos_configuration"] == "node01"
|
||||||
|
assert payload["flake_ref"] == "github:centra/cloud"
|
||||||
|
assert payload["health_check_command"] == ["systemctl", "is-system-running", "--wait"]
|
||||||
|
assert payload["rollback_on_failure"] is True
|
||||||
|
print("desired-system state validated")
|
||||||
|
PY
|
||||||
|
|
||||||
echo "Deployer bootstrap E2E verification passed"
|
echo "Deployer bootstrap E2E verification passed"
|
||||||
|
|
|
||||||
65
flake.nix
65
flake.nix
|
|
@ -102,6 +102,43 @@
|
||||||
|| builtins.elem topLevel includedTopLevels;
|
|| builtins.elem topLevel includedTopLevels;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
flakeBundleSrc = pkgs.lib.cleanSourceWith {
|
||||||
|
src = ./.;
|
||||||
|
filter = path: type:
|
||||||
|
let
|
||||||
|
rel = pkgs.lib.removePrefix ((toString ./. ) + "/") (toString path);
|
||||||
|
topLevel = builtins.head (pkgs.lib.splitString "/" rel);
|
||||||
|
includedTopLevels = [
|
||||||
|
"apigateway"
|
||||||
|
"baremetal"
|
||||||
|
"chainfire"
|
||||||
|
"coronafs"
|
||||||
|
"crates"
|
||||||
|
"creditservice"
|
||||||
|
"deployer"
|
||||||
|
"fiberlb"
|
||||||
|
"flashdns"
|
||||||
|
"flaredb"
|
||||||
|
"iam"
|
||||||
|
"k8shost"
|
||||||
|
"lightningstor"
|
||||||
|
"mtls-agent"
|
||||||
|
"nightlight"
|
||||||
|
"nix"
|
||||||
|
"nix-nos"
|
||||||
|
"plasmavmc"
|
||||||
|
"prismnet"
|
||||||
|
];
|
||||||
|
isTargetDir = builtins.match "(.*/)?target(/.*)?" rel != null;
|
||||||
|
in
|
||||||
|
!isTargetDir
|
||||||
|
&& (
|
||||||
|
rel == ""
|
||||||
|
|| builtins.elem rel [ "flake.nix" "flake.lock" ]
|
||||||
|
|| builtins.elem topLevel includedTopLevels
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
# Helper function to build a Rust workspace package
|
# Helper function to build a Rust workspace package
|
||||||
# Parameters:
|
# Parameters:
|
||||||
# name: package name (e.g., "chainfire-server")
|
# name: package name (e.g., "chainfire-server")
|
||||||
|
|
@ -397,6 +434,20 @@
|
||||||
description = "Node-local NixOS reconciliation agent for PhotonCloud hosts";
|
description = "Node-local NixOS reconciliation agent for PhotonCloud hosts";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
plasmacloudFlakeBundle = pkgs.runCommand "plasmacloud-flake-bundle.tar.gz" {
|
||||||
|
nativeBuildInputs = [ pkgs.gnutar pkgs.gzip ];
|
||||||
|
} ''
|
||||||
|
tar \
|
||||||
|
--sort=name \
|
||||||
|
--mtime='@1' \
|
||||||
|
--owner=0 \
|
||||||
|
--group=0 \
|
||||||
|
--numeric-owner \
|
||||||
|
-C ${flakeBundleSrc} \
|
||||||
|
-cf - . \
|
||||||
|
| gzip -n > "$out"
|
||||||
|
'';
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# Fleet Scheduler: Non-Kubernetes service scheduler for bare-metal nodes
|
# Fleet Scheduler: Non-Kubernetes service scheduler for bare-metal nodes
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
|
|
@ -410,6 +461,8 @@
|
||||||
vmClusterDeployerState =
|
vmClusterDeployerState =
|
||||||
self.nixosConfigurations.node01.config.system.build.plasmacloudDeployerClusterState;
|
self.nixosConfigurations.node01.config.system.build.plasmacloudDeployerClusterState;
|
||||||
|
|
||||||
|
vmClusterFlakeBundle = self.packages.${system}.plasmacloudFlakeBundle;
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# Default package: Build all servers
|
# Default package: Build all servers
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
|
|
@ -583,7 +636,7 @@
|
||||||
nix-nos.nixosModules.default
|
nix-nos.nixosModules.default
|
||||||
./nix/nodes/vm-cluster/node01/configuration.nix
|
./nix/nodes/vm-cluster/node01/configuration.nix
|
||||||
self.nixosModules.default
|
self.nixosModules.default
|
||||||
{
|
({ pkgs, ... }: {
|
||||||
services.deployer = {
|
services.deployer = {
|
||||||
enable = true;
|
enable = true;
|
||||||
bindAddr = "0.0.0.0:8088";
|
bindAddr = "0.0.0.0:8088";
|
||||||
|
|
@ -594,6 +647,7 @@
|
||||||
allowUnauthenticated = false;
|
allowUnauthenticated = false;
|
||||||
bootstrapToken = "vm-cluster-bootstrap-token";
|
bootstrapToken = "vm-cluster-bootstrap-token";
|
||||||
adminToken = "vm-cluster-admin-token";
|
adminToken = "vm-cluster-admin-token";
|
||||||
|
bootstrapFlakeBundle = pkgs.plasmacloudFlakeBundle;
|
||||||
seedClusterState = true;
|
seedClusterState = true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -604,11 +658,9 @@
|
||||||
nodeId = "node01";
|
nodeId = "node01";
|
||||||
flakeRoot = self.outPath;
|
flakeRoot = self.outPath;
|
||||||
intervalSecs = 30;
|
intervalSecs = 30;
|
||||||
healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ];
|
|
||||||
rollbackOnFailure = true;
|
|
||||||
apply = true;
|
apply = true;
|
||||||
};
|
};
|
||||||
}
|
})
|
||||||
{ nixpkgs.overlays = [ self.overlays.default ]; }
|
{ nixpkgs.overlays = [ self.overlays.default ]; }
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
@ -628,8 +680,6 @@
|
||||||
nodeId = "node02";
|
nodeId = "node02";
|
||||||
flakeRoot = self.outPath;
|
flakeRoot = self.outPath;
|
||||||
intervalSecs = 30;
|
intervalSecs = 30;
|
||||||
healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ];
|
|
||||||
rollbackOnFailure = true;
|
|
||||||
apply = true;
|
apply = true;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
@ -652,8 +702,6 @@
|
||||||
nodeId = "node03";
|
nodeId = "node03";
|
||||||
flakeRoot = self.outPath;
|
flakeRoot = self.outPath;
|
||||||
intervalSecs = 30;
|
intervalSecs = 30;
|
||||||
healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ];
|
|
||||||
rollbackOnFailure = true;
|
|
||||||
apply = true;
|
apply = true;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
@ -684,6 +732,7 @@
|
||||||
k8shost-server = self.packages.${final.system}.k8shost-server;
|
k8shost-server = self.packages.${final.system}.k8shost-server;
|
||||||
deployer-server = self.packages.${final.system}.deployer-server;
|
deployer-server = self.packages.${final.system}.deployer-server;
|
||||||
deployer-ctl = self.packages.${final.system}.deployer-ctl;
|
deployer-ctl = self.packages.${final.system}.deployer-ctl;
|
||||||
|
plasmacloudFlakeBundle = self.packages.${final.system}.plasmacloudFlakeBundle;
|
||||||
nix-agent = self.packages.${final.system}.nix-agent;
|
nix-agent = self.packages.${final.system}.nix-agent;
|
||||||
node-agent = self.packages.${final.system}.node-agent;
|
node-agent = self.packages.${final.system}.node-agent;
|
||||||
fleet-scheduler = self.packages.${final.system}.fleet-scheduler;
|
fleet-scheduler = self.packages.${final.system}.fleet-scheduler;
|
||||||
|
|
|
||||||
|
|
@ -186,6 +186,8 @@
|
||||||
NODE_IP=$(${pkgs.jq}/bin/jq -r '.ip // empty' /etc/plasmacloud/node-config.json)
|
NODE_IP=$(${pkgs.jq}/bin/jq -r '.ip // empty' /etc/plasmacloud/node-config.json)
|
||||||
NIXOS_CONFIGURATION=$(${pkgs.jq}/bin/jq -r '.install_plan.nixos_configuration // .hostname // empty' /etc/plasmacloud/node-config.json)
|
NIXOS_CONFIGURATION=$(${pkgs.jq}/bin/jq -r '.install_plan.nixos_configuration // .hostname // empty' /etc/plasmacloud/node-config.json)
|
||||||
DISKO_PATH=$(${pkgs.jq}/bin/jq -r '.install_plan.disko_config_path // empty' /etc/plasmacloud/node-config.json)
|
DISKO_PATH=$(${pkgs.jq}/bin/jq -r '.install_plan.disko_config_path // empty' /etc/plasmacloud/node-config.json)
|
||||||
|
DEPLOYER_URL="''${DEPLOYER_URL:-http://192.168.100.1:8080}"
|
||||||
|
SRC_ROOT="/opt/plasmacloud-src"
|
||||||
|
|
||||||
if [ -z "$NODE_ID" ] || [ -z "$NODE_IP" ]; then
|
if [ -z "$NODE_ID" ] || [ -z "$NODE_IP" ]; then
|
||||||
echo "ERROR: node-config.json missing hostname/ip"
|
echo "ERROR: node-config.json missing hostname/ip"
|
||||||
|
|
@ -197,9 +199,38 @@
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
TOKEN_FILE="/etc/plasmacloud/bootstrap-token"
|
||||||
|
DEPLOYER_TOKEN=""
|
||||||
|
if [ -s "$TOKEN_FILE" ]; then
|
||||||
|
DEPLOYER_TOKEN=$(cat "$TOKEN_FILE")
|
||||||
|
elif [ -n "''${DEPLOYER_BOOTSTRAP_TOKEN:-}" ]; then
|
||||||
|
DEPLOYER_TOKEN="''${DEPLOYER_BOOTSTRAP_TOKEN}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
CURL_ARGS=(-sfL --connect-timeout 5 --max-time 120)
|
||||||
|
if [ -n "$DEPLOYER_TOKEN" ]; then
|
||||||
|
CURL_ARGS+=(-H "X-Deployer-Token: $DEPLOYER_TOKEN")
|
||||||
|
fi
|
||||||
|
if [ -n "''${DEPLOYER_CA_CERT:-}" ] && [ -f "''${DEPLOYER_CA_CERT}" ]; then
|
||||||
|
CURL_ARGS+=(--cacert "''${DEPLOYER_CA_CERT}")
|
||||||
|
fi
|
||||||
|
|
||||||
|
BUNDLE_PATH="/run/plasmacloud/flake-bundle.tar.gz"
|
||||||
|
mkdir -p /run/plasmacloud
|
||||||
|
if ${pkgs.curl}/bin/curl "''${CURL_ARGS[@]}" \
|
||||||
|
"$DEPLOYER_URL/api/v1/bootstrap/flake-bundle" \
|
||||||
|
-o "$BUNDLE_PATH"; then
|
||||||
|
echo "Downloaded bootstrap flake bundle from deployer"
|
||||||
|
rm -rf "$SRC_ROOT"
|
||||||
|
mkdir -p "$SRC_ROOT"
|
||||||
|
${pkgs.gzip}/bin/gzip -dc "$BUNDLE_PATH" | ${pkgs.gnutar}/bin/tar -xf - -C "$SRC_ROOT"
|
||||||
|
else
|
||||||
|
echo "No deployer flake bundle available; using embedded source tree"
|
||||||
|
fi
|
||||||
|
|
||||||
if [ -z "$DISKO_PATH" ]; then
|
if [ -z "$DISKO_PATH" ]; then
|
||||||
CANDIDATE_DISKO="nix/nodes/vm-cluster/$NODE_ID/disko.nix"
|
CANDIDATE_DISKO="nix/nodes/vm-cluster/$NODE_ID/disko.nix"
|
||||||
if [ -f "/opt/plasmacloud-src/$CANDIDATE_DISKO" ]; then
|
if [ -f "$SRC_ROOT/$CANDIDATE_DISKO" ]; then
|
||||||
DISKO_PATH="$CANDIDATE_DISKO"
|
DISKO_PATH="$CANDIDATE_DISKO"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
@ -209,8 +240,8 @@
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -f "/opt/plasmacloud-src/$DISKO_PATH" ]; then
|
if [ ! -f "$SRC_ROOT/$DISKO_PATH" ]; then
|
||||||
echo "ERROR: Disko config not found: /opt/plasmacloud-src/$DISKO_PATH"
|
echo "ERROR: Disko config not found: $SRC_ROOT/$DISKO_PATH"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -238,14 +269,14 @@
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Validating NixOS configuration output..."
|
echo "Validating NixOS configuration output..."
|
||||||
nix eval --raw "/opt/plasmacloud-src#nixosConfigurations.$NIXOS_CONFIGURATION.config.system.build.toplevel.drvPath" >/dev/null
|
nix eval --raw "$SRC_ROOT#nixosConfigurations.$NIXOS_CONFIGURATION.config.system.build.toplevel.drvPath" >/dev/null
|
||||||
|
|
||||||
echo "Running disko to partition $DISK..."
|
echo "Running disko to partition $DISK..."
|
||||||
export NIX_CONFIG="experimental-features = nix-command flakes"
|
export NIX_CONFIG="experimental-features = nix-command flakes"
|
||||||
nix run github:nix-community/disko -- --mode disko "/opt/plasmacloud-src/$DISKO_PATH"
|
nix run github:nix-community/disko -- --mode disko "$SRC_ROOT/$DISKO_PATH"
|
||||||
|
|
||||||
echo "Running nixos-install..."
|
echo "Running nixos-install..."
|
||||||
nixos-install --flake "/opt/plasmacloud-src#$NIXOS_CONFIGURATION" --no-root-passwd
|
nixos-install --flake "$SRC_ROOT#$NIXOS_CONFIGURATION" --no-root-passwd
|
||||||
|
|
||||||
sync
|
sync
|
||||||
echo "✓ Install complete; rebooting..."
|
echo "✓ Install complete; rebooting..."
|
||||||
|
|
@ -255,7 +286,7 @@
|
||||||
|
|
||||||
# Packages for bootstrap + install
|
# Packages for bootstrap + install
|
||||||
environment.systemPackages = with pkgs; [
|
environment.systemPackages = with pkgs; [
|
||||||
curl jq vim htop gawk gnugrep util-linux parted dosfstools e2fsprogs
|
curl jq vim htop gawk gnugrep util-linux parted dosfstools e2fsprogs gnutar gzip
|
||||||
];
|
];
|
||||||
|
|
||||||
# SSH with key-based auth for non-interactive access
|
# SSH with key-based auth for non-interactive access
|
||||||
|
|
|
||||||
|
|
@ -19,9 +19,44 @@ let
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
mkDesiredSystemType = types: types.submodule {
|
||||||
|
options = {
|
||||||
|
nixosConfiguration = mkOption {
|
||||||
|
type = types.nullOr types.str;
|
||||||
|
default = null;
|
||||||
|
description = "Name of the nixosConfigurations output to activate";
|
||||||
|
};
|
||||||
|
|
||||||
|
flakeRef = mkOption {
|
||||||
|
type = types.nullOr types.str;
|
||||||
|
default = null;
|
||||||
|
description = "Explicit flake reference used by nix-agent";
|
||||||
|
};
|
||||||
|
|
||||||
|
switchAction = mkOption {
|
||||||
|
type = types.nullOr types.str;
|
||||||
|
default = null;
|
||||||
|
description = "switch-to-configuration action for nix-agent";
|
||||||
|
};
|
||||||
|
|
||||||
|
healthCheckCommand = mkOption {
|
||||||
|
type = types.listOf types.str;
|
||||||
|
default = [ ];
|
||||||
|
description = "Command vector executed after activation to validate node health";
|
||||||
|
};
|
||||||
|
|
||||||
|
rollbackOnFailure = mkOption {
|
||||||
|
type = types.nullOr types.bool;
|
||||||
|
default = null;
|
||||||
|
description = "Whether nix-agent should roll back when the health check fails";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
mkNodeType = types:
|
mkNodeType = types:
|
||||||
let
|
let
|
||||||
installPlanType = mkInstallPlanType types;
|
installPlanType = mkInstallPlanType types;
|
||||||
|
desiredSystemType = mkDesiredSystemType types;
|
||||||
in types.submodule {
|
in types.submodule {
|
||||||
options = {
|
options = {
|
||||||
role = mkOption {
|
role = mkOption {
|
||||||
|
|
@ -101,6 +136,12 @@ let
|
||||||
description = "Explicit NixOS installation targets for bare-metal bootstrap";
|
description = "Explicit NixOS installation targets for bare-metal bootstrap";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
desiredSystem = mkOption {
|
||||||
|
type = types.nullOr desiredSystemType;
|
||||||
|
default = null;
|
||||||
|
description = "Desired NixOS reconciliation state exported for nix-agent";
|
||||||
|
};
|
||||||
|
|
||||||
state = mkOption {
|
state = mkOption {
|
||||||
type = types.nullOr (types.enum [ "pending" "provisioning" "active" "failed" "draining" ]);
|
type = types.nullOr (types.enum [ "pending" "provisioning" "active" "failed" "draining" ]);
|
||||||
default = null;
|
default = null;
|
||||||
|
|
@ -277,6 +318,29 @@ let
|
||||||
in
|
in
|
||||||
if plan == null || rendered == { } then null else rendered;
|
if plan == null || rendered == { } then null else rendered;
|
||||||
|
|
||||||
|
mkDesiredSystem = nodeName: desiredSystem:
|
||||||
|
let
|
||||||
|
rendered =
|
||||||
|
optionalAttrs (desiredSystem != null && desiredSystem.nixosConfiguration != null) {
|
||||||
|
nixos_configuration = desiredSystem.nixosConfiguration;
|
||||||
|
}
|
||||||
|
// optionalAttrs (desiredSystem != null && desiredSystem.flakeRef != null) {
|
||||||
|
flake_ref = desiredSystem.flakeRef;
|
||||||
|
}
|
||||||
|
// optionalAttrs (desiredSystem != null && desiredSystem.switchAction != null) {
|
||||||
|
switch_action = desiredSystem.switchAction;
|
||||||
|
}
|
||||||
|
// optionalAttrs (desiredSystem != null && desiredSystem.healthCheckCommand != [ ]) {
|
||||||
|
health_check_command = desiredSystem.healthCheckCommand;
|
||||||
|
}
|
||||||
|
// optionalAttrs (desiredSystem != null && desiredSystem.rollbackOnFailure != null) {
|
||||||
|
rollback_on_failure = desiredSystem.rollbackOnFailure;
|
||||||
|
};
|
||||||
|
in
|
||||||
|
if desiredSystem == null || rendered == { } then null else {
|
||||||
|
node_id = nodeName;
|
||||||
|
} // rendered;
|
||||||
|
|
||||||
mkDeployerNodeSpec = nodeName: node:
|
mkDeployerNodeSpec = nodeName: node:
|
||||||
{
|
{
|
||||||
node_id = nodeName;
|
node_id = nodeName;
|
||||||
|
|
@ -303,6 +367,9 @@ let
|
||||||
// optionalAttrs (mkInstallPlan node.installPlan != null) {
|
// optionalAttrs (mkInstallPlan node.installPlan != null) {
|
||||||
install_plan = mkInstallPlan node.installPlan;
|
install_plan = mkInstallPlan node.installPlan;
|
||||||
}
|
}
|
||||||
|
// optionalAttrs (mkDesiredSystem nodeName node.desiredSystem != null) {
|
||||||
|
desired_system = mkDesiredSystem nodeName node.desiredSystem;
|
||||||
|
}
|
||||||
// optionalAttrs (node.state != null) {
|
// optionalAttrs (node.state != null) {
|
||||||
state = node.state;
|
state = node.state;
|
||||||
};
|
};
|
||||||
|
|
@ -455,6 +522,7 @@ in
|
||||||
{
|
{
|
||||||
inherit
|
inherit
|
||||||
mkInstallPlanType
|
mkInstallPlanType
|
||||||
|
mkDesiredSystemType
|
||||||
mkNodeType
|
mkNodeType
|
||||||
mkNodeClassType
|
mkNodeClassType
|
||||||
mkNodePoolType
|
mkNodePoolType
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,9 @@ let
|
||||||
// lib.optionalAttrs (cfg.clusterId != null) {
|
// lib.optionalAttrs (cfg.clusterId != null) {
|
||||||
cluster_id = cfg.clusterId;
|
cluster_id = cfg.clusterId;
|
||||||
}
|
}
|
||||||
|
// lib.optionalAttrs (cfg.bootstrapFlakeBundle != null) {
|
||||||
|
bootstrap_flake_bundle_path = toString cfg.bootstrapFlakeBundle;
|
||||||
|
}
|
||||||
// lib.optionalAttrs (cfg.tlsCaCertPath != null) {
|
// lib.optionalAttrs (cfg.tlsCaCertPath != null) {
|
||||||
tls_ca_cert_path = cfg.tlsCaCertPath;
|
tls_ca_cert_path = cfg.tlsCaCertPath;
|
||||||
}
|
}
|
||||||
|
|
@ -77,6 +80,12 @@ in
|
||||||
description = "Local storage path for deployer bootstrap state";
|
description = "Local storage path for deployer bootstrap state";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
bootstrapFlakeBundle = lib.mkOption {
|
||||||
|
type = lib.types.nullOr lib.types.path;
|
||||||
|
default = null;
|
||||||
|
description = "Optional tar.gz bundle served to bootstrap installers as the canonical PhotonCloud flake source";
|
||||||
|
};
|
||||||
|
|
||||||
requireChainfire = lib.mkOption {
|
requireChainfire = lib.mkOption {
|
||||||
type = lib.types.bool;
|
type = lib.types.bool;
|
||||||
default = false;
|
default = false;
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,10 @@
|
||||||
nixosConfiguration = "node01";
|
nixosConfiguration = "node01";
|
||||||
diskoConfigPath = "nix/nodes/vm-cluster/node01/disko.nix";
|
diskoConfigPath = "nix/nodes/vm-cluster/node01/disko.nix";
|
||||||
};
|
};
|
||||||
|
desiredSystem = {
|
||||||
|
healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ];
|
||||||
|
rollbackOnFailure = true;
|
||||||
|
};
|
||||||
raftPort = 2380;
|
raftPort = 2380;
|
||||||
apiPort = 2379;
|
apiPort = 2379;
|
||||||
};
|
};
|
||||||
|
|
@ -42,6 +46,10 @@
|
||||||
nixosConfiguration = "node02";
|
nixosConfiguration = "node02";
|
||||||
diskoConfigPath = "nix/nodes/vm-cluster/node02/disko.nix";
|
diskoConfigPath = "nix/nodes/vm-cluster/node02/disko.nix";
|
||||||
};
|
};
|
||||||
|
desiredSystem = {
|
||||||
|
healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ];
|
||||||
|
rollbackOnFailure = true;
|
||||||
|
};
|
||||||
raftPort = 2380;
|
raftPort = 2380;
|
||||||
apiPort = 2379;
|
apiPort = 2379;
|
||||||
};
|
};
|
||||||
|
|
@ -62,6 +70,10 @@
|
||||||
nixosConfiguration = "node03";
|
nixosConfiguration = "node03";
|
||||||
diskoConfigPath = "nix/nodes/vm-cluster/node03/disko.nix";
|
diskoConfigPath = "nix/nodes/vm-cluster/node03/disko.nix";
|
||||||
};
|
};
|
||||||
|
desiredSystem = {
|
||||||
|
healthCheckCommand = [ "systemctl" "is-system-running" "--wait" ];
|
||||||
|
rollbackOnFailure = true;
|
||||||
|
};
|
||||||
raftPort = 2380;
|
raftPort = 2380;
|
||||||
apiPort = 2379;
|
apiPort = 2379;
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -81,6 +81,7 @@
|
||||||
requireChainfire = true;
|
requireChainfire = true;
|
||||||
bootstrapToken = "test-bootstrap-token";
|
bootstrapToken = "test-bootstrap-token";
|
||||||
adminToken = "test-admin-token";
|
adminToken = "test-admin-token";
|
||||||
|
bootstrapFlakeBundle = pkgs.plasmacloudFlakeBundle;
|
||||||
seedClusterState = true;
|
seedClusterState = true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue