//! Phone-home registration endpoint.
//!
//! Source: photoncloud-monorepo/deployer/crates/deployer-server/src/phone_home.rs

use axum::{extract::State, http::HeaderMap, http::StatusCode, Json};
use chrono::Utc;
use deployer_types::{
CommissionState, EnrollmentRuleSpec, HardwareFacts, InstallPlan, InstallState,
NodeClassSpec, NodeConfig, NodeInfo, NodePoolSpec, NodeState, PhoneHomeRequest,
PhoneHomeResponse, PowerState,
};
use sha2::{Digest, Sha256};
use std::sync::Arc;
use tracing::{debug, error, info, warn};
use crate::auth::require_bootstrap_auth;
use crate::cluster::ClusterNodeRecord;
use crate::state::AppState;
use crate::validation::{validate_identifier, validate_ip};
/// Layer two optional [`InstallPlan`]s, preferring fields from `preferred`
/// and filling gaps from `fallback`.
///
/// Thin wrapper around `InstallPlan::from_layers`; kept local so callers in
/// this module express intent ("merge rule plan over class plan") without
/// reaching into deployer-types directly.
fn merge_install_plan(
    preferred: Option<&InstallPlan>,
    fallback: Option<&InstallPlan>,
) -> Option<InstallPlan> {
    InstallPlan::from_layers(preferred, fallback)
}
/// Record a condensed summary of the reported hardware facts into the node
/// `metadata` map under `hardware.*` keys.
///
/// A `None` facts argument leaves the map untouched. Scalar facts
/// (threads/cores/memory/architecture) are only written when the agent
/// reported them; disk and NIC counts are always written (empty = "0").
fn merge_hardware_summary_metadata(
    metadata: &mut std::collections::HashMap<String, String>,
    hardware_facts: Option<&HardwareFacts>,
) {
    let Some(facts) = hardware_facts else {
        return;
    };
    // Optional scalar facts: each entry is (key, Some(rendered value)) only
    // when the agent actually reported that field.
    let optional_entries = [
        (
            "hardware.cpu_threads",
            facts.cpu_threads.map(|v| v.to_string()),
        ),
        ("hardware.cpu_cores", facts.cpu_cores.map(|v| v.to_string())),
        (
            "hardware.memory_bytes",
            facts.memory_bytes.map(|v| v.to_string()),
        ),
        (
            "hardware.architecture",
            facts.architecture.as_deref().map(str::to_string),
        ),
    ];
    for (key, value) in optional_entries {
        if let Some(value) = value {
            metadata.insert(key.to_string(), value);
        }
    }
    // Counts are unconditional: empty collections are meaningful ("0").
    metadata.insert(
        "hardware.disk_count".to_string(),
        facts.disks.len().to_string(),
    );
    metadata.insert(
        "hardware.nic_count".to_string(),
        facts.nics.len().to_string(),
    );
}
/// Compute a stable SHA-256 fingerprint (lowercase hex) of the reported
/// hardware inventory, used to detect inventory drift between phone-homes.
///
/// Returns `None` when no facts were supplied or JSON serialization fails.
fn inventory_hash(hardware_facts: Option<&HardwareFacts>) -> Option<String> {
    hardware_facts
        .and_then(|facts| serde_json::to_vec(facts).ok())
        .map(|payload| {
            // One-shot digest of the canonical JSON bytes.
            let digest = Sha256::digest(&payload);
            format!("{:x}", digest)
        })
}
/// POST /api/v1/phone-home
///
/// Handles node registration during first boot.
/// Nodes send their machine-id, and Deployer returns:
/// - Node configuration (hostname, role, IP, services)
/// - SSH host key (only when local storage is configured)
/// - TLS certificates (optional)
///
/// Uses ChainFire storage when available, falls back to in-memory.
///
/// # Errors
/// - Bootstrap-auth failures propagate from `require_bootstrap_auth`.
/// - `403` for unknown machine-ids when `allow_unknown_nodes` is disabled.
/// - `400` for invalid identifiers/IPs, node-id or IP mismatches, or a
///   resolved config that ends up with no usable IP.
/// - `500` when no storage backend accepts the node record.
pub async fn phone_home(
    State(state): State<Arc<AppState>>,
    headers: HeaderMap,
    Json(request): Json<PhoneHomeRequest>,
) -> Result<Json<PhoneHomeResponse>, (StatusCode, String)> {
    // Authenticate with the shared bootstrap token before any other work.
    require_bootstrap_auth(&state, &headers)?;
    // Validate every caller-supplied identifier/address up front.
    validate_identifier(&request.machine_id, "machine_id")?;
    if let Some(node_id) = request.node_id.as_ref() {
        validate_identifier(node_id, "node_id")?;
    }
    if let Some(ip) = request.ip.as_ref() {
        validate_ip(ip, "ip")?;
    }
    info!(
        machine_id = %request.machine_id,
        "Phone home request received"
    );
    // Lookup node configuration (ChainFire or fallback)
    let (node_id, mut node_config) = match lookup_node_config(&state, &request.machine_id).await {
        Some((id, config)) => (id, config),
        None => {
            // Unknown machine-id: first try dynamic enrollment rules, then
            // (only if explicitly allowed) a permissive dev-only default.
            if let Some((id, config)) = resolve_enrollment_config(&state, &request).await? {
                info!(
                    machine_id = %request.machine_id,
                    node_id = %id,
                    "Resolved unknown machine through enrollment rules"
                );
                (id, config)
            } else {
                if !state.config.allow_unknown_nodes {
                    warn!(
                        machine_id = %request.machine_id,
                        "Unknown machine-id rejected (pre-registration required)"
                    );
                    return Err((
                        StatusCode::FORBIDDEN,
                        "machine-id not registered".to_string(),
                    ));
                }
                warn!(
                    machine_id = %request.machine_id,
                    "Unknown machine-id, assigning default configuration (unsafe)"
                );
                // Assign default configuration for unknown machines (dev-only).
                // Prefer explicit node_id, then DHCP-provided hostname, then machine-id suffix.
                let node_id = request
                    .node_id
                    .as_ref()
                    .map(|v| v.trim())
                    .filter(|v| !v.is_empty())
                    .map(|v| v.to_string())
                    .or_else(|| {
                        request
                            .hostname
                            .as_ref()
                            .map(|v| v.trim())
                            .filter(|v| !v.is_empty())
                            .map(|v| v.to_string())
                    })
                    .unwrap_or_else(|| {
                        // Cap the generated id at 128 chars total ("node-" + suffix).
                        // NOTE(review): byte slicing assumes validate_identifier
                        // restricts machine_id to single-byte chars — confirm.
                        let max_suffix_len = 128usize.saturating_sub("node-".len());
                        let suffix_len = std::cmp::min(max_suffix_len, request.machine_id.len());
                        format!("node-{}", &request.machine_id[..suffix_len])
                    });
                let config = NodeConfig {
                    hostname: node_id.clone(),
                    role: "worker".to_string(),
                    ip: request.ip.clone().unwrap_or_default(),
                    services: vec![],
                    ssh_authorized_keys: vec![],
                    labels: std::collections::HashMap::new(),
                    pool: None,
                    node_class: None,
                    failure_domain: request.metadata.get("failure_domain").cloned(),
                    nix_profile: None,
                    install_plan: None,
                };
                (node_id, config)
            }
        }
    };
    // A node may not phone home with an IP that contradicts its registered config.
    if let Some(request_ip) = request.ip.as_ref() {
        if !node_config.ip.is_empty() && node_config.ip != *request_ip {
            warn!(
                machine_id = %request.machine_id,
                requested_ip = %request_ip,
                expected_ip = %node_config.ip,
                "Node IP mismatch in phone-home"
            );
            return Err((StatusCode::BAD_REQUEST, "node ip mismatch".to_string()));
        }
    }
    // Likewise a claimed node_id must match the id resolved for this machine.
    if let Some(requested_id) = request.node_id.as_ref() {
        if requested_id != &node_id {
            warn!(
                machine_id = %request.machine_id,
                requested_id = %requested_id,
                expected_id = %node_id,
                "Node ID mismatch in phone-home"
            );
            return Err((StatusCode::BAD_REQUEST, "node_id mismatch".to_string()));
        }
    }
    // Backfill hostname from the request, defaulting to the node id.
    if node_config.hostname.is_empty() {
        if let Some(hostname) = request.hostname.as_ref() {
            node_config.hostname = hostname.clone();
        } else {
            node_config.hostname = node_id.clone();
        }
    }
    // Backfill IP from the request; without any IP the node is unusable.
    if node_config.ip.is_empty() {
        if let Some(ip) = request.ip.clone() {
            node_config.ip = ip;
        } else {
            warn!(
                machine_id = %request.machine_id,
                node_id = %node_id,
                "Node config missing IP; refusing registration"
            );
            return Err((StatusCode::BAD_REQUEST, "node ip missing".to_string()));
        }
    }
    validate_ip(&node_config.ip, "node_config.ip")?;
    // Ensure metadata contains authoritative role/service info
    let mut metadata = request.metadata.clone();
    metadata.insert("role".to_string(), node_config.role.clone());
    metadata.insert("services".to_string(), node_config.services.join(","));
    merge_hardware_summary_metadata(&mut metadata, request.hardware_facts.as_ref());
    // Create NodeInfo for tracking
    let node_info = NodeInfo {
        id: node_id.clone(),
        machine_id: Some(request.machine_id.clone()),
        hostname: node_config.hostname.clone(),
        ip: node_config.ip.clone(),
        state: NodeState::Provisioning,
        cluster_config_hash: request.cluster_config_hash.unwrap_or_default(),
        last_heartbeat: Utc::now(),
        metadata,
    };
    // Persist config mapping for this machine (best-effort)
    if let Err(e) = persist_node_config(&state, &request.machine_id, &node_id, &node_config).await {
        warn!(
            machine_id = %request.machine_id,
            node_id = %node_id,
            error = %e,
            "Failed to persist node configuration"
        );
    }
    // Store in ChainFire or in-memory
    match store_node_info(&state, &node_info).await {
        Ok(_) => {
            // Label which backend is primary, purely for the log line below.
            let storage = if state.has_local_storage() {
                "local"
            } else if state.has_storage() {
                "chainfire"
            } else {
                "in-memory"
            };
            info!(
                node_id = %node_info.id,
                hostname = %node_info.hostname,
                role = %node_config.role,
                storage = storage,
                "Node registered successfully"
            );
            // Mirror the node into per-cluster state (best-effort; failures
            // only warn, registration already succeeded).
            if let Err(e) = store_cluster_node_if_configured(
                &state,
                &node_info,
                &node_config,
                &request.machine_id,
                request.hardware_facts.as_ref(),
            )
            .await
            {
                warn!(
                    node_id = %node_info.id,
                    error = %e,
                    "Failed to store cluster node state"
                );
            }
            // SSH host keys are only minted/persisted via local storage.
            let ssh_host_key = if let Some(local_storage) = &state.local_storage {
                let mut storage = local_storage.lock().await;
                match storage.get_or_generate_ssh_host_key(&node_info.id) {
                    Ok(key) => Some(key),
                    Err(e) => {
                        warn!(error = %e, "Failed to generate ssh host key");
                        None
                    }
                }
            } else {
                None
            };
            // Issue a TLS cert when self-signed mode is on or a full CA pair
            // is configured; failures degrade to a cert-less response.
            let (tls_cert, tls_key) = if state.config.tls_self_signed
                || (state.config.tls_ca_cert_path.is_some()
                    && state.config.tls_ca_key_path.is_some())
            {
                if let Some(local_storage) = &state.local_storage {
                    // Local storage caches issued certs per node.
                    let mut storage = local_storage.lock().await;
                    match storage.get_or_generate_tls_cert(
                        &node_info.id,
                        &node_config.hostname,
                        &node_config.ip,
                        state.config.tls_ca_cert_path.as_deref(),
                        state.config.tls_ca_key_path.as_deref(),
                    ) {
                        Ok((cert, key)) => (Some(cert), Some(key)),
                        Err(e) => {
                            warn!(error = %e, "Failed to issue node TLS certificate");
                            (None, None)
                        }
                    }
                } else {
                    // No local storage: issue a fresh (uncached) certificate.
                    match crate::tls::issue_node_cert(
                        &node_info.id,
                        &node_config.hostname,
                        &node_config.ip,
                        state.config.tls_ca_cert_path.as_deref(),
                        state.config.tls_ca_key_path.as_deref(),
                    ) {
                        Ok((cert, key)) => (Some(cert), Some(key)),
                        Err(e) => {
                            warn!(error = %e, "Failed to issue node TLS certificate");
                            (None, None)
                        }
                    }
                }
            } else {
                (None, None)
            };
            Ok(Json(PhoneHomeResponse {
                success: true,
                message: Some(format!("Node {} registered successfully", node_info.id)),
                node_id: node_id.clone(),
                state: NodeState::Provisioning,
                node_config: Some(node_config),
                ssh_host_key,
                tls_cert,
                tls_key,
            }))
        }
        Err(e) => {
            error!(
                machine_id = %request.machine_id,
                error = %e,
                "Failed to store node info"
            );
            Err((
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Failed to register node: {}", e),
            ))
        }
    }
}
/// Lookup node configuration by machine-id
///
/// Resolution order: local storage, then ChainFire, then the in-memory
/// fallback map, then (only when `allow_test_mappings` is enabled)
/// hardcoded development fixtures. Returns the first `(node_id, config)`
/// hit, or `None` when the machine is unknown everywhere.
pub(crate) async fn lookup_node_config(
    state: &AppState,
    machine_id: &str,
) -> Option<(String, NodeConfig)> {
    debug!(machine_id = %machine_id, "Looking up node configuration");
    // Try local storage first
    if let Some(local_storage) = &state.local_storage {
        let storage = local_storage.lock().await;
        if let Some((node_id, config)) = storage.get_node_config(machine_id) {
            debug!(
                machine_id = %machine_id,
                node_id = %node_id,
                "Found config in local storage"
            );
            return Some((node_id, config));
        }
    }
    // Then try ChainFire; lookup errors degrade to the next fallback tier.
    if let Some(storage_mutex) = &state.storage {
        let mut storage = storage_mutex.lock().await;
        match storage.get_node_config(machine_id).await {
            Ok(Some((node_id, config))) => {
                debug!(
                    machine_id = %machine_id,
                    node_id = %node_id,
                    "Found config in ChainFire"
                );
                return Some((node_id, config));
            }
            Ok(None) => {
                debug!(machine_id = %machine_id, "Not found in ChainFire");
            }
            Err(e) => {
                warn!(
                    machine_id = %machine_id,
                    error = %e,
                    "ChainFire lookup failed, trying fallback"
                );
            }
        }
    }
    // Fallback to in-memory storage
    let configs = state.machine_configs.read().await;
    if let Some((node_id, config)) = configs.get(machine_id) {
        debug!(
            machine_id = %machine_id,
            node_id = %node_id,
            "Found config in in-memory storage"
        );
        return Some((node_id.clone(), config.clone()));
    }
    // Hardcoded test mappings (for development/testing)
    if state.config.allow_test_mappings {
        match machine_id {
            "test-machine-01" => {
                return Some((
                    "node01".to_string(),
                    NodeConfig {
                        hostname: "node01".to_string(),
                        role: "control-plane".to_string(),
                        ip: "10.0.1.10".to_string(),
                        services: vec!["chainfire".to_string(), "flaredb".to_string()],
                        ssh_authorized_keys: vec![],
                        labels: std::collections::HashMap::new(),
                        pool: None,
                        node_class: None,
                        failure_domain: None,
                        nix_profile: None,
                        install_plan: None,
                    },
                ));
            }
            "test-machine-02" => {
                return Some((
                    "node02".to_string(),
                    NodeConfig {
                        hostname: "node02".to_string(),
                        role: "worker".to_string(),
                        ip: "10.0.1.11".to_string(),
                        services: vec!["chainfire".to_string()],
                        ssh_authorized_keys: vec![],
                        labels: std::collections::HashMap::new(),
                        pool: None,
                        node_class: None,
                        failure_domain: None,
                        nix_profile: None,
                        install_plan: None,
                    },
                ));
            }
            _ => {}
        }
    }
    None
}
/// Resolve a configuration for an unknown machine via enrollment rules.
///
/// Requires a configured `cluster_id`, ChainFire storage, and a non-empty
/// `cluster_namespace`; otherwise this quietly resolves to `Ok(None)`.
/// Rules are tried highest `priority` first (name order as tie-breaker) and
/// the first matching rule produces the node config.
///
/// # Errors
/// Returns `500` when enrollment rules, node classes, or pools cannot be
/// loaded from storage.
async fn resolve_enrollment_config(
    state: &AppState,
    request: &PhoneHomeRequest,
) -> Result<Option<(String, NodeConfig)>, (StatusCode, String)> {
    let Some(cluster_id) = state.config.cluster_id.as_deref() else {
        return Ok(None);
    };
    let Some(storage_mutex) = &state.storage else {
        return Ok(None);
    };
    let cluster_namespace = state.config.cluster_namespace.trim();
    if cluster_namespace.is_empty() {
        return Ok(None);
    }
    let mut storage = storage_mutex.lock().await;
    let mut rules = storage
        .list_enrollment_rules(cluster_namespace, cluster_id)
        .await
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("failed to load enrollment rules: {}", e),
            )
        })?;
    if rules.is_empty() {
        return Ok(None);
    }
    // Node classes and pools are needed to expand whichever rule matches.
    let node_classes = storage
        .list_node_classes(cluster_namespace, cluster_id)
        .await
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("failed to load node classes: {}", e),
            )
        })?;
    let pools = storage
        .list_pools(cluster_namespace, cluster_id)
        .await
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("failed to load pools: {}", e),
            )
        })?;
    // Release the storage lock before the (pure, in-memory) matching below.
    drop(storage);
    // Highest priority wins; ties break on rule name for determinism.
    rules.sort_by(|lhs, rhs| {
        rhs.priority
            .cmp(&lhs.priority)
            .then_with(|| lhs.name.cmp(&rhs.name))
    });
    let Some(rule) = rules
        .iter()
        .find(|rule| enrollment_rule_matches(rule, request))
    else {
        return Ok(None);
    };
    Ok(Some(build_node_config_from_rule(
        rule,
        request,
        &node_classes,
        &pools,
    )))
}
/// Return `true` when `request` satisfies every matcher on `rule`.
///
/// All matchers are conjunctive:
/// - `match_hostname_prefix`: the request must carry a hostname with that
///   prefix (absent hostname fails the match).
/// - `match_ip_prefixes`: the request must carry an IP matching at least one
///   entry via plain string `starts_with` (not CIDR).
/// - `match_labels`: every key/value pair must appear in request metadata.
fn enrollment_rule_matches(rule: &EnrollmentRuleSpec, request: &PhoneHomeRequest) -> bool {
    let hostname_ok = match rule.match_hostname_prefix.as_deref() {
        // No hostname matcher configured: vacuously true.
        None => true,
        Some(prefix) => request
            .hostname
            .as_deref()
            .map_or(false, |hostname| hostname.starts_with(prefix)),
    };
    if !hostname_ok {
        return false;
    }
    let ip_ok = rule.match_ip_prefixes.is_empty()
        || request.ip.as_deref().map_or(false, |ip| {
            rule.match_ip_prefixes
                .iter()
                .any(|prefix| ip.starts_with(prefix))
        });
    if !ip_ok {
        return false;
    }
    rule.match_labels
        .iter()
        .all(|(key, value)| request.metadata.get(key) == Some(value))
}
/// Build a `(node_id, NodeConfig)` for `request` from a matched enrollment
/// rule, inheriting defaults from the rule's pool and node class.
///
/// Precedence (most specific wins):
/// - node id: request `node_id`, then request hostname, then
///   `{node_id_prefix}-{machine_id prefix}`.
/// - role: rule role, then the node class's first role, then `"worker"`.
/// - labels: node class < pool < rule; `pool`/`node_class` marker labels are
///   only added when not already set.
/// - nix profile / install plan: rule over node class (plans merge
///   field-wise via `merge_install_plan`).
fn build_node_config_from_rule(
    rule: &EnrollmentRuleSpec,
    request: &PhoneHomeRequest,
    node_classes: &[NodeClassSpec],
    pools: &[NodePoolSpec],
) -> (String, NodeConfig) {
    // Prefer an explicit, non-blank node_id, then a non-blank hostname.
    let requested_id = request
        .node_id
        .as_ref()
        .map(|value| value.trim())
        .filter(|value| !value.is_empty())
        .map(str::to_string)
        .or_else(|| {
            request
                .hostname
                .as_ref()
                .map(|value| value.trim())
                .filter(|value| !value.is_empty())
                .map(str::to_string)
        });
    let node_id = requested_id.unwrap_or_else(|| {
        // NOTE(review): byte slicing assumes machine_id is single-byte chars
        // (presumably enforced upstream by validate_identifier) — confirm.
        let prefix = rule.node_id_prefix.as_deref().unwrap_or("node");
        let suffix_len = std::cmp::min(12usize, request.machine_id.len());
        format!("{prefix}-{}", &request.machine_id[..suffix_len])
    });
    let pool = rule.pool.clone();
    let pool_spec = pool
        .as_deref()
        .and_then(|name| pools.iter().find(|pool| pool.name == name));
    // The node class may come from the rule directly or via the pool.
    let node_class = rule
        .node_class
        .clone()
        .or_else(|| pool_spec.and_then(|pool| pool.node_class.clone()));
    let node_class_spec = node_class.as_deref().and_then(|name| {
        node_classes
            .iter()
            .find(|node_class| node_class.name == name)
    });
    let role = rule
        .role
        .clone()
        .or_else(|| node_class_spec.and_then(|node_class| node_class.roles.first().cloned()))
        .unwrap_or_else(|| "worker".to_string());
    // Layer labels: class first, pool fills gaps, rule overrides everything.
    let mut labels = std::collections::HashMap::new();
    if let Some(node_class) = node_class_spec {
        labels.extend(node_class.labels.clone());
    }
    if let Some(pool) = pool_spec {
        for (key, value) in &pool.labels {
            labels.entry(key.clone()).or_insert_with(|| value.clone());
        }
    }
    for (key, value) in &rule.labels {
        labels.insert(key.clone(), value.clone());
    }
    // Marker labels never overwrite explicit user labels.
    if let Some(pool_name) = pool.as_deref() {
        labels
            .entry("pool".to_string())
            .or_insert_with(|| pool_name.to_string());
    }
    if let Some(node_class_name) = node_class.as_deref() {
        labels
            .entry("node_class".to_string())
            .or_insert_with(|| node_class_name.to_string());
    }
    // Explicit failure_domain metadata wins over the Kubernetes zone label.
    let failure_domain = request
        .metadata
        .get("failure_domain")
        .cloned()
        .or_else(|| request.metadata.get("topology.kubernetes.io/zone").cloned());
    (
        node_id.clone(),
        NodeConfig {
            hostname: request
                .hostname
                .clone()
                .filter(|value| !value.trim().is_empty())
                .unwrap_or_else(|| node_id.clone()),
            role,
            ip: request.ip.clone().unwrap_or_default(),
            services: rule.services.clone(),
            ssh_authorized_keys: rule.ssh_authorized_keys.clone(),
            labels,
            pool,
            node_class,
            failure_domain,
            nix_profile: rule
                .nix_profile
                .clone()
                .or_else(|| node_class_spec.and_then(|node_class| node_class.nix_profile.clone())),
            install_plan: merge_install_plan(
                rule.install_plan.as_ref(),
                node_class_spec.and_then(|node_class| node_class.install_plan.as_ref()),
            ),
        },
    )
}
/// Store NodeInfo in ChainFire or in-memory
async fn store_node_info(state: &AppState, node_info: &NodeInfo) -> anyhow::Result<()> {
let mut stored = false;
// Prefer local storage when configured.
if let Some(local_storage) = &state.local_storage {
let mut storage = local_storage.lock().await;
match storage.store_node_info(node_info) {
Ok(()) => {
stored = true;
debug!(node_id = %node_info.id, "Stored node info in local storage");
}
Err(e) => {
warn!(error = %e, "Failed to store node info in local storage");
}
}
}
// Also try ChainFire if available.
if let Some(storage_mutex) = &state.storage {
let mut chainfire = storage_mutex.lock().await;
match chainfire.store_node_info(node_info).await {
Ok(()) => {
stored = true;
debug!(node_id = %node_info.id, "Stored node info in ChainFire");
}
Err(e) => {
warn!(error = %e, "Failed to store node info in ChainFire");
}
}
}
if stored {
return Ok(());
}
// Fallback to in-memory storage when all configured backends fail.
state
.nodes
.write()
.await
.insert(node_info.id.clone(), node_info.clone());
debug!(
node_id = %node_info.id,
"Stored node info in-memory (all backends unavailable)"
);
Ok(())
}
/// Record the machine-id → (node_id, config) mapping in every configured
/// backend plus the in-memory cache.
///
/// Backend write failures are logged and swallowed (best-effort). The
/// in-memory cache update is skipped when the machine-id is already bound
/// to a *different* node, to avoid silently re-pointing an existing
/// mapping; this function still returns `Ok(())` in that case.
async fn persist_node_config(
    state: &AppState,
    machine_id: &str,
    node_id: &str,
    config: &NodeConfig,
) -> anyhow::Result<()> {
    if let Some(local) = &state.local_storage {
        let outcome = local.lock().await.register_node(machine_id, node_id, config);
        if let Err(e) = outcome {
            warn!(
                machine_id = %machine_id,
                node_id = %node_id,
                error = %e,
                "Failed to persist node config to local storage"
            );
        }
    }
    if let Some(chainfire) = &state.storage {
        let outcome = chainfire
            .lock()
            .await
            .register_node(machine_id, node_id, config)
            .await;
        if let Err(e) = outcome {
            warn!(
                machine_id = %machine_id,
                node_id = %node_id,
                error = %e,
                "Failed to persist node config to ChainFire"
            );
        }
    }
    // Keep in-memory mapping in sync as a fallback cache.
    let mut cache = state.machine_configs.write().await;
    if let Some((existing_node, _)) = cache.get(machine_id) {
        if existing_node != node_id {
            warn!(
                machine_id = %machine_id,
                existing_node = %existing_node,
                requested_node = %node_id,
                "Skipping in-memory mapping update due to conflict"
            );
            return Ok(());
        }
    }
    cache.insert(
        machine_id.to_string(),
        (node_id.to_string(), config.clone()),
    );
    Ok(())
}
/// Mirror a freshly registered node into per-cluster state (best-effort).
///
/// No-ops (with a debug log) unless `cluster_id` and `cluster_namespace`
/// are configured and non-blank. Builds a `ClusterNodeRecord` from the
/// node's config/info plus any reported hardware facts, then writes it to
/// local storage and ChainFire when available. Individual write failures
/// are logged but never propagated, so this always returns `Ok(())`.
async fn store_cluster_node_if_configured(
    state: &AppState,
    node_info: &NodeInfo,
    node_config: &NodeConfig,
    machine_id: &str,
    hardware_facts: Option<&HardwareFacts>,
) -> anyhow::Result<()> {
    let Some(cluster_id) = state.config.cluster_id.as_deref() else {
        debug!("cluster_id not configured; skipping cluster node state write");
        return Ok(());
    };
    if cluster_id.trim().is_empty() {
        debug!("cluster_id is empty; skipping cluster node state write");
        return Ok(());
    }
    let cluster_namespace = state.config.cluster_namespace.trim();
    if cluster_namespace.is_empty() {
        debug!("cluster_namespace is empty; skipping cluster node state write");
        return Ok(());
    }
    // Labels = config labels overlaid with request metadata, minus the
    // role/services entries that become dedicated record fields below.
    let mut labels = node_config.labels.clone();
    for (key, value) in &node_info.metadata {
        labels.insert(key.clone(), value.clone());
    }
    labels.remove("role");
    labels.remove("services");
    // Prefer the configured role; fall back to the metadata-reported one.
    let mut roles = Vec::new();
    if !node_config.role.trim().is_empty() {
        roles.push(node_config.role.clone());
    } else if let Some(role) = node_info.metadata.get("role") {
        if !role.trim().is_empty() {
            roles.push(role.clone());
        }
    }
    let record = ClusterNodeRecord {
        node_id: node_info.id.clone(),
        machine_id: Some(machine_id.to_string()),
        ip: node_info.ip.clone(),
        hostname: node_info.hostname.clone(),
        roles,
        labels,
        pool: node_config.pool.clone(),
        node_class: node_config.node_class.clone(),
        failure_domain: node_config.failure_domain.clone(),
        nix_profile: node_config.nix_profile.clone(),
        install_plan: node_config.install_plan.clone(),
        hardware_facts: hardware_facts.cloned(),
        // Debug-format the NodeState variant into a lowercase state string.
        state: Some(format!("{:?}", node_info.state).to_lowercase()),
        // Reporting hardware facts at all marks the node as discovered.
        commission_state: hardware_facts.map(|_| CommissionState::Discovered),
        // Having an install plan implies an install that has not started yet.
        install_state: node_config.install_plan.as_ref().map(|_| InstallState::Pending),
        commissioned_at: None,
        last_inventory_hash: inventory_hash(hardware_facts),
        // Power state is parsed leniently; unrecognized values become None.
        power_state: node_info
            .metadata
            .get("power_state")
            .and_then(|value| match value.as_str() {
                "on" => Some(PowerState::On),
                "off" => Some(PowerState::Off),
                "cycling" => Some(PowerState::Cycling),
                "unknown" => Some(PowerState::Unknown),
                _ => None,
            }),
        bmc_ref: node_info.metadata.get("bmc_ref").cloned(),
        last_heartbeat: Some(node_info.last_heartbeat),
    };
    if let Some(local_storage) = &state.local_storage {
        let mut storage = local_storage.lock().await;
        if let Err(e) =
            storage.store_cluster_node(cluster_namespace, cluster_id, &node_info.id, &record)
        {
            warn!(error = %e, "Failed to store cluster node in local storage");
        }
    }
    if let Some(storage_mutex) = &state.storage {
        let mut storage = storage_mutex.lock().await;
        if let Err(e) = storage
            .store_cluster_node(cluster_namespace, cluster_id, &node_info.id, &record)
            .await
        {
            warn!(error = %e, "Failed to store cluster node in ChainFire");
        }
    } else if state.local_storage.is_none() {
        debug!("ChainFire storage unavailable; skipping cluster node state write");
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::Config;
    use crate::state::AppState;
    use axum::http::HeaderMap;
    use std::collections::HashMap;
    /// Headers carrying the bootstrap token expected by `test_state()`.
    fn test_headers() -> HeaderMap {
        let mut headers = HeaderMap::new();
        headers.insert("x-deployer-token", "test-token".parse().unwrap());
        headers
    }
    /// Default AppState with only the bootstrap token configured
    /// (no storage backends, unknown nodes disallowed).
    fn test_state() -> Arc<AppState> {
        let mut config = Config::default();
        config.bootstrap_token = Some("test-token".to_string());
        Arc::new(AppState::with_config(config))
    }
    /// Happy path: a pre-registered machine gets back its stored config
    /// and the node record lands in the in-memory store.
    #[tokio::test]
    async fn test_phone_home_known_machine() {
        let state = test_state();
        // Pre-register a machine
        let config = NodeConfig {
            hostname: "node01".to_string(),
            role: "control-plane".to_string(),
            ip: "10.0.1.10".to_string(),
            services: vec!["chainfire".to_string(), "flaredb".to_string()],
            ssh_authorized_keys: vec![],
            labels: HashMap::new(),
            pool: None,
            node_class: None,
            failure_domain: None,
            nix_profile: None,
            install_plan: None,
        };
        state.machine_configs.write().await.insert(
            "test-machine-01".to_string(),
            ("node01".to_string(), config),
        );
        let request = PhoneHomeRequest {
            machine_id: "test-machine-01".to_string(),
            node_id: None,
            hostname: None,
            ip: None,
            cluster_config_hash: None,
            metadata: HashMap::new(),
            hardware_facts: None,
        };
        let result = phone_home(State(state.clone()), test_headers(), Json(request)).await;
        assert!(result.is_ok());
        let response = result.unwrap().0;
        assert!(response.success);
        assert_eq!(response.node_id, "node01");
        assert_eq!(response.state, NodeState::Provisioning);
        assert!(response.node_config.is_some());
        // No local storage configured, so no SSH host key is minted.
        assert!(response.ssh_host_key.is_none());
        let config = response.node_config.unwrap();
        assert_eq!(config.hostname, "node01");
        assert_eq!(config.role, "control-plane");
        // Verify node was stored
        let nodes = state.nodes.read().await;
        assert!(nodes.contains_key("node01"));
    }
    /// With `allow_unknown_nodes`, an unregistered machine receives a
    /// generated "node-<machine-id>" identity and the default worker role.
    #[tokio::test]
    async fn test_phone_home_unknown_machine() {
        let mut config = Config::default();
        config.bootstrap_token = Some("test-token".to_string());
        config.allow_unknown_nodes = true;
        let state = Arc::new(AppState::with_config(config));
        let request = PhoneHomeRequest {
            machine_id: "unknown-machine-xyz".to_string(),
            node_id: None,
            hostname: None,
            // An IP is required here: unknown machines have no stored config
            // to backfill from, and a missing IP is rejected.
            ip: Some("10.0.1.100".to_string()),
            cluster_config_hash: None,
            metadata: HashMap::new(),
            hardware_facts: None,
        };
        let result = phone_home(State(state.clone()), test_headers(), Json(request)).await;
        assert!(result.is_ok());
        let response = result.unwrap().0;
        assert!(response.success);
        assert!(response.node_id.starts_with("node-"));
        assert_eq!(response.state, NodeState::Provisioning);
        assert!(response.node_config.is_some());
        let config = response.node_config.unwrap();
        assert_eq!(config.role, "worker"); // Default role
    }
    /// A pre-registered config is returned as-is (role and IP preserved).
    #[tokio::test]
    async fn test_phone_home_with_preregistered_config() {
        let state = test_state();
        // Pre-register a machine
        let config = NodeConfig {
            hostname: "my-node".to_string(),
            role: "storage".to_string(),
            ip: "10.0.2.50".to_string(),
            services: vec!["lightningstor".to_string()],
            ssh_authorized_keys: vec![],
            labels: HashMap::new(),
            pool: None,
            node_class: None,
            failure_domain: None,
            nix_profile: None,
            install_plan: None,
        };
        state.machine_configs.write().await.insert(
            "preregistered-123".to_string(),
            ("my-node".to_string(), config),
        );
        let request = PhoneHomeRequest {
            machine_id: "preregistered-123".to_string(),
            node_id: None,
            hostname: None,
            ip: None,
            cluster_config_hash: None,
            metadata: HashMap::new(),
            hardware_facts: None,
        };
        let result = phone_home(State(state.clone()), test_headers(), Json(request)).await;
        assert!(result.is_ok());
        let response = result.unwrap().0;
        assert!(response.success);
        assert_eq!(response.node_id, "my-node");
        let config = response.node_config.unwrap();
        assert_eq!(config.role, "storage");
        assert_eq!(config.ip, "10.0.2.50");
    }
    /// A request satisfying all three matcher kinds (labels, hostname
    /// prefix, IP prefix) matches the rule.
    #[test]
    fn test_enrollment_rule_matching() {
        let rule = EnrollmentRuleSpec {
            name: "gpu".to_string(),
            priority: 10,
            match_labels: HashMap::from([("sku".to_string(), "gpu".to_string())]),
            match_hostname_prefix: Some("gpu-".to_string()),
            match_ip_prefixes: vec!["10.0.3.".to_string()],
            pool: Some("gpu".to_string()),
            node_class: Some("gpu-worker".to_string()),
            role: None,
            labels: HashMap::new(),
            nix_profile: None,
            install_plan: None,
            services: vec![],
            ssh_authorized_keys: vec![],
            node_id_prefix: Some("gpu".to_string()),
        };
        let request = PhoneHomeRequest {
            machine_id: "machine-1".to_string(),
            node_id: None,
            hostname: Some("gpu-node-01".to_string()),
            ip: Some("10.0.3.25".to_string()),
            cluster_config_hash: None,
            metadata: HashMap::from([("sku".to_string(), "gpu".to_string())]),
            hardware_facts: None,
        };
        assert!(enrollment_rule_matches(&rule, &request));
    }
    /// A rule naming only a pool inherits role/profile/plan/labels through
    /// pool → node class resolution, with rule labels layered on top.
    #[test]
    fn test_build_node_config_from_rule_inherits_class_and_pool() {
        let rule = EnrollmentRuleSpec {
            name: "gpu".to_string(),
            priority: 10,
            match_labels: HashMap::new(),
            match_hostname_prefix: None,
            match_ip_prefixes: vec![],
            pool: Some("gpu".to_string()),
            node_class: None,
            role: None,
            labels: HashMap::from([("accelerator".to_string(), "nvidia".to_string())]),
            nix_profile: None,
            install_plan: None,
            services: vec!["gpu-agent".to_string()],
            ssh_authorized_keys: vec!["ssh-ed25519 test".to_string()],
            node_id_prefix: Some("gpu".to_string()),
        };
        let request = PhoneHomeRequest {
            machine_id: "abcdef123456".to_string(),
            node_id: None,
            hostname: Some("gpu-dyn-01".to_string()),
            ip: Some("10.0.9.10".to_string()),
            cluster_config_hash: None,
            // Zone metadata should surface as the failure domain.
            metadata: HashMap::from([(
                "topology.kubernetes.io/zone".to_string(),
                "rack-z".to_string(),
            )]),
            hardware_facts: None,
        };
        let node_classes = vec![NodeClassSpec {
            name: "gpu-worker".to_string(),
            description: None,
            nix_profile: Some("profiles/gpu-worker".to_string()),
            install_plan: Some(InstallPlan {
                nixos_configuration: Some("gpu-worker".to_string()),
                disko_config_path: Some("profiles/gpu-worker/disko.nix".to_string()),
                target_disk: Some("/dev/disk/by-id/nvme-gpu-worker".to_string()),
                target_disk_by_id: None,
            }),
            roles: vec!["worker".to_string()],
            labels: HashMap::from([("tier".to_string(), "gpu".to_string())]),
        }];
        let pools = vec![NodePoolSpec {
            name: "gpu".to_string(),
            description: None,
            node_class: Some("gpu-worker".to_string()),
            min_size: None,
            max_size: None,
            labels: HashMap::from([("pool-kind".to_string(), "accelerated".to_string())]),
        }];
        let (node_id, config) = build_node_config_from_rule(&rule, &request, &node_classes, &pools);
        // Hostname wins as the node id when no explicit node_id was sent.
        assert_eq!(node_id, "gpu-dyn-01");
        assert_eq!(config.role, "worker");
        assert_eq!(config.pool.as_deref(), Some("gpu"));
        assert_eq!(config.node_class.as_deref(), Some("gpu-worker"));
        assert_eq!(config.nix_profile.as_deref(), Some("profiles/gpu-worker"));
        let install_plan = config
            .install_plan
            .expect("install_plan should inherit from class");
        assert_eq!(
            install_plan.nixos_configuration.as_deref(),
            Some("gpu-worker")
        );
        assert_eq!(
            install_plan.disko_config_path.as_deref(),
            Some("profiles/gpu-worker/disko.nix")
        );
        assert_eq!(config.labels.get("tier").map(String::as_str), Some("gpu"));
        assert_eq!(
            config.labels.get("pool-kind").map(String::as_str),
            Some("accelerated")
        );
        assert_eq!(
            config.labels.get("accelerator").map(String::as_str),
            Some("nvidia")
        );
        assert_eq!(config.failure_domain.as_deref(), Some("rack-z"));
    }
}