//! PlasmaVMC control plane server binary use clap::Parser; use iam_service_auth::AuthService; use metrics_exporter_prometheus::PrometheusBuilder; use plasmavmc_api::proto::image_service_server::ImageServiceServer; use plasmavmc_api::proto::node_service_client::NodeServiceClient; use plasmavmc_api::proto::node_service_server::NodeServiceServer; use plasmavmc_api::proto::vm_service_server::VmServiceServer; use plasmavmc_api::proto::volume_service_server::VolumeServiceServer; use plasmavmc_api::proto::{ HeartbeatNodeRequest, HypervisorType as ProtoHypervisorType, NodeCapacity, NodeState as ProtoNodeState, VolumeDriverKind as ProtoVolumeDriverKind, }; use plasmavmc_firecracker::FireCrackerBackend; use plasmavmc_hypervisor::HypervisorRegistry; use plasmavmc_kvm::KvmBackend; use plasmavmc_server::config::ServerConfig; use plasmavmc_server::watcher::{StateSynchronizer, StateWatcher, WatcherConfig}; use plasmavmc_server::VmServiceImpl; use std::net::SocketAddr; use std::path::PathBuf; use std::sync::Arc; use std::time::Duration; use std::{collections::HashMap, fs}; use tonic::transport::{Certificate, Endpoint, Identity, Server, ServerTlsConfig}; use tonic::{Request, Status}; use tonic_health::server::health_reporter; use tracing_subscriber::EnvFilter; /// PlasmaVMC control plane server #[derive(Parser, Debug)] #[command(author, version, about, long_about = None)] struct Args { /// Configuration file path #[arg(short, long, default_value = "plasmavmc.toml")] config: PathBuf, /// Address to listen on (overrides config) #[arg(short, long)] addr: Option, /// Log level (overrides config) #[arg(short, long)] log_level: Option, /// Path to the Firecracker kernel image (overrides config) #[arg(long)] firecracker_kernel_path: Option, /// Path to the Firecracker rootfs image (overrides config) #[arg(long)] firecracker_rootfs_path: Option, /// Metrics port for Prometheus scraping #[arg(long, default_value = "9102")] metrics_port: u16, } fn normalize_endpoint(endpoint: &str) -> String { if endpoint.starts_with("http://") || endpoint.starts_with("https://") { endpoint.to_string() } else { format!("http://{endpoint}") } } fn available_memory_mib() -> u64 { let Ok(meminfo) = fs::read_to_string("/proc/meminfo") else { return 0; }; meminfo .lines() .find_map(|line| line.strip_prefix("MemTotal:")) .and_then(|rest| rest.split_whitespace().next()) .and_then(|value| value.parse::().ok()) .map(|kib| kib / 1024) .unwrap_or(0) } async fn start_agent_heartbeat( local_addr: SocketAddr, supported_volume_drivers: Vec, supported_storage_classes: Vec, shared_live_migration: bool, ) { let Some(control_plane_addr) = std::env::var("PLASMAVMC_CONTROL_PLANE_ADDR") .ok() .map(|value| value.trim().to_string()) .filter(|value| !value.is_empty()) else { return; }; let Some(node_id) = std::env::var("PLASMAVMC_NODE_ID") .ok() .map(|value| value.trim().to_string()) .filter(|value| !value.is_empty()) else { return; }; let endpoint = normalize_endpoint(&control_plane_addr); let advertise_endpoint = std::env::var("PLASMAVMC_ENDPOINT_ADVERTISE") .ok() .map(|value| value.trim().to_string()) .filter(|value| !value.is_empty()) .unwrap_or_else(|| local_addr.to_string()); let node_name = std::env::var("PLASMAVMC_NODE_NAME") .ok() .filter(|value| !value.trim().is_empty()) .unwrap_or_else(|| node_id.clone()); let heartbeat_secs = std::env::var("PLASMAVMC_NODE_HEARTBEAT_INTERVAL_SECS") .ok() .and_then(|value| value.parse::().ok()) .unwrap_or(5); tokio::spawn(async move { let mut ticker = tokio::time::interval(Duration::from_secs(heartbeat_secs)); loop { ticker.tick().await; let channel = match Endpoint::from_shared(endpoint.clone()) { Ok(endpoint) => match endpoint.connect().await { Ok(channel) => channel, Err(error) => { tracing::warn!(%error, "Failed to connect to PlasmaVMC control plane for heartbeat"); continue; } }, Err(error) => { tracing::warn!(%error, "Invalid PlasmaVMC control plane endpoint for heartbeat"); continue; } }; let mut client = NodeServiceClient::new(channel); let mut labels = HashMap::new(); labels.insert("plasmavmc_endpoint".to_string(), advertise_endpoint.clone()); let request = HeartbeatNodeRequest { node_id: node_id.clone(), name: node_name.clone(), state: ProtoNodeState::Ready as i32, capacity: Some(NodeCapacity { vcpus: std::thread::available_parallelism() .map(|parallelism| parallelism.get() as u32) .unwrap_or(1), memory_mib: available_memory_mib(), storage_gib: 0, }), allocatable: Some(NodeCapacity { vcpus: std::thread::available_parallelism() .map(|parallelism| parallelism.get() as u32) .unwrap_or(1), memory_mib: available_memory_mib(), storage_gib: 0, }), hypervisors: vec![ProtoHypervisorType::Kvm as i32], labels, agent_version: env!("CARGO_PKG_VERSION").to_string(), supported_volume_drivers: supported_volume_drivers.clone(), supported_storage_classes: supported_storage_classes.clone(), shared_live_migration, }; if let Err(error) = client.heartbeat_node(request).await { tracing::warn!(%error, "Failed to heartbeat PlasmaVMC node"); } } }); } #[tokio::main] async fn main() -> Result<(), Box> { let args = Args::parse(); // Load configuration from file or use defaults let mut config = if args.config.exists() { let contents = tokio::fs::read_to_string(&args.config).await?; toml::from_str(&contents)? } else { tracing::info!( "Config file not found: {}, using defaults", args.config.display() ); ServerConfig::default() }; // Apply command line overrides if let Some(addr_str) = args.addr { config.addr = addr_str.parse()?; } if let Some(log_level) = args.log_level { config.log_level = log_level; } if let Some(kernel_path) = args.firecracker_kernel_path { config.firecracker.kernel_path = Some(kernel_path); } if let Some(rootfs_path) = args.firecracker_rootfs_path { config.firecracker.rootfs_path = Some(rootfs_path); } // Initialize tracing tracing_subscriber::fmt() .with_env_filter( EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(&config.log_level)), ) .init(); tracing::info!("Starting PlasmaVMC server on {}", config.addr); // Initialize Prometheus metrics exporter let metrics_addr = format!("0.0.0.0:{}", args.metrics_port); let builder = PrometheusBuilder::new(); builder .with_http_listener(metrics_addr.parse::()?) .install() .expect("Failed to install Prometheus metrics exporter"); tracing::info!( "Prometheus metrics available at http://{}/metrics", metrics_addr ); // Create hypervisor registry and register backends let registry = Arc::new(HypervisorRegistry::new()); // Register KVM backend (always available) let kvm_backend = Arc::new(KvmBackend::with_defaults()); registry.register(kvm_backend); // Register FireCracker backend if kernel/rootfs paths are configured (config or env) let has_kernel = config.firecracker.kernel_path.is_some() || std::env::var_os("PLASMAVMC_FIRECRACKER_KERNEL_PATH").is_some(); let has_rootfs = config.firecracker.rootfs_path.is_some() || std::env::var_os("PLASMAVMC_FIRECRACKER_ROOTFS_PATH").is_some(); if has_kernel && has_rootfs { match FireCrackerBackend::from_config(&config.firecracker) { Ok(firecracker_backend) => { registry.register(Arc::new(firecracker_backend)); tracing::info!("Registered FireCracker backend"); } Err(err) => { tracing::warn!("Failed to initialize FireCracker backend: {}", err); } } } else if has_kernel || has_rootfs { tracing::warn!( "FireCracker backend configuration incomplete: kernel_path/rootfs_path must both be set (config or env)" ); } else { tracing::debug!("FireCracker backend not available (missing kernel/rootfs paths)"); } tracing::info!("Registered hypervisors: {:?}", registry.available()); // Initialize IAM authentication service tracing::info!( "Connecting to IAM server at {}", config.auth.iam_server_addr ); let auth_service = AuthService::new(&config.auth.iam_server_addr) .await .map_err(|e| format!("Failed to connect to IAM server: {}", e))?; let auth_service = Arc::new(auth_service); // gRPC interceptors are synchronous, so bridge into the current Tokio runtime // from a blocking section instead of creating a nested runtime that would // later be dropped from async context during shutdown. let auth_handle = tokio::runtime::Handle::current(); let make_interceptor = |auth: Arc| { let handle = auth_handle.clone(); move |mut req: Request<()>| -> Result, Status> { let auth = auth.clone(); tokio::task::block_in_place(|| { handle.block_on(async move { let tenant_context = auth.authenticate_request(&req).await?; req.extensions_mut().insert(tenant_context); Ok(req) }) }) } }; // Create services let vm_service = Arc::new( VmServiceImpl::new( registry, auth_service.clone(), config.auth.iam_server_addr.clone(), ) .await?, ); // Optional: start state watcher for multi-instance HA sync if std::env::var("PLASMAVMC_STATE_WATCHER") .map(|v| matches!(v.as_str(), "1" | "true" | "yes")) .unwrap_or(false) { let config = WatcherConfig::default(); let (watcher, rx) = StateWatcher::new(vm_service.store(), config); let synchronizer = StateSynchronizer::new(vm_service.clone()); tokio::spawn(async move { if let Err(e) = watcher.start().await { tracing::error!(error = %e, "State watcher failed to start"); } }); tokio::spawn(async move { synchronizer.run(rx).await; }); tracing::info!("State watcher enabled (PLASMAVMC_STATE_WATCHER)"); } // Optional: start health monitor to refresh VM status periodically if let Some(secs) = std::env::var("PLASMAVMC_HEALTH_MONITOR_INTERVAL_SECS") .ok() .and_then(|v| v.parse::().ok()) { if secs > 0 { vm_service .clone() .start_health_monitor(Duration::from_secs(secs)); } } // Optional: start node health monitor to detect stale heartbeats if let Some(interval_secs) = std::env::var("PLASMAVMC_NODE_HEALTH_MONITOR_INTERVAL_SECS") .ok() .and_then(|v| v.parse::().ok()) { if interval_secs > 0 { let timeout_secs = std::env::var("PLASMAVMC_NODE_HEARTBEAT_TIMEOUT_SECS") .ok() .and_then(|v| v.parse::().ok()) .unwrap_or(60); vm_service.clone().start_node_health_monitor( Duration::from_secs(interval_secs), Duration::from_secs(timeout_secs), ); } } // Setup health service let (mut health_reporter, health_service) = health_reporter(); health_reporter .set_serving::>() .await; health_reporter .set_serving::>() .await; health_reporter .set_serving::>() .await; health_reporter .set_serving::>() .await; // Parse address let addr: SocketAddr = config.addr; let heartbeat_volume_drivers = vm_service .supported_volume_drivers() .into_iter() .map(|driver| match driver { plasmavmc_types::VolumeDriverKind::Managed => ProtoVolumeDriverKind::Managed as i32, plasmavmc_types::VolumeDriverKind::CephRbd => ProtoVolumeDriverKind::CephRbd as i32, }) .collect(); let heartbeat_storage_classes = vm_service.supported_storage_classes(); let shared_live_migration = vm_service.shared_live_migration(); start_agent_heartbeat( addr, heartbeat_volume_drivers, heartbeat_storage_classes, shared_live_migration, ) .await; tracing::info!("PlasmaVMC gRPC server listening on {}", addr); // Configure TLS if enabled let mut server = Server::builder(); if let Some(tls_config) = &config.tls { tracing::info!("TLS enabled, loading certificates..."); let cert = tokio::fs::read(&tls_config.cert_file).await?; let key = tokio::fs::read(&tls_config.key_file).await?; let server_identity = Identity::from_pem(cert, key); let tls = if tls_config.require_client_cert { tracing::info!("mTLS enabled"); let ca_cert = tokio::fs::read( tls_config .ca_file .as_ref() .ok_or("ca_file required for mTLS")?, ) .await?; let ca = Certificate::from_pem(ca_cert); ServerTlsConfig::new() .identity(server_identity) .client_ca_root(ca) } else { ServerTlsConfig::new().identity(server_identity) }; server = server.tls_config(tls)?; } // gRPC server (clone Arc for gRPC service) let grpc_vm_service = Arc::clone(&vm_service); let grpc_server = server .add_service(health_service) .add_service(tonic::codegen::InterceptedService::new( VmServiceServer::from_arc(grpc_vm_service), make_interceptor(auth_service.clone()), )) .add_service(tonic::codegen::InterceptedService::new( ImageServiceServer::from_arc(Arc::clone(&vm_service)), make_interceptor(auth_service.clone()), )) .add_service(tonic::codegen::InterceptedService::new( VolumeServiceServer::from_arc(Arc::clone(&vm_service)), make_interceptor(auth_service.clone()), )) .add_service(NodeServiceServer::from_arc(Arc::clone(&vm_service))) .serve(addr); // HTTP REST API server let http_addr = config.http_addr; let rest_state = plasmavmc_server::rest::RestApiState { vm_service: vm_service, auth_service: auth_service.clone(), }; let rest_app = plasmavmc_server::rest::build_router(rest_state); let http_listener = tokio::net::TcpListener::bind(&http_addr).await?; tracing::info!("PlasmaVMC HTTP REST API server starting on {}", http_addr); let http_server = async move { axum::serve(http_listener, rest_app) .await .map_err(|e| format!("HTTP server error: {}", e)) }; // Run both servers concurrently tokio::select! { result = grpc_server => { result?; } result = http_server => { result?; } } Ok(()) }