453 lines
16 KiB
Rust
453 lines
16 KiB
Rust
//! PlasmaVMC control plane server binary
|
|
|
|
use clap::Parser;
|
|
use iam_service_auth::AuthService;
|
|
use metrics_exporter_prometheus::PrometheusBuilder;
|
|
use plasmavmc_api::proto::image_service_server::ImageServiceServer;
|
|
use plasmavmc_api::proto::node_service_client::NodeServiceClient;
|
|
use plasmavmc_api::proto::node_service_server::NodeServiceServer;
|
|
use plasmavmc_api::proto::vm_service_server::VmServiceServer;
|
|
use plasmavmc_api::proto::volume_service_server::VolumeServiceServer;
|
|
use plasmavmc_api::proto::{
|
|
HeartbeatNodeRequest, HypervisorType as ProtoHypervisorType, NodeCapacity,
|
|
NodeState as ProtoNodeState, VolumeDriverKind as ProtoVolumeDriverKind,
|
|
};
|
|
use plasmavmc_firecracker::FireCrackerBackend;
|
|
use plasmavmc_hypervisor::HypervisorRegistry;
|
|
use plasmavmc_kvm::KvmBackend;
|
|
use plasmavmc_server::config::ServerConfig;
|
|
use plasmavmc_server::watcher::{StateSynchronizer, StateWatcher, WatcherConfig};
|
|
use plasmavmc_server::VmServiceImpl;
|
|
use std::net::SocketAddr;
|
|
use std::path::PathBuf;
|
|
use std::sync::Arc;
|
|
use std::time::Duration;
|
|
use std::{collections::HashMap, fs};
|
|
use tonic::transport::{Certificate, Endpoint, Identity, Server, ServerTlsConfig};
|
|
use tonic::{Request, Status};
|
|
use tonic_health::server::health_reporter;
|
|
use tracing_subscriber::EnvFilter;
|
|
|
|
/// PlasmaVMC control plane server
// NOTE(review): the `///` doc comments below double as clap help text, so they
// are part of the CLI's runtime behavior — do not reword them casually.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Configuration file path
    #[arg(short, long, default_value = "plasmavmc.toml")]
    config: PathBuf,

    /// Address to listen on (overrides config)
    // Kept as a String; it is parsed into `config.addr` in main() so a bad
    // value surfaces as a startup error after config load.
    #[arg(short, long)]
    addr: Option<String>,

    /// Log level (overrides config)
    #[arg(short, long)]
    log_level: Option<String>,

    /// Path to the Firecracker kernel image (overrides config)
    #[arg(long)]
    firecracker_kernel_path: Option<PathBuf>,

    /// Path to the Firecracker rootfs image (overrides config)
    #[arg(long)]
    firecracker_rootfs_path: Option<PathBuf>,

    /// Metrics port for Prometheus scraping
    // Unlike the flags above, this has no config-file counterpart; main()
    // always uses this value directly for the Prometheus listener.
    #[arg(long, default_value = "9102")]
    metrics_port: u16,
}
|
|
|
|
/// Ensure an endpoint string carries a URI scheme.
///
/// Returns the input unchanged when it already begins with `http://` or
/// `https://`; otherwise prefixes `http://` so the value is acceptable to
/// `tonic::transport::Endpoint::from_shared`.
fn normalize_endpoint(endpoint: &str) -> String {
    let already_schemed = ["http://", "https://"]
        .iter()
        .any(|scheme| endpoint.starts_with(scheme));
    if already_schemed {
        return endpoint.to_string();
    }
    format!("http://{endpoint}")
}
|
|
|
|
/// Total system memory in MiB, read from `/proc/meminfo`.
///
/// Parses the first `MemTotal:` line (reported by the kernel in KiB) and
/// converts it to MiB. Returns 0 when the file is unreadable or the line
/// cannot be parsed (e.g. on non-Linux hosts).
fn available_memory_mib() -> u64 {
    let meminfo = match fs::read_to_string("/proc/meminfo") {
        Ok(contents) => contents,
        Err(_) => return 0,
    };
    for line in meminfo.lines() {
        if let Some(rest) = line.strip_prefix("MemTotal:") {
            // Line format is "MemTotal:   16342560 kB" — the first token
            // after the prefix is the KiB count.
            let kib = rest
                .split_whitespace()
                .next()
                .and_then(|value| value.parse::<u64>().ok());
            return kib.map_or(0, |kib| kib / 1024);
        }
    }
    0
}
|
|
|
|
async fn start_agent_heartbeat(
|
|
local_addr: SocketAddr,
|
|
supported_volume_drivers: Vec<i32>,
|
|
supported_storage_classes: Vec<String>,
|
|
shared_live_migration: bool,
|
|
) {
|
|
let Some(control_plane_addr) = std::env::var("PLASMAVMC_CONTROL_PLANE_ADDR")
|
|
.ok()
|
|
.map(|value| value.trim().to_string())
|
|
.filter(|value| !value.is_empty())
|
|
else {
|
|
return;
|
|
};
|
|
let Some(node_id) = std::env::var("PLASMAVMC_NODE_ID")
|
|
.ok()
|
|
.map(|value| value.trim().to_string())
|
|
.filter(|value| !value.is_empty())
|
|
else {
|
|
return;
|
|
};
|
|
|
|
let endpoint = normalize_endpoint(&control_plane_addr);
|
|
let advertise_endpoint = std::env::var("PLASMAVMC_ENDPOINT_ADVERTISE")
|
|
.ok()
|
|
.map(|value| value.trim().to_string())
|
|
.filter(|value| !value.is_empty())
|
|
.unwrap_or_else(|| local_addr.to_string());
|
|
let node_name = std::env::var("PLASMAVMC_NODE_NAME")
|
|
.ok()
|
|
.filter(|value| !value.trim().is_empty())
|
|
.unwrap_or_else(|| node_id.clone());
|
|
let heartbeat_secs = std::env::var("PLASMAVMC_NODE_HEARTBEAT_INTERVAL_SECS")
|
|
.ok()
|
|
.and_then(|value| value.parse::<u64>().ok())
|
|
.unwrap_or(5);
|
|
|
|
tokio::spawn(async move {
|
|
let mut ticker = tokio::time::interval(Duration::from_secs(heartbeat_secs));
|
|
loop {
|
|
ticker.tick().await;
|
|
let channel = match Endpoint::from_shared(endpoint.clone()) {
|
|
Ok(endpoint) => match endpoint.connect().await {
|
|
Ok(channel) => channel,
|
|
Err(error) => {
|
|
tracing::warn!(%error, "Failed to connect to PlasmaVMC control plane for heartbeat");
|
|
continue;
|
|
}
|
|
},
|
|
Err(error) => {
|
|
tracing::warn!(%error, "Invalid PlasmaVMC control plane endpoint for heartbeat");
|
|
continue;
|
|
}
|
|
};
|
|
let mut client = NodeServiceClient::new(channel);
|
|
let mut labels = HashMap::new();
|
|
labels.insert("plasmavmc_endpoint".to_string(), advertise_endpoint.clone());
|
|
let request = HeartbeatNodeRequest {
|
|
node_id: node_id.clone(),
|
|
name: node_name.clone(),
|
|
state: ProtoNodeState::Ready as i32,
|
|
capacity: Some(NodeCapacity {
|
|
vcpus: std::thread::available_parallelism()
|
|
.map(|parallelism| parallelism.get() as u32)
|
|
.unwrap_or(1),
|
|
memory_mib: available_memory_mib(),
|
|
storage_gib: 0,
|
|
}),
|
|
allocatable: Some(NodeCapacity {
|
|
vcpus: std::thread::available_parallelism()
|
|
.map(|parallelism| parallelism.get() as u32)
|
|
.unwrap_or(1),
|
|
memory_mib: available_memory_mib(),
|
|
storage_gib: 0,
|
|
}),
|
|
hypervisors: vec![ProtoHypervisorType::Kvm as i32],
|
|
labels,
|
|
agent_version: env!("CARGO_PKG_VERSION").to_string(),
|
|
supported_volume_drivers: supported_volume_drivers.clone(),
|
|
supported_storage_classes: supported_storage_classes.clone(),
|
|
shared_live_migration,
|
|
};
|
|
if let Err(error) = client.heartbeat_node(request).await {
|
|
tracing::warn!(%error, "Failed to heartbeat PlasmaVMC node");
|
|
}
|
|
}
|
|
});
|
|
}
|
|
|
|
#[tokio::main]
|
|
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
let args = Args::parse();
|
|
|
|
// Load configuration from file or use defaults
|
|
let mut config = if args.config.exists() {
|
|
let contents = tokio::fs::read_to_string(&args.config).await?;
|
|
toml::from_str(&contents)?
|
|
} else {
|
|
tracing::info!(
|
|
"Config file not found: {}, using defaults",
|
|
args.config.display()
|
|
);
|
|
ServerConfig::default()
|
|
};
|
|
|
|
// Apply command line overrides
|
|
if let Some(addr_str) = args.addr {
|
|
config.addr = addr_str.parse()?;
|
|
}
|
|
if let Some(log_level) = args.log_level {
|
|
config.log_level = log_level;
|
|
}
|
|
if let Some(kernel_path) = args.firecracker_kernel_path {
|
|
config.firecracker.kernel_path = Some(kernel_path);
|
|
}
|
|
if let Some(rootfs_path) = args.firecracker_rootfs_path {
|
|
config.firecracker.rootfs_path = Some(rootfs_path);
|
|
}
|
|
|
|
// Initialize tracing
|
|
tracing_subscriber::fmt()
|
|
.with_env_filter(
|
|
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(&config.log_level)),
|
|
)
|
|
.init();
|
|
|
|
tracing::info!("Starting PlasmaVMC server on {}", config.addr);
|
|
|
|
// Initialize Prometheus metrics exporter
|
|
let metrics_addr = format!("0.0.0.0:{}", args.metrics_port);
|
|
let builder = PrometheusBuilder::new();
|
|
builder
|
|
.with_http_listener(metrics_addr.parse::<std::net::SocketAddr>()?)
|
|
.install()
|
|
.expect("Failed to install Prometheus metrics exporter");
|
|
|
|
tracing::info!(
|
|
"Prometheus metrics available at http://{}/metrics",
|
|
metrics_addr
|
|
);
|
|
|
|
// Create hypervisor registry and register backends
|
|
let registry = Arc::new(HypervisorRegistry::new());
|
|
|
|
// Register KVM backend (always available)
|
|
let kvm_backend = Arc::new(KvmBackend::with_defaults());
|
|
registry.register(kvm_backend);
|
|
|
|
// Register FireCracker backend if kernel/rootfs paths are configured (config or env)
|
|
let has_kernel = config.firecracker.kernel_path.is_some()
|
|
|| std::env::var_os("PLASMAVMC_FIRECRACKER_KERNEL_PATH").is_some();
|
|
let has_rootfs = config.firecracker.rootfs_path.is_some()
|
|
|| std::env::var_os("PLASMAVMC_FIRECRACKER_ROOTFS_PATH").is_some();
|
|
|
|
if has_kernel && has_rootfs {
|
|
match FireCrackerBackend::from_config(&config.firecracker) {
|
|
Ok(firecracker_backend) => {
|
|
registry.register(Arc::new(firecracker_backend));
|
|
tracing::info!("Registered FireCracker backend");
|
|
}
|
|
Err(err) => {
|
|
tracing::warn!("Failed to initialize FireCracker backend: {}", err);
|
|
}
|
|
}
|
|
} else if has_kernel || has_rootfs {
|
|
tracing::warn!(
|
|
"FireCracker backend configuration incomplete: kernel_path/rootfs_path must both be set (config or env)"
|
|
);
|
|
} else {
|
|
tracing::debug!("FireCracker backend not available (missing kernel/rootfs paths)");
|
|
}
|
|
|
|
tracing::info!("Registered hypervisors: {:?}", registry.available());
|
|
|
|
// Initialize IAM authentication service
|
|
tracing::info!(
|
|
"Connecting to IAM server at {}",
|
|
config.auth.iam_server_addr
|
|
);
|
|
let auth_service = AuthService::new(&config.auth.iam_server_addr)
|
|
.await
|
|
.map_err(|e| format!("Failed to connect to IAM server: {}", e))?;
|
|
let auth_service = Arc::new(auth_service);
|
|
|
|
// gRPC interceptors are synchronous, so bridge into the current Tokio runtime
|
|
// from a blocking section instead of creating a nested runtime that would
|
|
// later be dropped from async context during shutdown.
|
|
let auth_handle = tokio::runtime::Handle::current();
|
|
let make_interceptor = |auth: Arc<AuthService>| {
|
|
let handle = auth_handle.clone();
|
|
move |mut req: Request<()>| -> Result<Request<()>, Status> {
|
|
let auth = auth.clone();
|
|
tokio::task::block_in_place(|| {
|
|
handle.block_on(async move {
|
|
let tenant_context = auth.authenticate_request(&req).await?;
|
|
req.extensions_mut().insert(tenant_context);
|
|
Ok(req)
|
|
})
|
|
})
|
|
}
|
|
};
|
|
|
|
// Create services
|
|
let vm_service = Arc::new(
|
|
VmServiceImpl::new(
|
|
registry,
|
|
auth_service.clone(),
|
|
config.auth.iam_server_addr.clone(),
|
|
)
|
|
.await?,
|
|
);
|
|
|
|
// Optional: start state watcher for multi-instance HA sync
|
|
if std::env::var("PLASMAVMC_STATE_WATCHER")
|
|
.map(|v| matches!(v.as_str(), "1" | "true" | "yes"))
|
|
.unwrap_or(false)
|
|
{
|
|
let config = WatcherConfig::default();
|
|
let (watcher, rx) = StateWatcher::new(vm_service.store(), config);
|
|
let synchronizer = StateSynchronizer::new(vm_service.clone());
|
|
tokio::spawn(async move {
|
|
if let Err(e) = watcher.start().await {
|
|
tracing::error!(error = %e, "State watcher failed to start");
|
|
}
|
|
});
|
|
tokio::spawn(async move {
|
|
synchronizer.run(rx).await;
|
|
});
|
|
tracing::info!("State watcher enabled (PLASMAVMC_STATE_WATCHER)");
|
|
}
|
|
|
|
// Optional: start health monitor to refresh VM status periodically
|
|
if let Some(secs) = std::env::var("PLASMAVMC_HEALTH_MONITOR_INTERVAL_SECS")
|
|
.ok()
|
|
.and_then(|v| v.parse::<u64>().ok())
|
|
{
|
|
if secs > 0 {
|
|
vm_service
|
|
.clone()
|
|
.start_health_monitor(Duration::from_secs(secs));
|
|
}
|
|
}
|
|
|
|
// Optional: start node health monitor to detect stale heartbeats
|
|
if let Some(interval_secs) = std::env::var("PLASMAVMC_NODE_HEALTH_MONITOR_INTERVAL_SECS")
|
|
.ok()
|
|
.and_then(|v| v.parse::<u64>().ok())
|
|
{
|
|
if interval_secs > 0 {
|
|
let timeout_secs = std::env::var("PLASMAVMC_NODE_HEARTBEAT_TIMEOUT_SECS")
|
|
.ok()
|
|
.and_then(|v| v.parse::<u64>().ok())
|
|
.unwrap_or(60);
|
|
vm_service.clone().start_node_health_monitor(
|
|
Duration::from_secs(interval_secs),
|
|
Duration::from_secs(timeout_secs),
|
|
);
|
|
}
|
|
}
|
|
|
|
// Setup health service
|
|
let (mut health_reporter, health_service) = health_reporter();
|
|
health_reporter
|
|
.set_serving::<VmServiceServer<VmServiceImpl>>()
|
|
.await;
|
|
health_reporter
|
|
.set_serving::<ImageServiceServer<VmServiceImpl>>()
|
|
.await;
|
|
health_reporter
|
|
.set_serving::<VolumeServiceServer<VmServiceImpl>>()
|
|
.await;
|
|
health_reporter
|
|
.set_serving::<NodeServiceServer<VmServiceImpl>>()
|
|
.await;
|
|
|
|
// Parse address
|
|
let addr: SocketAddr = config.addr;
|
|
let heartbeat_volume_drivers = vm_service
|
|
.supported_volume_drivers()
|
|
.into_iter()
|
|
.map(|driver| match driver {
|
|
plasmavmc_types::VolumeDriverKind::Managed => ProtoVolumeDriverKind::Managed as i32,
|
|
plasmavmc_types::VolumeDriverKind::CephRbd => ProtoVolumeDriverKind::CephRbd as i32,
|
|
})
|
|
.collect();
|
|
let heartbeat_storage_classes = vm_service.supported_storage_classes();
|
|
let shared_live_migration = vm_service.shared_live_migration();
|
|
start_agent_heartbeat(
|
|
addr,
|
|
heartbeat_volume_drivers,
|
|
heartbeat_storage_classes,
|
|
shared_live_migration,
|
|
)
|
|
.await;
|
|
|
|
tracing::info!("PlasmaVMC gRPC server listening on {}", addr);
|
|
|
|
// Configure TLS if enabled
|
|
let mut server = Server::builder();
|
|
|
|
if let Some(tls_config) = &config.tls {
|
|
tracing::info!("TLS enabled, loading certificates...");
|
|
let cert = tokio::fs::read(&tls_config.cert_file).await?;
|
|
let key = tokio::fs::read(&tls_config.key_file).await?;
|
|
let server_identity = Identity::from_pem(cert, key);
|
|
|
|
let tls = if tls_config.require_client_cert {
|
|
tracing::info!("mTLS enabled");
|
|
let ca_cert = tokio::fs::read(
|
|
tls_config
|
|
.ca_file
|
|
.as_ref()
|
|
.ok_or("ca_file required for mTLS")?,
|
|
)
|
|
.await?;
|
|
let ca = Certificate::from_pem(ca_cert);
|
|
ServerTlsConfig::new()
|
|
.identity(server_identity)
|
|
.client_ca_root(ca)
|
|
} else {
|
|
ServerTlsConfig::new().identity(server_identity)
|
|
};
|
|
|
|
server = server.tls_config(tls)?;
|
|
}
|
|
|
|
// gRPC server (clone Arc for gRPC service)
|
|
let grpc_vm_service = Arc::clone(&vm_service);
|
|
let grpc_server = server
|
|
.add_service(health_service)
|
|
.add_service(tonic::codegen::InterceptedService::new(
|
|
VmServiceServer::from_arc(grpc_vm_service),
|
|
make_interceptor(auth_service.clone()),
|
|
))
|
|
.add_service(tonic::codegen::InterceptedService::new(
|
|
ImageServiceServer::from_arc(Arc::clone(&vm_service)),
|
|
make_interceptor(auth_service.clone()),
|
|
))
|
|
.add_service(tonic::codegen::InterceptedService::new(
|
|
VolumeServiceServer::from_arc(Arc::clone(&vm_service)),
|
|
make_interceptor(auth_service.clone()),
|
|
))
|
|
.add_service(NodeServiceServer::from_arc(Arc::clone(&vm_service)))
|
|
.serve(addr);
|
|
|
|
// HTTP REST API server
|
|
let http_addr = config.http_addr;
|
|
let rest_state = plasmavmc_server::rest::RestApiState {
|
|
vm_service: vm_service,
|
|
auth_service: auth_service.clone(),
|
|
};
|
|
let rest_app = plasmavmc_server::rest::build_router(rest_state);
|
|
let http_listener = tokio::net::TcpListener::bind(&http_addr).await?;
|
|
|
|
tracing::info!("PlasmaVMC HTTP REST API server starting on {}", http_addr);
|
|
|
|
let http_server = async move {
|
|
axum::serve(http_listener, rest_app)
|
|
.await
|
|
.map_err(|e| format!("HTTP server error: {}", e))
|
|
};
|
|
|
|
// Run both servers concurrently
|
|
tokio::select! {
|
|
result = grpc_server => {
|
|
result?;
|
|
}
|
|
result = http_server => {
|
|
result?;
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|