mod auth; mod cni; mod config; mod fiberlb_controller; mod flashdns_controller; mod ipam_client; mod rest; mod scheduler; mod services; mod storage; use anyhow::Result; use auth::AuthService; use clap::Parser; use config::Config; use ipam_client::IpamClient; use metrics_exporter_prometheus::PrometheusBuilder; use k8shost_proto::{ deployment_service_server::{DeploymentService, DeploymentServiceServer}, node_service_server::NodeServiceServer, pod_service_server::PodServiceServer, service_service_server::ServiceServiceServer, *, }; use services::{node::NodeServiceImpl, pod::PodServiceImpl, service::ServiceServiceImpl}; use std::{path::PathBuf, sync::Arc}; use storage::Storage; use tonic::{transport::Server, Request, Response, Status}; use tracing::{info, warn}; use tracing_subscriber::EnvFilter; /// k8shost API Server #[derive(Parser, Debug)] #[command(name = "k8shost-server")] #[command(about = "Kubernetes API server for PlasmaCloud's k8shost component")] struct Args { /// Configuration file path #[arg(short, long)] config: Option, /// Listen address for gRPC server (e.g., "[::]:6443") #[arg(long)] addr: Option, /// Log level (e.g., "info", "debug", "trace") #[arg(long)] log_level: Option, /// FlareDB Placement Driver address (e.g., "127.0.0.1:2379") #[arg(long)] flaredb_pd_addr: Option, /// FlareDB direct address (e.g., "127.0.0.1:50051") #[arg(long)] flaredb_direct_addr: Option, /// IAM server address (e.g., "http://127.0.0.1:50051") #[arg(long)] iam_server_addr: Option, /// FiberLB server address (e.g., "http://127.0.0.1:50082") #[arg(long)] fiberlb_server_addr: Option, /// FlashDNS server address (e.g., "http://127.0.0.1:50053") #[arg(long)] flashdns_server_addr: Option, /// Metrics port for Prometheus scraping #[arg(long, default_value = "9094")] metrics_port: u16, } #[tokio::main] async fn main() -> Result<(), Box> { let args = Args::parse(); // Load configuration let mut settings = ::config::Config::builder() .add_source(::config::File::from_str( toml::to_string(&Config::default())?.as_str(), ::config::FileFormat::Toml, )) .add_source(::config::Environment::with_prefix("K8SHOST").separator("_")); // Add config file if specified if let Some(config_path) = &args.config { info!("Loading config from file: {}", config_path.display()); settings = settings.add_source(::config::File::from(config_path.as_path())); } let loaded_config: Config = settings .build()? .try_deserialize() .map_err(|e| anyhow::anyhow!("Failed to load configuration: {}", e))?; // Apply CLI overrides to the loaded configuration let config = Config { server: config::ServerConfig { addr: args .addr .map(|s| s.parse().unwrap_or(loaded_config.server.addr)) .unwrap_or(loaded_config.server.addr), http_addr: loaded_config.server.http_addr, log_level: args.log_level.unwrap_or(loaded_config.server.log_level), }, flaredb: config::FlareDbConfig { pd_addr: args.flaredb_pd_addr.or(loaded_config.flaredb.pd_addr), direct_addr: args.flaredb_direct_addr.or(loaded_config.flaredb.direct_addr), }, iam: config::IamConfig { server_addr: args.iam_server_addr.unwrap_or(loaded_config.iam.server_addr), }, fiberlb: config::FiberLbConfig { server_addr: args.fiberlb_server_addr.unwrap_or(loaded_config.fiberlb.server_addr), }, flashdns: config::FlashDnsConfig { server_addr: args.flashdns_server_addr.unwrap_or(loaded_config.flashdns.server_addr), }, prismnet: loaded_config.prismnet, }; // Initialize tracing init_logging(&config.server.log_level); info!("Starting k8shost API server on {}", config.server.addr); // Initialize Prometheus metrics exporter let metrics_addr = format!("0.0.0.0:{}", args.metrics_port); let builder = PrometheusBuilder::new(); builder .with_http_listener(metrics_addr.parse::()?) .install() .expect("Failed to install Prometheus metrics exporter"); info!( "Prometheus metrics available at http://{}/metrics", metrics_addr ); // Initialize FlareDB storage let storage = if let Some(addr) = &config.flaredb.direct_addr { info!("Connecting to FlareDB directly at {}", addr); match Storage::new_direct(addr.clone()).await { Ok(s) => { info!("Successfully connected to FlareDB (direct)"); Arc::new(s) } Err(e) => { warn!("Failed direct FlareDB connection: {}", e); return Err(anyhow::anyhow!("Failed to connect to FlareDB (direct): {}", e).into()); } } } else if let Some(addr) = &config.flaredb.pd_addr { info!("Connecting to FlareDB PD at {}", addr); match Storage::new(addr.clone()).await { Ok(s) => { info!("Successfully connected to FlareDB"); Arc::new(s) } Err(e) => { warn!("Failed to connect to FlareDB: {}. Server will start but may not function correctly.", e); return Err(anyhow::anyhow!("Failed to connect to FlareDB: {}", e).into()); } } } else { return Err(anyhow::anyhow!("No FlareDB address configured.").into()); }; // Initialize IAM authentication service info!("Connecting to IAM server at {}", config.iam.server_addr); let auth_service = match AuthService::new(&config.iam.server_addr).await { Ok(s) => { info!("Successfully connected to IAM server"); Arc::new(s) } Err(e) => { warn!("Failed to connect to IAM server: {}. Authentication will be disabled.", e); return Err(anyhow::anyhow!("Failed to connect to IAM server: {}", e).into()); } }; // Dedicated runtime for auth interceptors to avoid blocking the main async runtime let auth_runtime = Arc::new(tokio::runtime::Runtime::new()?); let make_interceptor = |auth: Arc| { let rt = auth_runtime.clone(); move |mut req: Request<()>| -> Result, Status> { let auth = auth.clone(); tokio::task::block_in_place(|| { rt.block_on(async move { let tenant_context = auth.authenticate(&req).await?; req.extensions_mut().insert(tenant_context); Ok(req) }) }) } }; // Create IPAM client let ipam_client = Arc::new(IpamClient::new(config.prismnet.server_addr.clone())); // Create service implementations with storage let pod_service = Arc::new(PodServiceImpl::new_with_credit_service(storage.clone()).await); let service_service = Arc::new(ServiceServiceImpl::new(storage.clone(), ipam_client)); let node_service = Arc::new(NodeServiceImpl::new(storage.clone())); let deployment_service = DeploymentServiceImpl; // Still unimplemented // Start scheduler in background with CreditService integration let scheduler = Arc::new(scheduler::Scheduler::new_with_credit_service(storage.clone()).await); tokio::spawn(async move { scheduler.run().await; }); info!("Scheduler started - tenant-aware with quota enforcement"); // Start FiberLB controller in background let fiberlb_controller = Arc::new(fiberlb_controller::FiberLbController::new( storage.clone(), config.fiberlb.server_addr.clone(), )); tokio::spawn(async move { fiberlb_controller.run().await; }); info!("FiberLB controller started - monitoring LoadBalancer services"); // Start FlashDNS controller in background let flashdns_controller = Arc::new(flashdns_controller::FlashDnsController::new( storage.clone(), config.flashdns.server_addr.clone(), )); tokio::spawn(async move { flashdns_controller.run().await; }); info!("FlashDNS controller started - managing cluster.local DNS records"); info!("Starting gRPC server with authentication..."); // Build gRPC server with authentication layer let grpc_server = Server::builder() .add_service( tonic::codegen::InterceptedService::new( PodServiceServer::new(pod_service.as_ref().clone()), make_interceptor(auth_service.clone()), ), ) .add_service( tonic::codegen::InterceptedService::new( ServiceServiceServer::new(service_service.as_ref().clone()), make_interceptor(auth_service.clone()), ), ) .add_service( tonic::codegen::InterceptedService::new( NodeServiceServer::new(node_service.as_ref().clone()), make_interceptor(auth_service.clone()), ), ) .add_service(DeploymentServiceServer::new(deployment_service)) .serve(config.server.addr); // HTTP REST API server let http_addr = config.server.http_addr; let rest_state = rest::RestApiState { pod_service: pod_service.clone(), service_service: service_service.clone(), node_service: node_service.clone(), }; let rest_app = rest::build_router(rest_state); let http_listener = tokio::net::TcpListener::bind(&http_addr).await?; info!("k8shost HTTP REST API server starting on {}", http_addr); let http_server = async move { axum::serve(http_listener, rest_app) .await .map_err(|e| format!("HTTP server error: {}", e)) }; // Run both servers concurrently tokio::select! { result = grpc_server => { result?; } result = http_server => { result?; } } Ok(()) } // Deployment Service Implementation (placeholder - not part of MVP) #[derive(Debug, Default)] struct DeploymentServiceImpl; #[tonic::async_trait] impl DeploymentService for DeploymentServiceImpl { async fn create_deployment( &self, _request: Request, ) -> Result, Status> { Err(Status::unimplemented("create_deployment not yet implemented")) } async fn get_deployment( &self, _request: Request, ) -> Result, Status> { Err(Status::unimplemented("get_deployment not yet implemented")) } async fn list_deployments( &self, _request: Request, ) -> Result, Status> { Err(Status::unimplemented("list_deployments not yet implemented")) } async fn update_deployment( &self, _request: Request, ) -> Result, Status> { Err(Status::unimplemented("update_deployment not yet implemented")) } async fn delete_deployment( &self, _request: Request, ) -> Result, Status> { Err(Status::unimplemented("delete_deployment not yet implemented")) } } fn init_logging(level: &str) { tracing_subscriber::fmt() .with_env_filter(EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(level))) .init(); }