photoncloud-monorepo/k8shost/crates/k8shost-server/src/main.rs
centra d2149b6249 fix(lightningstor): Fix SigV4 canonicalization for AWS S3 auth
- Replace form_urlencoded with RFC 3986 compliant URI encoding
- Implement aws_uri_encode() matching AWS SigV4 spec exactly
- Unreserved chars (A-Z,a-z,0-9,-,_,.,~) not encoded
- All other chars percent-encoded with uppercase hex
- Preserve slashes in paths, encode in query params
- Normalize empty paths to '/' per AWS spec
- Fix test expectations (body hash, HMAC values)
- Add comprehensive SigV4 signature determinism test

This fixes the canonicalization mismatch that caused signature
validation failures in T047. Auth can now be enabled for production.

Refs: T058.S1
2025-12-12 06:23:46 +09:00

303 lines
10 KiB
Rust

mod auth;
mod cni;
mod config;
mod fiberlb_controller;
mod flashdns_controller;
mod scheduler;
mod services;
mod storage;
use anyhow::Result;
use auth::AuthService;
use clap::Parser;
use config::Config;
use metrics_exporter_prometheus::PrometheusBuilder;
use k8shost_proto::{
deployment_service_server::{DeploymentService, DeploymentServiceServer},
node_service_server::NodeServiceServer,
pod_service_server::PodServiceServer,
service_service_server::ServiceServiceServer,
*,
};
use services::{node::NodeServiceImpl, pod::PodServiceImpl, service::ServiceServiceImpl};
use std::{path::PathBuf, sync::Arc};
use storage::Storage;
use tonic::{transport::Server, Request, Response, Status};
use tracing::{info, warn};
use tracing_subscriber::EnvFilter;
/// k8shost API Server
///
/// Command-line arguments. Every flag here mirrors a field in `Config`; when a
/// flag is given it overrides the value loaded from the config file or the
/// K8SHOST_* environment (merge happens in `main`).
#[derive(Parser, Debug)]
#[command(name = "k8shost-server")]
#[command(about = "Kubernetes API server for PlasmaCloud's k8shost component")]
struct Args {
/// Configuration file path
#[arg(short, long)]
config: Option<PathBuf>,
/// Listen address for gRPC server (e.g., "[::]:6443")
#[arg(long)]
addr: Option<String>,
/// Log level (e.g., "info", "debug", "trace")
#[arg(long)]
log_level: Option<String>,
/// FlareDB Placement Driver address (e.g., "127.0.0.1:2379")
#[arg(long)]
flaredb_pd_addr: Option<String>,
/// FlareDB direct address (e.g., "127.0.0.1:50051")
// NOTE(review): when both FlareDB addresses are set, `main` prefers the
// direct address over the placement driver.
#[arg(long)]
flaredb_direct_addr: Option<String>,
/// IAM server address (e.g., "http://127.0.0.1:50051")
#[arg(long)]
iam_server_addr: Option<String>,
/// FiberLB server address (e.g., "http://127.0.0.1:50082")
#[arg(long)]
fiberlb_server_addr: Option<String>,
/// FlashDNS server address (e.g., "http://127.0.0.1:50053")
#[arg(long)]
flashdns_server_addr: Option<String>,
/// Metrics port for Prometheus scraping
// Unlike the flags above, this has no config-file counterpart; the CLI
// default (9094) is always in effect unless overridden.
#[arg(long, default_value = "9094")]
metrics_port: u16,
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let args = Args::parse();

    // --- Configuration -----------------------------------------------------
    // Merge order (later sources win): built-in defaults (round-tripped
    // through TOML) -> K8SHOST_* environment -> optional config file
    // -> explicit CLI flags (applied below).
    // NOTE(review): the "_" separator makes env keys ambiguous for nested
    // fields whose names themselves contain underscores (e.g.
    // K8SHOST_SERVER_LOG_LEVEL) — confirm env overrides actually resolve.
    let mut settings = ::config::Config::builder()
        .add_source(::config::File::from_str(
            toml::to_string(&Config::default())?.as_str(),
            ::config::FileFormat::Toml,
        ))
        .add_source(::config::Environment::with_prefix("K8SHOST").separator("_"));
    // Add config file if specified
    if let Some(config_path) = &args.config {
        // NOTE(review): tracing is initialized further down, so this event is
        // dropped unless a global default subscriber already exists.
        info!("Loading config from file: {}", config_path.display());
        settings = settings.add_source(::config::File::from(config_path.as_path()));
    }
    let loaded_config: Config = settings
        .build()?
        .try_deserialize()
        .map_err(|e| anyhow::anyhow!("Failed to load configuration: {}", e))?;

    // Apply CLI overrides to the loaded configuration
    let config = Config {
        server: config::ServerConfig {
            // Fix: a malformed --addr was previously swallowed
            // (`unwrap_or_else` fell back to the configured address), so a
            // typo silently bound the wrong socket. Fail fast instead.
            addr: match &args.addr {
                Some(s) => s
                    .parse()
                    .map_err(|e| anyhow::anyhow!("Invalid --addr '{}': {}", s, e))?,
                None => loaded_config.server.addr,
            },
            log_level: args.log_level.unwrap_or(loaded_config.server.log_level),
        },
        flaredb: config::FlareDbConfig {
            pd_addr: args.flaredb_pd_addr.or(loaded_config.flaredb.pd_addr),
            direct_addr: args.flaredb_direct_addr.or(loaded_config.flaredb.direct_addr),
        },
        iam: config::IamConfig {
            server_addr: args.iam_server_addr.unwrap_or(loaded_config.iam.server_addr),
        },
        fiberlb: config::FiberLbConfig {
            server_addr: args.fiberlb_server_addr.unwrap_or(loaded_config.fiberlb.server_addr),
        },
        flashdns: config::FlashDnsConfig {
            server_addr: args.flashdns_server_addr.unwrap_or(loaded_config.flashdns.server_addr),
        },
    };

    // Initialize tracing (RUST_LOG, when set, takes precedence over the
    // configured level — see init_logging).
    init_logging(&config.server.log_level);
    info!("Starting k8shost API server on {}", config.server.addr);

    // --- Prometheus metrics exporter ---------------------------------------
    let metrics_addr = format!("0.0.0.0:{}", args.metrics_port);
    let builder = PrometheusBuilder::new();
    builder
        .with_http_listener(metrics_addr.parse::<std::net::SocketAddr>()?)
        .install()
        .expect("Failed to install Prometheus metrics exporter");
    info!(
        "Prometheus metrics available at http://{}/metrics",
        metrics_addr
    );

    // --- FlareDB storage ----------------------------------------------------
    // Direct address takes precedence over the placement driver; at least one
    // must be configured or startup aborts.
    let storage = if let Some(addr) = &config.flaredb.direct_addr {
        info!("Connecting to FlareDB directly at {}", addr);
        match Storage::new_direct(addr.clone()).await {
            Ok(s) => {
                info!("Successfully connected to FlareDB (direct)");
                Arc::new(s)
            }
            Err(e) => {
                warn!("Failed direct FlareDB connection: {}", e);
                return Err(anyhow::anyhow!("Failed to connect to FlareDB (direct): {}", e).into());
            }
        }
    } else if let Some(addr) = &config.flaredb.pd_addr {
        info!("Connecting to FlareDB PD at {}", addr);
        match Storage::new(addr.clone()).await {
            Ok(s) => {
                info!("Successfully connected to FlareDB");
                Arc::new(s)
            }
            Err(e) => {
                // Fix: the old message claimed "Server will start but may not
                // function correctly", but the next line aborts startup.
                warn!("Failed to connect to FlareDB: {}", e);
                return Err(anyhow::anyhow!("Failed to connect to FlareDB: {}", e).into());
            }
        }
    } else {
        return Err(anyhow::anyhow!("No FlareDB address configured.").into());
    };

    // --- IAM authentication -------------------------------------------------
    info!("Connecting to IAM server at {}", config.iam.server_addr);
    let auth_service = match AuthService::new(&config.iam.server_addr).await {
        Ok(s) => {
            info!("Successfully connected to IAM server");
            Arc::new(s)
        }
        Err(e) => {
            // Fix: the old message claimed "Authentication will be disabled",
            // but an IAM connection failure is fatal — startup aborts here.
            warn!("Failed to connect to IAM server: {}", e);
            return Err(anyhow::anyhow!("Failed to connect to IAM server: {}", e).into());
        }
    };

    // Dedicated runtime for auth interceptors: tonic interceptors are
    // synchronous, so the async authenticate() call is driven on a separate
    // runtime via block_in_place to avoid stalling the main runtime's workers.
    let auth_runtime = Arc::new(tokio::runtime::Runtime::new()?);
    let make_interceptor = |auth: Arc<AuthService>| {
        let rt = auth_runtime.clone();
        move |mut req: Request<()>| -> Result<Request<()>, Status> {
            let auth = auth.clone();
            tokio::task::block_in_place(|| {
                rt.block_on(async move {
                    // On success the tenant context is attached to the request
                    // extensions for downstream handlers.
                    let tenant_context = auth.authenticate(&req).await?;
                    req.extensions_mut().insert(tenant_context);
                    Ok(req)
                })
            })
        }
    };

    // --- Service implementations --------------------------------------------
    let pod_service = PodServiceImpl::new_with_credit_service(storage.clone()).await;
    let service_service = ServiceServiceImpl::new(storage.clone());
    let node_service = NodeServiceImpl::new(storage.clone());
    let deployment_service = DeploymentServiceImpl::default(); // Still unimplemented

    // Start scheduler in background
    let scheduler = Arc::new(scheduler::Scheduler::new(storage.clone()));
    tokio::spawn(async move {
        scheduler.run().await;
    });
    info!("Scheduler started - monitoring for pending pods");

    // Start FiberLB controller in background
    let fiberlb_controller = Arc::new(fiberlb_controller::FiberLbController::new(
        storage.clone(),
        config.fiberlb.server_addr.clone(),
    ));
    tokio::spawn(async move {
        fiberlb_controller.run().await;
    });
    info!("FiberLB controller started - monitoring LoadBalancer services");

    // Start FlashDNS controller in background
    let flashdns_controller = Arc::new(flashdns_controller::FlashDnsController::new(
        storage.clone(),
        config.flashdns.server_addr.clone(),
    ));
    tokio::spawn(async move {
        flashdns_controller.run().await;
    });
    info!("FlashDNS controller started - managing cluster.local DNS records");

    info!("Starting gRPC server with authentication...");
    // Build server: pod/service/node RPCs go through the auth interceptor;
    // the deployment placeholder is intentionally left unauthenticated since
    // every method returns `unimplemented`.
    Server::builder()
        .add_service(
            tonic::codegen::InterceptedService::new(
                PodServiceServer::new(pod_service),
                make_interceptor(auth_service.clone()),
            ),
        )
        .add_service(
            tonic::codegen::InterceptedService::new(
                ServiceServiceServer::new(service_service),
                make_interceptor(auth_service.clone()),
            ),
        )
        .add_service(
            tonic::codegen::InterceptedService::new(
                NodeServiceServer::new(node_service),
                make_interceptor(auth_service.clone()),
            ),
        )
        .add_service(DeploymentServiceServer::new(deployment_service))
        .serve(config.server.addr)
        .await?;
    Ok(())
}
// Deployment Service Implementation (placeholder - not part of MVP)
// Stateless marker type backing the placeholder DeploymentService; carries no
// storage handle because no method is implemented yet.
#[derive(Debug, Default)]
struct DeploymentServiceImpl;
#[tonic::async_trait]
impl DeploymentService for DeploymentServiceImpl {
async fn create_deployment(
&self,
_request: Request<CreateDeploymentRequest>,
) -> Result<Response<CreateDeploymentResponse>, Status> {
Err(Status::unimplemented("create_deployment not yet implemented"))
}
async fn get_deployment(
&self,
_request: Request<GetDeploymentRequest>,
) -> Result<Response<GetDeploymentResponse>, Status> {
Err(Status::unimplemented("get_deployment not yet implemented"))
}
async fn list_deployments(
&self,
_request: Request<ListDeploymentsRequest>,
) -> Result<Response<ListDeploymentsResponse>, Status> {
Err(Status::unimplemented("list_deployments not yet implemented"))
}
async fn update_deployment(
&self,
_request: Request<UpdateDeploymentRequest>,
) -> Result<Response<UpdateDeploymentResponse>, Status> {
Err(Status::unimplemented("update_deployment not yet implemented"))
}
async fn delete_deployment(
&self,
_request: Request<DeleteDeploymentRequest>,
) -> Result<Response<DeleteDeploymentResponse>, Status> {
Err(Status::unimplemented("delete_deployment not yet implemented"))
}
}
/// Install the global tracing subscriber.
///
/// The filter honours `RUST_LOG` when it is set and parses; otherwise it
/// falls back to the level string taken from the server configuration.
fn init_logging(level: &str) {
    let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(level));
    tracing_subscriber::fmt().with_env_filter(filter).init();
}