// Source: photoncloud-monorepo/k8shost/crates/k8shost-server/src/storage.rs
//! Storage layer for k8shost using FlareDB
//!
//! This module provides CRUD operations for Kubernetes resources (Pod, Service, Node)
//! with multi-tenant support using FlareDB as the backend.
use flaredb_client::RdbClient;
use k8shost_types::{Deployment, Node, Pod, Service};
use serde::Deserialize;
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::Mutex;
use tonic::Status;
/// Storage backend for k8shost resources
///
/// Wraps a single FlareDB client behind a `tokio::sync::Mutex`, so all
/// database operations issued through one `Storage` are serialized; the
/// guard is held across the client's async calls.
pub struct Storage {
// Async mutex (not std) because the lock is held across `.await` points.
client: Arc<Mutex<RdbClient>>,
}
/// Identifies a tenant as an (organization, project) pair.
///
/// Derives `Ord`/`Eq` so tenants can be deduplicated and returned in a
/// deterministic order via `BTreeSet` (see `list_active_tenants`).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct TenantRef {
pub org_id: String,
pub project_id: String,
}
/// Minimal envelope used to sniff tenant identity out of any persisted
/// resource value (pod, service, node, deployment) without knowing its
/// concrete type.
#[derive(Debug, Deserialize)]
struct ResourceMetadataEnvelope {
metadata: ResourceMetadataRef,
}
/// Tenant fields of a resource's metadata. Both are optional (and default to
/// `None`) so values missing either field deserialize successfully and can be
/// skipped rather than failing the whole scan.
#[derive(Debug, Deserialize)]
struct ResourceMetadataRef {
#[serde(default)]
org_id: Option<String>,
#[serde(default)]
project_id: Option<String>,
}
// Key-space prefix for the per-controller tenant registries
// ("controllers/<controller>/tenants/<org>/<project>").
const CONTROLLER_TENANTS_PREFIX: &str = "controllers";
impl Storage {
/// Create a new storage instance with FlareDB backend
///
/// Connects through the PD endpoint at `pd_addr` under the "k8shost"
/// namespace.
pub async fn new(pd_addr: String) -> Result<Self, Box<dyn std::error::Error>> {
    let pd = pd_addr.clone();
    let client = RdbClient::connect_with_pd_namespace(pd, pd_addr, "k8shost").await?;
    let client = Arc::new(Mutex::new(client));
    Ok(Self { client })
}
/// Create a storage instance that connects directly to a single FlareDB server (no PD)
pub async fn new_direct(server_addr: String) -> Result<Self, Box<dyn std::error::Error>> {
    let inner = RdbClient::connect_direct(server_addr, "k8shost").await?;
    Ok(Self {
        client: Arc::new(Mutex::new(inner)),
    })
}
/// Create an in-memory storage for testing
///
/// Currently a stub that always panics: tests are expected to run against a
/// real FlareDB instance via [`Storage::new`] instead of a mock.
#[cfg(test)]
pub fn new_in_memory() -> Self {
// For testing, we'll use a mock that stores data in a HashMap
// This is a simplified version - in production, use actual FlareDB
unimplemented!("Use new() with a test FlareDB instance")
}
// ============================================================================
// Pod Operations
// ============================================================================
/// Build key for pod storage: `k8s/<org>/<project>/pods/<namespace>/<name>`.
///
/// NOTE(review): components are not escaped; an id containing '/' would
/// collide with other key paths — presumably ids are validated upstream.
fn pod_key(org_id: &str, project_id: &str, namespace: &str, name: &str) -> Vec<u8> {
    format!("k8s/{org_id}/{project_id}/pods/{namespace}/{name}").into_bytes()
}
/// Build prefix for pod listing; `None` namespace widens the scan to every
/// namespace in the tenant.
fn pod_prefix(org_id: &str, project_id: &str, namespace: Option<&str>) -> Vec<u8> {
    match namespace {
        Some(ns) => format!("k8s/{org_id}/{project_id}/pods/{ns}/"),
        None => format!("k8s/{org_id}/{project_id}/pods/"),
    }
    .into_bytes()
}
/// Create or update a pod
///
/// Requires `org_id`, `project_id`, and `namespace` on the pod's metadata;
/// the pod is serialized to JSON and stored under its tenant-scoped key.
pub async fn put_pod(&self, pod: &Pod) -> Result<(), Status> {
    let meta = &pod.metadata;
    let org_id = meta
        .org_id
        .as_deref()
        .ok_or_else(|| Status::invalid_argument("org_id is required"))?;
    let project_id = meta
        .project_id
        .as_deref()
        .ok_or_else(|| Status::invalid_argument("project_id is required"))?;
    let namespace = meta
        .namespace
        .as_deref()
        .ok_or_else(|| Status::invalid_argument("namespace is required"))?;
    let key = Self::pod_key(org_id, project_id, namespace, &meta.name);
    let payload = serde_json::to_vec(pod)
        .map_err(|e| Status::internal(format!("Failed to serialize pod: {}", e)))?;
    self.client
        .lock()
        .await
        .raw_put(key, payload)
        .await
        .map_err(|e| Status::internal(format!("FlareDB put failed: {}", e)))?;
    Ok(())
}
/// Get a pod by name
///
/// Returns `Ok(None)` when nothing is stored under the pod's key.
pub async fn get_pod(
    &self,
    org_id: &str,
    project_id: &str,
    namespace: &str,
    name: &str,
) -> Result<Option<Pod>, Status> {
    let key = Self::pod_key(org_id, project_id, namespace, name);
    let mut client = self.client.lock().await;
    let raw = client
        .raw_get(key)
        .await
        .map_err(|e| Status::internal(format!("FlareDB get failed: {}", e)))?;
    match raw {
        Some(bytes) => serde_json::from_slice::<Pod>(&bytes)
            .map(Some)
            .map_err(|e| Status::internal(format!("Failed to deserialize pod: {}", e))),
        None => Ok(None),
    }
}
/// List pods in a namespace with optional label selector
pub async fn list_pods(
&self,
org_id: &str,
project_id: &str,
namespace: Option<&str>,
label_selector: Option<&HashMap<String, String>>,
) -> Result<Vec<Pod>, Status> {
let prefix = Self::pod_prefix(org_id, project_id, namespace);
// Calculate end_key for scan
let mut end_key = prefix.clone();
if let Some(last) = end_key.last_mut() {
if *last == 0xff {
end_key.push(0x00);
} else {
*last += 1;
}
} else {
end_key.push(0xff);
}
let mut pods = Vec::new();
let mut start_key = prefix;
// Paginate through all results
loop {
let mut client = self.client.lock().await;
let (_keys, values, next) = client
.raw_scan(
start_key.clone(),
end_key.clone(),
1000, // Batch size
)
.await
.map_err(|e| Status::internal(format!("FlareDB scan failed: {}", e)))?;
// Deserialize pods
for value in values {
if let Ok(pod) = serde_json::from_slice::<Pod>(&value) {
// Apply label selector filter if provided
if let Some(selector) = label_selector {
let matches = selector.iter().all(|(k, v)| {
pod.metadata
.labels
.get(k)
.map(|pv| pv == v)
.unwrap_or(false)
});
if matches {
pods.push(pod);
}
} else {
pods.push(pod);
}
}
}
// Check if there are more results
if let Some(next_key) = next {
start_key = next_key;
} else {
break;
}
}
Ok(pods)
}
/// List all pods across all tenants (for scheduler tenant discovery)
pub async fn list_all_pods(&self) -> Result<Vec<Pod>, Status> {
let prefix = b"k8s/".to_vec(); // Scan all k8s resources
let mut end_key = prefix.clone();
end_key.push(0xff);
let mut pods = Vec::new();
let mut start_key = prefix;
// Paginate through all results
loop {
let mut client = self.client.lock().await;
let (_keys, values, next) = client
.raw_scan(start_key.clone(), end_key.clone(), 1000)
.await
.map_err(|e| Status::internal(format!("FlareDB scan failed: {}", e)))?;
// Deserialize pods
for value in values {
if let Ok(pod) = serde_json::from_slice::<Pod>(&value) {
pods.push(pod);
}
}
// Check if there are more results
if let Some(next_key) = next {
start_key = next_key;
} else {
break;
}
}
Ok(pods)
}
/// List active tenants discovered from all persisted k8shost resources.
pub async fn list_active_tenants(&self) -> Result<Vec<TenantRef>, Status> {
let prefix = b"k8s/".to_vec();
let mut end_key = prefix.clone();
end_key.push(0xff);
let mut tenants = std::collections::BTreeSet::new();
let mut start_key = prefix;
loop {
let mut client = self.client.lock().await;
let (_keys, values, next) = client
.raw_scan(start_key.clone(), end_key.clone(), 1000)
.await
.map_err(|e| Status::internal(format!("FlareDB scan failed: {}", e)))?;
tenants.extend(collect_active_tenants(values));
if let Some(next_key) = next {
start_key = next_key;
} else {
break;
}
}
Ok(tenants.into_iter().collect())
}
/// Persist a controller-specific tenant registry entry so cleanup survives restarts.
///
/// The entry is a bare key with an empty value — presence is the signal.
pub async fn remember_controller_tenant(
    &self,
    controller: &str,
    tenant: &TenantRef,
) -> Result<(), Status> {
    let key = controller_tenant_key(controller, &tenant.org_id, &tenant.project_id);
    self.client
        .lock()
        .await
        .raw_put(key, Vec::new())
        .await
        .map_err(|e| Status::internal(format!("FlareDB put failed: {}", e)))?;
    Ok(())
}
/// Remove a controller-specific tenant registry entry after authoritative cleanup.
pub async fn forget_controller_tenant(
    &self,
    controller: &str,
    tenant: &TenantRef,
) -> Result<(), Status> {
    let key = controller_tenant_key(controller, &tenant.org_id, &tenant.project_id);
    self.client
        .lock()
        .await
        .raw_delete(key)
        .await
        .map_err(|e| Status::internal(format!("FlareDB delete failed: {}", e)))?;
    Ok(())
}
/// List controller-specific tenants that still require reconciliation or cleanup.
///
/// Tenants are parsed from registry keys, deduplicated, and returned sorted.
pub async fn list_controller_tenants(
    &self,
    controller: &str,
) -> Result<Vec<TenantRef>, Status> {
    let prefix = controller_tenant_prefix(controller);
    let end_key = range_end_key(&prefix);
    let mut found = std::collections::BTreeSet::new();
    let mut start_key = prefix;
    loop {
        let mut client = self.client.lock().await;
        let (keys, _values, next) = client
            .raw_scan(start_key.clone(), end_key.clone(), 1000)
            .await
            .map_err(|e| Status::internal(format!("FlareDB scan failed: {}", e)))?;
        // Keys that don't parse (wrong controller, malformed suffix) are dropped.
        found.extend(
            keys.iter()
                .filter_map(|key| parse_controller_tenant_key(controller, key)),
        );
        match next {
            Some(next_key) => start_key = next_key,
            None => break,
        }
    }
    Ok(found.into_iter().collect())
}
/// Delete a pod
///
/// Propagates `raw_delete`'s indication of whether the key existed.
pub async fn delete_pod(
    &self,
    org_id: &str,
    project_id: &str,
    namespace: &str,
    name: &str,
) -> Result<bool, Status> {
    let key = Self::pod_key(org_id, project_id, namespace, name);
    self.client
        .lock()
        .await
        .raw_delete(key)
        .await
        .map_err(|e| Status::internal(format!("FlareDB delete failed: {}", e)))
}
// ============================================================================
// Service Operations
// ============================================================================
/// Build key for service storage: `k8s/<org>/<project>/services/<ns>/<name>`.
fn service_key(org_id: &str, project_id: &str, namespace: &str, name: &str) -> Vec<u8> {
    format!("k8s/{org_id}/{project_id}/services/{namespace}/{name}").into_bytes()
}
/// Build prefix for service listing; `None` namespace covers the whole tenant.
fn service_prefix(org_id: &str, project_id: &str, namespace: Option<&str>) -> Vec<u8> {
    match namespace {
        Some(ns) => format!("k8s/{org_id}/{project_id}/services/{ns}/"),
        None => format!("k8s/{org_id}/{project_id}/services/"),
    }
    .into_bytes()
}
/// Create or update a service
///
/// Requires `org_id`, `project_id`, and `namespace` on the service's
/// metadata; the record is stored as JSON under its tenant-scoped key.
pub async fn put_service(&self, service: &Service) -> Result<(), Status> {
    let meta = &service.metadata;
    let org_id = meta
        .org_id
        .as_deref()
        .ok_or_else(|| Status::invalid_argument("org_id is required"))?;
    let project_id = meta
        .project_id
        .as_deref()
        .ok_or_else(|| Status::invalid_argument("project_id is required"))?;
    let namespace = meta
        .namespace
        .as_deref()
        .ok_or_else(|| Status::invalid_argument("namespace is required"))?;
    let key = Self::service_key(org_id, project_id, namespace, &meta.name);
    let payload = serde_json::to_vec(service)
        .map_err(|e| Status::internal(format!("Failed to serialize service: {}", e)))?;
    self.client
        .lock()
        .await
        .raw_put(key, payload)
        .await
        .map_err(|e| Status::internal(format!("FlareDB put failed: {}", e)))?;
    Ok(())
}
/// Get a service by name
///
/// Returns `Ok(None)` when nothing is stored under the service's key.
pub async fn get_service(
    &self,
    org_id: &str,
    project_id: &str,
    namespace: &str,
    name: &str,
) -> Result<Option<Service>, Status> {
    let key = Self::service_key(org_id, project_id, namespace, name);
    let mut client = self.client.lock().await;
    let raw = client
        .raw_get(key)
        .await
        .map_err(|e| Status::internal(format!("FlareDB get failed: {}", e)))?;
    match raw {
        Some(bytes) => serde_json::from_slice::<Service>(&bytes)
            .map(Some)
            .map_err(|e| Status::internal(format!("Failed to deserialize service: {}", e))),
        None => Ok(None),
    }
}
/// List services in a namespace
pub async fn list_services(
&self,
org_id: &str,
project_id: &str,
namespace: Option<&str>,
) -> Result<Vec<Service>, Status> {
let prefix = Self::service_prefix(org_id, project_id, namespace);
let mut end_key = prefix.clone();
if let Some(last) = end_key.last_mut() {
if *last == 0xff {
end_key.push(0x00);
} else {
*last += 1;
}
} else {
end_key.push(0xff);
}
let mut services = Vec::new();
let mut start_key = prefix;
loop {
let mut client = self.client.lock().await;
let (_keys, values, next) = client
.raw_scan(start_key.clone(), end_key.clone(), 1000)
.await
.map_err(|e| Status::internal(format!("FlareDB scan failed: {}", e)))?;
for value in values {
if let Ok(service) = serde_json::from_slice::<Service>(&value) {
services.push(service);
}
}
if let Some(next_key) = next {
start_key = next_key;
} else {
break;
}
}
Ok(services)
}
/// Delete a service
///
/// Propagates `raw_delete`'s indication of whether the key existed.
pub async fn delete_service(
    &self,
    org_id: &str,
    project_id: &str,
    namespace: &str,
    name: &str,
) -> Result<bool, Status> {
    let key = Self::service_key(org_id, project_id, namespace, name);
    self.client
        .lock()
        .await
        .raw_delete(key)
        .await
        .map_err(|e| Status::internal(format!("FlareDB delete failed: {}", e)))
}
// ============================================================================
// Node Operations
// ============================================================================
/// Build key for node storage: `k8s/<org>/<project>/nodes/<name>` (nodes are
/// not namespaced).
fn node_key(org_id: &str, project_id: &str, name: &str) -> Vec<u8> {
    format!("k8s/{org_id}/{project_id}/nodes/{name}").into_bytes()
}
/// Build prefix for node listing within a tenant.
fn node_prefix(org_id: &str, project_id: &str) -> Vec<u8> {
    format!("k8s/{org_id}/{project_id}/nodes/").into_bytes()
}
/// Create or update a node
///
/// Requires `org_id` and `project_id` on the node's metadata (nodes have no
/// namespace); the record is stored as JSON under its tenant-scoped key.
pub async fn put_node(&self, node: &Node) -> Result<(), Status> {
    let meta = &node.metadata;
    let org_id = meta
        .org_id
        .as_deref()
        .ok_or_else(|| Status::invalid_argument("org_id is required"))?;
    let project_id = meta
        .project_id
        .as_deref()
        .ok_or_else(|| Status::invalid_argument("project_id is required"))?;
    let key = Self::node_key(org_id, project_id, &meta.name);
    let payload = serde_json::to_vec(node)
        .map_err(|e| Status::internal(format!("Failed to serialize node: {}", e)))?;
    self.client
        .lock()
        .await
        .raw_put(key, payload)
        .await
        .map_err(|e| Status::internal(format!("FlareDB put failed: {}", e)))?;
    Ok(())
}
/// Get a node by name
///
/// Returns `Ok(None)` when nothing is stored under the node's key.
pub async fn get_node(
    &self,
    org_id: &str,
    project_id: &str,
    name: &str,
) -> Result<Option<Node>, Status> {
    let key = Self::node_key(org_id, project_id, name);
    let mut client = self.client.lock().await;
    let raw = client
        .raw_get(key)
        .await
        .map_err(|e| Status::internal(format!("FlareDB get failed: {}", e)))?;
    match raw {
        Some(bytes) => serde_json::from_slice::<Node>(&bytes)
            .map(Some)
            .map_err(|e| Status::internal(format!("Failed to deserialize node: {}", e))),
        None => Ok(None),
    }
}
/// List all nodes
pub async fn list_nodes(&self, org_id: &str, project_id: &str) -> Result<Vec<Node>, Status> {
let prefix = Self::node_prefix(org_id, project_id);
let mut end_key = prefix.clone();
if let Some(last) = end_key.last_mut() {
if *last == 0xff {
end_key.push(0x00);
} else {
*last += 1;
}
} else {
end_key.push(0xff);
}
let mut nodes = Vec::new();
let mut start_key = prefix;
loop {
let mut client = self.client.lock().await;
let (_keys, values, next) = client
.raw_scan(start_key.clone(), end_key.clone(), 1000)
.await
.map_err(|e| Status::internal(format!("FlareDB scan failed: {}", e)))?;
for value in values {
if let Ok(node) = serde_json::from_slice::<Node>(&value) {
nodes.push(node);
}
}
if let Some(next_key) = next {
start_key = next_key;
} else {
break;
}
}
Ok(nodes)
}
/// Delete a node
///
/// Propagates `raw_delete`'s indication of whether the key existed.
pub async fn delete_node(
    &self,
    org_id: &str,
    project_id: &str,
    name: &str,
) -> Result<bool, Status> {
    let key = Self::node_key(org_id, project_id, name);
    self.client
        .lock()
        .await
        .raw_delete(key)
        .await
        .map_err(|e| Status::internal(format!("FlareDB delete failed: {}", e)))
}
// ============================================================================
// Deployment Operations
// ============================================================================
/// Build key for deployment storage:
/// `k8s/<org>/<project>/deployments/<ns>/<name>`.
fn deployment_key(org_id: &str, project_id: &str, namespace: &str, name: &str) -> Vec<u8> {
    format!("k8s/{org_id}/{project_id}/deployments/{namespace}/{name}").into_bytes()
}
/// Build prefix for deployment listing; `None` namespace covers the whole tenant.
fn deployment_prefix(org_id: &str, project_id: &str, namespace: Option<&str>) -> Vec<u8> {
    match namespace {
        Some(ns) => format!("k8s/{org_id}/{project_id}/deployments/{ns}/"),
        None => format!("k8s/{org_id}/{project_id}/deployments/"),
    }
    .into_bytes()
}
/// Create or update a deployment
///
/// Requires `org_id`, `project_id`, and `namespace` on the deployment's
/// metadata; the record is stored as JSON under its tenant-scoped key.
pub async fn put_deployment(&self, deployment: &Deployment) -> Result<(), Status> {
    let meta = &deployment.metadata;
    let org_id = meta
        .org_id
        .as_deref()
        .ok_or_else(|| Status::invalid_argument("org_id is required"))?;
    let project_id = meta
        .project_id
        .as_deref()
        .ok_or_else(|| Status::invalid_argument("project_id is required"))?;
    let namespace = meta
        .namespace
        .as_deref()
        .ok_or_else(|| Status::invalid_argument("namespace is required"))?;
    let key = Self::deployment_key(org_id, project_id, namespace, &meta.name);
    let payload = serde_json::to_vec(deployment)
        .map_err(|e| Status::internal(format!("Failed to serialize deployment: {}", e)))?;
    self.client
        .lock()
        .await
        .raw_put(key, payload)
        .await
        .map_err(|e| Status::internal(format!("FlareDB put failed: {}", e)))?;
    Ok(())
}
/// Get a deployment by name
///
/// Returns `Ok(None)` when nothing is stored under the deployment's key.
pub async fn get_deployment(
    &self,
    org_id: &str,
    project_id: &str,
    namespace: &str,
    name: &str,
) -> Result<Option<Deployment>, Status> {
    let key = Self::deployment_key(org_id, project_id, namespace, name);
    let mut client = self.client.lock().await;
    let raw = client
        .raw_get(key)
        .await
        .map_err(|e| Status::internal(format!("FlareDB get failed: {}", e)))?;
    match raw {
        Some(bytes) => serde_json::from_slice::<Deployment>(&bytes)
            .map(Some)
            .map_err(|e| Status::internal(format!("Failed to deserialize deployment: {}", e))),
        None => Ok(None),
    }
}
/// List deployments in a namespace
pub async fn list_deployments(
&self,
org_id: &str,
project_id: &str,
namespace: Option<&str>,
) -> Result<Vec<Deployment>, Status> {
let prefix = Self::deployment_prefix(org_id, project_id, namespace);
let mut end_key = prefix.clone();
if let Some(last) = end_key.last_mut() {
if *last == 0xff {
end_key.push(0x00);
} else {
*last += 1;
}
} else {
end_key.push(0xff);
}
let mut deployments = Vec::new();
let mut start_key = prefix;
loop {
let mut client = self.client.lock().await;
let (_keys, values, next) = client
.raw_scan(start_key.clone(), end_key.clone(), 1000)
.await
.map_err(|e| Status::internal(format!("FlareDB scan failed: {}", e)))?;
for value in values {
if let Ok(deployment) = serde_json::from_slice::<Deployment>(&value) {
deployments.push(deployment);
}
}
if let Some(next_key) = next {
start_key = next_key;
} else {
break;
}
}
Ok(deployments)
}
/// List deployments across all tenants
pub async fn list_all_deployments(&self) -> Result<Vec<Deployment>, Status> {
let prefix = b"k8s/".to_vec();
let mut end_key = prefix.clone();
end_key.push(0xff);
let mut deployments = Vec::new();
let mut start_key = prefix;
loop {
let mut client = self.client.lock().await;
let (_keys, values, next) = client
.raw_scan(start_key.clone(), end_key.clone(), 1000)
.await
.map_err(|e| Status::internal(format!("FlareDB scan failed: {}", e)))?;
for value in values {
if let Ok(deployment) = serde_json::from_slice::<Deployment>(&value) {
deployments.push(deployment);
}
}
if let Some(next_key) = next {
start_key = next_key;
} else {
break;
}
}
Ok(deployments)
}
/// Delete a deployment
///
/// Propagates `raw_delete`'s indication of whether the key existed.
pub async fn delete_deployment(
    &self,
    org_id: &str,
    project_id: &str,
    namespace: &str,
    name: &str,
) -> Result<bool, Status> {
    let key = Self::deployment_key(org_id, project_id, namespace, name);
    self.client
        .lock()
        .await
        .raw_delete(key)
        .await
        .map_err(|e| Status::internal(format!("FlareDB delete failed: {}", e)))
}
}
/// Extract the distinct tenants referenced by a batch of serialized resources.
///
/// Values that are not JSON, lack a metadata envelope, or are missing either
/// tenant field are silently skipped.
fn collect_active_tenants(
    values: impl IntoIterator<Item = Vec<u8>>,
) -> std::collections::BTreeSet<TenantRef> {
    values
        .into_iter()
        .filter_map(|value| {
            let envelope = serde_json::from_slice::<ResourceMetadataEnvelope>(&value).ok()?;
            let org_id = envelope.metadata.org_id?;
            let project_id = envelope.metadata.project_id?;
            Some(TenantRef { org_id, project_id })
        })
        .collect()
}
/// Registry key recording that `controller` tracks the given tenant:
/// `controllers/<controller>/tenants/<org>/<project>`.
fn controller_tenant_key(controller: &str, org_id: &str, project_id: &str) -> Vec<u8> {
    format!(
        "{}/{}/tenants/{}/{}",
        CONTROLLER_TENANTS_PREFIX, controller, org_id, project_id
    )
    .into_bytes()
}
/// Scan prefix covering every tenant registry entry for `controller`.
fn controller_tenant_prefix(controller: &str) -> Vec<u8> {
    format!("{}/{}/tenants/", CONTROLLER_TENANTS_PREFIX, controller).into_bytes()
}
/// Inverse of [`controller_tenant_key`]: recover the tenant from a registry
/// key. Returns `None` for non-UTF-8 keys, keys belonging to a different
/// controller, or malformed suffixes (empty parts or extra '/' segments).
fn parse_controller_tenant_key(controller: &str, key: &[u8]) -> Option<TenantRef> {
    let text = std::str::from_utf8(key).ok()?;
    let expected = format!("{CONTROLLER_TENANTS_PREFIX}/{controller}/tenants/");
    let rest = text.strip_prefix(&expected)?;
    let mut parts = rest.splitn(2, '/');
    let org_id = parts.next()?;
    let project_id = parts.next()?;
    if org_id.is_empty() || project_id.is_empty() || project_id.contains('/') {
        return None;
    }
    Some(TenantRef {
        org_id: org_id.to_owned(),
        project_id: project_id.to_owned(),
    })
}
/// Compute the exclusive upper bound for a prefix scan: the smallest byte
/// string greater than every key that starts with `prefix`.
///
/// Bug fix: the previous version appended 0x00 whenever the final byte was
/// 0xff, which produced `prefix + 0x00` — a bound that covers only the exact
/// prefix key and truncates the rest of the range. A 0xff byte has no
/// single-byte successor, so the carry must propagate into the preceding
/// byte (e.g. [0x61, 0xff] -> [0x62]).
fn range_end_key(prefix: &[u8]) -> Vec<u8> {
    let mut end_key = prefix.to_vec();
    // Strip trailing 0xff bytes; they cannot be incremented.
    while end_key.last() == Some(&0xff) {
        end_key.pop();
    }
    if let Some(last) = end_key.last_mut() {
        *last += 1;
        end_key
    } else if prefix.is_empty() {
        // Empty prefix: preserve the historical sentinel bound.
        vec![0xff]
    } else {
        // Prefix made entirely of 0xff bytes: no finite successor exists.
        // Preserve the previous (best-effort) behavior of appending 0x00.
        let mut fallback = prefix.to_vec();
        fallback.push(0x00);
        fallback
    }
}
#[cfg(test)]
mod tests {
use super::{
collect_active_tenants, controller_tenant_key, parse_controller_tenant_key, TenantRef,
};
// Tenants must be deduplicated (org-a appears twice), undecodable or
// incomplete values skipped, and the result ordered by (org, project).
#[test]
fn collect_active_tenants_discovers_unique_tenants_from_mixed_resources() {
let tenants = collect_active_tenants(vec![
br#"{"metadata":{"name":"pod-a","org_id":"org-a","project_id":"project-a"}}"#.to_vec(),
br#"{"metadata":{"name":"svc-a","org_id":"org-a","project_id":"project-a"}}"#.to_vec(),
br#"{"metadata":{"name":"node-b","org_id":"org-b","project_id":"project-b"}}"#.to_vec(),
br#"{"metadata":{"name":"deploy-c","org_id":"org-c","project_id":"project-c"}}"#
.to_vec(),
br#"{"metadata":{"name":"invalid-missing-project","org_id":"org-z"}}"#.to_vec(),
br#"not-json"#.to_vec(),
]);
let tenants = tenants.into_iter().collect::<Vec<_>>();
assert_eq!(
tenants,
vec![
TenantRef {
org_id: "org-a".to_string(),
project_id: "project-a".to_string(),
},
TenantRef {
org_id: "org-b".to_string(),
project_id: "project-b".to_string(),
},
TenantRef {
org_id: "org-c".to_string(),
project_id: "project-c".to_string(),
},
]
);
}
// A key built for one controller must parse back for that controller and
// be rejected when presented to a different controller's parser.
#[test]
fn parse_controller_tenant_key_round_trips_controller_registry_entries() {
let key = controller_tenant_key("flashdns", "org-a", "project-a");
let tenant = parse_controller_tenant_key("flashdns", &key)
.expect("controller tenant key should parse");
assert_eq!(
tenant,
TenantRef {
org_id: "org-a".to_string(),
project_id: "project-a".to_string(),
}
);
assert!(parse_controller_tenant_key("fiberlb", &key).is_none());
}
}