//! Node client for communicating with storage nodes
|
|
|
|
use super::{NodeError, NodeResult};
|
|
use async_trait::async_trait;
|
|
use bytes::Bytes;
|
|
use lightningstor_node::proto::{
|
|
ChunkExistsRequest, ChunkSizeRequest, DeleteChunkRequest, GetChunkRequest, PingRequest,
|
|
PutChunkRequest,
|
|
};
|
|
use lightningstor_node::NodeServiceClient;
|
|
use std::sync::atomic::{AtomicBool, Ordering};
|
|
use std::sync::Arc;
|
|
use std::time::Duration;
|
|
use tokio::sync::RwLock;
|
|
use tonic::transport::Channel;
|
|
|
|
/// Trait for storage node client operations
|
|
#[async_trait]
|
|
pub trait NodeClientTrait: Send + Sync {
|
|
/// Get the node ID
|
|
fn node_id(&self) -> &str;
|
|
|
|
/// Get the node endpoint
|
|
fn endpoint(&self) -> &str;
|
|
|
|
/// Check if the node is currently considered healthy
|
|
async fn is_healthy(&self) -> bool;
|
|
|
|
/// Store a chunk on this node
|
|
async fn put_chunk(
|
|
&self,
|
|
chunk_id: &str,
|
|
shard_index: u32,
|
|
is_parity: bool,
|
|
data: Bytes,
|
|
) -> NodeResult<()>;
|
|
|
|
/// Retrieve a chunk from this node
|
|
async fn get_chunk(
|
|
&self,
|
|
chunk_id: &str,
|
|
shard_index: u32,
|
|
is_parity: bool,
|
|
) -> NodeResult<Vec<u8>>;
|
|
|
|
/// Delete a chunk from this node
|
|
async fn delete_chunk(&self, chunk_id: &str) -> NodeResult<()>;
|
|
|
|
/// Check if a chunk exists on this node
|
|
async fn chunk_exists(&self, chunk_id: &str) -> NodeResult<bool>;
|
|
|
|
/// Get the size of a chunk on this node
|
|
async fn chunk_size(&self, chunk_id: &str) -> NodeResult<Option<u64>>;
|
|
|
|
/// Ping the node to check connectivity
|
|
async fn ping(&self) -> NodeResult<Duration>;
|
|
}
|
|
|
|
/// Real gRPC client for storage nodes
|
|
///
|
|
/// This client communicates with storage nodes over gRPC.
|
|
/// For now, this is a placeholder that will be implemented
|
|
/// when the storage node service is created.
|
|
pub struct NodeClient {
|
|
node_id: String,
|
|
endpoint: String,
|
|
healthy: AtomicBool,
|
|
client: RwLock<NodeServiceClient<Channel>>,
|
|
}
|
|
|
|
impl NodeClient {
|
|
/// Connect to a storage node at the given endpoint
|
|
pub async fn connect(endpoint: &str) -> NodeResult<Self> {
|
|
// Ensure endpoint has scheme
|
|
let endpoint_url = if endpoint.contains("://") {
|
|
endpoint.to_string()
|
|
} else {
|
|
format!("http://{}", endpoint)
|
|
};
|
|
|
|
let channel = Channel::from_shared(endpoint_url.clone())
|
|
.map_err(|e| NodeError::ConnectionFailed {
|
|
node_id: "unknown".to_string(),
|
|
reason: e.to_string(),
|
|
})?
|
|
.connect_timeout(Duration::from_secs(5))
|
|
.connect()
|
|
.await
|
|
.map_err(|e| NodeError::ConnectionFailed {
|
|
node_id: "unknown".to_string(),
|
|
reason: e.to_string(),
|
|
})?;
|
|
|
|
let client = NodeServiceClient::new(channel);
|
|
|
|
// Try to get node status to get the real node ID
|
|
// If that fails, generate a temporary one based on endpoint, but connection is established
|
|
let node_id = match client.clone().get_status(lightningstor_node::proto::GetStatusRequest {}).await {
|
|
Ok(response) => response.into_inner().node_id,
|
|
Err(_) => format!("node-{}", endpoint.replace([':', '.', '/'], "-")),
|
|
};
|
|
|
|
Ok(Self {
|
|
node_id,
|
|
endpoint: endpoint.to_string(),
|
|
healthy: AtomicBool::new(true),
|
|
client: RwLock::new(client),
|
|
})
|
|
}
|
|
|
|
/// Create a client with a specific node ID
|
|
pub async fn connect_with_id(node_id: &str, endpoint: &str) -> NodeResult<Self> {
|
|
let endpoint_url = if endpoint.contains("://") {
|
|
endpoint.to_string()
|
|
} else {
|
|
format!("http://{}", endpoint)
|
|
};
|
|
|
|
// We use lazy connection here to not block startup if a node is temporarily down
|
|
let channel = Channel::from_shared(endpoint_url.clone())
|
|
.map_err(|e| NodeError::ConnectionFailed {
|
|
node_id: node_id.to_string(),
|
|
reason: e.to_string(),
|
|
})?
|
|
.connect_timeout(Duration::from_secs(5))
|
|
.connect_lazy();
|
|
|
|
let client = NodeServiceClient::new(channel);
|
|
|
|
Ok(Self {
|
|
node_id: node_id.to_string(),
|
|
endpoint: endpoint.to_string(),
|
|
healthy: AtomicBool::new(true),
|
|
client: RwLock::new(client),
|
|
})
|
|
}
|
|
|
|
/// Mark the node as unhealthy
|
|
pub fn mark_unhealthy(&self) {
|
|
self.healthy.store(false, Ordering::SeqCst);
|
|
}
|
|
|
|
/// Mark the node as healthy
|
|
pub fn mark_healthy(&self) {
|
|
self.healthy.store(true, Ordering::SeqCst);
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl NodeClientTrait for NodeClient {
|
|
fn node_id(&self) -> &str {
|
|
&self.node_id
|
|
}
|
|
|
|
fn endpoint(&self) -> &str {
|
|
&self.endpoint
|
|
}
|
|
|
|
async fn is_healthy(&self) -> bool {
|
|
self.healthy.load(Ordering::SeqCst)
|
|
}
|
|
|
|
async fn put_chunk(
|
|
&self,
|
|
chunk_id: &str,
|
|
shard_index: u32,
|
|
is_parity: bool,
|
|
data: Bytes,
|
|
) -> NodeResult<()> {
|
|
if !self.is_healthy().await {
|
|
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
|
}
|
|
|
|
let request = PutChunkRequest {
|
|
chunk_id: chunk_id.to_string(),
|
|
shard_index,
|
|
is_parity,
|
|
data: data.to_vec(),
|
|
};
|
|
|
|
let mut client = self.client.write().await;
|
|
client
|
|
.put_chunk(request)
|
|
.await
|
|
.map(|_| ())
|
|
.map_err(|e| NodeError::RpcFailed(e.to_string()))
|
|
}
|
|
|
|
async fn get_chunk(
|
|
&self,
|
|
chunk_id: &str,
|
|
shard_index: u32,
|
|
is_parity: bool,
|
|
) -> NodeResult<Vec<u8>> {
|
|
if !self.is_healthy().await {
|
|
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
|
}
|
|
|
|
let request = GetChunkRequest {
|
|
chunk_id: chunk_id.to_string(),
|
|
shard_index,
|
|
is_parity,
|
|
};
|
|
|
|
let mut client = self.client.write().await;
|
|
let response = client
|
|
.get_chunk(request)
|
|
.await
|
|
.map_err(|e| match e.code() {
|
|
tonic::Code::NotFound => NodeError::NotFound(chunk_id.to_string()),
|
|
_ => NodeError::RpcFailed(e.to_string()),
|
|
})?;
|
|
|
|
Ok(response.into_inner().data)
|
|
}
|
|
|
|
async fn delete_chunk(&self, chunk_id: &str) -> NodeResult<()> {
|
|
if !self.is_healthy().await {
|
|
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
|
}
|
|
|
|
let request = DeleteChunkRequest {
|
|
chunk_id: chunk_id.to_string(),
|
|
};
|
|
|
|
let mut client = self.client.write().await;
|
|
client
|
|
.delete_chunk(request)
|
|
.await
|
|
.map(|_| ())
|
|
.map_err(|e| NodeError::RpcFailed(e.to_string()))
|
|
}
|
|
|
|
async fn chunk_exists(&self, chunk_id: &str) -> NodeResult<bool> {
|
|
if !self.is_healthy().await {
|
|
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
|
}
|
|
|
|
let request = ChunkExistsRequest {
|
|
chunk_id: chunk_id.to_string(),
|
|
};
|
|
|
|
let mut client = self.client.write().await;
|
|
let response = client
|
|
.chunk_exists(request)
|
|
.await
|
|
.map_err(|e| NodeError::RpcFailed(e.to_string()))?;
|
|
|
|
Ok(response.into_inner().exists)
|
|
}
|
|
|
|
async fn chunk_size(&self, chunk_id: &str) -> NodeResult<Option<u64>> {
|
|
if !self.is_healthy().await {
|
|
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
|
}
|
|
|
|
let request = ChunkSizeRequest {
|
|
chunk_id: chunk_id.to_string(),
|
|
};
|
|
|
|
let mut client = self.client.write().await;
|
|
let response = client
|
|
.chunk_size(request)
|
|
.await
|
|
.map_err(|e| NodeError::RpcFailed(e.to_string()))?;
|
|
|
|
let inner = response.into_inner();
|
|
if inner.exists {
|
|
Ok(Some(inner.size))
|
|
} else {
|
|
Ok(None)
|
|
}
|
|
}
|
|
|
|
async fn ping(&self) -> NodeResult<Duration> {
|
|
if !self.is_healthy().await {
|
|
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
|
}
|
|
|
|
let start = std::time::Instant::now();
|
|
let request = PingRequest {};
|
|
|
|
let mut client = self.client.write().await;
|
|
let _ = client
|
|
.ping(request)
|
|
.await
|
|
.map_err(|e| NodeError::RpcFailed(e.to_string()))?;
|
|
|
|
Ok(start.elapsed())
|
|
}
|
|
}
|
|
|
|
/// A pool of node clients for connection reuse
|
|
pub struct NodeClientPool {
|
|
clients: RwLock<Vec<Arc<dyn NodeClientTrait>>>,
|
|
}
|
|
|
|
impl NodeClientPool {
|
|
/// Create a new empty client pool
|
|
pub fn new() -> Self {
|
|
Self {
|
|
clients: RwLock::new(Vec::new()),
|
|
}
|
|
}
|
|
|
|
/// Add a client to the pool
|
|
pub async fn add(&self, client: Arc<dyn NodeClientTrait>) {
|
|
self.clients.write().await.push(client);
|
|
}
|
|
|
|
/// Get all clients in the pool
|
|
pub async fn all(&self) -> Vec<Arc<dyn NodeClientTrait>> {
|
|
self.clients.read().await.clone()
|
|
}
|
|
|
|
/// Get all healthy clients
|
|
pub async fn healthy(&self) -> Vec<Arc<dyn NodeClientTrait>> {
|
|
let clients = self.clients.read().await;
|
|
let mut healthy = Vec::new();
|
|
for client in clients.iter() {
|
|
if client.is_healthy().await {
|
|
healthy.push(client.clone());
|
|
}
|
|
}
|
|
healthy
|
|
}
|
|
|
|
/// Get a client by node ID
|
|
pub async fn get(&self, node_id: &str) -> Option<Arc<dyn NodeClientTrait>> {
|
|
self.clients
|
|
.read()
|
|
.await
|
|
.iter()
|
|
.find(|c| c.node_id() == node_id)
|
|
.cloned()
|
|
}
|
|
|
|
/// Remove a client from the pool
|
|
pub async fn remove(&self, node_id: &str) {
|
|
self.clients
|
|
.write()
|
|
.await
|
|
.retain(|c| c.node_id() != node_id);
|
|
}
|
|
|
|
/// Get the number of clients in the pool
|
|
pub async fn len(&self) -> usize {
|
|
self.clients.read().await.len()
|
|
}
|
|
|
|
/// Check if the pool is empty
|
|
pub async fn is_empty(&self) -> bool {
|
|
self.clients.read().await.is_empty()
|
|
}
|
|
}
|
|
|
|
impl Default for NodeClientPool {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a client through the lazy constructor so that no storage node
    /// (or DNS entry) has to exist for these unit tests to run.
    async fn lazy_client(node_id: &str) -> NodeClient {
        NodeClient::connect_with_id(node_id, &format!("http://{}:9002", node_id))
            .await
            .expect("a well-formed endpoint must yield a lazily connected client")
    }

    #[tokio::test]
    async fn test_node_client_creation() {
        let client = NodeClient::connect_with_id("test-node", "localhost:9002")
            .await
            .unwrap();

        assert!(client.is_healthy().await);
        assert_eq!(client.node_id(), "test-node");
        // The endpoint is stored as supplied, without the added scheme.
        assert_eq!(client.endpoint(), "localhost:9002");
    }

    #[tokio::test]
    async fn test_node_client_health_toggle() {
        let client = lazy_client("node1").await;

        assert!(client.is_healthy().await);
        client.mark_unhealthy();
        assert!(!client.is_healthy().await);
        client.mark_healthy();
        assert!(client.is_healthy().await);
    }

    #[tokio::test]
    async fn test_unhealthy_node_rejects_requests() {
        let client = lazy_client("node1").await;
        client.mark_unhealthy();

        // The health guard runs before any RPC is attempted, so this never
        // touches the network.
        assert!(matches!(
            client.ping().await,
            Err(NodeError::Unhealthy(id)) if id == "node1"
        ));
        assert!(matches!(
            client.chunk_exists("chunk").await,
            Err(NodeError::Unhealthy(_))
        ));
    }

    #[tokio::test]
    async fn test_node_client_pool() {
        let pool = NodeClientPool::new();
        assert!(pool.is_empty().await);

        let client1 = Arc::new(lazy_client("node1").await);
        let client2 = Arc::new(lazy_client("node2").await);

        pool.add(client1.clone()).await;
        pool.add(client2.clone()).await;

        assert_eq!(pool.len().await, 2);
        assert!(pool.get(client1.node_id()).await.is_some());

        // Only clients whose health flag is set are returned by `healthy`.
        client2.mark_unhealthy();
        let healthy = pool.healthy().await;
        assert_eq!(healthy.len(), 1);
        assert_eq!(healthy[0].node_id(), "node1");

        pool.remove(client1.node_id()).await;
        assert_eq!(pool.len().await, 1);
        assert!(pool.get(client1.node_id()).await.is_none());
    }
}
|