- netboot-base.nix with SSH key auth - Launch scripts for node01/02/03 - Node configuration.nix and disko.nix - Nix modules for first-boot automation 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
210 lines
7.1 KiB
Rust
210 lines
7.1 KiB
Rust
//! Cluster management service implementation
//!
//! This service handles cluster membership operations (adding, removing, and
//! listing members) and answers server status requests.
|
|
|
|
use crate::conversions::make_header;
|
|
use crate::proto::{
|
|
cluster_server::Cluster, Member, MemberAddRequest, MemberAddResponse, MemberListRequest,
|
|
MemberListResponse, MemberRemoveRequest, MemberRemoveResponse, StatusRequest, StatusResponse,
|
|
};
|
|
use chainfire_raft::RaftNode;
|
|
use openraft::BasicNode;
|
|
use std::collections::BTreeMap;
|
|
use std::sync::Arc;
|
|
use tonic::{Request, Response, Status};
|
|
use tracing::{debug, info, warn};
|
|
|
|
/// Cluster service implementation
|
|
pub struct ClusterServiceImpl {
|
|
/// Raft node
|
|
raft: Arc<RaftNode>,
|
|
/// gRPC Raft client for managing node addresses
|
|
rpc_client: Arc<crate::GrpcRaftClient>,
|
|
/// Cluster ID
|
|
cluster_id: u64,
|
|
/// Server version
|
|
version: String,
|
|
}
|
|
|
|
impl ClusterServiceImpl {
|
|
/// Create a new cluster service
|
|
pub fn new(raft: Arc<RaftNode>, rpc_client: Arc<crate::GrpcRaftClient>, cluster_id: u64) -> Self {
|
|
Self {
|
|
raft,
|
|
rpc_client,
|
|
cluster_id,
|
|
version: env!("CARGO_PKG_VERSION").to_string(),
|
|
}
|
|
}
|
|
|
|
fn make_header(&self, revision: u64) -> crate::proto::ResponseHeader {
|
|
make_header(self.cluster_id, self.raft.id(), revision, 0)
|
|
}
|
|
|
|
/// Get current members as proto Member list
|
|
async fn get_member_list(&self) -> Vec<Member> {
|
|
self.raft
|
|
.membership()
|
|
.await
|
|
.iter()
|
|
.map(|&id| Member {
|
|
id,
|
|
name: format!("node-{}", id),
|
|
peer_urls: vec![],
|
|
client_urls: vec![],
|
|
is_learner: false,
|
|
})
|
|
.collect()
|
|
}
|
|
}
|
|
|
|
#[tonic::async_trait]
|
|
impl Cluster for ClusterServiceImpl {
|
|
async fn member_add(
|
|
&self,
|
|
request: Request<MemberAddRequest>,
|
|
) -> Result<Response<MemberAddResponse>, Status> {
|
|
let req = request.into_inner();
|
|
debug!(node_id = req.node_id, peer_urls = ?req.peer_urls, is_learner = req.is_learner, "Member add request");
|
|
|
|
// Use the request's node ID (not random)
|
|
let member_id = req.node_id;
|
|
|
|
// Register the node address in the RPC client FIRST (before Raft operations)
|
|
if !req.peer_urls.is_empty() {
|
|
let peer_url = &req.peer_urls[0];
|
|
self.rpc_client.add_node(member_id, peer_url.clone()).await;
|
|
info!(node_id = member_id, peer_url = %peer_url, "Registered node address in RPC client");
|
|
} else {
|
|
return Err(Status::invalid_argument("peer_urls cannot be empty"));
|
|
}
|
|
|
|
// Create BasicNode for the new member
|
|
let node = BasicNode::default();
|
|
|
|
// Add as learner first (safer for cluster stability)
|
|
match self.raft.add_learner(member_id, node, true).await {
|
|
Ok(()) => {
|
|
info!(member_id, "Added learner node");
|
|
|
|
// If not explicitly a learner, promote to voter
|
|
if !req.is_learner {
|
|
// Get current membership and add new member
|
|
let mut members: BTreeMap<u64, BasicNode> = self
|
|
.raft
|
|
.membership()
|
|
.await
|
|
.iter()
|
|
.map(|&id| (id, BasicNode::default()))
|
|
.collect();
|
|
members.insert(member_id, BasicNode::default());
|
|
|
|
if let Err(e) = self.raft.change_membership(members, false).await {
|
|
warn!(error = %e, member_id, "Failed to promote learner to voter");
|
|
// Still return success for the learner add
|
|
} else {
|
|
info!(member_id, "Promoted learner to voter");
|
|
}
|
|
}
|
|
|
|
let new_member = Member {
|
|
id: member_id,
|
|
name: String::new(),
|
|
peer_urls: req.peer_urls,
|
|
client_urls: vec![],
|
|
is_learner: req.is_learner,
|
|
};
|
|
|
|
Ok(Response::new(MemberAddResponse {
|
|
header: Some(self.make_header(0)),
|
|
member: Some(new_member),
|
|
members: self.get_member_list().await,
|
|
}))
|
|
}
|
|
Err(e) => {
|
|
warn!(error = %e, "Failed to add member");
|
|
Err(Status::internal(format!("Failed to add member: {}", e)))
|
|
}
|
|
}
|
|
}
|
|
|
|
async fn member_remove(
|
|
&self,
|
|
request: Request<MemberRemoveRequest>,
|
|
) -> Result<Response<MemberRemoveResponse>, Status> {
|
|
let req = request.into_inner();
|
|
debug!(member_id = req.id, "Member remove request");
|
|
|
|
// Get current membership and remove the member
|
|
let mut members: BTreeMap<u64, BasicNode> = self
|
|
.raft
|
|
.membership()
|
|
.await
|
|
.iter()
|
|
.map(|&id| (id, BasicNode::default()))
|
|
.collect();
|
|
|
|
if !members.contains_key(&req.id) {
|
|
return Err(Status::not_found(format!(
|
|
"Member {} not found in cluster",
|
|
req.id
|
|
)));
|
|
}
|
|
|
|
members.remove(&req.id);
|
|
|
|
match self.raft.change_membership(members, false).await {
|
|
Ok(()) => {
|
|
info!(member_id = req.id, "Removed member from cluster");
|
|
Ok(Response::new(MemberRemoveResponse {
|
|
header: Some(self.make_header(0)),
|
|
members: self.get_member_list().await,
|
|
}))
|
|
}
|
|
Err(e) => {
|
|
warn!(error = %e, member_id = req.id, "Failed to remove member");
|
|
Err(Status::internal(format!("Failed to remove member: {}", e)))
|
|
}
|
|
}
|
|
}
|
|
|
|
async fn member_list(
|
|
&self,
|
|
_request: Request<MemberListRequest>,
|
|
) -> Result<Response<MemberListResponse>, Status> {
|
|
debug!("Member list request");
|
|
|
|
Ok(Response::new(MemberListResponse {
|
|
header: Some(self.make_header(0)),
|
|
members: self.get_member_list().await,
|
|
}))
|
|
}
|
|
|
|
async fn status(
|
|
&self,
|
|
_request: Request<StatusRequest>,
|
|
) -> Result<Response<StatusResponse>, Status> {
|
|
debug!("Status request");
|
|
|
|
let leader = self.raft.leader().await;
|
|
let term = self.raft.current_term().await;
|
|
let is_leader = self.raft.is_leader().await;
|
|
|
|
// Get storage info from Raft node
|
|
let storage = self.raft.storage();
|
|
let storage_guard = storage.read().await;
|
|
let sm = storage_guard.state_machine().read().await;
|
|
let revision = sm.current_revision();
|
|
|
|
Ok(Response::new(StatusResponse {
|
|
header: Some(self.make_header(revision)),
|
|
version: self.version.clone(),
|
|
db_size: 0, // TODO: get actual RocksDB size
|
|
leader: leader.unwrap_or(0),
|
|
raft_index: revision,
|
|
raft_term: term,
|
|
raft_applied_index: revision,
|
|
}))
|
|
}
|
|
}
|