photoncloud-monorepo/chainfire/crates/chainfire-api/src/cluster_service.rs
centra 5c6eb04a46 T036: Add VM cluster deployment configs for nixos-anywhere
- netboot-base.nix with SSH key auth
- Launch scripts for node01/02/03
- Node configuration.nix and disko.nix
- Nix modules for first-boot automation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-11 09:59:19 +09:00

210 lines
7.1 KiB
Rust

//! Cluster management service implementation
//!
//! This service handles cluster membership operations including adding,
//! removing, and listing members.
use crate::conversions::make_header;
use crate::proto::{
cluster_server::Cluster, Member, MemberAddRequest, MemberAddResponse, MemberListRequest,
MemberListResponse, MemberRemoveRequest, MemberRemoveResponse, StatusRequest, StatusResponse,
};
use chainfire_raft::RaftNode;
use openraft::BasicNode;
use std::collections::BTreeMap;
use std::sync::Arc;
use tonic::{Request, Response, Status};
use tracing::{debug, info, warn};
/// gRPC-facing implementation of the cluster membership service.
///
/// Holds everything the `Cluster` RPC handlers need: the Raft node they query and
/// mutate, the RPC client in which peer addresses are registered, and the static
/// identity values echoed back in responses.
pub struct ClusterServiceImpl {
    /// Shared handle to the local Raft node (membership, leader, term, storage).
    raft: Arc<RaftNode>,
    /// Shared gRPC Raft client; new member addresses are registered here.
    rpc_client: Arc<crate::GrpcRaftClient>,
    /// Identifier of the cluster, placed into every response header.
    cluster_id: u64,
    /// Server version string, captured from `CARGO_PKG_VERSION` at construction.
    version: String,
}
impl ClusterServiceImpl {
    /// Construct a cluster service around an existing Raft node and RPC client.
    ///
    /// The reported server version is taken from this crate's `CARGO_PKG_VERSION`
    /// at compile time.
    pub fn new(
        raft: Arc<RaftNode>,
        rpc_client: Arc<crate::GrpcRaftClient>,
        cluster_id: u64,
    ) -> Self {
        let version = String::from(env!("CARGO_PKG_VERSION"));
        Self {
            raft,
            rpc_client,
            cluster_id,
            version,
        }
    }

    /// Build a response header for this node carrying the given `revision`.
    ///
    /// Delegates to `conversions::make_header` with this service's cluster ID and
    /// the local Raft node ID; the final argument is always passed as `0`.
    fn make_header(&self, revision: u64) -> crate::proto::ResponseHeader {
        let local_id = self.raft.id();
        make_header(self.cluster_id, local_id, revision, 0)
    }

    /// Render the membership reported by the Raft node as proto `Member` entries.
    ///
    /// Only the ID is real data: the name is synthesised as `node-<id>`, both URL
    /// lists are left empty, and `is_learner` is always reported as `false`.
    async fn get_member_list(&self) -> Vec<Member> {
        let member_ids = self.raft.membership().await;

        let mut members = Vec::new();
        for &id in member_ids.iter() {
            members.push(Member {
                id,
                name: format!("node-{}", id),
                peer_urls: vec![],
                client_urls: vec![],
                is_learner: false,
            });
        }
        members
    }
}
impl ClusterServiceImpl {
    /// Snapshot the membership reported by the Raft node as a node map.
    ///
    /// Every entry carries a default `BasicNode`; peer addresses are registered
    /// with the RPC client in `member_add` and are not stored in this map.
    async fn membership_nodes(&self) -> BTreeMap<u64, BasicNode> {
        self.raft
            .membership()
            .await
            .iter()
            .map(|&id| (id, BasicNode::default()))
            .collect()
    }
}

#[tonic::async_trait]
impl Cluster for ClusterServiceImpl {
    /// Add a node to the cluster.
    ///
    /// The node's first peer URL is registered with the RPC client, the node is
    /// added to Raft as a learner, and, unless the request asked for a learner,
    /// it is then promoted to voter via a membership change.
    ///
    /// # Errors
    ///
    /// * `invalid_argument` if `peer_urls` is empty.
    /// * `internal` if adding the learner fails.
    ///
    /// A failed promotion is not an error: the call still succeeds, and the
    /// returned member is flagged `is_learner = true` because that is the role
    /// the node actually holds.
    async fn member_add(
        &self,
        request: Request<MemberAddRequest>,
    ) -> Result<Response<MemberAddResponse>, Status> {
        let req = request.into_inner();
        debug!(
            node_id = req.node_id,
            peer_urls = ?req.peer_urls,
            is_learner = req.is_learner,
            "Member add request"
        );

        // Use the request's node ID (not random)
        let member_id = req.node_id;

        // Reject the request before touching any state if there is no address.
        let peer_url = match req.peer_urls.first() {
            Some(url) => url,
            None => return Err(Status::invalid_argument("peer_urls cannot be empty")),
        };

        // Register the node address in the RPC client FIRST (before Raft operations)
        // NOTE(review): the address stays registered if the Raft call below fails;
        // confirm whether the RPC client offers a way to roll this back.
        self.rpc_client.add_node(member_id, peer_url.clone()).await;
        info!(node_id = member_id, peer_url = %peer_url, "Registered node address in RPC client");

        // Add as learner first (safer for cluster stability)
        if let Err(e) = self
            .raft
            .add_learner(member_id, BasicNode::default(), true)
            .await
        {
            warn!(error = %e, "Failed to add member");
            return Err(Status::internal(format!("Failed to add member: {}", e)));
        }
        info!(member_id, "Added learner node");

        // Role the node ends up with; starts as requested and is corrected below
        // if the promotion to voter does not go through.
        let mut is_learner = req.is_learner;

        // If not explicitly a learner, promote to voter
        if !req.is_learner {
            let mut members = self.membership_nodes().await;
            members.insert(member_id, BasicNode::default());

            if let Err(e) = self.raft.change_membership(members, false).await {
                warn!(error = %e, member_id, "Failed to promote learner to voter");
                // Still return success for the learner add, but report the real role.
                is_learner = true;
            } else {
                info!(member_id, "Promoted learner to voter");
            }
        }

        let new_member = Member {
            id: member_id,
            name: String::new(),
            peer_urls: req.peer_urls,
            client_urls: vec![],
            is_learner,
        };

        Ok(Response::new(MemberAddResponse {
            header: Some(self.make_header(0)),
            member: Some(new_member),
            members: self.get_member_list().await,
        }))
    }

    /// Remove a node from the cluster.
    ///
    /// # Errors
    ///
    /// * `not_found` if the ID is not in the current membership.
    /// * `internal` if the Raft membership change fails.
    async fn member_remove(
        &self,
        request: Request<MemberRemoveRequest>,
    ) -> Result<Response<MemberRemoveResponse>, Status> {
        let req = request.into_inner();
        debug!(member_id = req.id, "Member remove request");

        // Take the current membership and drop the member in a single lookup;
        // a missing key means the node was never part of the cluster.
        let mut members = self.membership_nodes().await;
        if members.remove(&req.id).is_none() {
            return Err(Status::not_found(format!(
                "Member {} not found in cluster",
                req.id
            )));
        }

        match self.raft.change_membership(members, false).await {
            Ok(()) => {
                info!(member_id = req.id, "Removed member from cluster");
                Ok(Response::new(MemberRemoveResponse {
                    header: Some(self.make_header(0)),
                    members: self.get_member_list().await,
                }))
            }
            Err(e) => {
                warn!(error = %e, member_id = req.id, "Failed to remove member");
                Err(Status::internal(format!("Failed to remove member: {}", e)))
            }
        }
    }

    /// List the current cluster members.
    async fn member_list(
        &self,
        _request: Request<MemberListRequest>,
    ) -> Result<Response<MemberListResponse>, Status> {
        debug!("Member list request");

        Ok(Response::new(MemberListResponse {
            header: Some(self.make_header(0)),
            members: self.get_member_list().await,
        }))
    }

    /// Report this server's status: version, leader, term and current revision.
    ///
    /// `leader` is `0` when the Raft node reports no leader. `db_size` is always
    /// `0` for now.
    async fn status(
        &self,
        _request: Request<StatusRequest>,
    ) -> Result<Response<StatusResponse>, Status> {
        debug!("Status request");

        let leader = self.raft.leader().await;
        let term = self.raft.current_term().await;

        // Get storage info from Raft node
        let storage = self.raft.storage();
        let storage_guard = storage.read().await;
        let sm = storage_guard.state_machine().read().await;
        let revision = sm.current_revision();

        // NOTE(review): both Raft index fields are filled from the state machine
        // revision; confirm that this is the intended value for them.
        Ok(Response::new(StatusResponse {
            header: Some(self.make_header(revision)),
            version: self.version.clone(),
            db_size: 0, // TODO: get actual RocksDB size
            leader: leader.unwrap_or(0),
            raft_index: revision,
            raft_term: term,
            raft_applied_index: revision,
        }))
    }
}