//! Chainfire 3-Node Cluster Integration Test
//!
//! Verifies HA behavior: leader election, state replication, and node recovery.

use chainfire_client::Client;
use chainfire_server::{
    config::{ClusterConfig, NetworkConfig, NodeConfig, RaftConfig, ServerConfig, StorageConfig},
    server::Server,
};
use std::net::SocketAddr;
use std::time::Duration;
use tokio::time::sleep;
/// Create a 3-node cluster configuration with join flow
|
|
/// Node 1 bootstraps alone, nodes 2 & 3 join via member_add API
|
|
fn cluster_config_with_join(node_id: u64) -> (ServerConfig, tempfile::TempDir) {
|
|
let base_port = match node_id {
|
|
1 => 12379,
|
|
2 => 22379,
|
|
3 => 32379,
|
|
_ => panic!("Invalid node_id"),
|
|
};
|
|
|
|
let api_addr: SocketAddr = format!("127.0.0.1:{}", base_port).parse().unwrap();
|
|
let raft_addr: SocketAddr = format!("127.0.0.1:{}", base_port + 1).parse().unwrap();
|
|
let gossip_addr: SocketAddr = format!("127.0.0.1:{}", base_port + 2).parse().unwrap();
|
|
|
|
let temp_dir = tempfile::tempdir().unwrap();
|
|
|
|
let config = ServerConfig {
|
|
node: NodeConfig {
|
|
id: node_id,
|
|
name: format!("test-node-{}", node_id),
|
|
role: "control_plane".to_string(),
|
|
},
|
|
cluster: ClusterConfig {
|
|
id: 1,
|
|
bootstrap: node_id == 1, // Only node 1 bootstraps
|
|
initial_members: vec![], // Node 1 starts alone, others join via API
|
|
},
|
|
network: NetworkConfig {
|
|
api_addr,
|
|
raft_addr,
|
|
gossip_addr,
|
|
tls: None,
|
|
},
|
|
storage: StorageConfig {
|
|
data_dir: temp_dir.path().to_path_buf(),
|
|
},
|
|
raft: RaftConfig::default(),
|
|
};
|
|
|
|
(config, temp_dir)
|
|
}
|
|
|
|
/// Alias for backwards compatibility (old tests use this)
///
/// Forwards directly to `cluster_config_with_join`; kept so tests written
/// before the join-flow rename keep compiling unchanged.
fn cluster_config(node_id: u64) -> (ServerConfig, tempfile::TempDir) {
    cluster_config_with_join(node_id)
}
/// Create a single-node cluster configuration (for testing basic Raft functionality)
|
|
fn single_node_config() -> (ServerConfig, tempfile::TempDir) {
|
|
let api_addr: SocketAddr = "127.0.0.1:12379".parse().unwrap();
|
|
let raft_addr: SocketAddr = "127.0.0.1:12380".parse().unwrap();
|
|
let gossip_addr: SocketAddr = "127.0.0.1:12381".parse().unwrap();
|
|
|
|
let temp_dir = tempfile::tempdir().unwrap();
|
|
|
|
let config = ServerConfig {
|
|
node: NodeConfig {
|
|
id: 1,
|
|
name: "test-node-1".to_string(),
|
|
role: "control_plane".to_string(),
|
|
},
|
|
cluster: ClusterConfig {
|
|
id: 1,
|
|
bootstrap: true, // Single-node bootstrap
|
|
initial_members: vec![], // Empty = single node
|
|
},
|
|
network: NetworkConfig {
|
|
api_addr,
|
|
raft_addr,
|
|
gossip_addr,
|
|
tls: None,
|
|
},
|
|
storage: StorageConfig {
|
|
data_dir: temp_dir.path().to_path_buf(),
|
|
},
|
|
raft: RaftConfig::default(),
|
|
};
|
|
|
|
(config, temp_dir)
|
|
}
|
|
|
|
#[tokio::test]
|
|
#[ignore] // Run with: cargo test --test cluster_integration -- --ignored
|
|
async fn test_single_node_raft_leader_election() {
|
|
println!("\n=== Test: Single-Node Raft Leader Election ===");
|
|
|
|
// Start single node
|
|
let (config, _temp) = single_node_config();
|
|
let api_addr = config.network.api_addr;
|
|
println!("Creating single-node cluster...");
|
|
let server = Server::new(config).await.unwrap();
|
|
let handle = tokio::spawn(async move { server.run().await });
|
|
println!("Node started: {}", api_addr);
|
|
|
|
// Wait for leader election
|
|
println!("Waiting for leader election...");
|
|
sleep(Duration::from_secs(2)).await;
|
|
|
|
// Verify leader elected
|
|
let mut client = Client::connect(format!("http://{}", api_addr))
|
|
.await
|
|
.expect("Failed to connect");
|
|
|
|
let status = client.status().await.expect("Failed to get status");
|
|
println!(
|
|
"Node status: leader={}, term={}",
|
|
status.leader, status.raft_term
|
|
);
|
|
|
|
assert_eq!(status.leader, 1, "Node 1 should be leader in single-node cluster");
|
|
assert!(status.raft_term > 0, "Raft term should be > 0");
|
|
|
|
// Test basic KV operations
|
|
println!("Testing KV operations...");
|
|
client.put("test-key", "test-value").await.unwrap();
|
|
let value = client.get("test-key").await.unwrap();
|
|
assert_eq!(value, Some(b"test-value".to_vec()));
|
|
|
|
println!("✓ Single-node Raft working correctly");
|
|
|
|
// Cleanup
|
|
handle.abort();
|
|
}
|
|
|
|
#[tokio::test]
|
|
#[ignore] // Run with: cargo test --test cluster_integration -- --ignored
|
|
async fn test_3node_leader_election_with_join() {
|
|
println!("\n=== Test: 3-Node Leader Election with Join Flow ===");
|
|
|
|
// Start Node 1 (bootstrap alone)
|
|
let (config1, _temp1) = cluster_config_with_join(1);
|
|
let api1 = config1.network.api_addr;
|
|
let raft1 = config1.network.raft_addr;
|
|
println!("Creating Node 1 (bootstrap)...");
|
|
let server1 = Server::new(config1).await.unwrap();
|
|
let handle1 = tokio::spawn(async move { server1.run().await });
|
|
println!("Node 1 started: API={}, Raft={}", api1, raft1);
|
|
|
|
// Wait for node 1 to become leader
|
|
sleep(Duration::from_secs(2)).await;
|
|
|
|
// Verify node 1 is leader
|
|
let mut client1 = Client::connect(format!("http://{}", api1))
|
|
.await
|
|
.expect("Failed to connect to node 1");
|
|
let status1 = client1.status().await.expect("Failed to get status");
|
|
println!("Node 1 status: leader={}, term={}", status1.leader, status1.raft_term);
|
|
assert_eq!(status1.leader, 1, "Node 1 should be leader");
|
|
|
|
// Start Node 2 (no bootstrap)
|
|
let (config2, _temp2) = cluster_config_with_join(2);
|
|
let api2 = config2.network.api_addr;
|
|
let raft2 = config2.network.raft_addr;
|
|
println!("Creating Node 2...");
|
|
let server2 = Server::new(config2).await.unwrap();
|
|
let handle2 = tokio::spawn(async move { server2.run().await });
|
|
println!("Node 2 started: API={}, Raft={}", api2, raft2);
|
|
sleep(Duration::from_millis(500)).await;
|
|
|
|
// Start Node 3 (no bootstrap)
|
|
let (config3, _temp3) = cluster_config_with_join(3);
|
|
let api3 = config3.network.api_addr;
|
|
let raft3 = config3.network.raft_addr;
|
|
println!("Creating Node 3...");
|
|
let server3 = Server::new(config3).await.unwrap();
|
|
let handle3 = tokio::spawn(async move { server3.run().await });
|
|
println!("Node 3 started: API={}, Raft={}", api3, raft3);
|
|
sleep(Duration::from_millis(500)).await;
|
|
|
|
// Add node 2 to cluster via member_add API
|
|
println!("Adding node 2 to cluster via member_add API...");
|
|
let member2_id = client1
|
|
.member_add(2, raft2.to_string(), false) // node_id=2, false=voter
|
|
.await
|
|
.expect("Failed to add node 2");
|
|
println!("Node 2 added with ID: {}", member2_id);
|
|
assert_eq!(member2_id, 2, "Node 2 should have ID 2");
|
|
|
|
// Add node 3 to cluster via member_add API
|
|
println!("Adding node 3 to cluster via member_add API...");
|
|
let member3_id = client1
|
|
.member_add(3, raft3.to_string(), false) // node_id=3, false=voter
|
|
.await
|
|
.expect("Failed to add node 3");
|
|
println!("Node 3 added with ID: {}", member3_id);
|
|
assert_eq!(member3_id, 3, "Node 3 should have ID 3");
|
|
|
|
// Wait for cluster membership changes to propagate
|
|
sleep(Duration::from_secs(3)).await;
|
|
|
|
// Verify all nodes see the same leader
|
|
let status1 = client1.status().await.expect("Failed to get status from node 1");
|
|
println!("Node 1 final status: leader={}, term={}", status1.leader, status1.raft_term);
|
|
|
|
let mut client2 = Client::connect(format!("http://{}", api2))
|
|
.await
|
|
.expect("Failed to connect to node 2");
|
|
let status2 = client2.status().await.expect("Failed to get status from node 2");
|
|
println!("Node 2 final status: leader={}, term={}", status2.leader, status2.raft_term);
|
|
|
|
let mut client3 = Client::connect(format!("http://{}", api3))
|
|
.await
|
|
.expect("Failed to connect to node 3");
|
|
let status3 = client3.status().await.expect("Failed to get status from node 3");
|
|
println!("Node 3 final status: leader={}, term={}", status3.leader, status3.raft_term);
|
|
|
|
// All nodes should agree on the leader
|
|
assert_eq!(status1.leader, status2.leader, "Nodes 1 and 2 disagree on leader");
|
|
assert_eq!(status1.leader, status3.leader, "Nodes 1 and 3 disagree on leader");
|
|
assert!(status1.leader > 0, "No leader elected");
|
|
|
|
println!("✓ 3-node cluster formed successfully with join flow");
|
|
|
|
// Cleanup
|
|
handle1.abort();
|
|
handle2.abort();
|
|
handle3.abort();
|
|
}
|
|
|
|
#[tokio::test]
|
|
#[ignore]
|
|
async fn test_3node_state_replication() {
|
|
println!("\n=== Test: 3-Node State Replication ===");
|
|
|
|
// Start cluster
|
|
let (config1, _temp1) = cluster_config(1);
|
|
let api1 = config1.network.api_addr;
|
|
let server1 = Server::new(config1).await.unwrap();
|
|
let handle1 = tokio::spawn(async move { server1.run().await });
|
|
|
|
let (config2, _temp2) = cluster_config(2);
|
|
let api2 = config2.network.api_addr;
|
|
let server2 = Server::new(config2).await.unwrap();
|
|
let handle2 = tokio::spawn(async move { server2.run().await });
|
|
|
|
let (config3, _temp3) = cluster_config(3);
|
|
let api3 = config3.network.api_addr;
|
|
let server3 = Server::new(config3).await.unwrap();
|
|
let handle3 = tokio::spawn(async move { server3.run().await });
|
|
|
|
sleep(Duration::from_secs(2)).await;
|
|
println!("Cluster started");
|
|
|
|
// Write data to node 1 (leader)
|
|
let mut client1 = Client::connect(format!("http://{}", api1))
|
|
.await
|
|
.unwrap();
|
|
|
|
println!("Writing test data to node 1...");
|
|
client1.put("test/key1", "value1").await.unwrap();
|
|
client1.put("test/key2", "value2").await.unwrap();
|
|
client1.put("test/key3", "value3").await.unwrap();
|
|
|
|
// Wait for replication
|
|
sleep(Duration::from_millis(500)).await;
|
|
|
|
// Read from node 2 and node 3 (followers)
|
|
println!("Reading from node 2...");
|
|
let mut client2 = Client::connect(format!("http://{}", api2))
|
|
.await
|
|
.unwrap();
|
|
let val2 = client2.get("test/key1").await.unwrap();
|
|
assert_eq!(val2, Some(b"value1".to_vec()), "Data not replicated to node 2");
|
|
|
|
println!("Reading from node 3...");
|
|
let mut client3 = Client::connect(format!("http://{}", api3))
|
|
.await
|
|
.unwrap();
|
|
let val3 = client3.get("test/key1").await.unwrap();
|
|
assert_eq!(val3, Some(b"value1".to_vec()), "Data not replicated to node 3");
|
|
|
|
println!("✓ State replication verified");
|
|
|
|
// Cleanup
|
|
handle1.abort();
|
|
handle2.abort();
|
|
handle3.abort();
|
|
}
|
|
|
|
#[tokio::test]
|
|
#[ignore]
|
|
async fn test_3node_follower_crash() {
|
|
println!("\n=== Test: Follower Crash (Node Remains Available) ===");
|
|
|
|
// Start cluster
|
|
let (config1, _temp1) = cluster_config(1);
|
|
let api1 = config1.network.api_addr;
|
|
let server1 = Server::new(config1).await.unwrap();
|
|
let handle1 = tokio::spawn(async move { server1.run().await });
|
|
|
|
let (config2, _temp2) = cluster_config(2);
|
|
let server2 = Server::new(config2).await.unwrap();
|
|
let handle2 = tokio::spawn(async move { server2.run().await });
|
|
|
|
let (config3, _temp3) = cluster_config(3);
|
|
let api3 = config3.network.api_addr;
|
|
let server3 = Server::new(config3).await.unwrap();
|
|
let handle3 = tokio::spawn(async move { server3.run().await });
|
|
|
|
sleep(Duration::from_secs(2)).await;
|
|
println!("Cluster started");
|
|
|
|
// Write initial data
|
|
let mut client1 = Client::connect(format!("http://{}", api1))
|
|
.await
|
|
.unwrap();
|
|
println!("Writing initial data...");
|
|
client1.put("test/before-crash", "initial").await.unwrap();
|
|
|
|
// Kill node 2 (follower)
|
|
println!("Killing node 2 (follower)...");
|
|
handle2.abort();
|
|
sleep(Duration::from_millis(500)).await;
|
|
|
|
// Cluster should still be operational (2/3 quorum)
|
|
println!("Writing data after crash...");
|
|
client1
|
|
.put("test/after-crash", "still-working")
|
|
.await
|
|
.expect("Write should succeed with 2/3 quorum");
|
|
|
|
// Read from node 3
|
|
let mut client3 = Client::connect(format!("http://{}", api3))
|
|
.await
|
|
.unwrap();
|
|
let val = client3.get("test/after-crash").await.unwrap();
|
|
assert_eq!(val, Some(b"still-working".to_vec()));
|
|
|
|
println!("✓ Cluster operational after follower crash");
|
|
|
|
// Cleanup
|
|
handle1.abort();
|
|
handle3.abort();
|
|
}
|
|
|
|
#[tokio::test]
|
|
#[ignore]
|
|
async fn test_3node_leader_crash_reelection() {
|
|
println!("\n=== Test: Leader Crash & Re-election ===");
|
|
|
|
// Start cluster
|
|
let (config1, _temp1) = cluster_config(1);
|
|
let server1 = Server::new(config1).await.unwrap();
|
|
let handle1 = tokio::spawn(async move { server1.run().await });
|
|
|
|
let (config2, _temp2) = cluster_config(2);
|
|
let api2 = config2.network.api_addr;
|
|
let server2 = Server::new(config2).await.unwrap();
|
|
let handle2 = tokio::spawn(async move { server2.run().await });
|
|
|
|
let (config3, _temp3) = cluster_config(3);
|
|
let api3 = config3.network.api_addr;
|
|
let server3 = Server::new(config3).await.unwrap();
|
|
let handle3 = tokio::spawn(async move { server3.run().await });
|
|
|
|
sleep(Duration::from_secs(2)).await;
|
|
println!("Cluster started");
|
|
|
|
// Determine initial leader
|
|
let mut client2 = Client::connect(format!("http://{}", api2))
|
|
.await
|
|
.unwrap();
|
|
let initial_status = client2.status().await.unwrap();
|
|
let initial_leader = initial_status.leader;
|
|
println!("Initial leader: node {}", initial_leader);
|
|
|
|
// Kill the leader (assume node 1)
|
|
println!("Killing leader (node 1)...");
|
|
handle1.abort();
|
|
|
|
// Wait for re-election (should be < 1s per requirements)
|
|
println!("Waiting for re-election...");
|
|
sleep(Duration::from_secs(1)).await;
|
|
|
|
// Verify new leader elected
|
|
let new_status = client2.status().await.unwrap();
|
|
println!(
|
|
"New leader: node {}, term: {}",
|
|
new_status.leader, new_status.raft_term
|
|
);
|
|
assert!(new_status.leader > 0, "No new leader elected");
|
|
assert!(
|
|
new_status.raft_term > initial_status.raft_term,
|
|
"Raft term should increase after re-election"
|
|
);
|
|
|
|
println!("✓ Leader re-election successful within 1s");
|
|
|
|
// Verify cluster still functional
|
|
let mut client3 = Client::connect(format!("http://{}", api3))
|
|
.await
|
|
.unwrap();
|
|
client3
|
|
.put("test/post-reelection", "functional")
|
|
.await
|
|
.expect("Cluster should be functional after re-election");
|
|
|
|
println!("✓ Cluster operational after re-election");
|
|
|
|
// Cleanup
|
|
handle2.abort();
|
|
handle3.abort();
|
|
}
|