//! Integration tests for Leader Election (P1) and Log Replication (P2) //! //! Tests cover: //! - Single-node auto-election //! - 3-node majority election //! - Role transitions //! - Term management //! - Heartbeat mechanism //! - Log replication //! - Leader failure recovery #![cfg(all(test, feature = "custom-raft"))] use std::sync::Arc; use std::time::Duration; use tokio::time; use tokio::sync::mpsc; use chainfire_raft::core::{ RaftCore, RaftConfig, RaftRole, NodeId, }; use chainfire_raft::network::custom_test_client::{InMemoryRpcClient, RpcMessage}; use chainfire_storage::{LogStorage, StateMachine, RocksStore}; /// Helper to create a test node async fn create_test_node(node_id: NodeId, peers: Vec) -> (Arc, tempfile::TempDir) { let temp_dir = tempfile::TempDir::new().unwrap(); let rocks = RocksStore::new(temp_dir.path()).unwrap(); let storage = Arc::new(LogStorage::new(rocks.clone())); let state_machine = Arc::new(StateMachine::new(rocks).unwrap()); let network = Arc::new(InMemoryRpcClient::new()); let config = RaftConfig { election_timeout_min: 150, election_timeout_max: 300, heartbeat_interval: 50, }; let node = Arc::new(RaftCore::new( node_id, peers, storage, state_machine, network, config, )); node.initialize().await.unwrap(); (node, temp_dir) } /// Helper to create a 3-node cluster with RPC wiring async fn create_3node_cluster() -> ( Vec>, Vec, Arc, ) { let network = Arc::new(InMemoryRpcClient::new()); let mut nodes = Vec::new(); let mut temp_dirs = Vec::new(); // Create 3 nodes for node_id in 1..=3 { let peers: Vec = (1..=3).filter(|&id| id != node_id).collect(); let temp_dir = tempfile::TempDir::new().unwrap(); let rocks = RocksStore::new(temp_dir.path()).unwrap(); let storage = Arc::new(LogStorage::new(rocks.clone())); let state_machine = Arc::new(StateMachine::new(rocks).unwrap()); let config = RaftConfig { election_timeout_min: 150, // 150ms - matches single-node test election_timeout_max: 300, // 300ms heartbeat_interval: 50, // 50ms - matches 
single-node test }; let node = Arc::new(RaftCore::new( node_id, peers, storage, state_machine, Arc::clone(&network) as Arc, config, )); node.initialize().await.unwrap(); nodes.push(node); temp_dirs.push(temp_dir); } // Wire up RPC channels for each node for node in &nodes { let node_id = node.node_id(); let (tx, mut rx) = mpsc::unbounded_channel::(); network.register(node_id, tx).await; // Spawn handler for this node's RPC messages let node_clone = Arc::clone(node); tokio::spawn(async move { eprintln!("[RPC Handler {}] Started", node_clone.node_id()); while let Some(msg) = rx.recv().await { match msg { RpcMessage::Vote(req, resp_tx) => { eprintln!("[RPC Handler {}] Processing Vote from {}", node_clone.node_id(), req.candidate_id); node_clone.request_vote_rpc(req, resp_tx).await; } RpcMessage::AppendEntries(req, resp_tx) => { eprintln!("[RPC Handler {}] Processing AppendEntries from {} term={}", node_clone.node_id(), req.leader_id, req.term); node_clone.append_entries_rpc(req, resp_tx).await; } } } eprintln!("[RPC Handler {}] Stopped (channel closed)", node_clone.node_id()); }); } // Give all RPC handler tasks time to start tokio::time::sleep(tokio::time::Duration::from_millis(10)).await; (nodes, temp_dirs, network) } // ============================================================================ // Test Cases // ============================================================================ #[tokio::test] async fn test_node_creation_and_initialization() { // Test that we can create a node and initialize it let (node, _temp_dir) = create_test_node(1, vec![2, 3]).await; // Node should start as follower assert_eq!(node.role().await, RaftRole::Follower); // Node ID should be correct assert_eq!(node.node_id(), 1); // Term should start at 0 assert_eq!(node.current_term().await, 0); } #[tokio::test] async fn test_role_transitions() { // Test basic role enumeration assert_ne!(RaftRole::Follower, RaftRole::Candidate); assert_ne!(RaftRole::Candidate, RaftRole::Leader); 
assert_ne!(RaftRole::Leader, RaftRole::Follower); } #[tokio::test] async fn test_term_persistence() { // Test that term can be persisted and loaded let temp_dir = tempfile::TempDir::new().unwrap(); let path = temp_dir.path().to_str().unwrap().to_string(); { // Create first node and let it initialize let rocks = RocksStore::new(&path).unwrap(); let storage = Arc::new(LogStorage::new(rocks.clone())); let state_machine = Arc::new(StateMachine::new(rocks).unwrap()); let network = Arc::new(InMemoryRpcClient::new()); let node = Arc::new(RaftCore::new( 1, vec![2, 3], storage, state_machine, network, RaftConfig::default(), )); node.initialize().await.unwrap(); // Initial term should be 0 assert_eq!(node.current_term().await, 0); } { // Create second node with same storage path let rocks = RocksStore::new(&path).unwrap(); let storage = Arc::new(LogStorage::new(rocks.clone())); let state_machine = Arc::new(StateMachine::new(rocks).unwrap()); let network = Arc::new(InMemoryRpcClient::new()); let node = Arc::new(RaftCore::new( 1, vec![2, 3], storage, state_machine, network, RaftConfig::default(), )); node.initialize().await.unwrap(); // Term should still be 0 (loaded from storage) assert_eq!(node.current_term().await, 0); } } #[tokio::test] async fn test_config_defaults() { // Test that default config has reasonable values let config = RaftConfig::default(); assert!(config.election_timeout_min > 0); assert!(config.election_timeout_max > config.election_timeout_min); assert!(config.heartbeat_interval > 0); assert!(config.heartbeat_interval < config.election_timeout_min); } // ============================================================================ // P2: Log Replication Integration Tests // ============================================================================ #[tokio::test] async fn test_3node_cluster_formation() { // Test 1: 3-Node Cluster Formation Test // - 3 nodes start → Leader elected // - All followers receive heartbeat // - No election timeout occurs let 
(nodes, _temp_dirs, _network) = create_3node_cluster().await; // Start event loops for all nodes let mut handles = Vec::new(); for node in &nodes { let node_clone = Arc::clone(node); let handle = tokio::spawn(async move { let _ = node_clone.run().await; }); handles.push(handle); } // Wait for leader election (should happen within ~500ms) time::sleep(Duration::from_millis(500)).await; // Check that exactly one leader was elected let mut leader_count = 0; let mut follower_count = 0; let mut leader_id = None; for node in &nodes { match node.role().await { RaftRole::Leader => { leader_count += 1; leader_id = Some(node.node_id()); } RaftRole::Follower => { follower_count += 1; } RaftRole::Candidate => { // Should not have candidates after election panic!("Node {} is still candidate after election", node.node_id()); } } } assert_eq!(leader_count, 1, "Expected exactly one leader"); assert_eq!(follower_count, 2, "Expected exactly two followers"); assert!(leader_id.is_some(), "Leader should be identified"); println!("✓ Leader elected: node {}", leader_id.unwrap()); // Wait a bit more to ensure heartbeats prevent election timeout // Heartbeat interval is 50ms, election timeout is 150-300ms // So after 400ms, no new election should occur time::sleep(Duration::from_millis(400)).await; // Verify leader is still the same for node in &nodes { if node.node_id() == leader_id.unwrap() { assert_eq!(node.role().await, RaftRole::Leader, "Leader should remain leader"); } else { assert_eq!( node.role().await, RaftRole::Follower, "Followers should remain followers due to heartbeats" ); } } println!("✓ Heartbeats prevent election timeout"); } #[tokio::test] #[ignore] // Requires client write API implementation async fn test_log_replication() { // Test 2: Log Replication Test // - Leader adds entries // - Replicated to all followers // - commit_index synchronized // TODO: Implement once client write API is ready // This requires handle_client_write to be fully implemented } #[tokio::test] 
#[ignore] // Requires graceful node shutdown
async fn test_leader_failure_recovery() {
    // Test 3: Leader Failure Test
    // - Leader stops → New leader elected
    // - Log consistency maintained
    // TODO: Implement once we have graceful shutdown mechanism
    // Currently, aborting the event loop doesn't cleanly stop the node
}

// ============================================================================
// Deferred complex tests
// ============================================================================

#[tokio::test]
#[ignore] // Requires full cluster setup
async fn test_split_vote_recovery() {
    // Test that cluster recovers from split vote
    // Deferred: Requires complex timing control
}

#[tokio::test]
#[ignore] // Requires node restart mechanism
async fn test_vote_persistence_across_restart() {
    // Test that votes persist across node restarts
    // Deferred: Requires proper shutdown/startup sequencing
}

// ============================================================================
// P3: Commitment & State Machine Integration Tests
// ============================================================================

/// End-to-end write path: client write on the leader must replicate to all
/// followers, advance commit_index, and be applied to every state machine.
#[tokio::test]
async fn test_write_replicate_commit() {
    // Test: Client write on leader → replication → commit → state machine apply
    // Verifies the complete write→replicate→commit→apply flow
    use chainfire_types::command::RaftCommand;

    let (nodes, _temp_dirs, _network) = create_3node_cluster().await;

    // Start event loops for all nodes
    let mut handles = Vec::new();
    for node in &nodes {
        let node_clone = Arc::clone(node);
        let handle = tokio::spawn(async move {
            let _ = node_clone.run().await;
        });
        handles.push(handle);
    }

    // Wait for leader election.
    // NOTE(review): an earlier comment claimed a 2-4s election timeout, but
    // create_3node_cluster configures 150-300ms; this 5s wait is a very
    // generous margin — confirm which timing is intended.
    time::sleep(Duration::from_millis(5000)).await;

    // Find the leader
    let mut leader = None;
    for node in &nodes {
        if matches!(node.role().await, RaftRole::Leader) {
            leader = Some(node);
            break;
        }
    }
    let leader = leader.expect("Leader should be elected");
    println!("✓ Leader elected: node {}", leader.node_id());

    // Submit a write command to the leader
    let cmd = RaftCommand::Put {
        key: b"test_key_1".to_vec(),
        value: b"test_value_1".to_vec(),
        lease_id: None,
        prev_kv: false,
    };
    leader
        .client_write(cmd)
        .await
        .expect("Client write should succeed");
    println!("✓ Client write submitted to leader");

    // Wait for replication and commit (heartbeat + replication + commit)
    // Heartbeat interval is 50ms, need multiple rounds:
    // 1. First heartbeat sends entries
    // 2. Followers ack, leader updates match_index and commit_index
    // 3. Second heartbeat propagates new leader_commit to followers
    // 4. Followers update their commit_index and apply entries
    // Give extra time to avoid re-election issues
    time::sleep(Duration::from_millis(1500)).await;

    // Debug: Check all nodes' roles and states
    println!("\nDEBUG: All nodes after write:");
    for node in &nodes {
        println!("  Node {} role={:?} term={} commit_index={} last_applied={}",
            node.node_id(),
            node.role().await,
            node.current_term().await,
            node.commit_index().await,
            node.last_applied().await);
    }
    println!();

    // Verify that the value is committed and applied on all nodes
    for node in &nodes {
        let commit_index = node.commit_index().await;
        let last_applied = node.last_applied().await;
        assert!(
            commit_index >= 1,
            "Node {} should have commit_index >= 1, got {}",
            node.node_id(),
            commit_index
        );
        assert!(
            last_applied >= 1,
            "Node {} should have last_applied >= 1, got {}",
            node.node_id(),
            last_applied
        );

        // Verify the value exists in the state machine
        let state_machine = node.state_machine();
        let result = state_machine.kv().get(b"test_key_1").expect("Get should succeed");
        assert!(
            result.is_some(),
            "Node {} should have test_key_1 in state machine",
            node.node_id()
        );
        let entry = result.unwrap();
        assert_eq!(
            entry.value,
            b"test_value_1",
            "Node {} has wrong value for test_key_1",
            node.node_id()
        );
        println!(
            "✓ Node {} has test_key_1=test_value_1 (commit_index={}, last_applied={})",
            node.node_id(),
            commit_index,
            last_applied
        );
    }

    println!("✓ All nodes have committed and applied the write");
}

/// Ordering guarantee: five sequential writes must appear on every node,
/// with commit_index/last_applied covering all of them.
#[tokio::test]
async fn test_commit_consistency() {
    // Test: Multiple writes preserve order across all nodes
    // Verifies that the commit mechanism maintains consistency
    use chainfire_types::command::RaftCommand;

    let (nodes, _temp_dirs, _network) = create_3node_cluster().await;

    // Start event loops
    let mut handles = Vec::new();
    for node in &nodes {
        let node_clone = Arc::clone(node);
        let handle = tokio::spawn(async move {
            let _ = node_clone.run().await;
        });
        handles.push(handle);
    }

    // Wait for leader election.
    // NOTE(review): cluster config uses a 150-300ms election timeout; 5s is a
    // generous margin — confirm intended timing.
    time::sleep(Duration::from_millis(5000)).await;

    // Find the leader
    let mut leader = None;
    for node in &nodes {
        if matches!(node.role().await, RaftRole::Leader) {
            leader = Some(node);
            break;
        }
    }
    let leader = leader.expect("Leader should be elected");
    println!("✓ Leader elected: node {}", leader.node_id());

    // Submit multiple writes in sequence
    for i in 1..=5 {
        let cmd = RaftCommand::Put {
            key: format!("key_{}", i).into_bytes(),
            value: format!("value_{}", i).into_bytes(),
            lease_id: None,
            prev_kv: false,
        };
        leader
            .client_write(cmd)
            .await
            .expect("Client write should succeed");
    }
    println!("✓ Submitted 5 writes to leader");

    // Wait for all writes to commit and apply
    time::sleep(Duration::from_millis(500)).await;

    // Verify all nodes have all 5 keys in correct order
    for node in &nodes {
        let commit_index = node.commit_index().await;
        let last_applied = node.last_applied().await;
        assert!(
            commit_index >= 5,
            "Node {} should have commit_index >= 5, got {}",
            node.node_id(),
            commit_index
        );
        assert!(
            last_applied >= 5,
            "Node {} should have last_applied >= 5, got {}",
            node.node_id(),
            last_applied
        );

        let state_machine = node.state_machine();
        for i in 1..=5 {
            let key = format!("key_{}", i).into_bytes();
            let expected_value = format!("value_{}", i).into_bytes();
            let result = state_machine.kv().get(&key).expect("Get should succeed");
            assert!(
                result.is_some(),
                "Node {} missing key_{}",
                node.node_id(),
                i
            );
            let entry = result.unwrap();
            assert_eq!(
                entry.value,
                expected_value,
                "Node {} has wrong value for key_{}",
                node.node_id(),
                i
            );
        }
        println!(
            "✓ Node {} has all 5 keys in correct order (commit_index={}, last_applied={})",
            node.node_id(),
            commit_index,
            last_applied
        );
    }

    println!("✓ All nodes maintain consistent order");
}

/// Safety: a follower must reject client writes with a NotLeader error so
/// that all writes funnel through the leader.
#[tokio::test]
async fn test_leader_only_write() {
    // Test: Follower should reject client writes
    // Verifies that only the leader can accept writes (Raft safety)
    use chainfire_types::command::RaftCommand;
    use chainfire_raft::core::RaftError;

    let (nodes, _temp_dirs, _network) = create_3node_cluster().await;

    // Start event loops
    let mut handles = Vec::new();
    for node in &nodes {
        let node_clone = Arc::clone(node);
        let handle = tokio::spawn(async move {
            let _ = node_clone.run().await;
        });
        handles.push(handle);
    }

    // Wait for leader election.
    // NOTE(review): cluster config uses a 150-300ms election timeout; 5s is a
    // generous margin — confirm intended timing.
    time::sleep(Duration::from_millis(5000)).await;

    // Find a follower
    let mut follower = None;
    for node in &nodes {
        if matches!(node.role().await, RaftRole::Follower) {
            follower = Some(node);
            break;
        }
    }
    let follower = follower.expect("Follower should exist");
    println!("✓ Found follower: node {}", follower.node_id());

    // Try to write to the follower
    let cmd = RaftCommand::Put {
        key: b"follower_write".to_vec(),
        value: b"should_fail".to_vec(),
        lease_id: None,
        prev_kv: false,
    };
    let result = follower.client_write(cmd).await;

    // Should return NotLeader error
    assert!(
        result.is_err(),
        "Follower write should fail with NotLeader error"
    );
    if let Err(RaftError::NotLeader { .. }) = result {
        println!("✓ Follower correctly rejected write with NotLeader error");
    } else {
        panic!(
            "Expected NotLeader error, got: {:?}",
            result.err().unwrap()
        );
    }
}