photoncloud-monorepo/chainfire/crates/chainfire-server/tests/cluster_integration.rs
centra d2149b6249 fix(lightningstor): Fix SigV4 canonicalization for AWS S3 auth
- Replace form_urlencoded with RFC 3986 compliant URI encoding
- Implement aws_uri_encode() matching AWS SigV4 spec exactly
- Unreserved chars (A-Z,a-z,0-9,-,_,.,~) not encoded
- All other chars percent-encoded with uppercase hex
- Preserve slashes in paths, encode in query params
- Normalize empty paths to '/' per AWS spec
- Fix test expectations (body hash, HMAC values)
- Add comprehensive SigV4 signature determinism test

This fixes the canonicalization mismatch that caused signature
validation failures in T047. Auth can now be enabled for production.

Refs: T058.S1
2025-12-12 06:23:46 +09:00

655 lines
24 KiB
Rust

//! Chainfire 3-Node Cluster Integration Test
//!
//! Verifies HA behavior: leader election, state replication, and node recovery.
use chainfire_client::Client;
use chainfire_server::{
config::{ClusterConfig, NetworkConfig, NodeConfig, RaftConfig, ServerConfig, StorageConfig},
server::Server,
};
use chainfire_types::RaftRole;
use std::net::SocketAddr;
use std::time::Duration;
use tokio::time::sleep;
/// Build the configuration for one node of a 3-node join-flow cluster.
///
/// Node 1 bootstraps as the sole voter; nodes 2 and 3 start as learners
/// and are expected to be registered later via the `member_add` API.
/// Returns the config together with the `TempDir` backing its storage
/// (keep the guard alive for the server's lifetime).
fn cluster_config_with_join(node_id: u64) -> (ServerConfig, tempfile::TempDir) {
    // Each node owns a port block: API, Raft (+1), gossip (+2).
    let base_port = match node_id {
        1 => 12379,
        2 => 22379,
        3 => 32379,
        _ => panic!("Invalid node_id"),
    };
    let bind = |offset: u16| -> SocketAddr {
        format!("127.0.0.1:{}", base_port + offset).parse().unwrap()
    };
    let temp_dir = tempfile::tempdir().unwrap();
    let config = ServerConfig {
        node: NodeConfig {
            id: node_id,
            name: format!("test-node-{}", node_id),
            role: "control_plane".to_string(),
        },
        cluster: ClusterConfig {
            id: 1,
            // Only the first node bootstraps; everyone else joins via API.
            bootstrap: node_id == 1,
            // Empty list: node 1 starts alone, others are added later.
            initial_members: vec![],
        },
        network: NetworkConfig {
            api_addr: bind(0),
            raft_addr: bind(1),
            gossip_addr: bind(2),
            tls: None,
        },
        storage: StorageConfig {
            data_dir: temp_dir.path().to_path_buf(),
        },
        raft: RaftConfig {
            // Bootstrap node is a voter; the rest begin life as learners.
            role: if node_id == 1 { RaftRole::Voter } else { RaftRole::Learner },
        },
    };
    (config, temp_dir)
}
/// Backwards-compatible shorthand for [`cluster_config_with_join`];
/// retained so the older tests keep compiling unchanged.
fn cluster_config(node_id: u64) -> (ServerConfig, tempfile::TempDir) {
    cluster_config_with_join(node_id)
}
/// Build the configuration for one node of a 3-node cluster formed by
/// simultaneous bootstrap: every node carries the identical
/// `initial_members` list and is a voter from the start, which sidesteps
/// the add_learner join path entirely.
fn cluster_config_simultaneous_bootstrap(node_id: u64) -> (ServerConfig, tempfile::TempDir) {
    use chainfire_server::config::MemberConfig;
    // Each node owns a port block: API, Raft (+1), gossip (+2).
    let base_port = match node_id {
        1 => 12379,
        2 => 22379,
        3 => 32379,
        _ => panic!("Invalid node_id"),
    };
    let bind = |offset: u16| -> SocketAddr {
        format!("127.0.0.1:{}", base_port + offset).parse().unwrap()
    };
    let temp_dir = tempfile::tempdir().unwrap();
    // The same membership list is handed to every node; each entry's Raft
    // port is that node's base port + 1.
    let members: Vec<MemberConfig> = [(1u64, 12380u16), (2, 22380), (3, 32380)]
        .into_iter()
        .map(|(id, port)| MemberConfig {
            id,
            raft_addr: format!("127.0.0.1:{}", port),
        })
        .collect();
    let config = ServerConfig {
        node: NodeConfig {
            id: node_id,
            name: format!("test-node-{}", node_id),
            role: "control_plane".to_string(),
        },
        cluster: ClusterConfig {
            id: 1,
            // Only node 1 triggers bootstrap, but it does so with the full
            // member list so the cluster forms with three voters at once.
            bootstrap: node_id == 1,
            initial_members: members,
        },
        network: NetworkConfig {
            api_addr: bind(0),
            raft_addr: bind(1),
            gossip_addr: bind(2),
            tls: None,
        },
        storage: StorageConfig {
            data_dir: temp_dir.path().to_path_buf(),
        },
        raft: RaftConfig {
            // Every node is a voter from the start in this mode.
            role: RaftRole::Voter,
        },
    };
    (config, temp_dir)
}
/// Build a single-node cluster configuration, used to exercise basic
/// Raft behavior (self-election, commits) without any peers.
fn single_node_config() -> (ServerConfig, tempfile::TempDir) {
    let temp_dir = tempfile::tempdir().unwrap();
    let bind = |addr: &str| -> SocketAddr { addr.parse().unwrap() };
    let config = ServerConfig {
        node: NodeConfig {
            id: 1,
            name: "test-node-1".to_string(),
            role: "control_plane".to_string(),
        },
        cluster: ClusterConfig {
            id: 1,
            // A lone node bootstraps itself; an empty member list means
            // single-node operation.
            bootstrap: true,
            initial_members: vec![],
        },
        network: NetworkConfig {
            api_addr: bind("127.0.0.1:12379"),
            raft_addr: bind("127.0.0.1:12380"),
            gossip_addr: bind("127.0.0.1:12381"),
            tls: None,
        },
        storage: StorageConfig {
            data_dir: temp_dir.path().to_path_buf(),
        },
        raft: RaftConfig::default(),
    };
    (config, temp_dir)
}
/// A lone bootstrapped node must elect itself leader and then serve a
/// basic put/get round trip.
#[tokio::test]
#[ignore] // Run with: cargo test --test cluster_integration -- --ignored
async fn test_single_node_raft_leader_election() {
    println!("\n=== Test: Single-Node Raft Leader Election ===");
    // Spin up the one and only node.
    let (cfg, _tmp) = single_node_config();
    let addr = cfg.network.api_addr;
    println!("Creating single-node cluster...");
    let srv = Server::new(cfg).await.unwrap();
    let task = tokio::spawn(async move { srv.run().await });
    println!("Node started: {}", addr);
    // Give the node a moment to elect itself.
    println!("Waiting for leader election...");
    sleep(Duration::from_secs(2)).await;
    // The node must report itself (id 1) as leader at a positive term.
    let mut client = Client::connect(format!("http://{}", addr))
        .await
        .expect("Failed to connect");
    let status = client.status().await.expect("Failed to get status");
    println!(
        "Node status: leader={}, term={}",
        status.leader, status.raft_term
    );
    assert_eq!(status.leader, 1, "Node 1 should be leader in single-node cluster");
    assert!(status.raft_term > 0, "Raft term should be > 0");
    // A put followed by a get exercises the full commit path.
    println!("Testing KV operations...");
    client.put("test-key", "test-value").await.unwrap();
    let fetched = client.get("test-key").await.unwrap();
    assert_eq!(fetched, Some(b"test-value".to_vec()));
    println!("✓ Single-node Raft working correctly");
    // Stop the server task.
    task.abort();
}
/// Full join-flow cluster formation test: node 1 bootstraps as the sole
/// voter, nodes 2 & 3 are registered through the `member_add` API
/// (`false` = add as voter), and afterwards all three nodes must agree
/// on a single elected leader.
///
/// NOTE(review): synchronization is sleep-based; on a loaded CI machine
/// the fixed delays may be too short — consider polling status instead.
#[tokio::test]
#[ignore] // Run with: cargo test --test cluster_integration -- --ignored
async fn test_3node_leader_election_with_join() {
    println!("\n=== Test: 3-Node Leader Election with Join Flow ===");
    // Start Node 1 (bootstrap alone)
    let (config1, _temp1) = cluster_config_with_join(1);
    let api1 = config1.network.api_addr;
    let raft1 = config1.network.raft_addr;
    println!("Creating Node 1 (bootstrap)...");
    let server1 = Server::new(config1).await.unwrap();
    let handle1 = tokio::spawn(async move { server1.run().await });
    println!("Node 1 started: API={}, Raft={}", api1, raft1);
    // Wait for node 1 to become leader
    sleep(Duration::from_secs(2)).await;
    // Verify node 1 is leader of its single-node cluster before joins.
    let mut client1 = Client::connect(format!("http://{}", api1))
        .await
        .expect("Failed to connect to node 1");
    let status1 = client1.status().await.expect("Failed to get status");
    println!("Node 1 status: leader={}, term={}", status1.leader, status1.raft_term);
    assert_eq!(status1.leader, 1, "Node 1 should be leader");
    // Start Node 2 (no bootstrap)
    let (config2, _temp2) = cluster_config_with_join(2);
    let api2 = config2.network.api_addr;
    let raft2 = config2.network.raft_addr;
    println!("Creating Node 2...");
    let server2 = Server::new(config2).await.unwrap();
    let handle2 = tokio::spawn(async move { server2.run().await });
    println!("Node 2 started: API={}, Raft={}", api2, raft2);
    sleep(Duration::from_millis(500)).await;
    // Start Node 3 (no bootstrap)
    let (config3, _temp3) = cluster_config_with_join(3);
    let api3 = config3.network.api_addr;
    let raft3 = config3.network.raft_addr;
    println!("Creating Node 3...");
    let server3 = Server::new(config3).await.unwrap();
    let handle3 = tokio::spawn(async move { server3.run().await });
    println!("Node 3 started: API={}, Raft={}", api3, raft3);
    sleep(Duration::from_millis(500)).await;
    // Add node 2 to cluster via member_add API (third arg false = voter).
    println!("Adding node 2 to cluster via member_add API...");
    let member2_id = client1
        .member_add(2, raft2.to_string(), false) // node_id=2, false=voter
        .await
        .expect("Failed to add node 2");
    println!("Node 2 added with ID: {}", member2_id);
    assert_eq!(member2_id, 2, "Node 2 should have ID 2");
    // Add node 3 to cluster via member_add API
    println!("Adding node 3 to cluster via member_add API...");
    let member3_id = client1
        .member_add(3, raft3.to_string(), false) // node_id=3, false=voter
        .await
        .expect("Failed to add node 3");
    println!("Node 3 added with ID: {}", member3_id);
    assert_eq!(member3_id, 3, "Node 3 should have ID 3");
    // Wait for cluster membership changes to propagate
    sleep(Duration::from_secs(3)).await;
    // Query every node's view of the cluster after formation.
    let status1 = client1.status().await.expect("Failed to get status from node 1");
    println!("Node 1 final status: leader={}, term={}", status1.leader, status1.raft_term);
    let mut client2 = Client::connect(format!("http://{}", api2))
        .await
        .expect("Failed to connect to node 2");
    let status2 = client2.status().await.expect("Failed to get status from node 2");
    println!("Node 2 final status: leader={}, term={}", status2.leader, status2.raft_term);
    let mut client3 = Client::connect(format!("http://{}", api3))
        .await
        .expect("Failed to connect to node 3");
    let status3 = client3.status().await.expect("Failed to get status from node 3");
    println!("Node 3 final status: leader={}, term={}", status3.leader, status3.raft_term);
    // All nodes should agree on the leader (the leader id itself may
    // differ from 1 if leadership moved during membership changes).
    assert_eq!(status1.leader, status2.leader, "Nodes 1 and 2 disagree on leader");
    assert_eq!(status1.leader, status3.leader, "Nodes 1 and 3 disagree on leader");
    assert!(status1.leader > 0, "No leader elected");
    println!("✓ 3-node cluster formed successfully with join flow");
    // Cleanup: abort the spawned server tasks.
    handle1.abort();
    handle2.abort();
    handle3.abort();
}
/// Verifies state replication: a value written through the leader must
/// become readable from both follower nodes once Raft replication has
/// caught up.
#[tokio::test]
#[ignore]
async fn test_3node_state_replication() {
    println!("\n=== Test: 3-Node State Replication ===");
    // Start all three nodes (node 1 bootstraps, 2 & 3 wait to be joined).
    let (config1, _temp1) = cluster_config(1);
    let api1 = config1.network.api_addr;
    let server1 = Server::new(config1).await.unwrap();
    let handle1 = tokio::spawn(async move { server1.run().await });
    let (config2, _temp2) = cluster_config(2);
    let api2 = config2.network.api_addr;
    let raft2 = config2.network.raft_addr;
    let server2 = Server::new(config2).await.unwrap();
    let handle2 = tokio::spawn(async move { server2.run().await });
    let (config3, _temp3) = cluster_config(3);
    let api3 = config3.network.api_addr;
    let raft3 = config3.network.raft_addr;
    let server3 = Server::new(config3).await.unwrap();
    let handle3 = tokio::spawn(async move { server3.run().await });
    sleep(Duration::from_secs(2)).await;
    println!("Cluster started");
    let mut client1 = Client::connect(format!("http://{}", api1))
        .await
        .unwrap();
    // Fix: `cluster_config` uses the join flow (empty initial_members),
    // so nodes 2 & 3 are not cluster members until explicitly added.
    // Without this step nothing could ever replicate to them.
    println!("Adding nodes 2 and 3 as voters...");
    client1
        .member_add(2, raft2.to_string(), false) // false = voter
        .await
        .expect("Failed to add node 2");
    client1
        .member_add(3, raft3.to_string(), false) // false = voter
        .await
        .expect("Failed to add node 3");
    sleep(Duration::from_secs(2)).await;
    // Write through the leader (node 1 bootstrapped, so it leads).
    println!("Writing test data to node 1...");
    client1.put("test/key1", "value1").await.unwrap();
    client1.put("test/key2", "value2").await.unwrap();
    client1.put("test/key3", "value3").await.unwrap();
    // Give replication a moment to reach the followers.
    sleep(Duration::from_millis(500)).await;
    // Each follower must serve the replicated value.
    println!("Reading from node 2...");
    let mut client2 = Client::connect(format!("http://{}", api2))
        .await
        .unwrap();
    let val2 = client2.get("test/key1").await.unwrap();
    assert_eq!(val2, Some(b"value1".to_vec()), "Data not replicated to node 2");
    println!("Reading from node 3...");
    let mut client3 = Client::connect(format!("http://{}", api3))
        .await
        .unwrap();
    let val3 = client3.get("test/key1").await.unwrap();
    assert_eq!(val3, Some(b"value1".to_vec()), "Data not replicated to node 3");
    println!("✓ State replication verified");
    // Cleanup: abort the spawned server tasks.
    handle1.abort();
    handle2.abort();
    handle3.abort();
}
/// Verifies availability after a follower crash: with three voters a
/// 2/3 quorum survives the loss of one follower, so writes must still
/// commit and remain readable from the surviving follower.
#[tokio::test]
#[ignore]
async fn test_3node_follower_crash() {
    println!("\n=== Test: Follower Crash (Node Remains Available) ===");
    // Start all three nodes.
    let (config1, _temp1) = cluster_config(1);
    let api1 = config1.network.api_addr;
    let server1 = Server::new(config1).await.unwrap();
    let handle1 = tokio::spawn(async move { server1.run().await });
    let (config2, _temp2) = cluster_config(2);
    let raft2 = config2.network.raft_addr;
    let server2 = Server::new(config2).await.unwrap();
    let handle2 = tokio::spawn(async move { server2.run().await });
    let (config3, _temp3) = cluster_config(3);
    let api3 = config3.network.api_addr;
    let raft3 = config3.network.raft_addr;
    let server3 = Server::new(config3).await.unwrap();
    let handle3 = tokio::spawn(async move { server3.run().await });
    sleep(Duration::from_secs(2)).await;
    println!("Cluster started");
    let mut client1 = Client::connect(format!("http://{}", api1))
        .await
        .unwrap();
    // Fix: `cluster_config` uses the join flow (empty initial_members),
    // so nodes 2 & 3 must be added as voters before a 2/3 quorum exists.
    // Previously this test exercised a single-node cluster.
    println!("Adding nodes 2 and 3 as voters...");
    client1
        .member_add(2, raft2.to_string(), false) // false = voter
        .await
        .expect("Failed to add node 2");
    client1
        .member_add(3, raft3.to_string(), false) // false = voter
        .await
        .expect("Failed to add node 3");
    sleep(Duration::from_secs(2)).await;
    // Write initial data while all three voters are alive.
    println!("Writing initial data...");
    client1.put("test/before-crash", "initial").await.unwrap();
    // Kill node 2 (a follower; node 1 bootstrapped and holds leadership).
    println!("Killing node 2 (follower)...");
    handle2.abort();
    sleep(Duration::from_millis(500)).await;
    // With 2 of 3 voters alive, the cluster must still accept writes.
    println!("Writing data after crash...");
    client1
        .put("test/after-crash", "still-working")
        .await
        .expect("Write should succeed with 2/3 quorum");
    // The surviving follower must see the post-crash write.
    let mut client3 = Client::connect(format!("http://{}", api3))
        .await
        .unwrap();
    let val = client3.get("test/after-crash").await.unwrap();
    assert_eq!(val, Some(b"still-working".to_vec()));
    println!("✓ Cluster operational after follower crash");
    // Cleanup: abort the remaining server tasks.
    handle1.abort();
    handle3.abort();
}
/// Verifies leader crash recovery: after the elected leader is killed,
/// the two surviving voters must re-elect a leader at a higher term and
/// the cluster must accept writes again.
#[tokio::test]
#[ignore]
async fn test_3node_leader_crash_reelection() {
    println!("\n=== Test: Leader Crash & Re-election ===");
    // Start all three nodes.
    let (config1, _temp1) = cluster_config(1);
    let api1 = config1.network.api_addr;
    let server1 = Server::new(config1).await.unwrap();
    let handle1 = tokio::spawn(async move { server1.run().await });
    let (config2, _temp2) = cluster_config(2);
    let api2 = config2.network.api_addr;
    let raft2 = config2.network.raft_addr;
    let server2 = Server::new(config2).await.unwrap();
    let handle2 = tokio::spawn(async move { server2.run().await });
    let (config3, _temp3) = cluster_config(3);
    let raft3 = config3.network.raft_addr;
    let server3 = Server::new(config3).await.unwrap();
    let handle3 = tokio::spawn(async move { server3.run().await });
    sleep(Duration::from_secs(2)).await;
    println!("Cluster started");
    // Fix: `cluster_config` uses the join flow (empty initial_members),
    // so nodes 2 & 3 must be added as voters — re-election after a
    // leader crash is impossible without three voters.
    let mut client1 = Client::connect(format!("http://{}", api1))
        .await
        .unwrap();
    client1
        .member_add(2, raft2.to_string(), false) // false = voter
        .await
        .expect("Failed to add node 2");
    client1
        .member_add(3, raft3.to_string(), false) // false = voter
        .await
        .expect("Failed to add node 3");
    sleep(Duration::from_secs(2)).await;
    // Determine the actual leader instead of assuming node 1.
    let mut client2 = Client::connect(format!("http://{}", api2))
        .await
        .unwrap();
    let initial_status = client2.status().await.unwrap();
    let initial_leader = initial_status.leader;
    println!("Initial leader: node {}", initial_leader);
    // Fix: kill the handle of whichever node actually leads. The old
    // code always aborted node 1, which is wrong if leadership moved.
    let mut handles = std::collections::HashMap::from([
        (1u64, handle1),
        (2u64, handle2),
        (3u64, handle3),
    ]);
    println!("Killing leader (node {})...", initial_leader);
    handles
        .remove(&initial_leader)
        .expect("leader id not in cluster")
        .abort();
    // Wait for re-election (should be < 1s per requirements).
    println!("Waiting for re-election...");
    sleep(Duration::from_secs(1)).await;
    // Observe the new leader from a node that is still alive.
    let mut observer = if initial_leader == 2 { client1 } else { client2 };
    let new_status = observer.status().await.unwrap();
    println!(
        "New leader: node {}, term: {}",
        new_status.leader, new_status.raft_term
    );
    assert!(new_status.leader > 0, "No new leader elected");
    assert!(
        new_status.raft_term > initial_status.raft_term,
        "Raft term should increase after re-election"
    );
    println!("✓ Leader re-election successful within 1s");
    // The cluster must still commit writes through a surviving node.
    observer
        .put("test/post-reelection", "functional")
        .await
        .expect("Cluster should be functional after re-election");
    println!("✓ Cluster operational after re-election");
    // Cleanup: abort the surviving server tasks.
    for (_, handle) in handles {
        handle.abort();
    }
}
/// Test 3-node cluster with learners only (no voter promotion).
/// T041 Workaround: Avoids change_membership by keeping nodes as learners.
///
/// Node 1 bootstraps as the only voter; nodes 2 & 3 are added with
/// `member_add(..., true)` (learner flag) and therefore replicate the
/// log without participating in elections or quorum.
#[tokio::test]
#[ignore] // Run with: cargo test --test cluster_integration test_3node_with_learners -- --ignored
async fn test_3node_with_learners() {
    println!("\n=== Test: 3-Node Cluster with Learners (T041 Workaround) ===");
    // Start Node 1 (bootstrap alone as single voter)
    let (config1, _temp1) = cluster_config_with_join(1);
    let api1 = config1.network.api_addr;
    let raft1 = config1.network.raft_addr;
    println!("Creating Node 1 (bootstrap)...");
    let server1 = Server::new(config1).await.unwrap();
    let handle1 = tokio::spawn(async move { server1.run().await });
    println!("Node 1 started: API={}, Raft={}", api1, raft1);
    // Wait for node 1 to become leader
    sleep(Duration::from_secs(2)).await;
    // Verify node 1 is leader
    let mut client1 = Client::connect(format!("http://{}", api1))
        .await
        .expect("Failed to connect to node 1");
    let status1 = client1.status().await.expect("Failed to get status");
    println!("Node 1 status: leader={}, term={}", status1.leader, status1.raft_term);
    assert_eq!(status1.leader, 1, "Node 1 should be leader");
    // Start Node 2
    let (config2, _temp2) = cluster_config_with_join(2);
    let api2 = config2.network.api_addr;
    let raft2 = config2.network.raft_addr;
    println!("Creating Node 2...");
    let server2 = Server::new(config2).await.unwrap();
    let handle2 = tokio::spawn(async move { server2.run().await });
    println!("Node 2 started: API={}, Raft={}", api2, raft2);
    sleep(Duration::from_millis(500)).await;
    // Start Node 3
    let (config3, _temp3) = cluster_config_with_join(3);
    let api3 = config3.network.api_addr;
    let raft3 = config3.network.raft_addr;
    println!("Creating Node 3...");
    let server3 = Server::new(config3).await.unwrap();
    let handle3 = tokio::spawn(async move { server3.run().await });
    println!("Node 3 started: API={}, Raft={}", api3, raft3);
    sleep(Duration::from_millis(500)).await;
    // Add node 2 as LEARNER (is_learner=true, no voter promotion)
    println!("Adding node 2 as learner (no voter promotion)...");
    let member2_id = client1
        .member_add(2, raft2.to_string(), true) // is_learner=true
        .await
        .expect("Failed to add node 2 as learner");
    println!("Node 2 added as learner with ID: {}", member2_id);
    assert_eq!(member2_id, 2);
    // Add node 3 as LEARNER
    println!("Adding node 3 as learner (no voter promotion)...");
    let member3_id = client1
        .member_add(3, raft3.to_string(), true) // is_learner=true
        .await
        .expect("Failed to add node 3 as learner");
    println!("Node 3 added as learner with ID: {}", member3_id);
    assert_eq!(member3_id, 3);
    // Wait for replication
    sleep(Duration::from_secs(2)).await;
    // Test write on leader (learners don't affect quorum, so this
    // commits with node 1's vote alone).
    println!("Testing KV write on leader...");
    client1.put("test-key", "test-value").await.expect("Put failed");
    // Wait for replication to learners
    sleep(Duration::from_secs(1)).await;
    // Verify data replicated to learner (should be able to read)
    let mut client2 = Client::connect(format!("http://{}", api2))
        .await
        .expect("Failed to connect to node 2");
    // Note: Reading from a learner may require forwarding to leader
    // For now, just verify the cluster is operational
    let status2 = client2.status().await.expect("Failed to get status from learner");
    println!("Node 2 (learner) status: leader={}, term={}", status2.leader, status2.raft_term);
    // All nodes should see node 1 as leader
    assert_eq!(status2.leader, 1, "Learner should see node 1 as leader");
    println!("✓ 3-node cluster with learners working");
    // Cleanup: abort the spawned server tasks.
    handle1.abort();
    handle2.abort();
    handle3.abort();
}
/// Test 3-node cluster formation using staggered bootstrap (DISABLED - doesn't work).
///
/// All three nodes share the same `initial_members` list; node 1 starts
/// first with bootstrap=true and full membership, then nodes 2 & 3 come
/// up and are expected to catch up. Kept for reference while the
/// simultaneous-bootstrap path is investigated.
#[tokio::test]
#[ignore]
async fn test_3node_simultaneous_bootstrap_disabled() {
    println!("\n=== Test: 3-Node Staggered Bootstrap (T041 Workaround) ===");
    // Start Node 1 first (bootstrap=true, will initialize with full membership)
    let (config1, _temp1) = cluster_config_simultaneous_bootstrap(1);
    let api1 = config1.network.api_addr;
    println!("Creating Node 1 (bootstrap)...");
    let server1 = Server::new(config1).await.unwrap();
    let handle1 = tokio::spawn(async move { server1.run().await });
    println!("Node 1 started: API={}", api1);
    // Give node 1 time to become leader
    println!("Waiting for Node 1 to become leader (3s)...");
    sleep(Duration::from_secs(3)).await;
    // Verify node 1 is leader
    let mut client1 = Client::connect(format!("http://{}", api1))
        .await
        .expect("Failed to connect to node 1");
    let status1 = client1.status().await.expect("Failed to get status");
    println!("Node 1 status before others: leader={}, term={}", status1.leader, status1.raft_term);
    // Now start nodes 2 and 3
    let (config2, _temp2) = cluster_config_simultaneous_bootstrap(2);
    let api2 = config2.network.api_addr;
    println!("Creating Node 2...");
    let server2 = Server::new(config2).await.unwrap();
    let handle2 = tokio::spawn(async move { server2.run().await });
    println!("Node 2 started: API={}", api2);
    let (config3, _temp3) = cluster_config_simultaneous_bootstrap(3);
    let api3 = config3.network.api_addr;
    println!("Creating Node 3...");
    let server3 = Server::new(config3).await.unwrap();
    let handle3 = tokio::spawn(async move { server3.run().await });
    println!("Node 3 started: API={}", api3);
    // Wait for cluster to stabilize
    println!("Waiting for cluster to stabilize (5s)...");
    sleep(Duration::from_secs(5)).await;
    // Verify cluster formed and leader elected (reconnect to node 1 in
    // case the earlier connection went stale during formation).
    let mut client1 = Client::connect(format!("http://{}", api1))
        .await
        .expect("Failed to connect to node 1");
    let status1 = client1.status().await.expect("Failed to get status from node 1");
    println!("Node 1 status: leader={}, term={}", status1.leader, status1.raft_term);
    let mut client2 = Client::connect(format!("http://{}", api2))
        .await
        .expect("Failed to connect to node 2");
    let status2 = client2.status().await.expect("Failed to get status from node 2");
    println!("Node 2 status: leader={}, term={}", status2.leader, status2.raft_term);
    let mut client3 = Client::connect(format!("http://{}", api3))
        .await
        .expect("Failed to connect to node 3");
    let status3 = client3.status().await.expect("Failed to get status from node 3");
    println!("Node 3 status: leader={}, term={}", status3.leader, status3.raft_term);
    // All nodes should agree on the leader
    assert!(status1.leader > 0, "No leader elected");
    assert_eq!(status1.leader, status2.leader, "Nodes 1 and 2 disagree on leader");
    assert_eq!(status1.leader, status3.leader, "Nodes 1 and 3 disagree on leader");
    // Test KV operations on the cluster
    println!("Testing KV operations...");
    client1.put("test-key", "test-value").await.expect("Put failed");
    // Wait for commit to propagate to followers via heartbeat (heartbeat_interval=100ms)
    // NOTE(review): the 100ms heartbeat interval is asserted here but not
    // visible in this file — confirm against the server defaults.
    sleep(Duration::from_millis(200)).await;
    let value = client2.get("test-key").await.expect("Get failed");
    assert_eq!(value, Some(b"test-value".to_vec()), "Value not replicated");
    println!("✓ 3-node cluster formed successfully with simultaneous bootstrap");
    // Cleanup: abort the spawned server tasks.
    handle1.abort();
    handle2.abort();
    handle3.abort();
}