- Remove gitlinks (160000 mode) for chainfire, flaredb, iam - Add workspace contents as regular tracked files - Update flake.nix to use simple paths instead of builtins.fetchGit This resolves the nix build failure where submodule directories appeared empty in the nix store. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
316 lines
8.9 KiB
Rust
316 lines
8.9 KiB
Rust
//! Snapshot management for Raft state
//!
//! Snapshots allow compacting the Raft log while preserving the state machine state.
|
|
|
|
use crate::{cf, RocksStore};
|
|
use chainfire_types::error::StorageError;
|
|
use chainfire_types::kv::KvEntry;
|
|
use serde::{Deserialize, Serialize};
|
|
use std::io::{Read, Write};
|
|
use tracing::info;
|
|
|
|
/// Snapshot metadata
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct SnapshotMeta {
|
|
/// Last log index included in snapshot
|
|
pub last_log_index: u64,
|
|
/// Term of last log entry included
|
|
pub last_log_term: u64,
|
|
/// Cluster membership at snapshot time
|
|
pub membership: Vec<u64>,
|
|
/// Size of snapshot data in bytes
|
|
pub size: u64,
|
|
}
|
|
|
|
/// A complete snapshot
|
|
#[derive(Debug)]
|
|
pub struct Snapshot {
|
|
pub meta: SnapshotMeta,
|
|
pub data: Vec<u8>,
|
|
}
|
|
|
|
impl Snapshot {
|
|
/// Create snapshot from raw data
|
|
pub fn new(meta: SnapshotMeta, data: Vec<u8>) -> Self {
|
|
Self { meta, data }
|
|
}
|
|
|
|
/// Serialize snapshot to bytes
|
|
pub fn to_bytes(&self) -> Result<Vec<u8>, StorageError> {
|
|
// Format: [meta_len: u32][meta][data]
|
|
let meta_bytes =
|
|
bincode::serialize(&self.meta).map_err(|e| StorageError::Serialization(e.to_string()))?;
|
|
|
|
let mut result = Vec::with_capacity(4 + meta_bytes.len() + self.data.len());
|
|
result.extend_from_slice(&(meta_bytes.len() as u32).to_le_bytes());
|
|
result.extend_from_slice(&meta_bytes);
|
|
result.extend_from_slice(&self.data);
|
|
|
|
Ok(result)
|
|
}
|
|
|
|
/// Deserialize snapshot from bytes
|
|
pub fn from_bytes(bytes: &[u8]) -> Result<Self, StorageError> {
|
|
if bytes.len() < 4 {
|
|
return Err(StorageError::Snapshot("Invalid snapshot: too short".into()));
|
|
}
|
|
|
|
let meta_len = u32::from_le_bytes(bytes[..4].try_into().unwrap()) as usize;
|
|
if bytes.len() < 4 + meta_len {
|
|
return Err(StorageError::Snapshot(
|
|
"Invalid snapshot: meta truncated".into(),
|
|
));
|
|
}
|
|
|
|
let meta: SnapshotMeta = bincode::deserialize(&bytes[4..4 + meta_len])
|
|
.map_err(|e| StorageError::Serialization(e.to_string()))?;
|
|
|
|
let data = bytes[4 + meta_len..].to_vec();
|
|
|
|
Ok(Self { meta, data })
|
|
}
|
|
}
|
|
|
|
/// Builder for creating snapshots from KV store state
|
|
pub struct SnapshotBuilder {
|
|
store: RocksStore,
|
|
}
|
|
|
|
impl SnapshotBuilder {
|
|
pub fn new(store: RocksStore) -> Self {
|
|
Self { store }
|
|
}
|
|
|
|
/// Build a snapshot of the current KV state
|
|
pub fn build(
|
|
&self,
|
|
last_log_index: u64,
|
|
last_log_term: u64,
|
|
membership: Vec<u64>,
|
|
) -> Result<Snapshot, StorageError> {
|
|
let cf = self
|
|
.store
|
|
.cf_handle(cf::KV)
|
|
.ok_or_else(|| StorageError::RocksDb("KV cf not found".into()))?;
|
|
|
|
// Collect all KV entries
|
|
let mut entries: Vec<KvEntry> = Vec::new();
|
|
let iter = self
|
|
.store
|
|
.db()
|
|
.iterator_cf(&cf, rocksdb::IteratorMode::Start);
|
|
|
|
for item in iter {
|
|
let (_, value) = item.map_err(|e| StorageError::RocksDb(e.to_string()))?;
|
|
let entry: KvEntry = bincode::deserialize(&value)
|
|
.map_err(|e| StorageError::Serialization(e.to_string()))?;
|
|
entries.push(entry);
|
|
}
|
|
|
|
// Serialize entries
|
|
let data = bincode::serialize(&entries)
|
|
.map_err(|e| StorageError::Serialization(e.to_string()))?;
|
|
|
|
let meta = SnapshotMeta {
|
|
last_log_index,
|
|
last_log_term,
|
|
membership,
|
|
size: data.len() as u64,
|
|
};
|
|
|
|
info!(
|
|
last_log_index,
|
|
entries = entries.len(),
|
|
size = data.len(),
|
|
"Built snapshot"
|
|
);
|
|
|
|
Ok(Snapshot::new(meta, data))
|
|
}
|
|
|
|
/// Apply a snapshot to restore state
|
|
pub fn apply(&self, snapshot: &Snapshot) -> Result<(), StorageError> {
|
|
let cf = self
|
|
.store
|
|
.cf_handle(cf::KV)
|
|
.ok_or_else(|| StorageError::RocksDb("KV cf not found".into()))?;
|
|
|
|
// Deserialize entries
|
|
let entries: Vec<KvEntry> = bincode::deserialize(&snapshot.data)
|
|
.map_err(|e| StorageError::Serialization(e.to_string()))?;
|
|
|
|
// Clear existing KV data
|
|
let mut batch = rocksdb::WriteBatch::default();
|
|
let iter = self
|
|
.store
|
|
.db()
|
|
.iterator_cf(&cf, rocksdb::IteratorMode::Start);
|
|
for item in iter {
|
|
let (key, _) = item.map_err(|e| StorageError::RocksDb(e.to_string()))?;
|
|
batch.delete_cf(&cf, key);
|
|
}
|
|
|
|
// Write new entries
|
|
for entry in &entries {
|
|
let value = bincode::serialize(entry)
|
|
.map_err(|e| StorageError::Serialization(e.to_string()))?;
|
|
batch.put_cf(&cf, &entry.key, value);
|
|
}
|
|
|
|
self.store
|
|
.db()
|
|
.write(batch)
|
|
.map_err(|e| StorageError::RocksDb(e.to_string()))?;
|
|
|
|
info!(
|
|
last_log_index = snapshot.meta.last_log_index,
|
|
entries = entries.len(),
|
|
"Applied snapshot"
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
/// Reader over an owned, in-memory snapshot buffer that tracks a read cursor.
pub struct SnapshotReader {
    data: Vec<u8>,
    position: usize,
}

impl SnapshotReader {
    /// Wrap `data`, with the cursor placed at the first byte.
    pub fn new(data: Vec<u8>) -> Self {
        Self { position: 0, data }
    }

    /// Number of bytes that have not been read yet.
    pub fn remaining(&self) -> usize {
        let consumed = self.position;
        self.data.len() - consumed
    }
}

impl Read for SnapshotReader {
    /// Copy as many unread bytes as fit into `buf` and advance the cursor.
    ///
    /// Returns `Ok(0)` once the buffer is exhausted (or when `buf` is empty).
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let unread = &self.data[self.position..];
        let count = buf.len().min(unread.len());

        // With nothing left (or no room in `buf`) this copies zero bytes.
        buf[..count].copy_from_slice(&unread[..count]);
        self.position += count;

        Ok(count)
    }
}
|
|
|
|
/// Writer that accumulates everything written to it in an in-memory buffer.
#[derive(Default)]
pub struct SnapshotWriter {
    data: Vec<u8>,
}

impl SnapshotWriter {
    /// Create a writer with an empty buffer.
    pub fn new() -> Self {
        Self::default()
    }

    /// Consume the writer and hand back the accumulated bytes.
    pub fn into_inner(self) -> Vec<u8> {
        self.data
    }
}

impl Write for SnapshotWriter {
    /// Append all of `buf` to the buffer; always reports `buf.len()` bytes written.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        // `Vec<u8>`'s own `Write` impl appends the whole slice and returns its length.
        self.data.write(buf)
    }

    /// Nothing is buffered elsewhere, so flushing is a no-op.
    fn flush(&mut self) -> std::io::Result<()> {
        self.data.flush()
    }
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::KvStore;
    use tempfile::{tempdir, TempDir};

    /// Open a fresh `RocksStore` inside a new temporary directory.
    ///
    /// The `TempDir` guard is returned together with the store: dropping the guard
    /// removes the directory, so it must stay alive for as long as the store is used.
    /// It comes first in the tuple so that `let (_dir, store) = ...` drops the store
    /// before the directory.
    fn create_test_store() -> (TempDir, RocksStore) {
        let dir = tempdir().unwrap();
        let store = RocksStore::new(dir.path()).unwrap();
        (dir, store)
    }

    #[test]
    fn test_snapshot_roundtrip() {
        let (_dir, store) = create_test_store();

        // Add some data
        let kv = KvStore::new(store.clone()).unwrap();
        kv.put(b"key1".to_vec(), b"value1".to_vec(), None).unwrap();
        kv.put(b"key2".to_vec(), b"value2".to_vec(), None).unwrap();

        // Build snapshot
        let builder = SnapshotBuilder::new(store.clone());
        let snapshot = builder.build(10, 1, vec![1, 2, 3]).unwrap();

        assert_eq!(snapshot.meta.last_log_index, 10);
        assert_eq!(snapshot.meta.last_log_term, 1);
        assert_eq!(snapshot.meta.membership, vec![1, 2, 3]);
        assert_eq!(snapshot.meta.size, snapshot.data.len() as u64);

        // Serialize and deserialize
        let bytes = snapshot.to_bytes().unwrap();
        let restored = Snapshot::from_bytes(&bytes).unwrap();

        assert_eq!(restored.meta.last_log_index, snapshot.meta.last_log_index);
        assert_eq!(restored.meta.last_log_term, snapshot.meta.last_log_term);
        assert_eq!(restored.meta.membership, snapshot.meta.membership);
        // Compare the payload itself, not just its length.
        assert_eq!(restored.data, snapshot.data);
    }

    #[test]
    fn test_snapshot_from_bytes_rejects_truncated_input() {
        // Shorter than the 4-byte length prefix.
        assert!(Snapshot::from_bytes(&[1, 2]).is_err());
        // Prefix announces 10 bytes of metadata but only 1 byte follows.
        assert!(Snapshot::from_bytes(&[10, 0, 0, 0, 1]).is_err());
    }

    #[test]
    fn test_snapshot_apply() {
        let (_dir1, store1) = create_test_store();
        let (_dir2, store2) = create_test_store();

        // Add data to store1
        let kv1 = KvStore::new(store1.clone()).unwrap();
        kv1.put(b"key1".to_vec(), b"value1".to_vec(), None)
            .unwrap();
        kv1.put(b"key2".to_vec(), b"value2".to_vec(), None)
            .unwrap();

        // Build snapshot from store1
        let builder1 = SnapshotBuilder::new(store1.clone());
        let snapshot = builder1.build(10, 1, vec![1]).unwrap();

        // Apply to store2
        let builder2 = SnapshotBuilder::new(store2.clone());
        builder2.apply(&snapshot).unwrap();

        // Verify data in store2
        let kv2 = KvStore::new(store2).unwrap();
        let entry1 = kv2.get(b"key1").unwrap().unwrap();
        let entry2 = kv2.get(b"key2").unwrap().unwrap();

        assert_eq!(entry1.value, b"value1");
        assert_eq!(entry2.value, b"value2");
    }

    #[test]
    fn test_snapshot_reader() {
        let data = vec![1, 2, 3, 4, 5];
        let mut reader = SnapshotReader::new(data.clone());

        let mut buf = [0u8; 3];
        assert_eq!(reader.read(&mut buf).unwrap(), 3);
        assert_eq!(&buf, &[1, 2, 3]);

        assert_eq!(reader.read(&mut buf).unwrap(), 2);
        assert_eq!(&buf[..2], &[4, 5]);

        // Exhausted reader reports end-of-stream.
        assert_eq!(reader.read(&mut buf).unwrap(), 0);
    }
}
|