//! Time-series storage layer backed by an in-memory head, a write-ahead log, //! and periodic snapshots. use anyhow::Result; use chrono::Utc; #[cfg(test)] use nightlight_types::SeriesId; use nightlight_types::TimeSeries; use std::{ fs::{File, OpenOptions}, io::{Read, Write}, path::{Path, PathBuf}, sync::Arc, }; use tokio::sync::{Mutex, RwLock}; use tracing::debug; use crate::query::QueryableStorage; #[derive(serde::Serialize, serde::Deserialize)] struct WalRecord { series: TimeSeries, } pub struct Storage { head: Arc>, wal_path: PathBuf, snapshot_path: PathBuf, wal_lock: Mutex<()>, } impl Storage { pub fn new(data_dir: &str) -> Result { let data_dir = PathBuf::from(data_dir); std::fs::create_dir_all(&data_dir)?; let snapshot_path = data_dir.join("nightlight.db"); let wal_path = data_dir.join("wal.log"); let mut head = QueryableStorage::load_from_file(&snapshot_path)?; if wal_path.exists() { replay_wal(&wal_path, &mut head)?; } head.rebuild_index(); Ok(Self { head: Arc::new(RwLock::new(head)), wal_path, snapshot_path, wal_lock: Mutex::new(()), }) } pub fn queryable(&self) -> Arc> { Arc::clone(&self.head) } pub async fn append(&self, series_list: Vec) -> Result<()> { if series_list.is_empty() { return Ok(()); } let _guard = self.wal_lock.lock().await; let mut wal_file = OpenOptions::new() .create(true) .append(true) .open(&self.wal_path)?; let mut head = self.head.write().await; for series in series_list { let record = WalRecord { series: series.clone(), }; let encoded = bincode::serialize(&record)?; let len = encoded.len() as u32; wal_file.write_all(&len.to_le_bytes())?; wal_file.write_all(&encoded)?; head.upsert_series(series); } wal_file.flush()?; Ok(()) } #[cfg(test)] pub async fn query_series( &self, series_id: SeriesId, start: i64, end: i64, ) -> Result> { let head = self.head.read().await; Ok(head .series .get(&series_id) .map(|series| series.filter_by_time(start, end))) } #[cfg(test)] pub async fn find_series(&self, matchers: Vec) -> Result> { let parsed: Vec<(String, String)> = matchers .iter() .filter_map(|matcher| matcher.split_once('=')) .map(|(key, value)| { ( key.trim().to_string(), value.trim().trim_matches('"').to_string(), ) }) .collect(); let head = self.head.read().await; let mut result = Vec::new(); 'outer: for (series_id, series) in &head.series { for (key, value) in &parsed { if !series .labels .iter() .any(|label| &label.name == key && &label.value == value) { continue 'outer; } } result.push(*series_id); } result.sort_unstable(); Ok(result) } pub async fn flush(&self) -> Result<()> { let _guard = self.wal_lock.lock().await; let snapshot = { let head = self.head.read().await; head.clone() }; snapshot.save_to_file(&self.snapshot_path)?; File::create(&self.wal_path)?; Ok(()) } pub async fn enforce_retention(&self, retention_days: u32) -> Result<()> { if retention_days == 0 { return Ok(()); } let retention_ms = i64::from(retention_days) * 24 * 60 * 60 * 1000; let cutoff = Utc::now().timestamp_millis() - retention_ms; let removed_samples = { let mut head = self.head.write().await; head.prune_before(cutoff) }; if removed_samples > 0 { debug!(removed_samples, cutoff, "pruned expired Nightlight samples"); } Ok(()) } pub async fn compact(&self) -> Result<()> { self.flush().await } pub async fn stats(&self) -> Result { let head = self.head.read().await; let total_samples: u64 = head .series .values() .map(|series| series.samples.len() as u64) .sum(); let oldest_sample_time = head .series .values() .filter_map(|series| series.oldest_sample().map(|sample| sample.timestamp)) .min() .unwrap_or(0); let newest_sample_time = head .series .values() .filter_map(|series| series.latest_sample().map(|sample| sample.timestamp)) .max() .unwrap_or(0); let wal_size = std::fs::metadata(&self.wal_path) .map(|metadata| metadata.len()) .unwrap_or(0); let snapshot_size = std::fs::metadata(&self.snapshot_path) .map(|metadata| metadata.len()) .unwrap_or(0); Ok(StorageStats { active_series: head.series.len() as u64, total_samples, blocks_count: u64::from(snapshot_size > 0), head_samples: total_samples, disk_bytes_used: wal_size + snapshot_size, oldest_sample_time, newest_sample_time, }) } } fn replay_wal(path: &Path, storage: &mut QueryableStorage) -> Result<()> { let mut file = File::open(path)?; let mut len_buf = [0u8; 4]; loop { if let Err(error) = file.read_exact(&mut len_buf) { if error.kind() == std::io::ErrorKind::UnexpectedEof { break; } return Err(error.into()); } let len = u32::from_le_bytes(len_buf) as usize; let mut buffer = vec![0u8; len]; file.read_exact(&mut buffer)?; let record: WalRecord = bincode::deserialize(&buffer)?; storage.upsert_series(record.series); } Ok(()) } #[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] pub struct StorageStats { pub active_series: u64, pub total_samples: u64, pub blocks_count: u64, pub head_samples: u64, pub disk_bytes_used: u64, pub oldest_sample_time: i64, pub newest_sample_time: i64, } #[cfg(test)] mod tests { use super::*; use nightlight_types::Label; #[test] fn test_storage_creation() { let dir = tempfile::tempdir().unwrap(); let storage = Storage::new(dir.path().to_str().unwrap()); assert!(storage.is_ok()); } #[tokio::test] async fn test_append_and_query_persists() { let dir = tempfile::tempdir().unwrap(); let storage = Storage::new(dir.path().to_str().unwrap()).unwrap(); let ts = TimeSeries { id: SeriesId::new(1), labels: vec![Label { name: "__name__".into(), value: "cpu_usage".into(), }], samples: vec![ nightlight_types::Sample { timestamp: 1000, value: 1.0, }, nightlight_types::Sample { timestamp: 2000, value: 2.0, }, ], }; storage.append(vec![ts.clone()]).await.unwrap(); storage.flush().await.unwrap(); let storage2 = Storage::new(dir.path().to_str().unwrap()).unwrap(); let res = storage2 .query_series(SeriesId::new(1), 0, 5000) .await .unwrap() .unwrap(); assert_eq!(res.samples.len(), 2); assert_eq!(res.samples[1].value, 2.0); } #[tokio::test] async fn test_retention_prunes_old_samples_and_series() { let dir = tempfile::tempdir().unwrap(); let storage = Storage::new(dir.path().to_str().unwrap()).unwrap(); let now = Utc::now().timestamp_millis(); storage .append(vec![ TimeSeries { id: SeriesId::new(1), labels: vec![Label::new("__name__", "retained_metric")], samples: vec![ nightlight_types::Sample::new(now - (2 * 24 * 60 * 60 * 1000), 1.0), nightlight_types::Sample::new(now, 2.0), ], }, TimeSeries { id: SeriesId::new(2), labels: vec![Label::new("__name__", "expired_metric")], samples: vec![nightlight_types::Sample::new( now - (3 * 24 * 60 * 60 * 1000), 3.0, )], }, ]) .await .unwrap(); storage.enforce_retention(1).await.unwrap(); let retained = storage .query_series(SeriesId::new(1), 0, now + 1) .await .unwrap() .unwrap(); assert_eq!(retained.samples.len(), 1); assert_eq!(retained.samples[0].value, 2.0); let expired = storage .query_series(SeriesId::new(2), 0, now + 1) .await .unwrap(); assert!(expired.is_none()); } #[tokio::test] async fn test_stats_report_sample_bounds() { let dir = tempfile::tempdir().unwrap(); let storage = Storage::new(dir.path().to_str().unwrap()).unwrap(); storage .append(vec![TimeSeries { id: SeriesId::new(99), labels: vec![Label::new("__name__", "stats_metric")], samples: vec![ nightlight_types::Sample::new(1000, 1.0), nightlight_types::Sample::new(2000, 2.0), ], }]) .await .unwrap(); let stats = storage.stats().await.unwrap(); assert_eq!(stats.active_series, 1); assert_eq!(stats.total_samples, 2); assert_eq!(stats.head_samples, 2); assert_eq!(stats.oldest_sample_time, 1000); assert_eq!(stats.newest_sample_time, 2000); } #[tokio::test] async fn test_find_series() { let dir = tempfile::tempdir().unwrap(); let storage = Storage::new(dir.path().to_str().unwrap()).unwrap(); let ts = TimeSeries { id: SeriesId::new(42), labels: vec![ Label { name: "__name__".into(), value: "http_requests_total".into(), }, Label { name: "project_id".into(), value: "proj-1".into(), }, ], samples: vec![nightlight_types::Sample { timestamp: 1, value: 1.0, }], }; storage.append(vec![ts]).await.unwrap(); let found = storage .find_series(vec!["project_id=proj-1".into()]) .await .unwrap(); assert_eq!(found, vec![SeriesId::new(42)]); } }