photoncloud-monorepo/nix/modules/observability.nix
centra 8317b22b9e fix(nix): Remove deprecated max_retries from Prometheus config
- Remove queue_config.max_retries option from observability.nix
- Option deprecated/removed in recent NixOS/Prometheus versions
- Found by nix eval audit (T039.S3 pre-deployment validation)

Error: `services.prometheus.remoteWrite."[...]".queue_config.max_retries' does not exist

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-12 08:49:30 +09:00

338 lines
8.2 KiB
Nix

{ config, lib, pkgs, ... }:
let
cfg = config.services.cloud-observability;
in
{
# User-facing options for the cloud platform observability stack.
# All of them are read through `cfg` by the config section of this module.
options.services.cloud-observability =
  let
    inherit (lib) mkEnableOption mkOption types;

    # Declares a TCP port option with the given default and description.
    portOption = default: description: mkOption {
      type = types.port;
      inherit default description;
    };
  in
  {
    # Master switch: nothing in this module is configured unless this is true.
    enable = mkEnableOption "cloud platform observability stack (Prometheus, Grafana, Loki)";

    # Listening ports of the three web-facing components.
    prometheusPort = portOption 9090 "Port for Prometheus web interface";
    grafanaPort = portOption 3000 "Port for Grafana web interface";
    lokiPort = portOption 3100 "Port for Loki API";

    # Duration string used for Prometheus scraping and rule evaluation, and
    # as the Grafana Prometheus datasource's time interval.
    scrapeInterval = mkOption {
      default = "15s";
      type = types.str;
      description = "Default Prometheus scrape interval";
    };

    # When false, this module contributes no Prometheus scrape jobs at all.
    enableAllTargets = mkOption {
      default = true;
      type = types.bool;
      description = "Enable scraping all cloud platform services";
    };
  };
config = lib.mkIf cfg.enable {
# Prometheus: scrapes the platform services on this host, forwards samples
# to Nightlight via remote_write, and runs the node exporter.
services.prometheus =
  let
    # Builds one static scrape job for a metrics endpoint on localhost.
    localJob = name: port: {
      job_name = name;
      static_configs = [{
        targets = [ "localhost:${toString port}" ];
      }];
    };
  in
  {
    enable = true;
    port = cfg.prometheusPort;

    # Rules are evaluated at the same cadence as targets are scraped.
    globalConfig.scrape_interval = cfg.scrapeInterval;
    globalConfig.evaluation_interval = cfg.scrapeInterval;

    remoteWrite = [{
      # Nightlight's remote_write endpoint
      url = "http://localhost:9101/api/v1/write";
      queue_config = {
        # Larger queue capacity so bursts of metrics are absorbed.
        capacity = 2500;
        max_shards = 20;
        min_backoff = "30ms";
        max_backoff = "10s";
        # max_retries is intentionally not set: the option was removed from
        # recent Prometheus/NixOS versions and no longer evaluates.
      };
    }];

    # One job per platform service; the first entry is Prometheus itself.
    # The whole list is dropped when enableAllTargets is false.
    scrapeConfigs = lib.mkIf cfg.enableAllTargets [
      (localJob "prometheus" cfg.prometheusPort)
      (localJob "chainfire" 9091)
      (localJob "flaredb" 9092)
      (localJob "iam" 9093)
      (localJob "k8shost" 9094)
      (localJob "plasmavmc" 9095)
      (localJob "prismnet" 9096)
      (localJob "flashdns" 9097)
      (localJob "fiberlb" 9098)
      (localJob "lightningstor" 9099)
    ];

    # Host metrics exporter with the systemd collector enabled.
    # NOTE(review): no scrape job in this module targets port 9100 -
    # confirm the node exporter is scraped from somewhere else.
    exporters.node = {
      enable = true;
      enabledCollectors = [ "systemd" ];
      port = 9100;
    };
  };
# Loki: single-instance log store (in-memory ring, replication factor 1)
# keeping its index and chunks on the local filesystem under /var/lib/loki.
services.loki = {
  enable = true;
  configuration = {
    server = {
      http_listen_port = cfg.lokiPort;
      # Loki's gRPC listener defaults to 9095, which is the port this module
      # scrapes PlasmaVMC metrics on (localhost:9095). Pin it to a port
      # outside the 9090-9101 range used by the platform so both services
      # can run on the same host.
      grpc_listen_port = 9195;
    };

    # Authentication is disabled for this Loki instance.
    auth_enabled = false;

    ingester = {
      lifecycler = {
        address = "127.0.0.1";
        ring = {
          # Ring state is kept in process memory; no external KV store.
          kvstore.store = "inmemory";
          replication_factor = 1;
        };
        final_sleep = "0s";
      };
      chunk_idle_period = "5m";
      chunk_retain_period = "30s";
    };

    # A single schema period: TSDB index (v13), chunks on the filesystem,
    # one index table per 24h.
    schema_config = {
      configs = [{
        from = "2024-01-01";
        store = "tsdb";
        object_store = "filesystem";
        schema = "v13";
        index = {
          prefix = "index_";
          period = "24h";
        };
      }];
    };

    # On-disk locations; the matching directories are created by this
    # module's systemd.tmpfiles rules.
    storage_config = {
      tsdb_shipper = {
        active_index_directory = "/var/lib/loki/tsdb-index";
        cache_location = "/var/lib/loki/tsdb-cache";
      };
      filesystem = {
        directory = "/var/lib/loki/chunks";
      };
    };

    # Reject log lines older than 168h (7 days) at ingestion time.
    limits_config = {
      reject_old_samples = true;
      reject_old_samples_max_age = "168h";
    };

    compactor = {
      working_directory = "/var/lib/loki/compactor";
      compaction_interval = "10m";
    };
  };
};
# Promtail: reads the systemd journal and the cloud services' log files and
# pushes both to the local Loki instance.
services.promtail =
  let
    # Every stream is tagged with this machine's configured host name.
    hostLabel = config.networking.hostName;

    # Relabel rule copying one source label onto a target label.
    relabel = source: target: {
      source_labels = [ source ];
      target_label = target;
    };
  in
  {
    enable = true;
    configuration = {
      server.http_listen_port = 9080;
      server.grpc_listen_port = 0;

      # File in which Promtail records its read positions.
      positions = { filename = "/var/lib/promtail/positions.yaml"; };

      # Push endpoint of the Loki instance configured by this module.
      clients = [
        { url = "http://localhost:${toString cfg.lokiPort}/loki/api/v1/push"; }
      ];

      scrape_configs = [
        # systemd journal entries, with max_age set to 12h
        {
          job_name = "journal";
          journal.max_age = "12h";
          journal.labels = {
            job = "systemd-journal";
            host = hostLabel;
          };
          relabel_configs = [
            (relabel "__journal__systemd_unit" "unit")
            (relabel "__journal__hostname" "hostname")
          ];
        }
        # Log files written by services under /var/log/cloud
        {
          job_name = "cloud-services";
          static_configs = [{
            targets = [ "localhost" ];
            labels = {
              __path__ = "/var/log/cloud/*.log";
              job = "cloud-services";
              host = hostLabel;
            };
          }];
        }
      ];
    };
  };
# Grafana: web UI with the Prometheus and Loki instances of this module
# provisioned as datasources, plus a file-based dashboard provider.
services.grafana = {
  enable = true;
  settings = {
    server = {
      http_port = cfg.grafanaPort;
      # Listen on all interfaces.
      http_addr = "0.0.0.0";
    };
    analytics.reporting_enabled = false;
    security = {
      # Initial admin credentials. They are set with mkDefault so that a
      # host configuration can replace them with a plain definition (no
      # mkForce needed), e.g. using Grafana's file provider:
      #   services.grafana.settings.security.admin_password =
      #     "$__file{/run/secrets/grafana-admin-password}";
      # WARNING: the "admin"/"admin" fallback is insecure and the value is
      # stored in the world-readable Nix store; override it on any host
      # reachable from an untrusted network.
      admin_user = lib.mkDefault "admin";
      admin_password = lib.mkDefault "admin";
    };
  };
  provision = {
    enable = true;
    datasources.settings.datasources = [
      # Default datasource: the local Prometheus, queried through Grafana's
      # backend ("proxy" access), with the scrape interval as time interval.
      {
        name = "Prometheus";
        type = "prometheus";
        access = "proxy";
        url = "http://localhost:${toString cfg.prometheusPort}";
        isDefault = true;
        jsonData = {
          timeInterval = cfg.scrapeInterval;
        };
      }
      # The local Loki, with maxLines set to 1000.
      {
        name = "Loki";
        type = "loki";
        access = "proxy";
        url = "http://localhost:${toString cfg.lokiPort}";
        jsonData = {
          maxLines = 1000;
        };
      }
    ];
    # Dashboards are loaded from files in /var/lib/grafana/dashboards.
    dashboards.settings.providers = [
      {
        name = "Cloud Platform";
        type = "file";
        options.path = "/var/lib/grafana/dashboards";
      }
    ];
  };
};
# Directories used by Loki, Promtail, Grafana dashboards and the
# file-based service logs, declared as systemd-tmpfiles rules.
systemd.tmpfiles.rules =
  let
    # Formats one tmpfiles "d" (directory) rule; user and group are the same.
    dirRule = path: mode: owner: "d ${path} ${mode} ${owner} ${owner} -";

    # Loki's data directory itself ("") and its sub-directories.
    lokiSubdirs = [ "" "/chunks" "/tsdb-index" "/tsdb-cache" "/compactor" ];
  in
  (map (sub: dirRule "/var/lib/loki${sub}" "0750" "loki") lokiSubdirs)
  ++ [
    (dirRule "/var/lib/promtail" "0750" "promtail")
    (dirRule "/var/lib/grafana/dashboards" "0755" "grafana")
    (dirRule "/var/log/cloud" "0755" "root")
  ];
# Open the Prometheus, Grafana and Loki HTTP ports in the host firewall.
# No extra lib.mkIf is needed here: the enclosing `config` block is already
# guarded by cfg.enable, so these ports are opened whenever the stack is on.
# NOTE(review): Loki is configured with auth_enabled = false in this module;
# confirm that exposing its port beyond localhost is intended.
networking.firewall.allowedTCPPorts = [
  cfg.prometheusPort
  cfg.grafanaPort
  cfg.lokiPort
];
};
}