# NixOS module: single-host observability stack for the cloud platform.
#
# When `services.cloud-observability.enable` is set, this module configures
# Prometheus (plus the node exporter), Loki, Promtail and Grafana on the local
# machine and wires them together: Promtail pushes to Loki, and Grafana is
# provisioned with Prometheus and Loki datasources.
#
# NOTE: `#` comments in Nix extend to the end of the physical line, so this
# file must keep one statement per line; collapsing it onto a single line
# turns everything after the first comment into dead text.
{ config, lib, pkgs, ... }:

let
  cfg = config.services.cloud-observability;

  # Port the Prometheus node exporter listens on (also used by its scrape job).
  nodeExporterPort = 9100;

  # Build a Prometheus scrape job with a single static target on localhost.
  mkLocalJob = name: port: {
    job_name = name;
    static_configs = [{ targets = [ "localhost:${toString port}" ]; }];
  };

  # Metrics endpoints of the platform services. Kept as a list (not an
  # attribute set) so the generated scrape config preserves this order.
  platformTargets = [
    { name = "chainfire";     port = 9091; }
    { name = "flaredb";       port = 9092; }
    { name = "iam";           port = 9093; }
    { name = "k8shost";       port = 9094; }
    { name = "plasmavmc";     port = 9095; }
    { name = "novanet";       port = 9096; }
    { name = "flashdns";      port = 9097; }
    { name = "fiberlb";       port = 9098; }
    { name = "lightningstor"; port = 9099; }
  ];
in
{
  options.services.cloud-observability = {
    enable = lib.mkEnableOption "cloud platform observability stack (Prometheus, Grafana, Loki)";

    prometheusPort = lib.mkOption {
      type = lib.types.port;
      default = 9090;
      description = "Port for Prometheus web interface";
    };

    grafanaPort = lib.mkOption {
      type = lib.types.port;
      default = 3000;
      description = "Port for Grafana web interface";
    };

    lokiPort = lib.mkOption {
      type = lib.types.port;
      default = 3100;
      description = "Port for Loki API";
    };

    scrapeInterval = lib.mkOption {
      type = lib.types.str;
      default = "15s";
      description = "Default Prometheus scrape interval";
    };

    enableAllTargets = lib.mkOption {
      type = lib.types.bool;
      default = true;
      description = "Enable scraping all cloud platform services";
    };

    grafanaAdminPasswordFile = lib.mkOption {
      # A string rather than a path type, so the secret file is referenced
      # by location instead of being copied into the Nix store.
      type = lib.types.nullOr lib.types.str;
      default = null;
      example = "/run/secrets/grafana-admin-password";
      description = ''
        Path to a file containing the Grafana admin password. It is passed to
        Grafana through its `$__file{...}` provider, so the password itself is
        not embedded in the generated configuration. When null, the password
        falls back to the insecure default "admin".
      '';
    };

    openFirewall = lib.mkOption {
      type = lib.types.bool;
      default = true;
      description = ''
        Whether to open the Prometheus, Grafana and Loki ports in the
        firewall. Loki is configured with authentication disabled, so consider
        setting this to false on hosts reachable from untrusted networks.
      '';
    };
  };

  config = lib.mkIf cfg.enable {
    # Surface the default-credentials fallback at evaluation time: Grafana
    # listens on 0.0.0.0 below, so "admin"/"admin" is reachable remotely
    # whenever the firewall port is open.
    warnings = lib.optional (cfg.grafanaAdminPasswordFile == null)
      "services.cloud-observability: Grafana is using the default admin password; set grafanaAdminPasswordFile.";

    # Prometheus configuration
    services.prometheus = {
      enable = true;
      port = cfg.prometheusPort;

      globalConfig = {
        scrape_interval = cfg.scrapeInterval;
        # Rules are evaluated at the same cadence as scraping.
        evaluation_interval = cfg.scrapeInterval;
      };

      # With enableAllTargets = false no scrape jobs are defined here at all.
      scrapeConfigs = lib.mkIf cfg.enableAllTargets (
        [
          # Prometheus self-monitoring
          (mkLocalJob "prometheus" cfg.prometheusPort)
          # Host metrics from the node exporter enabled below; without this
          # job the exporter would run but never be scraped.
          (mkLocalJob "node" nodeExporterPort)
        ]
        # One job per platform service metrics endpoint.
        ++ map (t: mkLocalJob t.name t.port) platformTargets
      );

      exporters = {
        node = {
          enable = true;
          enabledCollectors = [ "systemd" ];
          port = nodeExporterPort;
        };
      };
    };

    # Loki configuration
    services.loki = {
      enable = true;
      configuration = {
        server.http_listen_port = cfg.lokiPort;
        auth_enabled = false;

        ingester = {
          lifecycler = {
            address = "127.0.0.1";
            ring = {
              kvstore.store = "inmemory";
              replication_factor = 1;
            };
            final_sleep = "0s";
          };
          chunk_idle_period = "5m";
          chunk_retain_period = "30s";
        };

        schema_config = {
          configs = [{
            from = "2024-01-01";
            store = "tsdb";
            object_store = "filesystem";
            schema = "v13";
            index = {
              prefix = "index_";
              period = "24h";
            };
          }];
        };

        # All Loki state lives under /var/lib/loki; the directories are
        # created by the tmpfiles rules further down.
        storage_config = {
          tsdb_shipper = {
            active_index_directory = "/var/lib/loki/tsdb-index";
            cache_location = "/var/lib/loki/tsdb-cache";
          };
          filesystem = {
            directory = "/var/lib/loki/chunks";
          };
        };

        limits_config = {
          reject_old_samples = true;
          reject_old_samples_max_age = "168h";
        };

        compactor = {
          working_directory = "/var/lib/loki/compactor";
          compaction_interval = "10m";
        };
      };
    };

    # Promtail for shipping logs to Loki
    services.promtail = {
      enable = true;
      configuration = {
        server = {
          http_listen_port = 9080;
          grpc_listen_port = 0;
        };

        positions.filename = "/var/lib/promtail/positions.yaml";

        clients = [{
          url = "http://localhost:${toString cfg.lokiPort}/loki/api/v1/push";
        }];

        scrape_configs = [
          # Systemd journal scraping
          {
            job_name = "journal";
            journal = {
              max_age = "12h";
              labels = {
                job = "systemd-journal";
                host = config.networking.hostName;
              };
            };
            # Copy journal metadata fields onto the `unit` and `hostname` labels.
            relabel_configs = [
              {
                source_labels = [ "__journal__systemd_unit" ];
                target_label = "unit";
              }
              {
                source_labels = [ "__journal__hostname" ];
                target_label = "hostname";
              }
            ];
          }
          # Application logs (if services write to files)
          {
            job_name = "cloud-services";
            static_configs = [{
              targets = [ "localhost" ];
              labels = {
                job = "cloud-services";
                host = config.networking.hostName;
                __path__ = "/var/log/cloud/*.log";
              };
            }];
          }
        ];
      };
    };

    # Grafana configuration
    services.grafana = {
      enable = true;
      settings = {
        server = {
          http_port = cfg.grafanaPort;
          http_addr = "0.0.0.0";
        };

        analytics.reporting_enabled = false;

        security = {
          admin_user = "admin";
          # Prefer the file provider so the secret stays out of the Nix
          # store; fall back to the historical default when no file is set.
          # NOTE(review): the file presumably must be readable by the
          # Grafana service user - confirm for your secret manager.
          admin_password =
            if cfg.grafanaAdminPasswordFile != null
            then "$__file{${cfg.grafanaAdminPasswordFile}}"
            else "admin";
        };
      };

      provision = {
        enable = true;

        datasources.settings.datasources = [
          {
            name = "Prometheus";
            type = "prometheus";
            access = "proxy";
            url = "http://localhost:${toString cfg.prometheusPort}";
            isDefault = true;
            jsonData = {
              timeInterval = cfg.scrapeInterval;
            };
          }
          {
            name = "Loki";
            type = "loki";
            access = "proxy";
            url = "http://localhost:${toString cfg.lokiPort}";
            jsonData = {
              maxLines = 1000;
            };
          }
        ];

        dashboards.settings.providers = [
          {
            name = "Cloud Platform";
            type = "file";
            options.path = "/var/lib/grafana/dashboards";
          }
        ];
      };
    };

    # Ensure directories exist
    systemd.tmpfiles.rules = [
      "d /var/lib/loki 0750 loki loki -"
      "d /var/lib/loki/chunks 0750 loki loki -"
      "d /var/lib/loki/tsdb-index 0750 loki loki -"
      "d /var/lib/loki/tsdb-cache 0750 loki loki -"
      "d /var/lib/loki/compactor 0750 loki loki -"
      "d /var/lib/promtail 0750 promtail promtail -"
      "d /var/lib/grafana/dashboards 0755 grafana grafana -"
      "d /var/log/cloud 0755 root root -"
    ];

    # Open firewall ports if requested. The enclosing `lib.mkIf cfg.enable`
    # already guards this, so only `openFirewall` is checked here.
    networking.firewall.allowedTCPPorts = lib.mkIf cfg.openFirewall [
      cfg.prometheusPort
      cfg.grafanaPort
      cfg.lokiPort
    ];
  };
}