diff --git a/modules/nixos/prometheus/blackbox.nix b/modules/nixos/prometheus/blackbox.nix
new file mode 100644
index 0000000..7886b06
--- /dev/null
+++ b/modules/nixos/prometheus/blackbox.nix
@@ -0,0 +1,93 @@
+# Blackbox exporter integration for custom.prometheus: runs the exporter on
+# loopback, probes a fixed list of remote TCP endpoints through it, and
+# registers a latency alert for those probes.
+{ config, lib, pkgs, ... }:
+let
+  cfg = config.custom.prometheus;
+in
+{
+  config = lib.mkIf (cfg.enable && cfg.exporters.blackbox.enable) {
+    services.prometheus.exporters.blackbox = {
+      enable = true;
+      listenAddress = "127.0.0.1";
+      configFile = pkgs.writeText "blackbox.config.yaml" (
+        lib.generators.toYAML {} {
+          modules = {
+            # IPv4-only plain TCP connect probe (no TLS, no fallback to IPv6).
+            tcp4_connect = {
+              prober = "tcp";
+              tcp = {
+                ip_protocol_fallback = false;
+                preferred_ip_protocol = "ip4";
+                tls = false;
+              };
+              timeout = "15s";
+            };
+          };
+        }
+      );
+    };
+
+    services.prometheus.scrapeConfigs = [
+      {
+        # Probe job: every target below is checked via the exporter's /probe endpoint.
+        job_name = "blackbox";
+        scrape_interval = "1m";
+        metrics_path = "/probe";
+        params = {
+          module = [ "tcp4_connect" ];
+        };
+        static_configs = [
+          {
+            targets = [
+              "tok-00.namely.icu:8080"
+              "la-00.video.namely.icu:8080"
+              "auth.xinyang.life:443"
+              "home.xinyang.life:8000"
+            ];
+          }
+        ];
+        relabel_configs = [
+          # Hand the listed address to the exporter as the `target` URL parameter ...
+          {
+            source_labels = [ "__address__" ];
+            target_label = "__param_target";
+          }
+          # ... keep that address as the `instance` label on the resulting series ...
+          {
+            source_labels = [ "__param_target" ];
+            target_label = "instance";
+          }
+          # ... and point the actual scrape at the local blackbox exporter.
+          {
+            target_label = "__address__";
+            replacement = "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}";
+          }
+        ];
+      }
+      {
+        # Scrape the exporter process itself (no probe parameters).
+        job_name = "blackbox_exporter";
+        static_configs = [
+          { targets = [ "127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}" ]; }
+        ];
+      }
+    ];
+
+    custom.prometheus.ruleModules = [
+      {
+        name = "probe_alerts";
+        rules = [
+          {
+            # Plain threshold on the probe duration; not a percentile.
+            alert = "HighProbeLatency";
+            expr = "probe_duration_seconds > 0.5";
+            for = "2m";
+            labels = { severity = "warning"; };
+            annotations = { summary = "High request latency on {{ $labels.instance }}"; description = "Probe duration is above 0.5 seconds for the last 2 minutes."; };
+          }
+        ];
+      }
+    ];
+  };
+}
diff --git 
a/modules/nixos/prometheus/caddy.nix b/modules/nixos/prometheus/caddy.nix index a62b639..d35049b 100644 --- a/modules/nixos/prometheus/caddy.nix +++ b/modules/nixos/prometheus/caddy.nix @@ -3,7 +3,7 @@ let cfg = config.custom.prometheus; in { - config = lib.mkIf cfg.enable { - services.caddy.globalConfig = lib.mkIf cfg.exporters.caddy.enable '' + config = lib.mkIf (cfg.enable && cfg.exporters.caddy.enable) { + services.caddy.globalConfig = '' servers { metrics @@ -11,16 +11,16 @@ in ''; services.prometheus.scrapeConfigs = [ - (lib.mkIf cfg.exporters.caddy.enable { + { job_name = "caddy"; static_configs = [ { targets = [ "127.0.0.1:2019" ]; } ]; - }) + } ]; custom.prometheus.ruleModules = [ - (lib.mkIf cfg.exporters.caddy.enable { + { name = "caddy_alerts"; rules = [ { @@ -38,7 +38,7 @@ in annotations = { summary = "High request latency on {{ $labels.instance }}"; description = "95th percentile of request latency is above 0.5 seconds for the last 2 minutes."; }; } ]; - }) + } ]; }; diff --git a/modules/nixos/prometheus/default.nix b/modules/nixos/prometheus/default.nix index 803b9aa..c0f0a70 100644 --- a/modules/nixos/prometheus/default.nix +++ b/modules/nixos/prometheus/default.nix @@ -4,7 +4,6 @@ with lib; let cfg = config.custom.prometheus; - exporterCfg = config.custom.prometheus.exporters; mkExporterOption = enableOption: (mkOption { type = types.bool; default = enableOption; @@ -26,6 +25,7 @@ let in { imports = [ + ./blackbox.nix ./caddy.nix ./gotosocial.nix ./ntfy-sh.nix @@ -43,6 +43,7 @@ in }; restic.enable = mkExporterOption config.services.restic.server.enable; + blackbox.enable = mkExporterOption false; caddy.enable = mkExporterOption config.services.caddy.enable; gotosocial.enable = mkExporterOption config.services.gotosocial.enable; ntfy-sh.enable = mkExporterOption config.services.gotosocial.enable; @@ -187,6 +188,13 @@ in labels = { severity = "critical"; }; annotations = { summary = "Outbound network traffic exceed 300GB for last 30 day"; }; } + { + alert = "JobDown"; + expr = "up == 
0"; + for = "1m"; + labels = { severity = "critical"; }; + annotations = { summary = "Job {{ $labels.job }} down for 1m."; }; + } ]; } ]; diff --git a/modules/nixos/prometheus/gotosocial.nix b/modules/nixos/prometheus/gotosocial.nix index 4870e88..a643d19 100644 --- a/modules/nixos/prometheus/gotosocial.nix +++ b/modules/nixos/prometheus/gotosocial.nix @@ -3,8 +3,8 @@ let cfg = config.custom.prometheus; in { - config = lib.mkIf cfg.exporters.gotosocial.enable { - services.gotosocial.settings = lib.mkIf cfg.exporters.gotosocial.enable { + config = lib.mkIf (cfg.enable && cfg.exporters.gotosocial.enable) { + services.gotosocial.settings = { metrics-enabled = true; }; services.prometheus.scrapeConfigs = [ diff --git a/modules/nixos/prometheus/ntfy-sh.nix b/modules/nixos/prometheus/ntfy-sh.nix index 35d62ff..513f130 100644 --- a/modules/nixos/prometheus/ntfy-sh.nix +++ b/modules/nixos/prometheus/ntfy-sh.nix @@ -3,15 +3,15 @@ let cfg = config.custom.prometheus; in { - config = lib.mkIf cfg.enable { + config = lib.mkIf (cfg.enable && cfg.exporters.ntfy-sh.enable) { services.ntfy-sh.settings.enable-metrics = true; services.prometheus.scrapeConfigs = [ - (lib.mkIf cfg.exporters.ntfy-sh.enable { + { job_name = "ntfy-sh"; static_configs = [ { targets = [ "ntfy.xinyang.life" ]; } ]; - }) + } ]; }; } diff --git a/modules/nixos/prometheus/restic.nix b/modules/nixos/prometheus/restic.nix index 80f0316..750b61a 100644 --- a/modules/nixos/prometheus/restic.nix +++ b/modules/nixos/prometheus/restic.nix @@ -3,7 +3,7 @@ let cfg = config.custom.prometheus; in { - config = lib.mkIf cfg.enable { + config = lib.mkIf (cfg.enable && cfg.exporters.restic.enable) { services.restic.server.prometheus = true; services.prometheus.scrapeConfigs = [ @@ -16,7 +16,7 @@ in ]; custom.prometheus.ruleModules = [ - (lib.mkIf cfg.exporters.restic.enable { + { name = "restic_alerts"; rules = [ { @@ -34,7 +34,7 @@ in annotations = { summary = "Restic {{ $labels.client_hostname }} / {{ 
$labels.client_username }} backup is outdated"; description = "Restic backup is outdated\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}"; }; } ]; - }) + } ]; };