modules/prometheus: add blackbox exporter
This commit is contained in:
parent
5fe7ff0434
commit
daeeb211a2
6 changed files with 105 additions and 14 deletions
83
modules/nixos/prometheus/blackbox.nix
Normal file
83
modules/nixos/prometheus/blackbox.nix
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
# Blackbox exporter integration for the custom Prometheus module.
#
# When both `custom.prometheus.enable` and
# `custom.prometheus.exporters.blackbox.enable` are set, this module:
#   * runs the blackbox exporter on loopback with a single TCP/IPv4 probe module,
#   * adds a scrape job that probes a fixed list of remote endpoints through it,
#   * adds a scrape job for the exporter's own endpoint,
#   * registers a latency alert on the probe results.
{ config, lib, pkgs, ... }:
let
  cfg = config.custom.prometheus;

  # Loopback address the exporter binds to.
  listenAddress = "127.0.0.1";

  # host:port of the local exporter; used both as the relabel replacement for
  # probe scrapes and as the target of the exporter's own scrape job.
  exporterAddress = "${listenAddress}:${toString config.services.prometheus.exporters.blackbox.port}";
in
{
  config = lib.mkIf (cfg.enable && cfg.exporters.blackbox.enable) {
    services.prometheus.exporters.blackbox = {
      enable = true;
      inherit listenAddress;
      configFile = pkgs.writeText "blackbox.config.yaml" (
        lib.generators.toYAML { } {
          modules = {
            # Plain TCP connect over IPv4 only: no TLS and no fallback to IPv6.
            tcp4_connect = {
              prober = "tcp";
              tcp = {
                ip_protocol_fallback = false;
                preferred_ip_protocol = "ip4";
                tls = false;
              };
              timeout = "15s";
            };
          };
        }
      );
    };

    services.prometheus.scrapeConfigs = [
      {
        job_name = "blackbox";
        scrape_interval = "1m";
        metrics_path = "/probe";
        params = {
          module = [ "tcp4_connect" ];
        };
        static_configs = [
          {
            targets = [
              "tok-00.namely.icu:8080"
              "la-00.video.namely.icu:8080"
              "auth.xinyang.life:443"
              "home.xinyang.life:8000"
            ];
          }
        ];
        relabel_configs = [
          # Pass the listed target to the exporter as the `target` URL parameter.
          {
            source_labels = [ "__address__" ];
            target_label = "__param_target";
          }
          # Keep the probed endpoint as the `instance` label.
          {
            source_labels = [ "__param_target" ];
            target_label = "instance";
          }
          # Send the actual scrape request to the local exporter.
          {
            target_label = "__address__";
            replacement = exporterAddress;
          }
        ];
      }
      # Scrape the exporter's own endpoint.
      {
        job_name = "blackbox_exporter";
        static_configs = [
          { targets = [ exporterAddress ]; }
        ];
      }
    ];

    custom.prometheus.ruleModules = [
      {
        name = "probe_alerts";
        rules = [
          {
            alert = "HighProbeLatency";
            expr = "probe_duration_seconds > 0.5";
            for = "2m";
            labels = { severity = "warning"; };
            annotations = {
              summary = "High request latency on {{ $labels.instance }}";
              # The expression is a raw threshold on probe duration, not a
              # percentile, so the description states exactly that.
              description = "TCP probe duration has been above 0.5 seconds for the last 2 minutes.";
            };
          }
        ];
      }
    ];
  };
}
|
|
@ -3,7 +3,7 @@ let
|
||||||
cfg = config.custom.prometheus;
|
cfg = config.custom.prometheus;
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
config = lib.mkIf cfg.enable {
|
config = lib.mkIf (cfg.enable && cfg.exporters.caddy.enable) {
|
||||||
services.caddy.globalConfig = lib.mkIf cfg.exporters.caddy.enable ''
|
services.caddy.globalConfig = lib.mkIf cfg.exporters.caddy.enable ''
|
||||||
servers {
|
servers {
|
||||||
metrics
|
metrics
|
||||||
|
@ -11,16 +11,16 @@ in
|
||||||
'';
|
'';
|
||||||
|
|
||||||
services.prometheus.scrapeConfigs = [
|
services.prometheus.scrapeConfigs = [
|
||||||
(lib.mkIf cfg.exporters.caddy.enable {
|
{
|
||||||
job_name = "caddy";
|
job_name = "caddy";
|
||||||
static_configs = [
|
static_configs = [
|
||||||
{ targets = [ "127.0.0.1:2019" ]; }
|
{ targets = [ "127.0.0.1:2019" ]; }
|
||||||
];
|
];
|
||||||
})
|
}
|
||||||
];
|
];
|
||||||
|
|
||||||
custom.prometheus.ruleModules = [
|
custom.prometheus.ruleModules = [
|
||||||
(lib.mkIf cfg.exporters.caddy.enable {
|
{
|
||||||
name = "caddy_alerts";
|
name = "caddy_alerts";
|
||||||
rules = [
|
rules = [
|
||||||
{
|
{
|
||||||
|
@ -38,7 +38,7 @@ in
|
||||||
annotations = { summary = "High request latency on {{ $labels.instance }}"; description = "95th percentile of request latency is above 0.5 seconds for the last 2 minutes."; };
|
annotations = { summary = "High request latency on {{ $labels.instance }}"; description = "95th percentile of request latency is above 0.5 seconds for the last 2 minutes."; };
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
})
|
}
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,6 @@ with lib;
|
||||||
|
|
||||||
let
|
let
|
||||||
cfg = config.custom.prometheus;
|
cfg = config.custom.prometheus;
|
||||||
exporterCfg = config.custom.prometheus.exporters;
|
|
||||||
mkExporterOption = enableOption: (mkOption {
|
mkExporterOption = enableOption: (mkOption {
|
||||||
type = types.bool;
|
type = types.bool;
|
||||||
default = enableOption;
|
default = enableOption;
|
||||||
|
@ -26,6 +25,7 @@ let
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
|
./blackbox.nix
|
||||||
./caddy.nix
|
./caddy.nix
|
||||||
./gotosocial.nix
|
./gotosocial.nix
|
||||||
./ntfy-sh.nix
|
./ntfy-sh.nix
|
||||||
|
@ -43,6 +43,7 @@ in
|
||||||
};
|
};
|
||||||
|
|
||||||
restic.enable = mkExporterOption config.services.restic.server.enable;
|
restic.enable = mkExporterOption config.services.restic.server.enable;
|
||||||
|
blackbox.enable = mkExporterOption false;
|
||||||
caddy.enable = mkExporterOption config.services.caddy.enable;
|
caddy.enable = mkExporterOption config.services.caddy.enable;
|
||||||
gotosocial.enable = mkExporterOption config.services.gotosocial.enable;
|
gotosocial.enable = mkExporterOption config.services.gotosocial.enable;
|
||||||
ntfy-sh.enable = mkExporterOption config.services.gotosocial.enable;
|
# Default to exporting ntfy-sh metrics whenever the ntfy-sh service itself is enabled.
ntfy-sh.enable = mkExporterOption config.services.ntfy-sh.enable;
|
||||||
|
@ -187,6 +188,13 @@ in
|
||||||
labels = { severity = "critical"; };
|
labels = { severity = "critical"; };
|
||||||
annotations = { summary = "Outbound network traffic exceed 300GB for last 30 day"; };
|
annotations = { summary = "Outbound network traffic exceed 300GB for last 30 day"; };
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
alert = "JobDown";
|
||||||
|
expr = "up == 0";
|
||||||
|
for = "1m";
|
||||||
|
labels = { severity = "critical"; };
|
||||||
|
annotations = { summary = "Job {{ $labels.job }} down for 1m."; };
|
||||||
|
}
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
|
|
@ -3,8 +3,8 @@ let
|
||||||
cfg = config.custom.prometheus;
|
cfg = config.custom.prometheus;
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
config = lib.mkIf cfg.exporters.gotosocial.enable {
|
config = lib.mkIf (cfg.enable && cfg.exporters.gotosocial.enable) {
|
||||||
services.gotosocial.settings = lib.mkIf cfg.exporters.gotosocial.enable {
|
services.gotosocial.settings = {
|
||||||
metrics-enabled = true;
|
metrics-enabled = true;
|
||||||
};
|
};
|
||||||
services.prometheus.scrapeConfigs = [
|
services.prometheus.scrapeConfigs = [
|
||||||
|
|
|
@ -3,15 +3,15 @@ let
|
||||||
cfg = config.custom.prometheus;
|
cfg = config.custom.prometheus;
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
config = lib.mkIf cfg.enable {
|
config = lib.mkIf (cfg.enable && cfg.exporters.ntfy-sh.enable) {
|
||||||
services.ntfy-sh.settings.enable-metrics = true;
|
services.ntfy-sh.settings.enable-metrics = true;
|
||||||
services.prometheus.scrapeConfigs = [
|
services.prometheus.scrapeConfigs = [
|
||||||
(lib.mkIf cfg.exporters.ntfy-sh.enable {
|
{
|
||||||
job_name = "ntfy-sh";
|
job_name = "ntfy-sh";
|
||||||
static_configs = [
|
static_configs = [
|
||||||
{ targets = [ "ntfy.xinyang.life" ]; }
|
{ targets = [ "ntfy.xinyang.life" ]; }
|
||||||
];
|
];
|
||||||
})
|
}
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,7 +3,7 @@ let
|
||||||
cfg = config.custom.prometheus;
|
cfg = config.custom.prometheus;
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
config = lib.mkIf cfg.enable {
|
config = lib.mkIf (cfg.enable && cfg.exporters.restic.enable) {
|
||||||
services.restic.server.prometheus = true;
|
services.restic.server.prometheus = true;
|
||||||
|
|
||||||
services.prometheus.scrapeConfigs = [
|
services.prometheus.scrapeConfigs = [
|
||||||
|
@ -16,7 +16,7 @@ in
|
||||||
];
|
];
|
||||||
|
|
||||||
custom.prometheus.ruleModules = [
|
custom.prometheus.ruleModules = [
|
||||||
(lib.mkIf cfg.exporters.restic.enable {
|
{
|
||||||
name = "restic_alerts";
|
name = "restic_alerts";
|
||||||
rules = [
|
rules = [
|
||||||
{
|
{
|
||||||
|
@ -34,7 +34,7 @@ in
|
||||||
annotations = { summary = "Restic {{ $labels.client_hostname }} / {{ $labels.client_username }} backup is outdated"; description = "Restic backup is outdated\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}"; };
|
annotations = { summary = "Restic {{ $labels.client_hostname }} / {{ $labels.client_username }} backup is outdated"; description = "Restic backup is outdated\\n VALUE = {{ $value }}\\n LABELS = {{ $labels }}"; };
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
})
|
}
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue