Skip to content

Commit

Permalink
feat: migrate from prometheus to victoriametrics (#171)
Browse files Browse the repository at this point in the history
* feat: migrate from prometheus to victoriametrics

* fix: duplicated declaration

* fix: victoriametrics - isSystemUser

* fix: import promTypes

* fix: vmalert

* fix(victoriametrics): cli args
  • Loading branch information
ryan4yin authored Oct 23, 2024
1 parent 0fb0601 commit 34072df
Show file tree
Hide file tree
Showing 12 changed files with 1,716 additions and 206 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Prometheus & Alertmanager
# Monitoring & Alerting

## Alert Rules

Expand Down
47 changes: 47 additions & 0 deletions hosts/idols-aquamarine/monitoring/alertmanager.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Alertmanager instance for this host. Notifications are delivered by e-mail
# through QQ Mail's SMTP service.
#
# The `$SMTP_*` values below are placeholders; the real values live in the
# age-encrypted environment file referenced by `environmentFile`.
# NOTE(review): this relies on the NixOS alertmanager module substituting
# environment variables into the generated configuration - confirm against the
# nixpkgs version in use.
{config, ...}: let
  # Single receiver name shared by the route tree and the receiver list.
  defaultReceiver = "default";

  # Outgoing mail settings.
  smtpSettings = {
    smtp_smarthost = "smtp.qq.com:465";
    smtp_from = "$SMTP_SENDER_EMAIL";
    smtp_auth_username = "$SMTP_AUTH_USERNAME";
    smtp_auth_password = "$SMTP_AUTH_PASSWORD";
    # smtp.qq.com:465 supports SSL only, so TLS must not be required here.
    # https://service.mail.qq.com/detail/0/310
    smtp_require_tls = false;
  };

  # Alerts are grouped per host; timings control how notifications are batched
  # and how often an unresolved alert is re-sent.
  perHostRoute = {
    receiver = defaultReceiver;
    group_by = ["host"];
    group_wait = "5m";
    group_interval = "5m";
    repeat_interval = "4h";
  };

  mailReceiver = {
    name = defaultReceiver;
    email_configs = [
      {
        to = "[email protected]";
        # Also send a mail when an alert is resolved.
        send_resolved = true;
      }
    ];
  };
in {
  services.prometheus.alertmanager = {
    enable = true;

    # Only reachable from the local machine.
    listenAddress = "127.0.0.1";
    port = 9093;
    webExternalUrl = "http://alertmanager.writefor.fun";
    logLevel = "info";

    environmentFile = config.age.secrets."alertmanager.env".path;

    configuration = {
      global = smtpSettings;
      route = {
        receiver = defaultReceiver;
        routes = [perHostRoute];
      };
      receivers = [mailReceiver];
    };
  };
}
7 changes: 7 additions & 0 deletions hosts/idols-aquamarine/monitoring/default.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Entry point of the monitoring stack for this host.
#
# No module argument is used here, so none is requested by name; `...` keeps
# the module callable with whatever arguments the module system passes.
{...}: {
  imports = [
    # Custom VictoriaMetrics service module (`services.my-victoriametrics`).
    ./module
    # NOTE(review): presumably the host-specific VictoriaMetrics settings -
    # the file is not part of this view.
    ./victoriametrics.nix
    # Alertmanager with e-mail notifications.
    ./alertmanager.nix
  ];
}
195 changes: 195 additions & 0 deletions hosts/idols-aquamarine/monitoring/module/default.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
# Based on
# - https://github.com/NixOS/nixpkgs/blob/nixos-24.05/nixos/modules/services/databases/victoriametrics.nix
# - https://github.com/NixOS/nixpkgs/blob/nixos-24.05/nixos/modules/services/monitoring/prometheus/default.nix
{
config,
pkgs,
lib,
...
}:
with lib; let
# Shorthand for the values of this module's options.
cfg = config.services.my-victoriametrics;

# Serializer used to render the scrape configuration as a YAML file.
yaml = pkgs.formats.yaml {};

# Option types for the Prometheus-style configuration, defined in the sibling
# file ./promTypes.nix.
promTypes = import ./promTypes.nix {inherit lib;};

# Data directory handed to `-storageDataPath`; also the unit's working directory.
workingDir = "/var/lib/${cfg.stateDir}";

# `address:port` the HTTP API listens on.
bindAddr = "${cfg.listenAddress}:${toString cfg.port}";

# Strips the `_module` attribute and every attribute whose value is null
# before the configuration is serialized.
filterValidPrometheus = filterAttrsListRecursive (name: value: name != "_module" && value != null);

# This becomes the main config file for VictoriaMetrics's `-promscrape.config`
# https://docs.victoriametrics.com/vmagent/#how-to-collect-metrics-in-prometheus-format
scrapeConfig = {
  scrape_configs = filterValidPrometheus cfg.scrapeConfigs;
  global = filterValidPrometheus cfg.globalConfig;
};

# Store path of the rendered scrape configuration.
generatedPrometheusYml = yaml.generate "prometheus.yml" scrapeConfig;

# Recursively filters a nested structure of attribute sets and lists.
#
#   pred: function `name: value: bool`, applied to every attribute.
#   x:    the value to filter.
#
# - For an attribute set, only the attributes for which `pred name value`
#   is true are kept, and each kept value is filtered recursively.
# - For a list, every element is filtered recursively.
# - Any other value is returned unchanged.
filterAttrsListRecursive = pred: x:
  if isList x
  then map (filterAttrsListRecursive pred) x
  else if isAttrs x
  then mapAttrs (_: filterAttrsListRecursive pred) (filterAttrs pred x)
  else x;
in {
# User-facing options of the custom VictoriaMetrics service.
options.services.my-victoriametrics = {
  # VictoriaMetrics is a long-term remote storage for *Prometheus*; the
  # previous wording ("for victoriametrics") was self-referential.
  enable = mkEnableOption "VictoriaMetrics, a time series database, long-term remote storage for Prometheus";
  package = mkPackageOption pkgs "victoriametrics" {};

  port = mkOption {
    type = types.port;
    default = 8428;
    description = ''
      Port to listen on.
    '';
  };

  listenAddress = mkOption {
    type = types.str;
    default = "0.0.0.0";
    description = ''
      Address to listen on for the http API.
    '';
  };

  stateDir = mkOption {
    type = types.str;
    default = "victoriametrics2";
    description = ''
      Directory below `/var/lib` to store VictoriaMetrics metrics data.
      This directory will be created automatically using systemd's StateDirectory mechanism.
    '';
  };

  retentionTime = mkOption {
    type = types.nullOr types.str;
    default = null;
    example = "15d";
    description = ''
      How long to retain samples in storage.
      The minimum retentionPeriod is 24h or 1d. See also -retentionFilter
      The following optional suffixes are supported: s (second), h (hour), d (day), w (week), y (year).
      If suffix isn't set, then the duration is counted in months (default 1)
    '';
  };

  # Rendered into the `global` section of the generated scrape config.
  globalConfig = mkOption {
    type = promTypes.globalConfig;
    default = {};
    description = ''
      Parameters that are valid in all configuration contexts. They
      also serve as defaults for other configuration sections
    '';
  };

  # Rendered into the `scrape_configs` section of the generated scrape config.
  scrapeConfigs = mkOption {
    type = types.listOf promTypes.scrape_config;
    default = [];
    description = ''
      A list of scrape configurations.
      See docs: <https://docs.victoriametrics.com/vmagent/#how-to-collect-metrics-in-prometheus-format>
    '';
  };

  # Appended, shell-escaped, to the end of the daemon's command line.
  extraFlags = mkOption {
    type = types.listOf types.str;
    default = [];
    description = ''
      Extra options to pass to VictoriaMetrics. See the README:
      <https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md>
      or {command}`victoriametrics -help` for more
      information.
    '';
  };
};
# System configuration produced when the service is enabled: a dedicated
# user/group and a hardened systemd unit running VictoriaMetrics.
config = lib.mkIf cfg.enable {
  users.groups.victoriametrics = {};
  users.users.victoriametrics = {
    description = "victoriametrics daemon user";
    isSystemUser = true; # required when uid is null
    group = "victoriametrics";
  };

  systemd.services.my-victoriametrics = {
    description = "VictoriaMetrics time series database";
    wantedBy = ["multi-user.target"];
    after = ["network.target"];

    startLimitBurst = 5;
    serviceConfig = {
      # `retentionTime` defaults to null, and null cannot be interpolated into
      # a string (evaluation error). The flag is therefore only emitted when a
      # value is set; otherwise the daemon's own default retention applies.
      ExecStart = ''
        ${cfg.package}/bin/victoria-metrics \
          -storageDataPath=${workingDir} \
          -httpListenAddr=${bindAddr} \
          ${lib.optionalString (cfg.retentionTime != null) "-retentionPeriod=${cfg.retentionTime}"} \
          -promscrape.config=${generatedPrometheusYml} \
          ${lib.escapeShellArgs cfg.extraFlags}
      '';
      RestartSec = 1;
      User = "victoriametrics";
      Restart = "on-failure";
      RuntimeDirectory = "victoriametrics";
      RuntimeDirectoryMode = "0700";
      WorkingDirectory = workingDir;
      StateDirectory = cfg.stateDir;
      StateDirectoryMode = "0700";

      # Increase the limit to avoid errors like 'too many open files' when merging small parts
      LimitNOFILE = 1048576;

      # Hardening
      # The bind capability is only granted when a privileged port (< 1024)
      # is configured; otherwise the bounding set is emptied.
      AmbientCapabilities = lib.mkIf (cfg.port < 1024) ["CAP_NET_BIND_SERVICE"];
      CapabilityBoundingSet =
        if (cfg.port < 1024)
        then ["CAP_NET_BIND_SERVICE"]
        else [""];
      DeviceAllow = ["/dev/null rw"];
      DevicePolicy = "strict";
      LockPersonality = true;
      MemoryDenyWriteExecute = true;
      NoNewPrivileges = true;
      PrivateDevices = true;
      PrivateTmp = true;
      PrivateUsers = true;
      ProtectClock = true;
      ProtectControlGroups = true;
      ProtectHome = true;
      ProtectHostname = true;
      ProtectKernelLogs = true;
      ProtectKernelModules = true;
      ProtectKernelTunables = true;
      ProtectProc = "invisible";
      ProtectSystem = "full";
      RemoveIPC = true;
      RestrictAddressFamilies = ["AF_INET" "AF_INET6" "AF_UNIX"];
      RestrictNamespaces = true;
      RestrictRealtime = true;
      RestrictSUIDSGID = true;
      SystemCallArchitectures = "native";
      SystemCallFilter = ["@system-service" "~@privileged"];
    };

    # Poll the HTTP API's `/ping` endpoint once per second until it answers,
    # so the post-start phase only completes once the server is reachable.
    postStart = lib.mkBefore ''
      until ${lib.getBin pkgs.curl}/bin/curl -s -o /dev/null http://${bindAddr}/ping; do
        sleep 1;
      done
    '';
  };
};
}
Loading

0 comments on commit 34072df

Please sign in to comment.