Files
homelab-optimized/hosts/vms/homelab-vm/monitoring.yaml
Gitea Mirror Bot d6eb5dcb1e
Some checks failed
Documentation / Build Docusaurus (push) Failing after 18m8s
Documentation / Deploy to GitHub Pages (push) Has been skipped
Sanitized mirror from private repository - 2026-04-19 07:39:14 UTC
2026-04-19 07:39:14 +00:00

422 lines
14 KiB
YAML

# Prometheus + Grafana Monitoring Stack - Portainer GitOps Version
# =============================================================================
# NOTE: The live deployment is monitoring-compose.yml (plain docker compose,
# bind-mounted configs at /home/homelab/docker/monitoring/).
# This file is the self-contained Portainer GitOps version (embedded configs).
# Stack 476 on endpoint 443399 no longer exists in Portainer.
# =============================================================================
# Ports: 9090 (Prometheus), 3300 (Grafana), 9116 (SNMP Exporter), 9100 (Node Exporter, host network)
#
# Uses docker configs for prometheus.yml and snmp.yml since bind mounts have
# symlink issues with Portainer git deploy
#
# Dashboard Provisioning:
# - Datasources: Auto-configured Prometheus
# - Dashboards: file-provisioned from /var/lib/grafana/dashboards (bind-mounted from the host), e.g. Infrastructure Overview, Synology NAS, Node Exporter Full (from Grafana.com)
#
# Old/deprecated configs have been moved to: archive/deprecated-monitoring-stacks/
configs:
  # Embedded configuration files. Each entry's `content` is mounted into a
  # container through that service's `configs:` list (see `services:`), so the
  # stack carries its own config instead of relying on host bind mounts.

  # Grafana Datasource Provisioning
  # Mounted into the grafana service at
  # /etc/grafana/provisioning/datasources/datasources.yaml.
  # Declares a single default Prometheus datasource reached over the stack
  # network at http://prometheus:9090, with a fixed uid.
  grafana_datasources:
    content: |
      apiVersion: 1
      datasources:
        - name: Prometheus
          type: prometheus
          uid: cfbskvs8upds0b
          access: proxy
          url: http://prometheus:9090
          isDefault: true
          editable: true
  # Grafana Dashboard Provisioning Config
  # Mounted into the grafana service at
  # /etc/grafana/provisioning/dashboards/dashboards.yaml.
  # Dashboards are loaded from bind-mounted /home/homelab/docker/grafana-dashboards/
  # (visible inside the container as /var/lib/grafana/dashboards, the `path` below).
  # To add a new dashboard: drop a JSON file in that directory and restart Grafana
  # Dashboard JSONs are backed up in the repo at hosts/vms/homelab-vm/grafana/dashboards/
  grafana_dashboards_config:
    content: |
      apiVersion: 1
      providers:
        - name: 'default'
          orgId: 1
          folder: 'Provisioned'
          folderUid: 'provisioned'
          type: file
          disableDeletion: true
          updateIntervalSeconds: 30
          allowUiUpdates: true
          options:
            path: /var/lib/grafana/dashboards
  # Dashboard JSON files are now bind-mounted from /home/homelab/docker/grafana-dashboards/
  # Backed up in repo at hosts/vms/homelab-vm/grafana/dashboards/
  # Dashboards: infrastructure-overview-v2, node-details-v2, node-exporter-full,
  # synology-nas-v3, tailscale-bandwidth, truenas-guava

  # Prometheus main configuration.
  # Mounted into the prometheus service at /etc/prometheus/prometheus.yml.
  # Layout of the scrape jobs:
  #   - 'prometheus'     : self-scrape on localhost:9090
  #   - 'node_exporter'  : the Docker host via host.docker.internal:9100
  #   - '<host>-node'    : node exporters on other machines, port 9100
  #   - '<host>-snmp'    : snmp exporters listening on the NAS hosts' own :9116,
  #                        queried with module=synology, auth=snmpv3,
  #                        target=127.0.0.1
  # Every job overwrites the `instance` label with a fixed friendly host name.
  # In the *-snmp jobs the first and third relabel rules re-assert the same
  # __param_target / __address__ values that `params` and `static_configs`
  # already set.
  # NOTE(review): `alerting` points at alertmanager:9093 and `rule_files` at
  # /etc/prometheus/alert-rules.yml, but no alertmanager service or alert-rules
  # config is defined in the visible part of this stack - confirm they are
  # provided elsewhere or are leftovers from monitoring-compose.yml.
  # NOTE(review): 'node_exporter' and 'homelab-node' both set
  # instance='homelab-vm'; presumably the same machine reached by two routes -
  # confirm the double scrape is intended.
  prometheus_config:
    content: |
      global:
        scrape_interval: 15s
        evaluation_interval: 15s
      alerting:
        alertmanagers:
          - static_configs:
              - targets:
                  - alertmanager:9093
      rule_files:
        - /etc/prometheus/alert-rules.yml
      scrape_configs:
        - job_name: 'prometheus'
          static_configs:
            - targets: ['localhost:9090']
        - job_name: 'node_exporter'
          static_configs:
            - targets: ['host.docker.internal:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'homelab-vm'
        - job_name: 'homelab-node'
          static_configs:
            - targets: ['100.67.40.126:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'homelab-vm'
        - job_name: 'raspberry-pis'
          static_configs:
            - targets: ['100.77.151.40:9100']
            # pi-5-kevin (100.123.246.75) removed - offline 127+ days
          relabel_configs:
            - target_label: instance
              replacement: 'pi-5'
        - job_name: 'setillo-node'
          static_configs:
            - targets: ['100.125.0.20:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'setillo'
        - job_name: 'setillo-snmp'
          metrics_path: /snmp
          params:
            module: [synology]
            auth: [snmpv3]
            target: ['127.0.0.1']
          static_configs:
            - targets: ['100.125.0.20:9116']
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
              replacement: '127.0.0.1'
            - source_labels: [__param_target]
              target_label: instance
              replacement: 'setillo'
            - target_label: __address__
              replacement: '100.125.0.20:9116'
        - job_name: 'calypso-node'
          static_configs:
            - targets: ['100.103.48.78:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'calypso'
        - job_name: 'calypso-snmp'
          metrics_path: /snmp
          params:
            module: [synology]
            auth: [snmpv3]
            target: ['127.0.0.1']
          static_configs:
            - targets: ['100.103.48.78:9116']
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
              replacement: '127.0.0.1'
            - source_labels: [__param_target]
              target_label: instance
              replacement: 'calypso'
            - target_label: __address__
              replacement: '100.103.48.78:9116'
        - job_name: 'atlantis-node'
          static_configs:
            - targets: ['100.83.230.112:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'atlantis'
        - job_name: 'atlantis-snmp'
          metrics_path: /snmp
          params:
            module: [synology]
            auth: [snmpv3]
            target: ['127.0.0.1']
          static_configs:
            - targets: ['100.83.230.112:9116']
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
              replacement: '127.0.0.1'
            - source_labels: [__param_target]
              target_label: instance
              replacement: 'atlantis'
            - target_label: __address__
              replacement: '100.83.230.112:9116'
        - job_name: 'concord-nuc-node'
          static_configs:
            - targets: ['100.72.55.21:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'concord-nuc'
        - job_name: 'truenas-node'
          static_configs:
            - targets: ['100.75.252.64:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'guava'
        - job_name: 'seattle-node'
          static_configs:
            - targets: ['100.82.197.124:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'seattle'
        - job_name: 'proxmox-node'
          static_configs:
            - targets: ['100.87.12.28:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'proxmox'

  # snmp_exporter configuration.
  # Mounted into the snmp_exporter service at /etc/snmp_exporter/snmp.yml.
  # Defines one auth profile (`snmpv3`) and one module (`synology`); these are
  # the names the *-snmp scrape jobs pass as `auth` and `module`.
  # The module walks the listed OID subtrees and exports the named metrics; the
  # disk* and raid* metrics carry a diskIndex / raidIndex label taken from the
  # table index.
  # The password values are placeholders left by the mirror sanitizer.
  # NOTE(review): MD5 + DES are the weakest SNMPv3 auth/priv choices - confirm
  # whether the monitored devices can be moved to SHA/AES.
  snmp_config:
    content: |
      auths:
        snmpv3:
          version: 3
          security_level: authPriv
          auth_protocol: MD5
          username: snmp-exporter
          password: "REDACTED_PASSWORD"
          priv_protocol: DES
          priv_password: "REDACTED_PASSWORD"
      modules:
        synology:
          walk:
            - 1.3.6.1.2.1.1
            - 1.3.6.1.2.1.2
            - 1.3.6.1.2.1.25.2
            - 1.3.6.1.2.1.25.3.3
            - 1.3.6.1.2.1.31.1.1
            - 1.3.6.1.4.1.2021.4
            - 1.3.6.1.4.1.2021.10
            - 1.3.6.1.4.1.2021.11
            - 1.3.6.1.4.1.6574.1
            - 1.3.6.1.4.1.6574.2
            - 1.3.6.1.4.1.6574.3
            - 1.3.6.1.4.1.6574.4
            - 1.3.6.1.4.1.6574.5
            - 1.3.6.1.4.1.6574.6
            - 1.3.6.1.4.1.6574.101
            - 1.3.6.1.4.1.6574.102
          metrics:
            - name: sysDescr
              oid: 1.3.6.1.2.1.1.1
              type: DisplayString
            - name: sysUpTime
              oid: 1.3.6.1.2.1.1.3
              type: gauge
            - name: sysName
              oid: 1.3.6.1.2.1.1.5
              type: DisplayString
            - name: ssCpuRawUser
              oid: 1.3.6.1.4.1.2021.11.50
              type: counter
            - name: ssCpuRawSystem
              oid: 1.3.6.1.4.1.2021.11.52
              type: counter
            - name: ssCpuRawIdle
              oid: 1.3.6.1.4.1.2021.11.53
              type: counter
            - name: memTotalSwap
              oid: 1.3.6.1.4.1.2021.4.3
              type: gauge
            - name: memAvailSwap
              oid: 1.3.6.1.4.1.2021.4.4
              type: gauge
            - name: memTotalReal
              oid: 1.3.6.1.4.1.2021.4.5
              type: gauge
            - name: memAvailReal
              oid: 1.3.6.1.4.1.2021.4.6
              type: gauge
            - name: systemStatus
              oid: 1.3.6.1.4.1.6574.1.1
              type: gauge
            - name: temperature
              oid: 1.3.6.1.4.1.6574.1.2
              type: gauge
            - name: powerStatus
              oid: 1.3.6.1.4.1.6574.1.3
              type: gauge
            - name: modelName
              oid: 1.3.6.1.4.1.6574.1.5.1
              type: DisplayString
            - name: version
              oid: 1.3.6.1.4.1.6574.1.5.3
              type: DisplayString
            - name: diskID
              oid: 1.3.6.1.4.1.6574.2.1.1.2
              type: DisplayString
              indexes:
                - labelname: diskIndex
                  type: gauge
            - name: diskStatus
              oid: 1.3.6.1.4.1.6574.2.1.1.5
              type: gauge
              indexes:
                - labelname: diskIndex
                  type: gauge
            - name: diskTemperature
              oid: 1.3.6.1.4.1.6574.2.1.1.6
              type: gauge
              indexes:
                - labelname: diskIndex
                  type: gauge
            - name: raidName
              oid: 1.3.6.1.4.1.6574.3.1.1.2
              type: DisplayString
              indexes:
                - labelname: raidIndex
                  type: gauge
            - name: raidStatus
              oid: 1.3.6.1.4.1.6574.3.1.1.3
              type: gauge
              indexes:
                - labelname: raidIndex
                  type: gauge
            - name: raidFreeSize
              oid: 1.3.6.1.4.1.6574.3.1.1.4
              type: gauge
              indexes:
                - labelname: raidIndex
                  type: gauge
            - name: raidTotalSize
              oid: 1.3.6.1.4.1.6574.3.1.1.5
              type: gauge
              indexes:
                - labelname: raidIndex
                  type: gauge
services:
  # Prometheus server.
  # Reads the embedded `prometheus_config`, keeps its TSDB in the
  # `prometheus-data` named volume and publishes the UI/API on host port 9090.
  # `--web.enable-lifecycle` turns on the HTTP reload/quit endpoints.
  # `host.docker.internal` is mapped to the Docker host gateway so the
  # 'node_exporter' scrape job can reach the host-networked exporter on :9100.
  # NOTE(review): image uses the floating `latest` tag while Grafana is pinned;
  # consider pinning a version for reproducible deploys.
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    configs:
      - source: prometheus_config
        target: /etc/prometheus/prometheus.yml
    volumes:
      - prometheus-data:/prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--web.enable-lifecycle"
    ports:
      - "9090:9090"
    restart: unless-stopped
    networks:
      - monitoring
    extra_hosts:
      - "host.docker.internal:host-gateway"

  # Grafana (OSS) with provisioned datasource/dashboards and Authentik SSO.
  # Published on host port 3300 (container port 3000).
  grafana:
    image: grafana/grafana-oss:12.4.0
    container_name: grafana
    # Mapping form is used on purpose: here the quotes around a value are YAML
    # quoting. In the list form (`- KEY="value"`) Compose passes the quote
    # characters through as part of the value, which would corrupt the admin
    # password and the OAuth client credentials.
    # The REDACTED_* values are placeholders left by the mirror sanitizer;
    # real secrets should come from a secret store / stack env, not from VCS.
    environment:
      GF_SECURITY_ADMIN_USER: admin
      GF_SECURITY_ADMIN_PASSWORD: "REDACTED_PASSWORD"
      # Disable Grafana 12 unified storage feature to restore home dashboard env var support
      GF_FEATURE_TOGGLES_DISABLE: kubernetesDashboards
      # Authentik OAuth2 SSO Configuration
      # Quoted so the container receives the string "true", not a YAML boolean.
      GF_AUTH_GENERIC_OAUTH_ENABLED: "true"
      GF_AUTH_GENERIC_OAUTH_NAME: Authentik
      GF_AUTH_GENERIC_OAUTH_CLIENT_ID: "REDACTED_CLIENT_ID"
      GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: "REDACTED_CLIENT_SECRET"
      GF_AUTH_GENERIC_OAUTH_SCOPES: openid profile email
      GF_AUTH_GENERIC_OAUTH_AUTH_URL: https://sso.vish.gg/application/o/authorize/
      GF_AUTH_GENERIC_OAUTH_TOKEN_URL: https://sso.vish.gg/application/o/token/
      GF_AUTH_GENERIC_OAUTH_API_URL: https://sso.vish.gg/application/o/userinfo/
      GF_AUTH_SIGNOUT_REDIRECT_URL: https://sso.vish.gg/application/o/grafana/end-session/
      # Role mapping expression: members of 'Grafana Admins' become Admin,
      # members of 'Grafana Editors' become Editor, everyone else Viewer.
      # Folded scalar (`>-`) joins the lines with single spaces, producing the
      # same one-line expression without exceeding the line-length limit.
      GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH: >-
        contains(groups[*], 'Grafana Admins') && 'Admin'
        || contains(groups[*], 'Grafana Editors') && 'Editor'
        || 'Viewer'
      # Required for Authentik - extract email and login from userinfo response
      GF_AUTH_GENERIC_OAUTH_EMAIL_ATTRIBUTE_PATH: email
      GF_AUTH_GENERIC_OAUTH_LOGIN_ATTRIBUTE_PATH: preferred_username
      GF_AUTH_GENERIC_OAUTH_NAME_ATTRIBUTE_PATH: name
      GF_SERVER_ROOT_URL: https://gf.vish.gg
      # Home dashboard is set via org preferences in Grafana DB (node-details-v2)
      # GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH is not used - home is DB-persisted via API
    configs:
      # Datasource provisioning
      - source: grafana_datasources
        target: /etc/grafana/provisioning/datasources/datasources.yaml
      # Dashboard provider config
      - source: grafana_dashboards_config
        target: /etc/grafana/provisioning/dashboards/dashboards.yaml
    volumes:
      - grafana-data:/var/lib/grafana
      # Dashboard JSONs — bind-mounted from host for easy add/update
      - /home/homelab/docker/grafana-dashboards:/var/lib/grafana/dashboards:ro
    ports:
      - "3300:3000"
    restart: unless-stopped
    depends_on:
      - prometheus
    networks:
      - monitoring

  # Node exporter for the Docker host itself.
  # Runs in the host's network and PID namespaces with the host root mounted
  # read-only at /host (`--path.rootfs=/host`). Because it uses host networking
  # it is not attached to the `monitoring` network and publishes no ports;
  # Prometheus scrapes it through host.docker.internal:9100.
  # NOTE(review): image uses the floating `latest` tag.
  node_exporter:
    image: prom/node-exporter:latest
    container_name: node_exporter
    network_mode: host
    pid: host
    volumes:
      - /:/host:ro,rslave
      - /sys:/host/sys:ro
      - /proc:/host/proc:ro
    command:
      - '--path.rootfs=/host'
    restart: unless-stopped

  # SNMP exporter loaded with the embedded `snmp_config`, published on 9116.
  # NOTE(review): the *-snmp scrape jobs in prometheus_config target exporters
  # on the NAS hosts' own :9116 rather than this container - confirm this
  # local instance is still needed.
  # NOTE(review): image uses the floating `latest` tag.
  snmp_exporter:
    image: prom/snmp-exporter:latest
    container_name: snmp_exporter
    configs:
      - source: snmp_config
        target: /etc/snmp_exporter/snmp.yml
    ports:
      - "9116:9116"
    restart: unless-stopped
    networks:
      - monitoring
# Named volumes holding persistent state: the Prometheus TSDB (mounted at
# /prometheus) and Grafana's data directory (mounted at /var/lib/grafana).
# `{}` spells out "no custom options" explicitly instead of a bare empty value.
volumes:
  prometheus-data: {}
  grafana-data: {}

# Bridge network shared by prometheus, grafana and snmp_exporter.
# node_exporter is not attached because it runs with host networking.
networks:
  monitoring:
    driver: bridge