# Prometheus + Grafana Monitoring Stack - Portainer GitOps Version
# =============================================================================
# NOTE: The live deployment is monitoring-compose.yml (plain docker compose,
#       bind-mounted configs at /home/homelab/docker/monitoring/).
#       This file is the self-contained Portainer GitOps version (embedded configs).
#       Stack 476 on endpoint 443399 no longer exists in Portainer.
# =============================================================================
# Ports: 9090 (Prometheus), 3300 (Grafana), 9116 (SNMP Exporter)
#
# Uses docker configs for prometheus.yml and snmp.yml since bind mounts have
# symlink issues with Portainer git deploy.
#
# Dashboard Provisioning:
#   - Datasources: Auto-configured Prometheus
#   - Dashboards: Infrastructure Overview, Synology NAS, Node Exporter Full (from Grafana.com);
#     the dashboard JSON files are read from the bind-mounted host directory
#     /home/homelab/docker/grafana-dashboards/
#
# Old/deprecated configs have been moved to: archive/deprecated-monitoring-stacks/
# Inline Compose configs: each entry embeds a complete configuration file as a
# literal block scalar; the services section mounts them by name.
configs:
  # Grafana Datasource Provisioning
  # Mounted into grafana at /etc/grafana/provisioning/datasources/datasources.yaml
  grafana_datasources:
    content: |
      apiVersion: 1
      datasources:
        - name: Prometheus
          type: prometheus
          uid: cfbskvs8upds0b
          access: proxy
          url: http://prometheus:9090
          isDefault: true
          editable: true

  # Grafana Dashboard Provisioning Config
  # Dashboards are loaded from bind-mounted /home/homelab/docker/grafana-dashboards/
  # To add a new dashboard: drop a JSON file in that directory and restart Grafana
  # Dashboard JSONs are backed up in the repo at hosts/vms/homelab-vm/grafana/dashboards/
  grafana_dashboards_config:
    content: |
      apiVersion: 1
      providers:
        - name: 'default'
          orgId: 1
          folder: 'Provisioned'
          folderUid: 'provisioned'
          type: file
          disableDeletion: true
          updateIntervalSeconds: 30
          allowUiUpdates: true
          options:
            path: /var/lib/grafana/dashboards

  # Dashboard JSON files are now bind-mounted from /home/homelab/docker/grafana-dashboards/
  # Backed up in repo at hosts/vms/homelab-vm/grafana/dashboards/
  # Dashboards: infrastructure-overview-v2, node-details-v2, node-exporter-full,
  #             synology-nas-v3, tailscale-bandwidth, truenas-guava

  # Prometheus server configuration (mounted at /etc/prometheus/prometheus.yml).
  # NOTE(review): the alerting/rule_files sections below reference
  # alertmanager:9093 and /etc/prometheus/alert-rules.yml, but the services
  # visible in this file define no alertmanager container and mount no
  # alert-rules.yml into prometheus - confirm both are provided elsewhere.
  # NOTE(review): the 'node_exporter' and 'homelab-node' jobs both relabel
  # instance to 'homelab-vm'; if they reach the same exporter the host is
  # scraped twice - verify this is intentional.
  # Every node job overwrites the instance label with a fixed host name. The
  # *-snmp jobs scrape a remote snmp_exporter on port 9116 and ask it (via the
  # "target" URL parameter) to query 127.0.0.1.
  prometheus_config:
    content: |
      global:
        scrape_interval: 15s
        evaluation_interval: 15s

      alerting:
        alertmanagers:
          - static_configs:
              - targets:
                  - alertmanager:9093

      rule_files:
        - /etc/prometheus/alert-rules.yml

      scrape_configs:
        - job_name: 'prometheus'
          static_configs:
            - targets: ['localhost:9090']

        - job_name: 'node_exporter'
          static_configs:
            - targets: ['host.docker.internal:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'homelab-vm'

        - job_name: 'homelab-node'
          static_configs:
            - targets: ['100.67.40.126:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'homelab-vm'

        - job_name: 'raspberry-pis'
          static_configs:
            - targets: ['100.77.151.40:9100']
            # pi-5-kevin (100.123.246.75) removed - offline 127+ days
          relabel_configs:
            - target_label: instance
              replacement: 'pi-5'

        - job_name: 'setillo-node'
          static_configs:
            - targets: ['100.125.0.20:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'setillo'

        - job_name: 'setillo-snmp'
          metrics_path: /snmp
          params:
            module: [synology]
            auth: [snmpv3]
            target: ['127.0.0.1']
          static_configs:
            - targets: ['100.125.0.20:9116']
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
              replacement: '127.0.0.1'
            - source_labels: [__param_target]
              target_label: instance
              replacement: 'setillo'
            - target_label: __address__
              replacement: '100.125.0.20:9116'

        - job_name: 'calypso-node'
          static_configs:
            - targets: ['100.103.48.78:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'calypso'

        - job_name: 'calypso-snmp'
          metrics_path: /snmp
          params:
            module: [synology]
            auth: [snmpv3]
            target: ['127.0.0.1']
          static_configs:
            - targets: ['100.103.48.78:9116']
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
              replacement: '127.0.0.1'
            - source_labels: [__param_target]
              target_label: instance
              replacement: 'calypso'
            - target_label: __address__
              replacement: '100.103.48.78:9116'

        - job_name: 'atlantis-node'
          static_configs:
            - targets: ['100.83.230.112:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'atlantis'

        - job_name: 'atlantis-snmp'
          metrics_path: /snmp
          params:
            module: [synology]
            auth: [snmpv3]
            target: ['127.0.0.1']
          static_configs:
            - targets: ['100.83.230.112:9116']
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
              replacement: '127.0.0.1'
            - source_labels: [__param_target]
              target_label: instance
              replacement: 'atlantis'
            - target_label: __address__
              replacement: '100.83.230.112:9116'

        - job_name: 'concord-nuc-node'
          static_configs:
            - targets: ['100.72.55.21:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'concord-nuc'

        - job_name: 'truenas-node'
          static_configs:
            - targets: ['100.75.252.64:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'guava'

        - job_name: 'seattle-node'
          static_configs:
            - targets: ['100.82.197.124:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'seattle'

        - job_name: 'proxmox-node'
          static_configs:
            - targets: ['100.87.12.28:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'proxmox'

  # snmp_exporter configuration (mounted at /etc/snmp_exporter/snmp.yml).
  # Defines one SNMPv3 auth profile ("snmpv3") and one module ("synology");
  # these are the names the *-snmp scrape jobs pass as auth/module parameters.
  # Credentials are redacted in this copy of the file.
  # NOTE(review): MD5/DES are the weakest SNMPv3 options; they presumably
  # mirror the SNMP user configured on the polled devices - confirm before
  # changing.
  snmp_config:
    content: |
      auths:
        snmpv3:
          version: 3
          security_level: authPriv
          auth_protocol: MD5
          username: snmp-exporter
          password: "REDACTED_PASSWORD"
          priv_protocol: DES
          priv_password: "REDACTED_PASSWORD"

      modules:
        synology:
          walk:
            - 1.3.6.1.2.1.1
            - 1.3.6.1.2.1.2
            - 1.3.6.1.2.1.25.2
            - 1.3.6.1.2.1.25.3.3
            - 1.3.6.1.2.1.31.1.1
            - 1.3.6.1.4.1.2021.4
            - 1.3.6.1.4.1.2021.10
            - 1.3.6.1.4.1.2021.11
            - 1.3.6.1.4.1.6574.1
            - 1.3.6.1.4.1.6574.2
            - 1.3.6.1.4.1.6574.3
            - 1.3.6.1.4.1.6574.4
            - 1.3.6.1.4.1.6574.5
            - 1.3.6.1.4.1.6574.6
            - 1.3.6.1.4.1.6574.101
            - 1.3.6.1.4.1.6574.102
          metrics:
            - name: sysDescr
              oid: 1.3.6.1.2.1.1.1
              type: DisplayString
            - name: sysUpTime
              oid: 1.3.6.1.2.1.1.3
              type: gauge
            - name: sysName
              oid: 1.3.6.1.2.1.1.5
              type: DisplayString
            - name: ssCpuRawUser
              oid: 1.3.6.1.4.1.2021.11.50
              type: counter
            - name: ssCpuRawSystem
              oid: 1.3.6.1.4.1.2021.11.52
              type: counter
            - name: ssCpuRawIdle
              oid: 1.3.6.1.4.1.2021.11.53
              type: counter
            - name: memTotalSwap
              oid: 1.3.6.1.4.1.2021.4.3
              type: gauge
            - name: memAvailSwap
              oid: 1.3.6.1.4.1.2021.4.4
              type: gauge
            - name: memTotalReal
              oid: 1.3.6.1.4.1.2021.4.5
              type: gauge
            - name: memAvailReal
              oid: 1.3.6.1.4.1.2021.4.6
              type: gauge
            - name: systemStatus
              oid: 1.3.6.1.4.1.6574.1.1
              type: gauge
            - name: temperature
              oid: 1.3.6.1.4.1.6574.1.2
              type: gauge
            - name: powerStatus
              oid: 1.3.6.1.4.1.6574.1.3
              type: gauge
            - name: modelName
              oid: 1.3.6.1.4.1.6574.1.5.1
              type: DisplayString
            - name: version
              oid: 1.3.6.1.4.1.6574.1.5.3
              type: DisplayString
            - name: diskID
              oid: 1.3.6.1.4.1.6574.2.1.1.2
              type: DisplayString
              indexes:
                - labelname: diskIndex
                  type: gauge
            - name: diskStatus
              oid: 1.3.6.1.4.1.6574.2.1.1.5
              type: gauge
              indexes:
                - labelname: diskIndex
                  type: gauge
            - name: diskTemperature
              oid: 1.3.6.1.4.1.6574.2.1.1.6
              type: gauge
              indexes:
                - labelname: diskIndex
                  type: gauge
            - name: raidName
              oid: 1.3.6.1.4.1.6574.3.1.1.2
              type: DisplayString
              indexes:
                - labelname: raidIndex
                  type: gauge
            - name: raidStatus
              oid: 1.3.6.1.4.1.6574.3.1.1.3
              type: gauge
              indexes:
                - labelname: raidIndex
                  type: gauge
            - name: raidFreeSize
              oid: 1.3.6.1.4.1.6574.3.1.1.4
              type: gauge
              indexes:
                - labelname: raidIndex
                  type: gauge
            - name: raidTotalSize
              oid: 1.3.6.1.4.1.6574.3.1.1.5
              type: gauge
              indexes:
                - labelname: raidIndex
                  type: gauge
# Containers of the monitoring stack. prometheus, grafana and snmp_exporter
# share the "monitoring" bridge network; node_exporter runs on the host network.
# NOTE(review): prometheus, node-exporter and snmp-exporter use the floating
# "latest" tag while Grafana is pinned - consider pinning these as well.
services:
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    configs:
      - source: prometheus_config
        target: /etc/prometheus/prometheus.yml
    volumes:
      - prometheus-data:/prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      # Enables the HTTP reload/quit endpoints; they are reachable through the
      # published port 9090.
      - "--web.enable-lifecycle"
    ports:
      - "9090:9090"
    restart: unless-stopped
    networks:
      - monitoring
    extra_hosts:
      # Lets the 'node_exporter' scrape job reach the host-network exporter.
      - "host.docker.internal:host-gateway"

  grafana:
    image: grafana/grafana-oss:12.4.0
    container_name: grafana
    # Mapping form is used on purpose: in list form (- KEY="value") the quote
    # characters are passed to the container as part of the value, which would
    # corrupt the password and the OAuth client ID/secret. Here YAML removes
    # the quotes, and every value is an explicit string.
    environment:
      GF_SECURITY_ADMIN_USER: "admin"
      GF_SECURITY_ADMIN_PASSWORD: "REDACTED_PASSWORD"
      # Disable Grafana 12 unified storage feature to restore home dashboard env var support
      GF_FEATURE_TOGGLES_DISABLE: "kubernetesDashboards"
      # Authentik OAuth2 SSO Configuration
      GF_AUTH_GENERIC_OAUTH_ENABLED: "true"
      GF_AUTH_GENERIC_OAUTH_NAME: "Authentik"
      GF_AUTH_GENERIC_OAUTH_CLIENT_ID: "REDACTED_CLIENT_ID"
      GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: "REDACTED_CLIENT_SECRET"
      GF_AUTH_GENERIC_OAUTH_SCOPES: "openid profile email"
      GF_AUTH_GENERIC_OAUTH_AUTH_URL: "https://sso.vish.gg/application/o/authorize/"
      GF_AUTH_GENERIC_OAUTH_TOKEN_URL: "https://sso.vish.gg/application/o/token/"
      GF_AUTH_GENERIC_OAUTH_API_URL: "https://sso.vish.gg/application/o/userinfo/"
      GF_AUTH_SIGNOUT_REDIRECT_URL: "https://sso.vish.gg/application/o/grafana/end-session/"
      # Maps the groups claim to a Grafana role; falls back to Viewer.
      GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH: "contains(groups[*], 'Grafana Admins') && 'Admin' || contains(groups[*], 'Grafana Editors') && 'Editor' || 'Viewer'"
      # Required for Authentik - extract email and login from userinfo response
      GF_AUTH_GENERIC_OAUTH_EMAIL_ATTRIBUTE_PATH: "email"
      GF_AUTH_GENERIC_OAUTH_LOGIN_ATTRIBUTE_PATH: "preferred_username"
      GF_AUTH_GENERIC_OAUTH_NAME_ATTRIBUTE_PATH: "name"
      GF_SERVER_ROOT_URL: "https://gf.vish.gg"
      # Home dashboard is set via org preferences in Grafana DB (node-details-v2)
      # GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH is not used - home is DB-persisted via API
    configs:
      # Datasource provisioning
      - source: grafana_datasources
        target: /etc/grafana/provisioning/datasources/datasources.yaml
      # Dashboard provider config
      - source: grafana_dashboards_config
        target: /etc/grafana/provisioning/dashboards/dashboards.yaml
    volumes:
      - grafana-data:/var/lib/grafana
      # Dashboard JSONs - bind-mounted from host for easy add/update
      - /home/homelab/docker/grafana-dashboards:/var/lib/grafana/dashboards:ro
    ports:
      # Host port 3300 -> Grafana's container port 3000
      - "3300:3000"
    restart: unless-stopped
    depends_on:
      - prometheus
    networks:
      - monitoring

  node_exporter:
    image: prom/node-exporter:latest
    container_name: node_exporter
    # Host network and PID namespace; the host filesystem is mounted read-only
    # under /host and passed to the exporter as its root filesystem path.
    network_mode: host
    pid: host
    volumes:
      - /:/host:ro,rslave
      - /sys:/host/sys:ro
      - /proc:/host/proc:ro
    command:
      - '--path.rootfs=/host'
    restart: unless-stopped

  snmp_exporter:
    image: prom/snmp-exporter:latest
    container_name: snmp_exporter
    configs:
      - source: snmp_config
        target: /etc/snmp_exporter/snmp.yml
    ports:
      - "9116:9116"
    restart: unless-stopped
    networks:
      - monitoring
# Named volumes with default settings, used for persistent service data.
volumes:
  # Prometheus TSDB, mounted at /prometheus
  prometheus-data: {}
  # Grafana state, mounted at /var/lib/grafana
  grafana-data: {}
# Bridge network joined by prometheus, grafana and snmp_exporter
# (node_exporter uses the host network instead).
networks:
  monitoring:
    driver: bridge