# Prometheus + Grafana Monitoring Stack - Portainer GitOps Version
# =============================================================================
# NOTE: The live deployment is monitoring-compose.yml (plain docker compose,
# bind-mounted configs at /home/homelab/docker/monitoring/).
# This file is the self-contained Portainer GitOps version (embedded configs).
# Stack 476 on endpoint 443399 no longer exists in Portainer.
# =============================================================================
# Ports: 9090 (Prometheus), 3300 (Grafana), 9116 (SNMP Exporter)
#
# Uses docker configs for prometheus.yml and snmp.yml since bind mounts have
# symlink issues with Portainer git deploy
#
# Dashboard Provisioning:
#   - Datasources: Auto-configured Prometheus
#   - Dashboards: Infrastructure Overview, Synology NAS, Node Exporter Full (from Grafana.com)
#
# Old/deprecated configs have been moved to: archive/deprecated-monitoring-stacks/

configs:
  # Grafana Datasource Provisioning
  grafana_datasources:
    content: |
      apiVersion: 1
      datasources:
        - name: Prometheus
          type: prometheus
          uid: cfbskvs8upds0b
          access: proxy
          url: http://prometheus:9090
          isDefault: true
          editable: true

  # Grafana Dashboard Provisioning Config
  # Dashboards are loaded from bind-mounted /home/homelab/docker/grafana-dashboards/
  # To add a new dashboard: drop a JSON file in that directory and restart Grafana
  # Dashboard JSONs are backed up in the repo at hosts/vms/homelab-vm/grafana/dashboards/
  grafana_dashboards_config:
    content: |
      apiVersion: 1
      providers:
        - name: 'default'
          orgId: 1
          folder: 'Provisioned'
          folderUid: 'provisioned'
          type: file
          disableDeletion: true
          updateIntervalSeconds: 30
          allowUiUpdates: true
          options:
            path: /var/lib/grafana/dashboards

  # Dashboard JSON files are now bind-mounted from /home/homelab/docker/grafana-dashboards/
  # Backed up in repo at hosts/vms/homelab-vm/grafana/dashboards/
  # Dashboards: infrastructure-overview-v2, node-details-v2, node-exporter-full,
  #   synology-nas-v3, tailscale-bandwidth, truenas-guava

  # Placeholder alert-rules file so Prometheus can start.
  # prometheus_config lists /etc/prometheus/alert-rules.yml under rule_files,
  # but this self-contained stack previously mounted nothing at that path —
  # Prometheus refuses to load its config (and exits) when a listed rule file
  # is missing. Replace the empty group list with real rules as needed.
  alert_rules:
    content: |
      groups: []

  prometheus_config:
    content: |
      global:
        scrape_interval: 15s
        evaluation_interval: 15s

      # NOTE(review): no alertmanager service is defined in this stack; alerts
      # will only be delivered if an alertmanager container is reachable on the
      # monitoring network under this hostname — confirm it exists elsewhere.
      alerting:
        alertmanagers:
          - static_configs:
              - targets:
                  - alertmanager:9093

      rule_files:
        - /etc/prometheus/alert-rules.yml

      scrape_configs:
        - job_name: 'prometheus'
          static_configs:
            - targets: ['localhost:9090']

        - job_name: 'node_exporter'
          static_configs:
            - targets: ['host.docker.internal:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'homelab-vm'

        - job_name: 'homelab-node'
          static_configs:
            - targets: ['100.67.40.126:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'homelab-vm'

        - job_name: 'raspberry-pis'
          static_configs:
            - targets: ['100.77.151.40:9100']
              # pi-5-kevin (100.123.246.75) removed - offline 127+ days
          relabel_configs:
            - target_label: instance
              replacement: 'pi-5'

        - job_name: 'setillo-node'
          static_configs:
            - targets: ['100.125.0.20:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'setillo'

        # SNMP jobs scrape the exporter running on each NAS itself:
        # __address__ is rewritten to the exporter endpoint and the SNMP
        # target param points at localhost from the exporter's perspective.
        - job_name: 'setillo-snmp'
          metrics_path: /snmp
          params:
            module: [synology]
            auth: [snmpv3]
            target: ['127.0.0.1']
          static_configs:
            - targets: ['100.125.0.20:9116']
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
              replacement: '127.0.0.1'
            - source_labels: [__param_target]
              target_label: instance
              replacement: 'setillo'
            - target_label: __address__
              replacement: '100.125.0.20:9116'

        - job_name: 'calypso-node'
          static_configs:
            - targets: ['100.103.48.78:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'calypso'

        - job_name: 'calypso-snmp'
          metrics_path: /snmp
          params:
            module: [synology]
            auth: [snmpv3]
            target: ['127.0.0.1']
          static_configs:
            - targets: ['100.103.48.78:9116']
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
              replacement: '127.0.0.1'
            - source_labels: [__param_target]
              target_label: instance
              replacement: 'calypso'
            - target_label: __address__
              replacement: '100.103.48.78:9116'

        - job_name: 'atlantis-node'
          static_configs:
            - targets: ['100.83.230.112:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'atlantis'

        - job_name: 'atlantis-snmp'
          metrics_path: /snmp
          params:
            module: [synology]
            auth: [snmpv3]
            target: ['127.0.0.1']
          static_configs:
            - targets: ['100.83.230.112:9116']
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
              replacement: '127.0.0.1'
            - source_labels: [__param_target]
              target_label: instance
              replacement: 'atlantis'
            - target_label: __address__
              replacement: '100.83.230.112:9116'

        - job_name: 'concord-nuc-node'
          static_configs:
            - targets: ['100.72.55.21:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'concord-nuc'

        - job_name: 'truenas-node'
          static_configs:
            - targets: ['100.75.252.64:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'guava'

        - job_name: 'seattle-node'
          static_configs:
            - targets: ['100.82.197.124:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'seattle'

        - job_name: 'proxmox-node'
          static_configs:
            - targets: ['100.87.12.28:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'proxmox'

  snmp_config:
    content: |
      auths:
        snmpv3:
          version: 3
          security_level: authPriv
          # NOTE(review): MD5/DES are the weakest SNMPv3 protocols; switch to
          # SHA/AES if the Synology firmware supports it.
          auth_protocol: MD5
          username: snmp-exporter
          password: "REDACTED_PASSWORD"
          priv_protocol: DES
          priv_password: "REDACTED_PASSWORD"
      modules:
        synology:
          walk:
            - 1.3.6.1.2.1.1
            - 1.3.6.1.2.1.2
            - 1.3.6.1.2.1.25.2
            - 1.3.6.1.2.1.25.3.3
            - 1.3.6.1.2.1.31.1.1
            - 1.3.6.1.4.1.2021.4
            - 1.3.6.1.4.1.2021.10
            - 1.3.6.1.4.1.2021.11
            - 1.3.6.1.4.1.6574.1
            - 1.3.6.1.4.1.6574.2
            - 1.3.6.1.4.1.6574.3
            - 1.3.6.1.4.1.6574.4
            - 1.3.6.1.4.1.6574.5
            - 1.3.6.1.4.1.6574.6
            - 1.3.6.1.4.1.6574.101
            - 1.3.6.1.4.1.6574.102
          metrics:
            - name: sysDescr
              oid: 1.3.6.1.2.1.1.1
              type: DisplayString
            - name: sysUpTime
              oid: 1.3.6.1.2.1.1.3
              type: gauge
            - name: sysName
              oid: 1.3.6.1.2.1.1.5
              type: DisplayString
            - name: ssCpuRawUser
              oid: 1.3.6.1.4.1.2021.11.50
              type: counter
            - name: ssCpuRawSystem
              oid: 1.3.6.1.4.1.2021.11.52
              type: counter
            - name: ssCpuRawIdle
              oid: 1.3.6.1.4.1.2021.11.53
              type: counter
            - name: memTotalSwap
              oid: 1.3.6.1.4.1.2021.4.3
              type: gauge
            - name: memAvailSwap
              oid: 1.3.6.1.4.1.2021.4.4
              type: gauge
            - name: memTotalReal
              oid: 1.3.6.1.4.1.2021.4.5
              type: gauge
            - name: memAvailReal
              oid: 1.3.6.1.4.1.2021.4.6
              type: gauge
            - name: systemStatus
              oid: 1.3.6.1.4.1.6574.1.1
              type: gauge
            - name: temperature
              oid: 1.3.6.1.4.1.6574.1.2
              type: gauge
            - name: powerStatus
              oid: 1.3.6.1.4.1.6574.1.3
              type: gauge
            - name: modelName
              oid: 1.3.6.1.4.1.6574.1.5.1
              type: DisplayString
            - name: version
              oid: 1.3.6.1.4.1.6574.1.5.3
              type: DisplayString
            - name: diskID
              oid: 1.3.6.1.4.1.6574.2.1.1.2
              type: DisplayString
              indexes:
                - labelname: diskIndex
                  type: gauge
            - name: diskStatus
              oid: 1.3.6.1.4.1.6574.2.1.1.5
              type: gauge
              indexes:
                - labelname: diskIndex
                  type: gauge
            - name: diskTemperature
              oid: 1.3.6.1.4.1.6574.2.1.1.6
              type: gauge
              indexes:
                - labelname: diskIndex
                  type: gauge
            - name: raidName
              oid: 1.3.6.1.4.1.6574.3.1.1.2
              type: DisplayString
              indexes:
                - labelname: raidIndex
                  type: gauge
            - name: raidStatus
              oid: 1.3.6.1.4.1.6574.3.1.1.3
              type: gauge
              indexes:
                - labelname: raidIndex
                  type: gauge
            - name: raidFreeSize
              oid: 1.3.6.1.4.1.6574.3.1.1.4
              type: gauge
              indexes:
                - labelname: raidIndex
                  type: gauge
            - name: raidTotalSize
              oid: 1.3.6.1.4.1.6574.3.1.1.5
              type: gauge
              indexes:
                - labelname: raidIndex
                  type: gauge

services:
  prometheus:
    # NOTE(review): :latest tags make redeploys non-reproducible; consider
    # pinning a version (applies to node-exporter and snmp-exporter too).
    image: prom/prometheus:latest
    container_name: prometheus
    configs:
      - source: prometheus_config
        target: /etc/prometheus/prometheus.yml
      # Empty rules placeholder — required for startup; see alert_rules config.
      - source: alert_rules
        target: /etc/prometheus/alert-rules.yml
    volumes:
      - prometheus-data:/prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--web.enable-lifecycle"
    ports:
      - "9090:9090"
    restart: unless-stopped
    networks:
      - monitoring
    extra_hosts:
      - "host.docker.internal:host-gateway"

  grafana:
    image: grafana/grafana-oss:12.4.0
    container_name: grafana
    environment:
      # List-form env entries are plain YAML scalars: embedded double quotes
      # would become LITERAL characters in the value, so secrets must be
      # written unquoted (or the whole "KEY=value" item quoted at YAML level).
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=REDACTED_PASSWORD
      # Disable Grafana 12 unified storage feature to restore home dashboard env var support
      - GF_FEATURE_TOGGLES_DISABLE=kubernetesDashboards
      # Authentik OAuth2 SSO Configuration
      - GF_AUTH_GENERIC_OAUTH_ENABLED=true
      - GF_AUTH_GENERIC_OAUTH_NAME=Authentik
      - GF_AUTH_GENERIC_OAUTH_CLIENT_ID=REDACTED_CLIENT_ID
      - GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET=REDACTED_CLIENT_SECRET
      - GF_AUTH_GENERIC_OAUTH_SCOPES=openid profile email
      - GF_AUTH_GENERIC_OAUTH_AUTH_URL=https://sso.vish.gg/application/o/authorize/
      - GF_AUTH_GENERIC_OAUTH_TOKEN_URL=https://sso.vish.gg/application/o/token/
      - GF_AUTH_GENERIC_OAUTH_API_URL=https://sso.vish.gg/application/o/userinfo/
      - GF_AUTH_SIGNOUT_REDIRECT_URL=https://sso.vish.gg/application/o/grafana/end-session/
      - "GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH=contains(groups[*], 'Grafana Admins') && 'Admin' || contains(groups[*], 'Grafana Editors') && 'Editor' || 'Viewer'"
      # Required for Authentik - extract email and login from userinfo response
      - GF_AUTH_GENERIC_OAUTH_EMAIL_ATTRIBUTE_PATH=email
      - GF_AUTH_GENERIC_OAUTH_LOGIN_ATTRIBUTE_PATH=preferred_username
      - GF_AUTH_GENERIC_OAUTH_NAME_ATTRIBUTE_PATH=name
      - GF_SERVER_ROOT_URL=https://gf.vish.gg
      # Home dashboard is set via org preferences in Grafana DB (node-details-v2)
      # GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH is not used - home is DB-persisted via API
    configs:
      # Datasource provisioning
      - source: grafana_datasources
        target: /etc/grafana/provisioning/datasources/datasources.yaml
      # Dashboard provider config
      - source: grafana_dashboards_config
        target: /etc/grafana/provisioning/dashboards/dashboards.yaml
    volumes:
      - grafana-data:/var/lib/grafana
      # Dashboard JSONs — bind-mounted from host for easy add/update
      - /home/homelab/docker/grafana-dashboards:/var/lib/grafana/dashboards:ro
    ports:
      - "3300:3000"
    restart: unless-stopped
    depends_on:
      - prometheus
    networks:
      - monitoring

  node_exporter:
    image: prom/node-exporter:latest
    container_name: node_exporter
    network_mode: host
    pid: host
    volumes:
      - /:/host:ro,rslave
      - /sys:/host/sys:ro
      - /proc:/host/proc:ro
    command:
      - '--path.rootfs=/host'
    restart: unless-stopped

  snmp_exporter:
    image: prom/snmp-exporter:latest
    container_name: snmp_exporter
    configs:
      - source: snmp_config
        target: /etc/snmp_exporter/snmp.yml
    ports:
      - "9116:9116"
    restart: unless-stopped
    networks:
      - monitoring

volumes:
  prometheus-data:
  grafana-data:

networks:
  monitoring:
    driver: bridge