---
# Prometheus + Grafana Monitoring Stack - Portainer GitOps Version
# =============================================================================
# NOTE: The live deployment is monitoring-compose.yml (plain docker compose,
#       with configs bind-mounted from /home/homelab/docker/monitoring/).
#       This file is the self-contained Portainer GitOps version (embedded
#       configs). Stack 476 on endpoint 443399 no longer exists in Portainer.
# =============================================================================
# Ports: 9090 (Prometheus), 3300 (Grafana), 9116 (SNMP Exporter)
#
# Uses docker configs for prometheus.yml and snmp.yml since bind mounts have
# symlink issues with Portainer git deploy.
#
# Dashboard Provisioning:
#   - Datasources: Auto-configured Prometheus
#   - Dashboards: Infrastructure Overview, Synology NAS, Node Exporter Full (from Grafana.com)
#
# Old/deprecated configs have been moved to: archive/deprecated-monitoring-stacks/

configs:
  # Grafana datasource provisioning.
  # Mounted into the grafana service as
  # /etc/grafana/provisioning/datasources/datasources.yaml and registers the
  # stack's Prometheus service (http://prometheus:9090) as the default,
  # editable datasource.
  grafana_datasources:
    content: |
      apiVersion: 1
      datasources:
        - name: Prometheus
          type: prometheus
          access: proxy
          url: http://prometheus:9090
          isDefault: true
          editable: true

# Grafana dashboard provider configuration.
  # Mounted into the grafana service as
  # /etc/grafana/provisioning/dashboards/dashboards.yaml. It declares one
  # file-based provider that loads every dashboard JSON found under
  # /etc/grafana/provisioning/dashboards/json into the "Provisioned" folder.
  # The dashboard_* configs of this file are mounted into that directory.
  grafana_dashboards_config:
    content: |
      apiVersion: 1
      providers:
        - name: 'default'
          orgId: 1
          folder: 'Provisioned'
          folderUid: 'provisioned'
          type: file
          disableDeletion: false
          updateIntervalSeconds: 30
          allowUiUpdates: true
          options:
            path: /etc/grafana/provisioning/dashboards/json

# Infrastructure Overview Dashboard
  # Mounted into the grafana service as
  # /etc/grafana/provisioning/dashboards/json/infrastructure-overview.json.
  #
  # Panels: device status, CPU, memory, root disk usage, uptime, network
  # receive/transmit. All panels are filtered by the "job" template variable
  # and query the datasource selected by the "datasource" template variable.
  #
  # IMPORTANT: Compose interpolates "$NAME" / "${NAME}" inside inline config
  # content. Grafana template variables must therefore be written with a
  # doubled dollar sign ("$$job", "$${datasource}") so that a literal "$"
  # reaches Grafana; a single "$" would be replaced by an empty string at
  # deploy time.
  dashboard_infrastructure:
    content: |
      {
        "uid": "infrastructure-overview-v2",
        "title": "Infrastructure Overview - All Devices",
        "tags": ["infrastructure", "node-exporter", "tailscale"],
        "timezone": "browser",
        "schemaVersion": 38,
        "version": 1,
        "refresh": "30s",
        "templating": {
          "list": [
            {
              "current": {},
              "hide": 0,
              "includeAll": false,
              "label": "Data Source",
              "multi": false,
              "name": "datasource",
              "options": [],
              "query": "prometheus",
              "refresh": 1,
              "type": "datasource"
            },
            {
              "allValue": "",
              "current": {},
              "datasource": {"type": "prometheus", "uid": "$${datasource}"},
              "definition": "label_values(node_uname_info, job)",
              "hide": 0,
              "includeAll": true,
              "label": "Host",
              "multi": true,
              "name": "job",
              "query": "label_values(node_uname_info, job)",
              "refresh": 1,
              "regex": "",
              "sort": 1,
              "type": "query"
            }
          ]
        },
        "panels": [
          {
            "id": 1,
            "type": "stat",
            "title": "Device Status",
            "gridPos": {"h": 5, "w": 24, "x": 0, "y": 0},
            "datasource": {"type": "prometheus", "uid": "$${datasource}"},
            "fieldConfig": {
              "defaults": {
                "mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}],
                "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]}
              }
            },
            "options": {"colorMode": "background", "textMode": "value_and_name", "orientation": "horizontal", "reduceOptions": {"calcs": ["lastNotNull"]}},
            "targets": [{"expr": "up{job=~\"$$job\"}", "legendFormat": "{{job}}", "refId": "A"}]
          },
          {
            "id": 2,
            "type": "timeseries",
            "title": "CPU Usage",
            "gridPos": {"h": 8, "w": 12, "x": 0, "y": 5},
            "datasource": {"type": "prometheus", "uid": "$${datasource}"},
            "fieldConfig": {"defaults": {"unit": "percent", "max": 100, "min": 0}},
            "options": {"legend": {"displayMode": "table", "placement": "right", "calcs": ["mean", "max"]}},
            "targets": [{"expr": "100 - (avg by(job) (rate(node_cpu_seconds_total{mode=\"idle\", job=~\"$$job\"}[5m])) * 100)", "legendFormat": "{{job}}", "refId": "A"}]
          },
          {
            "id": 3,
            "type": "timeseries",
            "title": "Memory Usage",
            "gridPos": {"h": 8, "w": 12, "x": 12, "y": 5},
            "datasource": {"type": "prometheus", "uid": "$${datasource}"},
            "fieldConfig": {"defaults": {"unit": "percent", "max": 100, "min": 0}},
            "options": {"legend": {"displayMode": "table", "placement": "right", "calcs": ["mean", "max"]}},
            "targets": [{"expr": "(1 - (node_memory_MemAvailable_bytes{job=~\"$$job\"} / node_memory_MemTotal_bytes{job=~\"$$job\"})) * 100", "legendFormat": "{{job}}", "refId": "A"}]
          },
          {
            "id": 4,
            "type": "bargauge",
            "title": "Root Disk Usage",
            "gridPos": {"h": 8, "w": 12, "x": 0, "y": 13},
            "datasource": {"type": "prometheus", "uid": "$${datasource}"},
            "fieldConfig": {
              "defaults": {
                "unit": "percent", "max": 100, "min": 0,
                "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 70}, {"color": "red", "value": 85}]}
              }
            },
            "options": {"displayMode": "gradient", "orientation": "horizontal", "reduceOptions": {"calcs": ["lastNotNull"]}},
            "targets": [{"expr": "100 - ((node_filesystem_avail_bytes{job=~\"$$job\", mountpoint=\"/\", fstype!=\"rootfs\"} / node_filesystem_size_bytes{job=~\"$$job\", mountpoint=\"/\", fstype!=\"rootfs\"}) * 100)", "legendFormat": "{{job}}", "refId": "A"}]
          },
          {
            "id": 5,
            "type": "stat",
            "title": "Uptime",
            "gridPos": {"h": 8, "w": 12, "x": 12, "y": 13},
            "datasource": {"type": "prometheus", "uid": "$${datasource}"},
            "fieldConfig": {"defaults": {"unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}},
            "options": {"colorMode": "value", "orientation": "horizontal", "reduceOptions": {"calcs": ["lastNotNull"]}},
            "targets": [{"expr": "node_time_seconds{job=~\"$$job\"} - node_boot_time_seconds{job=~\"$$job\"}", "legendFormat": "{{job}}", "refId": "A"}]
          },
          {
            "id": 6,
            "type": "timeseries",
            "title": "Network Receive",
            "gridPos": {"h": 8, "w": 12, "x": 0, "y": 21},
            "datasource": {"type": "prometheus", "uid": "$${datasource}"},
            "fieldConfig": {"defaults": {"unit": "Bps"}},
            "options": {"legend": {"displayMode": "table", "placement": "right", "calcs": ["mean", "max"]}},
            "targets": [{"expr": "sum by(job) (rate(node_network_receive_bytes_total{job=~\"$$job\", device!~\"lo|docker.*|br-.*|veth.*\"}[5m]))", "legendFormat": "{{job}}", "refId": "A"}]
          },
          {
            "id": 7,
            "type": "timeseries",
            "title": "Network Transmit",
            "gridPos": {"h": 8, "w": 12, "x": 12, "y": 21},
            "datasource": {"type": "prometheus", "uid": "$${datasource}"},
            "fieldConfig": {"defaults": {"unit": "Bps"}},
            "options": {"legend": {"displayMode": "table", "placement": "right", "calcs": ["mean", "max"]}},
            "targets": [{"expr": "sum by(job) (rate(node_network_transmit_bytes_total{job=~\"$$job\", device!~\"lo|docker.*|br-.*|veth.*\"}[5m]))", "legendFormat": "{{job}}", "refId": "A"}]
          }
        ]
      }

# Synology NAS Monitoring Dashboard
  # Mounted into the grafana service as
  # /etc/grafana/provisioning/dashboards/json/synology-monitoring.json.
  #
  # Panels: NAS status, temperature, memory usage, total memory, disk
  # temperature, RAID status, RAID usage, uptime. The metric names queried
  # here (systemStatus, temperature, memTotalReal, memAvailReal,
  # diskTemperature, raidStatus, raidFreeSize, raidTotalSize, sysUpTime) are
  # the ones declared in the "synology" module of snmp_config.
  #
  # Every query is filtered by the "instance" template variable. The selector
  # must read instance=~"$$instance": an empty selector (instance=~"") only
  # matches series WITHOUT an instance label and leaves all panels empty.
  #
  # IMPORTANT: Compose interpolates "$NAME" / "${NAME}" inside inline config
  # content, so Grafana variables are written with a doubled dollar sign
  # ("$$instance", "$${datasource}", "$$__all") to reach Grafana literally.
  #
  # The datasource variable intentionally carries no hard-coded "current"
  # selection: the datasource provisioned by this stack has no fixed uid, so
  # Grafana resolves the variable on load (same as the infrastructure
  # dashboard).
  dashboard_synology:
    content: |
      {
        "id": 3,
        "uid": "synology-dashboard-v2",
        "title": "Synology NAS Monitoring",
        "tags": ["synology", "nas", "snmp"],
        "timezone": "browser",
        "schemaVersion": 38,
        "version": 4,
        "refresh": "30s",
        "templating": {
          "list": [
            {
              "current": {},
              "hide": 0,
              "includeAll": false,
              "label": "Data Source",
              "multi": false,
              "name": "datasource",
              "options": [],
              "query": "prometheus",
              "refresh": 1,
              "type": "datasource"
            },
            {
              "allValue": "",
              "current": {"text": "All", "value": "$$__all"},
              "datasource": {"type": "prometheus", "uid": "$${datasource}"},
              "definition": "label_values(diskTemperature, instance)",
              "hide": 0,
              "includeAll": true,
              "label": "NAS",
              "multi": true,
              "name": "instance",
              "query": "label_values(diskTemperature, instance)",
              "refresh": 1,
              "regex": "",
              "sort": 1,
              "type": "query"
            }
          ]
        },
        "panels": [
          {
            "id": 1,
            "type": "stat",
            "title": "NAS Status",
            "gridPos": {"h": 4, "w": 24, "x": 0, "y": 0},
            "datasource": {"type": "prometheus", "uid": "$${datasource}"},
            "fieldConfig": {
              "defaults": {
                "mappings": [{"type": "value", "options": {"1": {"text": "Normal", "color": "green"}, "2": {"text": "Failed", "color": "red"}}}],
                "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 2}]}
              }
            },
            "options": {"colorMode": "background", "textMode": "value_and_name", "orientation": "horizontal", "reduceOptions": {"calcs": ["lastNotNull"]}},
            "targets": [{"expr": "systemStatus{instance=~\"$$instance\"}", "legendFormat": "{{instance}}", "refId": "A"}]
          },
          {
            "id": 2,
            "type": "gauge",
            "title": "Temperature",
            "gridPos": {"h": 6, "w": 8, "x": 0, "y": 4},
            "datasource": {"type": "prometheus", "uid": "$${datasource}"},
            "fieldConfig": {
              "defaults": {
                "unit": "celsius", "max": 80, "min": 0,
                "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 50}, {"color": "red", "value": 65}]}
              }
            },
            "options": {"reduceOptions": {"calcs": ["lastNotNull"]}},
            "targets": [{"expr": "temperature{instance=~\"$$instance\"}", "legendFormat": "{{instance}}", "refId": "A"}]
          },
          {
            "id": 3,
            "type": "gauge",
            "title": "Memory Usage",
            "gridPos": {"h": 6, "w": 8, "x": 8, "y": 4},
            "datasource": {"type": "prometheus", "uid": "$${datasource}"},
            "fieldConfig": {
              "defaults": {
                "unit": "percent", "max": 100, "min": 0,
                "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 70}, {"color": "red", "value": 90}]}
              }
            },
            "options": {"reduceOptions": {"calcs": ["lastNotNull"]}},
            "targets": [{"expr": "((memTotalReal{instance=~\"$$instance\"} - memAvailReal{instance=~\"$$instance\"}) / memTotalReal{instance=~\"$$instance\"}) * 100", "legendFormat": "{{instance}}", "refId": "A"}]
          },
          {
            "id": 4,
            "type": "stat",
            "title": "Total Memory",
            "gridPos": {"h": 6, "w": 8, "x": 16, "y": 4},
            "datasource": {"type": "prometheus", "uid": "$${datasource}"},
            "fieldConfig": {
              "defaults": {
                "unit": "decbytes",
                "thresholds": {"mode": "absolute", "steps": [{"color": "blue", "value": null}]}
              }
            },
            "options": {"colorMode": "value", "graphMode": "none", "reduceOptions": {"calcs": ["lastNotNull"]}},
            "targets": [{"expr": "memTotalReal{instance=~\"$$instance\"} * 1024", "legendFormat": "{{instance}}", "refId": "A"}]
          },
          {
            "id": 5,
            "type": "stat",
            "title": "Disk Temperature",
            "gridPos": {"h": 6, "w": 12, "x": 0, "y": 10},
            "datasource": {"type": "prometheus", "uid": "$${datasource}"},
            "fieldConfig": {
              "defaults": {
                "unit": "celsius",
                "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 40}, {"color": "red", "value": 50}]}
              }
            },
            "options": {"colorMode": "value", "graphMode": "area", "reduceOptions": {"calcs": ["lastNotNull"]}},
            "targets": [{"expr": "diskTemperature{instance=~\"$$instance\"}", "legendFormat": "{{instance}} - Disk {{diskIndex}}", "refId": "A"}]
          },
          {
            "id": 6,
            "type": "stat",
            "title": "RAID Status",
            "gridPos": {"h": 6, "w": 12, "x": 12, "y": 10},
            "datasource": {"type": "prometheus", "uid": "$${datasource}"},
            "fieldConfig": {
              "defaults": {
                "mappings": [
                  {
                    "type": "value",
                    "options": {
                      "1": {"text": "Normal", "color": "green"},
                      "2": {"text": "Repairing", "color": "yellow"},
                      "3": {"text": "Migrating", "color": "yellow"},
                      "4": {"text": "Expanding", "color": "yellow"},
                      "5": {"text": "Deleting", "color": "orange"},
                      "6": {"text": "Creating", "color": "blue"},
                      "11": {"text": "Degraded", "color": "orange"},
                      "12": {"text": "Crashed", "color": "red"}
                    }
                  }
                ],
                "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
              }
            },
            "options": {"colorMode": "background", "textMode": "value_and_name", "orientation": "horizontal", "reduceOptions": {"calcs": ["lastNotNull"]}},
            "targets": [{"expr": "raidStatus{instance=~\"$$instance\"}", "legendFormat": "{{instance}} - {{raidIndex}}", "refId": "A"}]
          },
          {
            "id": 7,
            "type": "bargauge",
            "title": "RAID Usage",
            "gridPos": {"h": 8, "w": 24, "x": 0, "y": 16},
            "datasource": {"type": "prometheus", "uid": "$${datasource}"},
            "fieldConfig": {
              "defaults": {
                "unit": "percent", "max": 100, "min": 0,
                "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 70}, {"color": "red", "value": 85}]}
              }
            },
            "options": {"displayMode": "gradient", "orientation": "horizontal", "reduceOptions": {"calcs": ["lastNotNull"]}},
            "targets": [{"expr": "((raidTotalSize{instance=~\"$$instance\"} - raidFreeSize{instance=~\"$$instance\"}) / raidTotalSize{instance=~\"$$instance\"}) * 100", "legendFormat": "{{instance}} - RAID {{raidIndex}}", "refId": "A"}]
          },
          {
            "id": 8,
            "type": "stat",
            "title": "Uptime",
            "gridPos": {"h": 4, "w": 24, "x": 0, "y": 24},
            "datasource": {"type": "prometheus", "uid": "$${datasource}"},
            "fieldConfig": {
              "defaults": {
                "unit": "dtdurations",
                "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
              }
            },
            "options": {"colorMode": "value", "orientation": "horizontal", "reduceOptions": {"calcs": ["lastNotNull"]}},
            "targets": [{"expr": "sysUpTime{instance=~\"$$instance\"} / 100", "legendFormat": "{{instance}}", "refId": "A"}]
          }
        ]
      }

# Prometheus server configuration.
  # Mounted into the prometheus service as /etc/prometheus/prometheus.yml.
  #
  # Scrape layout:
  #   - one node-exporter job per host (port 9100), each relabelled to a
  #     human-readable "instance" value;
  #   - one SNMP job per Synology NAS (setillo, calypso, atlantis) that scrapes
  #     the snmp exporter on the NAS itself (port 9116, module "synology",
  #     auth "snmpv3") and asks it to query 127.0.0.1.
  #
  # NOTE(review): rule_files points at /etc/prometheus/alert-rules.yml and
  # alerting targets alertmanager:9093, but this compose file neither mounts
  # that rule file into the prometheus service nor defines an alertmanager
  # service - confirm whether they are provided elsewhere.
  # NOTE(review): the 'node_exporter' and 'homelab-node' jobs both relabel
  # instance to 'homelab-vm'; presumably the same host reached via two
  # addresses - confirm the duplication is intended.
  prometheus_config:
    content: |
      global:
        scrape_interval: 15s
        evaluation_interval: 15s

      alerting:
        alertmanagers:
          - static_configs:
              - targets:
                  - alertmanager:9093

      rule_files:
        - /etc/prometheus/alert-rules.yml

      scrape_configs:
        - job_name: 'prometheus'
          static_configs:
            - targets: ['localhost:9090']

        - job_name: 'node_exporter'
          static_configs:
            - targets: ['host.docker.internal:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'homelab-vm'

        - job_name: 'homelab-node'
          static_configs:
            - targets: ['100.67.40.126:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'homelab-vm'

        - job_name: 'raspberry-pis'
          static_configs:
            - targets: ['100.77.151.40:9100']
            # pi-5-kevin (100.123.246.75) removed - offline 127+ days
          relabel_configs:
            - target_label: instance
              replacement: 'pi-5'

        - job_name: 'setillo-node'
          static_configs:
            - targets: ['100.125.0.20:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'setillo'

        - job_name: 'setillo-snmp'
          metrics_path: /snmp
          params:
            module: [synology]
            auth: [snmpv3]
            target: ['127.0.0.1']
          static_configs:
            - targets: ['100.125.0.20:9116']
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
              replacement: '127.0.0.1'
            - source_labels: [__param_target]
              target_label: instance
              replacement: 'setillo'
            - target_label: __address__
              replacement: '100.125.0.20:9116'

        - job_name: 'calypso-node'
          static_configs:
            - targets: ['100.103.48.78:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'calypso'

        - job_name: 'calypso-snmp'
          metrics_path: /snmp
          params:
            module: [synology]
            auth: [snmpv3]
            target: ['127.0.0.1']
          static_configs:
            - targets: ['100.103.48.78:9116']
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
              replacement: '127.0.0.1'
            - source_labels: [__param_target]
              target_label: instance
              replacement: 'calypso'
            - target_label: __address__
              replacement: '100.103.48.78:9116'

        - job_name: 'atlantis-node'
          static_configs:
            - targets: ['100.83.230.112:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'atlantis'

        - job_name: 'atlantis-snmp'
          metrics_path: /snmp
          params:
            module: [synology]
            auth: [snmpv3]
            target: ['127.0.0.1']
          static_configs:
            - targets: ['100.83.230.112:9116']
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
              replacement: '127.0.0.1'
            - source_labels: [__param_target]
              target_label: instance
              replacement: 'atlantis'
            - target_label: __address__
              replacement: '100.83.230.112:9116'

        - job_name: 'concord-nuc-node'
          static_configs:
            - targets: ['100.72.55.21:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'concord-nuc'

        - job_name: 'truenas-node'
          static_configs:
            - targets: ['100.75.252.64:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'guava'

        - job_name: 'seattle-node'
          static_configs:
            - targets: ['100.82.197.124:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'seattle'

        - job_name: 'proxmox-node'
          static_configs:
            - targets: ['100.87.12.28:9100']
          relabel_configs:
            - target_label: instance
              replacement: 'proxmox'

# SNMP exporter configuration.
  # Mounted into the snmp_exporter service as /etc/snmp_exporter/snmp.yml.
  #
  #   auths.snmpv3     - SNMPv3 authPriv credentials (MD5 auth / DES privacy)
  #                      for the "snmp-exporter" user. The password values in
  #                      this file are redaction placeholders.
  #   modules.synology - OID subtrees to walk plus the metric definitions
  #                      exposed to Prometheus. Per-disk metrics carry a
  #                      "diskIndex" label, per-RAID metrics a "raidIndex"
  #                      label.
  snmp_config:
    content: |
      auths:
        snmpv3:
          version: 3
          security_level: authPriv
          auth_protocol: MD5
          username: snmp-exporter
          password: "REDACTED_PASSWORD"
          priv_protocol: DES
          priv_password: "REDACTED_PASSWORD"

      modules:
        synology:
          walk:
            - 1.3.6.1.2.1.1
            - 1.3.6.1.2.1.2
            - 1.3.6.1.2.1.25.2
            - 1.3.6.1.2.1.25.3.3
            - 1.3.6.1.2.1.31.1.1
            - 1.3.6.1.4.1.2021.4
            - 1.3.6.1.4.1.2021.10
            - 1.3.6.1.4.1.2021.11
            - 1.3.6.1.4.1.6574.1
            - 1.3.6.1.4.1.6574.2
            - 1.3.6.1.4.1.6574.3
            - 1.3.6.1.4.1.6574.4
            - 1.3.6.1.4.1.6574.5
            - 1.3.6.1.4.1.6574.6
            - 1.3.6.1.4.1.6574.101
            - 1.3.6.1.4.1.6574.102
          metrics:
            - name: sysDescr
              oid: 1.3.6.1.2.1.1.1
              type: DisplayString
            - name: sysUpTime
              oid: 1.3.6.1.2.1.1.3
              type: gauge
            - name: sysName
              oid: 1.3.6.1.2.1.1.5
              type: DisplayString
            - name: ssCpuRawUser
              oid: 1.3.6.1.4.1.2021.11.50
              type: counter
            - name: ssCpuRawSystem
              oid: 1.3.6.1.4.1.2021.11.52
              type: counter
            - name: ssCpuRawIdle
              oid: 1.3.6.1.4.1.2021.11.53
              type: counter
            - name: memTotalSwap
              oid: 1.3.6.1.4.1.2021.4.3
              type: gauge
            - name: memAvailSwap
              oid: 1.3.6.1.4.1.2021.4.4
              type: gauge
            - name: memTotalReal
              oid: 1.3.6.1.4.1.2021.4.5
              type: gauge
            - name: memAvailReal
              oid: 1.3.6.1.4.1.2021.4.6
              type: gauge
            - name: systemStatus
              oid: 1.3.6.1.4.1.6574.1.1
              type: gauge
            - name: temperature
              oid: 1.3.6.1.4.1.6574.1.2
              type: gauge
            - name: powerStatus
              oid: 1.3.6.1.4.1.6574.1.3
              type: gauge
            - name: modelName
              oid: 1.3.6.1.4.1.6574.1.5.1
              type: DisplayString
            - name: version
              oid: 1.3.6.1.4.1.6574.1.5.3
              type: DisplayString
            - name: diskID
              oid: 1.3.6.1.4.1.6574.2.1.1.2
              type: DisplayString
              indexes:
                - labelname: diskIndex
                  type: gauge
            - name: diskStatus
              oid: 1.3.6.1.4.1.6574.2.1.1.5
              type: gauge
              indexes:
                - labelname: diskIndex
                  type: gauge
            - name: diskTemperature
              oid: 1.3.6.1.4.1.6574.2.1.1.6
              type: gauge
              indexes:
                - labelname: diskIndex
                  type: gauge
            - name: raidName
              oid: 1.3.6.1.4.1.6574.3.1.1.2
              type: DisplayString
              indexes:
                - labelname: raidIndex
                  type: gauge
            - name: raidStatus
              oid: 1.3.6.1.4.1.6574.3.1.1.3
              type: gauge
              indexes:
                - labelname: raidIndex
                  type: gauge
            - name: raidFreeSize
              oid: 1.3.6.1.4.1.6574.3.1.1.4
              type: gauge
              indexes:
                - labelname: raidIndex
                  type: gauge
            - name: raidTotalSize
              oid: 1.3.6.1.4.1.6574.3.1.1.5
              type: gauge
              indexes:
                - labelname: raidIndex
                  type: gauge

services:
  # Prometheus server.
  # Reads the embedded prometheus_config, stores its TSDB in the
  # prometheus-data named volume and publishes the UI/API on host port 9090.
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    restart: unless-stopped
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      # Allows configuration reloads over HTTP. The port is published on the
      # host, so keep it reachable from trusted networks only.
      - '--web.enable-lifecycle'
    configs:
      - source: prometheus_config
        target: /etc/prometheus/prometheus.yml
    volumes:
      - prometheus-data:/prometheus
    ports:
      - '9090:9090'
    networks:
      - monitoring
    # Makes host.docker.internal resolve to the Docker host so that the
    # 'node_exporter' scrape job (host.docker.internal:9100) can reach the
    # host-networked node_exporter container.
    extra_hosts:
      - 'host.docker.internal:host-gateway'

# Grafana.
  # Provisioned entirely from the embedded configs of this file (datasource,
  # dashboard provider and dashboard JSON files); state lives in the
  # grafana-data named volume. Published on host port 3300 (container 3000).
  #
  # The environment is written in mapping form on purpose: in list form an
  # entry such as KEY="value" hands the double quotes to the container as part
  # of the value, which corrupts passwords and OAuth client credentials.
  #
  # The REDACTED_* values are placeholders. NOTE(review): prefer injecting the
  # real secrets through the stack's environment variables instead of
  # committing them to Git.
  grafana:
    image: grafana/grafana-oss:12.4.0
    container_name: grafana
    environment:
      GF_SECURITY_ADMIN_USER: 'admin'
      GF_SECURITY_ADMIN_PASSWORD: 'REDACTED_PASSWORD'
      # Disable Grafana 12 unified storage feature to restore home dashboard env var support
      GF_FEATURE_TOGGLES_DISABLE: 'kubernetesDashboards'
      # Authentik OAuth2 SSO Configuration
      GF_AUTH_GENERIC_OAUTH_ENABLED: 'true'
      GF_AUTH_GENERIC_OAUTH_NAME: 'Authentik'
      GF_AUTH_GENERIC_OAUTH_CLIENT_ID: 'REDACTED_CLIENT_ID'
      GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: 'REDACTED_CLIENT_SECRET'
      GF_AUTH_GENERIC_OAUTH_SCOPES: 'openid profile email'
      GF_AUTH_GENERIC_OAUTH_AUTH_URL: 'https://sso.vish.gg/application/o/authorize/'
      GF_AUTH_GENERIC_OAUTH_TOKEN_URL: 'https://sso.vish.gg/application/o/token/'
      GF_AUTH_GENERIC_OAUTH_API_URL: 'https://sso.vish.gg/application/o/userinfo/'
      GF_AUTH_SIGNOUT_REDIRECT_URL: 'https://sso.vish.gg/application/o/grafana/end-session/'
      # Maps Authentik groups to Grafana roles; anyone outside both groups
      # becomes a Viewer.
      GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH: "contains(groups[*], 'Grafana Admins') && 'Admin' || contains(groups[*], 'Grafana Editors') && 'Editor' || 'Viewer'"
      # Required for Authentik - extract email and login from userinfo response
      GF_AUTH_GENERIC_OAUTH_EMAIL_ATTRIBUTE_PATH: 'email'
      GF_AUTH_GENERIC_OAUTH_LOGIN_ATTRIBUTE_PATH: 'preferred_username'
      GF_AUTH_GENERIC_OAUTH_NAME_ATTRIBUTE_PATH: 'name'
      GF_SERVER_ROOT_URL: 'https://gf.vish.gg'
      # Home dashboard is set via org preferences in Grafana DB (node-details-v2)
      # GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH is not used - home is DB-persisted via API
    configs:
      # Datasource provisioning
      - source: grafana_datasources
        target: /etc/grafana/provisioning/datasources/datasources.yaml
      # Dashboard provider config
      - source: grafana_dashboards_config
        target: /etc/grafana/provisioning/dashboards/dashboards.yaml
      # Dashboard JSON files
      - source: dashboard_infrastructure
        target: /etc/grafana/provisioning/dashboards/json/infrastructure-overview.json
      - source: dashboard_synology
        target: /etc/grafana/provisioning/dashboards/json/synology-monitoring.json
    volumes:
      - grafana-data:/var/lib/grafana
    ports:
      - "3300:3000"
    restart: unless-stopped
    depends_on:
      - prometheus
    networks:
      - monitoring

# Node exporter for the Docker host itself.
  # Runs in the host's network and PID namespaces and sees the host filesystem
  # read-only under /host, which is passed to the exporter as its root
  # filesystem path. Being host-networked, it publishes no ports of its own;
  # Prometheus scrapes it on port 9100 (see the 'node_exporter' scrape job).
  node_exporter:
    image: prom/node-exporter:latest
    container_name: node_exporter
    restart: unless-stopped
    network_mode: host
    pid: host
    command:
      - "--path.rootfs=/host"
    volumes:
      - /:/host:ro,rslave
      - /sys:/host/sys:ro
      - /proc:/host/proc:ro

# SNMP exporter.
  # Loads the embedded snmp_config (SNMPv3 auth + "synology" module) and
  # publishes its HTTP endpoint on host port 9116.
  # NOTE(review): the scrape jobs in prometheus_config target exporters on the
  # NAS addresses (<nas-ip>:9116), not this container - confirm this instance
  # is still needed.
  snmp_exporter:
    image: prom/snmp-exporter:latest
    container_name: snmp_exporter
    restart: unless-stopped
    configs:
      - source: snmp_config
        target: /etc/snmp_exporter/snmp.yml
    ports:
      - '9116:9116'
    networks:
      - monitoring

# Named volumes with default settings:
#   prometheus-data - Prometheus TSDB (mounted at /prometheus)
#   grafana-data    - Grafana state (mounted at /var/lib/grafana)
volumes:
  prometheus-data: {}
  grafana-data: {}

# Bridge network shared by prometheus, grafana and snmp_exporter
# (node_exporter uses the host network instead).
networks:
  monitoring:
    driver: 'bridge'