Sanitized mirror from private repository - 2026-04-18 11:19:59 UTC
Some checks failed
Documentation / Build Docusaurus (push) Failing after 5m14s
Documentation / Deploy to GitHub Pages (push) Has been skipped

This commit is contained in:
Gitea Mirror Bot
2026-04-18 11:19:59 +00:00
commit fb00a325d1
1418 changed files with 359990 additions and 0 deletions

View File

@@ -0,0 +1,40 @@
# Deprecated Monitoring Stacks
These monitoring configurations are **DEPRECATED** and should not be used.
## Current Working Stack
The current working monitoring stack is located at:
- **`homelab_vm/monitoring.yaml`**
This stack is deployed via Portainer GitOps to the homelab-vm and includes:
- Prometheus with all scrape targets
- Grafana
- Node Exporter
- SNMP Exporter for Synology NAS devices
## Archived Configurations
The following directories contain old/deprecated monitoring configurations that were used before the consolidated stack:
### `prometheus_grafana_hub/`
Old monitoring hub setup with separate docker-compose files for each host.
- Used bind mounts which caused issues with Portainer git deploy
- Had separate compose files for each Synology NAS
- **Status: DEPRECATED** - Replaced by `homelab_vm/monitoring.yaml`
### `stacks-monitoring/`
Another old monitoring stack attempt.
- Used separate directories for prometheus and grafana configs
- **Status: DEPRECATED** - Replaced by `homelab_vm/monitoring.yaml`
### `prometheus/`
Standalone prometheus config directory.
- **Status: DEPRECATED** - Config now embedded in `homelab_vm/monitoring.yaml`
### `grafana/`
Standalone grafana provisioning configs.
- **Status: DEPRECATED** - Dashboards now managed directly in Grafana
## Migration Date
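Archived on: unknown — the `$(date +%Y-%m-%d)` shell placeholder was committed literally instead of being expanded; check this file's git history for the actual archive date.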

View File

@@ -0,0 +1,366 @@
{
"uid": "infrastructure-overview-v2",
"title": "Infrastructure Overview - All Devices",
"tags": [
"infrastructure",
"node-exporter",
"tailscale"
],
"timezone": "browser",
"schemaVersion": 38,
"version": 1,
"refresh": "30s",
"templating": {
"list": [
{
"current": {},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"type": "datasource"
},
{
"allValue": "",
"current": {},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"definition": "label_values(node_uname_info, job)",
"hide": 0,
"includeAll": true,
"label": "Host",
"multi": true,
"name": "job",
"query": "label_values(node_uname_info, job)",
"refresh": 1,
"regex": "",
"sort": 1,
"type": "query"
}
]
},
"panels": [
{
"id": 1,
"type": "stat",
"title": "Device Status",
"gridPos": {
"h": 5,
"w": 24,
"x": 0,
"y": 0
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"mappings": [
{
"type": "value",
"options": {
"0": {
"text": "DOWN",
"color": "red"
},
"1": {
"text": "UP",
"color": "green"
}
}
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "green",
"value": 1
}
]
}
}
},
"options": {
"colorMode": "background",
"textMode": "value_and_name",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "up{job=~\"$job\"}",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 2,
"type": "timeseries",
"title": "CPU Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 5
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"max": 100,
"min": 0
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "100 - (avg by(job) (rate(node_cpu_seconds_total{mode=\"idle\", job=~\"$job\"}[5m])) * 100)",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 3,
"type": "timeseries",
"title": "Memory Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 5
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"max": 100,
"min": 0
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "(1 - (node_memory_MemAvailable_bytes{job=~\"$job\"} / node_memory_MemTotal_bytes{job=~\"$job\"})) * 100",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 4,
"type": "bargauge",
"title": "Root Disk Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 13
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 85
}
]
}
}
},
"options": {
"displayMode": "gradient",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "100 - ((node_filesystem_avail_bytes{job=~\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"} / node_filesystem_size_bytes{job=~\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"}) * 100)",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 5,
"type": "stat",
"title": "Uptime",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 13
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "node_time_seconds{job=~\"$job\"} - node_boot_time_seconds{job=~\"$job\"}",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 6,
"type": "timeseries",
"title": "Network Receive",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 21
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "Bps"
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "sum by(job) (rate(node_network_receive_bytes_total{job=~\"$job\", device!~\"lo|docker.*|br-.*|veth.*\"}[5m]))",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 7,
"type": "timeseries",
"title": "Network Transmit",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 21
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "Bps"
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "sum by(job) (rate(node_network_transmit_bytes_total{job=~\"$job\", device!~\"lo|docker.*|br-.*|veth.*\"}[5m]))",
"legendFormat": "{{job}}",
"refId": "A"
}
]
}
]
}

View File

@@ -0,0 +1,936 @@
{
"uid": "node-details-v2",
"title": "Node Details - Full Metrics",
"tags": [
"node-exporter",
"detailed",
"infrastructure"
],
"timezone": "browser",
"schemaVersion": 38,
"version": 1,
"refresh": "30s",
"time": {
"from": "now-1h",
"to": "now"
},
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "prometheus",
"value": "prometheus"
},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
},
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"definition": "label_values(node_uname_info, job)",
"hide": 0,
"includeAll": false,
"label": "Host",
"multi": false,
"name": "job",
"options": [],
"query": "label_values(node_uname_info, job)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
},
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"definition": "label_values(node_uname_info{job=\"$job\"}, instance)",
"hide": 0,
"includeAll": false,
"label": "Instance",
"multi": false,
"name": "instance",
"options": [],
"query": "label_values(node_uname_info{job=\"$job\"}, instance)",
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
}
]
},
"panels": [
{
"id": 1,
"type": "row",
"title": "\ud83d\udcca Quick Stats",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"collapsed": false
},
{
"id": 2,
"type": "stat",
"title": "Uptime",
"gridPos": {
"h": 4,
"w": 4,
"x": 0,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "none",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "node_time_seconds{job=\"$job\",instance=\"$instance\"} - node_boot_time_seconds{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Uptime",
"refId": "A"
}
]
},
{
"id": 3,
"type": "stat",
"title": "CPU Cores",
"gridPos": {
"h": 4,
"w": 3,
"x": 4,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "blue",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "none",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "count(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"idle\"})",
"legendFormat": "Cores",
"refId": "A"
}
]
},
{
"id": 4,
"type": "stat",
"title": "Total RAM",
"gridPos": {
"h": 4,
"w": 3,
"x": 7,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "bytes",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "purple",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "none",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "node_memory_MemTotal_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "RAM",
"refId": "A"
}
]
},
{
"id": 5,
"type": "gauge",
"title": "CPU",
"gridPos": {
"h": 4,
"w": 3,
"x": 10,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 60
},
{
"color": "red",
"value": 80
}
]
}
}
},
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "100 - (avg(rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"idle\"}[5m])) * 100)",
"legendFormat": "CPU",
"refId": "A"
}
]
},
{
"id": 6,
"type": "gauge",
"title": "Memory",
"gridPos": {
"h": 4,
"w": 3,
"x": 13,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 85
}
]
}
}
},
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "(1 - (node_memory_MemAvailable_bytes{job=\"$job\",instance=\"$instance\"} / node_memory_MemTotal_bytes{job=\"$job\",instance=\"$instance\"})) * 100",
"legendFormat": "Memory",
"refId": "A"
}
]
},
{
"id": 7,
"type": "gauge",
"title": "Disk /",
"gridPos": {
"h": 4,
"w": 3,
"x": 16,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 85
}
]
}
}
},
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "100 - ((node_filesystem_avail_bytes{job=\"$job\",instance=\"$instance\",mountpoint=\"/\",fstype!=\"rootfs\"} / node_filesystem_size_bytes{job=\"$job\",instance=\"$instance\",mountpoint=\"/\",fstype!=\"rootfs\"}) * 100)",
"legendFormat": "Disk",
"refId": "A"
}
]
},
{
"id": 8,
"type": "stat",
"title": "Load 1m",
"gridPos": {
"h": 4,
"w": 2,
"x": 19,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"decimals": 2,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 2
},
{
"color": "red",
"value": 4
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "area",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "node_load1{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "1m",
"refId": "A"
}
]
},
{
"id": 9,
"type": "stat",
"title": "Load 5m",
"gridPos": {
"h": 4,
"w": 2,
"x": 21,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"decimals": 2,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 2
},
{
"color": "red",
"value": 4
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "area",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "node_load5{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "5m",
"refId": "A"
}
]
},
{
"id": 10,
"type": "row",
"title": "\ud83d\udda5\ufe0f CPU Details",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 5
},
"collapsed": false
},
{
"id": 11,
"type": "timeseries",
"title": "CPU Usage Breakdown",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 6
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"custom": {
"fillOpacity": 50,
"stacking": {
"mode": "normal",
"group": "A"
}
}
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "avg(rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"user\"}[5m])) * 100",
"legendFormat": "User",
"refId": "A"
},
{
"expr": "avg(rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"system\"}[5m])) * 100",
"legendFormat": "System",
"refId": "B"
},
{
"expr": "avg(rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"iowait\"}[5m])) * 100",
"legendFormat": "IOWait",
"refId": "C"
},
{
"expr": "avg(rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"steal\"}[5m])) * 100",
"legendFormat": "Steal",
"refId": "D"
}
]
},
{
"id": 12,
"type": "timeseries",
"title": "CPU Per Core",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 6
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"max": 100,
"min": 0
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean"
]
}
},
"targets": [
{
"expr": "100 - (rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"idle\"}[5m]) * 100)",
"legendFormat": "CPU {{cpu}}",
"refId": "A"
}
]
},
{
"id": 20,
"type": "row",
"title": "\ud83e\udde0 Memory Details",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 14
},
"collapsed": false
},
{
"id": 21,
"type": "timeseries",
"title": "Memory Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 15
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "bytes",
"custom": {
"fillOpacity": 30,
"stacking": {
"mode": "normal",
"group": "A"
}
}
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean"
]
}
},
"targets": [
{
"expr": "node_memory_MemTotal_bytes{job=\"$job\",instance=\"$instance\"} - node_memory_MemAvailable_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Used",
"refId": "A"
},
{
"expr": "node_memory_Buffers_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Buffers",
"refId": "B"
},
{
"expr": "node_memory_Cached_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Cached",
"refId": "C"
},
{
"expr": "node_memory_MemFree_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Free",
"refId": "D"
}
]
},
{
"id": 22,
"type": "timeseries",
"title": "Swap Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 15
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "bytes"
}
},
"targets": [
{
"expr": "node_memory_SwapTotal_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Total",
"refId": "A"
},
{
"expr": "node_memory_SwapTotal_bytes{job=\"$job\",instance=\"$instance\"} - node_memory_SwapFree_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Used",
"refId": "B"
}
]
},
{
"id": 30,
"type": "row",
"title": "\ud83d\udcbe Disk Details",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 23
},
"collapsed": false
},
{
"id": 31,
"type": "bargauge",
"title": "Disk Space Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 24
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 85
}
]
}
}
},
"options": {
"displayMode": "gradient",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "100 - ((node_filesystem_avail_bytes{job=\"$job\",instance=\"$instance\",fstype!~\"tmpfs|overlay|squashfs\"} / node_filesystem_size_bytes{job=\"$job\",instance=\"$instance\",fstype!~\"tmpfs|overlay|squashfs\"}) * 100)",
"legendFormat": "{{mountpoint}}",
"refId": "A"
}
]
},
{
"id": 32,
"type": "timeseries",
"title": "Disk I/O",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 24
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "Bps"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": ".*Write.*"
},
"properties": [
{
"id": "custom.transform",
"value": "negative-Y"
}
]
}
]
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "rate(node_disk_read_bytes_total{job=\"$job\",instance=\"$instance\",device!~\"loop.*|dm-.*\"}[5m])",
"legendFormat": "{{device}} Read",
"refId": "A"
},
{
"expr": "rate(node_disk_written_bytes_total{job=\"$job\",instance=\"$instance\",device!~\"loop.*|dm-.*\"}[5m])",
"legendFormat": "{{device}} Write",
"refId": "B"
}
]
},
{
"id": 40,
"type": "row",
"title": "\ud83c\udf10 Network Details",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 32
},
"collapsed": false
},
{
"id": 41,
"type": "timeseries",
"title": "Network Traffic",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 33
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "bps"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": ".*TX.*"
},
"properties": [
{
"id": "custom.transform",
"value": "negative-Y"
}
]
}
]
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "rate(node_network_receive_bytes_total{job=\"$job\",instance=\"$instance\",device!~\"lo|docker.*|br-.*|veth.*\"}[5m]) * 8",
"legendFormat": "{{device}} RX",
"refId": "A"
},
{
"expr": "rate(node_network_transmit_bytes_total{job=\"$job\",instance=\"$instance\",device!~\"lo|docker.*|br-.*|veth.*\"}[5m]) * 8",
"legendFormat": "{{device}} TX",
"refId": "B"
}
]
},
{
"id": 42,
"type": "timeseries",
"title": "Network Errors",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 33
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "pps"
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean"
]
}
},
"targets": [
{
"expr": "rate(node_network_receive_errs_total{job=\"$job\",instance=\"$instance\",device!~\"lo|docker.*|br-.*|veth.*\"}[5m])",
"legendFormat": "{{device}} RX Errors",
"refId": "A"
},
{
"expr": "rate(node_network_transmit_errs_total{job=\"$job\",instance=\"$instance\",device!~\"lo|docker.*|br-.*|veth.*\"}[5m])",
"legendFormat": "{{device}} TX Errors",
"refId": "B"
}
]
}
],
"id": null
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,351 @@
{
"uid": "synology-dashboard-v2",
"title": "Synology NAS Monitoring",
"tags": [
"synology",
"nas",
"snmp"
],
"timezone": "browser",
"schemaVersion": 38,
"version": 1,
"refresh": "30s",
"templating": {
"list": [
{
"current": {},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"type": "datasource"
},
{
"allValue": "",
"current": {},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"definition": "label_values(ssCpuRawIdle, job)",
"hide": 0,
"includeAll": true,
"label": "NAS",
"multi": true,
"name": "job",
"query": "label_values(ssCpuRawIdle, job)",
"refresh": 1,
"regex": "",
"sort": 1,
"type": "query"
}
]
},
"panels": [
{
"id": 1,
"type": "stat",
"title": "NAS Status",
"gridPos": {
"h": 4,
"w": 24,
"x": 0,
"y": 0
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"mappings": [
{
"type": "value",
"options": {
"0": {
"text": "DOWN",
"color": "red"
},
"1": {
"text": "UP",
"color": "green"
}
}
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "green",
"value": 1
}
]
}
}
},
"options": {
"colorMode": "background",
"textMode": "value_and_name",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "up{job=~\"$job\"}",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
  "id": 2,
  "type": "gauge",
  "title": "CPU Usage",
  "description": "Current CPU utilisation per NAS: 100 minus the idle share of CPU ticks over the last 5 minutes. The ssCpuRaw* series are cumulative tick counters, so each one is wrapped in rate() instead of being divided directly (which would show the average since boot).",
  "gridPos": {
    "h": 6,
    "w": 8,
    "x": 0,
    "y": 4
  },
  "datasource": {
    "type": "prometheus",
    "uid": "eeyq1w1zddtkwb"
  },
  "fieldConfig": {
    "defaults": {
      "unit": "percent",
      "min": 0,
      "max": 100,
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {
            "color": "green",
            "value": null
          },
          {
            "color": "yellow",
            "value": 60
          },
          {
            "color": "red",
            "value": 80
          }
        ]
      }
    }
  },
  "options": {
    "reduceOptions": {
      "calcs": [
        "lastNotNull"
      ]
    }
  },
  "targets": [
    {
      "expr": "100 - ((rate(ssCpuRawIdle{job=~\"$job\"}[5m]) / (rate(ssCpuRawUser{job=~\"$job\"}[5m]) + rate(ssCpuRawSystem{job=~\"$job\"}[5m]) + rate(ssCpuRawIdle{job=~\"$job\"}[5m]) + rate(ssCpuRawWait{job=~\"$job\"}[5m]))) * 100)",
      "legendFormat": "{{job}}",
      "refId": "A"
    }
  ]
},
{
"id": 3,
"type": "gauge",
"title": "Memory Usage",
"gridPos": {
"h": 6,
"w": 8,
"x": 8,
"y": 4
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 90
}
]
}
}
},
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "((memTotalReal{job=~\"$job\"} - memAvailReal{job=~\"$job\"}) / memTotalReal{job=~\"$job\"}) * 100",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 4,
"type": "stat",
"title": "Total Memory",
"gridPos": {
"h": 6,
"w": 8,
"x": 16,
"y": 4
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "decbytes",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "blue",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "none",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "memTotalReal{job=~\"$job\"} * 1024",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 5,
"type": "timeseries",
"title": "Load Average",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 10
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean"
]
}
},
"targets": [
{
"expr": "laLoad{job=~\"$job\", laIndex=\"1\"}",
"legendFormat": "{{job}} 1m",
"refId": "A"
},
{
"expr": "laLoad{job=~\"$job\", laIndex=\"2\"}",
"legendFormat": "{{job}} 5m",
"refId": "B"
},
{
"expr": "laLoad{job=~\"$job\", laIndex=\"3\"}",
"legendFormat": "{{job}} 15m",
"refId": "C"
}
]
},
{
"id": 6,
"type": "stat",
"title": "Uptime",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 10
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "sysUpTime{job=~\"$job\"} / 100",
"legendFormat": "{{job}}",
"refId": "A"
}
]
}
]
}

View File

@@ -0,0 +1,13 @@
# Grafana dashboard provisioning: a single file-based provider that loads
# dashboard JSON from /etc/grafana/dashboards.
apiVersion: 1
providers:
- name: 'Homelab Dashboards'
orgId: 1
# Both folder settings are empty strings, so no target folder is named here.
folder: ''
folderUid: ''
type: file
disableDeletion: false
# Interval, in seconds, at which the dashboard path is re-read for changes.
updateIntervalSeconds: 30
# NOTE(review): UI edits are allowed, but the files under `path` remain the
# provisioning source — confirm how UI changes are meant to be persisted.
allowUiUpdates: true
options:
path: /etc/grafana/dashboards

View File

@@ -0,0 +1,9 @@
# Grafana datasource provisioning: the single Prometheus datasource, reached
# through the Grafana backend (access: proxy) at http://prometheus:9090.
apiVersion: 1
datasources:
  - name: Prometheus
    type: prometheus
    # Pinned to the uid that the dashboard JSON panels reference. Without an
    # explicit uid Grafana generates one per install, and the hard-coded
    # panel datasource references would no longer resolve.
    uid: eeyq1w1zddtkwb
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: false

View File

@@ -0,0 +1,98 @@
# Prometheus scrape configuration for the homelab.
# Two kinds of jobs appear below:
#   * node-exporter jobs: a plain static target on port 9100.
#   * "<host>-snmp" jobs: scrape an SNMP exporter on port 9116 via /snmp,
#     passing module/auth/target as URL parameters.
global:
# Default scrape interval applied to every job below (none override it).
scrape_interval: 15s
scrape_configs:
# Prometheus scraping itself.
- job_name: "prometheus"
static_configs:
- targets: ["prometheus:9090"]
- job_name: "homelab-node"
static_configs:
- targets: ["100.67.40.126:9100"]
# One job covering two targets; the job name does not end in "-node".
- job_name: "raspberry-pis"
static_configs:
- targets: ["100.77.151.40:9100"] # pi-5
- targets: ["100.123.246.75:9100"] # pi-5-kevin
- job_name: "setillo-node"
static_configs:
- targets: ["100.125.0.20:9100"]
# SNMP job pattern (repeated for calypso and atlantis below):
# the exporter at <host>:9116 is asked to query `target` 127.0.0.1 using the
# `synology` module and `snmpv3` auth.
# NOTE(review): 127.0.0.1 presumably means the exporter runs on the NAS it
# queries — confirm against the per-host compose files.
- job_name: "setillo-snmp"
metrics_path: /snmp
params:
module: [synology]
auth: [snmpv3]
target: ["127.0.0.1"]
static_configs:
- targets: ["100.125.0.20:9116"]
# Every replacement below is a literal (no capture-group references), so the
# three rules set fixed values: the `target` URL parameter, the `instance`
# label, and the scrape address.
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
replacement: "127.0.0.1"
- source_labels: [__param_target]
target_label: instance
replacement: "100.125.0.20"
- target_label: __address__
replacement: "100.125.0.20:9116"
- job_name: "calypso-node"
static_configs:
- targets: ["100.103.48.78:9100"]
# Same SNMP pattern as setillo-snmp, with calypso's address.
- job_name: "calypso-snmp"
metrics_path: /snmp
params:
module: [synology]
auth: [snmpv3]
target: ["127.0.0.1"]
static_configs:
- targets: ["100.103.48.78:9116"]
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
replacement: "127.0.0.1"
- source_labels: [__param_target]
target_label: instance
replacement: "100.103.48.78"
- target_label: __address__
replacement: "100.103.48.78:9116"
- job_name: "atlantis-node"
static_configs:
- targets: ["100.83.230.112:9100"]
# Same SNMP pattern as setillo-snmp, with atlantis's address.
- job_name: "atlantis-snmp"
metrics_path: /snmp
params:
module: [synology]
auth: [snmpv3]
target: ["127.0.0.1"]
static_configs:
- targets: ["100.83.230.112:9116"]
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
replacement: "127.0.0.1"
- source_labels: [__param_target]
target_label: instance
replacement: "100.83.230.112"
- target_label: __address__
replacement: "100.83.230.112:9116"
# Remaining node-exporter jobs, one static target each.
- job_name: "concord-nuc-node"
static_configs:
- targets: ["100.72.55.21:9100"]
- job_name: "truenas-node"
static_configs:
- targets: ["100.75.252.64:9100"]
- job_name: "vmi2076105-node"
static_configs:
- targets: ["100.99.156.20:9100"]
- job_name: "proxmox-node"
static_configs:
- targets: ["100.87.12.28:9100"]

View File

@@ -0,0 +1,11 @@
# Stage 1: fetch the truenas_exporter sources and compile them with Go 1.23.
FROM golang:1.23 AS builder
WORKDIR /app
# Clone straight into the working directory, then build the binary there.
# NOTE(review): the clone is not pinned to a tag or commit, so each build
# uses whatever the upstream default branch contains at that moment.
RUN git clone https://github.com/kradalby/truenas_exporter.git . \
    && go build -o truenas_exporter .

# Stage 2: runtime image that carries only the compiled binary.
FROM debian:stable-slim
WORKDIR /root/
COPY --from=builder /app/truenas_exporter .
# Port declared for the exporter.
EXPOSE 9163
ENTRYPOINT ["./truenas_exporter"]

View File

@@ -0,0 +1,83 @@
# Prometheus & Grafana Monitoring Hub
This folder contains the configuration for the centralized monitoring stack running on the Homelab VM.
## Folder Structure
```
prometheus_grafana_hub/
├── dashboards/ # Grafana dashboard JSON files
│ ├── infrastructure-overview.json # Fleet-wide status of all devices
│ ├── node-details.json # Detailed per-host metrics
│ ├── synology-monitoring.json # Synology NAS SNMP metrics
│ └── node-exporter.json # Full Node Exporter dashboard
├── snmp-configs/ # SNMP Exporter configurations
│ └── snmp_synology.yml # Synology NAS SNMP config
├── docker-compose/ # Docker compose files for remote hosts
│ ├── atlantis-docker-compose.yml
│ ├── calypso-docker-compose.yml
│ ├── setillo-docker-compose.yml
│ ├── concord-nuc-docker-compose.yml
│ └── guava-docker-compose-node-exporter.yml
├── docker-compose.homelab-vm.yml # Main stack compose (Homelab VM)
├── prometheus.yml # Prometheus scrape configuration
├── Dockerfile # Custom Prometheus image (if needed)
└── README.md
```
## Dashboards
| Dashboard | UID | Description |
|-----------|-----|-------------|
| Infrastructure Overview | `infrastructure-overview-v2` | Fleet status, CPU, Memory, Disk, Network for all hosts |
| Node Details | `node-details-v2` | Per-host CPU breakdown, per-core usage, memory details, disk I/O |
| Synology Monitoring | `synology-dashboard-v2` | Synology NAS CPU, Memory, Load, Uptime via SNMP |
| Node Exporter Full | `rYdddlPWk` | Comprehensive node exporter metrics |
## SNMP Configuration
The `snmp_synology.yml` config is deployed to each Synology NAS at:
- **Atlantis**: `/volume2/metadata/docker/snmp/snmp.yml`
- **Calypso**: `/volume1/docker/snmp/snmp.yml`
- **Setillo**: `/volume1/docker/snmp/snmp.yml`
## Monitored Hosts
### Node Exporter Targets
- homelab-node (100.67.40.126:9100)
- atlantis-node (100.83.230.112:9100)
- calypso-node (100.103.48.78:9100)
- setillo-node (100.125.0.20:9100)
- concord-nuc-node (100.72.55.21:9100)
- proxmox-node (100.87.12.28:9100)
- truenas-node (100.75.252.64:9100)
- raspberry-pis (100.77.151.40:9100, 100.123.246.75:9100)
- vmi2076105-node (100.99.156.20:9100)
### SNMP Targets (Synology)
- atlantis-snmp (100.83.230.112)
- calypso-snmp (100.103.48.78)
- setillo-snmp (100.125.0.20)
## Deployment
### Homelab VM (Main Stack)
The main monitoring stack runs on Homelab VM:
```bash
cd ~/docker/monitoring
# Using the compose file from this repo:
docker-compose -f docker-compose.homelab-vm.yml up -d
# Or if already deployed:
docker-compose up -d
```
**Services:**
- **Grafana**: http://homelab:3300 (admin / set via GF_SECURITY_ADMIN_PASSWORD)
- **Prometheus**: http://homelab:9090
- **Node Exporter**: Runs in host network mode on port 9100
### Remote Hosts
Each remote host runs node-exporter and/or snmp-exporter as specified in the `docker-compose/` folder.

View File

@@ -0,0 +1,135 @@
# Homelab Alerting Stack
This adds Prometheus Alertmanager with notifications to both **ntfy** and **Signal**.
## Components
| Component | Purpose | Port |
|-----------|---------|------|
| Alertmanager | Routes alerts based on severity | 9093 |
| Signal Bridge | Forwards critical alerts to Signal | 5000 |
## Alert Routing
- **Warning alerts** → ntfy only (`homelab-alerts` topic)
- **Critical alerts** → Both ntfy AND Signal
## Deployment Steps
### 1. Update your phone number
Edit `docker-compose.alerting.yml` and replace `REPLACE_WITH_YOUR_NUMBER`:
```yaml
environment:
- SIGNAL_SENDER=+REDACTED_PHONE_NUMBER # Your Signal number
- SIGNAL_RECIPIENTS=+REDACTED_PHONE_NUMBER # Where to send alerts
```
### 2. Copy files to Homelab VM
```bash
# On your local machine or wherever you have SSH access
scp -r alerting-configs/* homelab@192.168.0.210:~/docker/monitoring/
```
### 3. Update Prometheus config
Replace the existing `prometheus.yml` with `prometheus-updated.yml`:
```bash
cd ~/docker/monitoring
cp prometheus-updated.yml prometheus/prometheus.yml
cp alert-rules.yml prometheus/alert-rules.yml
```
### 4. Create alertmanager directory
```bash
mkdir -p alertmanager
cp alertmanager.yml alertmanager/
```
### 5. Deploy the alerting stack
```bash
# Build and start alertmanager + signal bridge
docker-compose -f docker-compose.alerting.yml up -d --build
# Reload Prometheus to pick up new config
curl -X POST http://localhost:9090/-/reload
```
### 6. Verify deployment
```bash
# Check Alertmanager is running
curl http://localhost:9093/-/healthy
# Check Signal Bridge is running
curl http://localhost:5000/health
# Send test alert to Signal
curl -X POST http://localhost:5000/test \
-H "Content-Type: application/json" \
-d '{"message": "🧪 Test alert from Homelab!"}'
# Send test notification to ntfy
curl -d "Test alert from Alertmanager setup" https://ntfy.vish.gg/REDACTED_NTFY_TOPIC
```
## Alert Rules Included
| Alert | Severity | Trigger |
|-------|----------|---------|
| HostDown | Critical | Host unreachable for 2 min |
| HostHighCpuUsage | Warning | CPU > 80% for 5 min |
| HostCriticalCpuUsage | Critical | CPU > 95% for 5 min |
| HostHighMemoryUsage | Warning | Memory > 85% for 5 min |
| HostCriticalMemoryUsage | Critical | Memory > 95% for 5 min |
| HostOutOfMemory | Critical | Memory < 5% available |
| HostHighDiskUsage | Warning | Disk > 80% full |
| HostCriticalDiskUsage | Critical | Disk > 90% full |
| HostDiskWillFillIn24Hours | Warning | Predicted to fill in 24h |
| REDACTED_APP_PASSWORD | Critical | Filesystem became read-only |
| HostNetworkErrors | Warning | Network errors detected |
| HostClockSkew | Warning | Time drift > 0.5 seconds |
## Receiving Alerts
### ntfy App
1. Install ntfy app on your phone (iOS/Android)
2. Add server: `https://ntfy.vish.gg`
3. Subscribe to topic: `homelab-alerts`
### Signal
- Alerts will arrive as regular Signal messages from your registered number
## Troubleshooting
### Check Alertmanager status
```bash
docker logs alertmanager
curl http://localhost:9093/api/v2/status
```
### Check active alerts
```bash
curl http://localhost:9093/api/v2/alerts
```
### Check Signal Bridge logs
```bash
docker logs signal-bridge
```
### Manually trigger test alert in Prometheus
Add this rule temporarily to test:
```yaml
- alert: TestAlert
expr: vector(1)
labels:
severity: warning
annotations:
summary: "Test alert"
```

View File

@@ -0,0 +1,146 @@
# Prometheus Alerting Rules for Homelab Infrastructure
groups:
# Reachability and sustained-load alerts, evaluated every 30 seconds.
- name: host-availability
  interval: 30s
  rules:
    # Fires when a node-exporter scrape target has been failing for 2 minutes.
    # "raspberry-pis" is listed explicitly: the companion prometheus.yml names
    # that scrape job without the "-node" suffix used by every other
    # node-exporter job, so a bare ".*-node" pattern would never cover the Pis.
    - alert: HostDown
      expr: up{job=~".*-node|raspberry-pis"} == 0
      for: 2m
      labels:
        severity: critical
      annotations:
        summary: "Host {{ $labels.instance }} is down"
        description: "Host {{ $labels.instance }} has been unreachable for more than 2 minutes."
    # 15-minute load average divided by the number of idle-mode CPU series
    # (one per core) for the same target; above 2 for 10 minutes is a warning.
    - alert: HostHighLoadAverage
      expr: node_load15 / count without(cpu, mode) (node_cpu_seconds_total{mode="idle"}) > 2
      for: 10m
      labels:
        severity: warning
      annotations:
        summary: "High load average on {{ $labels.instance }}"
        description: "15-minute load average is {{ $value | printf \"%.2f\" }} on {{ $labels.instance }}."
# CPU utilisation alerts, computed as 100 minus the average idle rate per instance.
- name: cpu-alerts
  interval: 30s
  rules:
    # Warning tier. Named to mirror HostCriticalCpuUsage; the alert name must be
    # unique per rule because Alertmanager groups and inhibits on "alertname",
    # and a sanitizer placeholder shared with another rule would merge them.
    - alert: HostHighCpuUsage
      expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: "High CPU usage on {{ $labels.instance }}"
        description: "CPU usage is {{ $value | printf \"%.1f\" }}% on {{ $labels.instance }}."
    # Critical tier: same expression with a 95% threshold.
    - alert: HostCriticalCpuUsage
      expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 95
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: "🔥 CRITICAL CPU on {{ $labels.instance }}"
        description: "CPU usage is {{ $value | printf \"%.1f\" }}% on {{ $labels.instance }}. Immediate attention required!"
# Memory pressure alerts derived from MemAvailable / MemTotal.
- name: memory-alerts
  interval: 30s
  rules:
    # Used memory above 85% for 5 minutes.
    - alert: HostHighMemoryUsage
      expr: '(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85'
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: 'High memory usage on {{ $labels.instance }}'
        description: 'Memory usage is {{ $value | printf "%.1f" }}% on {{ $labels.instance }}.'

    # Used memory above 95% for 5 minutes.
    - alert: HostCriticalMemoryUsage
      expr: '(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 95'
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: '🔥 CRITICAL Memory on {{ $labels.instance }}'
        description: 'Memory usage is {{ $value | printf "%.1f" }}% on {{ $labels.instance }}.'

    # Available memory below 5% for 2 minutes (the same ratio seen from the
    # other side, with a shorter hold time than HostCriticalMemoryUsage).
    - alert: HostOutOfMemory
      expr: 'node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 5'
      for: 2m
      labels:
        severity: critical
      annotations:
        summary: '💀 OUT OF MEMORY on {{ $labels.instance }}'
        description: 'Only {{ $value | printf "%.1f" }}% memory available on {{ $labels.instance }}.'
# Filesystem capacity and health alerts. tmpfs and overlay mounts are excluded
# from every rule in this group.
- name: disk-alerts
  interval: 60s
  rules:
    # More than 80% of the filesystem is in use.
    - alert: HostHighDiskUsage
      expr: (1 - (node_filesystem_avail_bytes{fstype!~"tmpfs|overlay"} / node_filesystem_size_bytes{fstype!~"tmpfs|overlay"})) * 100 > 80
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: "Disk space warning on {{ $labels.instance }}"
        description: "Disk {{ $labels.mountpoint }} is {{ $value | printf \"%.1f\" }}% full on {{ $labels.instance }}."
    # More than 90% of the filesystem is in use.
    - alert: HostCriticalDiskUsage
      expr: (1 - (node_filesystem_avail_bytes{fstype!~"tmpfs|overlay"} / node_filesystem_size_bytes{fstype!~"tmpfs|overlay"})) * 100 > 90
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: "🔥 CRITICAL Disk space on {{ $labels.instance }}"
        description: "Disk {{ $labels.mountpoint }} is {{ $value | printf \"%.1f\" }}% full on {{ $labels.instance }}."
    # Linear extrapolation of the last 6h of free space reaches zero within 24h.
    - alert: HostDiskWillFillIn24Hours
      expr: predict_linear(node_filesystem_avail_bytes{fstype!~"tmpfs|overlay"}[6h], 24*60*60) < 0
      for: 30m
      labels:
        severity: warning
      annotations:
        summary: "Disk {{ $labels.mountpoint }} will fill within 24 hours"
        description: "Based on current growth rate, disk on {{ $labels.instance }} will be full within 24 hours."
    # A filesystem reports itself read-only. The rule needs its own descriptive
    # name: a sanitizer placeholder shared with another rule would let
    # Alertmanager group or inhibit unrelated alerts under one "alertname".
    - alert: HostReadOnlyFilesystem
      expr: node_filesystem_readonly{fstype!~"tmpfs|overlay"} == 1
      for: 1m
      labels:
        severity: critical
      annotations:
        summary: "🔥 Filesystem is read-only on {{ $labels.instance }}"
        description: "Filesystem {{ $labels.mountpoint }} has become read-only. This usually indicates disk failure!"
# Interface error-rate alerts. Loopback, veth, docker and bridge devices are
# excluded by the device matcher.
- name: network-alerts
  interval: 30s
  rules:
    # Receive errors above 10 per second, averaged over 5 minutes.
    - alert: HostNetworkReceiveErrors
      expr: 'rate(node_network_receive_errs_total{device!~"lo|veth.*|docker.*|br-.*"}[5m]) > 10'
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: 'Network receive errors on {{ $labels.instance }}'
        description: '{{ $labels.device }} has {{ $value | printf "%.0f" }} receive errors/sec.'

    # Transmit errors above 10 per second, averaged over 5 minutes.
    - alert: HostNetworkTransmitErrors
      expr: 'rate(node_network_transmit_errs_total{device!~"lo|veth.*|docker.*|br-.*"}[5m]) > 10'
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: 'Network transmit errors on {{ $labels.instance }}'
        description: '{{ $labels.device }} has {{ $value | printf "%.0f" }} transmit errors/sec.'
# Miscellaneous host health checks.
- name: system-alerts
  interval: 60s
  rules:
    # Absolute clock offset reported by node_timex_offset_seconds stays above
    # half a second for 5 minutes.
    - alert: HostClockSkew
      expr: 'abs(node_timex_offset_seconds) > 0.5'
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: 'Clock skew detected on {{ $labels.instance }}'
        description: 'Clock is off by {{ $value | printf "%.2f" }} seconds.'

View File

@@ -0,0 +1,58 @@
# Alertmanager Configuration for Homelab
# Routes alerts to both ntfy and Signal
global:
  resolve_timeout: 5m

route:
  # One notification group per (alertname, severity, instance) combination.
  group_by: ['alertname', 'severity', 'instance']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 4h
  # Fallback receiver for alerts that no child route claims.
  receiver: 'ntfy-all'
  routes:
    # Critical alerts go to both Signal AND ntfy.
    # `matchers` replaces the deprecated `match` map.
    - matchers:
        - severity="critical"
      receiver: 'critical-alerts'
      continue: false
    # Warning alerts go to ntfy only
    - matchers:
        - severity="warning"
      receiver: 'ntfy-all'

receivers:
  # ntfy receiver for all alerts.
  # NOTE(review): this posts the Alertmanager webhook JSON straight to the ntfy
  # topic URL; presumably ntfy shows that payload verbatim - confirm, or route
  # through a formatting bridge instead.
  - name: 'ntfy-all'
    webhook_configs:
      - url: 'http://NTFY:80/homelab-alerts'
        send_resolved: true
        http_config:
          follow_redirects: true
        max_alerts: 10

  # Critical alerts: Signal + ntfy
  - name: 'critical-alerts'
    webhook_configs:
      # ntfy for critical
      - url: 'http://NTFY:80/homelab-alerts'
        send_resolved: true
        http_config:
          follow_redirects: true
        max_alerts: 5
      # Signal via bridge service
      - url: 'http://signal-bridge:5000/alert'
        send_resolved: true
        http_config:
          follow_redirects: true
        max_alerts: 3

inhibit_rules:
  # A firing critical alert mutes warning alerts that carry the same alertname
  # and instance. `source_matchers` / `target_matchers` replace the deprecated
  # `source_match` / `target_match` maps.
  # NOTE(review): the rule only applies when both severities share one
  # alertname; the accompanying alert rules use a distinct name per severity
  # (e.g. HostHighMemoryUsage vs HostCriticalMemoryUsage), so verify this
  # pairing actually suppresses what you expect.
  - source_matchers:
      - severity="critical"
    target_matchers:
      - severity="warning"
    equal: ['alertname', 'instance']

View File

@@ -0,0 +1,49 @@
# Alertmanager Configuration for Homelab
# Routes alerts to both ntfy (via bridge) and Signal
global:
  resolve_timeout: 5m

route:
  # One notification group per (alertname, severity, instance) combination.
  group_by: ['alertname', 'severity', 'instance']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 4h
  # Fallback receiver for alerts that no child route claims.
  receiver: 'ntfy-all'
  routes:
    # Critical alerts go to both Signal AND ntfy.
    # `matchers` replaces the deprecated `match` map.
    - matchers:
        - severity="critical"
      receiver: 'critical-alerts'
      continue: false
    # Warning alerts go to ntfy only
    - matchers:
        - severity="warning"
      receiver: 'ntfy-all'

receivers:
  # ntfy receiver for all alerts (via bridge for nice formatting)
  - name: 'ntfy-all'
    webhook_configs:
      - url: 'http://ntfy-bridge:5001/alert'
        send_resolved: true

  # Critical alerts: Signal + ntfy
  - name: 'critical-alerts'
    webhook_configs:
      # ntfy via bridge (formatted nicely)
      - url: 'http://ntfy-bridge:5001/alert'
        send_resolved: true
      # Signal via bridge service
      - url: 'http://signal-bridge:5000/alert'
        send_resolved: true

inhibit_rules:
  # A firing critical alert mutes warning alerts that carry the same alertname
  # and instance. `source_matchers` / `target_matchers` replace the deprecated
  # `source_match` / `target_match` maps.
  # NOTE(review): the rule only applies when both severities share one
  # alertname; the accompanying alert rules use a distinct name per severity
  # (e.g. HostHighMemoryUsage vs HostCriticalMemoryUsage), so verify this
  # pairing actually suppresses what you expect.
  - source_matchers:
      - severity="critical"
    target_matchers:
      - severity="warning"
    equal: ['alertname', 'instance']

View File

@@ -0,0 +1,68 @@
# Alerting Stack for Homelab
services:
  # Alertmanager: reads its config from ./alertmanager and keeps state in a
  # named volume. Joined to all three external networks so it can reach both
  # bridge services and ntfy by container name.
  alertmanager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    restart: unless-stopped
    ports:
      - "9093:9093"
    volumes:
      - ./alertmanager:/etc/alertmanager
      - alertmanager-data:/alertmanager
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
      - '--web.external-url=http://localhost:9093'
    networks:
      - monitoring-stack_default
      - signal-api-stack_default
      - ntfy-stack_default

  # Webhook receiver that forwards alerts to the Signal REST API.
  signal-bridge:
    build: ./signal-bridge
    container_name: signal-bridge
    restart: unless-stopped
    ports:
      - "5000:5000"
    environment:
      - SIGNAL_API_URL=http://signal-api:8080
      - SIGNAL_SENDER=+REDACTED_PHONE_NUMBER
      - SIGNAL_RECIPIENTS=+REDACTED_PHONE_NUMBER
    networks:
      - monitoring-stack_default
      - signal-api-stack_default
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:5000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Webhook receiver that reformats alerts and publishes them to ntfy.
  ntfy-bridge:
    build: ./ntfy-bridge
    container_name: ntfy-bridge
    restart: unless-stopped
    ports:
      - "5001:5001"
    environment:
      - NTFY_URL=http://NTFY:80
      # No quotes around the value: in list-form `KEY=value` entries Compose
      # passes everything after "=" literally, so quotes would become part of
      # the topic name and end up in the publish URL.
      - NTFY_TOPIC=REDACTED_NTFY_TOPIC
    networks:
      - monitoring-stack_default
      - ntfy-stack_default
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:5001/health')"]
      interval: 30s
      timeout: 10s
      retries: 3

volumes:
  alertmanager-data:

# All networks are created by other stacks and only attached to here.
networks:
  monitoring-stack_default:
    external: true
  signal-api-stack_default:
    external: true
  ntfy-stack_default:
    external: true

View File

@@ -0,0 +1,5 @@
# Image for the ntfy bridge: a Flask app served by gunicorn on port 5001.
FROM python:3.11-slim
WORKDIR /app
RUN pip install --no-cache-dir flask requests gunicorn
COPY app.py .
# Document the listening port, matching the signal-bridge image's EXPOSE line.
EXPOSE 5001
CMD ["gunicorn", "--bind", "0.0.0.0:5001", "--workers", "2", "app:app"]

View File

@@ -0,0 +1,104 @@
from flask import Flask, request, jsonify
import requests
import os
app = Flask(__name__)
NTFY_URL = os.environ.get('NTFY_URL', 'http://NTFY:80')
NTFY_TOPIC = os.environ.get('NTFY_TOPIC', 'homelab-alerts')
def get_status_icon(severity, status):
    """Choose the tag name sent in the ntfy ``Tags`` header for an alert.

    Resolved alerts always map to ``white_check_mark`` regardless of severity.
    Otherwise critical alerts map to ``rotating_light`` and every other
    severity maps to ``warning``.
    """
    if status != 'resolved':
        return 'rotating_light' if severity == 'critical' else 'warning'
    return 'white_check_mark'
def get_priority(severity, status):
    """Choose the value sent in the ntfy ``Priority`` header for an alert.

    Returns the string ``'3'`` for resolved alerts, ``'5'`` for firing critical
    alerts and ``'4'`` for everything else.
    """
    if status == 'resolved':
        level = '3'
    elif severity == 'critical':
        level = '5'
    else:
        level = '4'
    return level
def format_alert(alert):
    """Turn one Alertmanager alert dict into the pieces of an ntfy message.

    Reads ``status``, ``labels`` (``alertname``, ``severity``, ``instance``)
    and ``annotations`` (``summary``, ``description``) from ``alert``, falling
    back to defaults for anything missing.

    Returns:
        A ``(title, body, severity, status)`` tuple. The title is
        ``"<alertname> [FIRING|RESOLVED]"``. The body lists the summary, the
        description (when it differs from the summary) and a ``Host:`` line
        (when an instance label is present), one per line; when none of these
        is available it falls back to a generic one-line sentence.
    """
    labels = alert.get('labels', {})
    annotations = alert.get('annotations', {})
    status = alert.get('status', 'firing')

    severity = labels.get('severity', 'warning')
    instance = labels.get('instance', 'unknown')
    summary = annotations.get('summary', '')
    description = annotations.get('description', '')

    status_text = 'FIRING'
    if status == 'resolved':
        status_text = 'RESOLVED'
    title = "{} [{}]".format(labels.get('alertname', 'Unknown Alert'), status_text)

    lines = [summary] if summary else []
    # Skip the description when it merely repeats the summary.
    if description and description != summary:
        lines.append(description)
    if instance and instance != 'unknown':
        lines.append(f"Host: {instance}")

    if lines:
        body = '\n'.join(lines)
    else:
        body = f"Alert {status_text.lower()} on {instance}"
    return title, body, severity, status
@app.route('/alert', methods=['POST'])
def handle_alert():
    """Receive an Alertmanager webhook and publish each alert to ntfy.

    Expects a JSON object with an ``alerts`` list. Every alert is formatted
    with ``format_alert`` and POSTed to ``{NTFY_URL}/{NTFY_TOPIC}`` with
    ``Title``, ``Priority`` and ``Tags`` headers.

    Returns:
        200 ``{"status": "sent", "count": n}`` when every publish succeeded.
        207 ``{"status": "partial_failure", "count": n, "failed": k}`` when
        ntfy answered at least one publish with a non-200/201 status.
        400 when the request carries no JSON object.
        500 when an unexpected error (including a connection error or
        timeout while talking to ntfy) aborts processing.
    """
    try:
        # silent=True turns a missing or malformed JSON body into None instead
        # of raising, so it can be reported as a client error below.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'status': 'error', 'message': 'No JSON payload received'}), 400
        alerts = data.get('alerts', [])
        failed = 0
        for alert in alerts:
            title, body, severity, status = format_alert(alert)
            priority = get_priority(severity, status)
            tag = get_status_icon(severity, status)
            response = requests.post(
                f"{NTFY_URL}/{NTFY_TOPIC}",
                # Encode explicitly: alert summaries may contain emoji, and a
                # str body is not guaranteed to be sent as UTF-8.
                data=body.encode('utf-8'),
                headers={
                    'Title': title,
                    'Priority': priority,
                    'Tags': tag
                },
                # Never let an unresponsive ntfy server hang the worker.
                timeout=10
            )
            if response.status_code not in [200, 201]:
                print(f"Failed to send to ntfy: {response.status_code} - {response.text}")
                failed += 1
        if failed:
            return jsonify({'status': 'partial_failure', 'count': len(alerts), 'failed': failed}), 207
        return jsonify({'status': 'sent', 'count': len(alerts)})
    except Exception as e:
        print(f"Error: {e}")
        return jsonify({'status': 'error', 'message': str(e)}), 500
@app.route('/health', methods=['GET'])
def health():
    """Liveness endpoint: always answers with a JSON ``status: healthy`` body."""
    payload = {'status': 'healthy'}
    return jsonify(payload)
@app.route('/test', methods=['POST'])
def test():
    """Publish a test notification to the configured ntfy topic.

    The optional JSON body may carry a ``message`` string; a default text is
    used when it is absent or when no JSON object is posted.

    Returns:
        200 ``{"status": "sent"}`` when ntfy accepted the message.
        502 when ntfy answered with a non-200/201 status.
        500 when the request to ntfy raised (connection error, timeout, ...).
    """
    try:
        # silent=True: a missing or non-JSON body falls back to the default message.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        message = data.get('message', 'Test notification from ntfy-bridge')
        response = requests.post(
            f"{NTFY_URL}/{NTFY_TOPIC}",
            # Encode explicitly so emoji in the message are sent as UTF-8.
            data=str(message).encode('utf-8'),
            headers={
                'Title': 'Test Alert',
                'Priority': '4',
                'Tags': 'test_tube'
            },
            timeout=10
        )
        # Report the real outcome instead of unconditionally claiming success.
        if response.status_code not in [200, 201]:
            return jsonify({'status': 'error', 'message': f"ntfy returned {response.status_code}"}), 502
        return jsonify({'status': 'sent'})
    except Exception as e:
        return jsonify({'status': 'error', 'message': str(e)}), 500
# Direct-run entry point using Flask's built-in server on port 5001.
if __name__ == "__main__":
    app.run(port=5001, host="0.0.0.0")

View File

@@ -0,0 +1,117 @@
# Updated Prometheus Configuration with Alertmanager
# This adds alerting configuration to your existing prometheus.yml

global:
  scrape_interval: 15s
  evaluation_interval: 15s  # How often to evaluate rules

# Alertmanager instance that receives firing alerts.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ["alertmanager:9093"]

# Alerting rule files to load.
rule_files: ["/etc/prometheus/alert-rules.yml"]

scrape_configs:
  # --- Monitoring stack components ---
  - job_name: "prometheus"
    static_configs:
      - targets:
          - "prometheus:9090"

  - job_name: "alertmanager"
    static_configs:
      - targets:
          - "alertmanager:9093"

  # --- node-exporter targets (port 9100) ---
  - job_name: "homelab-node"
    static_configs:
      - targets:
          - "100.67.40.126:9100"

  - job_name: "raspberry-pis"
    static_configs:
      - targets:
          - "100.77.151.40:9100"  # pi-5
      - targets:
          - "100.123.246.75:9100"  # pi-5-kevin

  - job_name: "setillo-node"
    static_configs:
      - targets:
          - "100.125.0.20:9100"

  # SNMP jobs scrape an exporter on port 9116 with the "synology" module and
  # "snmpv3" auth. The relabelling pins the exporter's `target` parameter to
  # 127.0.0.1, rewrites the `instance` label to the host address, and sets the
  # scrape address to the exporter itself.
  - job_name: "setillo-snmp"
    metrics_path: /snmp
    params:
      module:
        - synology
      auth:
        - snmpv3
      target:
        - "127.0.0.1"
    static_configs:
      - targets:
          - "100.125.0.20:9116"
    relabel_configs:
      - source_labels:
          - __address__
        target_label: __param_target
        replacement: "127.0.0.1"
      - source_labels:
          - __param_target
        target_label: instance
        replacement: "100.125.0.20"
      - target_label: __address__
        replacement: "100.125.0.20:9116"

  - job_name: "calypso-node"
    static_configs:
      - targets:
          - "100.103.48.78:9100"

  - job_name: "calypso-snmp"
    metrics_path: /snmp
    params:
      module:
        - synology
      auth:
        - snmpv3
      target:
        - "127.0.0.1"
    static_configs:
      - targets:
          - "100.103.48.78:9116"
    relabel_configs:
      - source_labels:
          - __address__
        target_label: __param_target
        replacement: "127.0.0.1"
      - source_labels:
          - __param_target
        target_label: instance
        replacement: "100.103.48.78"
      - target_label: __address__
        replacement: "100.103.48.78:9116"

  - job_name: "atlantis-node"
    static_configs:
      - targets:
          - "100.83.230.112:9100"

  - job_name: "atlantis-snmp"
    metrics_path: /snmp
    params:
      module:
        - synology
      auth:
        - snmpv3
      target:
        - "127.0.0.1"
    static_configs:
      - targets:
          - "100.83.230.112:9116"
    relabel_configs:
      - source_labels:
          - __address__
        target_label: __param_target
        replacement: "127.0.0.1"
      - source_labels:
          - __param_target
        target_label: instance
        replacement: "100.83.230.112"
      - target_label: __address__
        replacement: "100.83.230.112:9116"

  - job_name: "concord-nuc-node"
    static_configs:
      - targets:
          - "100.72.55.21:9100"

  - job_name: "truenas-node"
    static_configs:
      - targets:
          - "100.75.252.64:9100"

  - job_name: "vmi2076105-node"
    static_configs:
      - targets:
          - "100.99.156.20:9100"

  - job_name: "proxmox-node"
    static_configs:
      - targets:
          - "100.87.12.28:9100"

View File

@@ -0,0 +1,11 @@
# Image for the Signal bridge: a Flask app served by gunicorn on port 5000.
FROM python:3.11-slim

WORKDIR /app

# Runtime dependencies, one per line for easier diffs.
RUN pip install --no-cache-dir \
        flask \
        requests \
        gunicorn

COPY app.py .

EXPOSE 5000

# Two workers with a 60-second worker timeout.
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "2", "--timeout", "60", "app:app"]

View File

@@ -0,0 +1,130 @@
#!/usr/bin/env python3
"""
Signal Bridge for Alertmanager
Receives webhooks from Alertmanager and forwards to Signal API
"""
import os
import json
import requests
from flask import Flask, request, jsonify
app = Flask(__name__)
# Configuration from environment variables
SIGNAL_API_URL = os.environ.get('SIGNAL_API_URL', 'http://signal-api:8080')
SIGNAL_SENDER = os.environ.get('SIGNAL_SENDER', '') # Your Signal number
SIGNAL_RECIPIENTS = os.environ.get('SIGNAL_RECIPIENTS', '').split(',') # Comma-separated
def format_alert_message(alert_data):
    """Format an Alertmanager webhook payload into a readable message.

    Args:
        alert_data: dict parsed from the webhook body. The top-level
            ``status`` is used as the default for alerts that carry none;
            each entry of ``alerts`` contributes ``status``, ``labels``
            (``severity``, ``alertname``) and ``annotations``
            (``summary``, ``description``).

    Returns:
        str: one ``"<emoji> [<STATUS>] <summary>"`` paragraph per alert, with
        the description on the following line when present. Paragraphs are
        separated by a blank line; the result is empty when there are no alerts.
    """
    messages = []
    status = alert_data.get('status', 'unknown')
    for alert in alert_data.get('alerts', []):
        alert_status = alert.get('status', status)
        labels = alert.get('labels', {})
        annotations = alert.get('annotations', {})
        severity = labels.get('severity', 'unknown')
        alertname = labels.get('alertname', 'Unknown Alert')
        # The alert name doubles as the summary when no summary annotation exists.
        summary = annotations.get('summary', alertname)
        description = annotations.get('description', '')
        # Status emoji: resolved takes precedence over severity.
        if alert_status == 'resolved':
            # Must be non-empty, otherwise the message starts with a stray space.
            status_emoji = '✅'
            status_text = 'RESOLVED'
        elif severity == 'critical':
            status_emoji = '🚨'
            status_text = 'CRITICAL'
        else:
            # Any non-critical severity (including a missing one) is shown as a warning.
            status_emoji = '⚠️'
            status_text = 'WARNING'
        msg = f"{status_emoji} [{status_text}] {summary}"
        if description:
            msg += f"\n{description}"
        messages.append(msg)
    return "\n\n".join(messages)
def send_signal_message(message):
    """Send ``message`` to every configured recipient via the Signal API.

    One POST to ``{SIGNAL_API_URL}/v2/send`` is made per recipient, so a
    failure for one recipient does not stop delivery to the others.

    Args:
        message: text to send.

    Returns:
        bool: True only when the sender and at least one recipient are
        configured and every request returned 200 or 201; False otherwise.
    """
    # SIGNAL_RECIPIENTS comes from str.split(','), which yields [''] for an
    # unset variable. Drop blank entries first so "no recipients" is detected
    # instead of silently reporting success without sending anything.
    recipients = [entry.strip() for entry in SIGNAL_RECIPIENTS if entry.strip()]
    if not SIGNAL_SENDER or not recipients:
        app.logger.error("Signal sender or recipients not configured")
        return False
    success = True
    for recipient in recipients:
        try:
            payload = {
                "message": message,
                "number": SIGNAL_SENDER,
                "recipients": [recipient]
            }
            response = requests.post(
                f"{SIGNAL_API_URL}/v2/send",
                json=payload,
                timeout=30
            )
            if response.status_code in [200, 201]:
                app.logger.info(f"Message sent to {recipient}")
            else:
                app.logger.error(f"Failed to send to {recipient}: {response.status_code} - {response.text}")
                success = False
        except Exception as e:
            # Best effort: log and keep going so the remaining recipients still get the message.
            app.logger.error(f"Error sending to {recipient}: {e}")
            success = False
    return success
@app.route('/health', methods=['GET'])
def health():
    """Liveness endpoint: always answers 200 with a JSON ``status: healthy`` body."""
    body = jsonify({"status": "healthy"})
    return body, 200
@app.route('/alert', methods=['POST'])
def receive_alert():
    """Receive alert from Alertmanager and forward to Signal.

    Returns:
        200 ``{"status": "sent"}`` when every recipient was notified.
        207 ``{"status": "partial_failure"}`` when ``send_signal_message``
        reported a failure.
        400 when the request has no usable JSON body.
        500 when formatting or sending raised unexpectedly.
    """
    try:
        # silent=True: a malformed or non-JSON body yields None and is reported
        # as a 400 client error below, instead of raising into the generic
        # handler and being returned as a 500.
        alert_data = request.get_json(silent=True)
        if not alert_data:
            return jsonify({"error": "No data received"}), 400
        app.logger.info(f"Received alert: {json.dumps(alert_data, indent=2)}")
        message = format_alert_message(alert_data)
        if send_signal_message(message):
            return jsonify({"status": "sent"}), 200
        else:
            return jsonify({"status": "partial_failure"}), 207
    except Exception as e:
        app.logger.error(f"Error processing alert: {e}")
        return jsonify({"error": str(e)}), 500
@app.route('/test', methods=['POST'])
def test_message():
    """Send a test message through Signal.

    The optional JSON body may carry a ``message`` string; a default text is
    used when it is absent or when no JSON object is posted.

    Returns:
        200 ``{"status": "sent"}`` on success, 500 ``{"status": "failed"}``
        when ``send_signal_message`` reported a failure.
    """
    # silent=True plus the dict check: a request without a JSON object falls
    # back to the default message instead of crashing on None.get().
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    message = data.get('message', '🧪 Test alert from Signal Bridge')
    if send_signal_message(message):
        return jsonify({"status": "sent"}), 200
    else:
        return jsonify({"status": "failed"}), 500
# Direct-run entry point using Flask's built-in server on port 5000.
if __name__ == "__main__":
    app.run(port=5000, host="0.0.0.0")

View File

@@ -0,0 +1,366 @@
{
"uid": "infrastructure-overview-v2",
"title": "Infrastructure Overview - All Devices",
"tags": [
"infrastructure",
"node-exporter",
"tailscale"
],
"timezone": "browser",
"schemaVersion": 38,
"version": 1,
"refresh": "30s",
"templating": {
"list": [
{
"current": {},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"type": "datasource"
},
{
"allValue": "",
"current": {},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"definition": "label_values(node_uname_info, job)",
"hide": 0,
"includeAll": true,
"label": "Host",
"multi": true,
"name": "job",
"query": "label_values(node_uname_info, job)",
"refresh": 1,
"regex": "",
"sort": 1,
"type": "query"
}
]
},
"panels": [
{
"id": 1,
"type": "stat",
"title": "Device Status",
"gridPos": {
"h": 5,
"w": 24,
"x": 0,
"y": 0
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"mappings": [
{
"type": "value",
"options": {
"0": {
"text": "DOWN",
"color": "red"
},
"1": {
"text": "UP",
"color": "green"
}
}
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "green",
"value": 1
}
]
}
}
},
"options": {
"colorMode": "background",
"textMode": "value_and_name",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "up{job=~\"$job\"}",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 2,
"type": "timeseries",
"title": "CPU Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 5
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"max": 100,
"min": 0
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "100 - (avg by(job) (rate(node_cpu_seconds_total{mode=\"idle\", job=~\"$job\"}[5m])) * 100)",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 3,
"type": "timeseries",
"title": "Memory Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 5
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"max": 100,
"min": 0
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "(1 - (node_memory_MemAvailable_bytes{job=~\"$job\"} / node_memory_MemTotal_bytes{job=~\"$job\"})) * 100",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 4,
"type": "bargauge",
"title": "Root Disk Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 13
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 85
}
]
}
}
},
"options": {
"displayMode": "gradient",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "100 - ((node_filesystem_avail_bytes{job=~\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"} / node_filesystem_size_bytes{job=~\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"}) * 100)",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 5,
"type": "stat",
"title": "Uptime",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 13
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "node_time_seconds{job=~\"$job\"} - node_boot_time_seconds{job=~\"$job\"}",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 6,
"type": "timeseries",
"title": "Network Receive",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 21
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "Bps"
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "sum by(job) (rate(node_network_receive_bytes_total{job=~\"$job\", device!~\"lo|docker.*|br-.*|veth.*\"}[5m]))",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 7,
"type": "timeseries",
"title": "Network Transmit",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 21
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "Bps"
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "sum by(job) (rate(node_network_transmit_bytes_total{job=~\"$job\", device!~\"lo|docker.*|br-.*|veth.*\"}[5m]))",
"legendFormat": "{{job}}",
"refId": "A"
}
]
}
]
}

View File

@@ -0,0 +1,936 @@
{
"uid": "node-details-v2",
"title": "Node Details - Full Metrics",
"tags": [
"node-exporter",
"detailed",
"infrastructure"
],
"timezone": "browser",
"schemaVersion": 38,
"version": 1,
"refresh": "30s",
"time": {
"from": "now-1h",
"to": "now"
},
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "prometheus",
"value": "prometheus"
},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
},
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"definition": "label_values(node_uname_info, job)",
"hide": 0,
"includeAll": false,
"label": "Host",
"multi": false,
"name": "job",
"options": [],
"query": "label_values(node_uname_info, job)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
},
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"definition": "label_values(node_uname_info{job=\"$job\"}, instance)",
"hide": 0,
"includeAll": false,
"label": "Instance",
"multi": false,
"name": "instance",
"options": [],
"query": "label_values(node_uname_info{job=\"$job\"}, instance)",
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
}
]
},
"panels": [
{
"id": 1,
"type": "row",
"title": "\ud83d\udcca Quick Stats",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"collapsed": false
},
{
"id": 2,
"type": "stat",
"title": "Uptime",
"gridPos": {
"h": 4,
"w": 4,
"x": 0,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "none",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "node_time_seconds{job=\"$job\",instance=\"$instance\"} - node_boot_time_seconds{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Uptime",
"refId": "A"
}
]
},
{
"id": 3,
"type": "stat",
"title": "CPU Cores",
"gridPos": {
"h": 4,
"w": 3,
"x": 4,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "blue",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "none",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "count(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"idle\"})",
"legendFormat": "Cores",
"refId": "A"
}
]
},
{
"id": 4,
"type": "stat",
"title": "Total RAM",
"gridPos": {
"h": 4,
"w": 3,
"x": 7,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "bytes",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "purple",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "none",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "node_memory_MemTotal_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "RAM",
"refId": "A"
}
]
},
{
"id": 5,
"type": "gauge",
"title": "CPU",
"gridPos": {
"h": 4,
"w": 3,
"x": 10,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 60
},
{
"color": "red",
"value": 80
}
]
}
}
},
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "100 - (avg(rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"idle\"}[5m])) * 100)",
"legendFormat": "CPU",
"refId": "A"
}
]
},
{
"id": 6,
"type": "gauge",
"title": "Memory",
"gridPos": {
"h": 4,
"w": 3,
"x": 13,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 85
}
]
}
}
},
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "(1 - (node_memory_MemAvailable_bytes{job=\"$job\",instance=\"$instance\"} / node_memory_MemTotal_bytes{job=\"$job\",instance=\"$instance\"})) * 100",
"legendFormat": "Memory",
"refId": "A"
}
]
},
{
"id": 7,
"type": "gauge",
"title": "Disk /",
"gridPos": {
"h": 4,
"w": 3,
"x": 16,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 85
}
]
}
}
},
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "100 - ((node_filesystem_avail_bytes{job=\"$job\",instance=\"$instance\",mountpoint=\"/\",fstype!=\"rootfs\"} / node_filesystem_size_bytes{job=\"$job\",instance=\"$instance\",mountpoint=\"/\",fstype!=\"rootfs\"}) * 100)",
"legendFormat": "Disk",
"refId": "A"
}
]
},
{
"id": 8,
"type": "stat",
"title": "Load 1m",
"gridPos": {
"h": 4,
"w": 2,
"x": 19,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"decimals": 2,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 2
},
{
"color": "red",
"value": 4
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "area",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "node_load1{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "1m",
"refId": "A"
}
]
},
{
"id": 9,
"type": "stat",
"title": "Load 5m",
"gridPos": {
"h": 4,
"w": 2,
"x": 21,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"decimals": 2,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 2
},
{
"color": "red",
"value": 4
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "area",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "node_load5{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "5m",
"refId": "A"
}
]
},
{
"id": 10,
"type": "row",
"title": "\ud83d\udda5\ufe0f CPU Details",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 5
},
"collapsed": false
},
{
"id": 11,
"type": "timeseries",
"title": "CPU Usage Breakdown",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 6
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"custom": {
"fillOpacity": 50,
"stacking": {
"mode": "normal",
"group": "A"
}
}
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "avg(rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"user\"}[5m])) * 100",
"legendFormat": "User",
"refId": "A"
},
{
"expr": "avg(rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"system\"}[5m])) * 100",
"legendFormat": "System",
"refId": "B"
},
{
"expr": "avg(rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"iowait\"}[5m])) * 100",
"legendFormat": "IOWait",
"refId": "C"
},
{
"expr": "avg(rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"steal\"}[5m])) * 100",
"legendFormat": "Steal",
"refId": "D"
}
]
},
{
"id": 12,
"type": "timeseries",
"title": "CPU Per Core",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 6
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"max": 100,
"min": 0
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean"
]
}
},
"targets": [
{
"expr": "100 - (rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"idle\"}[5m]) * 100)",
"legendFormat": "CPU {{cpu}}",
"refId": "A"
}
]
},
{
"id": 20,
"type": "row",
"title": "\ud83e\udde0 Memory Details",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 14
},
"collapsed": false
},
{
"id": 21,
"type": "timeseries",
"title": "Memory Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 15
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "bytes",
"custom": {
"fillOpacity": 30,
"stacking": {
"mode": "normal",
"group": "A"
}
}
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean"
]
}
},
"targets": [
{
"expr": "node_memory_MemTotal_bytes{job=\"$job\",instance=\"$instance\"} - node_memory_MemAvailable_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Used",
"refId": "A"
},
{
"expr": "node_memory_Buffers_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Buffers",
"refId": "B"
},
{
"expr": "node_memory_Cached_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Cached",
"refId": "C"
},
{
"expr": "node_memory_MemFree_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Free",
"refId": "D"
}
]
},
{
"id": 22,
"type": "timeseries",
"title": "Swap Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 15
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "bytes"
}
},
"targets": [
{
"expr": "node_memory_SwapTotal_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Total",
"refId": "A"
},
{
"expr": "node_memory_SwapTotal_bytes{job=\"$job\",instance=\"$instance\"} - node_memory_SwapFree_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Used",
"refId": "B"
}
]
},
{
"id": 30,
"type": "row",
"title": "\ud83d\udcbe Disk Details",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 23
},
"collapsed": false
},
{
"id": 31,
"type": "bargauge",
"title": "Disk Space Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 24
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 85
}
]
}
}
},
"options": {
"displayMode": "gradient",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "100 - ((node_filesystem_avail_bytes{job=\"$job\",instance=\"$instance\",fstype!~\"tmpfs|overlay|squashfs\"} / node_filesystem_size_bytes{job=\"$job\",instance=\"$instance\",fstype!~\"tmpfs|overlay|squashfs\"}) * 100)",
"legendFormat": "{{mountpoint}}",
"refId": "A"
}
]
},
{
"id": 32,
"type": "timeseries",
"title": "Disk I/O",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 24
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "Bps"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": ".*Write.*"
},
"properties": [
{
"id": "custom.transform",
"value": "negative-Y"
}
]
}
]
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "rate(node_disk_read_bytes_total{job=\"$job\",instance=\"$instance\",device!~\"loop.*|dm-.*\"}[5m])",
"legendFormat": "{{device}} Read",
"refId": "A"
},
{
"expr": "rate(node_disk_written_bytes_total{job=\"$job\",instance=\"$instance\",device!~\"loop.*|dm-.*\"}[5m])",
"legendFormat": "{{device}} Write",
"refId": "B"
}
]
},
{
"id": 40,
"type": "row",
"title": "\ud83c\udf10 Network Details",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 32
},
"collapsed": false
},
{
"id": 41,
"type": "timeseries",
"title": "Network Traffic",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 33
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "bps"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": ".*TX.*"
},
"properties": [
{
"id": "custom.transform",
"value": "negative-Y"
}
]
}
]
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "rate(node_network_receive_bytes_total{job=\"$job\",instance=\"$instance\",device!~\"lo|docker.*|br-.*|veth.*\"}[5m]) * 8",
"legendFormat": "{{device}} RX",
"refId": "A"
},
{
"expr": "rate(node_network_transmit_bytes_total{job=\"$job\",instance=\"$instance\",device!~\"lo|docker.*|br-.*|veth.*\"}[5m]) * 8",
"legendFormat": "{{device}} TX",
"refId": "B"
}
]
},
{
"id": 42,
"type": "timeseries",
"title": "Network Errors",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 33
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "pps"
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean"
]
}
},
"targets": [
{
"expr": "rate(node_network_receive_errs_total{job=\"$job\",instance=\"$instance\",device!~\"lo|docker.*|br-.*|veth.*\"}[5m])",
"legendFormat": "{{device}} RX Errors",
"refId": "A"
},
{
"expr": "rate(node_network_transmit_errs_total{job=\"$job\",instance=\"$instance\",device!~\"lo|docker.*|br-.*|veth.*\"}[5m])",
"legendFormat": "{{device}} TX Errors",
"refId": "B"
}
]
}
],
"id": null
}

View File

@@ -0,0 +1,351 @@
{
"uid": "synology-dashboard-v2",
"title": "Synology NAS Monitoring",
"tags": [
"synology",
"nas",
"snmp"
],
"timezone": "browser",
"schemaVersion": 38,
"version": 1,
"refresh": "30s",
"templating": {
"list": [
{
"current": {},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"type": "datasource"
},
{
"allValue": "",
"current": {},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"definition": "label_values(ssCpuRawIdle, job)",
"hide": 0,
"includeAll": true,
"label": "NAS",
"multi": true,
"name": "job",
"query": "label_values(ssCpuRawIdle, job)",
"refresh": 1,
"regex": "",
"sort": 1,
"type": "query"
}
]
},
"panels": [
{
"id": 1,
"type": "stat",
"title": "NAS Status",
"gridPos": {
"h": 4,
"w": 24,
"x": 0,
"y": 0
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"mappings": [
{
"type": "value",
"options": {
"0": {
"text": "DOWN",
"color": "red"
},
"1": {
"text": "UP",
"color": "green"
}
}
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "green",
"value": 1
}
]
}
}
},
"options": {
"colorMode": "background",
"textMode": "value_and_name",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "up{job=~\"$job\"}",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
  "id": 2,
  "type": "gauge",
  "title": "CPU Usage",
  "gridPos": {
    "h": 6,
    "w": 8,
    "x": 0,
    "y": 4
  },
  "datasource": {
    "type": "prometheus",
    "uid": "eeyq1w1zddtkwb"
  },
  "fieldConfig": {
    "defaults": {
      "unit": "percent",
      "min": 0,
      "max": 100,
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {
            "color": "green",
            "value": null
          },
          {
            "color": "yellow",
            "value": 60
          },
          {
            "color": "red",
            "value": 80
          }
        ]
      }
    }
  },
  "options": {
    "reduceOptions": {
      "calcs": [
        "lastNotNull"
      ]
    }
  },
  "targets": [
    {
      "expr": "100 - ((rate(ssCpuRawIdle{job=~\"$job\"}[5m]) / (rate(ssCpuRawUser{job=~\"$job\"}[5m]) + rate(ssCpuRawSystem{job=~\"$job\"}[5m]) + rate(ssCpuRawIdle{job=~\"$job\"}[5m]) + rate(ssCpuRawWait{job=~\"$job\"}[5m]))) * 100)",
      "legendFormat": "{{job}}",
      "refId": "A"
    }
  ]
},
{
"id": 3,
"type": "gauge",
"title": "Memory Usage",
"gridPos": {
"h": 6,
"w": 8,
"x": 8,
"y": 4
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 90
}
]
}
}
},
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "((memTotalReal{job=~\"$job\"} - memAvailReal{job=~\"$job\"}) / memTotalReal{job=~\"$job\"}) * 100",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 4,
"type": "stat",
"title": "Total Memory",
"gridPos": {
"h": 6,
"w": 8,
"x": 16,
"y": 4
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "decbytes",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "blue",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "none",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "memTotalReal{job=~\"$job\"} * 1024",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 5,
"type": "timeseries",
"title": "Load Average",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 10
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean"
]
}
},
"targets": [
{
"expr": "laLoad{job=~\"$job\", laIndex=\"1\"}",
"legendFormat": "{{job}} 1m",
"refId": "A"
},
{
"expr": "laLoad{job=~\"$job\", laIndex=\"2\"}",
"legendFormat": "{{job}} 5m",
"refId": "B"
},
{
"expr": "laLoad{job=~\"$job\", laIndex=\"3\"}",
"legendFormat": "{{job}} 15m",
"refId": "C"
}
]
},
{
"id": 6,
"type": "stat",
"title": "Uptime",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 10
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "sysUpTime{job=~\"$job\"} / 100",
"legendFormat": "{{job}}",
"refId": "A"
}
]
}
]
}

View File

@@ -0,0 +1,61 @@
# Prometheus & Grafana Monitoring Stack
# Deployed on Homelab VM at ~/docker/monitoring
#
# Usage:
#   cd ~/docker/monitoring
#   docker-compose up -d
services:
  # Prometheus server; its config is read from the ./prometheus bind mount.
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    restart: unless-stopped
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus:/etc/prometheus
      - prometheus-data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.enable-lifecycle'
    networks:
      - monitoring

  # Grafana UI, published on host port 3300.
  grafana:
    image: grafana/grafana-oss:latest
    container_name: grafana
    restart: unless-stopped
    ports:
      - "3300:3000"
    environment:
      # Mapping form on purpose: here YAML consumes the quotes. In list form
      # (`- KEY="value"`) the quote characters become part of the value, so
      # the admin password would literally include them.
      GF_SECURITY_ADMIN_USER: admin
      GF_SECURITY_ADMIN_PASSWORD: "REDACTED_PASSWORD"
    volumes:
      - grafana-data:/var/lib/grafana
    depends_on:
      - prometheus
    networks:
      - monitoring

  # Host metrics for the VM itself. Runs in the host network and PID
  # namespaces and reads the host filesystem through the /host mount.
  node_exporter:
    image: prom/node-exporter:latest
    container_name: node_exporter
    restart: unless-stopped
    network_mode: host
    pid: host
    user: nobody
    command:
      - '--path.rootfs=/host'
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/host:ro,rslave

volumes:
  prometheus-data:
  grafana-data:

networks:
  monitoring:
    driver: bridge

View File

@@ -0,0 +1,26 @@
version: "3.8"
services:
  # Host metrics. /proc, /sys and / are mounted read-only and passed to the
  # exporter through the matching --path.* flags.
  node-exporter:
    image: quay.io/prometheus/node-exporter:latest
    container_name: node_exporter
    network_mode: host
    pid: host
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--path.rootfs=/rootfs'
      # mount-points-exclude is the current name of the deprecated
      # ignored-mount-points flag; `$$` is Compose escaping for a literal `$`.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    restart: unless-stopped

  # SNMP bridge; snmp.yml is supplied from the NAS volume.
  snmp-exporter:
    image: quay.io/prometheus/snmp-exporter:latest
    container_name: snmp_exporter
    network_mode: host # important, so exporter can talk to DSM SNMP on localhost
    volumes:
      - /volume1/docker/snmp/snmp.yml:/etc/snmp_exporter/snmp.yml:ro
    restart: unless-stopped

View File

@@ -0,0 +1,26 @@
version: "3.8"
services:
  # Host metrics. /proc, /sys and / are mounted read-only and passed to the
  # exporter through the matching --path.* flags.
  node-exporter:
    image: quay.io/prometheus/node-exporter:latest
    container_name: node_exporter
    network_mode: host
    pid: host
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--path.rootfs=/rootfs'
      # mount-points-exclude is the current name of the deprecated
      # ignored-mount-points flag; `$$` is Compose escaping for a literal `$`.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    restart: unless-stopped

  # SNMP bridge; snmp.yml is supplied from the NAS volume.
  snmp-exporter:
    image: quay.io/prometheus/snmp-exporter:latest
    container_name: snmp_exporter
    network_mode: host
    volumes:
      - /volume1/docker/snmp/snmp.yml:/etc/snmp_exporter/snmp.yml:ro
    restart: unless-stopped

View File

@@ -0,0 +1,18 @@
version: "3.8"
services:
  # Host metrics. /proc, /sys and / are mounted read-only and passed to the
  # exporter through the matching --path.* flags.
  node-exporter:
    image: quay.io/prometheus/node-exporter:latest
    container_name: node_exporter
    network_mode: host
    pid: host
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--path.rootfs=/rootfs'
      # mount-points-exclude is the current name of the deprecated
      # ignored-mount-points flag; `$$` is Compose escaping for a literal `$`.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    restart: unless-stopped

View File

@@ -0,0 +1,18 @@
version: "3.9"
services:
  # Host metrics. /proc, /sys and / are mounted read-only and passed to the
  # exporter through the matching --path.* flags.
  node-exporter:
    image: prom/node-exporter:latest
    container_name: node-exporter
    restart: unless-stopped
    network_mode: "host"
    pid: "host"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--path.rootfs=/rootfs'
      # mount-points-exclude is the current name of the deprecated
      # ignored-mount-points flag; `$$` is Compose escaping for a literal `$`.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'

View File

@@ -0,0 +1,26 @@
version: "3.8"
services:
  # Host metrics. /proc, /sys and / are mounted read-only and passed to the
  # exporter through the matching --path.* flags.
  node-exporter:
    image: quay.io/prometheus/node-exporter:latest
    container_name: node_exporter
    network_mode: host
    pid: host
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--path.rootfs=/rootfs'
      # mount-points-exclude is the current name of the deprecated
      # ignored-mount-points flag; `$$` is Compose escaping for a literal `$`.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    restart: unless-stopped

  # SNMP bridge; snmp.yml is supplied from the NAS volume.
  snmp-exporter:
    image: quay.io/prometheus/snmp-exporter:latest
    container_name: snmp_exporter
    network_mode: host
    volumes:
      - /volume1/docker/snmp/snmp.yml:/etc/snmp_exporter/snmp.yml:ro
    restart: unless-stopped

View File

@@ -0,0 +1,98 @@
# Prometheus scrape configuration.
# Every host is scraped on :9100 (the "*-node" and "raspberry-pis" jobs); three
# hosts (setillo, calypso, atlantis) are additionally scraped through an
# snmp_exporter on :9116 (the "*-snmp" jobs).
global:
scrape_interval: 15s
scrape_configs:
# Prometheus scraping itself.
- job_name: "prometheus"
static_configs:
- targets: ["prometheus:9090"]
- job_name: "homelab-node"
static_configs:
- targets: ["100.67.40.126:9100"]
# One job, two hosts: series from both carry job="raspberry-pis" and differ
# only by their instance label.
- job_name: "raspberry-pis"
static_configs:
- targets: ["100.77.151.40:9100"] # pi-5
- targets: ["100.123.246.75:9100"] # pi-5-kevin
- job_name: "setillo-node"
static_configs:
- targets: ["100.125.0.20:9100"]
# SNMP job pattern (identical for setillo, calypso and atlantis):
# the exporter at <host>:9116 is called as /snmp?module=synology&auth=snmpv3&target=127.0.0.1,
# i.e. it is asked to query SNMP on its own localhost.
- job_name: "setillo-snmp"
metrics_path: /snmp
params:
module: [synology]
auth: [snmpv3]
target: ["127.0.0.1"]
static_configs:
- targets: ["100.125.0.20:9116"]
relabel_configs:
# Every rule below writes a fixed replacement; the source label value is not used.
# 1) Sets the ?target= parameter to 127.0.0.1 (same value as params.target above).
- source_labels: [__address__]
target_label: __param_target
replacement: "127.0.0.1"
# 2) Pins the instance label to the host IP instead of the default "<host>:9116".
- source_labels: [__param_target]
target_label: instance
replacement: "100.125.0.20"
# 3) Sets the scrape address to the exporter (same value as the static target).
- target_label: __address__
replacement: "100.125.0.20:9116"
- job_name: "calypso-node"
static_configs:
- targets: ["100.103.48.78:9100"]
# Same SNMP pattern as setillo-snmp, for calypso.
- job_name: "calypso-snmp"
metrics_path: /snmp
params:
module: [synology]
auth: [snmpv3]
target: ["127.0.0.1"]
static_configs:
- targets: ["100.103.48.78:9116"]
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
replacement: "127.0.0.1"
- source_labels: [__param_target]
target_label: instance
replacement: "100.103.48.78"
- target_label: __address__
replacement: "100.103.48.78:9116"
- job_name: "atlantis-node"
static_configs:
- targets: ["100.83.230.112:9100"]
# Same SNMP pattern as setillo-snmp, for atlantis.
- job_name: "atlantis-snmp"
metrics_path: /snmp
params:
module: [synology]
auth: [snmpv3]
target: ["127.0.0.1"]
static_configs:
- targets: ["100.83.230.112:9116"]
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
replacement: "127.0.0.1"
- source_labels: [__param_target]
target_label: instance
replacement: "100.83.230.112"
- target_label: __address__
replacement: "100.83.230.112:9116"
- job_name: "concord-nuc-node"
static_configs:
- targets: ["100.72.55.21:9100"]
- job_name: "truenas-node"
static_configs:
- targets: ["100.75.252.64:9100"]
- job_name: "vmi2076105-node"
static_configs:
- targets: ["100.99.156.20:9100"]
- job_name: "proxmox-node"
static_configs:
- targets: ["100.87.12.28:9100"]

View File

@@ -0,0 +1,582 @@
# Synology SNMP Exporter Configuration
# Comprehensive config for monitoring Synology NAS devices
# Includes: CPU, Memory, Load, Storage, Network, Disks, RAID, Temperature
# SNMPv3 credentials, selected by name ("snmpv3") through the `auth`
# scrape parameter.
# NOTE(review): MD5 authentication and DES privacy are the weakest SNMPv3
# choices. If the SNMP service on the monitored devices offers SHA / AES,
# consider switching both sides together — TODO confirm device support.
# NOTE(review): the two passwords below are stored in plain text in this file.
auths:
snmpv3:
version: 3
security_level: authPriv
auth_protocol: MD5
username: snmp-exporter
password: "REDACTED_PASSWORD"
priv_protocol: DES
priv_password: "REDACTED_PASSWORD"
modules:
synology:
walk:
# Standard MIBs
- 1.3.6.1.2.1.1 # System info (sysDescr, sysUpTime, etc.)
- 1.3.6.1.2.1.2 # Interfaces
- 1.3.6.1.2.1.25.2 # hrStorage (disk/memory usage)
- 1.3.6.1.2.1.25.3.3 # hrProcessorLoad
- 1.3.6.1.2.1.31.1.1 # ifXTable (64-bit counters)
# UCD-SNMP-MIB (CPU, Memory, Load)
- 1.3.6.1.4.1.2021.4 # Memory stats
- 1.3.6.1.4.1.2021.10 # Load average
- 1.3.6.1.4.1.2021.11 # CPU stats
# Synology-specific MIBs
- 1.3.6.1.4.1.6574.1 # System status, temp, power, fans, model
- 1.3.6.1.4.1.6574.2 # Disk information
- 1.3.6.1.4.1.6574.3 # RAID status
- 1.3.6.1.4.1.6574.4 # UPS status
- 1.3.6.1.4.1.6574.5 # Disk SMART info
- 1.3.6.1.4.1.6574.6 # Service users
- 1.3.6.1.4.1.6574.101 # Storage IO
- 1.3.6.1.4.1.6574.102 # Space IO
- 1.3.6.1.4.1.6574.104 # GPU info (if available)
metrics:
# ============================================
# SYSTEM INFO
# ============================================
- name: sysDescr
oid: 1.3.6.1.2.1.1.1
type: DisplayString
help: System description
- name: sysUpTime
oid: 1.3.6.1.2.1.1.3
type: gauge
help: System uptime in hundredths of a second
- name: sysName
oid: 1.3.6.1.2.1.1.5
type: DisplayString
help: System name
# ============================================
# CPU METRICS (UCD-SNMP-MIB)
# ============================================
- name: ssCpuRawUser
oid: 1.3.6.1.4.1.2021.11.50
type: counter
help: Raw CPU user time
- name: ssCpuRawNice
oid: 1.3.6.1.4.1.2021.11.51
type: counter
help: Raw CPU nice time
- name: ssCpuRawSystem
oid: 1.3.6.1.4.1.2021.11.52
type: counter
help: Raw CPU system time
- name: ssCpuRawIdle
oid: 1.3.6.1.4.1.2021.11.53
type: counter
help: Raw CPU idle time
- name: ssCpuRawWait
oid: 1.3.6.1.4.1.2021.11.54
type: counter
help: Raw CPU wait time
- name: ssCpuRawKernel
oid: 1.3.6.1.4.1.2021.11.55
type: counter
help: Raw CPU kernel time
- name: ssCpuRawInterrupt
oid: 1.3.6.1.4.1.2021.11.56
type: counter
help: Raw CPU interrupt time
# ============================================
# MEMORY METRICS (UCD-SNMP-MIB)
# ============================================
- name: memTotalSwap
oid: 1.3.6.1.4.1.2021.4.3
type: gauge
help: Total swap size in KB
- name: memAvailSwap
oid: 1.3.6.1.4.1.2021.4.4
type: gauge
help: Available swap in KB
- name: memTotalReal
oid: 1.3.6.1.4.1.2021.4.5
type: gauge
help: Total RAM in KB
- name: memAvailReal
oid: 1.3.6.1.4.1.2021.4.6
type: gauge
help: Available RAM in KB
- name: memTotalFree
oid: 1.3.6.1.4.1.2021.4.11
type: gauge
help: Total free memory in KB
- name: memShared
oid: 1.3.6.1.4.1.2021.4.13
type: gauge
help: Shared memory in KB
- name: memBuffer
oid: 1.3.6.1.4.1.2021.4.14
type: gauge
help: Buffer memory in KB
- name: memCached
oid: 1.3.6.1.4.1.2021.4.15
type: gauge
help: Cached memory in KB
# ============================================
# LOAD AVERAGE (UCD-SNMP-MIB)
# ============================================
- name: laLoad
  oid: 1.3.6.1.4.1.2021.10.1.3
  type: DisplayString
  help: Load average (1, 5, 15 min)
  indexes:
    - labelname: laIndex
      type: gauge
  lookups:
    - labels: [laIndex]
      labelname: laNames
      oid: 1.3.6.1.4.1.2021.10.1.2
      type: DisplayString
  # laLoad arrives as text (e.g. "0.52"). A plain DisplayString metric is
  # exported as the constant 1 with the text in a label, so a graph of
  # laLoad is a flat line. Parse the text as a float instead; the empty
  # suffix keeps the metric name "laLoad".
  regex_extracts:
    "":
      - regex: '(.*)'
        value: '$1'
# ============================================
# HOST RESOURCES - STORAGE
# ============================================
- name: hrStorageDescr
oid: 1.3.6.1.2.1.25.2.3.1.3
type: DisplayString
help: Storage description
indexes:
- labelname: hrStorageIndex
type: gauge
- name: hrStorageAllocationUnits
oid: 1.3.6.1.2.1.25.2.3.1.4
type: gauge
help: Storage allocation unit size in bytes
indexes:
- labelname: hrStorageIndex
type: gauge
lookups:
- labels: [hrStorageIndex]
labelname: hrStorageDescr
oid: 1.3.6.1.2.1.25.2.3.1.3
type: DisplayString
- name: hrStorageSize
oid: 1.3.6.1.2.1.25.2.3.1.5
type: gauge
help: Storage size in allocation units
indexes:
- labelname: hrStorageIndex
type: gauge
lookups:
- labels: [hrStorageIndex]
labelname: hrStorageDescr
oid: 1.3.6.1.2.1.25.2.3.1.3
type: DisplayString
- name: hrStorageUsed
oid: 1.3.6.1.2.1.25.2.3.1.6
type: gauge
help: Storage used in allocation units
indexes:
- labelname: hrStorageIndex
type: gauge
lookups:
- labels: [hrStorageIndex]
labelname: hrStorageDescr
oid: 1.3.6.1.2.1.25.2.3.1.3
type: DisplayString
# ============================================
# NETWORK INTERFACES
# ============================================
- name: ifNumber
oid: 1.3.6.1.2.1.2.1
type: gauge
help: Number of network interfaces
- name: ifDescr
oid: 1.3.6.1.2.1.2.2.1.2
type: DisplayString
help: Interface description
indexes:
- labelname: ifIndex
type: gauge
- name: ifOperStatus
oid: 1.3.6.1.2.1.2.2.1.8
type: gauge
help: Interface operational status (1=up, 2=down)
indexes:
- labelname: ifIndex
type: gauge
lookups:
- labels: [ifIndex]
labelname: ifDescr
oid: 1.3.6.1.2.1.2.2.1.2
type: DisplayString
enum_values:
1: up
2: down
3: testing
- name: ifHCInOctets
oid: 1.3.6.1.2.1.31.1.1.1.6
type: counter
help: Total bytes received (64-bit)
indexes:
- labelname: ifIndex
type: gauge
lookups:
- labels: [ifIndex]
labelname: ifDescr
oid: 1.3.6.1.2.1.2.2.1.2
type: DisplayString
- name: ifHCOutOctets
oid: 1.3.6.1.2.1.31.1.1.1.10
type: counter
help: Total bytes transmitted (64-bit)
indexes:
- labelname: ifIndex
type: gauge
lookups:
- labels: [ifIndex]
labelname: ifDescr
oid: 1.3.6.1.2.1.2.2.1.2
type: DisplayString
# ============================================
# SYNOLOGY SYSTEM STATUS
# ============================================
- name: systemStatus
oid: 1.3.6.1.4.1.6574.1.1
type: gauge
help: System status (1=Normal, 2=Failed)
- name: temperature
oid: 1.3.6.1.4.1.6574.1.2
type: gauge
help: System temperature in Celsius
- name: powerStatus
oid: 1.3.6.1.4.1.6574.1.3
type: gauge
help: Power status (1=Normal, 2=Failed)
- name: systemFanStatus
oid: 1.3.6.1.4.1.6574.1.4.1
type: gauge
help: System fan status (1=Normal, 2=Failed)
- name: cpuFanStatus
oid: 1.3.6.1.4.1.6574.1.4.2
type: gauge
help: CPU fan status (1=Normal, 2=Failed)
- name: modelName
oid: 1.3.6.1.4.1.6574.1.5.1
type: DisplayString
help: NAS model name
- name: serialNumber
oid: 1.3.6.1.4.1.6574.1.5.2
type: DisplayString
help: NAS serial number
- name: version
oid: 1.3.6.1.4.1.6574.1.5.3
type: DisplayString
help: DSM version
# Name restored: the mirror's secret scrubber had replaced this identifier
# with a placeholder, which also made the name collide with other entries.
- name: upgradeAvailable
  oid: 1.3.6.1.4.1.6574.1.5.4
  type: gauge
  help: DSM upgrade available (1=available, 2=unavailable)
# ============================================
# SYNOLOGY DISK INFO
# ============================================
- name: diskID
oid: 1.3.6.1.4.1.6574.2.1.1.2
type: DisplayString
help: Disk ID
indexes:
- labelname: diskIndex
type: gauge
- name: diskModel
oid: 1.3.6.1.4.1.6574.2.1.1.3
type: DisplayString
help: Disk model
indexes:
- labelname: diskIndex
type: gauge
lookups:
- labels: [diskIndex]
labelname: diskID
oid: 1.3.6.1.4.1.6574.2.1.1.2
type: DisplayString
- name: diskType
oid: 1.3.6.1.4.1.6574.2.1.1.4
type: DisplayString
help: Disk type (SATA, SSD, etc.)
indexes:
- labelname: diskIndex
type: gauge
lookups:
- labels: [diskIndex]
labelname: diskID
oid: 1.3.6.1.4.1.6574.2.1.1.2
type: DisplayString
- name: diskStatus
oid: 1.3.6.1.4.1.6574.2.1.1.5
type: gauge
help: Disk status (1=Normal, 2=Initialized, 3=NotInitialized, 4=SystemPartitionFailed, 5=Crashed)
indexes:
- labelname: diskIndex
type: gauge
lookups:
- labels: [diskIndex]
labelname: diskID
oid: 1.3.6.1.4.1.6574.2.1.1.2
type: DisplayString
- name: diskTemperature
oid: 1.3.6.1.4.1.6574.2.1.1.6
type: gauge
help: Disk temperature in Celsius
indexes:
- labelname: diskIndex
type: gauge
lookups:
- labels: [diskIndex]
labelname: diskID
oid: 1.3.6.1.4.1.6574.2.1.1.2
type: DisplayString
# ============================================
# SYNOLOGY RAID INFO
# ============================================
- name: raidName
oid: 1.3.6.1.4.1.6574.3.1.1.2
type: DisplayString
help: RAID/Volume name
indexes:
- labelname: raidIndex
type: gauge
# Status code 17 in the help text had been replaced by a scrubber
# placeholder; restored to RaidUnmountCache (the counterpart of 16).
- name: raidStatus
  oid: 1.3.6.1.4.1.6574.3.1.1.3
  type: gauge
  help: RAID status (1=Normal, 2=Repairing, 3=Migrating, 4=Expanding, 5=Deleting, 6=Creating, 7=RaidSyncing, 8=RaidParityChecking, 9=RaidAssembling, 10=Canceling, 11=Degrade, 12=Crashed, 13=DataScrubbing, 14=RaidDeploying, 15=RaidUnDeploying, 16=RaidMountCache, 17=RaidUnmountCache, 18=RaidExpandingUnfinishedSHR, 19=RaidConvertSHRToPool, 20=RaidMigrateSHR1ToSHR2, 21=RaidUnknownStatus)
  indexes:
    - labelname: raidIndex
      type: gauge
  lookups:
    - labels: [raidIndex]
      labelname: raidName
      oid: 1.3.6.1.4.1.6574.3.1.1.2
      type: DisplayString
- name: raidFreeSize
oid: 1.3.6.1.4.1.6574.3.1.1.4
type: gauge
help: RAID free size in bytes
indexes:
- labelname: raidIndex
type: gauge
lookups:
- labels: [raidIndex]
labelname: raidName
oid: 1.3.6.1.4.1.6574.3.1.1.2
type: DisplayString
- name: raidTotalSize
oid: 1.3.6.1.4.1.6574.3.1.1.5
type: gauge
help: RAID total size in bytes
indexes:
- labelname: raidIndex
type: gauge
lookups:
- labels: [raidIndex]
labelname: raidName
oid: 1.3.6.1.4.1.6574.3.1.1.2
type: DisplayString
# ============================================
# SYNOLOGY UPS INFO (if connected)
# ============================================
- name: upsModel
oid: 1.3.6.1.4.1.6574.4.1.1
type: DisplayString
help: UPS model name
- name: upsSN
oid: 1.3.6.1.4.1.6574.4.1.2
type: DisplayString
help: UPS serial number
- name: upsStatus
oid: 1.3.6.1.4.1.6574.4.1.3
type: DisplayString
help: UPS status
- name: upsLoad
oid: 1.3.6.1.4.1.6574.4.2.1
type: gauge
help: UPS load percentage
# Name restored: the mirror's secret scrubber had replaced this identifier
# with a placeholder, which also made the name collide with other entries.
- name: upsBatteryCharge
  oid: 1.3.6.1.4.1.6574.4.3.1.1
  type: gauge
  help: UPS battery charge percentage
- name: upsBatteryChargeWarning
oid: 1.3.6.1.4.1.6574.4.3.1.2
type: gauge
help: UPS battery charge warning level
# ============================================
# SYNOLOGY SERVICE USERS
# ============================================
- name: serviceName
  oid: 1.3.6.1.4.1.6574.6.1.1.2
  type: DisplayString
  help: Service name
  indexes:
    # Index label restored from a scrubber placeholder; serviceInfoIndex is
    # the label the serviceUsers lookup keys on.
    - labelname: serviceInfoIndex
      type: gauge
- name: serviceUsers
  oid: 1.3.6.1.4.1.6574.6.1.1.3
  type: gauge
  help: Number of users connected to service
  indexes:
    # Index label restored from a scrubber placeholder. It has to match the
    # label named in the lookup below, otherwise serviceName cannot be joined.
    - labelname: serviceInfoIndex
      type: gauge
  lookups:
    - labels: [serviceInfoIndex]
      labelname: serviceName
      oid: 1.3.6.1.4.1.6574.6.1.1.2
      type: DisplayString
# ============================================
# SYNOLOGY STORAGE IO
# ============================================
- name: storageIODevice
oid: 1.3.6.1.4.1.6574.101.1.1.2
type: DisplayString
help: Storage IO device name
indexes:
- labelname: storageIOIndex
type: gauge
- name: storageIONReadX
oid: 1.3.6.1.4.1.6574.101.1.1.12
type: counter
help: Total bytes read (64-bit)
indexes:
- labelname: storageIOIndex
type: gauge
lookups:
- labels: [storageIOIndex]
labelname: storageIODevice
oid: 1.3.6.1.4.1.6574.101.1.1.2
type: DisplayString
- name: storageIONWrittenX
oid: 1.3.6.1.4.1.6574.101.1.1.13
type: counter
help: Total bytes written (64-bit)
indexes:
- labelname: storageIOIndex
type: gauge
lookups:
- labels: [storageIOIndex]
labelname: storageIODevice
oid: 1.3.6.1.4.1.6574.101.1.1.2
type: DisplayString
- name: storageIOLA
oid: 1.3.6.1.4.1.6574.101.1.1.8
type: gauge
help: Storage IO load average
indexes:
- labelname: storageIOIndex
type: gauge
lookups:
- labels: [storageIOIndex]
labelname: storageIODevice
oid: 1.3.6.1.4.1.6574.101.1.1.2
type: DisplayString
# ============================================
# SYNOLOGY SPACE IO (Volume IO)
# ============================================
- name: spaceIODevice
oid: 1.3.6.1.4.1.6574.102.1.1.2
type: DisplayString
help: Space/Volume IO device name
indexes:
- labelname: spaceIOIndex
type: gauge
- name: spaceIONReadX
oid: 1.3.6.1.4.1.6574.102.1.1.12
type: counter
help: Volume bytes read (64-bit)
indexes:
- labelname: spaceIOIndex
type: gauge
lookups:
- labels: [spaceIOIndex]
labelname: spaceIODevice
oid: 1.3.6.1.4.1.6574.102.1.1.2
type: DisplayString
# Name restored from a scrubber placeholder; it mirrors storageIONWrittenX
# in the Storage IO section and pairs with spaceIONReadX.
- name: spaceIONWrittenX
  oid: 1.3.6.1.4.1.6574.102.1.1.13
  type: counter
  help: Volume bytes written (64-bit)
  indexes:
    - labelname: spaceIOIndex
      type: gauge
  lookups:
    - labels: [spaceIOIndex]
      labelname: spaceIODevice
      oid: 1.3.6.1.4.1.6574.102.1.1.2
      type: DisplayString
- name: spaceIOLA
oid: 1.3.6.1.4.1.6574.102.1.1.8
type: gauge
help: Volume IO load average
indexes:
- labelname: spaceIOIndex
type: gauge
lookups:
- labels: [spaceIOIndex]
labelname: spaceIODevice
oid: 1.3.6.1.4.1.6574.102.1.1.2
type: DisplayString

View File

@@ -0,0 +1 @@
1-y71kjkcRGpoNXqSABU07nwduE0jUOrVXVfYOcSPdoZlPuFbKNG1gIPou74HcdqTr

View File

@@ -0,0 +1,62 @@
# Prometheus + Grafana Monitoring Stack
# Ports: 9090 (Prometheus), 3300 (Grafana)
#
# Config files are in prometheus/ and grafana/ subdirectories relative to this file
# Dashboards provisioned: infrastructure-overview, node-details, node-exporter, synology-monitoring
services:
  # Prometheus server; its config is read from the ./prometheus bind mount.
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    volumes:
      - ./prometheus:/etc/prometheus
      - prometheus-data:/prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--web.enable-lifecycle"
    ports:
      - "9090:9090"
    restart: unless-stopped
    networks:
      - monitoring

  # Grafana UI with datasources and dashboards provisioned from ./grafana.
  grafana:
    image: grafana/grafana-oss:latest
    container_name: grafana
    environment:
      # Mapping form on purpose: here YAML consumes the quotes. In list form
      # (`- KEY="value"`) the quote characters become part of the value, so
      # the admin password would literally include them.
      GF_SECURITY_ADMIN_USER: admin
      GF_SECURITY_ADMIN_PASSWORD: "REDACTED_PASSWORD"
    volumes:
      - grafana-data:/var/lib/grafana
      - ./grafana/provisioning/datasources:/etc/grafana/provisioning/datasources:ro
      - ./grafana/provisioning/dashboards:/etc/grafana/provisioning/dashboards:ro
      - ./grafana/dashboards:/etc/grafana/dashboards:ro
    ports:
      - "3300:3000"
    restart: unless-stopped
    depends_on:
      - prometheus
    networks:
      - monitoring

  # Host metrics for the machine running this stack. Runs in the host
  # network and PID namespaces and reads the host through the /host mount.
  node_exporter:
    image: prom/node-exporter:latest
    container_name: node_exporter
    network_mode: host
    pid: host
    volumes:
      - /:/host:ro,rslave
      - /sys:/host/sys:ro
      - /proc:/host/proc:ro
    command:
      - '--path.rootfs=/host'
    restart: unless-stopped

volumes:
  prometheus-data:
  grafana-data:

networks:
  monitoring:
    driver: bridge

View File

@@ -0,0 +1,366 @@
{
"uid": "infrastructure-overview-v2",
"title": "Infrastructure Overview - All Devices",
"tags": [
"infrastructure",
"node-exporter",
"tailscale"
],
"timezone": "browser",
"schemaVersion": 38,
"version": 1,
"refresh": "30s",
"templating": {
"list": [
{
"current": {},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"type": "datasource"
},
{
"allValue": "",
"current": {},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"definition": "label_values(node_uname_info, job)",
"hide": 0,
"includeAll": true,
"label": "Host",
"multi": true,
"name": "job",
"query": "label_values(node_uname_info, job)",
"refresh": 1,
"regex": "",
"sort": 1,
"type": "query"
}
]
},
"panels": [
{
"id": 1,
"type": "stat",
"title": "Device Status",
"gridPos": {
"h": 5,
"w": 24,
"x": 0,
"y": 0
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"mappings": [
{
"type": "value",
"options": {
"0": {
"text": "DOWN",
"color": "red"
},
"1": {
"text": "UP",
"color": "green"
}
}
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "green",
"value": 1
}
]
}
}
},
"options": {
"colorMode": "background",
"textMode": "value_and_name",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "up{job=~\"$job\"}",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 2,
"type": "timeseries",
"title": "CPU Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 5
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"max": 100,
"min": 0
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "100 - (avg by(job) (rate(node_cpu_seconds_total{mode=\"idle\", job=~\"$job\"}[5m])) * 100)",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 3,
"type": "timeseries",
"title": "Memory Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 5
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"max": 100,
"min": 0
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "(1 - (node_memory_MemAvailable_bytes{job=~\"$job\"} / node_memory_MemTotal_bytes{job=~\"$job\"})) * 100",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 4,
"type": "bargauge",
"title": "Root Disk Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 13
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 85
}
]
}
}
},
"options": {
"displayMode": "gradient",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "100 - ((node_filesystem_avail_bytes{job=~\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"} / node_filesystem_size_bytes{job=~\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"}) * 100)",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 5,
"type": "stat",
"title": "Uptime",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 13
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "node_time_seconds{job=~\"$job\"} - node_boot_time_seconds{job=~\"$job\"}",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 6,
"type": "timeseries",
"title": "Network Receive",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 21
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "Bps"
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "sum by(job) (rate(node_network_receive_bytes_total{job=~\"$job\", device!~\"lo|docker.*|br-.*|veth.*\"}[5m]))",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 7,
"type": "timeseries",
"title": "Network Transmit",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 21
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "Bps"
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "sum by(job) (rate(node_network_transmit_bytes_total{job=~\"$job\", device!~\"lo|docker.*|br-.*|veth.*\"}[5m]))",
"legendFormat": "{{job}}",
"refId": "A"
}
]
}
]
}

View File

@@ -0,0 +1,936 @@
{
"uid": "node-details-v2",
"title": "Node Details - Full Metrics",
"tags": [
"node-exporter",
"detailed",
"infrastructure"
],
"timezone": "browser",
"schemaVersion": 38,
"version": 1,
"refresh": "30s",
"time": {
"from": "now-1h",
"to": "now"
},
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "prometheus",
"value": "prometheus"
},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
},
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"definition": "label_values(node_uname_info, job)",
"hide": 0,
"includeAll": false,
"label": "Host",
"multi": false,
"name": "job",
"options": [],
"query": "label_values(node_uname_info, job)",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
},
{
"current": {},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"definition": "label_values(node_uname_info{job=\"$job\"}, instance)",
"hide": 0,
"includeAll": false,
"label": "Instance",
"multi": false,
"name": "instance",
"options": [],
"query": "label_values(node_uname_info{job=\"$job\"}, instance)",
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
}
]
},
"panels": [
{
"id": 1,
"type": "row",
"title": "\ud83d\udcca Quick Stats",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"collapsed": false
},
{
"id": 2,
"type": "stat",
"title": "Uptime",
"gridPos": {
"h": 4,
"w": 4,
"x": 0,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "none",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "node_time_seconds{job=\"$job\",instance=\"$instance\"} - node_boot_time_seconds{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Uptime",
"refId": "A"
}
]
},
{
"id": 3,
"type": "stat",
"title": "CPU Cores",
"gridPos": {
"h": 4,
"w": 3,
"x": 4,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "blue",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "none",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "count(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"idle\"})",
"legendFormat": "Cores",
"refId": "A"
}
]
},
{
"id": 4,
"type": "stat",
"title": "Total RAM",
"gridPos": {
"h": 4,
"w": 3,
"x": 7,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "bytes",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "purple",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "none",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "node_memory_MemTotal_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "RAM",
"refId": "A"
}
]
},
{
"id": 5,
"type": "gauge",
"title": "CPU",
"gridPos": {
"h": 4,
"w": 3,
"x": 10,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 60
},
{
"color": "red",
"value": 80
}
]
}
}
},
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "100 - (avg(rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"idle\"}[5m])) * 100)",
"legendFormat": "CPU",
"refId": "A"
}
]
},
{
"id": 6,
"type": "gauge",
"title": "Memory",
"gridPos": {
"h": 4,
"w": 3,
"x": 13,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 85
}
]
}
}
},
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "(1 - (node_memory_MemAvailable_bytes{job=\"$job\",instance=\"$instance\"} / node_memory_MemTotal_bytes{job=\"$job\",instance=\"$instance\"})) * 100",
"legendFormat": "Memory",
"refId": "A"
}
]
},
{
"id": 7,
"type": "gauge",
"title": "Disk /",
"gridPos": {
"h": 4,
"w": 3,
"x": 16,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 85
}
]
}
}
},
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "100 - ((node_filesystem_avail_bytes{job=\"$job\",instance=\"$instance\",mountpoint=\"/\",fstype!=\"rootfs\"} / node_filesystem_size_bytes{job=\"$job\",instance=\"$instance\",mountpoint=\"/\",fstype!=\"rootfs\"}) * 100)",
"legendFormat": "Disk",
"refId": "A"
}
]
},
{
"id": 8,
"type": "stat",
"title": "Load 1m",
"gridPos": {
"h": 4,
"w": 2,
"x": 19,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"decimals": 2,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 2
},
{
"color": "red",
"value": 4
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "area",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "node_load1{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "1m",
"refId": "A"
}
]
},
{
"id": 9,
"type": "stat",
"title": "Load 5m",
"gridPos": {
"h": 4,
"w": 2,
"x": 21,
"y": 1
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"decimals": 2,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 2
},
{
"color": "red",
"value": 4
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "area",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "node_load5{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "5m",
"refId": "A"
}
]
},
{
"id": 10,
"type": "row",
"title": "\ud83d\udda5\ufe0f CPU Details",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 5
},
"collapsed": false
},
{
"id": 11,
"type": "timeseries",
"title": "CPU Usage Breakdown",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 6
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"custom": {
"fillOpacity": 50,
"stacking": {
"mode": "normal",
"group": "A"
}
}
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "avg(rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"user\"}[5m])) * 100",
"legendFormat": "User",
"refId": "A"
},
{
"expr": "avg(rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"system\"}[5m])) * 100",
"legendFormat": "System",
"refId": "B"
},
{
"expr": "avg(rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"iowait\"}[5m])) * 100",
"legendFormat": "IOWait",
"refId": "C"
},
{
"expr": "avg(rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"steal\"}[5m])) * 100",
"legendFormat": "Steal",
"refId": "D"
}
]
},
{
"id": 12,
"type": "timeseries",
"title": "CPU Per Core",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 6
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"max": 100,
"min": 0
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean"
]
}
},
"targets": [
{
"expr": "100 - (rate(node_cpu_seconds_total{job=\"$job\",instance=\"$instance\",mode=\"idle\"}[5m]) * 100)",
"legendFormat": "CPU {{cpu}}",
"refId": "A"
}
]
},
{
"id": 20,
"type": "row",
"title": "\ud83e\udde0 Memory Details",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 14
},
"collapsed": false
},
{
"id": 21,
"type": "timeseries",
"title": "Memory Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 15
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "bytes",
"custom": {
"fillOpacity": 30,
"stacking": {
"mode": "normal",
"group": "A"
}
}
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean"
]
}
},
"targets": [
{
"expr": "node_memory_MemTotal_bytes{job=\"$job\",instance=\"$instance\"} - node_memory_MemAvailable_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Used",
"refId": "A"
},
{
"expr": "node_memory_Buffers_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Buffers",
"refId": "B"
},
{
"expr": "node_memory_Cached_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Cached",
"refId": "C"
},
{
"expr": "node_memory_MemFree_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Free",
"refId": "D"
}
]
},
{
"id": 22,
"type": "timeseries",
"title": "Swap Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 15
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "bytes"
}
},
"targets": [
{
"expr": "node_memory_SwapTotal_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Total",
"refId": "A"
},
{
"expr": "node_memory_SwapTotal_bytes{job=\"$job\",instance=\"$instance\"} - node_memory_SwapFree_bytes{job=\"$job\",instance=\"$instance\"}",
"legendFormat": "Used",
"refId": "B"
}
]
},
{
"id": 30,
"type": "row",
"title": "\ud83d\udcbe Disk Details",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 23
},
"collapsed": false
},
{
"id": 31,
"type": "bargauge",
"title": "Disk Space Usage",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 24
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 85
}
]
}
}
},
"options": {
"displayMode": "gradient",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "100 - ((node_filesystem_avail_bytes{job=\"$job\",instance=\"$instance\",fstype!~\"tmpfs|overlay|squashfs\"} / node_filesystem_size_bytes{job=\"$job\",instance=\"$instance\",fstype!~\"tmpfs|overlay|squashfs\"}) * 100)",
"legendFormat": "{{mountpoint}}",
"refId": "A"
}
]
},
{
"id": 32,
"type": "timeseries",
"title": "Disk I/O",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 24
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "Bps"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": ".*Write.*"
},
"properties": [
{
"id": "custom.transform",
"value": "negative-Y"
}
]
}
]
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "rate(node_disk_read_bytes_total{job=\"$job\",instance=\"$instance\",device!~\"loop.*|dm-.*\"}[5m])",
"legendFormat": "{{device}} Read",
"refId": "A"
},
{
"expr": "rate(node_disk_written_bytes_total{job=\"$job\",instance=\"$instance\",device!~\"loop.*|dm-.*\"}[5m])",
"legendFormat": "{{device}} Write",
"refId": "B"
}
]
},
{
"id": 40,
"type": "row",
"title": "\ud83c\udf10 Network Details",
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 32
},
"collapsed": false
},
{
"id": 41,
"type": "timeseries",
"title": "Network Traffic",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 33
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "bps"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": ".*TX.*"
},
"properties": [
{
"id": "custom.transform",
"value": "negative-Y"
}
]
}
]
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean",
"max"
]
}
},
"targets": [
{
"expr": "rate(node_network_receive_bytes_total{job=\"$job\",instance=\"$instance\",device!~\"lo|docker.*|br-.*|veth.*\"}[5m]) * 8",
"legendFormat": "{{device}} RX",
"refId": "A"
},
{
"expr": "rate(node_network_transmit_bytes_total{job=\"$job\",instance=\"$instance\",device!~\"lo|docker.*|br-.*|veth.*\"}[5m]) * 8",
"legendFormat": "{{device}} TX",
"refId": "B"
}
]
},
{
"id": 42,
"type": "timeseries",
"title": "Network Errors",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 33
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "pps"
}
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean"
]
}
},
"targets": [
{
"expr": "rate(node_network_receive_errs_total{job=\"$job\",instance=\"$instance\",device!~\"lo|docker.*|br-.*|veth.*\"}[5m])",
"legendFormat": "{{device}} RX Errors",
"refId": "A"
},
{
"expr": "rate(node_network_transmit_errs_total{job=\"$job\",instance=\"$instance\",device!~\"lo|docker.*|br-.*|veth.*\"}[5m])",
"legendFormat": "{{device}} TX Errors",
"refId": "B"
}
]
}
],
"id": null
}

View File

@@ -0,0 +1,351 @@
{
"uid": "synology-dashboard-v2",
"title": "Synology NAS Monitoring",
"tags": [
"synology",
"nas",
"snmp"
],
"timezone": "browser",
"schemaVersion": 38,
"version": 1,
"refresh": "30s",
"templating": {
"list": [
{
"current": {},
"hide": 0,
"includeAll": false,
"label": "Data Source",
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"type": "datasource"
},
{
"allValue": "",
"current": {},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"definition": "label_values(ssCpuRawIdle, job)",
"hide": 0,
"includeAll": true,
"label": "NAS",
"multi": true,
"name": "job",
"query": "label_values(ssCpuRawIdle, job)",
"refresh": 1,
"regex": "",
"sort": 1,
"type": "query"
}
]
},
"panels": [
{
"id": 1,
"type": "stat",
"title": "NAS Status",
"gridPos": {
"h": 4,
"w": 24,
"x": 0,
"y": 0
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"mappings": [
{
"type": "value",
"options": {
"0": {
"text": "DOWN",
"color": "red"
},
"1": {
"text": "UP",
"color": "green"
}
}
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "green",
"value": 1
}
]
}
}
},
"options": {
"colorMode": "background",
"textMode": "value_and_name",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "up{job=~\"$job\"}",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 2,
"type": "gauge",
"title": "CPU Usage",
"gridPos": {
"h": 6,
"w": 8,
"x": 0,
"y": 4
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 60
},
{
"color": "red",
"value": 80
}
]
}
}
},
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "100 - ((rate(ssCpuRawIdle{job=~\"$job\"}[5m]) / (rate(ssCpuRawUser{job=~\"$job\"}[5m]) + rate(ssCpuRawSystem{job=~\"$job\"}[5m]) + rate(ssCpuRawIdle{job=~\"$job\"}[5m]) + rate(ssCpuRawWait{job=~\"$job\"}[5m]))) * 100)",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 3,
"type": "gauge",
"title": "Memory Usage",
"gridPos": {
"h": 6,
"w": 8,
"x": 8,
"y": 4
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 90
}
]
}
}
},
"options": {
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "((memTotalReal{job=~\"$job\"} - memAvailReal{job=~\"$job\"}) / memTotalReal{job=~\"$job\"}) * 100",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 4,
"type": "stat",
"title": "Total Memory",
"gridPos": {
"h": 6,
"w": 8,
"x": 16,
"y": 4
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "decbytes",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "blue",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"graphMode": "none",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "memTotalReal{job=~\"$job\"} * 1024",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 5,
"type": "timeseries",
"title": "Load Average",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 10
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"mean"
]
}
},
"targets": [
{
"expr": "laLoad{job=~\"$job\", laIndex=\"1\"}",
"legendFormat": "{{job}} 1m",
"refId": "A"
},
{
"expr": "laLoad{job=~\"$job\", laIndex=\"2\"}",
"legendFormat": "{{job}} 5m",
"refId": "B"
},
{
"expr": "laLoad{job=~\"$job\", laIndex=\"3\"}",
"legendFormat": "{{job}} 15m",
"refId": "C"
}
]
},
{
"id": 6,
"type": "stat",
"title": "Uptime",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 10
},
"datasource": {
"type": "prometheus",
"uid": "eeyq1w1zddtkwb"
},
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"options": {
"colorMode": "value",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
]
}
},
"targets": [
{
"expr": "sysUpTime{job=~\"$job\"} / 100",
"legendFormat": "{{job}}",
"refId": "A"
}
]
}
]
}

View File

@@ -0,0 +1,13 @@
# Grafana dashboard provisioning: declares one file-based dashboard provider.
apiVersion: 1
providers:
# Single provider entry; its dashboards are read from the `path` given under `options`.
- name: 'Homelab Dashboards'
orgId: 1
# Both folder fields are left empty, i.e. no target folder is named here.
folder: ''
folderUid: ''
type: file
disableDeletion: false
# NOTE(review): presumably the rescan period for the dashboard directory, in seconds — confirm against the Grafana provisioning docs.
updateIntervalSeconds: 30
# NOTE(review): UI edits are allowed, but the files under `path` look like the source of truth;
# confirm whether UI changes survive the next rescan.
allowUiUpdates: true
options:
# Directory holding the dashboard JSON files for this provider.
path: /etc/grafana/dashboards

View File

@@ -0,0 +1,9 @@
# Grafana datasource provisioning: registers the Prometheus datasource used by the dashboards.
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
# Pin the UID: the dashboard JSON files in this repository reference the datasource
# by this exact uid in every panel, so it must not be left to auto-generation.
uid: eeyq1w1zddtkwb
access: proxy
url: http://prometheus:9090
isDefault: true
editable: false

View File

@@ -0,0 +1,98 @@
global:
scrape_interval: 15s
scrape_configs:
- job_name: "prometheus"
static_configs:
- targets: ["prometheus:9090"]
- job_name: "homelab-node"
static_configs:
- targets: ["100.67.40.126:9100"]
- job_name: "raspberry-pis"
static_configs:
- targets: ["100.77.151.40:9100"] # pi-5
- targets: ["100.123.246.75:9100"] # pi-5-kevin
- job_name: "setillo-node"
static_configs:
- targets: ["100.125.0.20:9100"]
- job_name: "setillo-snmp"
metrics_path: /snmp
params:
module: [synology]
auth: [snmpv3]
target: ["127.0.0.1"]
static_configs:
- targets: ["100.125.0.20:9116"]
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
replacement: "127.0.0.1"
- source_labels: [__param_target]
target_label: instance
replacement: "100.125.0.20"
- target_label: __address__
replacement: "100.125.0.20:9116"
- job_name: "calypso-node"
static_configs:
- targets: ["100.103.48.78:9100"]
- job_name: "calypso-snmp"
metrics_path: /snmp
params:
module: [synology]
auth: [snmpv3]
target: ["127.0.0.1"]
static_configs:
- targets: ["100.103.48.78:9116"]
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
replacement: "127.0.0.1"
- source_labels: [__param_target]
target_label: instance
replacement: "100.103.48.78"
- target_label: __address__
replacement: "100.103.48.78:9116"
- job_name: "atlantis-node"
static_configs:
- targets: ["100.83.230.112:9100"]
- job_name: "atlantis-snmp"
metrics_path: /snmp
params:
module: [synology]
auth: [snmpv3]
target: ["127.0.0.1"]
static_configs:
- targets: ["100.83.230.112:9116"]
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
replacement: "127.0.0.1"
- source_labels: [__param_target]
target_label: instance
replacement: "100.83.230.112"
- target_label: __address__
replacement: "100.83.230.112:9116"
- job_name: "concord-nuc-node"
static_configs:
- targets: ["100.72.55.21:9100"]
- job_name: "truenas-node"
static_configs:
- targets: ["100.75.252.64:9100"]
- job_name: "vmi2076105-node"
static_configs:
- targets: ["100.99.156.20:9100"]
- job_name: "proxmox-node"
static_configs:
- targets: ["100.87.12.28:9100"]