Files
homelab-optimized/scripts/test-tailscale-monitoring.sh
Gitea Mirror Bot d74f7c7af5
Some checks failed
Documentation / Build Docusaurus (push) Failing after 4m59s
Documentation / Deploy to GitHub Pages (push) Has been skipped
Sanitized mirror from private repository - 2026-03-21 10:54:24 UTC
2026-03-21 10:54:25 +00:00

130 lines
4.2 KiB
Bash
Executable File

#!/bin/bash
# Test Tailscale Host Monitoring and Notifications
# Verifies that Tailscale hosts are monitored and alerts work
#
# Optional environment:
#   PROMETHEUS_URL    Base URL of the Prometheus HTTP API
#                     (default: http://100.67.40.126:9090)
#   ALERTMANAGER_URL  Base URL of the Alertmanager HTTP API
#                     (default: http://100.67.40.126:9093)

# Abort on the first unhandled command failure. pipefail is not enabled, so a
# pipeline reports only the exit status of its last stage.
set -e

echo "🔍 Tailscale Host Monitoring Test"
echo "================================="
echo

# Colors for output. The values are literal backslash sequences (single
# quotes), so they only turn into escape codes when printed with `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Service endpoints. A non-empty value inherited from the environment takes
# precedence; otherwise the stock addresses are used.
PROMETHEUS_URL="${PROMETHEUS_URL:-http://100.67.40.126:9090}"
ALERTMANAGER_URL="${ALERTMANAGER_URL:-http://100.67.40.126:9093}"
echo "📊 Checking Prometheus Targets..."
echo "--------------------------------"

# Fetch the active targets. The request is kept separate from the jq filter so
# a failed request is detected here; piped straight into jq, a failed curl
# would only produce empty input, which jq accepts without error.
# -f turns HTTP error statuses into a curl failure; the timeouts keep an
# unreachable host from stalling the script.
TARGETS=""
TARGETS_FETCHED=1
TARGETS_JSON=$(curl -fsS --connect-timeout 5 --max-time 15 \
  "$PROMETHEUS_URL/api/v1/targets") || TARGETS_FETCHED=0

if [ "$TARGETS_FETCHED" -eq 1 ]; then
  # Get all Tailscale targets (100.x.x.x addresses), one
  # "job|instance|health" record per line.
  TARGETS=$(jq -r '.data.activeTargets[] | select(.labels.instance | startswith("100.")) | "\(.labels.job)|\(.labels.instance)|\(.health)"' <<< "$TARGETS_JSON")
fi

echo "Tailscale Monitored Hosts:"
UP_COUNT=0
DOWN_COUNT=0
if [ "$TARGETS_FETCHED" -eq 0 ]; then
  echo -e " ${RED}❌ Could not query Prometheus at $PROMETHEUS_URL${NC}"
elif [ -z "$TARGETS" ]; then
  # A here-string of an empty variable still yields one empty line, which
  # would be counted as a host that is down; skip the loop entirely instead.
  echo -e " ${YELLOW}⚠️ No Tailscale targets reported by Prometheus${NC}"
else
  while IFS='|' read -r job instance health; do
    # Any health value other than "up" is reported as down.
    if [ "$health" = "up" ]; then
      echo -e " ${GREEN}✅ UP${NC} $job ($instance)"
      UP_COUNT=$((UP_COUNT + 1))
    else
      echo -e " ${RED}❌ DOWN${NC} $job ($instance)"
      DOWN_COUNT=$((DOWN_COUNT + 1))
    fi
  done <<< "$TARGETS"
fi
echo
echo "Summary: $UP_COUNT up, $DOWN_COUNT down"
echo
echo "🚨 Checking Active HostDown Alerts..."
echo "------------------------------------"

# Check for active HostDown alerts.
# The request is made separately from the jq filter so an unreachable
# Prometheus is reported as an error; empty output alone would otherwise be
# indistinguishable from "no alerts". -f turns HTTP error statuses into a curl
# failure and the timeouts bound the wait on an unreachable host.
ACTIVE_ALERTS=""
RULES_FETCHED=1
RULES_JSON=$(curl -fsS --connect-timeout 5 --max-time 15 \
  "$PROMETHEUS_URL/api/v1/rules") || RULES_FETCHED=0

if [ "$RULES_FETCHED" -eq 1 ]; then
  # One "instance|job|state" record per alert attached to the HostDown rule
  # in the host-availability group.
  ACTIVE_ALERTS=$(jq -r '.data.groups[] | select(.name == "host-availability") | .rules[] | select(.name == "HostDown") | .alerts[]? | "\(.labels.instance)|\(.labels.job)|\(.state)"' <<< "$RULES_JSON")
fi

if [ "$RULES_FETCHED" -eq 0 ]; then
  echo -e "${RED}❌ Could not query Prometheus rules at $PROMETHEUS_URL${NC}"
elif [ -z "$ACTIVE_ALERTS" ]; then
  echo -e "${GREEN}✅ No HostDown alerts currently firing${NC}"
else
  echo "Currently firing HostDown alerts:"
  while IFS='|' read -r instance job state; do
    echo -e " ${RED}🚨 ALERT${NC} $job ($instance) - $state"
  done <<< "$ACTIVE_ALERTS"
fi
echo
echo "📬 Checking Alertmanager Status..."
echo "--------------------------------"

# Check Alertmanager alerts.
# The request is made separately from the jq filter so an unreachable
# Alertmanager is reported as an error; empty output alone would otherwise be
# indistinguishable from "no alerts". -f turns HTTP error statuses into a curl
# failure and the timeouts bound the wait on an unreachable host.
AM_ALERTS=""
AM_FETCHED=1
AM_JSON=$(curl -fsS --connect-timeout 5 --max-time 15 \
  "$ALERTMANAGER_URL/api/v2/alerts") || AM_FETCHED=0

if [ "$AM_FETCHED" -eq 1 ]; then
  # One "instance|job|state" record per alert whose alertname is HostDown.
  AM_ALERTS=$(jq -r '.[] | select(.labels.alertname == "HostDown") | "\(.labels.instance)|\(.labels.job)|\(.status.state)"' <<< "$AM_JSON")
fi

if [ "$AM_FETCHED" -eq 0 ]; then
  echo -e "${RED}❌ Could not query Alertmanager at $ALERTMANAGER_URL${NC}"
elif [ -z "$AM_ALERTS" ]; then
  echo -e "${GREEN}✅ No HostDown alerts in Alertmanager${NC}"
else
  echo "Active alerts in Alertmanager:"
  while IFS='|' read -r instance job state; do
    echo -e " ${YELLOW}📬 NOTIFYING${NC} $job ($instance) - $state"
  done <<< "$AM_ALERTS"
fi
echo
echo "🧪 Testing Notification Endpoints..."
echo "-----------------------------------"

# Test ntfy notification: publish a test message and report whether the
# response carries a message id.
echo "Testing ntfy notification..."

# `|| true` keeps a failed request from tripping `set -e` on this assignment,
# which would end the script before the failure branch below could report it.
# The timeouts bound the wait when the ntfy host is unreachable. -f is left
# out on purpose so an error body is still captured and shown.
NTFY_RESPONSE=$(curl -sS --connect-timeout 5 --max-time 15 \
  -d "🧪 Tailscale monitoring test from $(hostname) at $(date)" \
  -H "Title: Tailscale Monitoring Test" \
  -H "Priority: 3" \
  -H "Tags: test_tube" \
  http://192.168.0.210:8081/homelab-alerts) || true

# Success is defined as a JSON response with a non-null "id" field. jq -e
# exits non-zero when the field is missing/null or the response is not JSON.
NTFY_ID=$(jq -er '.id' 2>/dev/null <<< "$NTFY_RESPONSE") || NTFY_ID=""

if [ -n "$NTFY_ID" ]; then
  echo -e " ${GREEN}✅ ntfy notification sent successfully${NC}"
  echo " Message ID: $NTFY_ID"
else
  echo -e " ${RED}❌ ntfy notification failed${NC}"
  echo " Response: $NTFY_RESPONSE"
fi
echo
printf '%s\n' "📋 Tailscale Host Inventory..."
printf '%s\n' "-----------------------------"

# List all monitored Tailscale hosts with their job names: one
# "job: instance (health)" line per target whose instance starts with "100.",
# passed through sort.
printf '%s\n' "Currently monitored Tailscale hosts:"
INVENTORY_FILTER='.data.activeTargets[] | select(.labels.instance | startswith("100.")) | " \(.labels.job): \(.labels.instance) (\(.health))"'
curl -s "$PROMETHEUS_URL/api/v1/targets" \
  | jq -r "$INVENTORY_FILTER" \
  | sort
printf '\n'
# Static description of the alerting setup. These lines are fixed text; they
# are not read back from the live Prometheus/Alertmanager configuration.
echo "⚙️ Alert Configuration Summary..."
echo "---------------------------------"
echo "• HostDown Alert: Triggers after 2 minutes of downtime"
echo "• Severity: Critical (triggers both ntfy + Signal notifications)"
echo "• Monitored via: node_exporter on port 9100"
echo "• Alert Rule: up{job=~\".*-node\"} == 0"
echo
echo "🔧 Notification Channels:"
echo "• ntfy: http://192.168.0.210:8081/homelab-alerts"
echo "• Signal: Via signal-bridge (critical alerts only)"
# Print the Alertmanager address from the variable the Alertmanager check
# queries, so this summary cannot drift from the endpoint actually used.
echo "• Alertmanager: $ALERTMANAGER_URL"
echo
# Closing banner followed by fixed instructions for a manual end-to-end check.
# Each argument is printed on its own line; empty arguments give blank lines.
printf '%s\n' \
  "✅ Tailscale monitoring test complete!" \
  "" \
  "💡 To manually test a HostDown alert:" \
  " 1. Stop node_exporter on any Tailscale host" \
  " 2. Wait 2+ minutes" \
  " 3. Check your ntfy app and Signal for notifications" \
  ""