130 lines
4.2 KiB
Bash
Executable File
130 lines
4.2 KiB
Bash
Executable File
#!/bin/bash
#
# Test Tailscale Host Monitoring and Notifications
#
# Verifies that Tailscale hosts are monitored and alerts work:
#   1. lists Prometheus targets whose instance label starts with "100."
#   2. lists HostDown alerts reported by Prometheus and by Alertmanager
#   3. posts a test message to the ntfy topic
#   4. prints a static summary of the alerting setup
#
# Requires: curl, jq
#
# Optional environment overrides (defaults are the previously hard-coded
# values):
#   PROMETHEUS_URL, ALERTMANAGER_URL, NTFY_URL,
#   CURL_CONNECT_TIMEOUT, CURL_MAX_TIME (seconds)
#
# Exit status: 0 when every check could be carried out, 1 otherwise.

set -euo pipefail

# Colors for output. $'...' stores the real escape bytes, so plain
# printf '%s' can emit them and remote data is never backslash-interpreted.
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly NC=$'\033[0m' # No Color

PROMETHEUS_URL="${PROMETHEUS_URL:-http://100.67.40.126:9090}"
ALERTMANAGER_URL="${ALERTMANAGER_URL:-http://100.67.40.126:9093}"
NTFY_URL="${NTFY_URL:-http://192.168.0.210:8081/homelab-alerts}"
CURL_CONNECT_TIMEOUT="${CURL_CONNECT_TIMEOUT:-5}"
CURL_MAX_TIME="${CURL_MAX_TIME:-15}"
readonly PROMETHEUS_URL ALERTMANAGER_URL NTFY_URL
readonly CURL_CONNECT_TIMEOUT CURL_MAX_TIME

# Number of checks that could not be completed; decides the exit status.
FAILED_CHECKS=0

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Record one failed check and report it on stderr.
check_failed() {
  FAILED_CHECKS=$((FAILED_CHECKS + 1))
  printf ' %s❌ %s%s\n' "$RED" "$*" "$NC" >&2
}

# GET $1 and write the response body to stdout.
# Returns non-zero on connection errors, timeouts and HTTP error statuses.
fetch_json() {
  curl -sS --fail \
    --connect-timeout "$CURL_CONNECT_TIMEOUT" \
    --max-time "$CURL_MAX_TIME" \
    "$1"
}

# GET $1 and print the output of jq filter $2 applied to the body.
# Returns non-zero when the request fails or the body cannot be filtered,
# so callers can tell "no results" apart from "could not ask".
query() {
  local url=$1 filter=$2 body
  body=$(fetch_json "$url") || return 1
  jq -r "$filter" <<< "$body"
}

# Print one line per "instance|job|state" record in $2, prefixed by tag $1.
print_alert_lines() {
  local tag=$1 records=$2 instance job state
  while IFS='|' read -r instance job state; do
    printf ' %s %s (%s) - %s\n' "$tag" "$job" "$instance" "$state"
  done <<< "$records"
}

for tool in curl jq; do
  command -v "$tool" > /dev/null || die "required tool not found: $tool"
done

echo "🔍 Tailscale Host Monitoring Test"
echo "================================="
echo

echo "📊 Checking Prometheus Targets..."
echo "--------------------------------"

# Get all Tailscale targets (100.x.x.x addresses) as "job|instance|health".
# Fetched once; the inventory section further down reuses this snapshot.
TARGETS=""
TARGETS_OK=1
if ! TARGETS=$(query "$PROMETHEUS_URL/api/v1/targets" \
  '.data.activeTargets[] | select((.labels.instance // "") | startswith("100.")) | "\(.labels.job)|\(.labels.instance)|\(.health)"'); then
  TARGETS=""
  TARGETS_OK=0
fi

echo "Tailscale Monitored Hosts:"
UP_COUNT=0
DOWN_COUNT=0

if ((TARGETS_OK == 0)); then
  check_failed "Could not query Prometheus targets at $PROMETHEUS_URL"
elif [[ -z "$TARGETS" ]]; then
  # A here-string on an empty value still yields one empty line, which
  # would be counted as a down host; handle the empty case explicitly.
  echo " (no Tailscale targets found)"
else
  while IFS='|' read -r job instance health; do
    if [[ "$health" == "up" ]]; then
      printf ' %s✅ UP%s %s (%s)\n' "$GREEN" "$NC" "$job" "$instance"
      UP_COUNT=$((UP_COUNT + 1))
    else
      printf ' %s❌ DOWN%s %s (%s)\n' "$RED" "$NC" "$job" "$instance"
      DOWN_COUNT=$((DOWN_COUNT + 1))
    fi
  done <<< "$TARGETS"
fi

echo
echo "Summary: $UP_COUNT up, $DOWN_COUNT down"
echo

echo "🚨 Checking Active HostDown Alerts..."
echo "------------------------------------"

# Check for active HostDown alerts as "instance|job|state".
if ACTIVE_ALERTS=$(query "$PROMETHEUS_URL/api/v1/rules" \
  '.data.groups[] | select(.name == "host-availability") | .rules[] | select(.name == "HostDown") | .alerts[]? | "\(.labels.instance)|\(.labels.job)|\(.state)"'); then
  if [[ -z "$ACTIVE_ALERTS" ]]; then
    printf '%s✅ No HostDown alerts currently firing%s\n' "$GREEN" "$NC"
  else
    echo "Currently firing HostDown alerts:"
    print_alert_lines "${RED}🚨 ALERT${NC}" "$ACTIVE_ALERTS"
  fi
else
  # Never report "no alerts" when the question could not be asked.
  check_failed "Could not query Prometheus rules at $PROMETHEUS_URL"
fi

echo

echo "📬 Checking Alertmanager Status..."
echo "--------------------------------"

# Check Alertmanager alerts as "instance|job|state".
if AM_ALERTS=$(query "$ALERTMANAGER_URL/api/v2/alerts" \
  '.[] | select(.labels.alertname == "HostDown") | "\(.labels.instance)|\(.labels.job)|\(.status.state)"'); then
  if [[ -z "$AM_ALERTS" ]]; then
    printf '%s✅ No HostDown alerts in Alertmanager%s\n' "$GREEN" "$NC"
  else
    echo "Active alerts in Alertmanager:"
    print_alert_lines "${YELLOW}📬 NOTIFYING${NC}" "$AM_ALERTS"
  fi
else
  check_failed "Could not query Alertmanager at $ALERTMANAGER_URL"
fi

echo

echo "🧪 Testing Notification Endpoints..."
echo "-----------------------------------"

# Test ntfy notification
echo "Testing ntfy notification..."
# No --fail here: an error body from the server is worth showing below.
# stderr is captured too so a connection error ends up in the response text.
# '|| true' is intentional: failure is detected by the missing message id,
# and set -e must not abort before the failure branch can report it.
NTFY_RESPONSE=$(curl -sS \
  --connect-timeout "$CURL_CONNECT_TIMEOUT" \
  --max-time "$CURL_MAX_TIME" \
  -d "🧪 Tailscale monitoring test from $(hostname) at $(date)" \
  -H "Title: Tailscale Monitoring Test" \
  -H "Priority: 3" \
  -H "Tags: test_tube" \
  "$NTFY_URL" 2>&1) || true

# Empty when the response is not JSON or carries no "id" field.
NTFY_ID=$(jq -r '.id? // empty' <<< "$NTFY_RESPONSE" 2> /dev/null) || NTFY_ID=""

if [[ -n "$NTFY_ID" ]]; then
  printf ' %s✅ ntfy notification sent successfully%s\n' "$GREEN" "$NC"
  echo " Message ID: $NTFY_ID"
else
  FAILED_CHECKS=$((FAILED_CHECKS + 1))
  printf ' %s❌ ntfy notification failed%s\n' "$RED" "$NC"
  echo " Response: $NTFY_RESPONSE"
fi

echo

echo "📋 Tailscale Host Inventory..."
echo "-----------------------------"

# List all monitored Tailscale hosts with their job names, sorted.
echo "Currently monitored Tailscale hosts:"
if ((TARGETS_OK == 0)); then
  echo " (unavailable: Prometheus targets could not be queried)"
elif [[ -n "$TARGETS" ]]; then
  while IFS='|' read -r job instance health; do
    printf ' %s: %s (%s)\n' "$job" "$instance" "$health"
  done <<< "$TARGETS" | sort
fi

echo

# Static description of the alerting setup; the URLs come from the same
# variables used for the requests above so the two cannot drift apart.
cat << EOF
⚙️ Alert Configuration Summary...
---------------------------------
• HostDown Alert: Triggers after 2 minutes of downtime
• Severity: Critical (triggers both ntfy + Signal notifications)
• Monitored via: node_exporter on port 9100
• Alert Rule: up{job=~".*-node"} == 0

🔧 Notification Channels:
• ntfy: $NTFY_URL
• Signal: Via signal-bridge (critical alerts only)
• Alertmanager: $ALERTMANAGER_URL

EOF

if ((FAILED_CHECKS > 0)); then
  printf '%s⚠️ Tailscale monitoring test finished with %d failed check(s)%s\n' \
    "$YELLOW" "$FAILED_CHECKS" "$NC"
else
  echo "✅ Tailscale monitoring test complete!"
fi

cat << 'EOF'

💡 To manually test a HostDown alert:
 1. Stop node_exporter on any Tailscale host
 2. Wait 2+ minutes
 3. Check your ntfy app and Signal for notifications

EOF

if ((FAILED_CHECKS > 0)); then
  exit 1
fi