Sanitized mirror from private repository - 2026-04-20 01:32:01 UTC
This commit is contained in:
129
scripts/test-tailscale-monitoring.sh
Executable file
129
scripts/test-tailscale-monitoring.sh
Executable file
@@ -0,0 +1,129 @@
|
||||
#!/bin/bash
#
# Test Tailscale Host Monitoring and Notifications
# Verifies that Tailscale hosts are monitored and alerts work
#
# Optional environment overrides:
#   PROMETHEUS_URL    base URL of the Prometheus server
#   ALERTMANAGER_URL  base URL of the Alertmanager instance

# Abort on the first unhandled command failure.
set -e

echo "🔍 Tailscale Host Monitoring Test"
echo "================================="
echo

# Colors for output (ANSI escape sequences; they take effect when printed
# through `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Service endpoints. A value already present in the environment wins;
# otherwise the original fixed addresses are used.
PROMETHEUS_URL="${PROMETHEUS_URL:-http://100.67.40.126:9090}"
ALERTMANAGER_URL="${ALERTMANAGER_URL:-http://100.67.40.126:9093}"
echo "📊 Checking Prometheus Targets..."
echo "--------------------------------"

#######################################
# Print one status line per target and tally how many are up or down.
# Globals:   UP_COUNT, DOWN_COUNT (reset to 0, then written)
#            GREEN, RED, NC (read)
# Arguments: none; reads "job|instance|health" records from stdin
# Outputs:   one status line per non-empty record on stdout
#######################################
report_target_health() {
  local job instance health
  UP_COUNT=0
  DOWN_COUNT=0
  while IFS='|' read -r job instance health; do
    # An empty result fed through a here-string still yields one blank line;
    # skip it so it is not reported (and counted) as a phantom DOWN host.
    [[ -n "${job}${instance}${health}" ]] || continue
    if [[ "$health" == "up" ]]; then
      echo -e " ${GREEN}✅ UP${NC} $job ($instance)"
      UP_COUNT=$((UP_COUNT + 1))
    else
      # Any health value other than "up" is treated as down.
      echo -e " ${RED}❌ DOWN${NC} $job ($instance)"
      DOWN_COUNT=$((DOWN_COUNT + 1))
    fi
  done
}

# Get all Tailscale targets (100.x.x.x addresses)
TARGETS=""
# -f turns HTTP errors into a non-zero exit; --max-time bounds a hung request.
TARGETS_JSON=$(curl -sf --max-time 10 "$PROMETHEUS_URL/api/v1/targets") || TARGETS_JSON=""
if [[ -n "$TARGETS_JSON" ]]; then
  TARGETS=$(jq -r '.data.activeTargets[] | select(.labels.instance | startswith("100.")) | "\(.labels.job)|\(.labels.instance)|\(.health)"' <<< "$TARGETS_JSON")
else
  echo -e "${YELLOW}⚠️ Could not fetch targets from ${PROMETHEUS_URL}${NC}"
fi

echo "Tailscale Monitored Hosts:"
report_target_health <<< "$TARGETS"

echo
echo "Summary: $UP_COUNT up, $DOWN_COUNT down"
echo
echo "🚨 Checking Active HostDown Alerts..."
echo "------------------------------------"

#######################################
# Print one line per HostDown alert record.
# Globals:   RED, NC (read)
# Arguments: none; reads "instance|job|state" records from stdin
# Outputs:   one line per non-empty record on stdout
#######################################
print_prometheus_alerts() {
  local instance job state
  while IFS='|' read -r instance job state; do
    [[ -n "${instance}${job}${state}" ]] || continue
    echo -e " ${RED}🚨 ALERT${NC} $job ($instance) - $state"
  done
}

# Check for active HostDown alerts.
# The filter keeps every alert attached to the HostDown rule of the
# "host-availability" group, whatever its state; the state is printed per line.
ACTIVE_ALERTS=""
RULES_OK=true
RULES_JSON=$(curl -sf --max-time 10 "$PROMETHEUS_URL/api/v1/rules") || RULES_OK=false
[[ -n "$RULES_JSON" ]] || RULES_OK=false
if [[ "$RULES_OK" == true ]]; then
  ACTIVE_ALERTS=$(jq -r '.data.groups[] | select(.name == "host-availability") | .rules[] | select(.name == "HostDown") | .alerts[]? | "\(.labels.instance)|\(.labels.job)|\(.state)"' <<< "$RULES_JSON") || RULES_OK=false
fi

if [[ "$RULES_OK" != true ]]; then
  # A failed query must not be presented as "no alerts".
  echo -e "${YELLOW}⚠️ Could not query alert rules from ${PROMETHEUS_URL}${NC}"
elif [[ -z "$ACTIVE_ALERTS" ]]; then
  echo -e "${GREEN}✅ No HostDown alerts currently firing${NC}"
else
  echo "Currently firing HostDown alerts:"
  print_prometheus_alerts <<< "$ACTIVE_ALERTS"
fi

echo
echo "📬 Checking Alertmanager Status..."
echo "--------------------------------"

#######################################
# Print one line per HostDown alert known to Alertmanager.
# Globals:   YELLOW, NC (read)
# Arguments: none; reads "instance|job|state" records from stdin
# Outputs:   one line per non-empty record on stdout
#######################################
print_alertmanager_alerts() {
  local instance job state
  while IFS='|' read -r instance job state; do
    [[ -n "${instance}${job}${state}" ]] || continue
    echo -e " ${YELLOW}📬 NOTIFYING${NC} $job ($instance) - $state"
  done
}

# Check Alertmanager alerts
AM_ALERTS=""
AM_OK=true
AM_JSON=$(curl -sf --max-time 10 "$ALERTMANAGER_URL/api/v2/alerts") || AM_OK=false
[[ -n "$AM_JSON" ]] || AM_OK=false
if [[ "$AM_OK" == true ]]; then
  AM_ALERTS=$(jq -r '.[] | select(.labels.alertname == "HostDown") | "\(.labels.instance)|\(.labels.job)|\(.status.state)"' <<< "$AM_JSON") || AM_OK=false
fi

if [[ "$AM_OK" != true ]]; then
  # A failed query must not be presented as "no alerts".
  echo -e "${RED}❌ Could not query alerts from ${ALERTMANAGER_URL}${NC}"
elif [[ -z "$AM_ALERTS" ]]; then
  echo -e "${GREEN}✅ No HostDown alerts in Alertmanager${NC}"
else
  echo "Active alerts in Alertmanager:"
  print_alertmanager_alerts <<< "$AM_ALERTS"
fi

echo
echo "🧪 Testing Notification Endpoints..."
echo "-----------------------------------"

# ntfy topic URL; a value already present in the environment wins.
NTFY_URL="${NTFY_URL:-http://192.168.0.210:8081/homelab-alerts}"

# Test ntfy notification
echo "Testing ntfy notification..."
# `|| true`: under `set -e` a failed curl would otherwise abort the script at
# this assignment, before the failure branch below could report anything.
# --max-time bounds a hung connection.
NTFY_RESPONSE=$(curl -s --max-time 10 \
  -d "🧪 Tailscale monitoring test from $(hostname) at $(date)" \
  -H "Title: Tailscale Monitoring Test" \
  -H "Priority: 3" \
  -H "Tags: test_tube" \
  "$NTFY_URL") || true

# Success is judged by the presence of an "id" field in the JSON reply;
# an empty or non-JSON reply leaves NTFY_ID empty.
NTFY_ID=$(jq -er '.id // empty' <<< "$NTFY_RESPONSE" 2>/dev/null) || NTFY_ID=""

if [[ -n "$NTFY_ID" ]]; then
  echo -e " ${GREEN}✅ ntfy notification sent successfully${NC}"
  echo " Message ID: $NTFY_ID"
else
  echo -e " ${RED}❌ ntfy notification failed${NC}"
  echo " Response: $NTFY_RESPONSE"
fi

echo
echo "📋 Tailscale Host Inventory..."
echo "-----------------------------"

# List all monitored Tailscale hosts with their job names
echo "Currently monitored Tailscale hosts:"
# Capture the listing first so that an empty or failed query can be reported
# instead of silently printing nothing. --max-time bounds a hung request.
INVENTORY=$(curl -s --max-time 10 "$PROMETHEUS_URL/api/v1/targets" \
  | jq -r '.data.activeTargets[] | select(.labels.instance | startswith("100.")) | " \(.labels.job): \(.labels.instance) (\(.health))"' \
  | sort)

if [[ -n "$INVENTORY" ]]; then
  printf '%s\n' "$INVENTORY"
else
  echo " (no Tailscale targets returned by ${PROMETHEUS_URL})"
fi

echo
#######################################
# Print the static alert-configuration summary, the notification channels and
# the manual test instructions.
# Globals:   ALERTMANAGER_URL, NTFY_URL (read; the original fixed addresses
#            are used when a variable is unset or empty)
# Arguments: none
# Outputs:   the summary text on stdout
#######################################
print_monitoring_summary() {
  echo "⚙️ Alert Configuration Summary..."
  echo "---------------------------------"
  echo "• HostDown Alert: Triggers after 2 minutes of downtime"
  echo "• Severity: Critical (triggers both ntfy + Signal notifications)"
  echo "• Monitored via: node_exporter on port 9100"
  echo "• Alert Rule: up{job=~\".*-node\"} == 0"
  echo

  echo "🔧 Notification Channels:"
  # Show the endpoints actually configured rather than hard-coded copies,
  # so the summary cannot drift from the values in use.
  echo "• ntfy: ${NTFY_URL:-http://192.168.0.210:8081/homelab-alerts}"
  echo "• Signal: Via signal-bridge (critical alerts only)"
  echo "• Alertmanager: ${ALERTMANAGER_URL:-http://100.67.40.126:9093}"
  echo

  echo "✅ Tailscale monitoring test complete!"
  echo
  echo "💡 To manually test a HostDown alert:"
  echo " 1. Stop node_exporter on any Tailscale host"
  echo " 2. Wait 2+ minutes"
  echo " 3. Check your ntfy app and Signal for notifications"
  echo
}

print_monitoring_summary
Reference in New Issue
Block a user