Sanitized mirror from private repository - 2026-04-20 01:32:01 UTC

2026-04-20 01:32:01 +00:00
commit e7652c8dab
1445 changed files with 364095 additions and 0 deletions
--- a/scripts/test-tailscale-monitoring.sh
+++ b/scripts/test-tailscale-monitoring.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+# Test Tailscale Host Monitoring and Notifications
+# Verifies that Tailscale hosts are monitored and alerts work
+
+set -e # abort on the first unhandled command failure
+
+echo "🔍 Tailscale Host Monitoring Test"
+echo "================================="
+echo
+
+# ANSI color escape sequences; expanded by the `echo -e` calls that use them.
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# Service endpoints. The defaults can be overridden from the environment.
+PROMETHEUS_URL="${PROMETHEUS_URL:-http://100.67.40.126:9090}"
+ALERTMANAGER_URL="${ALERTMANAGER_URL:-http://100.67.40.126:9093}"
+
+echo "📊 Checking Prometheus Targets..."
+echo "--------------------------------"
+
+# Get all Tailscale targets (100.x.x.x addresses)
+TARGETS=$(curl -s "$PROMETHEUS_URL/api/v1/targets" | jq -r '.data.activeTargets[] | select(.labels.instance | startswith("100.")) | "\(.labels.job)|\(.labels.instance)|\(.health)"')
+
+echo "Tailscale Monitored Hosts:"
+UP_COUNT=0
+DOWN_COUNT=0
+
+while IFS='|' read -r job instance health; do
+    if [ "$health" = "up" ]; then
+        echo -e "  ${GREEN}✅ UP${NC}   $job ($instance)"
+        UP_COUNT=$((UP_COUNT + 1))
+    else
+        echo -e "  ${RED}❌ DOWN${NC} $job ($instance)"
+        DOWN_COUNT=$((DOWN_COUNT + 1))
+    fi
+done <<< "$TARGETS"
+
+echo
+echo "Summary: $UP_COUNT up, $DOWN_COUNT down"
+echo
+
+echo "🚨 Checking Active HostDown Alerts..."
+echo "------------------------------------"
+
+# Check for active HostDown alerts
+ACTIVE_ALERTS=$(curl -s "$PROMETHEUS_URL/api/v1/rules" | jq -r '.data.groups[] | select(.name == "host-availability") | .rules[] | select(.name == "HostDown") | .alerts[]? | "\(.labels.instance)|\(.labels.job)|\(.state)"')
+
+if [ -z "$ACTIVE_ALERTS" ]; then
+    echo -e "${GREEN}✅ No HostDown alerts currently firing${NC}"
+else
+    echo "Currently firing HostDown alerts:"
+    while IFS='|' read -r instance job state; do
+        echo -e "  ${RED}🚨 ALERT${NC} $job ($instance) - $state"
+    done <<< "$ACTIVE_ALERTS"
+fi
+
+echo
+
+echo "📬 Checking Alertmanager Status..."
+echo "--------------------------------"
+
+# Check Alertmanager alerts
+AM_ALERTS=$(curl -s "$ALERTMANAGER_URL/api/v2/alerts" | jq -r '.[] | select(.labels.alertname == "HostDown") | "\(.labels.instance)|\(.labels.job)|\(.status.state)"')
+
+if [ -z "$AM_ALERTS" ]; then
+    echo -e "${GREEN}✅ No HostDown alerts in Alertmanager${NC}"
+else
+    echo "Active alerts in Alertmanager:"
+    while IFS='|' read -r instance job state; do
+        echo -e "  ${YELLOW}📬 NOTIFYING${NC} $job ($instance) - $state"
+    done <<< "$AM_ALERTS"
+fi
+
+echo
+
+echo "🧪 Testing Notification Endpoints..."
+echo "-----------------------------------"
+
+# Test ntfy notification
+echo "Testing ntfy notification..."
+NTFY_RESPONSE=$(curl -s -d "🧪 Tailscale monitoring test from $(hostname) at $(date)" \
+    -H "Title: Tailscale Monitoring Test" \
+    -H "Priority: 3" \
+    -H "Tags: test_tube" \
+    http://192.168.0.210:8081/homelab-alerts)
+
+if echo "$NTFY_RESPONSE" | grep -q '"id"'; then
+    echo -e "  ${GREEN}✅ ntfy notification sent successfully${NC}"
+    echo "     Message ID: $(echo "$NTFY_RESPONSE" | jq -r '.id')"
+else
+    echo -e "  ${RED}❌ ntfy notification failed${NC}"
+    echo "     Response: $NTFY_RESPONSE"
+fi
+
+echo
+
+echo "📋 Tailscale Host Inventory..."
+echo "-----------------------------"
+
+# Print "job: instance (health)" per active target whose instance label starts with "100.", sorted.
+# -f/-S make curl report a failed request on stderr; --max-time keeps a dead server from hanging the run.
+echo "Currently monitored Tailscale hosts:"
+curl -fsS --max-time 10 "$PROMETHEUS_URL/api/v1/targets" | jq -r '.data.activeTargets[] | select(.labels.instance | startswith("100.")) | "  \(.labels.job): \(.labels.instance) (\(.health))"' | sort
+echo
+
+cat <<EOF # reference summary; Alertmanager URL is read from $ALERTMANAGER_URL, not duplicated
+⚙️  Alert Configuration Summary...
+---------------------------------
+• HostDown Alert: Triggers after 2 minutes of downtime
+• Severity: Critical (triggers both ntfy + Signal notifications)
+• Monitored via: node_exporter on port 9100
+• Alert Rule: up{job=~".*-node"} == 0
+
+🔧 Notification Channels:
+• ntfy: http://192.168.0.210:8081/homelab-alerts
+• Signal: Via signal-bridge (critical alerts only)
+• Alertmanager: $ALERTMANAGER_URL
+
+✅ Tailscale monitoring test complete!
+
+💡 To manually test a HostDown alert:
+   1. Stop node_exporter on any Tailscale host
+   2. Wait 2+ minutes
+   3. Check your ntfy app and Signal for notifications
+
+EOF