#!/bin/bash
# Test Tailscale Host Monitoring and Notifications
# Verifies that Tailscale hosts are monitored and alerts work

# Stop at the first command that exits with a non-zero status.
set -o errexit

# Title banner: heading, underline, and one blank spacer line.
printf '%s\n' \
    "🔍 Tailscale Host Monitoring Test" \
    "=================================" \
    ""

# ANSI color sequences for status output. They are stored as literal
# backslash sequences and only turn into escapes when printed via `echo -e`.
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
NC="\033[0m" # resets the color

# Base URLs of the Prometheus and Alertmanager HTTP APIs.
PROMETHEUS_URL='http://100.67.40.126:9090'
ALERTMANAGER_URL='http://100.67.40.126:9093'

echo "📊 Checking Prometheus Targets..."
echo "--------------------------------"

# Section: list every Tailscale target known to Prometheus with its health and
# print an up/down tally.
#
# The request and the jq filter run as two separate steps. Piping curl straight
# into jq hides a failed request: jq exits 0 on empty input, so an unreachable
# Prometheus would be indistinguishable from "no targets".
TARGETS=""
TARGETS_ERROR=""
if ! TARGETS_JSON=$(curl -sf --max-time 10 "$PROMETHEUS_URL/api/v1/targets"); then
    TARGETS_ERROR="could not fetch targets from Prometheus ($PROMETHEUS_URL)"
elif ! TARGETS=$(jq -r '.data.activeTargets[] | select(.labels.instance | startswith("100.")) | "\(.labels.job)|\(.labels.instance)|\(.health)"' <<< "$TARGETS_JSON"); then
    # The filter keeps only Tailscale targets (100.x.x.x addresses) and emits
    # one "job|instance|health" record per line; discard partial output.
    TARGETS_ERROR="could not parse the Prometheus targets response"
    TARGETS=""
fi

echo "Tailscale Monitored Hosts:"
UP_COUNT=0
DOWN_COUNT=0

if [ -n "$TARGETS_ERROR" ]; then
    echo -e "  ${RED}❌ ERROR${NC} $TARGETS_ERROR"
elif [ -z "$TARGETS" ]; then
    # A here-string always supplies one line, even for an empty value, so the
    # loop must not run here or it would count a phantom DOWN host.
    echo -e "  ${YELLOW}⚠️  No Tailscale targets (100.x.x.x) found${NC}"
else
    while IFS='|' read -r job instance health; do
        # Any health value other than "up" is reported as DOWN.
        if [ "$health" = "up" ]; then
            echo -e "  ${GREEN}✅ UP${NC}   $job ($instance)"
            UP_COUNT=$((UP_COUNT + 1))
        else
            echo -e "  ${RED}❌ DOWN${NC} $job ($instance)"
            DOWN_COUNT=$((DOWN_COUNT + 1))
        fi
    done <<< "$TARGETS"
fi

echo
echo "Summary: $UP_COUNT up, $DOWN_COUNT down"
echo

echo "🚨 Checking Active HostDown Alerts..."
echo "------------------------------------"

# Section: read the Prometheus rules API and list the alerts attached to the
# HostDown rule of the "host-availability" group as "instance|job|state" lines.
#
# Fetching and parsing are separate steps so that a failed request is reported
# as "status unknown" instead of being mistaken for "no alerts".
ACTIVE_ALERTS=""
ACTIVE_ALERTS_ERROR=""
if ! RULES_JSON=$(curl -sf --max-time 10 "$PROMETHEUS_URL/api/v1/rules"); then
    ACTIVE_ALERTS_ERROR="could not fetch rules from Prometheus ($PROMETHEUS_URL)"
elif ! ACTIVE_ALERTS=$(jq -r '.data.groups[] | select(.name == "host-availability") | .rules[] | select(.name == "HostDown") | .alerts[]? | "\(.labels.instance)|\(.labels.job)|\(.state)"' <<< "$RULES_JSON"); then
    ACTIVE_ALERTS_ERROR="could not parse the Prometheus rules response"
    ACTIVE_ALERTS=""
fi

if [ -n "$ACTIVE_ALERTS_ERROR" ]; then
    echo -e "${RED}❌ HostDown alert status unknown: $ACTIVE_ALERTS_ERROR${NC}"
elif [ -z "$ACTIVE_ALERTS" ]; then
    echo -e "${GREEN}✅ No HostDown alerts currently firing${NC}"
else
    echo "Currently firing HostDown alerts:"
    while IFS='|' read -r instance job state; do
        echo -e "  ${RED}🚨 ALERT${NC} $job ($instance) - $state"
    done <<< "$ACTIVE_ALERTS"
fi

echo

echo "📬 Checking Alertmanager Status..."
echo "--------------------------------"

# Section: list the HostDown alerts currently held by Alertmanager as
# "instance|job|state" lines.
#
# Fetching and parsing are separate steps so that an unreachable Alertmanager
# is reported as "status unknown" instead of being mistaken for "no alerts".
AM_ALERTS=""
AM_ALERTS_ERROR=""
if ! AM_JSON=$(curl -sf --max-time 10 "$ALERTMANAGER_URL/api/v2/alerts"); then
    AM_ALERTS_ERROR="could not fetch alerts from Alertmanager ($ALERTMANAGER_URL)"
elif ! AM_ALERTS=$(jq -r '.[] | select(.labels.alertname == "HostDown") | "\(.labels.instance)|\(.labels.job)|\(.status.state)"' <<< "$AM_JSON"); then
    AM_ALERTS_ERROR="could not parse the Alertmanager alerts response"
    AM_ALERTS=""
fi

if [ -n "$AM_ALERTS_ERROR" ]; then
    echo -e "${RED}❌ Alertmanager status unknown: $AM_ALERTS_ERROR${NC}"
elif [ -z "$AM_ALERTS" ]; then
    echo -e "${GREEN}✅ No HostDown alerts in Alertmanager${NC}"
else
    echo "Active alerts in Alertmanager:"
    while IFS='|' read -r instance job state; do
        echo -e "  ${YELLOW}📬 NOTIFYING${NC} $job ($instance) - $state"
    done <<< "$AM_ALERTS"
fi

echo

echo "🧪 Testing Notification Endpoints..."
echo "-----------------------------------"

# Test ntfy notification: publish a test message and report whether the
# response acknowledges it.
echo "Testing ntfy notification..."

# Capture curl's exit status with `|| NTFY_STATUS=$?`. A bare assignment from a
# failing command substitution would abort the script under `set -e` before the
# failure branch below could report anything.
NTFY_STATUS=0
NTFY_RESPONSE=$(curl -s --max-time 10 \
    -d "🧪 Tailscale monitoring test from $(hostname) at $(date)" \
    -H "Title: Tailscale Monitoring Test" \
    -H "Priority: 3" \
    -H "Tags: test_tube" \
    http://192.168.0.210:8081/homelab-alerts) || NTFY_STATUS=$?

# The send counts as successful when curl succeeded and the response body
# contains an "id" field.
if [ "$NTFY_STATUS" -eq 0 ] && grep -q '"id"' <<< "$NTFY_RESPONSE"; then
    echo -e "  ${GREEN}✅ ntfy notification sent successfully${NC}"
    echo "     Message ID: $(jq -r '.id' <<< "$NTFY_RESPONSE")"
else
    echo -e "  ${RED}❌ ntfy notification failed${NC}"
    if [ "$NTFY_STATUS" -ne 0 ]; then
        echo "     curl exit status: $NTFY_STATUS"
    fi
    echo "     Response: $NTFY_RESPONSE"
fi

echo

echo "📋 Tailscale Host Inventory..."
echo "-----------------------------"

# List all monitored Tailscale hosts (100.x.x.x instances) with their job
# names, sorted. The request and the jq filter run as separate steps so that a
# failed fetch or an unparsable response is reported instead of silently
# producing an empty list.
echo "Currently monitored Tailscale hosts:"
INVENTORY=""
INVENTORY_ERROR=""
if ! INVENTORY_JSON=$(curl -sf --max-time 10 "$PROMETHEUS_URL/api/v1/targets"); then
    INVENTORY_ERROR="could not fetch targets from Prometheus ($PROMETHEUS_URL)"
elif ! INVENTORY=$(jq -r '.data.activeTargets[] | select(.labels.instance | startswith("100.")) | "  \(.labels.job): \(.labels.instance) (\(.health))"' <<< "$INVENTORY_JSON"); then
    INVENTORY_ERROR="could not parse the Prometheus targets response"
    INVENTORY=""
fi

if [ -n "$INVENTORY_ERROR" ]; then
    echo -e "  ${RED}❌ ERROR${NC} $INVENTORY_ERROR"
elif [ -n "$INVENTORY" ]; then
    sort <<< "$INVENTORY"
else
    echo "  (none)"
fi

echo

# Static closing text: alert configuration, notification channels, and manual
# test instructions. The quoted delimiter keeps the text literal (no expansion).
cat <<'EOF'
⚙️  Alert Configuration Summary...
---------------------------------
• HostDown Alert: Triggers after 2 minutes of downtime
• Severity: Critical (triggers both ntfy + Signal notifications)
• Monitored via: node_exporter on port 9100
• Alert Rule: up{job=~".*-node"} == 0

🔧 Notification Channels:
• ntfy: http://192.168.0.210:8081/homelab-alerts
• Signal: Via signal-bridge (critical alerts only)
• Alertmanager: http://100.67.40.126:9093

✅ Tailscale monitoring test complete!

💡 To manually test a HostDown alert:
   1. Stop node_exporter on any Tailscale host
   2. Wait 2+ minutes
   3. Check your ntfy app and Signal for notifications

EOF