#!/bin/bash
# Health check dashboard script for Arrs Media Stack
# Generated by Ansible
#
# Prints a colorized health report to stdout and appends one machine-readable
# line per check to a per-day log file under LOG_DIR. Every probe is
# best-effort: a failing check is reported, never fatal, which is why the
# script deliberately does not enable `set -e`.

LOG_DIR="{{ docker_root }}/logs/system"
# Captured once so every log line written by this run carries the same stamp.
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
# Derive the file date from TIMESTAMP instead of calling date again, so the
# two values cannot disagree when the run straddles midnight.
LOG_DAY=${TIMESTAMP%% *}
DASHBOARD_LOG="$LOG_DIR/health-dashboard-${LOG_DAY//-/}.log"

# Ensure log directory exists. If it cannot be created or written, keep the
# dashboard usable on the terminal and discard log lines instead of emitting
# a redirection error for every single check.
if ! mkdir -p "$LOG_DIR" 2>/dev/null || [[ ! -w "$LOG_DIR" ]]; then
  printf 'warning: log directory %s is not writable; logging disabled\n' \
    "$LOG_DIR" >&2
  DASHBOARD_LOG=/dev/null
fi

#######################################
# Append one line to the dashboard log, prefixed with the run timestamp.
# Globals:   TIMESTAMP (read), DASHBOARD_LOG (read)
# Arguments: $1 - message text
#######################################
log_health() {
  printf '[%s] %s\n' "$TIMESTAMP" "$1" >> "$DASHBOARD_LOG"
}

# Colors for terminal output. Stored as backslash escapes; they are expanded
# at print time by printf %b (or echo -e).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

#######################################
# Print one colorized status line: "<symbol> <service>: <status> <details>".
# Globals:   RED, GREEN, YELLOW, BLUE, NC (read)
# Arguments: $1 - service or check name
#            $2 - status keyword; OK/RUNNING are green, WARNING/DEGRADED
#                 yellow, CRITICAL/FAILED/DOWN red, anything else blue
#            $3 - free-form details (may be empty)
# Outputs:   the formatted line on stdout
#######################################
display_status() {
  local service="$1"
  local status="$2"
  local details="$3"
  local color symbol

  case "$status" in
    "OK" | "RUNNING")
      color=$GREEN
      symbol="✓"
      ;;
    "WARNING" | "DEGRADED")
      color=$YELLOW
      symbol="⚠"
      ;;
    "CRITICAL" | "FAILED" | "DOWN")
      color=$RED
      symbol="✗"
      ;;
    *)
      color=$BLUE
      symbol="ℹ"
      ;;
  esac

  # %b expands escapes in the color codes only; service/status/details go
  # through %s so a backslash in the data is printed literally.
  printf '%b%s%b %s: %b%s%b %s\n' \
    "$color" "$symbol" "$NC" "$service" "$color" "$status" "$NC" "$details"
}

log_health "=== HEALTH DASHBOARD STARTED ==="

echo "=================================================================="
echo " ARRS MEDIA STACK HEALTH DASHBOARD"
echo "=================================================================="
echo "Generated: $TIMESTAMP"
echo "=================================================================="

# System Health
printf '\n%b%s%b\n' "$BLUE" "SYSTEM HEALTH" "$NC"
echo "------------------------------------------------------------------"

# CPU Usage
# Computed as 100 - idle so that system, iowait, etc. are included, and parsed
# by field label rather than position: top prints "%Cpu(s):100.0 us" with no
# separating space once a value reaches three digits, which shifts positional
# fields. LC_ALL=C forces a '.' decimal separator so ',' only separates fields.
CPU_USAGE=$(LC_ALL=C top -bn1 2>/dev/null | awk '
  /Cpu\(s\)/ {
    sub(/^[^:]*:/, "")
    n = split($0, parts, ",")
    for (i = 1; i <= n; i++) {
      if (parts[i] ~ /id/) {
        gsub(/[^0-9.]/, "", parts[i])
        printf "%.1f", 100 - parts[i]
        exit
      }
    }
  }')

if [[ ! "$CPU_USAGE" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
  # No parsable value: say so instead of silently reporting OK.
  display_status "CPU Usage" "UNKNOWN" "(unable to read CPU statistics)"
  log_health "SYSTEM_HEALTH CPU_USAGE UNKNOWN"
else
  # awk performs the floating-point comparison, so bc is not required.
  if awk -v v="$CPU_USAGE" 'BEGIN { exit !(v > 80) }'; then
    cpu_status="CRITICAL"
  elif awk -v v="$CPU_USAGE" 'BEGIN { exit !(v > 60) }'; then
    cpu_status="WARNING"
  else
    cpu_status="OK"
  fi
  display_status "CPU Usage" "$cpu_status" "(${CPU_USAGE}%)"
  log_health "SYSTEM_HEALTH CPU_USAGE $cpu_status ${CPU_USAGE}%"
fi

# Memory Usage
# LC_ALL=C pins the "Mem:" row label and the decimal separator regardless of
# the system locale. The $2 > 0 guard avoids a division by zero.
MEMORY_PERCENT=$(LC_ALL=C free 2>/dev/null \
  | awk '/^Mem:/ && $2 > 0 { printf "%.1f", $3 / $2 * 100.0 }')

if [[ ! "$MEMORY_PERCENT" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
  # No parsable value: say so instead of silently reporting OK.
  display_status "Memory Usage" "UNKNOWN" "(unable to read memory statistics)"
  log_health "SYSTEM_HEALTH MEMORY_USAGE UNKNOWN"
else
  # awk performs the floating-point comparison, so bc is not required.
  if awk -v v="$MEMORY_PERCENT" 'BEGIN { exit !(v > 90) }'; then
    mem_status="CRITICAL"
  elif awk -v v="$MEMORY_PERCENT" 'BEGIN { exit !(v > 75) }'; then
    mem_status="WARNING"
  else
    mem_status="OK"
  fi
  display_status "Memory Usage" "$mem_status" "(${MEMORY_PERCENT}%)"
  log_health "SYSTEM_HEALTH MEMORY_USAGE $mem_status ${MEMORY_PERCENT}%"
fi

# Disk Usage
# -P keeps each filesystem on a single line even when the device name is long,
# so the data row is always line 2 and the capacity is always column 5.
DISK_USAGE=$(df -P "{{ docker_root }}" 2>/dev/null \
  | awk 'NR == 2 { sub(/%/, "", $5); print $5 }')

if [[ ! "$DISK_USAGE" =~ ^[0-9]+$ ]]; then
  display_status "Disk Usage" "UNKNOWN" "(unable to read disk usage)"
  log_health "SYSTEM_HEALTH DISK_USAGE UNKNOWN"
else
  if ((DISK_USAGE > 90)); then
    disk_status="CRITICAL"
  elif ((DISK_USAGE > 80)); then
    disk_status="WARNING"
  else
    disk_status="OK"
  fi
  display_status "Disk Usage" "$disk_status" "(${DISK_USAGE}%)"
  log_health "SYSTEM_HEALTH DISK_USAGE $disk_status ${DISK_USAGE}%"
fi

# Load Average
# /proc/loadavg is locale-independent; parsing `uptime` breaks in locales that
# use ',' as the decimal separator, so it is only the fallback.
LOAD_1MIN=""
if [[ -r /proc/loadavg ]]; then
  read -r LOAD_1MIN _ < /proc/loadavg
else
  LOAD_1MIN=$(LC_ALL=C uptime 2>/dev/null \
    | sed -n 's/.*load average: *\([0-9.]*\).*/\1/p')
fi

if [[ ! "$LOAD_1MIN" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
  display_status "Load Average" "UNKNOWN" "(unable to read load average)"
  log_health "SYSTEM_HEALTH LOAD_AVERAGE UNKNOWN"
elif awk -v v="$LOAD_1MIN" 'BEGIN { exit !(v > 2.0) }'; then
  display_status "Load Average" "WARNING" "(${LOAD_1MIN})"
  log_health "SYSTEM_HEALTH LOAD_AVERAGE WARNING ${LOAD_1MIN}"
else
  display_status "Load Average" "OK" "(${LOAD_1MIN})"
  log_health "SYSTEM_HEALTH LOAD_AVERAGE OK ${LOAD_1MIN}"
fi

# Docker Services
printf '\n%b%s%b\n' "$BLUE" "DOCKER SERVICES" "$NC"
echo "------------------------------------------------------------------"

if command -v docker >/dev/null 2>&1; then
  # Use the standalone docker-compose binary when present, otherwise the
  # Compose v2 plugin. v2 needs -a so stopped containers are still listed and
  # can be reported as DOWN rather than NOT_FOUND.
  if command -v docker-compose >/dev/null 2>&1; then
    COMPOSE_PS=(docker-compose ps -q)
  else
    COMPOSE_PS=(docker compose ps -a -q)
  fi

  if ! cd "{{ docker_compose_dir }}" 2>/dev/null; then
    # Without the project directory every lookup would report NOT_FOUND,
    # hiding the real problem.
    display_status "Docker Compose" "FAILED" "(cannot enter {{ docker_compose_dir }})"
    log_health "DOCKER_SERVICE compose_dir FAILED"
  else
    SERVICES=(
      "sonarr" "radarr" "lidarr" "bazarr" "prowlarr" "whisparr"
      "deluge" "sabnzbd" "plex" "tautulli" "jellyseerr"
      "tubearchivist" "gluetun" "watchtower" "logrotate"
    )

    for service in "${SERVICES[@]}"; do
      # head -n 1: a scaled service yields several IDs; inspect the first.
      CONTAINER_ID=$("${COMPOSE_PS[@]}" "$service" 2>/dev/null | head -n 1)

      if [[ -z "$CONTAINER_ID" ]]; then
        display_status "$service" "NOT_FOUND" ""
        log_health "DOCKER_SERVICE $service NOT_FOUND"
        continue
      fi

      # One inspect call returns "<state> <health>". The {{ "{{if}}" }} guard leaves
      # health empty for containers that define no healthcheck instead of
      # making the template fail.
      read -r CONTAINER_STATUS CONTAINER_HEALTH < <(
        docker inspect "$CONTAINER_ID" \
          --format='{{ "{{.State.Status}} {{if .State.Health}}{{.State.Health.Status}}{{end}}" }}' \
          2>/dev/null
      )

      if [[ "$CONTAINER_STATUS" != "running" ]]; then
        display_status "$service" "DOWN" "(status: $CONTAINER_STATUS)"
        log_health "DOCKER_SERVICE $service DOWN $CONTAINER_STATUS"
      elif [[ -z "$CONTAINER_HEALTH" || "$CONTAINER_HEALTH" == "healthy" ]]; then
        display_status "$service" "RUNNING" ""
        log_health "DOCKER_SERVICE $service RUNNING"
      else
        display_status "$service" "DEGRADED" "(health: $CONTAINER_HEALTH)"
        log_health "DOCKER_SERVICE $service DEGRADED $CONTAINER_HEALTH"
      fi
    done
  fi
else
  display_status "Docker" "NOT_INSTALLED" ""
  log_health "DOCKER_SERVICE docker NOT_INSTALLED"
fi

# Network Connectivity
printf '\n%b%s%b\n' "$BLUE" "NETWORK CONNECTIVITY" "$NC"
echo "------------------------------------------------------------------"

# Internet connectivity (-W bounds the wait so an outage cannot stall the run)
if ping -c 1 -W 3 8.8.8.8 >/dev/null 2>&1; then
  display_status "Internet" "OK" ""
  log_health "NETWORK_CONNECTIVITY internet OK"
else
  display_status "Internet" "FAILED" ""
  log_health "NETWORK_CONNECTIVITY internet FAILED"
fi

# DNS resolution. getent uses the system resolver and is always available with
# glibc; nslookup is kept as a second attempt because it may not be installed.
if getent hosts google.com >/dev/null 2>&1 \
  || nslookup google.com >/dev/null 2>&1; then
  display_status "DNS Resolution" "OK" ""
  log_health "NETWORK_CONNECTIVITY dns OK"
else
  display_status "DNS Resolution" "FAILED" ""
  log_health "NETWORK_CONNECTIVITY dns FAILED"
fi

#######################################
# Test whether a TCP port accepts connections, waiting at most 2 seconds.
# Uses nc when installed, otherwise bash's /dev/tcp pseudo-device.
# Arguments: $1 - host or IP, $2 - port
# Returns:   0 if the connection succeeded, non-zero otherwise
#######################################
port_open() {
  local host="$1"
  local port="$2"

  if command -v nc >/dev/null 2>&1; then
    nc -z -w 2 "$host" "$port" 2>/dev/null
  else
    # host/port are passed as arguments, never interpolated into the script.
    timeout 2 bash -c 'exec 3<>"/dev/tcp/$1/$2"' _ "$host" "$port" 2>/dev/null
  fi
}

# Service ports - Check on Tailscale network interface
TAILSCALE_IP="{{ tailscale_bind_ip }}"
SERVICES_PORTS=(
  "sonarr:{{ ports.sonarr }}"
  "radarr:{{ ports.radarr }}"
  "lidarr:{{ ports.lidarr }}"
  "bazarr:{{ ports.bazarr }}"
  "prowlarr:{{ ports.prowlarr }}"
  "deluge:{{ ports.deluge }}"
  "sabnzbd:{{ ports.sabnzbd }}"
  "plex:{{ ports.plex }}"
  "tautulli:{{ ports.tautulli }}"
  "jellyseerr:{{ ports.jellyseerr }}"
  "tubearchivist:{{ ports.tubearchivist }}"
  "whisparr:{{ ports.whisparr }}"
)

for service_port in "${SERVICES_PORTS[@]}"; do
  # Entries are "name:port"; split with parameter expansion, no subprocess.
  SERVICE=${service_port%%:*}
  PORT=${service_port##*:}

  # Check on Tailscale IP first, fallback to localhost for services that
  # might bind to both. An empty Tailscale IP skips straight to localhost.
  if [[ -n "$TAILSCALE_IP" ]] && port_open "$TAILSCALE_IP" "$PORT"; then
    display_status "$SERVICE Port" "OK" "(port $PORT on $TAILSCALE_IP)"
    log_health "NETWORK_CONNECTIVITY ${SERVICE}_port OK $PORT $TAILSCALE_IP"
  elif port_open localhost "$PORT"; then
    display_status "$SERVICE Port" "OK" "(port $PORT on localhost)"
    log_health "NETWORK_CONNECTIVITY ${SERVICE}_port OK $PORT localhost"
  else
    display_status "$SERVICE Port" "FAILED" "(port $PORT)"
    log_health "NETWORK_CONNECTIVITY ${SERVICE}_port FAILED $PORT"
  fi
done

# Security Status
printf '\n%b%s%b\n' "$BLUE" "SECURITY STATUS" "$NC"
echo "------------------------------------------------------------------"

# UFW Status
if command -v ufw >/dev/null 2>&1; then
  # LC_ALL=C keeps the "Status: active" wording untranslated.
  UFW_STATUS=$(LC_ALL=C ufw status 2>/dev/null | awk 'NR == 1 { print $2 }')
  case "$UFW_STATUS" in
    active)
      display_status "UFW Firewall" "OK" "(active)"
      log_health "SECURITY_STATUS ufw OK active"
      ;;
    inactive)
      display_status "UFW Firewall" "WARNING" "(inactive)"
      log_health "SECURITY_STATUS ufw WARNING inactive"
      ;;
    *)
      # ufw refuses to report without root; an empty answer is not "inactive".
      display_status "UFW Firewall" "UNKNOWN" "(unable to query status; root required?)"
      log_health "SECURITY_STATUS ufw UNKNOWN"
      ;;
  esac
fi

# Fail2ban Status
if command -v fail2ban-client >/dev/null 2>&1; then
  if systemctl is-active fail2ban >/dev/null 2>&1; then
    display_status "Fail2ban" "OK" "(active)"
    log_health "SECURITY_STATUS fail2ban OK active"
  else
    display_status "Fail2ban" "WARNING" "(inactive)"
    log_health "SECURITY_STATUS fail2ban WARNING inactive"
  fi
fi

# Recent failed login attempts
AUTH_LOG=/var/log/auth.log
if [[ -r "$AUTH_LOG" ]]; then
  # Traditional syslog stamps pad the day with a space ("Jan  5"), hence %e
  # rather than %d; ISO-8601 stamps ("2024-01-05T...") are accepted as well.
  TODAY_SYSLOG=$(LC_ALL=C date '+%b %e')
  TODAY_ISO=$(date '+%Y-%m-%d')
  FAILED_LOGINS=$(grep -F "Failed password" "$AUTH_LOG" 2>/dev/null \
    | grep -c -e "^${TODAY_SYSLOG} " -e "^${TODAY_ISO}T")

  if [[ $FAILED_LOGINS -gt 10 ]]; then
    display_status "Failed Logins" "WARNING" "($FAILED_LOGINS today)"
    log_health "SECURITY_STATUS failed_logins WARNING $FAILED_LOGINS"
  elif [[ $FAILED_LOGINS -gt 0 ]]; then
    display_status "Failed Logins" "OK" "($FAILED_LOGINS today)"
    log_health "SECURITY_STATUS failed_logins OK $FAILED_LOGINS"
  else
    display_status "Failed Logins" "OK" "(none today)"
    log_health "SECURITY_STATUS failed_logins OK 0"
  fi
else
  # Missing or unreadable log must not be presented as "no failed logins".
  display_status "Failed Logins" "UNKNOWN" "($AUTH_LOG not readable)"
  log_health "SECURITY_STATUS failed_logins UNKNOWN"
fi

# Storage Status
printf '\n%b%s%b\n' "$BLUE" "STORAGE STATUS" "$NC"
echo "------------------------------------------------------------------"

# Media directories
MEDIA_DIRS=(
  "{{ media_root }}/movies"
  "{{ media_root }}/tv"
  "{{ media_root }}/music"
  "{{ media_root }}/downloads"
)

for media_dir in "${MEDIA_DIRS[@]}"; do
  DIR_NAME=${media_dir##*/}
  if [[ -d "$media_dir" ]]; then
    SIZE=$(du -sh "$media_dir" 2>/dev/null | cut -f1)
    FILE_COUNT=$(find "$media_dir" -type f 2>/dev/null | wc -l)
    display_status "$DIR_NAME Directory" "OK" "($SIZE, $FILE_COUNT files)"
    log_health "STORAGE_STATUS ${DIR_NAME}_directory OK $SIZE $FILE_COUNT"
  else
    display_status "$DIR_NAME Directory" "NOT_FOUND" ""
    log_health "STORAGE_STATUS ${DIR_NAME}_directory NOT_FOUND"
  fi
done

# Recent Activity Summary
printf '\n%b%s%b\n' "$BLUE" "RECENT ACTIVITY" "$NC"
echo "------------------------------------------------------------------"

# Check for recent downloads (files modified within the last 24 hours)
RECENT_DOWNLOADS=0
for media_dir in "${MEDIA_DIRS[@]}"; do
  if [[ -d "$media_dir" ]]; then
    COUNT=$(find "$media_dir" -type f -mtime -1 2>/dev/null | wc -l)
    RECENT_DOWNLOADS=$((RECENT_DOWNLOADS + COUNT))
  fi
done

display_status "Recent Downloads" "INFO" "($RECENT_DOWNLOADS files in last 24h)"
log_health "ACTIVITY_SUMMARY recent_downloads INFO $RECENT_DOWNLOADS"

# System uptime
UPTIME=$(uptime -p)
display_status "System Uptime" "INFO" "($UPTIME)"
log_health "ACTIVITY_SUMMARY system_uptime INFO $UPTIME"

# Overall Health Summary
printf '\n%b%s%b\n' "$BLUE" "OVERALL HEALTH SUMMARY" "$NC"
echo "=================================================================="

# Count issues for THIS run only. The log file is shared by every run of the
# day, so the counters are reset at each "STARTED" marker and whatever remains
# at the end belongs to the most recent run. Log lines look like
#   [date time] CATEGORY name STATUS ...
# so the status keyword is field 5; OVERALL_HEALTH lines carry their keyword
# in field 4 and are therefore never counted. The severity grouping mirrors
# the colors display_status assigns (red = critical, yellow = warning).
read -r CRITICAL_ISSUES WARNING_ISSUES < <(
  awk '
    /=== HEALTH DASHBOARD STARTED ===/ { crit = 0; warn = 0; next }
    $5 == "CRITICAL" || $5 == "FAILED" || $5 == "DOWN" { crit++ }
    $5 == "WARNING" || $5 == "DEGRADED" { warn++ }
    END { print crit + 0, warn + 0 }
  ' "$DASHBOARD_LOG" 2>/dev/null
)
CRITICAL_ISSUES=${CRITICAL_ISSUES:-0}
WARNING_ISSUES=${WARNING_ISSUES:-0}

if [[ $CRITICAL_ISSUES -gt 0 ]]; then
  printf '%bSYSTEM STATUS: CRITICAL%b (%s critical issues)\n' \
    "$RED" "$NC" "$CRITICAL_ISSUES"
  log_health "OVERALL_HEALTH CRITICAL $CRITICAL_ISSUES"
elif [[ $WARNING_ISSUES -gt 0 ]]; then
  printf '%bSYSTEM STATUS: WARNING%b (%s warnings)\n' \
    "$YELLOW" "$NC" "$WARNING_ISSUES"
  log_health "OVERALL_HEALTH WARNING $WARNING_ISSUES"
else
  printf '%bSYSTEM STATUS: HEALTHY%b\n' "$GREEN" "$NC"
  log_health "OVERALL_HEALTH HEALTHY 0"
fi

echo "=================================================================="
echo "Dashboard log: $DASHBOARD_LOG"
echo "=================================================================="

log_health "=== HEALTH DASHBOARD COMPLETED ==="

# Cleanup old dashboard logs (keep 7 days)
find "$LOG_DIR" -type f -name "health-dashboard-*.log" -mtime +7 -delete 2>/dev/null

exit 0