🎬 ARR Suite Template Bootstrap - Complete Media Automation Stack Features: - 16 production services (Prowlarr, Sonarr, Radarr, Plex, etc.) - One-command Ansible deployment - VPN-protected downloads via Gluetun - Tailscale secure access - Production-ready security (UFW, Fail2Ban) - Automated backups and monitoring - Comprehensive documentation Ready for customization and deployment to any VPS. Co-authored-by: openhands <openhands@all-hands.dev>
314 lines
11 KiB
Django/Jinja
314 lines
11 KiB
Django/Jinja
#!/bin/bash
#
# Health check dashboard for the Arrs Media Stack.
# Generated by Ansible: the double-brace placeholders are filled in when the
# template is rendered.

# Where dashboard logs live, and the wall-clock stamp shared by every log line
# written during this run (captured once, at start-up).
LOG_DIR="{{ docker_root }}/logs/system"
TIMESTAMP="$(date '+%Y-%m-%d %H:%M:%S')"

# One log file per calendar day; repeated runs on the same day append to it.
DASHBOARD_LOG="${LOG_DIR}/health-dashboard-$(date '+%Y%m%d').log"

# The log directory must exist before the first append.
mkdir -p "${LOG_DIR}"
|
||
|
||
#######################################
# Append one message to the dashboard log, prefixed with the run timestamp.
# Globals:   TIMESTAMP (read), DASHBOARD_LOG (read; file is appended to)
# Arguments: $1 - message text
#######################################
log_health() {
  printf '[%s] %s\n' "$TIMESTAMP" "$1" >> "$DASHBOARD_LOG"
}
|
||
|
||
# ANSI colour sequences for terminal output. They are stored as literal
# backslash text and only become escape codes when printed with `echo -e`.
NC='\033[0m' # No Color (reset)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
|
||
|
||
#######################################
# Print one colourised status line: "<icon> <service>: <status> <details>".
# Globals:   GREEN, YELLOW, RED, BLUE, NC (read)
# Arguments: $1 - label of the service or check
#            $2 - status keyword; selects the icon and colour
#            $3 - free-form details appended after the status (may be empty)
# Outputs:   a single line on stdout
#######################################
display_status() {
  local service="$1"
  local status="$2"
  local details="$3"
  local tint icon

  # Map the status keyword to a colour/icon pair; anything unrecognised is
  # shown as informational.
  case "$status" in
    "OK" | "RUNNING")
      tint="$GREEN"
      icon="✓"
      ;;
    "WARNING" | "DEGRADED")
      tint="$YELLOW"
      icon="⚠"
      ;;
    "CRITICAL" | "FAILED" | "DOWN")
      tint="$RED"
      icon="✗"
      ;;
    *)
      tint="$BLUE"
      icon="ℹ"
      ;;
  esac

  echo -e "${tint}${icon}${NC} $service: ${tint}$status${NC} $details"
}
|
||
|
||
# Mark the start of this run in the log, then print the dashboard banner.
log_health "=== HEALTH DASHBOARD STARTED ==="

banner_rule="=================================================================="
printf '%s\n' \
  "$banner_rule" \
  " ARRS MEDIA STACK HEALTH DASHBOARD" \
  "$banner_rule" \
  "Generated: $TIMESTAMP" \
  "$banner_rule"
|
||
|
||
# System Health
#
# Reports CPU, memory, disk and 1-minute load against fixed thresholds.
# Comparisons are done with awk (already required by this script) rather than
# bc, so a missing bc can no longer turn every check into a silent "OK".
# A metric that cannot be parsed is reported as UNKNOWN instead of OK.

#######################################
# Succeed when $1 is a plain non-negative decimal number (e.g. 7 or 7.25).
#######################################
is_numeric() {
  [[ "$1" =~ ^[0-9]+([.][0-9]+)?$ ]]
}

#######################################
# Succeed when $1 is numeric and strictly greater than the limit $2.
#######################################
metric_exceeds() {
  is_numeric "$1" || return 1
  awk -v value="$1" -v limit="$2" 'BEGIN { exit !(value + 0 > limit + 0) }'
}

#######################################
# Classify one metric and emit it to both the terminal and the log.
# Arguments: $1 - display label            (e.g. "CPU Usage")
#            $2 - log key                  (e.g. "CPU_USAGE")
#            $3 - measured value
#            $4 - unit suffix, may be empty (e.g. "%")
#            $5 - WARNING threshold (value must be strictly greater)
#            $6 - CRITICAL threshold, optional
# Outputs:   via display_status (stdout) and log_health (dashboard log)
#######################################
report_metric() {
  local label="$1"
  local key="$2"
  local value="$3"
  local unit="$4"
  local warn_at="$5"
  local crit_at="${6:-}"
  local level="OK"

  if ! is_numeric "$value"; then
    display_status "$label" "UNKNOWN" "(unavailable)"
    log_health "SYSTEM_HEALTH $key UNKNOWN"
    return 0
  fi

  if [[ -n "$crit_at" ]] && metric_exceeds "$value" "$crit_at"; then
    level="CRITICAL"
  elif metric_exceeds "$value" "$warn_at"; then
    level="WARNING"
  fi

  display_status "$label" "$level" "(${value}${unit})"
  log_health "SYSTEM_HEALTH $key $level ${value}${unit}"
}

echo -e "\n${BLUE}SYSTEM HEALTH${NC}"
echo "------------------------------------------------------------------"

# CPU Usage: first figure on top's "Cpu(s)" summary line (user time).
# Parsing strips the label and everything after the number, so it also works
# when top prints no space after the colon ("%Cpu(s):100.0 us"). LC_ALL=C keeps
# the decimal separator a dot.
CPU_USAGE=$(LC_ALL=C top -bn1 \
  | awk '/Cpu\(s\)/ { sub(/^.*Cpu\(s\):[ \t]*/, ""); sub(/[^0-9.].*$/, ""); print; exit }')
report_metric "CPU Usage" "CPU_USAGE" "$CPU_USAGE" "%" 60 80

# Memory Usage: used / total from the "Mem" row of free, as a percentage.
MEMORY_PERCENT=$(LC_ALL=C free \
  | LC_ALL=C awk '/Mem/ && $2 > 0 { printf "%.1f", $3 / $2 * 100.0 }')
report_metric "Memory Usage" "MEMORY_USAGE" "$MEMORY_PERCENT" "%" 75 90

# Disk Usage: capacity column for the filesystem holding the docker root.
# -P guarantees one output line per filesystem, so long device names cannot
# wrap the row and shift the columns.
DISK_USAGE=$(df -P "{{ docker_root }}" 2>/dev/null \
  | awk 'NR == 2 { sub(/%/, "", $5); print $5 }')
report_metric "Disk Usage" "DISK_USAGE" "$DISK_USAGE" "%" 80 90

# Load Average: 1-minute figure from uptime; only a WARNING level is defined.
LOAD_1MIN=$(LC_ALL=C uptime \
  | awk -F'load average:' '{ split($2, parts, ","); gsub(/[ \t]/, "", parts[1]); print parts[1] }')
report_metric "Load Average" "LOAD_AVERAGE" "$LOAD_1MIN" "" 2.0
|
||
|
||
# Docker Services
#
# For each expected compose service, report whether its container exists, is
# running, and (when a health check is reported) is healthy.
echo -e "\n${BLUE}DOCKER SERVICES${NC}"
echo "------------------------------------------------------------------"

if ! command -v docker >/dev/null 2>&1; then
  display_status "Docker" "NOT_INSTALLED" ""
  log_health "DOCKER_SERVICE docker NOT_INSTALLED"
elif ! cd "{{ docker_compose_dir }}" 2>/dev/null; then
  # docker-compose is invoked from the compose directory; if we cannot enter
  # it, querying from wherever we happen to be would mislabel every service as
  # NOT_FOUND, so report the real problem instead.
  display_status "Docker Compose" "FAILED" "(cannot enter compose directory)"
  log_health "DOCKER_SERVICE compose_dir FAILED"
else
  SERVICES=("sonarr" "radarr" "lidarr" "bazarr" "prowlarr" "whisparr" "deluge" "sabnzbd" "plex" "tautulli" "jellyseerr" "tubearchivist" "gluetun" "watchtower" "logrotate")

  for service in "${SERVICES[@]}"; do
    CONTAINER_ID=$(docker-compose ps -q "$service" 2>/dev/null)

    # No container id means compose has no container for this service.
    if [[ -z "$CONTAINER_ID" ]]; then
      display_status "$service" "NOT_FOUND" ""
      log_health "DOCKER_SERVICE $service NOT_FOUND"
      continue
    fi

    CONTAINER_STATUS=$(docker inspect "$CONTAINER_ID" --format='{{ "{{.State.Status}}" }}' 2>/dev/null)
    CONTAINER_HEALTH=$(docker inspect "$CONTAINER_ID" --format='{{ "{{.State.Health.Status}}" }}' 2>/dev/null)

    if [[ "$CONTAINER_STATUS" != "running" ]]; then
      display_status "$service" "DOWN" "(status: $CONTAINER_STATUS)"
      log_health "DOCKER_SERVICE $service DOWN $CONTAINER_STATUS"
    elif [[ "$CONTAINER_HEALTH" == "healthy" ]] || [[ -z "$CONTAINER_HEALTH" ]] || [[ "$CONTAINER_HEALTH" == "<no value>" ]]; then
      # An empty or "<no value>" health string is treated as "no health check
      # reported", which counts as running.
      display_status "$service" "RUNNING" ""
      log_health "DOCKER_SERVICE $service RUNNING"
    else
      display_status "$service" "DEGRADED" "(health: $CONTAINER_HEALTH)"
      log_health "DOCKER_SERVICE $service DEGRADED $CONTAINER_HEALTH"
    fi
  done
fi
|
||
|
||
# Network Connectivity
echo -e "\n${BLUE}NETWORK CONNECTIVITY${NC}"
echo "------------------------------------------------------------------"

# Internet connectivity: a single ping to 8.8.8.8
internet_state="FAILED"
if ping -c 1 8.8.8.8 >/dev/null 2>&1; then
  internet_state="OK"
fi
display_status "Internet" "$internet_state" ""
log_health "NETWORK_CONNECTIVITY internet $internet_state"

# DNS resolution: look up google.com with the system resolver
dns_state="FAILED"
if nslookup google.com >/dev/null 2>&1; then
  dns_state="OK"
fi
display_status "DNS Resolution" "$dns_state" ""
log_health "NETWORK_CONNECTIVITY dns $dns_state"
|
||
|
||
# Service ports - Check on Tailscale network interface
TAILSCALE_IP="{{ tailscale_bind_ip }}"

# Each entry is "name:port"; the port numbers are filled in at render time.
SERVICES_PORTS=(
  "sonarr:{{ ports.sonarr }}"
  "radarr:{{ ports.radarr }}"
  "lidarr:{{ ports.lidarr }}"
  "bazarr:{{ ports.bazarr }}"
  "prowlarr:{{ ports.prowlarr }}"
  "deluge:{{ ports.deluge }}"
  "sabnzbd:{{ ports.sabnzbd }}"
  "plex:{{ ports.plex }}"
  "tautulli:{{ ports.tautulli }}"
  "jellyseerr:{{ ports.jellyseerr }}"
  "tubearchivist:{{ ports.tubearchivist }}"
  "whisparr:{{ ports.whisparr }}"
)

for service_port in "${SERVICES_PORTS[@]}"; do
  # Split "name:port" into its first and second colon-separated fields.
  SERVICE="${service_port%%:*}"
  port_and_rest="${service_port#*:}"
  PORT="${port_and_rest%%:*}"

  # Probe the Tailscale IP first, then fall back to localhost for services
  # that might bind to both.
  port_state="FAILED"
  reached_on=""
  if nc -z "$TAILSCALE_IP" "$PORT" 2>/dev/null; then
    port_state="OK"
    reached_on="$TAILSCALE_IP"
  elif nc -z localhost "$PORT" 2>/dev/null; then
    port_state="OK"
    reached_on="localhost"
  fi

  if [[ "$port_state" == "OK" ]]; then
    display_status "$SERVICE Port" "OK" "(port $PORT on $reached_on)"
    log_health "NETWORK_CONNECTIVITY ${SERVICE}_port OK $PORT $reached_on"
  else
    display_status "$SERVICE Port" "FAILED" "(port $PORT)"
    log_health "NETWORK_CONNECTIVITY ${SERVICE}_port FAILED $PORT"
  fi
done
|
||
|
||
# Security Status
echo -e "\n${BLUE}SECURITY STATUS${NC}"
echo "------------------------------------------------------------------"

# UFW Status: only checked when the ufw command exists. The state is the
# second word of the first line printed by `ufw status`.
if command -v ufw >/dev/null 2>&1; then
  UFW_STATUS="$(ufw status | awk 'NR == 1 { print $2 }')"
  case "$UFW_STATUS" in
    active)
      display_status "UFW Firewall" "OK" "(active)"
      log_health "SECURITY_STATUS ufw OK active"
      ;;
    *)
      display_status "UFW Firewall" "WARNING" "(inactive)"
      log_health "SECURITY_STATUS ufw WARNING inactive"
      ;;
  esac
fi

# Fail2ban Status: only checked when fail2ban-client exists; the state comes
# from systemd.
if command -v fail2ban-client >/dev/null 2>&1; then
  f2b_level="WARNING"
  f2b_state="inactive"
  if systemctl is-active fail2ban >/dev/null 2>&1; then
    f2b_level="OK"
    f2b_state="active"
  fi
  display_status "Fail2ban" "$f2b_level" "($f2b_state)"
  log_health "SECURITY_STATUS fail2ban $f2b_level $f2b_state"
fi
|
||
|
||
# Recent failed login attempts

#######################################
# Count today's "Failed password" entries in an auth log.
# Traditional syslog stamps pad single-digit days with a space ("Jan  5"),
# so the date is formatted with %e; the zero-padded %d ("Jan 05") would never
# match during the first nine days of a month.
# Arguments: $1 - log file to scan (default: /var/log/auth.log)
# Outputs:   the count on stdout; 0 when the file is missing or unreadable
#######################################
count_failed_logins_today() {
  local auth_log="${1:-/var/log/auth.log}"
  local today
  today="$(date '+%b %e')"
  grep "Failed password" "$auth_log" 2>/dev/null | grep -cF -- "$today"
}

FAILED_LOGINS=$(count_failed_logins_today)
if [[ $FAILED_LOGINS -gt 10 ]]; then
  display_status "Failed Logins" "WARNING" "($FAILED_LOGINS today)"
  log_health "SECURITY_STATUS failed_logins WARNING $FAILED_LOGINS"
elif [[ $FAILED_LOGINS -gt 0 ]]; then
  display_status "Failed Logins" "OK" "($FAILED_LOGINS today)"
  log_health "SECURITY_STATUS failed_logins OK $FAILED_LOGINS"
else
  display_status "Failed Logins" "OK" "(none today)"
  log_health "SECURITY_STATUS failed_logins OK 0"
fi
|
||
|
||
# Storage Status
echo -e "\n${BLUE}STORAGE STATUS${NC}"
echo "------------------------------------------------------------------"

# Media directories to inspect (paths are filled in at render time)
MEDIA_DIRS=(
  "{{ media_root }}/movies"
  "{{ media_root }}/tv"
  "{{ media_root }}/music"
  "{{ media_root }}/downloads"
)

for media_dir in "${MEDIA_DIRS[@]}"; do
  DIR_NAME="$(basename "$media_dir")"

  # Missing directories are reported and skipped.
  if [[ ! -d "$media_dir" ]]; then
    display_status "$DIR_NAME Directory" "NOT_FOUND" ""
    log_health "STORAGE_STATUS ${DIR_NAME}_directory NOT_FOUND"
    continue
  fi

  # Human-readable total size and the number of regular files beneath it.
  SIZE="$(du -sh "$media_dir" 2>/dev/null | cut -f1)"
  FILE_COUNT="$(find "$media_dir" -type f 2>/dev/null | wc -l)"
  display_status "$DIR_NAME Directory" "OK" "($SIZE, $FILE_COUNT files)"
  log_health "STORAGE_STATUS ${DIR_NAME}_directory OK $SIZE $FILE_COUNT"
done
|
||
|
||
# Recent Activity Summary
echo -e "\n${BLUE}RECENT ACTIVITY${NC}"
echo "------------------------------------------------------------------"

# Total the files modified within the last 24 hours across every media
# directory that exists.
RECENT_DOWNLOADS=0
for media_dir in "${MEDIA_DIRS[@]}"; do
  [[ -d "$media_dir" ]] || continue
  COUNT="$(find "$media_dir" -type f -mtime -1 2>/dev/null | wc -l)"
  RECENT_DOWNLOADS=$((RECENT_DOWNLOADS + COUNT))
done

display_status "Recent Downloads" "INFO" "($RECENT_DOWNLOADS files in last 24h)"
log_health "ACTIVITY_SUMMARY recent_downloads INFO $RECENT_DOWNLOADS"

# System uptime in uptime's "pretty" format
UPTIME="$(uptime -p)"
display_status "System Uptime" "INFO" "($UPTIME)"
log_health "ACTIVITY_SUMMARY system_uptime INFO $UPTIME"
|
||
|
||
# Overall Health Summary

#######################################
# Count log entries of a given level that belong to the current run.
# The daily log accumulates every run of the day, so counting across the whole
# file would carry earlier runs' problems (and their own OVERALL_HEALTH summary
# lines) into this run's verdict. The counter therefore restarts at each
# "HEALTH DASHBOARD STARTED" marker and ignores OVERALL_HEALTH lines.
# Arguments: $1 - level text to look for (e.g. CRITICAL)
#            $2 - log file
# Outputs:   the count on stdout; 0 when the file cannot be read
#######################################
count_run_issues() {
  local level="$1"
  local log_file="$2"

  if [[ ! -r "$log_file" ]]; then
    echo 0
    return 0
  fi

  awk -v level="$level" '
    /=== HEALTH DASHBOARD STARTED ===/ { count = 0; next }
    /OVERALL_HEALTH/ { next }
    index($0, level) { count++ }
    END { print count + 0 }
  ' "$log_file"
}

echo -e "\n${BLUE}OVERALL HEALTH SUMMARY${NC}"
echo "=================================================================="

# Count issues recorded during this run
CRITICAL_ISSUES=$(count_run_issues "CRITICAL" "$DASHBOARD_LOG")
WARNING_ISSUES=$(count_run_issues "WARNING" "$DASHBOARD_LOG")

if [[ $CRITICAL_ISSUES -gt 0 ]]; then
  echo -e "${RED}SYSTEM STATUS: CRITICAL${NC} ($CRITICAL_ISSUES critical issues)"
  log_health "OVERALL_HEALTH CRITICAL $CRITICAL_ISSUES"
elif [[ $WARNING_ISSUES -gt 0 ]]; then
  echo -e "${YELLOW}SYSTEM STATUS: WARNING${NC} ($WARNING_ISSUES warnings)"
  log_health "OVERALL_HEALTH WARNING $WARNING_ISSUES"
else
  echo -e "${GREEN}SYSTEM STATUS: HEALTHY${NC}"
  log_health "OVERALL_HEALTH HEALTHY 0"
fi
|
||
|
||
# Closing banner pointing at the log file written during this run.
footer_rule="=================================================================="
printf '%s\n' \
  "$footer_rule" \
  "Dashboard log: $DASHBOARD_LOG" \
  "$footer_rule"

log_health "=== HEALTH DASHBOARD COMPLETED ==="

# Cleanup old dashboard logs (keep 7 days); errors are deliberately silenced.
find "$LOG_DIR" -name "health-dashboard-*.log" -mtime +7 -delete 2>/dev/null

exit 0