Files
arr-suite-template-bootstrap/templates/health-dashboard.sh.j2
openhands 24f2cd64e9 Initial template repository
🎬 ARR Suite Template Bootstrap - Complete Media Automation Stack

Features:
- 16 production services (Prowlarr, Sonarr, Radarr, Plex, etc.)
- One-command Ansible deployment
- VPN-protected downloads via Gluetun
- Tailscale secure access
- Production-ready security (UFW, Fail2Ban)
- Automated backups and monitoring
- Comprehensive documentation

Ready for customization and deployment to any VPS.

Co-authored-by: openhands <openhands@all-hands.dev>
2025-11-28 04:26:12 +00:00

314 lines
11 KiB
Django/Jinja
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# Health check dashboard script for Arrs Media Stack
# Generated by Ansible
# Log location for this run. The directory path is filled in by the template;
# one log file is used per calendar day.
LOG_DIR="{{ docker_root }}/logs/system"
# Captured once so every line written during this run carries the same stamp.
TIMESTAMP="$(date '+%Y-%m-%d %H:%M:%S')"
DASHBOARD_LOG="${LOG_DIR}/health-dashboard-$(date '+%Y%m%d').log"
# Create the log directory (including parents) when it is not there yet.
[[ -d "$LOG_DIR" ]] || mkdir -p "$LOG_DIR"
# log_health MESSAGE
# Append MESSAGE to the dashboard log, prefixed with the run timestamp.
# Globals: TIMESTAMP (read), DASHBOARD_LOG (read; file is appended to)
log_health() {
  local message="$1"
  printf '%s\n' "[$TIMESTAMP] $message" >> "$DASHBOARD_LOG"
}
# ANSI colour codes for terminal output. They are kept as literal backslash
# sequences and only turned into real escape characters when printed.
NC='\033[0m' # No Color (reset)
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
# display_status SERVICE STATUS DETAILS
# Print one coloured status line: "<icon> SERVICE: STATUS DETAILS".
#   OK / RUNNING             -> green  ✓
#   WARNING / DEGRADED       -> yellow ⚠
#   CRITICAL / FAILED / DOWN -> red    ✗
#   anything else            -> blue   ℹ
# Globals: RED, GREEN, YELLOW, BLUE, NC (read)
# Outputs: the formatted line on stdout
display_status() {
  local service="$1"
  local status="$2"
  local details="$3"
  local color icon
  case "$status" in
    "OK"|"RUNNING")
      color="$GREEN"
      icon="✓"
      ;;
    "WARNING"|"DEGRADED")
      color="$YELLOW"
      icon="⚠"
      ;;
    "CRITICAL"|"FAILED"|"DOWN")
      color="$RED"
      icon="✗"
      ;;
    *)
      # Informational / unknown statuses get an icon too, so every line
      # has the same "<icon> name: status" layout.
      color="$BLUE"
      icon="ℹ"
      ;;
  esac
  # %b expands the escape sequences stored in the colour variables only;
  # service, status and details go through %s so backslashes in that text
  # are printed literally instead of being interpreted.
  printf '%b%s%b %s: %b%s%b %s\n' \
    "$color" "$icon" "$NC" "$service" "$color" "$status" "$NC" "$details"
}
# Mark the start of this run in the log, then print the report header.
log_health "=== HEALTH DASHBOARD STARTED ==="
BANNER_RULE="=================================================================="
printf '%s\n' \
  "$BANNER_RULE" \
  " ARRS MEDIA STACK HEALTH DASHBOARD" \
  "$BANNER_RULE" \
  "Generated: $TIMESTAMP" \
  "$BANNER_RULE"
# System Health
#
# Samples CPU, memory, disk and load average, classifies each value against
# fixed thresholds and reports it through display_status / log_health.
# Comparisons are done with awk, so no bc is required, and a value that could
# not be read is reported as UNKNOWN instead of silently passing as OK.
echo -e "\n${BLUE}SYSTEM HEALTH${NC}"
echo "------------------------------------------------------------------"

# is_number VALUE: succeed when VALUE is a plain non-negative decimal number.
is_number() {
  [[ "$1" =~ ^[0-9]+([.][0-9]+)?$ ]]
}

# num_gt A B: succeed when A is numerically greater than B.
num_gt() {
  awk -v a="$1" -v b="$2" 'BEGIN { exit !((a + 0) > (b + 0)) }'
}

# parse_cpu_usage: read `top -bn1` output on stdin and print the busy CPU
# percentage (100 - idle) with one decimal. Prints nothing when no
# "Cpu(s)" summary line with an "id" field is found.
parse_cpu_usage() {
  awk '
    /Cpu\(s\)/ {
      sub(/^[^:]*:/, "")              # drop the "%Cpu(s):" label
      n = split($0, parts, ",")
      for (i = 1; i <= n; i++) {
        if (parts[i] ~ /id/) {        # the idle field, e.g. " 98.3 id"
          gsub(/[^0-9.]/, "", parts[i])
          printf "%.1f", 100 - parts[i]
          exit
        }
      }
    }'
}

# report_metric LABEL LOG_KEY VALUE UNIT WARN [CRIT]
# Classify VALUE (strictly greater than CRIT -> CRITICAL, strictly greater
# than WARN -> WARNING, otherwise OK) and emit one display line plus one
# SYSTEM_HEALTH log line. A non-numeric VALUE is reported as UNKNOWN.
report_metric() {
  local label="$1" key="$2" value="$3" unit="$4" warn="$5" crit="${6:-}"
  local status="OK"
  if ! is_number "$value"; then
    display_status "$label" "UNKNOWN" "(unavailable)"
    log_health "SYSTEM_HEALTH $key UNKNOWN"
    return 0
  fi
  if [[ -n "$crit" ]] && num_gt "$value" "$crit"; then
    status="CRITICAL"
  elif num_gt "$value" "$warn"; then
    status="WARNING"
  fi
  display_status "$label" "$status" "(${value}${unit})"
  log_health "SYSTEM_HEALTH $key $status ${value}${unit}"
}

# CPU Usage: total busy time, not just user time. LC_ALL=C keeps the decimal
# separator a dot so the summary line can be split on commas.
CPU_USAGE=$(LC_ALL=C top -bn1 2>/dev/null | parse_cpu_usage)
report_metric "CPU Usage" "CPU_USAGE" "$CPU_USAGE" "%" 60 80

# Memory Usage: used / total from the "Mem:" row of free.
MEMORY_PERCENT=$(LC_ALL=C free 2>/dev/null \
  | awk '/^Mem:/ && $2 > 0 {printf "%.1f", $3 / $2 * 100.0}')
report_metric "Memory Usage" "MEMORY_USAGE" "$MEMORY_PERCENT" "%" 75 90

# Disk Usage: -P forces one line per filesystem so column 5 is always the
# capacity, even when the device name is long.
DISK_USAGE=$(df -P "{{ docker_root }}" 2>/dev/null \
  | awk 'NR == 2 {sub(/%/, "", $5); print $5}')
report_metric "Disk Usage" "DISK_USAGE" "$DISK_USAGE" "%" 80 90

# Load Average: 1-minute value; only a warning threshold is defined.
LOAD_1MIN=$(LC_ALL=C uptime 2>/dev/null \
  | awk -F'load average: *' '{split($2, avg, ","); print avg[1]}')
report_metric "Load Average" "LOAD_AVERAGE" "$LOAD_1MIN" "" 2.0
# Docker Services
#
# For every expected compose service, look up its container and report
# RUNNING / DEGRADED / DOWN / NOT_FOUND through display_status / log_health.
echo -e "\n${BLUE}DOCKER SERVICES${NC}"
echo "------------------------------------------------------------------"
if ! command -v docker >/dev/null 2>&1; then
  display_status "Docker" "NOT_INSTALLED" ""
  log_health "DOCKER_SERVICE docker NOT_INSTALLED"
elif ! cd "{{ docker_compose_dir }}" 2>/dev/null; then
  # Without the compose directory every lookup below would come back empty
  # and each service would be misreported as NOT_FOUND, so say what is
  # actually wrong instead.
  display_status "Docker Compose" "FAILED" "(cannot enter {{ docker_compose_dir }})"
  log_health "DOCKER_SERVICE compose FAILED compose_dir_unavailable"
else
  # Prefer the standalone docker-compose binary as before; fall back to the
  # "docker compose" plugin when it is not installed. The plugin hides
  # stopped containers unless -a is given.
  if command -v docker-compose >/dev/null 2>&1; then
    COMPOSE_PS=(docker-compose ps -q)
  else
    COMPOSE_PS=(docker compose ps -a -q)
  fi
  SERVICES=("sonarr" "radarr" "lidarr" "bazarr" "prowlarr" "whisparr" "deluge" "sabnzbd" "plex" "tautulli" "jellyseerr" "tubearchivist" "gluetun" "watchtower" "logrotate")
  for service in "${SERVICES[@]}"; do
    # Only the first ID is used if a service has several containers.
    CONTAINER_ID=$("${COMPOSE_PS[@]}" "$service" 2>/dev/null | head -n 1)
    if [[ -z "$CONTAINER_ID" ]]; then
      display_status "$service" "NOT_FOUND" ""
      log_health "DOCKER_SERVICE $service NOT_FOUND"
      continue
    fi
    # One inspect call returns "status|health". The outer Jinja expression
    # only exists to emit the Go template braces literally. The {{ "{{if}}" }}
    # guard yields an empty health for containers without a healthcheck
    # instead of a template error.
    CONTAINER_INFO=$(docker inspect "$CONTAINER_ID" --format='{{ "{{.State.Status}}|{{if .State.Health}}{{.State.Health.Status}}{{end}}" }}' 2>/dev/null)
    CONTAINER_STATUS=${CONTAINER_INFO%%|*}
    CONTAINER_HEALTH=${CONTAINER_INFO#*|}
    if [[ "$CONTAINER_STATUS" != "running" ]]; then
      display_status "$service" "DOWN" "(status: $CONTAINER_STATUS)"
      log_health "DOCKER_SERVICE $service DOWN $CONTAINER_STATUS"
    elif [[ -z "$CONTAINER_HEALTH" || "$CONTAINER_HEALTH" == "healthy" || "$CONTAINER_HEALTH" == "<no value>" ]]; then
      display_status "$service" "RUNNING" ""
      log_health "DOCKER_SERVICE $service RUNNING"
    else
      display_status "$service" "DEGRADED" "(health: $CONTAINER_HEALTH)"
      log_health "DOCKER_SERVICE $service DEGRADED $CONTAINER_HEALTH"
    fi
  done
fi
# Network Connectivity
#
# Checks outbound reachability, name resolution and whether each service
# port accepts TCP connections. Every probe has a short timeout so an
# unreachable address cannot stall the dashboard.
echo -e "\n${BLUE}NETWORK CONNECTIVITY${NC}"
echo "------------------------------------------------------------------"
# Seconds to wait for a single ping reply / TCP connect.
PROBE_TIMEOUT=2
# Internet connectivity
if ping -c 1 -W "$PROBE_TIMEOUT" 8.8.8.8 >/dev/null 2>&1; then
  display_status "Internet" "OK" ""
  log_health "NETWORK_CONNECTIVITY internet OK"
else
  display_status "Internet" "FAILED" ""
  log_health "NETWORK_CONNECTIVITY internet FAILED"
fi
# DNS resolution: try the system resolver via getent first, so hosts without
# the nslookup tool are not reported as having broken DNS; nslookup is kept
# as a fallback.
if getent hosts google.com >/dev/null 2>&1 || nslookup google.com >/dev/null 2>&1; then
  display_status "DNS Resolution" "OK" ""
  log_health "NETWORK_CONNECTIVITY dns OK"
else
  display_status "DNS Resolution" "FAILED" ""
  log_health "NETWORK_CONNECTIVITY dns FAILED"
fi
# Service ports - Check on Tailscale network interface
TAILSCALE_IP="{{ tailscale_bind_ip }}"
# Entries are "name:port".
SERVICES_PORTS=(
  "sonarr:{{ ports.sonarr }}"
  "radarr:{{ ports.radarr }}"
  "lidarr:{{ ports.lidarr }}"
  "bazarr:{{ ports.bazarr }}"
  "prowlarr:{{ ports.prowlarr }}"
  "deluge:{{ ports.deluge }}"
  "sabnzbd:{{ ports.sabnzbd }}"
  "plex:{{ ports.plex }}"
  "tautulli:{{ ports.tautulli }}"
  "jellyseerr:{{ ports.jellyseerr }}"
  "tubearchivist:{{ ports.tubearchivist }}"
  "whisparr:{{ ports.whisparr }}"
)
for service_port in "${SERVICES_PORTS[@]}"; do
  SERVICE=${service_port%%:*}
  PORT=${service_port#*:}
  # Check on Tailscale IP first, fallback to localhost for services that might bind to both
  if nc -z -w "$PROBE_TIMEOUT" "$TAILSCALE_IP" "$PORT" 2>/dev/null; then
    display_status "$SERVICE Port" "OK" "(port $PORT on $TAILSCALE_IP)"
    log_health "NETWORK_CONNECTIVITY ${SERVICE}_port OK $PORT $TAILSCALE_IP"
  elif nc -z -w "$PROBE_TIMEOUT" localhost "$PORT" 2>/dev/null; then
    display_status "$SERVICE Port" "OK" "(port $PORT on localhost)"
    log_health "NETWORK_CONNECTIVITY ${SERVICE}_port OK $PORT localhost"
  else
    display_status "$SERVICE Port" "FAILED" "(port $PORT)"
    log_health "NETWORK_CONNECTIVITY ${SERVICE}_port FAILED $PORT"
  fi
done
# Security Status
#
# Reports firewall and fail2ban state plus the number of failed password
# logins recorded for today.
echo -e "\n${BLUE}SECURITY STATUS${NC}"
echo "------------------------------------------------------------------"

# count_failed_logins [LOGFILE] [DAY]
# Print how many "Failed password" lines in LOGFILE (default
# /var/log/auth.log) are stamped with DAY (default: today; when given, any
# string accepted by `date -d`). Both timestamp styles are matched:
#   classic syslog  "Nov  8 10:00:00 ..."  (day is space-padded, hence %e)
#   ISO 8601        "2025-11-08T10:00:00..."
# Prints 0 when the file is missing or unreadable.
count_failed_logins() {
  local logfile="${1:-/var/log/auth.log}"
  local -a when=()
  local syslog_day iso_day count
  if [[ -n "${2:-}" ]]; then
    when=(-d "$2")
  fi
  # LC_ALL=C: log files carry English month abbreviations regardless of the
  # locale this script runs under.
  syslog_day=$(LC_ALL=C date "${when[@]}" '+%b %e') || return 1
  iso_day=$(LC_ALL=C date "${when[@]}" '+%Y-%m-%d') || return 1
  # Anchor on the line start so a date inside a message cannot match.
  count=$(grep -cE "^(${syslog_day}|${iso_day})[ T].*Failed password" "$logfile" 2>/dev/null)
  printf '%s\n' "${count:-0}"
}

# UFW Status
if command -v ufw >/dev/null 2>&1; then
  UFW_STATUS=$(ufw status | awk 'NR == 1 {print $2}')
  if [[ "$UFW_STATUS" == "active" ]]; then
    display_status "UFW Firewall" "OK" "(active)"
    log_health "SECURITY_STATUS ufw OK active"
  else
    display_status "UFW Firewall" "WARNING" "(inactive)"
    log_health "SECURITY_STATUS ufw WARNING inactive"
  fi
fi
# Fail2ban Status
if command -v fail2ban-client >/dev/null 2>&1; then
  if systemctl is-active fail2ban >/dev/null 2>&1; then
    display_status "Fail2ban" "OK" "(active)"
    log_health "SECURITY_STATUS fail2ban OK active"
  else
    display_status "Fail2ban" "WARNING" "(inactive)"
    log_health "SECURITY_STATUS fail2ban WARNING inactive"
  fi
fi
# Recent failed login attempts
FAILED_LOGINS=$(count_failed_logins /var/log/auth.log)
FAILED_LOGINS=${FAILED_LOGINS:-0}
if [[ $FAILED_LOGINS -gt 10 ]]; then
  display_status "Failed Logins" "WARNING" "($FAILED_LOGINS today)"
  log_health "SECURITY_STATUS failed_logins WARNING $FAILED_LOGINS"
elif [[ $FAILED_LOGINS -gt 0 ]]; then
  display_status "Failed Logins" "OK" "($FAILED_LOGINS today)"
  log_health "SECURITY_STATUS failed_logins OK $FAILED_LOGINS"
else
  display_status "Failed Logins" "OK" "(none today)"
  log_health "SECURITY_STATUS failed_logins OK 0"
fi
# Storage Status
# Report size and file count for each media directory, or NOT_FOUND when the
# directory does not exist.
echo -e "\n${BLUE}STORAGE STATUS${NC}"
echo "------------------------------------------------------------------"
# Media directories
MEDIA_DIRS=(
  "{{ media_root }}/movies"
  "{{ media_root }}/tv"
  "{{ media_root }}/music"
  "{{ media_root }}/downloads"
)
for media_dir in "${MEDIA_DIRS[@]}"; do
  # Last path component is used as the display name.
  DIR_NAME="${media_dir##*/}"
  if [[ ! -d "$media_dir" ]]; then
    display_status "$DIR_NAME Directory" "NOT_FOUND" ""
    log_health "STORAGE_STATUS ${DIR_NAME}_directory NOT_FOUND"
    continue
  fi
  # Human-readable total size (first column of du) and number of regular files.
  SIZE=$(du -sh "$media_dir" 2>/dev/null | cut -f1)
  FILE_COUNT=$(find "$media_dir" -type f 2>/dev/null | wc -l)
  display_status "$DIR_NAME Directory" "OK" "($SIZE, $FILE_COUNT files)"
  log_health "STORAGE_STATUS ${DIR_NAME}_directory OK $SIZE $FILE_COUNT"
done
# Recent Activity Summary
echo -e "\n${BLUE}RECENT ACTIVITY${NC}"
echo "------------------------------------------------------------------"
# Total number of files modified within the last 24 hours across all
# existing media directories.
RECENT_DOWNLOADS=0
for media_dir in "${MEDIA_DIRS[@]}"; do
  [[ -d "$media_dir" ]] || continue
  COUNT=$(find "$media_dir" -type f -mtime -1 2>/dev/null | wc -l)
  RECENT_DOWNLOADS=$((RECENT_DOWNLOADS + COUNT))
done
display_status "Recent Downloads" "INFO" "($RECENT_DOWNLOADS files in last 24h)"
log_health "ACTIVITY_SUMMARY recent_downloads INFO $RECENT_DOWNLOADS"
# System uptime in the "up N days, ..." form printed by uptime -p
UPTIME="$(uptime -p)"
display_status "System Uptime" "INFO" "($UPTIME)"
log_health "ACTIVITY_SUMMARY system_uptime INFO $UPTIME"
# Overall Health Summary
echo -e "\n${BLUE}OVERALL HEALTH SUMMARY${NC}"
echo "=================================================================="

# count_run_issues LOGFILE
# Print "<critical> <warning>": the number of log lines containing CRITICAL
# and WARNING written since the most recent "=== HEALTH DASHBOARD STARTED ==="
# marker. The log file is shared by every run of the day, so counting the
# whole file would carry issues (and earlier OVERALL_HEALTH lines) from
# previous runs into this run's verdict. Without a marker the whole file is
# counted; an unreadable file yields "0 0".
count_run_issues() {
  local logfile="$1"
  if [[ ! -r "$logfile" ]]; then
    printf '0 0\n'
    return 0
  fi
  awk '
    /=== HEALTH DASHBOARD STARTED ===/ { crit = 0; warn = 0; next }
    /CRITICAL/ { crit++ }
    /WARNING/  { warn++ }
    END { print crit + 0, warn + 0 }
  ' "$logfile"
}

# Count issues
read -r CRITICAL_ISSUES WARNING_ISSUES < <(count_run_issues "$DASHBOARD_LOG")
CRITICAL_ISSUES=${CRITICAL_ISSUES:-0}
WARNING_ISSUES=${WARNING_ISSUES:-0}
if [[ $CRITICAL_ISSUES -gt 0 ]]; then
  echo -e "${RED}SYSTEM STATUS: CRITICAL${NC} ($CRITICAL_ISSUES critical issues)"
  log_health "OVERALL_HEALTH CRITICAL $CRITICAL_ISSUES"
elif [[ $WARNING_ISSUES -gt 0 ]]; then
  echo -e "${YELLOW}SYSTEM STATUS: WARNING${NC} ($WARNING_ISSUES warnings)"
  log_health "OVERALL_HEALTH WARNING $WARNING_ISSUES"
else
  echo -e "${GREEN}SYSTEM STATUS: HEALTHY${NC}"
  log_health "OVERALL_HEALTH HEALTHY 0"
fi
echo "=================================================================="
echo "Dashboard log: $DASHBOARD_LOG"
echo "=================================================================="
log_health "=== HEALTH DASHBOARD COMPLETED ==="
# Cleanup old dashboard logs: delete daily log files last modified more than
# RETENTION_DAYS days ago. Errors from find are discarded and the script
# always exits successfully.
RETENTION_DAYS=7
find "$LOG_DIR" -name "health-dashboard-*.log" -mtime "+${RETENTION_DAYS}" -delete 2>/dev/null
exit 0