#!/bin/bash
# Docker monitoring script for Arrs Media Stack
# Generated by Ansible
#
# Purpose: records Docker daemon state, per-container resource usage,
# per-service status/health/restart counts, recent error counts and disk
# usage into a daily log file, restarts services that are not running, and
# prunes Docker data when the disk is nearly full.
#
# NOTE: this file is a Jinja2 template. Docker Go-template placeholders are
# wrapped in Jinja string literals so that they survive rendering; keep that
# escaping intact when editing format strings.

LOG_DIR="{{ docker_root }}/logs/arrs"
# Captured once, so every entry written by a single run shares one timestamp.
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
LOG_FILE="$LOG_DIR/docker-monitor-$(date '+%Y%m%d').log"

# Ensure log directory exists; without it nothing below can be recorded.
if ! mkdir -p "$LOG_DIR"; then
  printf 'docker-monitor: cannot create log directory %s\n' "$LOG_DIR" >&2
  exit 1
fi

#######################################
# Append one message to the daily log, prefixed with the run timestamp.
# Globals:   TIMESTAMP (read), LOG_FILE (read)
# Arguments: $1 - message text
# Outputs:   appends a line to LOG_FILE
#######################################
log_with_timestamp() {
  printf '[%s] %s\n' "$TIMESTAMP" "$1" >> "$LOG_FILE"
}

#######################################
# Run `docker inspect` with a format string against a list of container IDs.
# Arguments: $1 - newline-separated container IDs (may be empty)
#            $2 - Go template passed to --format
# Outputs:   the formatted value(s) on stdout; nothing when $1 is empty
#######################################
inspect_containers() {
  local ids=$1
  local format=$2
  # Skip the call entirely when there is no container to inspect.
  [[ -n "$ids" ]] || return 0
  # Errors are silenced on purpose: a failing template (for example a
  # container without a health check) must yield an empty value.
  xargs docker inspect --format="$format" <<< "$ids" 2>/dev/null
}

# Change to compose directory; docker-compose resolves the project from the
# working directory, so continuing after a failed cd would query the wrong
# place.
if ! cd "{{ docker_compose_dir }}"; then
  log_with_timestamp "COMPOSE_DIR FAILED - Cannot change to compose directory"
  exit 1
fi

# Check Docker daemon
if ! docker info >/dev/null 2>&1; then
  log_with_timestamp "DOCKER_DAEMON FAILED - Docker daemon not responding"
  exit 1
fi

log_with_timestamp "DOCKER_DAEMON OK"

# Get container stats. The "table" directive is deliberately NOT used here:
# it pads columns with spaces and adds a header row, which defeats the
# tab-based field splitting below.
CONTAINER_STATS=$(docker stats --no-stream --format "{{ '{{.Container}}' }}\t{{ '{{.CPUPerc}}' }}\t{{ '{{.MemUsage}}' }}\t{{ '{{.MemPerc}}' }}\t{{ '{{.NetIO}}' }}\t{{ '{{.BlockIO}}' }}")

# Log container resource usage, one entry per container.
while IFS=$'\t' read -r container cpu mem_usage mem_perc net_io block_io; do
  # An empty result still feeds one blank line through the here-string.
  [[ -n "$container" ]] || continue
  log_with_timestamp "CONTAINER_STATS $container CPU:$cpu MEM:$mem_usage($mem_perc) NET:$net_io DISK:$block_io"
done <<< "$CONTAINER_STATS"

# Check individual service health
SERVICES=("sonarr" "radarr" "lidarr" "bazarr" "prowlarr" "watchtower")
# NOTE(review): PORTS is not referenced anywhere in this script and has one
# entry fewer than SERVICES; kept so the template's inputs stay unchanged.
PORTS=({{ ports.sonarr }} {{ ports.radarr }} {{ ports.lidarr }} {{ ports.bazarr }} {{ ports.prowlarr }})

for i in "${!SERVICES[@]}"; do
  service="${SERVICES[$i]}"

  # Resolve the container ID(s) once and reuse them for every inspect call.
  CONTAINER_IDS=$(docker-compose ps -q "$service")

  # Get container status
  STATUS=$(inspect_containers "$CONTAINER_IDS" '{{ "{{.State.Status}}" }}')

  if [[ "$STATUS" == "running" ]]; then
    # Check container health; an empty value means no health check is defined
    # and is treated as OK.
    HEALTH=$(inspect_containers "$CONTAINER_IDS" '{{ "{{.State.Health.Status}}" }}')

    if [[ "$HEALTH" == "healthy" || -z "$HEALTH" ]]; then
      log_with_timestamp "SERVICE_$service OK"
    else
      log_with_timestamp "SERVICE_$service UNHEALTHY - Health status: $HEALTH"
    fi

    # Check restart count; only compare when the value is a plain number.
    RESTART_COUNT=$(inspect_containers "$CONTAINER_IDS" '{{ "{{.RestartCount}}" }}')
    if [[ "$RESTART_COUNT" =~ ^[0-9]+$ ]] && (( RESTART_COUNT > 5 )); then
      log_with_timestamp "SERVICE_$service WARNING - High restart count: $RESTART_COUNT"
    fi
  else
    log_with_timestamp "SERVICE_$service FAILED - Status: ${STATUS:-not_found}"

    # Try to restart the service and record when the attempt itself fails.
    log_with_timestamp "SERVICE_$service RESTART_ATTEMPT"
    if ! docker-compose restart "$service" 2>/dev/null; then
      log_with_timestamp "SERVICE_$service RESTART_FAILED"
    fi
  fi
done

# Check Docker system resources; log each row separately so that every line
# in the log file carries the timestamp and tag.
DOCKER_SYSTEM_DF=$(docker system df --format "table {{ '{{.Type}}' }}\t{{ '{{.Total}}' }}\t{{ '{{.Active}}' }}\t{{ '{{.Size}}' }}\t{{ '{{.Reclaimable}}' }}")
while IFS= read -r df_row; do
  [[ -n "$df_row" ]] || continue
  log_with_timestamp "DOCKER_SYSTEM_DF $df_row"
done <<< "$DOCKER_SYSTEM_DF"

# Check for stopped containers. grep exits non-zero when nothing matches,
# which is the normal case, hence the "|| true".
STOPPED_CONTAINERS=$(docker ps -a --filter "status=exited" --format "{{ '{{.Names}}' }}" \
  | grep -E "(sonarr|radarr|lidarr|bazarr|prowlarr|watchtower)" || true)
if [[ -n "$STOPPED_CONTAINERS" ]]; then
  # Join the names onto one line so the entry stays a single log record.
  log_with_timestamp "STOPPED_CONTAINERS ${STOPPED_CONTAINERS//$'\n'/ }"
fi

# Check Docker logs for errors (last 5 minutes)
FIVE_MIN_AGO=$(date -d '5 minutes ago' '+%Y-%m-%dT%H:%M:%S')
for service in "${SERVICES[@]}"; do
  # grep -c prints 0 (and exits non-zero) when there is no match.
  ERROR_COUNT=$(docker-compose logs --since="$FIVE_MIN_AGO" "$service" 2>/dev/null | grep -ci error)
  if [[ "$ERROR_COUNT" =~ ^[0-9]+$ ]] && (( ERROR_COUNT > 0 )); then
    log_with_timestamp "SERVICE_$service ERRORS - $ERROR_COUNT errors in last 5 minutes"
  fi
done

# Cleanup old log files (keep 7 days)
find "$LOG_DIR" -name "docker-monitor-*.log" -mtime +7 -delete 2>/dev/null

# Cleanup Docker system if disk usage is high. "df -P" guarantees one data
# row per filesystem, so the use% column is always field 5 of row 2.
DISK_USAGE=$(df -P "{{ docker_root }}" | awk 'NR == 2 { sub(/%/, "", $5); print $5 }')
if [[ "$DISK_USAGE" =~ ^[0-9]+$ ]] && (( DISK_USAGE > 85 )); then
  log_with_timestamp "CLEANUP_ATTEMPT Disk usage ${DISK_USAGE}% - Running Docker cleanup"
  docker system prune -f >/dev/null 2>&1
  docker image prune -f >/dev/null 2>&1
  log_with_timestamp "CLEANUP_COMPLETED Docker cleanup finished"
fi

exit 0