#!/bin/bash
# Portainer watchdog — recovers from chisel panic crashes that leave
# orphaned docker-proxy processes blocking port re-allocation.
#
# Deploy to atlantis: /usr/local/bin/watchdog-portainer.sh
# Cron (every 5 min): */5 * * * * /usr/local/bin/watchdog-portainer.sh

# --- Configuration -----------------------------------------------------------
# All values are fixed for the lifetime of the script, so they are declared
# readonly to catch accidental reassignment.

# Docker CLI binary, invoked through sudo.
# NOTE(review): presumably an absolute path because cron runs with a minimal
# PATH — confirm on the target host.
readonly DOCKER=/usr/local/bin/docker

# Name of the container this watchdog keeps running.
readonly CONTAINER=portainer

# Host ports that are scanned for leftover docker-proxy listeners when a start
# fails with a port conflict.
readonly -a PORTS=(8000 9443 10000)

# Endpoint that notify() POSTs status messages to.
readonly NTFY_URL="http://localhost:48978/watchdog"

# Tag passed to logger(1) for every message written by log().
readonly LOG_TAG="watchdog-portainer"

#######################################
# Write one message to the system log via logger(1).
# Globals:   LOG_TAG (read) - tag attached to the log entry
# Arguments: $@ - message words; joined into a single string
# Returns:   exit status of logger
#######################################
log() {
  # "--" ends option parsing so a message that begins with "-" is logged
  # verbatim instead of being interpreted as a logger option.
  logger -t "$LOG_TAG" -- "$*"
}

#######################################
# Send a notification to the endpoint in NTFY_URL (best effort).
# Globals:   NTFY_URL (read) - URL the message is POSTed to
# Arguments: $1 - title, sent as the "Title" request header
#            $2 - message, sent as the request body
#            $3 - priority, sent as the "Priority" header (default: "default")
# Returns:   always 0; a failed or timed-out delivery is deliberately ignored
#            so that notification problems never change the watchdog's outcome
#######################################
notify() {
  local title="$1" msg="$2" priority="${3:-default}"

  # curl has no overall timeout by default. Bound both the connect phase and
  # the whole transfer so an unresponsive endpoint cannot stall this cron job.
  curl -s -o /dev/null \
    --connect-timeout 5 \
    --max-time 15 \
    -H "Title: $title" \
    -H "Priority: $priority" \
    -d "$msg" \
    "$NTFY_URL" || true
}

# Fast path: `docker ps` (without -a) lists running containers only. If a
# container named exactly $CONTAINER is in that list there is nothing to do.
sudo "$DOCKER" ps --format '{{.Names}}' --filter "name=^/${CONTAINER}$" \
  | grep -q "^${CONTAINER}$" \
  && exit 0

# The container is not listed as running: record that and make one plain
# start attempt. docker's stdout and stderr are both captured in start_output
# so the later steps can inspect and report the failure text.
log "Portainer not running — attempting start"

if start_output=$(sudo "$DOCKER" start "$CONTAINER" 2>&1); then
  log "Portainer started successfully"
  notify "Portainer recovered" "Started successfully on atlantis" "default"
  exit 0
fi

# The start attempt failed. When docker reports a port clash, look for
# docker-proxy listeners still bound to the ports in PORTS, terminate them,
# and retry the start once.
if grep -q "port is already allocated" <<<"$start_output"; then
  log "Port conflict detected — cleaning up orphaned docker-proxy processes"

  killed_count=0
  for port in "${PORTS[@]}"; do
    # Keep netstat rows whose local address (column 4) ends in ":<port>" and
    # whose column 7 ("PID/program") names docker-proxy; print just the PID.
    proxy_pids=$(
      sudo netstat -tulpn 2>/dev/null \
        | awk -v p="$port" '$4 ~ ":"p"$" && $7 ~ /docker-proxy/ {split($7,a,"/"); print a[1]}'
    )

    # Intentionally unquoted: one loop iteration per whitespace-separated PID.
    for pid in $proxy_pids; do
      log "Killing orphaned docker-proxy PID $pid (port $port)"
      if sudo kill "$pid"; then
        killed_count=$((killed_count + 1))
      fi
    done
  done

  # Retry only if at least one process was actually signalled.
  if [ "$killed_count" -gt 0 ]; then
    # Short pause before retrying, giving the killed processes time to exit.
    sleep 2
    if start_output=$(sudo "$DOCKER" start "$CONTAINER" 2>&1); then
      log "Portainer started after port cleanup"
      notify "Portainer recovered" "Cleared orphaned docker-proxy processes and started successfully on atlantis" "default"
      exit 0
    fi
  fi
fi

# Every recovery path failed — log the last docker output, send an urgent
# notification, and exit non-zero.
log "ERROR: Could not recover Portainer: $start_output"

# Bash does not expand "\n" inside double quotes, so the separator between the
# summary and docker's output is written with $'\n\n' (ANSI-C quoting) to send
# real newlines instead of a literal backslash-n.
notify "Portainer recovery FAILED" \
  "Could not start on atlantis — manual intervention needed."$'\n\n'"$start_output" \
  "urgent"
exit 1