Sanitized mirror from private repository - 2026-04-18 11:19:59 UTC
This commit is contained in:
69
scripts/watchdog-portainer.sh
Normal file
69
scripts/watchdog-portainer.sh
Normal file
@@ -0,0 +1,69 @@
|
||||
#!/bin/bash
# Portainer watchdog — recovers from chisel panic crashes that leave
# orphaned docker-proxy processes blocking port re-allocation.
#
# Deploy to atlantis: /usr/local/bin/watchdog-portainer.sh
# Cron (every 5 min): */5 * * * * /usr/local/bin/watchdog-portainer.sh
|
||||
|
||||
# --- Configuration (constants; marked readonly so nothing below can clobber them) ---

# Path to the docker CLI (presumably absolute because cron runs with a minimal PATH).
readonly DOCKER=/usr/local/bin/docker
# Name of the container this watchdog keeps running.
readonly CONTAINER=portainer
# Host ports inspected for orphaned docker-proxy listeners after a port-conflict failure.
readonly -a PORTS=(8000 9443 10000)
# Endpoint that receives notifications via HTTP POST (Title/Priority sent as headers).
readonly NTFY_URL="http://localhost:48978/watchdog"
# Tag passed to logger(1) for every log line.
readonly LOG_TAG="watchdog-portainer"
|
||||
|
||||
# Write one message to syslog under this script's tag.
# Globals:   LOG_TAG (read)
# Arguments: all arguments are joined into a single message string.
log() {
  logger -t "$LOG_TAG" "$*"
}
|
||||
|
||||
# Post a notification to the endpoint in NTFY_URL.
# Globals:   NTFY_URL (read)
# Arguments: $1 - title, sent as the "Title" header
#            $2 - message, sent as the POST body
#            $3 - priority, sent as the "Priority" header (default: "default")
# Returns:   always 0 — delivery is best-effort and must never abort recovery.
notify() {
  local title="$1" msg="$2" priority="${3:-default}"
  # Bound the request: without a timeout an unresponsive endpoint would hang
  # this cron job indefinitely and let later runs stack up behind it.
  curl -s -o /dev/null \
    --connect-timeout 5 \
    --max-time 15 \
    -H "Title: $title" \
    -H "Priority: $priority" \
    -d "$msg" \
    "$NTFY_URL" || true
}
|
||||
|
||||
# --- Main flow ---------------------------------------------------------------
# Exit status: 0 when Portainer is running or was recovered, 1 when recovery failed.

# Is portainer already running? Then there is nothing to do.
if sudo "$DOCKER" ps --filter "name=^/${CONTAINER}$" --format '{{.Names}}' \
  | grep -q "^${CONTAINER}$"; then
  exit 0
fi

# Not running — try a plain start first.
log "Portainer not running — attempting start"

# Test the command directly so the status checked is unambiguously docker's.
if start_output=$(sudo "$DOCKER" start "$CONTAINER" 2>&1); then
  log "Portainer started successfully"
  notify "Portainer recovered" "Started successfully on atlantis" "default"
  exit 0
fi

# Start failed — check if it's a port conflict from orphaned docker-proxy processes
if [[ "$start_output" == *"port is already allocated"* ]]; then
  log "Port conflict detected — cleaning up orphaned docker-proxy processes"

  killed_any=false
  for port in "${PORTS[@]}"; do
    # Find docker-proxy PIDs holding these specific TCP ports: field 4 must end
    # in ":<port>", field 7 must mention docker-proxy, and the part of field 7
    # before the "/" is taken as the PID.
    pids=$(sudo netstat -tulpn 2>/dev/null \
      | awk -v p="$port" '$4 ~ ":"p"$" && $7 ~ /docker-proxy/ {split($7,a,"/"); print a[1]}')
    # Intentionally unquoted: split the awk output into one PID per word.
    for pid in $pids; do
      log "Killing orphaned docker-proxy PID $pid (port $port)"
      if sudo kill "$pid"; then
        killed_any=true
      fi
    done
  done

  # Only retry when something was actually killed; otherwise the retry would
  # fail for the same reason.
  if [[ "$killed_any" == true ]]; then
    # Brief pause before retrying so the killed proxies can release their ports.
    sleep 2
    if start_output=$(sudo "$DOCKER" start "$CONTAINER" 2>&1); then
      log "Portainer started after port cleanup"
      notify "Portainer recovered" "Cleared orphaned docker-proxy processes and started successfully on atlantis" "default"
      exit 0
    fi
  fi
fi

# Still failed — escalate
log "ERROR: Could not recover Portainer: $start_output"
# Build the body with printf so the blank line is a real newline pair; "\n"
# inside double quotes would be sent as a literal backslash-n.
printf -v failure_msg '%s\n\n%s' \
  "Could not start on atlantis — manual intervention needed." "$start_output"
notify "Portainer recovery FAILED" "$failure_msg" "urgent"
exit 1
|
||||
Reference in New Issue
Block a user