# =============================================================================
# UPTIME KUMA - SERVICE MONITORING AND STATUS PAGE
# =============================================================================
#
# SERVICE OVERVIEW:
# - Real-time monitoring of all homelab services
# - Beautiful status page for service availability
# - Alerting via email, Discord, Slack, SMS, and more
# - Docker container monitoring via Docker socket
#
# DISASTER RECOVERY PRIORITY: HIGH
# - Essential for monitoring service health during recovery
# - Provides immediate visibility into what's working/broken
# - Critical for validating recovery procedures
#
# RECOVERY TIME OBJECTIVE (RTO): 15 minutes
# RECOVERY POINT OBJECTIVE (RPO): 1 hour (monitoring history)
#
# DEPENDENCIES:
# - Volume1 for configuration storage
# - Docker socket access for container monitoring
# - Network connectivity to all monitored services
# - SMTP access for email notifications
#
# MONITORING TARGETS:
# - All critical homelab services (Plex, Vaultwarden, etc.)
# - Network infrastructure (router, switches)
# - Internet connectivity and speed
# - SSL certificate expiration
# - Disk space and system resources
#
# =============================================================================

version: '3.3'

services:
  uptime-kuma:
    # CONTAINER IMAGE:
    # - louislam/uptime-kuma: Official Uptime Kuma image
    # - Lightweight Node.js application with SQLite database
    # - Regular updates with new monitoring features
    # - NOTE(review): no tag pins this to ":latest"; consider pinning a major
    #   version tag for reproducible disaster recovery
    image: louislam/uptime-kuma

    # CONTAINER IDENTIFICATION:
    # - uptime_kuma: Clear identification for logs and management
    # - Used in monitoring dashboards and backup scripts
    container_name: uptime_kuma

    # NETWORK CONFIGURATION:
    # - 3444:3001: External port 3444 maps to internal port 3001
    # - Port 3444: Accessible via reverse proxy or direct access
    # - Port 3001: Standard Uptime Kuma web interface port
    # - Accessible at: http://atlantis.vish.local:3444
    ports:
      - '3444:3001'

    environment:
      # USER/GROUP PERMISSIONS:
      # - PUID=1026: User ID for file ownership (Synology user)
      # - PGID=100: Group ID for file access (Synology group)
      # - CRITICAL: Must match NAS permissions for data access
      # - NOTE(review): PUID/PGID is a LinuxServer.io image convention; the
      #   official louislam image may ignore these — verify file ownership
      #   in /volume1/docker/uptimekuma after first start
      - PUID=1026
      - PGID=100

      # TIMEZONE CONFIGURATION:
      # - TZ: Timezone for monitoring timestamps and scheduling
      # - Must match system timezone for accurate alerting
      # - Used for maintenance windows and notification timing
      - TZ=America/Los_Angeles

    volumes:
      # CONFIGURATION AND DATABASE:
      # - /volume1/docker/uptimekuma:/app/data
      # - Contains: SQLite database, configuration, notification settings
      # - BACKUP CRITICAL: Contains all monitoring history and settings
      # - Size: ~100MB-1GB depending on monitoring history
      - '/volume1/docker/uptimekuma:/app/data'

      # DOCKER SOCKET ACCESS:
      # - /var/run/docker.sock:/var/run/docker.sock
      # - Enables monitoring of Docker containers directly
      # - Allows automatic discovery of running services
      # - SECURITY NOTE: Provides full Docker API access
      - '/var/run/docker.sock:/var/run/docker.sock'

    # RESTART POLICY:
    # - unless-stopped: Container restarts automatically on failure or reboot,
    #   but stays down if it was manually stopped
    # - CRITICAL: Monitoring must be always available
    # - Essential for detecting and alerting on service failures
    restart: unless-stopped

# =============================================================================
# DISASTER RECOVERY PROCEDURES - UPTIME KUMA
# =============================================================================
#
# BACKUP COMMANDS:
# # Configuration backup:
# tar -czf /volume2/backups/uptimekuma-$(date +%Y%m%d).tar.gz /volume1/docker/uptimekuma/
#
# # Database backup (SQLite):
# docker exec uptime_kuma sqlite3 /app/data/kuma.db ".backup /app/data/kuma-backup-$(date +%Y%m%d).db"
#
# RESTORE PROCEDURE:
# 1. Stop container: docker-compose -f uptimekuma.yml down
# 2. Restore data: tar -xzf uptimekuma-backup.tar.gz -C /volume1/docker/
# 3. Fix permissions: chown -R 1026:100 /volume1/docker/uptimekuma/
# 4. Start container: docker-compose -f uptimekuma.yml up -d
# 5. Verify: Access http://atlantis.vish.local:3444
#
# MONITORING SETUP (Post-Recovery):
# 1. Add critical services:
#    - Vaultwarden: https://pw.vish.gg
#    - Plex: http://atlantis.vish.local:32400
#    - Grafana: http://atlantis.vish.local:7099
#    - Router: http://192.168.1.1
#
# 2. Configure notifications:
#    - Email: SMTP settings for alerts
#    - Discord/Slack: Webhook URLs
#    - SMS: Twilio or similar service
#
# 3. Set up status page:
#    - Public status page for family/friends
#    - Custom domain if desired
#    - Maintenance windows for planned outages
#
# TROUBLESHOOTING:
# - Database corruption: Restore from backup or recreate monitors
# - Permission errors: Check PUID/PGID match NAS user/group
# - Docker socket issues: Verify Docker daemon is running
# - Network connectivity: Check firewall and network configuration
#
# HEALTH CHECKS:
# - Service check: curl -f http://localhost:3444/api/status-page/heartbeat
# - Database check: docker exec uptime_kuma ls -la /app/data/
# - Logs: docker logs uptime_kuma
# - Performance: Monitor CPU/memory usage in Grafana
#
# =============================================================================