# =============================================================================
# UPTIME KUMA - SERVICE MONITORING AND STATUS PAGE
# =============================================================================
#
# SERVICE OVERVIEW:
# - Real-time monitoring of all homelab services
# - Beautiful status page for service availability
# - Alerting via email, Discord, Slack, SMS, and more
# - Docker container monitoring via Docker socket
#
# DISASTER RECOVERY PRIORITY: HIGH
# - Essential for monitoring service health during recovery
# - Provides immediate visibility into what's working/broken
# - Critical for validating recovery procedures
#
# RECOVERY TIME OBJECTIVE (RTO): 15 minutes
# RECOVERY POINT OBJECTIVE (RPO): 1 hour (monitoring history)
#
# DEPENDENCIES:
# - Volume1 for configuration storage
# - Docker socket access for container monitoring
# - Network connectivity to all monitored services
# - SMTP access for email notifications
#
# MONITORING TARGETS:
# - All critical homelab services (Plex, Vaultwarden, etc.)
# - Network infrastructure (router, switches)
# - Internet connectivity and speed
# - SSL certificate expiration
# - Disk space and system resources
#
# =============================================================================

version: '3.3'

services:
  uptime-kuma:
    # CONTAINER IMAGE:
    # - louislam/uptime-kuma: Official Uptime Kuma image
    # - Lightweight Node.js application with SQLite database
    # - Pinned to the ":1" major-version tag (official recommendation) so a
    #   re-pull during disaster recovery cannot silently jump to an
    #   incompatible major release — essential for the 15-minute RTO above
    image: louislam/uptime-kuma:1

    # CONTAINER IDENTIFICATION:
    # - uptime_kuma: Clear identification for logs and management
    # - Used in monitoring dashboards and backup scripts
    container_name: uptime_kuma

    # NETWORK CONFIGURATION:
    # - 3444:3001: External port 3444 maps to internal port 3001
    # - Port 3444: Accessible via reverse proxy or direct access
    # - Port 3001: Standard Uptime Kuma web interface port
    # - Accessible at: http://atlantis.vish.local:3444
    ports:
      - '3444:3001'

    environment:
      # USER/GROUP PERMISSIONS:
      # - PUID=1026: User ID for file ownership (Synology user)
      # - PGID=100: Group ID for file access (Synology group)
      # - NOTE(review): PUID/PGID are a linuxserver.io convention; the
      #   official louislam/uptime-kuma image does not document honoring
      #   them — verify, and rely on the chown step in the restore
      #   procedure at the bottom of this file for data-dir permissions
      - PUID=1026
      - PGID=100

      # TIMEZONE CONFIGURATION:
      # - TZ: Timezone for monitoring timestamps and scheduling
      # - Must match system timezone for accurate alerting
      # - Used for maintenance windows and notification timing
      - TZ=America/Los_Angeles

    volumes:
      # CONFIGURATION AND DATABASE:
      # - /volume1/docker/uptimekuma:/app/data
      # - Contains: SQLite database, configuration, notification settings
      # - BACKUP CRITICAL: Contains all monitoring history and settings
      # - Size: ~100MB-1GB depending on monitoring history
      - '/volume1/docker/uptimekuma:/app/data'

      # DOCKER SOCKET ACCESS:
      # - /var/run/docker.sock:/var/run/docker.sock
      # - Enables monitoring of Docker containers directly
      # - Allows automatic discovery of running services
      # - SECURITY NOTE: Provides full Docker API access
      - '/var/run/docker.sock:/var/run/docker.sock'

    # RESTART POLICY:
    # - unless-stopped: restarts automatically on failure and on Docker
    #   daemon/host reboot, but stays down if an operator explicitly
    #   stopped the container (deliberate maintenance is respected)
    # - CRITICAL: Monitoring must be always available
    # - Essential for detecting and alerting on service failures
    restart: unless-stopped

# =============================================================================
# DISASTER RECOVERY PROCEDURES - UPTIME KUMA
# =============================================================================
#
# BACKUP COMMANDS:
# # Configuration backup:
# tar -czf /volume2/backups/uptimekuma-$(date +%Y%m%d).tar.gz /volume1/docker/uptimekuma/
#
# # Database backup (SQLite):
# docker exec uptime_kuma sqlite3 /app/data/kuma.db ".backup /app/data/kuma-backup-$(date +%Y%m%d).db"
#
# RESTORE PROCEDURE:
# 1. Stop container: docker-compose -f uptimekuma.yml down
# 2. Restore data: tar -xzf uptimekuma-backup.tar.gz -C /volume1/docker/
# 3. Fix permissions: chown -R 1026:100 /volume1/docker/uptimekuma/
# 4. Start container: docker-compose -f uptimekuma.yml up -d
# 5. Verify: Access http://atlantis.vish.local:3444
#
# MONITORING SETUP (Post-Recovery):
# 1. Add critical services:
#    - Vaultwarden: https://pw.vish.gg
#    - Plex: http://atlantis.vish.local:32400
#    - Grafana: http://atlantis.vish.local:7099
#    - Router: http://192.168.1.1
#
# 2. Configure notifications:
#    - Email: SMTP settings for alerts
#    - Discord/Slack: Webhook URLs
#    - SMS: Twilio or similar service
#
# 3. Set up status page:
#    - Public status page for family/friends
#    - Custom domain if desired
#    - Maintenance windows for planned outages
#
# TROUBLESHOOTING:
# - Database corruption: Restore from backup or recreate monitors
# - Permission errors: Check PUID/PGID match NAS user/group
# - Docker socket issues: Verify Docker daemon is running
# - Network connectivity: Check firewall and network configuration
#
# HEALTH CHECKS:
# - Service check: curl -f http://localhost:3444/api/status-page/heartbeat
# - Database check: docker exec uptime_kuma ls -la /app/data/
# - Logs: docker logs uptime_kuma
# - Performance: Monitor CPU/memory usage in Grafana
#
# =============================================================================