# =============================================================================
# UPTIME KUMA - SERVICE MONITORING AND STATUS PAGE
# =============================================================================
#
# SERVICE OVERVIEW:
# - Real-time monitoring of all homelab services
# - Beautiful status page for service availability
# - Alerting via email, Discord, Slack, SMS, and more
# - Docker container monitoring via Docker socket
#
# DISASTER RECOVERY PRIORITY: HIGH
# - Essential for monitoring service health during recovery
# - Provides immediate visibility into what's working/broken
# - Critical for validating recovery procedures
#
# RECOVERY TIME OBJECTIVE (RTO): 15 minutes
# RECOVERY POINT OBJECTIVE (RPO): 1 hour (monitoring history)
#
# DEPENDENCIES:
# - Volume1 for configuration storage
# - Docker socket access for container monitoring
# - Network connectivity to all monitored services
# - SMTP access for email notifications
#
# MONITORING TARGETS:
# - All critical homelab services (Plex, Vaultwarden, etc.)
# - Network infrastructure (router, switches)
# - Internet connectivity and speed
# - SSL certificate expiration
# - Disk space and system resources
#
# =============================================================================

version: '3.3'

services:
  uptime-kuma:
    # CONTAINER IMAGE:
    # - louislam/uptime-kuma: Official Uptime Kuma image
    # - Lightweight Node.js application with SQLite database
    # - Pinned to the ":1" major-version tag (official recommendation) so a
    #   re-pull during disaster recovery cannot silently jump to an
    #   incompatible major release — essential for the 15-minute RTO above
    image: louislam/uptime-kuma:1

    # CONTAINER IDENTIFICATION:
    # - uptime_kuma: Clear identification for logs and management
    # - Used in monitoring dashboards and backup scripts
    container_name: uptime_kuma

    # NETWORK CONFIGURATION:
    # - 3444:3001: External port 3444 maps to internal port 3001
    # - Port 3444: Accessible via reverse proxy or direct access
    # - Port 3001: Standard Uptime Kuma web interface port
    # - Accessible at: http://atlantis.vish.local:3444
    ports:
      - '3444:3001'

    environment:
      # USER/GROUP PERMISSIONS:
      # - PUID=1026: User ID for file ownership (Synology user)
      # - PGID=100: Group ID for file access (Synology group)
      # - NOTE(review): PUID/PGID are a linuxserver.io convention; the
      #   official louislam/uptime-kuma image does not document honoring
      #   them — verify, and rely on the chown step in the restore
      #   procedure at the bottom of this file for data-dir permissions
      - PUID=1026
      - PGID=100

      # TIMEZONE CONFIGURATION:
      # - TZ: Timezone for monitoring timestamps and scheduling
      # - Must match system timezone for accurate alerting
      # - Used for maintenance windows and notification timing
      - TZ=America/Los_Angeles

    volumes:
      # CONFIGURATION AND DATABASE:
      # - /volume1/docker/uptimekuma:/app/data
      # - Contains: SQLite database, configuration, notification settings
      # - BACKUP CRITICAL: Contains all monitoring history and settings
      # - Size: ~100MB-1GB depending on monitoring history
      - '/volume1/docker/uptimekuma:/app/data'

      # DOCKER SOCKET ACCESS:
      # - /var/run/docker.sock:/var/run/docker.sock
      # - Enables monitoring of Docker containers directly
      # - Allows automatic discovery of running services
      # - SECURITY NOTE: Provides full Docker API access
      - '/var/run/docker.sock:/var/run/docker.sock'

    # RESTART POLICY:
    # - unless-stopped: restarts automatically on failure and on Docker
    #   daemon/host reboot, but stays down if an operator explicitly
    #   stopped the container (deliberate maintenance is respected)
    # - CRITICAL: Monitoring must be always available
    # - Essential for detecting and alerting on service failures
    restart: unless-stopped

# =============================================================================
# DISASTER RECOVERY PROCEDURES - UPTIME KUMA
# =============================================================================
#
# BACKUP COMMANDS:
# # Configuration backup:
# tar -czf /volume2/backups/uptimekuma-$(date +%Y%m%d).tar.gz /volume1/docker/uptimekuma/
#
# # Database backup (SQLite):
# docker exec uptime_kuma sqlite3 /app/data/kuma.db ".backup /app/data/kuma-backup-$(date +%Y%m%d).db"
#
# RESTORE PROCEDURE:
# 1. Stop container: docker-compose -f uptimekuma.yml down
# 2. Restore data: tar -xzf uptimekuma-backup.tar.gz -C /volume1/docker/
# 3. Fix permissions: chown -R 1026:100 /volume1/docker/uptimekuma/
# 4. Start container: docker-compose -f uptimekuma.yml up -d
# 5. Verify: Access http://atlantis.vish.local:3444
#
# MONITORING SETUP (Post-Recovery):
# 1. Add critical services:
#    - Vaultwarden: https://pw.vish.gg
#    - Plex: http://atlantis.vish.local:32400
#    - Grafana: http://atlantis.vish.local:7099
#    - Router: http://192.168.1.1
#
# 2. Configure notifications:
#    - Email: SMTP settings for alerts
#    - Discord/Slack: Webhook URLs
#    - SMS: Twilio or similar service
#
# 3. Set up status page:
#    - Public status page for family/friends
#    - Custom domain if desired
#    - Maintenance windows for planned outages
#
# TROUBLESHOOTING:
# - Database corruption: Restore from backup or recreate monitors
# - Permission errors: Check PUID/PGID match NAS user/group
# - Docker socket issues: Verify Docker daemon is running
# - Network connectivity: Check firewall and network configuration
#
# HEALTH CHECKS:
# - Service check: curl -f http://localhost:3444/api/status-page/heartbeat
# - Database check: docker exec uptime_kuma ls -la /app/data/
# - Logs: docker logs uptime_kuma
# - Performance: Monitor CPU/memory usage in Grafana
#
# =============================================================================