Files
homelab-optimized/hosts/synology/atlantis/uptimekuma.yml
Gitea Mirror Bot a63c410ce3
Some checks failed
Documentation / Deploy to GitHub Pages (push) Has been cancelled
Documentation / Build Docusaurus (push) Has been cancelled
Sanitized mirror from private repository - 2026-04-18 12:12:12 UTC
2026-04-18 12:12:12 +00:00

140 lines
5.3 KiB
YAML

# =============================================================================
# UPTIME KUMA - SERVICE MONITORING AND STATUS PAGE
# =============================================================================
#
# SERVICE OVERVIEW:
# - Real-time monitoring of all homelab services
# - Beautiful status page for service availability
# - Alerting via email, Discord, Slack, SMS, and more
# - Docker container monitoring via Docker socket
#
# DISASTER RECOVERY PRIORITY: HIGH
# - Essential for monitoring service health during recovery
# - Provides immediate visibility into what's working/broken
# - Critical for validating recovery procedures
#
# RECOVERY TIME OBJECTIVE (RTO): 15 minutes
# RECOVERY POINT OBJECTIVE (RPO): 1 hour (monitoring history)
#
# DEPENDENCIES:
# - Volume1 for configuration storage
# - Docker socket access for container monitoring
# - Network connectivity to all monitored services
# - SMTP access for email notifications
#
# MONITORING TARGETS:
# - All critical homelab services (Plex, Vaultwarden, etc.)
# - Network infrastructure (router, switches)
# - Internet connectivity and speed
# - SSL certificate expiration
# - Disk space and system resources
#
# =============================================================================
version: '3.3'
services:
  uptime-kuma:
    # CONTAINER IMAGE:
    # - louislam/uptime-kuma: Official Uptime Kuma image
    # - Lightweight Node.js application with SQLite database
    # - Regular updates with new monitoring features
    # - NOTE(review): tag is unpinned (implicit :latest) — consider pinning a
    #   version tag (e.g. louislam/uptime-kuma:1) for reproducible recovery
    image: louislam/uptime-kuma
    # CONTAINER IDENTIFICATION:
    # - uptime_kuma: Clear identification for logs and management
    # - Used in monitoring dashboards and backup scripts
    container_name: uptime_kuma
    # NETWORK CONFIGURATION:
    # - 3444:3001: External port 3444 maps to internal port 3001
    # - Port 3444: Accessible via reverse proxy or direct access
    # - Port 3001: Standard Uptime Kuma web interface port
    # - Accessible at: http://atlantis.vish.local:3444
    ports:
      - '3444:3001'
    environment:
      # USER/GROUP PERMISSIONS:
      # - PUID=1026: User ID for file ownership (Synology user)
      # - PGID=100: Group ID for file access (Synology group)
      # - NOTE(review): PUID/PGID is a linuxserver.io image convention; the
      #   official louislam/uptime-kuma image does not document honoring these
      #   variables — verify data-dir ownership after deploy (chown 1026:100)
      - PUID=1026
      - PGID=100
      # TIMEZONE CONFIGURATION:
      # - TZ: Timezone for monitoring timestamps and scheduling
      # - Must match system timezone for accurate alerting
      # - Used for maintenance windows and notification timing
      - TZ=America/Los_Angeles
    volumes:
      # CONFIGURATION AND DATABASE:
      # - /volume1/docker/uptimekuma:/app/data
      # - Contains: SQLite database, configuration, notification settings
      # - BACKUP CRITICAL: Contains all monitoring history and settings
      # - Size: ~100MB-1GB depending on monitoring history
      - '/volume1/docker/uptimekuma:/app/data'
      # DOCKER SOCKET ACCESS:
      # - /var/run/docker.sock:/var/run/docker.sock
      # - Enables monitoring of Docker containers directly
      # - Allows automatic discovery of running services
      # - SECURITY NOTE: Provides full Docker API access
      - '/var/run/docker.sock:/var/run/docker.sock'
    # RESTART POLICY:
    # - unless-stopped: Restarts on failure and on daemon/host reboot,
    #   but stays down if an operator explicitly stopped the container
    # - Use 'always' instead if monitoring must survive manual stops too
    # - Essential for detecting and alerting on service failures
    restart: unless-stopped
# =============================================================================
# DISASTER RECOVERY PROCEDURES - UPTIME KUMA
# =============================================================================
#
# BACKUP COMMANDS:
# # Configuration backup:
# tar -czf /volume2/backups/uptimekuma-$(date +%Y%m%d).tar.gz /volume1/docker/uptimekuma/
#
# # Database backup (SQLite):
# docker exec uptime_kuma sqlite3 /app/data/kuma.db ".backup /app/data/kuma-backup-$(date +%Y%m%d).db"
#
# RESTORE PROCEDURE:
# 1. Stop container: docker-compose -f uptimekuma.yml down
# 2. Restore data: tar -xzf uptimekuma-backup.tar.gz -C /volume1/docker/
# 3. Fix permissions: chown -R 1026:100 /volume1/docker/uptimekuma/
# 4. Start container: docker-compose -f uptimekuma.yml up -d
# 5. Verify: Access http://atlantis.vish.local:3444
#
# MONITORING SETUP (Post-Recovery):
# 1. Add critical services:
# - Vaultwarden: https://pw.vish.gg
# - Plex: http://atlantis.vish.local:32400
# - Grafana: http://atlantis.vish.local:7099
# - Router: http://192.168.1.1
#
# 2. Configure notifications:
# - Email: SMTP settings for alerts
# - Discord/Slack: Webhook URLs
# - SMS: Twilio or similar service
#
# 3. Set up status page:
# - Public status page for family/friends
# - Custom domain if desired
# - Maintenance windows for planned outages
#
# TROUBLESHOOTING:
# - Database corruption: Restore from backup or recreate monitors
# - Permission errors: Check PUID/PGID match NAS user/group
# - Docker socket issues: Verify Docker daemon is running
# - Network connectivity: Check firewall and network configuration
#
# HEALTH CHECKS:
# - Service check: curl -f http://localhost:3444/api/status-page/heartbeat/<slug>
#   (the heartbeat endpoint requires a status-page slug; for a basic liveness
#   check without a status page, curl -f http://localhost:3444 suffices)
# - Database check: docker exec uptime_kuma ls -la /app/data/
# - Logs: docker logs uptime_kuma
# - Performance: Monitor CPU/memory usage in Grafana
#
# =============================================================================