285 lines
9.0 KiB
YAML
285 lines
9.0 KiB
YAML
# Alerting Stack - Alertmanager + Notification Bridges
# =============================================================================
# Dual-channel alerting: ntfy (mobile push) + Signal (encrypted messaging)
# =============================================================================
# Deployed via: Portainer GitOps
# Ports: 9093 (Alertmanager), 5000 (signal-bridge), 5001 (ntfy-bridge)
#
# Alert Routing:
# - Warning alerts → ntfy only
# - Critical alerts → ntfy + Signal
# - Resolved alerts → same channel(s) as the firing alert (both for critical)
#
# Uses docker configs to embed the Python bridge apps, since Portainer GitOps
# doesn't support docker build
|
|
|
|
configs:
  # Alertmanager configuration, embedded inline and mounted into the
  # alertmanager service as /etc/alertmanager/alertmanager.yml.
  alertmanager_config:
    content: |
      global:
        resolve_timeout: 5m

      route:
        # Default route: anything not matched by a child route goes to ntfy.
        group_by: ['alertname', 'severity', 'instance']
        group_wait: 30s
        group_interval: 5m
        repeat_interval: 4h
        receiver: 'ntfy-all'

        routes:
          # Critical alerts fan out to both ntfy and Signal.
          # `matchers` replaces the deprecated `match` key.
          - matchers:
              - 'severity="critical"'
            receiver: 'critical-alerts'
            continue: false
          # Warning alerts go to ntfy only.
          - matchers:
              - 'severity="warning"'
            receiver: 'ntfy-all'

      receivers:
        - name: 'ntfy-all'
          webhook_configs:
            - url: 'http://ntfy-bridge:5001/alert'
              send_resolved: true

        - name: 'critical-alerts'
          webhook_configs:
            - url: 'http://ntfy-bridge:5001/alert'
              send_resolved: true
            - url: 'http://signal-bridge:5000/alert'
              send_resolved: true

      inhibit_rules:
        # While a critical alert fires, mute the warning that shares its
        # alertname and instance.
        - source_matchers:
            - 'severity="critical"'
          target_matchers:
            - 'severity="warning"'
          equal: ['alertname', 'instance']
|
|
|
|
# ntfy-bridge Python App
|
|
ntfy_bridge_app:
|
|
content: |
|
|
from flask import Flask, request, jsonify
|
|
import requests
|
|
import os
|
|
|
|
app = Flask(__name__)
|
|
|
|
# Base URL of the ntfy server. A trailing slash is stripped so that
# f"{NTFY_URL}/{NTFY_TOPIC}" never contains a double slash.
NTFY_URL = os.environ.get('NTFY_URL', 'http://NTFY:80').rstrip('/')
# Topic that every alert is published to.
NTFY_TOPIC = os.environ.get('NTFY_TOPIC', 'homelab-alerts')
|
|
|
|
def get_priority(severity: str, status: str) -> str:
    """Return the value for the ntfy ``Priority`` header.

    Args:
        severity: The alert's ``severity`` label.
        status: The alert status (``'firing'`` or ``'resolved'``).

    Returns:
        ``'3'`` for resolved alerts, ``'5'`` for firing critical alerts,
        and ``'4'`` for every other firing alert.
    """
    # Resolution is checked first so a resolved critical is not sent as '5'.
    if status == 'resolved':
        return '3'
    if severity == 'critical':
        return '5'
    return '4'
|
|
|
|
def get_tag(severity: str, status: str) -> str:
    """Return the value for the ntfy ``Tags`` header.

    Args:
        severity: The alert's ``severity`` label.
        status: The alert status (``'firing'`` or ``'resolved'``).

    Returns:
        ``'white_check_mark'`` for resolved alerts, ``'rotating_light'`` for
        firing critical alerts, and ``'warning'`` for everything else.
    """
    # Resolution wins over severity, mirroring get_priority.
    if status == 'resolved':
        return 'white_check_mark'
    if severity == 'critical':
        return 'rotating_light'
    return 'warning'
|
|
|
|
def format_alert(alert: dict) -> tuple[str, str, str, str]:
    """Turn one alert from a webhook payload into notification fields.

    Args:
        alert: A single entry of the payload's ``alerts`` list. Missing keys
            fall back to defaults; ``labels`` / ``annotations`` that are
            absent or ``None`` are treated as empty.

    Returns:
        ``(title, body, severity, status)`` where ``title`` is
        ``"<alertname> [FIRING|RESOLVED]"`` and ``body`` joins the summary,
        the description (when it differs from the summary) and the host
        line with newlines.
    """
    status = alert.get('status', 'firing')
    # `or {}` also covers an explicit JSON null, which .get() would pass on.
    labels = alert.get('labels') or {}
    annotations = alert.get('annotations') or {}

    alertname = labels.get('alertname', 'Unknown')
    severity = labels.get('severity', 'warning')
    instance = labels.get('instance', 'unknown')

    status_text = 'RESOLVED' if status == 'resolved' else 'FIRING'
    title = f"{alertname} [{status_text}]"

    summary = annotations.get('summary', '')
    description = annotations.get('description', '')

    body_parts = []
    if summary:
        body_parts.append(summary)
    # Skip a description that merely repeats the summary.
    if description and description != summary:
        body_parts.append(description)
    if instance != 'unknown':
        body_parts.append(f"Host: {instance}")

    # Never send an empty body: fall back to a short status line.
    body = '\n'.join(body_parts) if body_parts else f"Alert {status_text.lower()}"
    return title, body, severity, status
|
|
|
|
@app.route('/alert', methods=['POST'])
def handle_alert():
    """Forward every alert in a webhook payload to ntfy.

    One POST is made to ``{NTFY_URL}/{NTFY_TOPIC}`` per alert, carrying the
    formatted body plus ``Title``, ``Priority`` and ``Tags`` headers.

    Returns:
        ``{'status': 'sent', 'count': n}`` when every alert was accepted;
        ``{'status': 'error', 'message': ...}`` with HTTP 400 when the body
        is not a JSON object, or HTTP 500 when publishing fails.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so a bad request is answered with 400 rather than 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'status': 'error', 'message': 'No data'}), 400

    alerts = data.get('alerts', [])
    try:
        for alert in alerts:
            title, body, severity, status = format_alert(alert)
            response = requests.post(
                f"{NTFY_URL}/{NTFY_TOPIC}",
                # Encode explicitly so non-Latin-1 text in alert annotations
                # does not depend on the HTTP library's default encoding.
                data=body.encode('utf-8'),
                headers={
                    'Title': title.encode('utf-8'),
                    'Priority': get_priority(severity, status),
                    'Tags': get_tag(severity, status),
                },
                # Without a timeout a hung ntfy server blocks the worker forever.
                timeout=10,
            )
            # Treat an HTTP error from ntfy as a failure, not as "sent".
            response.raise_for_status()
        return jsonify({'status': 'sent', 'count': len(alerts)})
    except Exception as e:
        # Endpoint boundary: log the traceback and report the failure.
        app.logger.exception('Failed to forward alert to ntfy')
        return jsonify({'status': 'error', 'message': str(e)}), 500
|
|
|
|
@app.route('/health', methods=['GET'])
def health():
    """Health endpoint: returns ``{'status': 'healthy'}`` unconditionally."""
    return jsonify({'status': 'healthy'})
|
|
|
|
if __name__ == '__main__':
    # Direct-run entry point using Flask's built-in server on port 5001.
    app.run(host='0.0.0.0', port=5001)
|
|
|
|
# signal-bridge Python App
|
|
signal_bridge_app:
|
|
content: |
|
|
import os
|
|
import requests
|
|
from flask import Flask, request, jsonify
|
|
|
|
app = Flask(__name__)
|
|
|
|
# Base URL of the Signal REST API. A trailing slash is stripped so that
# f"{SIGNAL_API_URL}/v2/send" never contains a double slash.
SIGNAL_API_URL = os.environ.get('SIGNAL_API_URL', 'http://signal-api:8080').rstrip('/')
# Number the messages are sent from; empty means "not configured".
SIGNAL_SENDER = os.environ.get('SIGNAL_SENDER', '')
# Comma-separated recipients. Blank entries are dropped: ''.split(',') is
# [''], which is truthy and would defeat any "no recipients" check.
SIGNAL_RECIPIENTS = [
    recipient.strip()
    for recipient in os.environ.get('SIGNAL_RECIPIENTS', '').split(',')
    if recipient.strip()
]
|
|
|
|
def format_alert_message(alert_data: dict) -> str:
    """Render a webhook payload as one Signal text message.

    Args:
        alert_data: The decoded webhook payload; its ``alerts`` list is read.
            ``labels`` / ``annotations`` that are absent or ``None`` are
            treated as empty.

    Returns:
        One ``"<emoji> [<STATE>] <summary>"`` entry per alert, followed by
        the description on a new line when present. Entries are separated
        by a blank line; the result is ``""`` when there are no alerts.
    """
    messages = []
    for alert in alert_data.get('alerts', []):
        status = alert.get('status', 'firing')
        # `or {}` also covers an explicit JSON null.
        labels = alert.get('labels') or {}
        annotations = alert.get('annotations') or {}
        severity = labels.get('severity', 'warning')
        # Fall back to the alert name when no summary annotation exists.
        summary = annotations.get('summary', labels.get('alertname', 'Alert'))
        description = annotations.get('description', '')

        # Resolution takes precedence over severity.
        if status == 'resolved':
            emoji, text = '✅', 'RESOLVED'
        elif severity == 'critical':
            emoji, text = '🚨', 'CRITICAL'
        else:
            emoji, text = '⚠️', 'WARNING'

        msg = f"{emoji} [{text}] {summary}"
        if description:
            msg += f"\n{description}"
        messages.append(msg)
    return "\n\n".join(messages)
|
|
|
|
def send_signal_message(message):
    """Send ``message`` to every configured Signal recipient.

    Each recipient gets its own POST to ``{SIGNAL_API_URL}/v2/send``, so one
    failing recipient does not stop delivery to the others.

    Args:
        message: The text to send.

    Returns:
        True only if a sender and at least one non-blank recipient are
        configured and every request returned HTTP 200 or 201.
    """
    # Drop blank entries up front: an unset variable split on ',' gives [''],
    # which previously slipped past the guard and reported success although
    # nothing had been sent.
    recipients = [r.strip() for r in SIGNAL_RECIPIENTS if r.strip()]
    if not SIGNAL_SENDER or not recipients:
        return False

    success = True
    for recipient in recipients:
        try:
            response = requests.post(
                f"{SIGNAL_API_URL}/v2/send",
                json={
                    "message": message,
                    "number": SIGNAL_SENDER,
                    "recipients": [recipient],
                },
                timeout=30,
            )
        except Exception:
            # Best effort: record the failure and keep going. The recipient
            # is not logged, to keep phone numbers out of the logs.
            app.logger.exception("Signal send request failed")
            success = False
            continue
        if response.status_code not in (200, 201):
            app.logger.error("Signal API returned HTTP %s", response.status_code)
            success = False
    return success
|
|
|
|
@app.route('/health', methods=['GET'])
def health():
    """Health endpoint: returns ``{"status": "healthy"}`` unconditionally."""
    return jsonify({"status": "healthy"})
|
|
|
|
@app.route('/alert', methods=['POST'])
def receive_alert():
    """Relay a webhook payload to Signal.

    Returns:
        ``{"status": "sent"}`` when ``send_signal_message`` reports success;
        ``{"status": "partial_failure"}`` with HTTP 207 when it does not;
        ``{"error": ...}`` with HTTP 400 for a missing, empty or non-JSON
        body, or HTTP 500 for any unexpected exception.
    """
    try:
        # silent=True returns None for a malformed or non-JSON body instead
        # of raising, so such requests reach the 400 branch below rather
        # than being reported as a 500.
        alert_data = request.get_json(silent=True)
        if not alert_data:
            return jsonify({"error": "No data"}), 400
        message = format_alert_message(alert_data)
        if send_signal_message(message):
            return jsonify({"status": "sent"})
        # NOTE(review): 207 is a 2xx status, so the caller presumably treats
        # the notification as delivered and will not retry, even when no
        # recipient was reached. Confirm whether a 5xx is wanted here.
        return jsonify({"status": "partial_failure"}), 207
    except Exception as e:
        # Endpoint boundary: log the traceback and report the failure.
        app.logger.exception("Failed to relay alert to Signal")
        return jsonify({"error": str(e)}), 500
|
|
|
|
if __name__ == '__main__':
    # Direct-run entry point using Flask's built-in server on port 5000.
    app.run(host='0.0.0.0', port=5000)
|
|
|
|
services:
  # Alertmanager: receives alerts and routes them to the bridges below.
  alertmanager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    restart: unless-stopped
    ports:
      - "9093:9093"
    configs:
      - source: alertmanager_config
        target: /etc/alertmanager/alertmanager.yml
    volumes:
      - alertmanager-data:/alertmanager
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
      - '--web.external-url=http://localhost:9093'
    networks:
      - alerting
      - monitoring-stack_monitoring

  # ntfy-bridge: turns Alertmanager webhooks into ntfy push notifications.
  ntfy-bridge:
    image: python:3.11-slim
    container_name: ntfy-bridge
    restart: unless-stopped
    ports:
      - "5001:5001"
    environment:
      - NTFY_URL=http://NTFY:80
      # No quotes here: in list form they would become part of the value
      # and end up inside the topic name.
      - NTFY_TOPIC=REDACTED_NTFY_TOPIC
    configs:
      - source: ntfy_bridge_app
        target: /app/app.py
    # Dependencies are installed at start-up because the app is injected as
    # a docker config rather than built into an image.
    command: >
      sh -c "pip install --quiet flask requests gunicorn &&
      cd /app && gunicorn --bind 0.0.0.0:5001 --workers 2 app:app"
    networks:
      - alerting
      - ntfy-stack_default
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:5001/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      # pip install runs before gunicorn starts; don't count probes that
      # fail during that window.
      start_period: 60s

  # signal-bridge: turns Alertmanager webhooks into Signal messages.
  signal-bridge:
    image: python:3.11-slim
    container_name: signal-bridge
    restart: unless-stopped
    ports:
      - "5000:5000"
    environment:
      - SIGNAL_API_URL=http://signal-api:8080
      - SIGNAL_SENDER=REDACTED_PHONE_NUMBER
      - SIGNAL_RECIPIENTS=REDACTED_PHONE_NUMBER
    configs:
      - source: signal_bridge_app
        target: /app/app.py
    # Dependencies are installed at start-up because the app is injected as
    # a docker config rather than built into an image.
    command: >
      sh -c "pip install --quiet flask requests gunicorn &&
      cd /app && gunicorn --bind 0.0.0.0:5000 --workers 2 app:app"
    networks:
      - alerting
      - signal-api-stack_default
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:5000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      # pip install runs before gunicorn starts; don't count probes that
      # fail during that window.
      start_period: 60s
|
|
|
|
volumes:
  # Named volume mounted at /alertmanager (Alertmanager's --storage.path).
  alertmanager-data:
|
|
|
|
networks:
  # Stack-local network shared by Alertmanager and both bridges.
  alerting:
    driver: bridge
  # Networks created outside this stack; they must exist before deployment.
  monitoring-stack_monitoring:
    external: true
  ntfy-stack_default:
    external: true
  signal-api-stack_default:
    external: true
|