Files
homelab-optimized/ansible/automation/playbooks/prometheus_target_discovery.yml
Gitea Mirror Bot c727d0bfb1
Some checks failed
Documentation / Deploy to GitHub Pages (push) Has been cancelled
Documentation / Build Docusaurus (push) Has been cancelled
Sanitized mirror from private repository - 2026-03-24 12:45:58 UTC
2026-03-24 12:45:58 +00:00

321 lines
12 KiB
YAML

---
# Prometheus Target Discovery
# Auto-discovers containers for monitoring and validates coverage
# Run with: ansible-playbook -i hosts.ini playbooks/prometheus_target_discovery.yml
- name: Prometheus Target Discovery
  # Per-host discovery play. For every inventory host it:
  #   1. scans listening sockets for well-known exporter ports,
  #   2. lists Docker containers that publish ports,
  #   3. probes the /metrics endpoints over HTTP,
  #   4. writes a Prometheus scrape-config snippet and a Markdown coverage
  #      report to /tmp on the control node (delegate_to: localhost).
  hosts: all
  gather_facts: true
  vars:
    prometheus_port: 9090
    node_exporter_port: 9100
    cadvisor_port: 8080
    snmp_exporter_port: 9116
    # Expected exporters by host type
    # NOTE(review): not referenced by any task in this play; kept so that
    # inventories or includes that read it keep working.
    expected_exporters:
      synology:
        - "node_exporter"
        - "snmp_exporter"
      debian_clients:
        - "node_exporter"
      hypervisors:
        - "node_exporter"
        - "cadvisor"
  tasks:
    - name: Scan for running exporters
      shell: |
        # Prefer netstat; fall back to ss on hosts without net-tools so the
        # scan does not silently report every exporter as missing.
        listeners() {
          if command -v netstat >/dev/null 2>&1; then
            netstat -tlnp 2>/dev/null
          else
            ss -tlnp 2>/dev/null
          fi
        }
        # Owning process of one listener line: netstat prints "PID/name" in
        # column 7, ss prints users:(("name",pid=...)).
        proc_name() {
          case "$1" in
            *'users:(("'*) printf '%s\n' "$1" | sed 's/.*users:(("\([^"]*\)".*/\1/' ;;
            *) printf '%s\n' "$1" | awk '{print $7}' | cut -d'/' -f2 ;;
          esac
        }
        sockets=$(listeners)

        echo "=== Exporter Discovery on {{ inventory_hostname }} ==="
        # Check for node_exporter
        node_line=$(printf '%s\n' "$sockets" | grep ":{{ node_exporter_port }} " | head -n 1)
        if [ -n "$node_line" ]; then
          echo "✓ node_exporter: Port {{ node_exporter_port }} ($(proc_name "$node_line"))"
        else
          echo "✗ node_exporter: Not found on port {{ node_exporter_port }}"
        fi
        # Check for cAdvisor
        if printf '%s\n' "$sockets" | grep -q ":{{ cadvisor_port }} "; then
          echo "✓ cAdvisor: Port {{ cadvisor_port }}"
        else
          echo "✗ cAdvisor: Not found on port {{ cadvisor_port }}"
        fi
        # Check for SNMP exporter
        if printf '%s\n' "$sockets" | grep -q ":{{ snmp_exporter_port }} "; then
          echo "✓ snmp_exporter: Port {{ snmp_exporter_port }}"
        else
          echo "✗ snmp_exporter: Not found on port {{ snmp_exporter_port }}"
        fi
        # Check for custom exporters (anything listening on 9100-9199)
        echo ""
        echo "=== Custom Exporters ==="
        printf '%s\n' "$sockets" | grep -E ":91[0-9][0-9] " | while read -r line; do
          # Strip everything up to the last ':' so IPv6 listeners such as
          # ":::9100" or "[::]:9100" still yield the port number.
          port=$(printf '%s\n' "$line" | awk '{print $4}' | sed 's/.*://')
          process=$(proc_name "$line")
          echo "Found exporter on port $port: $process"
        done
      register: exporter_scan
      # Read-only probe: never report "changed".
      changed_when: false

    - name: Get Docker containers with exposed ports
      shell: |
        echo "=== Container Port Mapping ==="
        if command -v docker >/dev/null 2>&1; then
          # POSIX-portable literal tab; $'\t' is a bashism and the task runs
          # under /bin/sh by default.
          tab=$(printf '\t')
          # No "table" directive: it pads columns with spaces, which would
          # defeat the tab split below.
          docker ps --format "{% raw %}{{.Names}}\t{{.Ports}}{% endraw %}" | grep -E ":[0-9]+->|:[0-9]+/tcp" | while IFS="$tab" read -r name ports; do
            echo "Container: $name"
            echo "Ports: $ports"
            echo "---"
          done
        else
          echo "Docker not available"
        fi
      register: container_ports
      become: true
      changed_when: false

    - name: Test Prometheus metrics endpoints
      uri:
        url: "http://{{ ansible_default_ipv4.address }}:{{ item }}/metrics"
        method: GET
        timeout: 5
      register: metrics_test
      loop:
        - "{{ node_exporter_port }}"
        - "{{ cadvisor_port }}"
        - "{{ snmp_exporter_port }}"
      # Unreachable endpoints are expected; they are classified in the next task.
      failed_when: false

    - name: Analyze metrics endpoints
      set_fact:
        # Ports are normalised to int so the later "port not in ..." tests
        # compare like with like against the integer port variables.
        available_endpoints: >-
          {{
            metrics_test.results
            | selectattr('status', 'defined')
            | selectattr('status', 'equalto', 200)
            | map(attribute='item') | map('int') | list
          }}
        failed_endpoints: >-
          {{
            (metrics_test.results
             | rejectattr('status', 'defined')
             | map(attribute='item') | map('int') | list)
            + (metrics_test.results
               | selectattr('status', 'defined')
               | rejectattr('status', 'equalto', 200)
               | map(attribute='item') | map('int') | list)
          }}
        # True when the docker CLI was found by the container task above.
        # Used instead of ansible_facts.services, which this play never
        # gathers (no service_facts task).
        docker_detected: "{{ 'Docker not available' not in container_ports.stdout }}"

    - name: Discover application metrics
      shell: |
        # Same netstat/ss fallback as the exporter scan.
        listeners() {
          if command -v netstat >/dev/null 2>&1; then
            netstat -tln 2>/dev/null
          else
            ss -tln 2>/dev/null
          fi
        }
        sockets=$(listeners)

        echo "=== Application Metrics Discovery ==="
        app_ports="3000 8080 8081 8090 9091 9093 9094 9115"
        for port in $app_ports; do
          if printf '%s\n' "$sockets" | grep -q ":$port "; then
            # Prometheus text exposition starts with "# HELP"/"# TYPE" lines.
            if curl -s --connect-timeout 2 "http://localhost:$port/metrics" | head -1 | grep -q "^#"; then
              echo "✓ Metrics endpoint found: localhost:$port/metrics"
            elif curl -s --connect-timeout 2 "http://localhost:$port/actuator/prometheus" | head -1 | grep -q "^#"; then
              echo "✓ Spring Boot metrics: localhost:$port/actuator/prometheus"
            else
              echo "? Port $port open but no metrics endpoint detected"
            fi
          fi
        done
      register: app_metrics_discovery
      changed_when: false

    - name: Generate Prometheus configuration snippet
      copy:
        # Jinja block tags are kept at column 0 so they add no stray
        # indentation to the rendered YAML. "labels" lives inside the
        # static_configs entry, the only place Prometheus accepts it.
        content: |
          # Prometheus Target Configuration for {{ inventory_hostname }}
          # Generated: {{ ansible_date_time.iso8601 }}
          {% if available_endpoints | length > 0 %}
          - job_name: '{{ inventory_hostname }}-exporters'
            scrape_interval: 15s
            metrics_path: /metrics
            static_configs:
              - targets:
          {% for port in available_endpoints %}
                  - '{{ ansible_default_ipv4.address }}:{{ port }}'
          {% endfor %}
                labels:
                  host: '{{ inventory_hostname }}'
                  environment: 'homelab'
          {% endif %}
          {% if 'synology' in group_names %}
          # SNMP monitoring for Synology {{ inventory_hostname }}
          - job_name: '{{ inventory_hostname }}-snmp'
            metrics_path: /snmp
            params:
              module: [synology]
            static_configs:
              - targets:
                  - '{{ ansible_default_ipv4.address }}'
                labels:
                  host: '{{ inventory_hostname }}'
                  type: 'synology'
            relabel_configs:
              - source_labels: [__address__]
                target_label: __param_target
              - source_labels: [__param_target]
                target_label: instance
              - target_label: __address__
                replacement: '{{ ansible_default_ipv4.address }}:{{ snmp_exporter_port }}'
          {% endif %}
        dest: "/tmp/prometheus_{{ inventory_hostname }}_targets.yml"
        mode: "0644"
      delegate_to: localhost

    - name: Check for missing monitoring coverage
      set_fact:
        # Built as one Jinja expression so the fact is a real list; group
        # membership uses group_names so a group missing from the inventory
        # does not raise an undefined-variable error.
        monitoring_gaps: >-
          {{
            (['node_exporter missing on Synology']
             if ('synology' in group_names
                 and node_exporter_port not in available_endpoints)
             else [])
            + (['node_exporter missing on Debian client']
               if ('debian_clients' in group_names
                   and node_exporter_port not in available_endpoints)
               else [])
            + (['cAdvisor missing for Docker monitoring']
               if ((docker_detected | bool)
                   and cadvisor_port not in available_endpoints)
               else [])
          }}

    - name: Generate monitoring coverage report
      copy:
        content: |
          # Monitoring Coverage Report - {{ inventory_hostname }}
          Generated: {{ ansible_date_time.iso8601 }}
          ## Host Information
          - Hostname: {{ inventory_hostname }}
          - IP Address: {{ ansible_default_ipv4.address }}
          - OS: {{ ansible_facts['os_family'] }} {{ ansible_facts['distribution_version'] }}
          - Groups: {{ group_names | join(', ') }}
          ## Exporter Discovery
          ```
          {{ exporter_scan.stdout }}
          ```
          ## Available Metrics Endpoints
          {% for endpoint in available_endpoints %}
          - ✅ http://{{ ansible_default_ipv4.address }}:{{ endpoint }}/metrics
          {% endfor %}
          {% if failed_endpoints | length > 0 %}
          ## Failed/Missing Endpoints
          {% for endpoint in failed_endpoints %}
          - ❌ http://{{ ansible_default_ipv4.address }}:{{ endpoint }}/metrics
          {% endfor %}
          {% endif %}
          ## Container Port Mapping
          ```
          {{ container_ports.stdout }}
          ```
          ## Application Metrics Discovery
          ```
          {{ app_metrics_discovery.stdout }}
          ```
          {% if monitoring_gaps | length > 0 %}
          ## Monitoring Gaps
          {% for gap in monitoring_gaps %}
          - ⚠️ {{ gap }}
          {% endfor %}
          {% endif %}
          ## Recommended Actions
          {% if node_exporter_port not in available_endpoints %}
          - Install node_exporter for system metrics
          {% endif %}
          {% if (docker_detected | bool) and cadvisor_port not in available_endpoints %}
          - Install cAdvisor for container metrics
          {% endif %}
          {% if 'synology' in group_names and snmp_exporter_port not in available_endpoints %}
          - Configure SNMP exporter for Synology-specific metrics
          {% endif %}
        dest: "/tmp/monitoring_coverage_{{ inventory_hostname }}_{{ ansible_date_time.epoch }}.md"
        mode: "0644"
      delegate_to: localhost

    - name: Display monitoring summary
      debug:
        msg: |
          Monitoring Coverage Summary for {{ inventory_hostname }}:
          - Available Endpoints: {{ available_endpoints | length }}
          - Failed Endpoints: {{ failed_endpoints | length }}
          - Monitoring Gaps: {{ monitoring_gaps | length }}
          - Prometheus Config: /tmp/prometheus_{{ inventory_hostname }}_targets.yml
          - Coverage Report: /tmp/monitoring_coverage_{{ inventory_hostname }}_{{ ansible_date_time.epoch }}.md
# Consolidation play: runs once on the control node (localhost) after the per-host discovery play
- name: Consolidate Prometheus Configuration
  # Merges the per-host snippets and coverage reports found in /tmp on the
  # control node into one scrape config and one summary report.
  hosts: localhost
  # ansible_date_time.epoch is used for the output file names below, so the
  # minimal fact subset must be gathered; with facts disabled the variable
  # is undefined and the copy tasks fail.
  gather_facts: true
  gather_subset:
    - min
  tasks:
    - name: Combine all target configurations
      shell: |
        echo "# Consolidated Prometheus Targets Configuration"
        echo "# Generated: $(date)"
        echo ""
        echo "scrape_configs:"
        for file in /tmp/prometheus_*_targets.yml; do
          # The -f test also skips the unexpanded pattern when nothing matches.
          if [ -f "$file" ]; then
            echo "  # From $(basename "$file")"
            # Indent each snippet so its jobs nest under scrape_configs.
            sed 's/^/  /' "$file"
            echo ""
          fi
        done
      register: consolidated_config
      # Only reads files and prints; never report "changed".
      changed_when: false

    - name: Save consolidated Prometheus configuration
      copy:
        # Registered stdout has its trailing newline stripped; add it back.
        content: "{{ consolidated_config.stdout }}\n"
        dest: "/tmp/prometheus_homelab_targets_{{ ansible_date_time.epoch }}.yml"
        mode: "0644"

    - name: Generate monitoring summary report
      # NOTE(review): report file names carry a per-run epoch, so reports from
      # earlier runs that are still in /tmp are counted again here.
      shell: |
        echo "# Homelab Monitoring Coverage Summary"
        echo "Generated: $(date)"
        echo ""
        echo "## Coverage by Host"
        total_hosts=0
        monitored_hosts=0
        for file in /tmp/monitoring_coverage_*_*.md; do
          if [ -f "$file" ]; then
            host=$(basename "$file" | sed 's/monitoring_coverage_\(.*\)_[0-9]*\.md$/\1/')
            # grep -c already prints 0 (and exits 1) when nothing matches, so
            # only the exit status is neutralised; echoing a second "0" would
            # produce "0<newline>0" and break the numeric test below.
            endpoints=$(grep -c "✅" "$file" 2>/dev/null || true)
            gaps=$(grep -c "⚠️" "$file" 2>/dev/null || true)
            total_hosts=$((total_hosts + 1))
            if [ "$endpoints" -gt 0 ]; then
              monitored_hosts=$((monitored_hosts + 1))
            fi
            echo "- **$host**: $endpoints endpoints, $gaps gaps"
          fi
        done
        echo ""
        echo "## Summary"
        echo "- Total Hosts: $total_hosts"
        echo "- Monitored Hosts: $monitored_hosts"
        # Guard against division by zero when no host report exists.
        if [ "$total_hosts" -gt 0 ]; then
          echo "- Coverage: $(( monitored_hosts * 100 / total_hosts ))%"
        else
          echo "- Coverage: n/a (no host reports found)"
        fi
        echo ""
        echo "## Next Steps"
        echo "1. Review individual host reports in /tmp/monitoring_coverage_*.md"
        # Same epoch as the file written by the save task above; a fresh
        # "date +%s" here would point at a file that does not exist.
        echo "2. Apply consolidated Prometheus config: /tmp/prometheus_homelab_targets_{{ ansible_date_time.epoch }}.yml"
        echo "3. Address monitoring gaps identified in reports"
      register: summary_report
      changed_when: false

    - name: Save monitoring summary
      copy:
        content: "{{ summary_report.stdout }}\n"
        dest: "/tmp/homelab_monitoring_summary_{{ ansible_date_time.epoch }}.md"
        mode: "0644"

    - name: Display final summary
      debug:
        msg: |
          Homelab Monitoring Discovery Complete!
          📊 Reports Generated:
          - Consolidated Config: /tmp/prometheus_homelab_targets_{{ ansible_date_time.epoch }}.yml
          - Summary Report: /tmp/homelab_monitoring_summary_{{ ansible_date_time.epoch }}.md
          - Individual Reports: /tmp/monitoring_coverage_*.md
          🔧 Next Steps:
          1. Review the summary report for coverage gaps
          2. Apply the consolidated Prometheus configuration
          3. Install missing exporters where needed