321 lines
12 KiB
YAML
321 lines
12 KiB
YAML
---
|
|
# Prometheus Target Discovery
|
|
# Auto-discovers Prometheus exporters, container port mappings and application metrics endpoints, then validates monitoring coverage
|
|
# Run with: ansible-playbook -i hosts.ini playbooks/prometheus_target_discovery.yml
|
|
|
|
- name: Prometheus Target Discovery
  hosts: all
  # Facts must be gathered: later tasks read ansible_default_ipv4,
  # ansible_date_time and ansible_facts.
  gather_facts: true
  vars:
    # Ports used by the discovery tasks. prometheus_port is declared here but
    # is not referenced by any task visible in this playbook.
    prometheus_port: 9090
    node_exporter_port: 9100
    cadvisor_port: 8080
    snmp_exporter_port: 9116

    # Expected exporters by host type (keyed by inventory group name).
    # NOTE(review): no visible task reads this mapping; the coverage checks
    # hard-code their own rules - confirm whether it is still needed.
    expected_exporters:
      synology:
        - "node_exporter"
        - "snmp_exporter"
      debian_clients:
        - "node_exporter"
      hypervisors:
        - "node_exporter"
        - "cadvisor"

  tasks:
|
|
- name: Scan for running exporters
  # Read-only probe of the host's listening TCP sockets. The human-readable
  # result is stored in exporter_scan.stdout and embedded in the coverage
  # report by a later task.
  # NOTE(review): the task runs without become, so netstat -p may print "-"
  # instead of a program name for sockets owned by other users.
  shell: |
    echo "=== Exporter Discovery on {{ inventory_hostname }} ==="

    # Take a single snapshot so every check below sees the same socket list.
    # If netstat is unavailable the snapshot is empty and every exporter is
    # reported as "Not found".
    listeners=$(netstat -tlnp 2>/dev/null)

    # Check for node_exporter
    if printf '%s\n' "$listeners" | grep -q ":{{ node_exporter_port }} "; then
      echo "✓ node_exporter: Port {{ node_exporter_port }} ($(printf '%s\n' "$listeners" | grep ":{{ node_exporter_port }} " | awk '{print $7}' | cut -d'/' -f2))"
    else
      echo "✗ node_exporter: Not found on port {{ node_exporter_port }}"
    fi

    # Check for cAdvisor
    if printf '%s\n' "$listeners" | grep -q ":{{ cadvisor_port }} "; then
      echo "✓ cAdvisor: Port {{ cadvisor_port }}"
    else
      echo "✗ cAdvisor: Not found on port {{ cadvisor_port }}"
    fi

    # Check for SNMP exporter
    if printf '%s\n' "$listeners" | grep -q ":{{ snmp_exporter_port }} "; then
      echo "✓ snmp_exporter: Port {{ snmp_exporter_port }}"
    else
      echo "✗ snmp_exporter: Not found on port {{ snmp_exporter_port }}"
    fi

    # Check for custom exporters (anything listening on 9100-9199)
    echo ""
    echo "=== Custom Exporters ==="
    printf '%s\n' "$listeners" | grep -E ":91[0-9][0-9] " | while read -r line; do
      # The port is the last colon-separated field of the local address, so
      # IPv6 listeners such as ":::9100" are parsed correctly too.
      port=$(echo "$line" | awk '{n = split($4, parts, ":"); print parts[n]}')
      process=$(echo "$line" | awk '{print $7}' | cut -d'/' -f2)
      echo "Found exporter on port $port: $process"
    done
  register: exporter_scan
  # Pure discovery: nothing on the host is modified.
  changed_when: false
|
|
|
|
- name: Get Docker containers with exposed ports
  # Lists running containers that publish or expose TCP ports. Output goes to
  # container_ports.stdout for the coverage report. Hosts without the docker
  # CLI report "Docker not available" instead of failing.
  shell: |
    echo "=== Container Port Mapping ==="
    if command -v docker >/dev/null 2>&1; then
      # POSIX sh has no $'\t' quoting, so build the tab separator with printf.
      tab=$(printf '\t')
      # No "table" directive: table output is padded with spaces, which would
      # leave nothing for the tab-separated read below to split on.
      docker ps --format "{{ '{{' }}.Names{{ '}}' }}\t{{ '{{' }}.Ports{{ '}}' }}" | grep -E ":[0-9]+->|:[0-9]+/tcp" | while IFS="$tab" read -r name ports; do
        echo "Container: $name"
        echo "Ports: $ports"
        echo "---"
      done
    else
      echo "Docker not available"
    fi
  register: container_ports
  # Elevated privileges are kept from the original task (presumably needed to
  # reach the Docker socket - confirm per host).
  become: true
  # Pure discovery: nothing on the host is modified.
  changed_when: false
|
|
|
|
- name: Test Prometheus metrics endpoints
  # Issues one HTTP GET per well-known exporter port against the host's
  # default IPv4 address. Each loop result (including its "item" port) is
  # collected in metrics_test.results for the analysis task that follows.
  uri:
    method: GET
    url: "http://{{ ansible_default_ipv4.address }}:{{ item }}/metrics"
    timeout: 5
  loop:
    - "{{ node_exporter_port }}"
    - "{{ cadvisor_port }}"
    - "{{ snmp_exporter_port }}"
  # An unreachable endpoint is an expected finding, not a play failure.
  failed_when: false
  register: metrics_test
|
|
|
|
- name: Analyze metrics endpoints
  # Splits the probed ports into two facts based on metrics_test.results:
  #   available_endpoints - ports whose probe returned HTTP 200
  #   failed_endpoints    - ports with no status at all, followed by ports
  #                         whose status is anything other than 200
  set_fact:
    available_endpoints: >-
      {{ metrics_test.results
         | selectattr('status', 'defined')
         | selectattr('status', 'equalto', 200)
         | map(attribute='item')
         | list }}
    failed_endpoints: >-
      {{ metrics_test.results
         | rejectattr('status', 'defined')
         | map(attribute='item')
         | list
         + (metrics_test.results
            | selectattr('status', 'defined')
            | rejectattr('status', 'equalto', 200)
            | map(attribute='item')
            | list) }}
|
|
|
|
- name: Discover application metrics
  # For each candidate application port that is listening, checks whether
  # /metrics or /actuator/prometheus answers with a body whose first line
  # starts with "#". The text result is stored in
  # app_metrics_discovery.stdout for the coverage report.
  shell: |
    echo "=== Application Metrics Discovery ==="
    app_ports="3000 8080 8081 8090 9091 9093 9094 9115"
    for port in $app_ports; do
      if netstat -tln 2>/dev/null | grep -q ":$port "; then
        # --max-time bounds the whole request; --connect-timeout alone would
        # let a service that accepts but never responds stall the play.
        if curl -s --connect-timeout 2 --max-time 5 "http://localhost:$port/metrics" | head -1 | grep -q "^#"; then
          echo "✓ Metrics endpoint found: localhost:$port/metrics"
        elif curl -s --connect-timeout 2 --max-time 5 "http://localhost:$port/actuator/prometheus" | head -1 | grep -q "^#"; then
          echo "✓ Spring Boot metrics: localhost:$port/actuator/prometheus"
        else
          echo "? Port $port open but no metrics endpoint detected"
        fi
      fi
    done
  register: app_metrics_discovery
  # Pure discovery: nothing on the host is modified.
  changed_when: false
|
|
|
|
- name: Generate Prometheus configuration snippet
  # Writes a per-host list of scrape jobs to /tmp on the control node
  # (delegate_to: localhost). The consolidation play later indents these
  # files under a single "scrape_configs:" key.
  #
  # Layout notes for the rendered file:
  #   - "labels" is nested inside the static_configs entry, next to
  #     "targets"; Prometheus does not accept it at job level.
  #   - Jinja block tags start at the first column of the content so they
  #     contribute no stray indentation to the output.
  #   - Group membership uses group_names, which also works when the
  #     inventory defines no "synology" group at all.
  copy:
    content: |
      # Prometheus Target Configuration for {{ inventory_hostname }}
      # Generated: {{ ansible_date_time.iso8601 }}

      {% if available_endpoints | length > 0 %}
      - job_name: '{{ inventory_hostname }}-exporters'
        scrape_interval: 15s
        metrics_path: /metrics
        static_configs:
          - targets:
      {% for port in available_endpoints %}
              - '{{ ansible_default_ipv4.address }}:{{ port }}'
      {% endfor %}
            labels:
              host: '{{ inventory_hostname }}'
              environment: 'homelab'
      {% endif %}

      {% if 'synology' in group_names %}
      # SNMP monitoring for Synology {{ inventory_hostname }}
      - job_name: '{{ inventory_hostname }}-snmp'
        metrics_path: /snmp
        params:
          module: [synology]
        static_configs:
          - targets:
              - '{{ ansible_default_ipv4.address }}'
            labels:
              host: '{{ inventory_hostname }}'
              type: 'synology'
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            replacement: '{{ ansible_default_ipv4.address }}:{{ snmp_exporter_port }}'
      {% endif %}
    dest: "/tmp/prometheus_{{ inventory_hostname }}_targets.yml"
    mode: "0644"
  delegate_to: localhost
|
|
|
|
- name: Check for missing monitoring coverage
  # Builds monitoring_gaps as a real list of human-readable gap messages.
  #
  # The value is one Jinja expression that concatenates lists, so it does not
  # depend on a rendered "[...]" string being converted back into a list.
  # Group membership is tested with group_names, which is safe when a group
  # does not exist in the inventory.
  #
  # NOTE(review): ansible_facts.services is only populated by service_facts,
  # and no such task is visible in this playbook; without it the cAdvisor
  # check never reports a gap. Both "docker" and the systemd unit name
  # "docker.service" are accepted as evidence of Docker.
  set_fact:
    monitoring_gaps: >-
      {{
        (['node_exporter missing on Synology']
          if ('synology' in group_names
              and node_exporter_port not in available_endpoints)
          else [])
        + (['node_exporter missing on Debian client']
          if ('debian_clients' in group_names
              and node_exporter_port not in available_endpoints)
          else [])
        + (['cAdvisor missing for Docker monitoring']
          if ((('docker' in (ansible_facts.services | default({})))
               or ('docker.service' in (ansible_facts.services | default({}))))
              and cadvisor_port not in available_endpoints)
          else [])
      }}
|
|
|
|
- name: Generate monitoring coverage report
  # Renders a Markdown report per host on the control node
  # (delegate_to: localhost). It combines the outputs registered by the
  # earlier discovery tasks with the available/failed endpoint facts and the
  # monitoring_gaps list.
  #
  # The consolidation play counts the "✅" and "⚠️" markers in this file, so
  # those characters must stay exactly as they are.
  #
  # Jinja block tags start at the first column of the content so they add no
  # stray indentation. Group membership uses group_names so a missing
  # "synology" inventory group does not raise an error.
  copy:
    content: |
      {% set known_services = ansible_facts.services | default({}) %}
      {% set docker_present = ('docker' in known_services) or ('docker.service' in known_services) %}
      # Monitoring Coverage Report - {{ inventory_hostname }}
      Generated: {{ ansible_date_time.iso8601 }}

      ## Host Information
      - Hostname: {{ inventory_hostname }}
      - IP Address: {{ ansible_default_ipv4.address }}
      - OS: {{ ansible_facts['os_family'] }} {{ ansible_facts['distribution_version'] }}
      - Groups: {{ group_names | join(', ') }}

      ## Exporter Discovery
      ```
      {{ exporter_scan.stdout }}
      ```

      ## Available Metrics Endpoints
      {% for endpoint in available_endpoints %}
      - ✅ http://{{ ansible_default_ipv4.address }}:{{ endpoint }}/metrics
      {% endfor %}

      {% if failed_endpoints | length > 0 %}
      ## Failed/Missing Endpoints
      {% for endpoint in failed_endpoints %}
      - ❌ http://{{ ansible_default_ipv4.address }}:{{ endpoint }}/metrics
      {% endfor %}
      {% endif %}

      ## Container Port Mapping
      ```
      {{ container_ports.stdout }}
      ```

      ## Application Metrics Discovery
      ```
      {{ app_metrics_discovery.stdout }}
      ```

      {% if monitoring_gaps | length > 0 %}
      ## Monitoring Gaps
      {% for gap in monitoring_gaps %}
      - ⚠️ {{ gap }}
      {% endfor %}
      {% endif %}

      ## Recommended Actions
      {% if node_exporter_port not in available_endpoints %}
      - Install node_exporter for system metrics
      {% endif %}
      {% if docker_present and cadvisor_port not in available_endpoints %}
      - Install cAdvisor for container metrics
      {% endif %}
      {% if 'synology' in group_names and snmp_exporter_port not in available_endpoints %}
      - Configure SNMP exporter for Synology-specific metrics
      {% endif %}
    dest: "/tmp/monitoring_coverage_{{ inventory_hostname }}_{{ ansible_date_time.epoch }}.md"
    mode: "0644"
  delegate_to: localhost
|
|
|
|
- name: Display monitoring summary
  # Prints a per-host recap: endpoint counts, number of gaps (0 when
  # monitoring_gaps is empty/falsy) and the two files written on the control
  # node by the preceding tasks.
  debug:
    msg: |
      Monitoring Coverage Summary for {{ inventory_hostname }}:
      - Available Endpoints: {{ available_endpoints | length }}
      - Failed Endpoints: {{ failed_endpoints | length }}
      - Monitoring Gaps: {{ monitoring_gaps | length if monitoring_gaps else 0 }}
      - Prometheus Config: /tmp/prometheus_{{ inventory_hostname }}_targets.yml
      - Coverage Report: /tmp/monitoring_coverage_{{ inventory_hostname }}_{{ ansible_date_time.epoch }}.md
|
|
|
|
# Consolidation play: runs on localhost after the per-host discovery play
|
|
- name: Consolidate Prometheus Configuration
  hosts: localhost
  # Facts are required here: the tasks below build file names from
  # ansible_date_time.epoch, which is undefined when fact gathering is off.
  gather_facts: true
  tasks:
    - name: Combine all target configurations
      # Concatenates every per-host snippet in /tmp under one
      # "scrape_configs:" key and captures the result on stdout.
      shell: |
        echo "# Consolidated Prometheus Targets Configuration"
        echo "# Generated: $(date)"
        echo ""
        echo "scrape_configs:"

        for file in /tmp/prometheus_*_targets.yml; do
          # The -f test also skips the literal pattern when nothing matches.
          if [ -f "$file" ]; then
            echo "  # From $(basename "$file")"
            # Indent the snippet so its jobs nest under scrape_configs.
            sed 's/^/  /' "$file"
            echo ""
          fi
        done
      register: consolidated_config
      # Only reads files and prints; nothing is modified.
      changed_when: false

    - name: Save consolidated Prometheus configuration
      copy:
        content: "{{ consolidated_config.stdout }}"
        dest: "/tmp/prometheus_homelab_targets_{{ ansible_date_time.epoch }}.yml"
        mode: "0644"

    - name: Generate monitoring summary report
      # Summarises the per-host coverage reports found in /tmp.
      # NOTE(review): report file names contain a per-run epoch, so reports
      # left over from earlier runs match the glob and are counted again.
      shell: |
        echo "# Homelab Monitoring Coverage Summary"
        echo "Generated: $(date)"
        echo ""
        echo "## Coverage by Host"

        total_hosts=0
        monitored_hosts=0

        for file in /tmp/monitoring_coverage_*_*.md; do
          if [ -f "$file" ]; then
            host=$(basename "$file" | sed 's/monitoring_coverage_\(.*\)_[0-9]*\.md$/\1/')
            # grep -c already prints "0" (and exits non-zero) when nothing
            # matches, so only neutralise the exit status; echoing a second
            # "0" would produce a two-line value that breaks the -gt test.
            endpoints=$(grep -c "✅" "$file" 2>/dev/null || true)
            gaps=$(grep -c "⚠️" "$file" 2>/dev/null || true)
            # Empty output means grep could not read the file at all.
            endpoints=${endpoints:-0}
            gaps=${gaps:-0}

            total_hosts=$((total_hosts + 1))
            if [ "$endpoints" -gt 0 ]; then
              monitored_hosts=$((monitored_hosts + 1))
            fi

            echo "- **$host**: $endpoints endpoints, $gaps gaps"
          fi
        done

        echo ""
        echo "## Summary"
        echo "- Total Hosts: $total_hosts"
        echo "- Monitored Hosts: $monitored_hosts"
        # Guard against division by zero when no report files were found.
        if [ "$total_hosts" -gt 0 ]; then
          echo "- Coverage: $(( monitored_hosts * 100 / total_hosts ))%"
        else
          echo "- Coverage: n/a (no host reports found)"
        fi

        echo ""
        echo "## Next Steps"
        echo "1. Review individual host reports in /tmp/monitoring_coverage_*.md"
        # Use the same epoch as the save task so the path names the real file.
        echo "2. Apply consolidated Prometheus config: /tmp/prometheus_homelab_targets_{{ ansible_date_time.epoch }}.yml"
        echo "3. Address monitoring gaps identified in reports"
      register: summary_report
      # Only reads files and prints; nothing is modified.
      changed_when: false

    - name: Save monitoring summary
      copy:
        content: "{{ summary_report.stdout }}"
        dest: "/tmp/homelab_monitoring_summary_{{ ansible_date_time.epoch }}.md"
        mode: "0644"

    - name: Display final summary
      debug:
        msg: |
          Homelab Monitoring Discovery Complete!

          📊 Reports Generated:
          - Consolidated Config: /tmp/prometheus_homelab_targets_{{ ansible_date_time.epoch }}.yml
          - Summary Report: /tmp/homelab_monitoring_summary_{{ ansible_date_time.epoch }}.md
          - Individual Reports: /tmp/monitoring_coverage_*.md

          🔧 Next Steps:
          1. Review the summary report for coverage gaps
          2. Apply the consolidated Prometheus configuration
          3. Install missing exporters where needed
|