Sanitized mirror from private repository - 2026-04-25 06:50:29 UTC
This commit is contained in:
320
ansible/automation/playbooks/prometheus_target_discovery.yml
Normal file
320
ansible/automation/playbooks/prometheus_target_discovery.yml
Normal file
@@ -0,0 +1,320 @@
|
||||
---
|
||||
# Prometheus Target Discovery
|
||||
# Auto-discovers containers for monitoring and validates coverage
|
||||
# Run with: ansible-playbook -i hosts.ini playbooks/prometheus_target_discovery.yml
|
||||
|
||||
# Play 1: runs against every inventory host, probes for Prometheus exporters
# and, via tasks delegated to localhost, writes a per-host scrape-config
# snippet and a coverage report under /tmp.
- name: Prometheus Target Discovery
  hosts: all
  # Facts are needed: the tasks below read ansible_default_ipv4,
  # ansible_date_time and ansible_facts['os_family'].
  gather_facts: true
  vars:
    # TCP ports used by the discovery tasks.
    # NOTE(review): prometheus_port is not referenced by any task in this play.
    prometheus_port: 9090
    node_exporter_port: 9100
    cadvisor_port: 8080
    snmp_exporter_port: 9116

    # Expected exporters by host type
    # NOTE(review): not referenced by the tasks in this file - confirm whether
    # it is still needed.
    expected_exporters:
      synology:
        - node_exporter
        - snmp_exporter
      debian_clients:
        - node_exporter
      hypervisors:
        - node_exporter
        - cadvisor

  tasks:
|
||||
# Reports which well-known exporter ports have a TCP listener on the host and
# lists any listener in the 9100-9199 range. The command only reads state, so
# it never reports "changed"; its stdout is registered as exporter_scan.
- name: Scan for running exporters
  shell: |
    echo "=== Exporter Discovery on {{ inventory_hostname }} ==="

    # One snapshot of the listener table, so every check sees the same data.
    listeners=$(netstat -tlnp 2>/dev/null)

    # Succeeds when the snapshot contains a listener on TCP port $1.
    port_open() {
      printf '%s\n' "$listeners" | grep -q ":$1 "
    }

    # Check for node_exporter
    if port_open "{{ node_exporter_port }}"; then
      owner=$(printf '%s\n' "$listeners" | grep ":{{ node_exporter_port }} " | awk '{print $7}' | cut -d'/' -f2)
      echo "✓ node_exporter: Port {{ node_exporter_port }} ($owner)"
    else
      echo "✗ node_exporter: Not found on port {{ node_exporter_port }}"
    fi

    # Check for cAdvisor
    if port_open "{{ cadvisor_port }}"; then
      echo "✓ cAdvisor: Port {{ cadvisor_port }}"
    else
      echo "✗ cAdvisor: Not found on port {{ cadvisor_port }}"
    fi

    # Check for SNMP exporter
    if port_open "{{ snmp_exporter_port }}"; then
      echo "✓ snmp_exporter: Port {{ snmp_exporter_port }}"
    else
      echo "✗ snmp_exporter: Not found on port {{ snmp_exporter_port }}"
    fi

    # Check for custom exporters
    echo ""
    echo "=== Custom Exporters ==="
    printf '%s\n' "$listeners" | grep -E ":91[0-9][0-9] " | while read -r line; do
      addr=$(echo "$line" | awk '{print $4}')
      # Keep everything after the LAST colon so IPv6 listeners such as
      # ":::9100" yield "9100" instead of an empty field.
      port=${addr##*:}
      process=$(echo "$line" | awk '{print $7}' | cut -d'/' -f2)
      echo "Found exporter on port $port: $process"
    done
  register: exporter_scan
  changed_when: false
|
||||
|
||||
# Lists running containers that publish ports, one "Container/Ports" record per
# container, or prints "Docker not available" when the docker CLI is missing.
# Runs with privilege escalation; stdout is registered as container_ports.
- name: Get Docker containers with exposed ports
  shell: |
    echo "=== Container Port Mapping ==="
    if command -v docker >/dev/null 2>&1; then
      # Build a literal tab for IFS. $'\t' is a bash extension and the shell
      # module runs /bin/sh unless told otherwise.
      tab=$(printf '\t')
      # No "table" prefix: the table formatter pads columns with spaces and
      # prints a header row, so the output would contain no tab to split on.
      docker ps --format "{{ '{{' }}.Names{{ '}}' }}\t{{ '{{' }}.Ports{{ '}}' }}" | grep -E ":[0-9]+->|:[0-9]+/tcp" | while IFS="$tab" read -r name ports; do
        echo "Container: $name"
        echo "Ports: $ports"
        echo "---"
      done
    else
      echo "Docker not available"
    fi
  register: container_ports
  become: true
  # Read-only query: never report a change.
  changed_when: false
|
||||
|
||||
# Requests /metrics on each of the three well-known exporter ports using the
# host's default IPv4 address. Failures are tolerated (failed_when: false) so
# that the per-port results can be analysed afterwards via metrics_test.
- name: Test Prometheus metrics endpoints
  uri:
    method: GET
    url: "http://{{ ansible_default_ipv4.address }}:{{ item }}/metrics"
    timeout: 5
  loop:
    - "{{ node_exporter_port }}"
    - "{{ cadvisor_port }}"
    - "{{ snmp_exporter_port }}"
  register: metrics_test
  failed_when: false
|
||||
|
||||
# Splits the probed ports into two lists:
#   available_endpoints - ports whose /metrics request returned HTTP 200
#   failed_endpoints    - ports with no status at all, or a non-200 status
# Ports are normalised with map('int'): loop items templated from
# "{{ some_port }}" can arrive as strings, while later tasks test membership
# with the integer port variables (e.g. node_exporter_port).
- name: Analyze metrics endpoints
  set_fact:
    available_endpoints: >-
      {{
        metrics_test.results
        | selectattr('status', 'defined')
        | selectattr('status', 'equalto', 200)
        | map(attribute='item')
        | map('int')
        | list
      }}
    failed_endpoints: >-
      {{
        (metrics_test.results
          | rejectattr('status', 'defined')
          | map(attribute='item')
          | map('int')
          | list)
        + (metrics_test.results
          | selectattr('status', 'defined')
          | rejectattr('status', 'equalto', 200)
          | map(attribute='item')
          | map('int')
          | list)
      }}
|
||||
|
||||
# For a fixed list of common application ports, checks whether something is
# listening and, if so, whether /metrics or /actuator/prometheus answers with
# a line starting with "#". stdout is registered as app_metrics_discovery.
- name: Discover application metrics
  shell: |
    echo "=== Application Metrics Discovery ==="
    app_ports="3000 8080 8081 8090 9091 9093 9094 9115"
    for port in $app_ports; do
      if netstat -tln 2>/dev/null | grep -q ":$port "; then
        # --max-time bounds the whole transfer; --connect-timeout alone does
        # not stop a service that accepts the connection but never replies.
        if curl -s --connect-timeout 2 --max-time 5 "http://localhost:$port/metrics" | head -1 | grep -q "^#"; then
          echo "✓ Metrics endpoint found: localhost:$port/metrics"
        elif curl -s --connect-timeout 2 --max-time 5 "http://localhost:$port/actuator/prometheus" | head -1 | grep -q "^#"; then
          echo "✓ Spring Boot metrics: localhost:$port/actuator/prometheus"
        else
          echo "? Port $port open but no metrics endpoint detected"
        fi
      fi
    done
  register: app_metrics_discovery
  # Read-only probe: never report a change.
  changed_when: false
|
||||
|
||||
# Writes /tmp/prometheus_<host>_targets.yml on the controller (delegated to
# localhost). The file holds up to two scrape jobs:
#   <host>-exporters - one target per port in available_endpoints
#   <host>-snmp      - only for members of the "synology" group
# Target labels live inside the static_configs entry, next to "targets";
# Prometheus has no job-level "labels" key.
# Group membership is tested with group_names so that an inventory without a
# "synology" group does not raise an undefined-variable error.
- name: Generate Prometheus configuration snippet
  copy:
    dest: "/tmp/prometheus_{{ inventory_hostname }}_targets.yml"
    content: |
      # Prometheus Target Configuration for {{ inventory_hostname }}
      # Generated: {{ ansible_date_time.iso8601 }}

      {% if available_endpoints | length > 0 %}
      - job_name: '{{ inventory_hostname }}-exporters'
        scrape_interval: 15s
        metrics_path: /metrics
        static_configs:
          - targets:
      {% for port in available_endpoints %}
              - '{{ ansible_default_ipv4.address }}:{{ port }}'
      {% endfor %}
            labels:
              host: '{{ inventory_hostname }}'
              environment: 'homelab'
      {% endif %}

      {% if 'synology' in group_names %}
      # SNMP monitoring for Synology {{ inventory_hostname }}
      - job_name: '{{ inventory_hostname }}-snmp'
        metrics_path: /snmp
        params:
          module: [synology]
        static_configs:
          - targets:
              - '{{ ansible_default_ipv4.address }}'
            labels:
              host: '{{ inventory_hostname }}'
              type: 'synology'
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            replacement: '{{ ansible_default_ipv4.address }}:{{ snmp_exporter_port }}'
      {% endif %}
  delegate_to: localhost
|
||||
|
||||
# Sets monitoring_gaps to a list of human-readable gap descriptions.
# The list is produced by a single Jinja expression, so the fact is a real
# list and does not depend on rendered text being converted back to one.
# - Group membership uses group_names, which is safe when a group is absent
#   from the inventory.
# - Ports are compared as integers on both sides.
# - The Docker check accepts both "docker" and "docker.service" as service
#   keys.
# NOTE(review): ansible_facts.services is only populated by service_facts,
# which no task in this play runs - confirm it is gathered elsewhere,
# otherwise the cAdvisor check never fires.
- name: Check for missing monitoring coverage
  vars:
    live_ports: "{{ available_endpoints | map('int') | list }}"
  set_fact:
    monitoring_gaps: >-
      {{
        (['node_exporter missing on Synology']
          if ('synology' in group_names
              and (node_exporter_port | int) not in live_ports)
          else [])
        + (['node_exporter missing on Debian client']
          if ('debian_clients' in group_names
              and (node_exporter_port | int) not in live_ports)
          else [])
        + (['cAdvisor missing for Docker monitoring']
          if (('docker' in (ansible_facts.services | default({}))
               or 'docker.service' in (ansible_facts.services | default({})))
              and (cadvisor_port | int) not in live_ports)
          else [])
      }}
|
||||
|
||||
# Writes a Markdown coverage report for the host to
# /tmp/monitoring_coverage_<host>_<epoch>.md on the controller (delegated to
# localhost). It combines the outputs registered by the earlier discovery
# tasks with the endpoint lists and monitoring_gaps.
# In "Recommended Actions":
# - ports are compared as integers (live_ports), so the advice does not depend
#   on whether available_endpoints holds strings or integers;
# - group membership uses group_names, which is safe when the "synology" group
#   is not defined;
# - both "docker" and "docker.service" are accepted as service keys.
# NOTE(review): ansible_facts.services is only populated by service_facts,
# which no task in this play runs - confirm it is gathered elsewhere.
- name: Generate monitoring coverage report
  copy:
    dest: "/tmp/monitoring_coverage_{{ inventory_hostname }}_{{ ansible_date_time.epoch }}.md"
    content: |
      {% set live_ports = available_endpoints | map('int') | list %}
      {% set known_services = ansible_facts.services | default({}) %}
      {% set has_docker = 'docker' in known_services or 'docker.service' in known_services %}
      # Monitoring Coverage Report - {{ inventory_hostname }}
      Generated: {{ ansible_date_time.iso8601 }}

      ## Host Information
      - Hostname: {{ inventory_hostname }}
      - IP Address: {{ ansible_default_ipv4.address }}
      - OS: {{ ansible_facts['os_family'] }} {{ ansible_facts['distribution_version'] }}
      - Groups: {{ group_names | join(', ') }}

      ## Exporter Discovery
      ```
      {{ exporter_scan.stdout }}
      ```

      ## Available Metrics Endpoints
      {% for endpoint in available_endpoints %}
      - ✅ http://{{ ansible_default_ipv4.address }}:{{ endpoint }}/metrics
      {% endfor %}

      {% if failed_endpoints | length > 0 %}
      ## Failed/Missing Endpoints
      {% for endpoint in failed_endpoints %}
      - ❌ http://{{ ansible_default_ipv4.address }}:{{ endpoint }}/metrics
      {% endfor %}
      {% endif %}

      ## Container Port Mapping
      ```
      {{ container_ports.stdout }}
      ```

      ## Application Metrics Discovery
      ```
      {{ app_metrics_discovery.stdout }}
      ```

      {% if monitoring_gaps | length > 0 %}
      ## Monitoring Gaps
      {% for gap in monitoring_gaps %}
      - ⚠️ {{ gap }}
      {% endfor %}
      {% endif %}

      ## Recommended Actions
      {% if (node_exporter_port | int) not in live_ports %}
      - Install node_exporter for system metrics
      {% endif %}
      {% if has_docker and (cadvisor_port | int) not in live_ports %}
      - Install cAdvisor for container metrics
      {% endif %}
      {% if 'synology' in group_names and (snmp_exporter_port | int) not in live_ports %}
      - Configure SNMP exporter for Synology-specific metrics
      {% endif %}
  delegate_to: localhost
|
||||
|
||||
# Prints a per-host recap: counts of reachable and failed endpoints, the
# number of monitoring gaps, and the paths of the two files generated for
# this host.
- name: Display monitoring summary
  debug:
    msg: |
      Monitoring Coverage Summary for {{ inventory_hostname }}:
      - Available Endpoints: {{ available_endpoints | length }}
      - Failed Endpoints: {{ failed_endpoints | length }}
      - Monitoring Gaps: {{ monitoring_gaps | length if monitoring_gaps else 0 }}
      - Prometheus Config: /tmp/prometheus_{{ inventory_hostname }}_targets.yml
      - Coverage Report: /tmp/monitoring_coverage_{{ inventory_hostname }}_{{ ansible_date_time.epoch }}.md
|
||||
|
||||
# Consolidation play: runs once on the controller (localhost)
|
||||
# Play 2: merges the per-host files written under /tmp into one scrape
# configuration and one summary report.
- name: Consolidate Prometheus Configuration
  hosts: localhost
  # Facts must be gathered for localhost: the tasks below build their output
  # file names from ansible_date_time.epoch, which is undefined when fact
  # gathering is disabled for this play.
  gather_facts: true
  tasks:
|
||||
# Prints a "scrape_configs:" document to stdout, appending every
# /tmp/prometheus_*_targets.yml file indented beneath it. The result is
# registered as consolidated_config; nothing is written by this task.
# NOTE(review): the glob also picks up snippets left in /tmp by earlier runs -
# confirm that is intended.
- name: Combine all target configurations
  shell: |
    echo "# Consolidated Prometheus Targets Configuration"
    echo "# Generated: $(date)"
    echo ""
    echo "scrape_configs:"

    for file in /tmp/prometheus_*_targets.yml; do
      # Skips the unexpanded pattern when no snippet exists.
      if [ -f "$file" ]; then
        echo " # From $(basename "$file")"
        sed 's/^/ /' "$file"
        echo ""
      fi
    done
  register: consolidated_config
  # Only reads files and prints: never report a change.
  changed_when: false
|
||||
|
||||
# Persists the stdout captured in consolidated_config to a timestamped file
# under /tmp.
- name: Save consolidated Prometheus configuration
  copy:
    dest: "/tmp/prometheus_homelab_targets_{{ ansible_date_time.epoch }}.yml"
    content: "{{ consolidated_config.stdout }}"
|
||||
|
||||
# Builds a Markdown summary on stdout from every
# /tmp/monitoring_coverage_*_*.md report: a per-host line with endpoint and
# gap counts, followed by totals and a coverage percentage. Registered as
# summary_report.
# NOTE(review): reports carry an epoch suffix, so files left by earlier runs
# are counted as additional hosts - confirm whether /tmp should be cleaned
# first.
- name: Generate monitoring summary report
  shell: |
    echo "# Homelab Monitoring Coverage Summary"
    echo "Generated: $(date)"
    echo ""
    echo "## Coverage by Host"

    total_hosts=0
    monitored_hosts=0

    for file in /tmp/monitoring_coverage_*_*.md; do
      if [ -f "$file" ]; then
        host=$(basename "$file" | sed 's/monitoring_coverage_\(.*\)_[0-9]*\.md/\1/')
        # grep -c already prints "0" when nothing matches and merely exits
        # non-zero; "|| true" swallows that status without adding a second
        # "0" to the captured value.
        endpoints=$(grep -c "✅" "$file" 2>/dev/null || true)
        gaps=$(grep -c "⚠️" "$file" 2>/dev/null || true)
        # Empty when grep could not read the file at all.
        endpoints=${endpoints:-0}
        gaps=${gaps:-0}

        total_hosts=$((total_hosts + 1))
        if [ "$endpoints" -gt 0 ]; then
          monitored_hosts=$((monitored_hosts + 1))
        fi

        echo "- **$host**: $endpoints endpoints, $gaps gaps"
      fi
    done

    echo ""
    echo "## Summary"
    echo "- Total Hosts: $total_hosts"
    echo "- Monitored Hosts: $monitored_hosts"
    # Guard the division: with no reports total_hosts is 0.
    if [ "$total_hosts" -gt 0 ]; then
      echo "- Coverage: $(( monitored_hosts * 100 / total_hosts ))%"
    else
      echo "- Coverage: n/a (no host reports found)"
    fi

    echo ""
    echo "## Next Steps"
    echo "1. Review individual host reports in /tmp/monitoring_coverage_*.md"
    # Same timestamp that names the consolidated file, not a fresh date +%s.
    echo "2. Apply consolidated Prometheus config: /tmp/prometheus_homelab_targets_{{ ansible_date_time.epoch }}.yml"
    echo "3. Address monitoring gaps identified in reports"
  register: summary_report
  # Only reads files and prints: never report a change.
  changed_when: false
|
||||
|
||||
# Persists the stdout captured in summary_report to a timestamped Markdown
# file under /tmp.
- name: Save monitoring summary
  copy:
    dest: "/tmp/homelab_monitoring_summary_{{ ansible_date_time.epoch }}.md"
    content: "{{ summary_report.stdout }}"
|
||||
|
||||
# Prints the locations of the generated files and the suggested follow-up
# steps.
- name: Display final summary
  debug:
    msg: |
      Homelab Monitoring Discovery Complete!

      📊 Reports Generated:
      - Consolidated Config: /tmp/prometheus_homelab_targets_{{ ansible_date_time.epoch }}.yml
      - Summary Report: /tmp/homelab_monitoring_summary_{{ ansible_date_time.epoch }}.md
      - Individual Reports: /tmp/monitoring_coverage_*.md

      🔧 Next Steps:
      1. Review the summary report for coverage gaps
      2. Apply the consolidated Prometheus configuration
      3. Install missing exporters where needed
|
||||
Reference in New Issue
Block a user