---
# Prometheus Target Discovery
# Auto-discovers containers for monitoring and validates coverage
# Run with: ansible-playbook -i hosts.ini playbooks/prometheus_target_discovery.yml
#
# Play 1 runs on every inventory host: it probes for listening exporters,
# tests their /metrics endpoints, and writes (on the controller, via
# delegate_to) a per-host Prometheus scrape snippet and a Markdown report.
# Play 2 runs on localhost: it merges the per-host snippets into one file and
# builds an overall coverage summary from the per-host reports.

- name: Prometheus Target Discovery
  hosts: all
  gather_facts: true
  vars:
    prometheus_port: 9090
    node_exporter_port: 9100
    cadvisor_port: 8080
    snmp_exporter_port: 9116

    # Expected exporters by host type
    expected_exporters:
      synology:
        - "node_exporter"
        - "snmp_exporter"
      debian_clients:
        - "node_exporter"
      hypervisors:
        - "node_exporter"
        - "cadvisor"

  tasks:
    # Read-only probe of listening TCP sockets; output is captured for the
    # report. Relies on netstat being installed: when it is missing every
    # check silently falls into the "Not found" branch.
    - name: Scan for running exporters
      shell: |
        echo "=== Exporter Discovery on {{ inventory_hostname }} ==="

        # Check for node_exporter
        if netstat -tlnp 2>/dev/null | grep -q ":{{ node_exporter_port }} "; then
          echo "✓ node_exporter: Port {{ node_exporter_port }} ($(netstat -tlnp 2>/dev/null | grep ":{{ node_exporter_port }} " | awk '{print $7}' | cut -d'/' -f2))"
        else
          echo "✗ node_exporter: Not found on port {{ node_exporter_port }}"
        fi

        # Check for cAdvisor
        if netstat -tlnp 2>/dev/null | grep -q ":{{ cadvisor_port }} "; then
          echo "✓ cAdvisor: Port {{ cadvisor_port }}"
        else
          echo "✗ cAdvisor: Not found on port {{ cadvisor_port }}"
        fi

        # Check for SNMP exporter
        if netstat -tlnp 2>/dev/null | grep -q ":{{ snmp_exporter_port }} "; then
          echo "✓ snmp_exporter: Port {{ snmp_exporter_port }}"
        else
          echo "✗ snmp_exporter: Not found on port {{ snmp_exporter_port }}"
        fi

        # Check for custom exporters
        echo ""
        echo "=== Custom Exporters ==="
        netstat -tlnp 2>/dev/null | grep -E ":91[0-9][0-9] " | while read -r line; do
          # Strip everything up to the last colon so IPv6 listeners
          # (for example :::9100) still yield the port number.
          port=$(echo "$line" | awk '{print $4}' | sed 's/.*://')
          process=$(echo "$line" | awk '{print $7}' | cut -d'/' -f2)
          echo "Found exporter on port $port: $process"
        done
      register: exporter_scan
      changed_when: false

    # Lists containers that publish ports. The format string deliberately
    # omits docker's "table" directive: table output is padded with spaces,
    # so it cannot be split on tabs. The tab for IFS is produced with printf
    # because the shell module runs /bin/sh, where $'...' quoting is not
    # guaranteed to work.
    - name: Get Docker containers with exposed ports
      shell: |
        echo "=== Container Port Mapping ==="
        if command -v docker >/dev/null 2>&1; then
          tab="$(printf '\t')"
          docker ps --format "{{ '{{' }}.Names{{ '}}' }}\t{{ '{{' }}.Ports{{ '}}' }}" | grep -E ":[0-9]+->|:[0-9]+/tcp" | while IFS="$tab" read -r name ports; do
            echo "Container: $name"
            echo "Ports: $ports"
            echo "---"
          done
        else
          echo "Docker not available"
        fi
      register: container_ports
      become: true
      changed_when: false

    # The probe above prints "Docker not available" only when the docker CLI
    # is absent, so its output doubles as a Docker-presence flag. This
    # replaces a check on ansible_facts.services, which this playbook never
    # populates (service_facts is not run).
    - name: Record whether Docker is installed
      set_fact:
        docker_present: "{{ 'Docker not available' not in container_ports.stdout }}"

    # failed_when: false keeps the play going when an exporter is absent;
    # the per-port outcome is evaluated in the next task.
    - name: Test Prometheus metrics endpoints
      uri:
        url: "http://{{ ansible_default_ipv4.address }}:{{ item }}/metrics"
        method: GET
        timeout: 5
      register: metrics_test
      loop:
        - "{{ node_exporter_port }}"
        - "{{ cadvisor_port }}"
        - "{{ snmp_exporter_port }}"
      failed_when: false

    # available_endpoints: ports whose probe returned HTTP 200.
    # failed_endpoints: ports with no status at all, followed by ports whose
    # status was anything other than 200.
    - name: Analyze metrics endpoints
      set_fact:
        available_endpoints: >-
          {{ metrics_test.results
          | selectattr('status', 'defined')
          | selectattr('status', 'equalto', 200)
          | map(attribute='item')
          | list }}
        failed_endpoints: >-
          {{ (metrics_test.results
          | rejectattr('status', 'defined')
          | map(attribute='item')
          | list)
          + (metrics_test.results
          | selectattr('status', 'defined')
          | rejectattr('status', 'equalto', 200)
          | map(attribute='item')
          | list) }}

    # Probes a fixed list of common application ports for a Prometheus text
    # endpoint: first /metrics, then /actuator/prometheus.
    - name: Discover application metrics
      shell: |
        echo "=== Application Metrics Discovery ==="

        app_ports="3000 8080 8081 8090 9091 9093 9094 9115"

        for port in $app_ports; do
          if netstat -tln 2>/dev/null | grep -q ":$port "; then
            if curl -s --connect-timeout 2 "http://localhost:$port/metrics" | head -1 | grep -q "^#"; then
              echo "✓ Metrics endpoint found: localhost:$port/metrics"
            elif curl -s --connect-timeout 2 "http://localhost:$port/actuator/prometheus" | head -1 | grep -q "^#"; then
              echo "✓ Spring Boot metrics: localhost:$port/actuator/prometheus"
            else
              echo "? Port $port open but no metrics endpoint detected"
            fi
          fi
        done
      register: app_metrics_discovery
      changed_when: false

    # Writes one scrape_configs fragment per host on the controller. The
    # fragment is a top-level YAML list so the consolidation play can indent
    # it under "scrape_configs:". "labels" sits inside the static_configs
    # entry because that is the only place Prometheus accepts it.
    - name: Generate Prometheus configuration snippet
      copy:
        content: |
          # Prometheus Target Configuration for {{ inventory_hostname }}
          # Generated: {{ ansible_date_time.iso8601 }}

          {% if available_endpoints | length > 0 %}
          - job_name: '{{ inventory_hostname }}-exporters'
            scrape_interval: 15s
            metrics_path: /metrics
            static_configs:
              - targets:
          {% for port in available_endpoints %}
                  - '{{ ansible_default_ipv4.address }}:{{ port }}'
          {% endfor %}
                labels:
                  host: '{{ inventory_hostname }}'
                  environment: 'homelab'
          {% endif %}

          {% if 'synology' in group_names %}
          # SNMP monitoring for Synology {{ inventory_hostname }}
          - job_name: '{{ inventory_hostname }}-snmp'
            metrics_path: /snmp
            params:
              module: [synology]
            static_configs:
              - targets:
                  - '{{ ansible_default_ipv4.address }}'
                labels:
                  host: '{{ inventory_hostname }}'
                  type: 'synology'
            relabel_configs:
              - source_labels: [__address__]
                target_label: __param_target
              - source_labels: [__param_target]
                target_label: instance
              - target_label: __address__
                replacement: '{{ ansible_default_ipv4.address }}:{{ snmp_exporter_port }}'
          {% endif %}
        dest: "/tmp/prometheus_{{ inventory_hostname }}_targets.yml"
        mode: "0644"
      delegate_to: localhost

    # Built as one list-valued expression so monitoring_gaps is a real list
    # (the report below iterates over it and takes its length). Group
    # membership is tested through group_names, which is always defined, so
    # inventories without a synology or debian_clients group do not fail.
    - name: Check for missing monitoring coverage
      set_fact:
        monitoring_gaps: >-
          {{ (['node_exporter missing on Synology']
          if ('synology' in group_names
          and node_exporter_port not in available_endpoints)
          else [])
          + (['node_exporter missing on Debian client']
          if ('debian_clients' in group_names
          and node_exporter_port not in available_endpoints)
          else [])
          + (['cAdvisor missing for Docker monitoring']
          if ((docker_present | bool)
          and cadvisor_port not in available_endpoints)
          else []) }}

    # Per-host Markdown report, written on the controller. The file name
    # carries the host's fact-gathering epoch, so repeated runs accumulate
    # reports instead of overwriting them.
    - name: Generate monitoring coverage report
      copy:
        content: |
          # Monitoring Coverage Report - {{ inventory_hostname }}
          Generated: {{ ansible_date_time.iso8601 }}

          ## Host Information
          - Hostname: {{ inventory_hostname }}
          - IP Address: {{ ansible_default_ipv4.address }}
          - OS: {{ ansible_facts['os_family'] }} {{ ansible_facts['distribution_version'] }}
          - Groups: {{ group_names | join(', ') }}

          ## Exporter Discovery
          ```
          {{ exporter_scan.stdout }}
          ```

          ## Available Metrics Endpoints
          {% for endpoint in available_endpoints %}
          - ✅ http://{{ ansible_default_ipv4.address }}:{{ endpoint }}/metrics
          {% endfor %}

          {% if failed_endpoints | length > 0 %}
          ## Failed/Missing Endpoints
          {% for endpoint in failed_endpoints %}
          - ❌ http://{{ ansible_default_ipv4.address }}:{{ endpoint }}/metrics
          {% endfor %}
          {% endif %}

          ## Container Port Mapping
          ```
          {{ container_ports.stdout }}
          ```

          ## Application Metrics Discovery
          ```
          {{ app_metrics_discovery.stdout }}
          ```

          {% if monitoring_gaps | length > 0 %}
          ## Monitoring Gaps
          {% for gap in monitoring_gaps %}
          - ⚠️ {{ gap }}
          {% endfor %}
          {% endif %}

          ## Recommended Actions
          {% if node_exporter_port not in available_endpoints %}
          - Install node_exporter for system metrics
          {% endif %}
          {% if (docker_present | bool) and cadvisor_port not in available_endpoints %}
          - Install cAdvisor for container metrics
          {% endif %}
          {% if 'synology' in group_names and snmp_exporter_port not in available_endpoints %}
          - Configure SNMP exporter for Synology-specific metrics
          {% endif %}
        dest: "/tmp/monitoring_coverage_{{ inventory_hostname }}_{{ ansible_date_time.epoch }}.md"
        mode: "0644"
      delegate_to: localhost

    - name: Display monitoring summary
      debug:
        msg: |
          Monitoring Coverage Summary for {{ inventory_hostname }}:
          - Available Endpoints: {{ available_endpoints | length }}
          - Failed Endpoints: {{ failed_endpoints | length }}
          - Monitoring Gaps: {{ monitoring_gaps | length }}
          - Prometheus Config: /tmp/prometheus_{{ inventory_hostname }}_targets.yml
          - Coverage Report: /tmp/monitoring_coverage_{{ inventory_hostname }}_{{ ansible_date_time.epoch }}.md

# Consolidation task to run on localhost
- name: Consolidate Prometheus Configuration
  hosts: localhost
  # Facts are required here: the tasks below template ansible_date_time.epoch
  # into file names. The minimal subset is enough to provide it.
  gather_facts: true
  gather_subset:
    - min

  tasks:
    # Concatenates every per-host snippet under a single scrape_configs key.
    # Non-empty lines are indented two spaces so each snippet's top-level
    # list nests under that key.
    - name: Combine all target configurations
      shell: |
        echo "# Consolidated Prometheus Targets Configuration"
        echo "# Generated: $(date)"
        echo ""
        echo "scrape_configs:"

        for file in /tmp/prometheus_*_targets.yml; do
          if [ -f "$file" ]; then
            echo "  # From $(basename "$file")"
            sed 's/^./  &/' "$file"
            echo ""
          fi
        done
      register: consolidated_config
      changed_when: false

    - name: Save consolidated Prometheus configuration
      copy:
        content: "{{ consolidated_config.stdout }}\n"
        dest: "/tmp/prometheus_homelab_targets_{{ ansible_date_time.epoch }}.yml"
        mode: "0644"

    # Counts the ✅ and ⚠️ markers in each per-host report. Reports left over
    # from earlier runs also match the glob and are counted again.
    - name: Generate monitoring summary report
      shell: |
        echo "# Homelab Monitoring Coverage Summary"
        echo "Generated: $(date)"
        echo ""
        echo "## Coverage by Host"

        total_hosts=0
        monitored_hosts=0

        for file in /tmp/monitoring_coverage_*_*.md; do
          if [ -f "$file" ]; then
            host=$(basename "$file" | sed 's/^monitoring_coverage_\(.*\)_[0-9]*\.md$/\1/')
            # grep -c already prints 0 when nothing matches (and exits 1), so
            # only its exit status is neutralised; the default covers the
            # case where grep prints nothing at all.
            endpoints=$(grep -c "✅" "$file" 2>/dev/null || true)
            endpoints=${endpoints:-0}
            gaps=$(grep -c "⚠️" "$file" 2>/dev/null || true)
            gaps=${gaps:-0}

            total_hosts=$((total_hosts + 1))
            if [ "$endpoints" -gt 0 ]; then
              monitored_hosts=$((monitored_hosts + 1))
            fi

            echo "- **$host**: $endpoints endpoints, $gaps gaps"
          fi
        done

        echo ""
        echo "## Summary"
        echo "- Total Hosts: $total_hosts"
        echo "- Monitored Hosts: $monitored_hosts"
        # Guard the percentage: with no reports the division would abort the
        # script with a divide-by-zero error.
        if [ "$total_hosts" -gt 0 ]; then
          echo "- Coverage: $(( monitored_hosts * 100 / total_hosts ))%"
        else
          echo "- Coverage: n/a (no host reports found)"
        fi

        echo ""
        echo "## Next Steps"
        echo "1. Review individual host reports in /tmp/monitoring_coverage_*.md"
        echo "2. Apply consolidated Prometheus config: /tmp/prometheus_homelab_targets_{{ ansible_date_time.epoch }}.yml"
        echo "3. Address monitoring gaps identified in reports"
      register: summary_report
      changed_when: false

    - name: Save monitoring summary
      copy:
        content: "{{ summary_report.stdout }}\n"
        dest: "/tmp/homelab_monitoring_summary_{{ ansible_date_time.epoch }}.md"
        mode: "0644"

    - name: Display final summary
      debug:
        msg: |
          Homelab Monitoring Discovery Complete!

          📊 Reports Generated:
          - Consolidated Config: /tmp/prometheus_homelab_targets_{{ ansible_date_time.epoch }}.yml
          - Summary Report: /tmp/homelab_monitoring_summary_{{ ansible_date_time.epoch }}.md
          - Individual Reports: /tmp/monitoring_coverage_*.md

          🔧 Next Steps:
          1. Review the summary report for coverage gaps
          2. Apply the consolidated Prometheus configuration
          3. Install missing exporters where needed