# 225 lines, 8.3 KiB, YAML
---
# Collects system, Docker, network and service metrics from every host,
# prints a per-host summary, and stores a JSON report plus a CSV trend line
# on the control node (localhost) under `monitoring_data_dir`.
- name: System Monitoring and Metrics Collection
  hosts: all
  gather_facts: true
  vars:
    monitoring_timestamp: "{{ ansible_date_time.iso8601 }}"
    # JSON reports older than this many days are removed by the cleanup task.
    metrics_retention_days: 30
    # Directory on the control node that receives reports and trend data.
    monitoring_data_dir: /tmp/monitoring_data

  tasks:
    - name: Create monitoring data directory
      file:
        path: "{{ monitoring_data_dir }}"
        state: directory
        mode: '0755'
      delegate_to: localhost
      run_once: true

    # Read-only collection: the script only prints, so it never reports "changed".
    - name: Collect system metrics
      shell: |
        echo "=== SYSTEM METRICS ==="
        echo "Timestamp: $(date -Iseconds)"
        echo "Hostname: $(hostname)"
        echo "Uptime: $(uptime -p)"
        echo "Load: $(uptime | awk -F'load average:' '{print $2}')"
        echo ""

        echo "=== CPU INFORMATION ==="
        echo "CPU Model: $(lscpu | grep 'Model name' | cut -d':' -f2 | xargs)"
        echo "CPU Cores: $(nproc)"
        echo "CPU Usage: $(top -bn1 | grep 'Cpu(s)' | awk '{print $2}' | cut -d'%' -f1)%"
        echo ""

        echo "=== MEMORY INFORMATION ==="
        free -h
        echo ""

        echo "=== DISK USAGE ==="
        df -h
        echo ""

        echo "=== NETWORK INTERFACES ==="
        ip -brief addr show
        echo ""

        echo "=== PROCESS SUMMARY ==="
        ps aux --sort=-%cpu | head -10
        echo ""

        echo "=== SYSTEM TEMPERATURES (if available) ==="
        if command -v sensors >/dev/null 2>&1; then
          sensors 2>/dev/null || echo "Temperature sensors not available"
        else
          echo "lm-sensors not installed"
        fi
      register: system_metrics
      changed_when: false

    - name: Collect Docker metrics (if available)
      shell: |
        if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
          echo "=== DOCKER METRICS ==="
          echo "Docker Version: $(docker --version)"
          echo "Containers Running: $(docker ps -q | wc -l)"
          echo "Containers Total: $(docker ps -aq | wc -l)"
          echo "Images: $(docker images -q | wc -l)"
          echo "Volumes: $(docker volume ls -q | wc -l)"
          echo ""

          echo "=== CONTAINER RESOURCE USAGE ==="
          # The Go-template braces must be wrapped in raw/endraw, otherwise
          # Ansible's Jinja2 templating tries to evaluate them and the task fails.
          docker stats --no-stream --format {% raw %}"table {{.Container}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.NetIO}}\t{{.BlockIO}}"{% endraw %} 2>/dev/null || echo "No running containers"
          echo ""

          echo "=== DOCKER SYSTEM INFO ==="
          docker system df 2>/dev/null || echo "Docker system info not available"
        else
          echo "Docker not available or not accessible"
        fi
      register: docker_metrics
      changed_when: false
      ignore_errors: true

    - name: Collect network metrics
      shell: |
        echo "=== NETWORK METRICS ==="
        echo "Active Connections:"
        # Choose the tool explicitly: in `netstat | head || ss`, the exit status
        # is head's, so the `ss` fallback would never run.
        if command -v netstat >/dev/null 2>&1; then
          netstat -tuln 2>/dev/null | head -20
        else
          ss -tuln | head -20
        fi
        echo ""

        echo "=== TAILSCALE STATUS ==="
        if command -v tailscale >/dev/null 2>&1; then
          tailscale status 2>/dev/null || echo "Tailscale not accessible"
        else
          echo "Tailscale not installed"
        fi
        echo ""

        echo "=== INTERNET CONNECTIVITY ==="
        # Branch on ping's own exit status; piping straight into tail would
        # hide a failed ping behind tail's successful exit.
        if ping_output=$(ping -c 3 8.8.8.8 2>/dev/null); then
          echo "$ping_output" | tail -2
        else
          echo "Internet connectivity test failed"
        fi
      register: network_metrics
      changed_when: false
      ignore_errors: true

    - name: Collect service metrics
      shell: |
        echo "=== SERVICE METRICS ==="
        if command -v systemctl >/dev/null 2>&1; then
          echo "Failed Services:"
          # Test for empty output so the "No failed services" line is printed
          # when the list is empty, not only when systemctl itself errors.
          failed_units=$(systemctl --failed --no-legend 2>/dev/null)
          if [ -n "$failed_units" ]; then
            echo "$failed_units"
          else
            echo "No failed services"
          fi
          echo ""

          echo "Active Services (sample):"
          systemctl list-units --type=service --state=active --no-legend | head -10
        else
          echo "Systemd not available"
        fi
        echo ""

        echo "=== LOG SUMMARY ==="
        if [ -f /var/log/syslog ]; then
          echo "Recent system log entries:"
          tail -5 /var/log/syslog 2>/dev/null || echo "Cannot access syslog"
        elif command -v journalctl >/dev/null 2>&1; then
          echo "Recent journal entries:"
          journalctl --no-pager -n 5 2>/dev/null || echo "Cannot access journal"
        else
          echo "No accessible system logs"
        fi
      register: service_metrics
      changed_when: false
      ignore_errors: true

    - name: Calculate performance metrics
      set_fact:
        performance_metrics:
          # Parsed from the "CPU Usage: N%" line printed by the system metrics
          # task; falls back to 0 when that line cannot be matched.
          cpu_usage: "{{ system_metrics.stdout | regex_search('CPU Usage: ([0-9.]+)%', '\\1') | default(['0'], true) | first | float }}"
          memory_total: "{{ ansible_memtotal_mb }}"
          memory_used: "{{ ansible_memtotal_mb - ansible_memfree_mb }}"
          memory_percent: "{{ ((ansible_memtotal_mb - ansible_memfree_mb) / ansible_memtotal_mb * 100) | round(1) }}"
          # NOTE(review): this is the `size_total` of the `/` mount (0 if no such
          # mount is reported), i.e. a capacity rather than a usage figure.
          disk_usage: "{{ ansible_mounts | selectattr('mount', 'equalto', '/') | map(attribute='size_total') | first | default(0) }}"
          uptime_seconds: "{{ ansible_uptime_seconds }}"

    # The Docker, network and service results are registered under
    # ignore_errors, so their stdout is defaulted in case it is missing.
    - name: Display monitoring summary
      debug:
        msg: |

          ==========================================
          📊 MONITORING REPORT - {{ inventory_hostname }}
          ==========================================

          🖥️ PERFORMANCE SUMMARY:
          - CPU Usage: {{ performance_metrics.cpu_usage }}%
          - Memory: {{ performance_metrics.memory_percent }}% ({{ performance_metrics.memory_used }}MB/{{ performance_metrics.memory_total }}MB)
          - Uptime: {{ performance_metrics.uptime_seconds | int // 86400 }} days, {{ (performance_metrics.uptime_seconds | int % 86400) // 3600 }} hours

          📈 DETAILED METRICS:
          {{ system_metrics.stdout }}

          🐳 DOCKER METRICS:
          {{ docker_metrics.stdout | default('') }}

          🌐 NETWORK METRICS:
          {{ network_metrics.stdout | default('') }}

          🔧 SERVICE METRICS:
          {{ service_metrics.stdout | default('') }}

          ==========================================

    # Written on the control node; one file per host and fact-gathering epoch.
    - name: Generate comprehensive monitoring report
      copy:
        content: |
          {
            "timestamp": "{{ monitoring_timestamp }}",
            "hostname": "{{ inventory_hostname }}",
            "system_info": {
              "os": "{{ ansible_distribution }} {{ ansible_distribution_version }}",
              "kernel": "{{ ansible_kernel }}",
              "architecture": "{{ ansible_architecture }}",
              "cpu_cores": {{ ansible_processor_vcpus }},
              "memory_mb": {{ ansible_memtotal_mb }}
            },
            "performance": {
              "cpu_usage_percent": {{ performance_metrics.cpu_usage }},
              "memory_usage_percent": {{ performance_metrics.memory_percent }},
              "memory_used_mb": {{ performance_metrics.memory_used }},
              "memory_total_mb": {{ performance_metrics.memory_total }},
              "uptime_seconds": {{ performance_metrics.uptime_seconds }},
              "uptime_days": {{ performance_metrics.uptime_seconds | int // 86400 }}
            },
            "raw_metrics": {
              "system": {{ system_metrics.stdout | to_json }},
              "docker": {{ docker_metrics.stdout | default('') | to_json }},
              "network": {{ network_metrics.stdout | default('') | to_json }},
              "services": {{ service_metrics.stdout | default('') | to_json }}
            }
          }
        dest: "{{ monitoring_data_dir }}/{{ inventory_hostname }}_metrics_{{ ansible_date_time.epoch }}.json"
        mode: '0644'
      delegate_to: localhost

    # Appends one CSV row per host: timestamp, host, cpu %, memory %, uptime (s).
    - name: Create monitoring trend data
      shell: |
        echo "{{ monitoring_timestamp }},{{ inventory_hostname }},{{ performance_metrics.cpu_usage }},{{ performance_metrics.memory_percent }},{{ performance_metrics.uptime_seconds }}" >> "{{ monitoring_data_dir }}/trends.csv"
      delegate_to: localhost
      ignore_errors: true

    # Best-effort cleanup: errors from find are discarded on purpose.
    - name: Clean old monitoring data
      shell: |
        find "{{ monitoring_data_dir }}" -name "*.json" -mtime +{{ metrics_retention_days }} -delete 2>/dev/null || true
      delegate_to: localhost
      run_once: true
      ignore_errors: true

    - name: Summary message
      debug:
        msg: |

          📊 Monitoring complete for {{ inventory_hostname }}
          📄 Report saved to: {{ monitoring_data_dir }}/{{ inventory_hostname }}_metrics_{{ ansible_date_time.epoch }}.json
          📈 Trend data updated in: {{ monitoring_data_dir }}/trends.csv

          Performance Summary:
          - CPU: {{ performance_metrics.cpu_usage }}%
          - Memory: {{ performance_metrics.memory_percent }}%
          - Uptime: {{ performance_metrics.uptime_seconds | int // 86400 }} days