Files
homelab-optimized/ansible/automation/playbooks/system_monitoring.yml
Gitea Mirror Bot e7652c8dab
Some checks failed
Documentation / Build Docusaurus (push) Failing after 5m3s
Documentation / Deploy to GitHub Pages (push) Has been skipped
Sanitized mirror from private repository - 2026-04-20 01:32:01 UTC
2026-04-20 01:32:01 +00:00

225 lines
8.3 KiB
YAML

---
# System monitoring playbook.
#
# For every host in the inventory this play:
#   1. collects system, Docker, network and service information by running
#      shell commands on the host and registering their output,
#   2. derives a few headline figures from that output and the gathered facts,
#   3. writes a per-host JSON report and appends one CSV trend row under
#      /tmp/monitoring_data on the control node (delegate_to: localhost),
#   4. deletes JSON reports older than `metrics_retention_days` days.
#
# Fact gathering must stay enabled: the play reads ansible_date_time,
# ansible_memtotal_mb, ansible_memfree_mb, ansible_mounts and other facts.
- name: System Monitoring and Metrics Collection
  hosts: all
  gather_facts: true
  vars:
    # ISO 8601 time taken from the host's gathered facts.
    monitoring_timestamp: "{{ ansible_date_time.iso8601 }}"
    # JSON reports older than this many days are removed by the cleanup task.
    metrics_retention_days: 30
  tasks:
    # The report directory lives on the control node and is shared by all
    # hosts, so it is created once.
    - name: Create monitoring data directory
      file:
        path: "/tmp/monitoring_data"
        state: directory
        mode: '0755'
      delegate_to: localhost
      run_once: true

    # Read-only collection; changed_when keeps the task from reporting a
    # change. The "CPU Usage: <n>%" line is parsed later by the
    # "Calculate performance metrics" task, so its format must not change.
    - name: Collect system metrics
      shell: |
        echo "=== SYSTEM METRICS ==="
        echo "Timestamp: $(date -Iseconds)"
        echo "Hostname: $(hostname)"
        echo "Uptime: $(uptime -p)"
        echo "Load: $(uptime | awk -F'load average:' '{print $2}')"
        echo ""
        echo "=== CPU INFORMATION ==="
        echo "CPU Model: $(lscpu | grep 'Model name' | cut -d':' -f2 | xargs)"
        echo "CPU Cores: $(nproc)"
        echo "CPU Usage: $(top -bn1 | grep 'Cpu(s)' | awk '{print $2}' | cut -d'%' -f1)%"
        echo ""
        echo "=== MEMORY INFORMATION ==="
        free -h
        echo ""
        echo "=== DISK USAGE ==="
        df -h
        echo ""
        echo "=== NETWORK INTERFACES ==="
        ip -brief addr show
        echo ""
        echo "=== PROCESS SUMMARY ==="
        ps aux --sort=-%cpu | head -10
        echo ""
        echo "=== SYSTEM TEMPERATURES (if available) ==="
        if command -v sensors >/dev/null 2>&1; then
          sensors 2>/dev/null || echo "Temperature sensors not available"
        else
          echo "lm-sensors not installed"
        fi
      register: system_metrics
      changed_when: false

    # The Go-template placeholders passed to `docker stats --format` use the
    # same double-brace syntax as Jinja2. They are wrapped in a raw block so
    # Ansible passes them to docker verbatim instead of trying to render them
    # (which is a template syntax error and fails the task).
    - name: Collect Docker metrics (if available)
      shell: |
        if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
          echo "=== DOCKER METRICS ==="
          echo "Docker Version: $(docker --version)"
          echo "Containers Running: $(docker ps -q | wc -l)"
          echo "Containers Total: $(docker ps -aq | wc -l)"
          echo "Images: $(docker images -q | wc -l)"
          echo "Volumes: $(docker volume ls -q | wc -l)"
          echo ""
          echo "=== CONTAINER RESOURCE USAGE ==="
          docker stats --no-stream --format "table {% raw %}{{.Container}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.NetIO}}\t{{.BlockIO}}{% endraw %}" 2>/dev/null || echo "No running containers"
          echo ""
          echo "=== DOCKER SYSTEM INFO ==="
          docker system df 2>/dev/null || echo "Docker system info not available"
        else
          echo "Docker not available or not accessible"
        fi
      register: docker_metrics
      changed_when: false
      ignore_errors: true

    # A pipeline's exit status is that of its last command, so
    # `cmd | head || fallback` never reaches the fallback. The netstat and
    # ping results are therefore captured first and their own exit status is
    # tested before the output is trimmed.
    - name: Collect network metrics
      shell: |
        echo "=== NETWORK METRICS ==="
        echo "Active Connections:"
        if listeners=$(netstat -tuln 2>/dev/null); then
          printf '%s\n' "$listeners" | head -20
        else
          ss -tuln | head -20
        fi
        echo ""
        echo "=== TAILSCALE STATUS ==="
        if command -v tailscale >/dev/null 2>&1; then
          tailscale status 2>/dev/null || echo "Tailscale not accessible"
        else
          echo "Tailscale not installed"
        fi
        echo ""
        echo "=== INTERNET CONNECTIVITY ==="
        ping_ok=yes
        ping_output=$(ping -c 3 8.8.8.8 2>/dev/null) || ping_ok=no
        if [ -n "$ping_output" ]; then
          printf '%s\n' "$ping_output" | tail -2
        fi
        if [ "$ping_ok" = no ]; then
          echo "Internet connectivity test failed"
        fi
      register: network_metrics
      changed_when: false
      ignore_errors: true

    # `systemctl --failed` prints nothing when no unit has failed, so the
    # output is tested for emptiness rather than relying on the exit status.
    # Logs come from /var/log/syslog when that file exists, otherwise from
    # journalctl when it is installed.
    - name: Collect service metrics
      shell: |
        echo "=== SERVICE METRICS ==="
        if command -v systemctl >/dev/null 2>&1; then
          echo "Failed Services:"
          failed_units=$(systemctl --failed --no-legend 2>/dev/null)
          if [ -n "$failed_units" ]; then
            printf '%s\n' "$failed_units"
          else
            echo "No failed services"
          fi
          echo ""
          echo "Active Services (sample):"
          systemctl list-units --type=service --state=active --no-legend | head -10
        else
          echo "Systemd not available"
        fi
        echo ""
        echo "=== LOG SUMMARY ==="
        if [ -f /var/log/syslog ]; then
          echo "Recent system log entries:"
          tail -5 /var/log/syslog 2>/dev/null || echo "Cannot access syslog"
        elif command -v journalctl >/dev/null 2>&1; then
          echo "Recent journal entries:"
          journalctl --no-pager -n 5 2>/dev/null || echo "Cannot access journal"
        else
          echo "No accessible system logs"
        fi
      register: service_metrics
      changed_when: false
      ignore_errors: true

    # cpu_usage is parsed from the "CPU Usage: <n>%" line emitted by
    # "Collect system metrics" and falls back to 0 when that line has no
    # number. The memory figures come from gathered facts; memory_used is
    # total minus ansible_memfree_mb.
    # NOTE(review): despite its name, disk_usage holds size_total of the "/"
    # mount (total size, not space used). Nothing in this play reads it;
    # confirm the intent before relying on it.
    - name: Calculate performance metrics
      set_fact:
        performance_metrics:
          cpu_usage: "{{ (system_metrics.stdout | regex_search('CPU Usage: ([0-9.]+)%', '\\1'))[0] | default('0') | float }}"
          memory_total: "{{ ansible_memtotal_mb }}"
          memory_used: "{{ ansible_memtotal_mb - ansible_memfree_mb }}"
          memory_percent: "{{ ((ansible_memtotal_mb - ansible_memfree_mb) / ansible_memtotal_mb * 100) | round(1) }}"
          disk_usage: "{{ ansible_mounts | selectattr('mount', 'equalto', '/') | map(attribute='size_total') | first | default(0) }}"
          uptime_seconds: "{{ ansible_uptime_seconds }}"

    # Prints the headline figures followed by the raw output of the four
    # collection tasks.
    - name: Display monitoring summary
      debug:
        msg: |
          ==========================================
          📊 MONITORING REPORT - {{ inventory_hostname }}
          ==========================================
          🖥️ PERFORMANCE SUMMARY:
          - CPU Usage: {{ performance_metrics.cpu_usage }}%
          - Memory: {{ performance_metrics.memory_percent }}% ({{ performance_metrics.memory_used }}MB/{{ performance_metrics.memory_total }}MB)
          - Uptime: {{ performance_metrics.uptime_seconds | int // 86400 }} days, {{ (performance_metrics.uptime_seconds | int % 86400) // 3600 }} hours
          📈 DETAILED METRICS:
          {{ system_metrics.stdout }}
          🐳 DOCKER METRICS:
          {{ docker_metrics.stdout }}
          🌐 NETWORK METRICS:
          {{ network_metrics.stdout }}
          🔧 SERVICE METRICS:
          {{ service_metrics.stdout }}
          ==========================================

    # One JSON file per host, written on the control node. Raw command output
    # goes through to_json so it is embedded as a properly escaped JSON
    # string; numeric fields are emitted unquoted.
    - name: Generate comprehensive monitoring report
      copy:
        content: |
          {
            "timestamp": "{{ monitoring_timestamp }}",
            "hostname": "{{ inventory_hostname }}",
            "system_info": {
              "os": "{{ ansible_distribution }} {{ ansible_distribution_version }}",
              "kernel": "{{ ansible_kernel }}",
              "architecture": "{{ ansible_architecture }}",
              "cpu_cores": {{ ansible_processor_vcpus }},
              "memory_mb": {{ ansible_memtotal_mb }}
            },
            "performance": {
              "cpu_usage_percent": {{ performance_metrics.cpu_usage }},
              "memory_usage_percent": {{ performance_metrics.memory_percent }},
              "memory_used_mb": {{ performance_metrics.memory_used }},
              "memory_total_mb": {{ performance_metrics.memory_total }},
              "uptime_seconds": {{ performance_metrics.uptime_seconds }},
              "uptime_days": {{ performance_metrics.uptime_seconds | int // 86400 }}
            },
            "raw_metrics": {
              "system": {{ system_metrics.stdout | to_json }},
              "docker": {{ docker_metrics.stdout | to_json }},
              "network": {{ network_metrics.stdout | to_json }},
              "services": {{ service_metrics.stdout | to_json }}
            }
          }
        dest: "/tmp/monitoring_data/{{ inventory_hostname }}_metrics_{{ ansible_date_time.epoch }}.json"
      delegate_to: localhost

    # Appends one row per host and run:
    #   timestamp,hostname,cpu_usage,memory_percent,uptime_seconds
    # No header row is written.
    - name: Create monitoring trend data
      shell: |
        echo "{{ monitoring_timestamp }},{{ inventory_hostname }},{{ performance_metrics.cpu_usage }},{{ performance_metrics.memory_percent }},{{ performance_metrics.uptime_seconds }}" >> /tmp/monitoring_data/trends.csv
      delegate_to: localhost
      ignore_errors: true

    # Best-effort retention: only *.json reports are pruned; trends.csv is
    # never trimmed by this task.
    - name: Clean old monitoring data
      shell: |
        find /tmp/monitoring_data -name "*.json" -mtime +{{ metrics_retention_days }} -delete 2>/dev/null || true
      delegate_to: localhost
      run_once: true
      ignore_errors: true

    # Final per-host recap pointing at the files written above.
    - name: Summary message
      debug:
        msg: |
          📊 Monitoring complete for {{ inventory_hostname }}
          📄 Report saved to: /tmp/monitoring_data/{{ inventory_hostname }}_metrics_{{ ansible_date_time.epoch }}.json
          📈 Trend data updated in: /tmp/monitoring_data/trends.csv
          Performance Summary:
          - CPU: {{ performance_metrics.cpu_usage }}%
          - Memory: {{ performance_metrics.memory_percent }}%
          - Uptime: {{ performance_metrics.uptime_seconds | int // 86400 }} days