Sanitized mirror from private repository - 2026-03-19 09:41:34 UTC
This commit is contained in:
224
ansible/automation/playbooks/system_monitoring.yml
Normal file
224
ansible/automation/playbooks/system_monitoring.yml
Normal file
@@ -0,0 +1,224 @@
|
||||
---
# Play definition: targets every inventory host, gathers facts, and declares
# the variables shared by the monitoring tasks that follow.
- name: System Monitoring and Metrics Collection
  hosts: all
  gather_facts: true
  vars:
    # ISO 8601 timestamp taken from the gathered date/time facts.
    monitoring_timestamp: "{{ ansible_date_time.iso8601 }}"
    # Age threshold (days) used by the cleanup task when deleting old reports.
    metrics_retention_days: 30

  tasks:
|
||||
# Executed a single time and delegated to localhost: ensures the output
# directory used by the report, trend and cleanup tasks exists.
- name: Create monitoring data directory
  run_once: true
  delegate_to: localhost
  file:
    state: directory
    path: /tmp/monitoring_data
    mode: "0755"
|
||||
|
||||
# Read-only snapshot of host state. The whole script output is registered as
# system_metrics; the "CPU Usage: N%" line is parsed by a later task.
- name: Collect system metrics
  shell: |
    # --- host identity and load ---
    echo "=== SYSTEM METRICS ==="
    echo "Timestamp: $(date -Iseconds)"
    echo "Hostname: $(hostname)"
    echo "Uptime: $(uptime -p)"
    echo "Load: $(uptime | awk -F'load average:' '{print $2}')"
    echo ""

    # --- processor ---
    echo "=== CPU INFORMATION ==="
    echo "CPU Model: $(lscpu | grep 'Model name' | cut -d':' -f2 | xargs)"
    echo "CPU Cores: $(nproc)"
    echo "CPU Usage: $(top -bn1 | grep 'Cpu(s)' | awk '{print $2}' | cut -d'%' -f1)%"
    echo ""

    # --- memory ---
    echo "=== MEMORY INFORMATION ==="
    free -h
    echo ""

    # --- filesystems ---
    echo "=== DISK USAGE ==="
    df -h
    echo ""

    # --- network interfaces ---
    echo "=== NETWORK INTERFACES ==="
    ip -brief addr show
    echo ""

    # --- busiest processes by CPU ---
    echo "=== PROCESS SUMMARY ==="
    ps aux --sort=-%cpu | head -10
    echo ""

    # --- temperatures, only when the sensors binary is present ---
    echo "=== SYSTEM TEMPERATURES (if available) ==="
    if command -v sensors >/dev/null 2>&1; then
      sensors 2>/dev/null || echo "Temperature sensors not available"
    else
      echo "lm-sensors not installed"
    fi
  changed_when: false
  register: system_metrics
|
||||
|
||||
# Collects Docker daemon statistics when the docker CLI exists and the daemon
# answers `docker info`; otherwise prints a single explanatory line.
#
# The `docker stats --format` argument is a Go template that uses {{ ... }}.
# Ansible renders the whole script through Jinja2 first, so the Go template
# is wrapped in a raw block; without it Jinja2 rejects `{{.Container}}` with
# a template syntax error and the task fails before the script ever runs.
- name: Collect Docker metrics (if available)
  shell: |
    if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
      echo "=== DOCKER METRICS ==="
      echo "Docker Version: $(docker --version)"
      echo "Containers Running: $(docker ps -q | wc -l)"
      echo "Containers Total: $(docker ps -aq | wc -l)"
      echo "Images: $(docker images -q | wc -l)"
      echo "Volumes: $(docker volume ls -q | wc -l)"
      echo ""

      echo "=== CONTAINER RESOURCE USAGE ==="
      docker stats --no-stream --format "table {% raw %}{{.Container}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.NetIO}}\t{{.BlockIO}}{% endraw %}" 2>/dev/null || echo "No running containers"
      echo ""

      echo "=== DOCKER SYSTEM INFO ==="
      docker system df 2>/dev/null || echo "Docker system info not available"
    else
      echo "Docker not available or not accessible"
    fi
  register: docker_metrics
  changed_when: false
  # Best-effort collector: a failure here must not stop the play.
  ignore_errors: true
|
||||
|
||||
# Collects listening sockets, Tailscale status and a basic reachability probe.
#
# Fallbacks are written as explicit if/else rather than "cmd | filter || alt":
# the exit status of a pipeline is that of its last command (head / tail), so
# the original "|| ss ..." and "|| echo ... failed" branches could never run.
- name: Collect network metrics
  shell: |
    echo "=== NETWORK METRICS ==="
    echo "Active Connections:"
    # Choose the tool up front so ss is really used when netstat is missing.
    if command -v netstat >/dev/null 2>&1; then
      netstat -tuln 2>/dev/null | head -20
    else
      ss -tuln | head -20
    fi
    echo ""

    echo "=== TAILSCALE STATUS ==="
    if command -v tailscale >/dev/null 2>&1; then
      tailscale status 2>/dev/null || echo "Tailscale not accessible"
    else
      echo "Tailscale not installed"
    fi
    echo ""

    echo "=== INTERNET CONNECTIVITY ==="
    # Capture ping output first so its own exit status decides the branch.
    if ping_output=$(ping -c 3 8.8.8.8 2>/dev/null); then
      printf '%s\n' "$ping_output" | tail -2
    else
      echo "Internet connectivity test failed"
    fi
  register: network_metrics
  changed_when: false
  # Best-effort collector: a failure here must not stop the play.
  ignore_errors: true
|
||||
|
||||
# Collects systemd unit health and the five most recent system log lines.
#
# systemctl exits successfully and prints nothing when no unit has failed, so
# the "No failed services" message is keyed on empty output instead of on the
# exit status; a non-zero exit is reported separately as a query failure.
- name: Collect service metrics
  shell: |
    echo "=== SERVICE METRICS ==="
    if command -v systemctl >/dev/null 2>&1; then
      echo "Failed Services:"
      if failed_units=$(systemctl --failed --no-legend 2>/dev/null); then
        if [ -n "$failed_units" ]; then
          printf '%s\n' "$failed_units"
        else
          echo "No failed services"
        fi
      else
        echo "Unable to query failed services"
      fi
      echo ""

      echo "Active Services (sample):"
      systemctl list-units --type=service --state=active --no-legend | head -10
    else
      echo "Systemd not available"
    fi
    echo ""

    echo "=== LOG SUMMARY ==="
    # Prefer the flat syslog file; fall back to the journal when it is absent.
    if [ -f /var/log/syslog ]; then
      echo "Recent system log entries:"
      tail -5 /var/log/syslog 2>/dev/null || echo "Cannot access syslog"
    elif command -v journalctl >/dev/null 2>&1; then
      echo "Recent journal entries:"
      journalctl --no-pager -n 5 2>/dev/null || echo "Cannot access journal"
    else
      echo "No accessible system logs"
    fi
  register: service_metrics
  changed_when: false
  # Best-effort collector: a failure here must not stop the play.
  ignore_errors: true
|
||||
|
||||
# Derives the per-host performance_metrics fact from the collected script
# output and from gathered memory, mount and uptime facts.
- name: Calculate performance metrics
  vars:
    # Total minus free memory in MB, shared by the two memory fields below.
    _mem_used_mb: "{{ ansible_memtotal_mb - ansible_memfree_mb }}"
  set_fact:
    performance_metrics:
      # Number captured from the "CPU Usage: N%" line of the system metrics
      # output; default('0') is the intended fallback when it is missing.
      cpu_usage: "{{ (system_metrics.stdout | regex_search('CPU Usage: ([0-9.]+)%', '\\1'))[0] | default('0') | float }}"
      memory_total: "{{ ansible_memtotal_mb }}"
      memory_used: "{{ _mem_used_mb }}"
      memory_percent: "{{ ((_mem_used_mb | int) / ansible_memtotal_mb * 100) | round(1) }}"
      # NOTE(review): despite its name this holds size_total of the "/" mount
      # (0 when no such mount is listed), not a usage figure.
      disk_usage: "{{ ansible_mounts | selectattr('mount', 'equalto', '/') | map(attribute='size_total') | first | default(0) }}"
      uptime_seconds: "{{ ansible_uptime_seconds }}"
|
||||
|
||||
# Prints the human-readable report for the current host.
#
# The Docker, network and service collectors run with ignore_errors, so their
# registered results may lack `stdout` when the task itself failed. Each of
# those lookups carries a default so one failed collector cannot abort the
# summary for the whole host.
- name: Display monitoring summary
  debug:
    msg: |

      ==========================================
      📊 MONITORING REPORT - {{ inventory_hostname }}
      ==========================================

      🖥️ PERFORMANCE SUMMARY:
      - CPU Usage: {{ performance_metrics.cpu_usage }}%
      - Memory: {{ performance_metrics.memory_percent }}% ({{ performance_metrics.memory_used }}MB/{{ performance_metrics.memory_total }}MB)
      - Uptime: {{ performance_metrics.uptime_seconds | int // 86400 }} days, {{ (performance_metrics.uptime_seconds | int % 86400) // 3600 }} hours

      📈 DETAILED METRICS:
      {{ system_metrics.stdout }}

      🐳 DOCKER METRICS:
      {{ docker_metrics.stdout | default('Docker metrics not collected') }}

      🌐 NETWORK METRICS:
      {{ network_metrics.stdout | default('Network metrics not collected') }}

      🔧 SERVICE METRICS:
      {{ service_metrics.stdout | default('Service metrics not collected') }}

      ==========================================
|
||||
|
||||
# Writes one JSON report per host into /tmp/monitoring_data on localhost.
#
# Every string value goes through to_json so quotes, backslashes or control
# characters in a fact cannot produce invalid JSON. Numeric fields are cast
# explicitly so an empty value cannot render as a bare comma. The raw output
# of the ignore_errors collectors defaults to an empty string when the task
# failed without producing stdout.
- name: Generate comprehensive monitoring report
  copy:
    content: |
      {
        "timestamp": {{ monitoring_timestamp | to_json }},
        "hostname": {{ inventory_hostname | to_json }},
        "system_info": {
          "os": {{ (ansible_distribution ~ ' ' ~ ansible_distribution_version) | to_json }},
          "kernel": {{ ansible_kernel | to_json }},
          "architecture": {{ ansible_architecture | to_json }},
          "cpu_cores": {{ ansible_processor_vcpus | int }},
          "memory_mb": {{ ansible_memtotal_mb | int }}
        },
        "performance": {
          "cpu_usage_percent": {{ performance_metrics.cpu_usage | float }},
          "memory_usage_percent": {{ performance_metrics.memory_percent | float }},
          "memory_used_mb": {{ performance_metrics.memory_used | int }},
          "memory_total_mb": {{ performance_metrics.memory_total | int }},
          "uptime_seconds": {{ performance_metrics.uptime_seconds | int }},
          "uptime_days": {{ performance_metrics.uptime_seconds | int // 86400 }}
        },
        "raw_metrics": {
          "system": {{ system_metrics.stdout | to_json }},
          "docker": {{ docker_metrics.stdout | default('') | to_json }},
          "network": {{ network_metrics.stdout | default('') | to_json }},
          "services": {{ service_metrics.stdout | default('') | to_json }}
        }
      }
    dest: "/tmp/monitoring_data/{{ inventory_hostname }}_metrics_{{ ansible_date_time.epoch }}.json"
    # Explicit mode, consistent with the 0755 directory created earlier.
    mode: "0644"
  delegate_to: localhost
|
||||
|
||||
# Appends one CSV row (timestamp, host, cpu, memory %, uptime seconds) to the
# shared trends file on localhost. The folded scalar joins the two lines
# below into a single shell command.
- name: Create monitoring trend data
  delegate_to: localhost
  ignore_errors: true
  shell: >-
    echo "{{ monitoring_timestamp }},{{ inventory_hostname }},{{ performance_metrics.cpu_usage }},{{ performance_metrics.memory_percent }},{{ performance_metrics.uptime_seconds }}"
    >> /tmp/monitoring_data/trends.csv
|
||||
|
||||
# Runs once on localhost: deletes JSON reports whose modification time is
# older than metrics_retention_days. Errors from find are discarded and the
# command always exits successfully. The folded scalar joins the lines below
# into a single shell command.
- name: Clean old monitoring data
  run_once: true
  delegate_to: localhost
  ignore_errors: true
  shell: >-
    find /tmp/monitoring_data -name "*.json"
    -mtime +{{ metrics_retention_days }} -delete
    2>/dev/null || true
|
||||
|
||||
# Closing per-host message: repeats where the JSON report and the trend CSV
# were written, followed by the headline CPU, memory and uptime figures.
- name: Summary message
  debug:
    msg: |

      📊 Monitoring complete for {{ inventory_hostname }}
      📄 Report saved to: /tmp/monitoring_data/{{ inventory_hostname }}_metrics_{{ ansible_date_time.epoch }}.json
      📈 Trend data updated in: /tmp/monitoring_data/trends.csv

      Performance Summary:
      - CPU: {{ performance_metrics.cpu_usage }}%
      - Memory: {{ performance_metrics.memory_percent }}%
      - Uptime: {{ performance_metrics.uptime_seconds | int // 86400 }} days
|
||||
Reference in New Issue
Block a user