Files
homelab-optimized/ansible/automation/playbooks/system_metrics.yml
Gitea Mirror Bot e7652c8dab
Some checks failed
Documentation / Build Docusaurus (push) Failing after 5m3s
Documentation / Deploy to GitHub Pages (push) Has been skipped
Sanitized mirror from private repository - 2026-04-20 01:32:01 UTC
2026-04-20 01:32:01 +00:00

260 lines
12 KiB
YAML

---
# Playbook: system metrics collection.
# Gathers facts on every inventory host; the tasks below write their output
# into per-host directories under metrics_dir.
#
# Run with the default sampling window:
#   ansible-playbook playbooks/system_metrics.yml
# Override the sampling window (in seconds):
#   ansible-playbook playbooks/system_metrics.yml -e "metrics_duration=300"
- name: Collect System Metrics
  hosts: all
  gather_facts: true
  vars:
    # Seconds between two consecutive samples.
    collection_interval: 5
    # Sampling window in seconds, used when metrics_duration is not supplied.
    default_metrics_duration: 60
    # Root output directory; tasks append the inventory hostname to it.
    metrics_dir: '/tmp/metrics'
  tasks:
# Ensure the per-host output directory exists (created if missing, mode 0755).
- name: Create metrics directory
  file:
    state: directory
    path: "{{ metrics_dir }}/{{ inventory_hostname }}"
    mode: "0755"
# Print what is about to be collected: host, date, sampling window, cadence
# and the output directory. Purely informational; changes nothing on the host.
- name: Display metrics collection plan
  vars:
    # Effective window: the caller's metrics_duration, else the play default.
    shown_duration: "{{ metrics_duration | default(default_metrics_duration) }}"
    shown_output_dir: "{{ metrics_dir }}/{{ inventory_hostname }}"
  debug:
    msg: |
      📊 SYSTEM METRICS COLLECTION
      ===========================
      🖥️ Host: {{ inventory_hostname }}
      📅 Date: {{ ansible_date_time.date }}
      ⏱️ Duration: {{ shown_duration }}s
      📈 Interval: {{ collection_interval }}s
      📁 Output: {{ shown_output_dir }}
# Write a one-off snapshot of the host (facts gathered by Ansible plus CPU,
# memory, disk and network details read on the host) to system_info_<epoch>.txt.
- name: Collect baseline system information
  shell: |
    info_file="{{ metrics_dir }}/{{ inventory_hostname }}/system_info_{{ ansible_date_time.epoch }}.txt"

    # A single grouped redirect truncates the file once and writes every
    # section into it in order.
    {
      echo "📊 SYSTEM BASELINE INFORMATION"
      echo "=============================="
      echo "Host: {{ inventory_hostname }}"
      echo "Date: {{ ansible_date_time.iso8601 }}"
      echo "OS: {{ ansible_distribution }} {{ ansible_distribution_version }}"
      echo "Kernel: {{ ansible_kernel }}"
      echo "Architecture: {{ ansible_architecture }}"
      echo "CPU Cores: {{ ansible_processor_vcpus }}"
      echo "Total Memory: {{ ansible_memtotal_mb }}MB"
      echo ""

      echo "🖥️ CPU INFORMATION:"
      grep -E "model name|cpu MHz|cache size" /proc/cpuinfo | head -10
      echo ""

      echo "💾 MEMORY INFORMATION:"
      head -10 /proc/meminfo
      echo ""

      echo "💿 DISK INFORMATION:"
      lsblk -o NAME,SIZE,TYPE,MOUNTPOINT
      echo ""

      echo "🌐 NETWORK INTERFACES:"
      ip addr show | grep -E "^[0-9]+:|inet "
    } > "$info_file"

    # This line is the task's stdout, reused by the final results display.
    echo "Baseline info saved to: $info_file"
  register: baseline_info
# Sample host metrics into metrics_<epoch>.csv, one row per collection_interval,
# for metrics_duration seconds (default: default_metrics_duration).
#
# Columns: timestamp, CPU %, memory % used, memory available (MB), load
# averages, root disk %, cumulative RX/TX bytes summed over all interfaces,
# process counts and running Docker containers.
#
# The loop is paced against wall-clock ticks and bounded by a deadline, so the
# time spent inside each sample (vmstat alone blocks ~1s) cannot push the run
# past the async limit of duration + 30s.
# The script sticks to POSIX sh because the shell module runs /bin/sh.
- name: Start continuous metrics collection
  shell: |
    metrics_file="{{ metrics_dir }}/{{ inventory_hostname }}/metrics_{{ ansible_date_time.epoch }}.csv"
    duration={{ (metrics_duration | default(default_metrics_duration)) | int }}
    interval={{ collection_interval | int }}

    # Create CSV header
    echo "timestamp,cpu_usage,memory_usage,memory_available,load_1min,load_5min,load_15min,disk_usage_root,network_rx_bytes,network_tx_bytes,processes_total,processes_running,docker_containers_running" > "$metrics_file"

    echo "📈 Starting metrics collection for {{ metrics_duration | default(default_metrics_duration) }} seconds..."

    # Probe Docker once instead of on every sample. POSIX redirection is used
    # on purpose: under dash, '&>' backgrounds the command and the test would
    # always succeed.
    if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
      docker_available=1
    else
      docker_available=0
    fi

    samples=0
    max_samples=$((duration / interval))
    start_epoch=$(date +%s)
    deadline=$((start_epoch + duration))

    # Stop at max_samples, or at the deadline if samples take longer than the
    # interval; the summary task reports collected vs expected samples.
    while [ "$samples" -lt "$max_samples" ] && [ "$(date +%s)" -lt "$deadline" ]; do
      timestamp=$(date '+%Y-%m-%d %H:%M:%S')

      # CPU usage (100 - idle percentage); the second vmstat report covers the
      # last second, so this call takes about one second.
      cpu_usage=$(vmstat 1 2 | tail -1 | awk '{print 100-$15}')

      # Memory usage, computed with awk so a host without bc does not silently
      # record 0. Rounded to one decimal; guarded against a zero total.
      memory_info=$(free -m)
      memory_available=$(echo "$memory_info" | awk 'NR==2{print $7}')
      memory_usage=$(echo "$memory_info" | awk 'NR==2{ if ($2 > 0) printf "%.1f", $3 * 100 / $2; else printf "0" }')

      # Load averages straight from the kernel; parsing uptime output breaks
      # the CSV in locales that print decimals with a comma.
      read -r load_1min load_5min load_15min _ < /proc/loadavg

      # Disk usage for root partition
      disk_usage=$(df / | awk 'NR==2{print $5}' | sed 's/%//')

      # Network stats; sum+0 yields 0 rather than an empty field when no
      # interface counters are readable.
      current_rx=$(cat /sys/class/net/*/statistics/rx_bytes 2>/dev/null | awk '{sum+=$1} END {print sum+0}')
      current_tx=$(cat /sys/class/net/*/statistics/tx_bytes 2>/dev/null | awk '{sum+=$1} END {print sum+0}')

      # Process counts; 'pid=' suppresses the header so it is not counted.
      processes_total=$(ps -e -o pid= | wc -l)
      processes_running=$(ps aux | awk '$8 ~ /^R/ {count++} END {print count+0}')

      # Docker container count (0 when Docker is unavailable)
      if [ "$docker_available" -eq 1 ]; then
        docker_containers=$(docker ps -q 2>/dev/null | wc -l)
      else
        docker_containers=0
      fi

      # Write metrics to CSV
      echo "$timestamp,$cpu_usage,$memory_usage,$memory_available,$load_1min,$load_5min,$load_15min,$disk_usage,$current_rx,$current_tx,$processes_total,$processes_running,$docker_containers" >> "$metrics_file"

      samples=$((samples + 1))
      echo "Sample $samples/$max_samples collected..."

      # Sleep only until the next scheduled tick, and not after the last sample.
      next_tick=$((start_epoch + samples * interval))
      now=$(date +%s)
      if [ "$samples" -lt "$max_samples" ] && [ "$next_tick" -gt "$now" ]; then
        sleep $((next_tick - now))
      fi
    done

    echo "✅ Metrics collection complete: $metrics_file"
  register: metrics_collection
  async: "{{ ((metrics_duration | default(default_metrics_duration)) | int) + 30 }}"
  poll: 10
# Write a Docker report (disk usage, per-container stats, running containers
# and a short `top` from inside each container) to docker_metrics_<epoch>.txt.
# When the docker CLI is missing or the daemon is unreachable, no file is
# written and the task only prints a skip message.
# Best-effort by design: failed_when is false, so this task never fails the play.
- name: Collect Docker metrics (if available)
  shell: |
    docker_file="{{ metrics_dir }}/{{ inventory_hostname }}/docker_metrics_{{ ansible_date_time.epoch }}.txt"

    # POSIX redirection on purpose: the shell module runs /bin/sh, and under
    # dash '&>' backgrounds the command, which made this test always true.
    if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
      echo "🐳 DOCKER METRICS" > "$docker_file"
      echo "=================" >> "$docker_file"
      echo "Timestamp: {{ ansible_date_time.iso8601 }}" >> "$docker_file"
      echo "" >> "$docker_file"

      echo "📊 DOCKER SYSTEM INFO:" >> "$docker_file"
      docker system df >> "$docker_file" 2>/dev/null || echo "Cannot get Docker system info" >> "$docker_file"
      echo "" >> "$docker_file"

      # The {{ '{{' }} ... {{ '}}' }} escapes render as literal Go-template
      # braces for docker's --format option.
      echo "📦 CONTAINER STATS:" >> "$docker_file"
      docker stats --no-stream --format "table {{ '{{' }}.Container{{ '}}' }}\t{{ '{{' }}.CPUPerc{{ '}}' }}\t{{ '{{' }}.MemUsage{{ '}}' }}\t{{ '{{' }}.MemPerc{{ '}}' }}\t{{ '{{' }}.NetIO{{ '}}' }}\t{{ '{{' }}.BlockIO{{ '}}' }}" >> "$docker_file" 2>/dev/null || echo "Cannot get container stats" >> "$docker_file"
      echo "" >> "$docker_file"

      echo "🏃 RUNNING CONTAINERS:" >> "$docker_file"
      docker ps --format "table {{ '{{' }}.Names{{ '}}' }}\t{{ '{{' }}.Image{{ '}}' }}\t{{ '{{' }}.Status{{ '}}' }}\t{{ '{{' }}.Ports{{ '}}' }}" >> "$docker_file" 2>/dev/null || echo "Cannot list containers" >> "$docker_file"
      echo "" >> "$docker_file"

      # Containers without a shell or `top` fall through to the
      # "Cannot access" line instead of aborting the loop.
      echo "🔍 CONTAINER RESOURCE USAGE:" >> "$docker_file"
      for container in $(docker ps --format "{{ '{{' }}.Names{{ '}}' }}" 2>/dev/null); do
        echo "--- $container ---" >> "$docker_file"
        docker exec "$container" sh -c 'top -bn1 | head -5' >> "$docker_file" 2>/dev/null || echo "Cannot access container $container" >> "$docker_file"
        echo "" >> "$docker_file"
      done

      echo "Docker metrics saved to: $docker_file"
    else
      echo "Docker not available - skipping Docker metrics"
    fi
  register: docker_metrics
  failed_when: false
# Write a network snapshot (interface counters, listening sockets, routing
# table, resolver configuration) to network_metrics_<epoch>.txt.
# Each section falls back to an alternative tool, then to a "Cannot ..." line.
- name: Collect network metrics
  shell: |
    network_file="{{ metrics_dir }}/{{ inventory_hostname }}/network_metrics_{{ ansible_date_time.epoch }}.txt"

    echo "🌐 NETWORK METRICS" > "$network_file"
    echo "==================" >> "$network_file"
    echo "Timestamp: {{ ansible_date_time.iso8601 }}" >> "$network_file"
    echo "" >> "$network_file"

    echo "🔌 INTERFACE STATISTICS:" >> "$network_file"
    cat /proc/net/dev >> "$network_file"
    echo "" >> "$network_file"

    # Pick the tool explicitly: in "netstat | head || ss" the exit status is
    # head's, which always succeeds, so the ss fallback could never run on
    # hosts without net-tools.
    echo "🔗 ACTIVE CONNECTIONS:" >> "$network_file"
    if command -v netstat >/dev/null 2>&1; then
      netstat -tuln 2>/dev/null | head -20 >> "$network_file"
    elif command -v ss >/dev/null 2>&1; then
      ss -tuln 2>/dev/null | head -20 >> "$network_file"
    else
      echo "Cannot get connection info" >> "$network_file"
    fi
    echo "" >> "$network_file"

    echo "📡 ROUTING TABLE:" >> "$network_file"
    ip route >> "$network_file" 2>/dev/null || route -n >> "$network_file" 2>/dev/null || echo "Cannot get routing info" >> "$network_file"
    echo "" >> "$network_file"

    echo "🌍 DNS CONFIGURATION:" >> "$network_file"
    cat /etc/resolv.conf >> "$network_file" 2>/dev/null || echo "Cannot read DNS config" >> "$network_file"

    echo "Network metrics saved to: $network_file"
  register: network_metrics
# Build metrics_summary_<epoch>.txt: run parameters, collected vs expected
# sample counts, min/max of CPU, memory and 1-minute load from the CSV, and a
# listing of the files generated for this run. If the CSV is missing, only a
# warning is recorded.
- name: Generate metrics summary
  shell: |
    out_dir="{{ metrics_dir }}/{{ inventory_hostname }}"
    stamp="{{ ansible_date_time.epoch }}"
    summary_file="${out_dir}/metrics_summary_${stamp}.txt"
    metrics_csv="${out_dir}/metrics_${stamp}.csv"

    # column_range COL UNIT
    # Prints the smallest and largest value of CSV column COL (header row
    # skipped), each followed by UNIT. Input is sorted numerically, so the
    # first row is the minimum and the last row seen is the maximum.
    column_range() {
      tail -n +2 "$metrics_csv" | awk -F',' -v col="$1" '{print $col}' | sort -n |
        awk -v unit="$2" 'NR==1{min=$1} {max=$1} END{print " Min: " min unit ", Max: " max unit}'
    }

    # Header: truncate the file and record the run parameters.
    {
      echo "📊 METRICS COLLECTION SUMMARY"
      echo "============================="
      echo "Host: {{ inventory_hostname }}"
      echo "Date: {{ ansible_date_time.iso8601 }}"
      echo "Duration: {{ metrics_duration | default(default_metrics_duration) }}s"
      echo "Interval: {{ collection_interval }}s"
      echo ""
    } > "$summary_file"

    if [ ! -f "$metrics_csv" ]; then
      echo "⚠️ WARNING: Metrics CSV file not found" >> "$summary_file"
    else
      # Data rows only; the first CSV line is the header.
      sample_count=$(tail -n +2 "$metrics_csv" | wc -l)

      {
        echo "📈 COLLECTION STATISTICS:"
        echo "Samples collected: $sample_count"
        echo "Expected samples: $(( {{ metrics_duration | default(default_metrics_duration) }} / {{ collection_interval }} ))"
        echo ""

        echo "📊 METRIC RANGES:"
        echo "CPU Usage:"
        column_range 2 "%"
        echo "Memory Usage:"
        column_range 3 "%"
        echo "Load Average (1min):"
        column_range 5 ""
        echo ""

        echo "📁 GENERATED FILES:"
        ls -la {{ metrics_dir }}/{{ inventory_hostname }}/*{{ ansible_date_time.epoch }}* 2>/dev/null || echo "No files found"
      } >> "$summary_file"
    fi

    echo "Summary saved to: $summary_file"
  register: metrics_summary
# Final report: echo the stdout registered by each collector task and list
# follow-up commands. Purely informational; changes nothing on the host.
- name: Display metrics collection results
  vars:
    # Effective window: the caller's metrics_duration, else the play default.
    shown_duration: "{{ metrics_duration | default(default_metrics_duration) }}"
    shown_output_dir: "{{ metrics_dir }}/{{ inventory_hostname }}"
  debug:
    msg: |
      📊 METRICS COLLECTION COMPLETE
      ==============================
      🖥️ Host: {{ inventory_hostname }}
      📅 Date: {{ ansible_date_time.date }}
      ⏱️ Duration: {{ shown_duration }}s
      📁 Generated Files:
      {{ baseline_info.stdout }}
      {{ metrics_collection.stdout }}
      {{ docker_metrics.stdout | default('Docker metrics: N/A') }}
      {{ network_metrics.stdout }}
      {{ metrics_summary.stdout }}
      🔍 Next Steps:
      - Analyze metrics: cat {{ shown_output_dir }}/metrics_*.csv
      - View summary: cat {{ shown_output_dir }}/metrics_summary_*.txt
      - Plot trends: Use the CSV data with your preferred visualization tool
      - Set up monitoring: ansible-playbook playbooks/alert_check.yml
      ==============================