260 lines
12 KiB
YAML
260 lines
12 KiB
YAML
---
|
|
# System Metrics Collection Playbook
|
|
# Collects detailed system metrics for monitoring and analysis
|
|
# Usage: ansible-playbook playbooks/system_metrics.yml
|
|
# Usage: ansible-playbook playbooks/system_metrics.yml -e "metrics_duration=300"
|
|
|
|
# Play: gather facts on every inventory host, then write metric reports under
# <metrics_dir>/<inventory_hostname>/ on that host.
- name: Collect System Metrics
  hosts: all
  # Facts are required: the tasks read ansible_date_time, ansible_distribution,
  # ansible_kernel and similar variables.
  gather_facts: true
  vars:
    metrics_dir: "/tmp/metrics"
    # Sampling window in seconds, used when metrics_duration is not supplied
    # on the command line (for example -e "metrics_duration=300").
    default_metrics_duration: 60
    collection_interval: 5  # seconds between samples

  tasks:
|
|
# Ensure the per-host output directory exists before anything writes into it.
- name: Create metrics directory
  file:
    state: directory
    path: "{{ metrics_dir }}/{{ inventory_hostname }}"
    mode: "0755"
|
|
|
|
# Print the collection plan: host, date, duration, sampling interval and
# output directory. The duration falls back to default_metrics_duration when
# metrics_duration is not defined.
- name: Display metrics collection plan
  debug:
    msg: |
      📊 SYSTEM METRICS COLLECTION
      ===========================
      🖥️ Host: {{ inventory_hostname }}
      📅 Date: {{ ansible_date_time.date }}
      ⏱️ Duration: {{ metrics_duration | default(default_metrics_duration) }}s
      📈 Interval: {{ collection_interval }}s
      📁 Output: {{ metrics_dir }}/{{ inventory_hostname }}
|
|
|
|
# Write a one-off baseline report (facts plus CPU, memory, disk and network
# details) to system_info_<epoch>.txt in the host's metrics directory.
# The last line of stdout names the file and is shown in the final summary.
- name: Collect baseline system information
  shell: |
    info_file="{{ metrics_dir }}/{{ inventory_hostname }}/system_info_{{ ansible_date_time.epoch }}.txt"

    # Everything inside the braces goes to the report file through one redirect.
    {
      # Values taken from the facts Ansible already gathered
      echo "📊 SYSTEM BASELINE INFORMATION"
      echo "=============================="
      echo "Host: {{ inventory_hostname }}"
      echo "Date: {{ ansible_date_time.iso8601 }}"
      echo "OS: {{ ansible_distribution }} {{ ansible_distribution_version }}"
      echo "Kernel: {{ ansible_kernel }}"
      echo "Architecture: {{ ansible_architecture }}"
      echo "CPU Cores: {{ ansible_processor_vcpus }}"
      echo "Total Memory: {{ ansible_memtotal_mb }}MB"
      echo ""

      # First ten matching lines of /proc/cpuinfo
      echo "🖥️ CPU INFORMATION:"
      grep -E "model name|cpu MHz|cache size" /proc/cpuinfo | head -10
      echo ""

      # First ten lines of /proc/meminfo
      echo "💾 MEMORY INFORMATION:"
      head -10 /proc/meminfo
      echo ""

      echo "💿 DISK INFORMATION:"
      lsblk -o NAME,SIZE,TYPE,MOUNTPOINT
      echo ""

      # Interface header lines and IPv4 address lines only
      echo "🌐 NETWORK INTERFACES:"
      ip addr show | grep -E "^[0-9]+:|inet "
    } > "$info_file"

    echo "Baseline info saved to: $info_file"
  register: baseline_info
|
|
|
|
# Sample host metrics into metrics_<epoch>.csv: one row per collection_interval
# seconds for metrics_duration seconds (default_metrics_duration when unset).
# The task runs asynchronously with a timeout of duration + 30 seconds and is
# polled every 10 seconds, so the loop below must finish within the duration.
- name: Start continuous metrics collection
  shell: |
    metrics_file="{{ metrics_dir }}/{{ inventory_hostname }}/metrics_{{ ansible_date_time.epoch }}.csv"
    duration={{ metrics_duration | default(default_metrics_duration) }}
    interval={{ collection_interval }}

    # Create CSV header
    echo "timestamp,cpu_usage,memory_usage,memory_available,load_1min,load_5min,load_15min,disk_usage_root,network_rx_bytes,network_tx_bytes,processes_total,processes_running,docker_containers_running" > "$metrics_file"

    echo "📈 Starting metrics collection for $duration seconds..."

    samples=0
    max_samples=$(( duration / interval ))
    start_epoch=$(date +%s)
    deadline=$(( start_epoch + duration ))

    # Stop at max_samples, or at the deadline if sampling itself is slower than
    # the interval, so the async timeout is not exceeded.
    while [ "$samples" -lt "$max_samples" ] && [ "$(date +%s)" -lt "$deadline" ]; do
      timestamp=$(date '+%Y-%m-%d %H:%M:%S')

      # CPU usage (100 - idle percentage). "vmstat 1 2" blocks for one second;
      # the last report line is the one used.
      cpu_usage=$(vmstat 1 2 | tail -1 | awk '{print 100-$15}')

      # Memory usage (values in MB, taken from the "Mem:" row of free)
      memory_info=$(free -m)
      memory_total=$(echo "$memory_info" | awk 'NR==2{print $2}')
      memory_used=$(echo "$memory_info" | awk 'NR==2{print $3}')
      memory_available=$(echo "$memory_info" | awk 'NR==2{print $7}')
      # Falls back to 0 when bc is missing or the calculation fails
      memory_usage=$(echo "scale=1; $memory_used * 100 / $memory_total" | bc -l 2>/dev/null || echo "0")

      # Load averages, read straight from the kernel instead of parsing uptime,
      # whose number format can follow the locale and inject commas into the CSV
      read -r load_1min load_5min load_15min _ < /proc/loadavg

      # Disk usage for root partition (-P keeps the entry on a single line)
      disk_usage=$(df -P / | awk 'NR==2{print $5}' | sed 's/%//')

      # Network byte counters summed over all interfaces. printf "%.0f" keeps
      # large totals in plain digits (some awk builds print big sums in
      # exponent notation) and yields 0 when nothing is readable.
      current_rx=$(cat /sys/class/net/*/statistics/rx_bytes 2>/dev/null | awk '{sum+=$1} END {printf "%.0f\n", sum}')
      current_tx=$(cat /sys/class/net/*/statistics/tx_bytes 2>/dev/null | awk '{sum+=$1} END {printf "%.0f\n", sum}')

      # Process counts (NR>1 skips the ps header line; column 8 is STAT)
      processes_total=$(ps aux | awk 'NR>1 {count++} END {print count+0}')
      processes_running=$(ps aux | awk '$8 ~ /^R/ {count++} END {print count+0}')

      # Docker container count (if available). POSIX redirection is used because
      # the shell module runs /bin/sh by default, where the bash-only "&>" is
      # parsed as "run in background" and would make this test always succeed.
      if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
        docker_containers=$(docker ps -q | wc -l)
      else
        docker_containers=0
      fi

      # Write metrics to CSV
      echo "$timestamp,$cpu_usage,$memory_usage,$memory_available,$load_1min,$load_5min,$load_15min,$disk_usage,$current_rx,$current_tx,$processes_total,$processes_running,$docker_containers" >> "$metrics_file"

      samples=$((samples + 1))
      echo "Sample $samples/$max_samples collected..."

      # Sleep only for what is left of this sample's slot. Taking a sample costs
      # at least one second (vmstat), so sleeping the full interval every time
      # would stretch the run well beyond the requested duration.
      if [ "$samples" -lt "$max_samples" ]; then
        now=$(date +%s)
        next_due=$(( start_epoch + samples * interval ))
        if [ "$next_due" -gt "$now" ]; then
          sleep $(( next_due - now ))
        fi
      fi
    done

    echo "✅ Metrics collection complete: $metrics_file"
  register: metrics_collection
  async: "{{ ((metrics_duration | default(default_metrics_duration)) | int) + 30 }}"
  poll: 10
|
|
|
|
# Snapshot Docker disk usage, per-container stats, the running-container list
# and a short "top" from inside each container into docker_metrics_<epoch>.txt.
# When the docker CLI or daemon is unavailable only a notice is printed.
# failed_when: false keeps this optional task from ever failing the play.
- name: Collect Docker metrics (if available)
  shell: |
    docker_file="{{ metrics_dir }}/{{ inventory_hostname }}/docker_metrics_{{ ansible_date_time.epoch }}.txt"

    # POSIX redirection on purpose: the shell module runs /bin/sh by default,
    # and in shells such as dash the bash-only "&>" backgrounds the command,
    # which made this availability test succeed even without Docker.
    if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
      # Everything inside the braces goes to the report file.
      {
        echo "🐳 DOCKER METRICS"
        echo "================="
        echo "Timestamp: {{ ansible_date_time.iso8601 }}"
        echo ""

        echo "📊 DOCKER SYSTEM INFO:"
        docker system df 2>/dev/null || echo "Cannot get Docker system info"
        echo ""

        # The {{ '{{' }} ... {{ '}}' }} pairs are Jinja escapes that render the
        # literal double braces Docker's --format templates expect.
        echo "📦 CONTAINER STATS:"
        docker stats --no-stream --format "table {{ '{{' }}.Container{{ '}}' }}\t{{ '{{' }}.CPUPerc{{ '}}' }}\t{{ '{{' }}.MemUsage{{ '}}' }}\t{{ '{{' }}.MemPerc{{ '}}' }}\t{{ '{{' }}.NetIO{{ '}}' }}\t{{ '{{' }}.BlockIO{{ '}}' }}" 2>/dev/null || echo "Cannot get container stats"
        echo ""

        echo "🏃 RUNNING CONTAINERS:"
        docker ps --format "table {{ '{{' }}.Names{{ '}}' }}\t{{ '{{' }}.Image{{ '}}' }}\t{{ '{{' }}.Status{{ '}}' }}\t{{ '{{' }}.Ports{{ '}}' }}" 2>/dev/null || echo "Cannot list containers"
        echo ""

        # Best effort: containers without sh/top just get a "Cannot access" line
        echo "🔍 CONTAINER RESOURCE USAGE:"
        for container in $(docker ps --format "{{ '{{' }}.Names{{ '}}' }}" 2>/dev/null); do
          echo "--- $container ---"
          docker exec "$container" sh -c 'top -bn1 | head -5' 2>/dev/null || echo "Cannot access container $container"
          echo ""
        done
      } > "$docker_file"

      echo "Docker metrics saved to: $docker_file"
    else
      echo "Docker not available - skipping Docker metrics"
    fi
  register: docker_metrics
  failed_when: false
|
|
|
|
# Write interface counters, listening sockets, the routing table and the DNS
# resolver configuration to network_metrics_<epoch>.txt.
# The last line of stdout names the file and is shown in the final summary.
- name: Collect network metrics
  shell: |
    network_file="{{ metrics_dir }}/{{ inventory_hostname }}/network_metrics_{{ ansible_date_time.epoch }}.txt"

    # Everything inside the braces goes to the report file.
    {
      echo "🌐 NETWORK METRICS"
      echo "=================="
      echo "Timestamp: {{ ansible_date_time.iso8601 }}"
      echo ""

      echo "🔌 INTERFACE STATISTICS:"
      cat /proc/net/dev
      echo ""

      # Pick the tool explicitly. Chaining "netstat ... | head || ss ..." never
      # reaches the fallback, because the pipeline's exit status is head's.
      echo "🔗 ACTIVE CONNECTIONS:"
      if command -v netstat >/dev/null 2>&1; then
        netstat -tuln 2>/dev/null | head -20
      elif command -v ss >/dev/null 2>&1; then
        ss -tuln 2>/dev/null | head -20
      else
        echo "Cannot get connection info"
      fi
      echo ""

      # No pipe here, so the || chain really does fall through on failure
      echo "📡 ROUTING TABLE:"
      ip route 2>/dev/null || route -n 2>/dev/null || echo "Cannot get routing info"
      echo ""

      echo "🌍 DNS CONFIGURATION:"
      cat /etc/resolv.conf 2>/dev/null || echo "Cannot read DNS config"
    } > "$network_file"

    echo "Network metrics saved to: $network_file"
  register: network_metrics
|
|
|
|
# Summarise the run into metrics_summary_<epoch>.txt: collection parameters,
# sample counts, min/max of CPU, memory and 1-minute load taken from the CSV,
# and a listing of every file generated for this epoch.
- name: Generate metrics summary
  shell: |
    summary_file="{{ metrics_dir }}/{{ inventory_hostname }}/metrics_summary_{{ ansible_date_time.epoch }}.txt"
    metrics_csv="{{ metrics_dir }}/{{ inventory_hostname }}/metrics_{{ ansible_date_time.epoch }}.csv"

    # column_range COLUMN [SUFFIX]
    # Print the smallest and largest value of one CSV column (header skipped).
    # After the numeric sort the first row is the minimum and the last row
    # the maximum; SUFFIX is appended to both values.
    column_range() {
      tail -n +2 "$metrics_csv" | awk -F',' -v col="$1" '{print $col}' | sort -n |
        awk -v unit="$2" 'NR==1{min=$1} {max=$1} END{print " Min: " min unit ", Max: " max unit}'
    }

    # Everything inside the braces goes to the summary file.
    {
      echo "📊 METRICS COLLECTION SUMMARY"
      echo "============================="
      echo "Host: {{ inventory_hostname }}"
      echo "Date: {{ ansible_date_time.iso8601 }}"
      echo "Duration: {{ metrics_duration | default(default_metrics_duration) }}s"
      echo "Interval: {{ collection_interval }}s"
      echo ""

      if [ ! -f "$metrics_csv" ]; then
        echo "⚠️ WARNING: Metrics CSV file not found"
      else
        # Data rows only; the first CSV line is the header
        sample_count=$(tail -n +2 "$metrics_csv" | wc -l)
        echo "📈 COLLECTION STATISTICS:"
        echo "Samples collected: $sample_count"
        echo "Expected samples: $(( {{ metrics_duration | default(default_metrics_duration) }} / {{ collection_interval }} ))"
        echo ""

        # CSV columns: 2 = cpu_usage, 3 = memory_usage, 5 = load_1min
        echo "📊 METRIC RANGES:"
        echo "CPU Usage:"
        column_range 2 "%"

        echo "Memory Usage:"
        column_range 3 "%"

        echo "Load Average (1min):"
        column_range 5 ""

        echo ""
        echo "📁 GENERATED FILES:"
        ls -la {{ metrics_dir }}/{{ inventory_hostname }}/*{{ ansible_date_time.epoch }}* 2>/dev/null || echo "No files found"
      fi
    } > "$summary_file"

    echo "Summary saved to: $summary_file"
  register: metrics_summary
|
|
|
|
# Final report: echoes the stdout registered by each collection task (each one
# ends with a "... saved to: <file>" line) followed by suggested next steps.
# The Docker line falls back to a placeholder if no stdout was registered.
- name: Display metrics collection results
  debug:
    msg: |

      📊 METRICS COLLECTION COMPLETE
      ==============================
      🖥️ Host: {{ inventory_hostname }}
      📅 Date: {{ ansible_date_time.date }}
      ⏱️ Duration: {{ metrics_duration | default(default_metrics_duration) }}s

      📁 Generated Files:
      {{ baseline_info.stdout }}
      {{ metrics_collection.stdout }}
      {{ docker_metrics.stdout | default('Docker metrics: N/A') }}
      {{ network_metrics.stdout }}
      {{ metrics_summary.stdout }}

      🔍 Next Steps:
      - Analyze metrics: cat {{ metrics_dir }}/{{ inventory_hostname }}/metrics_*.csv
      - View summary: cat {{ metrics_dir }}/{{ inventory_hostname }}/metrics_summary_*.txt
      - Plot trends: Use the CSV data with your preferred visualization tool
      - Set up monitoring: ansible-playbook playbooks/alert_check.yml

      ==============================
|