Sanitized mirror from private repository - 2026-03-18 10:31:50 UTC
This commit is contained in:
259
ansible/automation/playbooks/system_metrics.yml
Normal file
259
ansible/automation/playbooks/system_metrics.yml
Normal file
@@ -0,0 +1,259 @@
|
||||
---
# System Metrics Collection Playbook
# Collects detailed system metrics for monitoring and analysis
# Usage: ansible-playbook playbooks/system_metrics.yml
# Usage: ansible-playbook playbooks/system_metrics.yml -e "metrics_duration=300"
#
# Inputs:
#   metrics_duration     optional extra var; total sampling window in seconds
#                        (falls back to default_metrics_duration)
#   collection_interval  whole seconds between the start of consecutive samples
#
# Every task writes its report on the managed host into
# <metrics_dir>/<inventory_hostname>/, and all file names of one run share the
# epoch captured at fact-gathering time.
#
# The embedded scripts are written for POSIX sh, because the shell module runs
# /bin/sh unless told otherwise. They read /proc and /sys directly.

- name: Collect System Metrics
  hosts: all
  gather_facts: true
  vars:
    metrics_dir: "/tmp/metrics"
    default_metrics_duration: 60  # seconds
    collection_interval: 5  # seconds between samples
    # Single source of truth for the sampling window used by every task below.
    effective_duration: "{{ metrics_duration | default(default_metrics_duration) | int }}"

  tasks:
    - name: Create metrics directory
      file:
        path: "{{ metrics_dir }}/{{ inventory_hostname }}"
        state: directory
        mode: '0755'

    - name: Display metrics collection plan
      debug:
        msg: |
          📊 SYSTEM METRICS COLLECTION
          ===========================
          🖥️ Host: {{ inventory_hostname }}
          📅 Date: {{ ansible_date_time.date }}
          ⏱️ Duration: {{ effective_duration }}s
          📈 Interval: {{ collection_interval }}s
          📁 Output: {{ metrics_dir }}/{{ inventory_hostname }}

    # One-shot snapshot of static host facts plus CPU, memory, disk and NIC info.
    - name: Collect baseline system information
      shell: |
        info_file="{{ metrics_dir }}/{{ inventory_hostname }}/system_info_{{ ansible_date_time.epoch }}.txt"

        # A single grouped redirect replaces the per-line appends.
        {
          echo "📊 SYSTEM BASELINE INFORMATION"
          echo "=============================="
          echo "Host: {{ inventory_hostname }}"
          echo "Date: {{ ansible_date_time.iso8601 }}"
          echo "OS: {{ ansible_distribution }} {{ ansible_distribution_version }}"
          echo "Kernel: {{ ansible_kernel }}"
          echo "Architecture: {{ ansible_architecture }}"
          echo "CPU Cores: {{ ansible_processor_vcpus }}"
          echo "Total Memory: {{ ansible_memtotal_mb }}MB"
          echo ""

          echo "🖥️ CPU INFORMATION:"
          grep -E "model name|cpu MHz|cache size" /proc/cpuinfo | head -10
          echo ""

          echo "💾 MEMORY INFORMATION:"
          head -10 /proc/meminfo
          echo ""

          echo "💿 DISK INFORMATION:"
          lsblk -o NAME,SIZE,TYPE,MOUNTPOINT
          echo ""

          echo "🌐 NETWORK INTERFACES:"
          ip addr show | grep -E "^[0-9]+:|inet "
        } > "$info_file"

        echo "Baseline info saved to: $info_file"
      register: baseline_info

    # Samples the host every collection_interval seconds and appends one CSV
    # row per sample. Runs asynchronously so long windows do not hit the SSH
    # command timeout.
    - name: Start continuous metrics collection
      shell: |
        metrics_file="{{ metrics_dir }}/{{ inventory_hostname }}/metrics_{{ ansible_date_time.epoch }}.csv"
        interval={{ collection_interval }}

        # Create CSV header
        echo "timestamp,cpu_usage,memory_usage,memory_available,load_1min,load_5min,load_15min,disk_usage_root,network_rx_bytes,network_tx_bytes,processes_total,processes_running,docker_containers_running" > "$metrics_file"

        echo "📈 Starting metrics collection for {{ effective_duration }} seconds..."

        samples=0
        max_samples=$(( {{ effective_duration }} / interval ))

        while [ "$samples" -lt "$max_samples" ]; do
          sample_start=$(date +%s)
          timestamp=$(date '+%Y-%m-%d %H:%M:%S')

          # CPU usage (100 - idle percentage). The second vmstat report covers
          # the last second; assumes column 15 is "id" - confirm on the
          # target's vmstat.
          cpu_usage=$(vmstat 1 2 | tail -1 | awk '{print 100-$15}')

          # Memory usage. Column 7 of the "Mem:" row is assumed to be
          # "available" - confirm on hosts with an old free(1).
          memory_info=$(free -m)
          memory_total=$(echo "$memory_info" | awk 'NR==2{print $2}')
          memory_used=$(echo "$memory_info" | awk 'NR==2{print $3}')
          memory_available=$(echo "$memory_info" | awk 'NR==2{print $7}')
          # awk instead of bc: bc is often not installed, which used to turn
          # every sample into a silent 0.
          memory_usage=$(awk -v used="$memory_used" -v total="$memory_total" 'BEGIN { if (total > 0) printf "%.1f", used * 100 / total; else print "0" }')

          # Load averages straight from the kernel: unlike parsing uptime this
          # does not depend on the locale's decimal separator.
          read -r load_1min load_5min load_15min rest < /proc/loadavg

          # Disk usage for root partition
          disk_usage=$(df / | awk 'NR==2{print $5}' | sed 's/%//')

          # Network stats: cumulative byte counters summed over all interfaces.
          # "sum+0" prints 0 instead of an empty field when nothing was read.
          current_rx=$(cat /sys/class/net/*/statistics/rx_bytes 2>/dev/null | awk '{sum+=$1} END {print sum+0}')
          current_tx=$(cat /sys/class/net/*/statistics/tx_bytes 2>/dev/null | awk '{sum+=$1} END {print sum+0}')

          # Process counts. "-o stat=" suppresses the header line, so the total
          # is no longer off by one.
          process_states=$(ps -e -o stat=)
          processes_total=$(printf '%s\n' "$process_states" | wc -l)
          processes_running=$(printf '%s\n' "$process_states" | awk '$1 ~ /^R/ {count++} END {print count+0}')

          # Docker container count (if available). POSIX redirection: "&>" is
          # a bashism that made this test always succeed under dash.
          if command -v docker > /dev/null 2>&1 && docker info > /dev/null 2>&1; then
            docker_containers=$(docker ps -q | wc -l)
          else
            docker_containers=0
          fi

          # Write metrics to CSV
          echo "$timestamp,$cpu_usage,$memory_usage,$memory_available,$load_1min,$load_5min,$load_15min,$disk_usage,$current_rx,$current_tx,$processes_total,$processes_running,$docker_containers" >> "$metrics_file"

          samples=$((samples + 1))
          echo "Sample $samples/$max_samples collected..."

          # Sleep only for what is left of this interval. Sampling itself costs
          # at least the 1s vmstat window, and a full sleep on top of it used
          # to stretch the run past the async limit.
          if [ "$samples" -lt "$max_samples" ]; then
            elapsed=$(( $(date +%s) - sample_start ))
            remaining=$(( interval - elapsed ))
            if [ "$remaining" -gt 0 ]; then
              sleep "$remaining"
            fi
          fi
        done

        echo "✅ Metrics collection complete: $metrics_file"
      register: metrics_collection
      # Upper bound only; the task returns as soon as the script exits. The
      # headroom covers hosts where one sample takes longer than the interval.
      async: "{{ (effective_duration | int) * 2 + 60 }}"
      poll: 10

    # Best-effort Docker report. Never fails the play (failed_when: false).
    - name: Collect Docker metrics (if available)
      shell: |
        docker_file="{{ metrics_dir }}/{{ inventory_hostname }}/docker_metrics_{{ ansible_date_time.epoch }}.txt"

        if command -v docker > /dev/null 2>&1 && docker info > /dev/null 2>&1; then
          {
            echo "🐳 DOCKER METRICS"
            echo "================="
            echo "Timestamp: {{ ansible_date_time.iso8601 }}"
            echo ""

            echo "📊 DOCKER SYSTEM INFO:"
            docker system df 2>/dev/null || echo "Cannot get Docker system info"
            echo ""

            # The {{ '{{' }} ... {{ '}}' }} pairs emit literal Go-template
            # braces for docker after Jinja has rendered the script.
            echo "📦 CONTAINER STATS:"
            docker stats --no-stream --format "table {{ '{{' }}.Container{{ '}}' }}\t{{ '{{' }}.CPUPerc{{ '}}' }}\t{{ '{{' }}.MemUsage{{ '}}' }}\t{{ '{{' }}.MemPerc{{ '}}' }}\t{{ '{{' }}.NetIO{{ '}}' }}\t{{ '{{' }}.BlockIO{{ '}}' }}" 2>/dev/null || echo "Cannot get container stats"
            echo ""

            echo "🏃 RUNNING CONTAINERS:"
            docker ps --format "table {{ '{{' }}.Names{{ '}}' }}\t{{ '{{' }}.Image{{ '}}' }}\t{{ '{{' }}.Status{{ '}}' }}\t{{ '{{' }}.Ports{{ '}}' }}" 2>/dev/null || echo "Cannot list containers"
            echo ""

            echo "🔍 CONTAINER RESOURCE USAGE:"
            for container in $(docker ps --format "{{ '{{' }}.Names{{ '}}' }}" 2>/dev/null); do
              echo "--- $container ---"
              docker exec "$container" sh -c 'top -bn1 | head -5' 2>/dev/null || echo "Cannot access container $container"
              echo ""
            done
          } > "$docker_file"

          echo "Docker metrics saved to: $docker_file"
        else
          echo "Docker not available - skipping Docker metrics"
        fi
      register: docker_metrics
      failed_when: false

    # Snapshot of interface counters, listening sockets, routes and resolver config.
    - name: Collect network metrics
      shell: |
        network_file="{{ metrics_dir }}/{{ inventory_hostname }}/network_metrics_{{ ansible_date_time.epoch }}.txt"

        {
          echo "🌐 NETWORK METRICS"
          echo "=================="
          echo "Timestamp: {{ ansible_date_time.iso8601 }}"
          echo ""

          echo "🔌 INTERFACE STATISTICS:"
          cat /proc/net/dev
          echo ""

          echo "🔗 ACTIVE CONNECTIONS:"
          # Pick the tool explicitly: in "netstat | head || ss" the pipeline
          # status is head's, so the ss fallback could never run.
          if command -v netstat > /dev/null 2>&1; then
            netstat -tuln 2>/dev/null | head -20
          elif command -v ss > /dev/null 2>&1; then
            ss -tuln 2>/dev/null | head -20
          else
            echo "Cannot get connection info"
          fi
          echo ""

          echo "📡 ROUTING TABLE:"
          ip route 2>/dev/null || route -n 2>/dev/null || echo "Cannot get routing info"
          echo ""

          echo "🌍 DNS CONFIGURATION:"
          cat /etc/resolv.conf 2>/dev/null || echo "Cannot read DNS config"
        } > "$network_file"

        echo "Network metrics saved to: $network_file"
      register: network_metrics

    # Condenses the CSV produced above into sample counts and min/max ranges.
    - name: Generate metrics summary
      shell: |
        summary_file="{{ metrics_dir }}/{{ inventory_hostname }}/metrics_summary_{{ ansible_date_time.epoch }}.txt"
        metrics_csv="{{ metrics_dir }}/{{ inventory_hostname }}/metrics_{{ ansible_date_time.epoch }}.csv"

        # metric_range COLUMN UNIT: print the min and max of one CSV column.
        # After the numeric sort the first row is the minimum and the last row
        # seen is the maximum.
        metric_range() {
          tail -n +2 "$metrics_csv" | awk -F',' -v col="$1" '{print $col}' | sort -n | awk -v unit="$2" 'NR==1{min=$1} {max=$1} END{print " Min: " min unit ", Max: " max unit}'
        }

        {
          echo "📊 METRICS COLLECTION SUMMARY"
          echo "============================="
          echo "Host: {{ inventory_hostname }}"
          echo "Date: {{ ansible_date_time.iso8601 }}"
          echo "Duration: {{ effective_duration }}s"
          echo "Interval: {{ collection_interval }}s"
          echo ""

          if [ -f "$metrics_csv" ]; then
            sample_count=$(tail -n +2 "$metrics_csv" | wc -l)
            echo "📈 COLLECTION STATISTICS:"
            echo "Samples collected: $sample_count"
            echo "Expected samples: $(( {{ effective_duration }} / {{ collection_interval }} ))"
            echo ""

            echo "📊 METRIC RANGES:"
            echo "CPU Usage:"
            metric_range 2 "%"

            echo "Memory Usage:"
            metric_range 3 "%"

            echo "Load Average (1min):"
            metric_range 5 ""

            echo ""
            echo "📁 GENERATED FILES:"
            ls -la "{{ metrics_dir }}/{{ inventory_hostname }}"/*{{ ansible_date_time.epoch }}* 2>/dev/null || echo "No files found"
          else
            echo "⚠️ WARNING: Metrics CSV file not found"
          fi
        } > "$summary_file"

        echo "Summary saved to: $summary_file"
      register: metrics_summary

    - name: Display metrics collection results
      debug:
        msg: |

          📊 METRICS COLLECTION COMPLETE
          ==============================
          🖥️ Host: {{ inventory_hostname }}
          📅 Date: {{ ansible_date_time.date }}
          ⏱️ Duration: {{ effective_duration }}s

          📁 Generated Files:
          {{ baseline_info.stdout }}
          {{ metrics_collection.stdout }}
          {{ docker_metrics.stdout | default('Docker metrics: N/A') }}
          {{ network_metrics.stdout }}
          {{ metrics_summary.stdout }}

          🔍 Next Steps:
          - Analyze metrics: cat {{ metrics_dir }}/{{ inventory_hostname }}/metrics_*.csv
          - View summary: cat {{ metrics_dir }}/{{ inventory_hostname }}/metrics_summary_*.txt
          - Plot trends: Use the CSV data with your preferred visualization tool
          - Set up monitoring: ansible-playbook playbooks/alert_check.yml

          ==============================
|
||||
Reference in New Issue
Block a user