---
# System Metrics Collection Playbook
# Collects detailed system metrics for monitoring and analysis
# Usage: ansible-playbook playbooks/system_metrics.yml
# Usage: ansible-playbook playbooks/system_metrics.yml -e "metrics_duration=300"
#
# Every task writes into <metrics_dir>/<inventory_hostname>/ and names its
# file with the epoch taken from the gathered facts, so all files produced by
# one run share the same suffix.
#
# NOTE: the shell snippets are executed by the `shell` module, i.e. by /bin/sh
# (dash on Debian/Ubuntu). They are therefore written in POSIX sh; in
# particular `>/dev/null 2>&1` is used instead of the bash-only `&>`.

- name: Collect System Metrics
  hosts: all
  gather_facts: true

  vars:
    metrics_dir: "/tmp/metrics"
    default_metrics_duration: 60  # seconds
    collection_interval: 5  # seconds between samples (must be a whole number > 0)

  tasks:
    - name: Create metrics directory
      file:
        path: "{{ metrics_dir }}/{{ inventory_hostname }}"
        state: directory
        mode: '0755'

    - name: Display metrics collection plan
      debug:
        msg: |
          📊 SYSTEM METRICS COLLECTION
          ===========================
          🖥️ Host: {{ inventory_hostname }}
          📅 Date: {{ ansible_date_time.date }}
          ⏱️ Duration: {{ metrics_duration | default(default_metrics_duration) }}s
          📈 Interval: {{ collection_interval }}s
          📁 Output: {{ metrics_dir }}/{{ inventory_hostname }}

    # One-off snapshot of static host information (facts + /proc + lsblk + ip).
    - name: Collect baseline system information
      shell: |
        info_file="{{ metrics_dir }}/{{ inventory_hostname }}/system_info_{{ ansible_date_time.epoch }}.txt"

        # Everything inside the braces goes to the report file.
        {
          echo "📊 SYSTEM BASELINE INFORMATION"
          echo "=============================="
          echo "Host: {{ inventory_hostname }}"
          echo "Date: {{ ansible_date_time.iso8601 }}"
          echo "OS: {{ ansible_distribution }} {{ ansible_distribution_version }}"
          echo "Kernel: {{ ansible_kernel }}"
          echo "Architecture: {{ ansible_architecture }}"
          echo "CPU Cores: {{ ansible_processor_vcpus }}"
          echo "Total Memory: {{ ansible_memtotal_mb }}MB"
          echo ""

          echo "🖥️ CPU INFORMATION:"
          grep -E "model name|cpu MHz|cache size" /proc/cpuinfo | head -10
          echo ""

          echo "💾 MEMORY INFORMATION:"
          head -10 /proc/meminfo
          echo ""

          echo "💿 DISK INFORMATION:"
          lsblk -o NAME,SIZE,TYPE,MOUNTPOINT
          echo ""

          echo "🌐 NETWORK INTERFACES:"
          ip addr show | grep -E "^[0-9]+:|inet "
        } > "$info_file"

        echo "Baseline info saved to: $info_file"
      register: baseline_info

    # Samples host metrics into a CSV, one row per collection_interval, for
    # roughly metrics_duration seconds. Runs asynchronously with a timeout of
    # duration + 30s, so the loop must not drift far beyond the duration.
    - name: Start continuous metrics collection
      shell: |
        metrics_file="{{ metrics_dir }}/{{ inventory_hostname }}/metrics_{{ ansible_date_time.epoch }}.csv"
        duration={{ metrics_duration | default(default_metrics_duration) }}
        interval={{ collection_interval }}

        # Sum one counter (rx_bytes / tx_bytes) over all interfaces.
        # `sum+0` guarantees a numeric 0 when no interface file is readable.
        sum_net_bytes() {
          cat /sys/class/net/*/statistics/"$1" 2>/dev/null | awk '{sum+=$1} END {print sum+0}'
        }

        # Create CSV header
        echo "timestamp,cpu_usage,memory_usage,memory_available,load_1min,load_5min,load_15min,disk_usage_root,network_rx_bytes,network_tx_bytes,processes_total,processes_running,docker_containers_running" > "$metrics_file"

        echo "📈 Starting metrics collection for {{ metrics_duration | default(default_metrics_duration) }} seconds..."

        samples=0
        max_samples=$(( duration / interval ))

        while [ "$samples" -lt "$max_samples" ]; do
          sample_start=$(date +%s)
          timestamp=$(date '+%Y-%m-%d %H:%M:%S')

          # CPU usage (100 - idle percentage); the second vmstat report covers
          # the last second, the first one is the average since boot.
          cpu_usage=$(vmstat 1 2 | tail -1 | awk '{print 100-$15}')

          # Memory usage (values in MB, taken from the "Mem:" row of free)
          memory_info=$(free -m)
          memory_total=$(echo "$memory_info" | awk 'NR==2{print $2}')
          memory_used=$(echo "$memory_info" | awk 'NR==2{print $3}')
          memory_available=$(echo "$memory_info" | awk 'NR==2{print $7}')
          memory_usage=$(echo "scale=1; $memory_used * 100 / $memory_total" | bc -l 2>/dev/null || echo "0")

          # Load averages: the first three fields of /proc/loadavg. Reading
          # them directly avoids parsing `uptime`, whose comma-separated
          # output is locale dependent.
          read -r load_1min load_5min load_15min _ < /proc/loadavg

          # Disk usage for root partition (percentage without the % sign)
          disk_usage=$(df / | awk 'NR==2{print $5}' | sed 's/%//')

          # Network stats (cumulative byte counters summed over all interfaces)
          current_rx=$(sum_net_bytes rx_bytes)
          current_tx=$(sum_net_bytes tx_bytes)

          # Process counts; `-o pid=` suppresses the header so it is not counted
          processes_total=$(ps -e -o pid= | wc -l)
          processes_running=$(ps aux | awk '$8 ~ /^R/ {count++} END {print count+0}')

          # Docker container count (if available)
          if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
            docker_containers=$(docker ps -q | wc -l)
          else
            docker_containers=0
          fi

          # Write metrics to CSV
          echo "$timestamp,$cpu_usage,$memory_usage,$memory_available,$load_1min,$load_5min,$load_15min,$disk_usage,$current_rx,$current_tx,$processes_total,$processes_running,$docker_containers" >> "$metrics_file"

          samples=$((samples + 1))
          echo "Sample $samples/$max_samples collected..."

          # Sleep only for what is left of this interval. Collecting a sample
          # itself takes more than a second (vmstat 1 2); sleeping the full
          # interval on top of that would stretch the run past the async
          # timeout for long durations.
          elapsed=$(( $(date +%s) - sample_start ))
          remaining=$(( interval - elapsed ))
          if [ "$remaining" -gt 0 ]; then
            sleep "$remaining"
          fi
        done

        echo "✅ Metrics collection complete: $metrics_file"
      register: metrics_collection
      async: "{{ ((metrics_duration | default(default_metrics_duration)) | int) + 30 }}"
      poll: 10

    # Best-effort Docker report; never fails the play (failed_when: false).
    # Docker's Go-template braces are emitted via {{ '{{' }} / {{ '}}' }} so
    # that Jinja does not try to evaluate them.
    - name: Collect Docker metrics (if available)
      shell: |
        docker_file="{{ metrics_dir }}/{{ inventory_hostname }}/docker_metrics_{{ ansible_date_time.epoch }}.txt"

        if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
          {
            echo "🐳 DOCKER METRICS"
            echo "================="
            echo "Timestamp: {{ ansible_date_time.iso8601 }}"
            echo ""

            echo "📊 DOCKER SYSTEM INFO:"
            docker system df 2>/dev/null || echo "Cannot get Docker system info"
            echo ""

            echo "📦 CONTAINER STATS:"
            docker stats --no-stream --format "table {{ '{{' }}.Container{{ '}}' }}\t{{ '{{' }}.CPUPerc{{ '}}' }}\t{{ '{{' }}.MemUsage{{ '}}' }}\t{{ '{{' }}.MemPerc{{ '}}' }}\t{{ '{{' }}.NetIO{{ '}}' }}\t{{ '{{' }}.BlockIO{{ '}}' }}" 2>/dev/null || echo "Cannot get container stats"
            echo ""

            echo "🏃 RUNNING CONTAINERS:"
            docker ps --format "table {{ '{{' }}.Names{{ '}}' }}\t{{ '{{' }}.Image{{ '}}' }}\t{{ '{{' }}.Status{{ '}}' }}\t{{ '{{' }}.Ports{{ '}}' }}" 2>/dev/null || echo "Cannot list containers"
            echo ""

            echo "🔍 CONTAINER RESOURCE USAGE:"
            for container in $(docker ps --format "{{ '{{' }}.Names{{ '}}' }}" 2>/dev/null); do
              echo "--- $container ---"
              docker exec "$container" sh -c 'top -bn1 | head -5' 2>/dev/null || echo "Cannot access container $container"
              echo ""
            done
          } > "$docker_file"

          echo "Docker metrics saved to: $docker_file"
        else
          echo "Docker not available - skipping Docker metrics"
        fi
      register: docker_metrics
      failed_when: false

    - name: Collect network metrics
      shell: |
        network_file="{{ metrics_dir }}/{{ inventory_hostname }}/network_metrics_{{ ansible_date_time.epoch }}.txt"

        {
          echo "🌐 NETWORK METRICS"
          echo "=================="
          echo "Timestamp: {{ ansible_date_time.iso8601 }}"
          echo ""

          echo "🔌 INTERFACE STATISTICS:"
          cat /proc/net/dev
          echo ""

          echo "🔗 ACTIVE CONNECTIONS:"
          # Pick the tool explicitly: with `netstat | head || ss` the exit
          # status is head's, so the fallback would never be taken.
          if command -v netstat >/dev/null 2>&1; then
            netstat -tuln 2>/dev/null | head -20
          elif command -v ss >/dev/null 2>&1; then
            ss -tuln 2>/dev/null | head -20
          else
            echo "Cannot get connection info"
          fi
          echo ""

          echo "📡 ROUTING TABLE:"
          ip route 2>/dev/null || route -n 2>/dev/null || echo "Cannot get routing info"
          echo ""

          echo "🌍 DNS CONFIGURATION:"
          cat /etc/resolv.conf 2>/dev/null || echo "Cannot read DNS config"
        } > "$network_file"

        echo "Network metrics saved to: $network_file"
      register: network_metrics

    # Summarises the CSV written by the sampling task: sample count plus
    # min/max of CPU, memory and 1-minute load, and a listing of this run's
    # files.
    - name: Generate metrics summary
      shell: |
        summary_file="{{ metrics_dir }}/{{ inventory_hostname }}/metrics_summary_{{ ansible_date_time.epoch }}.txt"
        metrics_csv="{{ metrics_dir }}/{{ inventory_hostname }}/metrics_{{ ansible_date_time.epoch }}.csv"

        # Print " Min: <min><unit>, Max: <max><unit>" for CSV column $1
        # (1-based), skipping the header row. $2 is the unit suffix.
        column_range() {
          tail -n +2 "$metrics_csv" | awk -F',' -v col="$1" '{print $col}' | sort -n |
            awk -v unit="$2" 'NR==1{min=$1} {max=$1} END{print " Min: " min unit ", Max: " max unit}'
        }

        {
          echo "📊 METRICS COLLECTION SUMMARY"
          echo "============================="
          echo "Host: {{ inventory_hostname }}"
          echo "Date: {{ ansible_date_time.iso8601 }}"
          echo "Duration: {{ metrics_duration | default(default_metrics_duration) }}s"
          echo "Interval: {{ collection_interval }}s"
          echo ""

          if [ -f "$metrics_csv" ]; then
            sample_count=$(tail -n +2 "$metrics_csv" | wc -l)
            echo "📈 COLLECTION STATISTICS:"
            echo "Samples collected: $sample_count"
            echo "Expected samples: $(( {{ metrics_duration | default(default_metrics_duration) }} / {{ collection_interval }} ))"
            echo ""

            echo "📊 METRIC RANGES:"
            echo "CPU Usage:"
            column_range 2 "%"

            echo "Memory Usage:"
            column_range 3 "%"

            echo "Load Average (1min):"
            column_range 5 ""

            echo ""
            echo "📁 GENERATED FILES:"
            ls -la "{{ metrics_dir }}/{{ inventory_hostname }}"/*{{ ansible_date_time.epoch }}* 2>/dev/null || echo "No files found"
          else
            echo "⚠️ WARNING: Metrics CSV file not found"
          fi
        } > "$summary_file"

        echo "Summary saved to: $summary_file"
      register: metrics_summary

    - name: Display metrics collection results
      debug:
        msg: |
          📊 METRICS COLLECTION COMPLETE
          ==============================
          🖥️ Host: {{ inventory_hostname }}
          📅 Date: {{ ansible_date_time.date }}
          ⏱️ Duration: {{ metrics_duration | default(default_metrics_duration) }}s

          📁 Generated Files:
          {{ baseline_info.stdout }}
          {{ metrics_collection.stdout }}
          {{ docker_metrics.stdout | default('Docker metrics: N/A') }}
          {{ network_metrics.stdout }}
          {{ metrics_summary.stdout }}

          🔍 Next Steps:
          - Analyze metrics: cat {{ metrics_dir }}/{{ inventory_hostname }}/metrics_*.csv
          - View summary: cat {{ metrics_dir }}/{{ inventory_hostname }}/metrics_summary_*.txt
          - Plot trends: Use the CSV data with your preferred visualization tool
          - Set up monitoring: ansible-playbook playbooks/alert_check.yml

          ==============================