338 lines
12 KiB
YAML
338 lines
12 KiB
YAML
---
# Service Status Check Playbook
# Get comprehensive status of all services across homelab infrastructure
# Usage: ansible-playbook playbooks/service_status.yml
# Usage with specific host: ansible-playbook playbooks/service_status.yml --limit atlantis

- name: Check Service Status Across Homelab
  hosts: all
  gather_facts: true
  vars:
    # Per-host Portainer URLs (HTTPS, port 9443). None of the tasks in this
    # play reads this map.
    portainer_endpoints:
      atlantis: "https://192.168.0.200:9443"
      calypso: "https://192.168.0.201:9443"
      concord_nuc: "https://192.168.0.202:9443"
      homelab_vm: "https://192.168.0.203:9443"
      rpi5_vish: "https://192.168.0.204:9443"

  tasks:
    # Classify the host so that later tasks can pick a matching probe:
    #   synology  - vendor mentions "synology", distribution mentions "dsm",
    #               or the hostname contains atlantis/calypso
    #   container - virtualization type is docker or container
    #   standard  - everything else (probed through the systemd module)
    - name: Detect system type and environment
      set_fact:
        system_type: >-
          {{
            'synology' if (ansible_system_vendor is defined and 'synology' in ansible_system_vendor | lower) or
                          (ansible_distribution is defined and 'dsm' in ansible_distribution | lower) or
                          (ansible_hostname is defined and ('atlantis' in ansible_hostname or 'calypso' in ansible_hostname))
            else 'container' if ansible_virtualization_type is defined and ansible_virtualization_type in ['docker', 'container']
            else 'standard'
          }}

    # No `state` is passed, so the module only reports the unit's status.
    - name: Check if Docker is running (Standard Linux with systemd)
      systemd:
        name: docker
      register: docker_status_systemd
      when: system_type == "standard"
      ignore_errors: true

    # Prints exactly one of: active, inactive, not-found.
    - name: Check if Docker is running (Synology DSM)
      shell: |
        # Multiple methods to check Docker on Synology
        if command -v synoservice >/dev/null 2>&1; then
          # Method 1: Use synoservice (DSM 6.x/7.x)
          if synoservice --status pkgctl-Docker 2>/dev/null | grep -q "start\|running"; then
            echo "active"
          elif synoservice --status Docker 2>/dev/null | grep -q "start\|running"; then
            echo "active"
          else
            echo "inactive"
          fi
        elif command -v docker >/dev/null 2>&1; then
          # Method 2: Direct Docker check
          if docker info >/dev/null 2>&1; then
            echo "active"
          else
            echo "inactive"
          fi
        elif [ -f /var/packages/Docker/enabled ]; then
          # Method 3: Check package status file
          echo "active"
        else
          echo "not-found"
        fi
      register: docker_status_synology
      when: system_type == "synology"
      changed_when: false
      ignore_errors: true

    # Prints exactly one of: active, inactive, not-found.
    - name: Check if Docker is running (Container/Other environments)
      shell: |
        if command -v docker >/dev/null 2>&1; then
          if docker info >/dev/null 2>&1; then
            echo "active"
          else
            echo "inactive"
          fi
        else
          echo "not-found"
        fi
      register: docker_status_other
      when: system_type == "container"
      changed_when: false
      ignore_errors: true

    # Collapse the three probes above into one boolean. A skipped probe has
    # neither `status` nor `stdout`, so only the probe that ran can contribute.
    - name: Set unified Docker status
      set_fact:
        docker_running: >-
          {{
            (docker_status_systemd is defined and docker_status_systemd.status is defined and docker_status_systemd.status.ActiveState == "active") or
            (docker_status_synology is defined and docker_status_synology.stdout is defined and docker_status_synology.stdout == "active") or
            (docker_status_other is defined and docker_status_other.stdout is defined and docker_status_other.stdout == "active")
          }}

    # The docker --format string uses Go-template braces, so it is wrapped in
    # raw/endraw to keep Jinja from evaluating it.
    - name: Get Docker container status
      shell: |
        if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
          echo "=== DOCKER CONTAINERS ==="
          # Use simpler format to avoid template issues
          {% raw %}
          docker ps -a --format "table {{.Names}}\t{{.Status}}\t{{.Image}}" 2>/dev/null || echo "Permission denied or no containers"
          {% endraw %}
          echo ""
          echo "=== CONTAINER SUMMARY ==="
          running=$(docker ps -q 2>/dev/null | wc -l)
          total=$(docker ps -aq 2>/dev/null | wc -l)
          echo "Running: $running"
          echo "Total: $total"
        else
          echo "Docker not available or not accessible"
        fi
      register: container_status
      when: docker_running | bool
      changed_when: false
      ignore_errors: true

    # Read-only snapshot of CPU, memory, root-disk usage and load average.
    - name: Check system resources
      shell: |
        echo "=== SYSTEM RESOURCES ==="
        echo "CPU Usage: $(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | cut -d'%' -f1)%"
        # The percentage is taken from unit-less `free` output: the columns of
        # `free -h` carry unit suffixes (Mi/Gi) that awk would silently drop,
        # giving a wrong ratio whenever used and total differ in unit.
        mem_pct=$(free | awk 'NR==2 && $2 > 0 {printf "%.1f", $3*100/$2}')
        mem_used=$(free -h | awk 'NR==2{printf "%s/%s", $3, $2}')
        echo "Memory: ${mem_pct}% (${mem_used})"
        echo "Disk: $(df -h / | awk 'NR==2{printf "%s (%s used)", $5, $3}')"
        echo "Load Average: $(uptime | awk -F'load average:' '{print $2}')"
      register: system_resources
      changed_when: false

    # One status query per unit; results land in critical_services_systemd.results.
    - name: Check critical services (Standard Linux)
      systemd:
        name: "{{ item }}"
      register: critical_services_systemd
      loop:
        - docker
        - ssh
        - tailscaled
      when: system_type == "standard"
      ignore_errors: true

    # Prints one of: active, inactive, unknown.
    - name: Check critical services (Synology)
      shell: |
        service_name="{{ item }}"
        case "$service_name" in
          "docker")
            if command -v synoservice >/dev/null 2>&1; then
              if synoservice --status pkgctl-Docker 2>/dev/null | grep -q "start\|running"; then
                echo "active"
              elif synoservice --status Docker 2>/dev/null | grep -q "start\|running"; then
                echo "active"
              else
                echo "inactive"
              fi
            elif command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
              echo "active"
            else
              echo "inactive"
            fi
            ;;
          "ssh")
            # -x matches the process name exactly. -f would also match the
            # shell running this very script, whose command line contains
            # the word being searched for.
            if pgrep -x "sshd" >/dev/null 2>&1; then
              echo "active"
            else
              echo "inactive"
            fi
            ;;
          "tailscaled")
            if pgrep -x "tailscaled" >/dev/null 2>&1; then
              echo "active"
            elif command -v tailscale >/dev/null 2>&1 && tailscale status >/dev/null 2>&1; then
              echo "active"
            else
              echo "inactive"
            fi
            ;;
          *)
            echo "unknown"
            ;;
        esac
      register: critical_services_synology
      loop:
        - docker
        - ssh
        - tailscaled
      when: system_type == "synology"
      changed_when: false
      ignore_errors: true

    # Prints one of: active, inactive, unknown.
    - name: Check critical services (Container/Other)
      shell: |
        service_name="{{ item }}"
        case "$service_name" in
          "docker")
            if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
              echo "active"
            else
              echo "inactive"
            fi
            ;;
          "ssh")
            # -x matches the process name exactly. -f would also match the
            # shell running this very script, whose command line contains
            # the word being searched for.
            if pgrep -x "sshd" >/dev/null 2>&1; then
              echo "active"
            else
              echo "inactive"
            fi
            ;;
          "tailscaled")
            if pgrep -x "tailscaled" >/dev/null 2>&1; then
              echo "active"
            elif command -v tailscale >/dev/null 2>&1 && tailscale status >/dev/null 2>&1; then
              echo "active"
            else
              echo "inactive"
            fi
            ;;
          *)
            echo "unknown"
            ;;
        esac
      register: critical_services_other
      loop:
        - docker
        - ssh
        - tailscaled
      when: system_type == "container"
      changed_when: false
      ignore_errors: true

    # Pick the result set of whichever probe actually ran. The `skipped` test
    # is used instead of reading `.skipped` directly because that key is not
    # guaranteed to be present in a registered result.
    - name: Set unified critical services status
      set_fact:
        critical_services: >-
          {{
            critical_services_systemd if critical_services_systemd is defined and critical_services_systemd is not skipped
            else critical_services_synology if critical_services_synology is defined and critical_services_synology is not skipped
            else critical_services_other if critical_services_other is defined and critical_services_other is not skipped
            else {'results': []}
          }}

    - name: Check network connectivity
      shell: |
        echo "=== NETWORK STATUS ==="
        echo "Tailscale Status:"
        # Probe the tools and tailscale's own exit status first: in a plain
        # `tailscale | jq || echo` only jq's exit status counts, so a missing
        # or failing tailscale would print nothing at all.
        if command -v tailscale >/dev/null 2>&1 && command -v jq >/dev/null 2>&1 \
            && ts_json=$(tailscale status --json 2>/dev/null); then
          printf '%s\n' "$ts_json" | jq -r '.Self.HostName + " - " + .Self.TailscaleIPs[0]' 2>/dev/null || echo "Tailscale not available"
        else
          echo "Tailscale not available"
        fi
        echo "Internet Connectivity:"
        ping -c 1 8.8.8.8 >/dev/null 2>&1 && echo "✅ Internet OK" || echo "❌ Internet DOWN"
      register: network_status
      changed_when: false
      ignore_errors: true

    # Human-readable report. Systemd results are read from
    # service.status.ActiveState, shell results from service.stdout.
    - name: Display comprehensive status report
      debug:
        msg: |

          ==========================================
          📊 SERVICE STATUS REPORT - {{ inventory_hostname }}
          ==========================================

          🖥️ SYSTEM INFO:
          - Hostname: {{ ansible_hostname }}
          - OS: {{ ansible_distribution }} {{ ansible_distribution_version }}
          - Uptime: {{ ansible_uptime_seconds | int // 86400 }} days, {{ (ansible_uptime_seconds | int % 86400) // 3600 }} hours

          {{ system_resources.stdout }}

          🐳 DOCKER STATUS:
          {% if docker_running %}
          ✅ Docker is running ({{ system_type }} system)
          {% else %}
          ❌ Docker is not running ({{ system_type }} system)
          {% endif %}

          📦 CONTAINER STATUS:
          {% if container_status.stdout is defined %}
          {{ container_status.stdout }}
          {% else %}
          No containers found or Docker not accessible
          {% endif %}

          🔧 CRITICAL SERVICES:
          {% if critical_services.results is defined %}
          {% for service in critical_services.results %}
          {% if system_type == "standard" and service.status is defined %}
          {% if service.status.ActiveState == "active" %}
          ✅ {{ service.item }}: Running
          {% else %}
          ❌ {{ service.item }}: {{ service.status.ActiveState | default('Unknown') }}
          {% endif %}
          {% else %}
          {% if service.stdout is defined and service.stdout == "active" %}
          ✅ {{ service.item }}: Running
          {% else %}
          ❌ {{ service.item }}: {{ service.stdout | default('Unknown') }}
          {% endif %}
          {% endif %}
          {% endfor %}
          {% else %}
          No service status available
          {% endif %}

          {{ network_status.stdout }}

          ==========================================

    # Written on the control node (delegate_to: localhost), one file per host.
    # Interpolated strings and booleans are rendered through to_json so that
    # booleans come out as true/false and strings are quoted and escaped;
    # plain Jinja output would emit Python's True/False and unescaped text.
    - name: Generate JSON status report
      copy:
        content: |
          {
            "timestamp": {{ ansible_date_time.iso8601 | to_json }},
            "hostname": {{ inventory_hostname | to_json }},
            "system_type": {{ system_type | to_json }},
            "system": {
              "os": {{ (ansible_distribution ~ ' ' ~ ansible_distribution_version) | to_json }},
              "uptime_days": {{ ansible_uptime_seconds | int // 86400 }},
              "cpu_count": {{ ansible_processor_vcpus }},
              "memory_mb": {{ ansible_memtotal_mb }},
              "docker_status": "{{ 'active' if docker_running else 'inactive' }}"
            },
            "containers": {{ (container_status.stdout_lines | default([])) | to_json }},
            "critical_services": [
          {% if critical_services.results is defined %}
          {% for service in critical_services.results %}
              {
                "name": {{ service.item | to_json }},
          {% if system_type == "standard" and service.status is defined %}
                "status": {{ service.status.ActiveState | default('unknown') | to_json }},
                "enabled": {{ (service.status.UnitFileState | default('') == "enabled") | to_json }}
          {% else %}
                "status": {{ service.stdout | default('unknown') | to_json }},
                "enabled": {{ (service.stdout | default('') == "active") | to_json }}
          {% endif %}
              }{% if not loop.last %},{% endif %}
          {% endfor %}
          {% endif %}
            ]
          }
        dest: "/tmp/{{ inventory_hostname }}_status_{{ ansible_date_time.epoch }}.json"
      delegate_to: localhost
      ignore_errors: true

    - name: Summary message
      debug:
        msg: |
          📋 Status check complete for {{ inventory_hostname }}
          📄 JSON report saved to: /tmp/{{ inventory_hostname }}_status_{{ ansible_date_time.epoch }}.json

          Run with --limit to check specific hosts:
          ansible-playbook playbooks/service_status.yml --limit atlantis