Sanitized mirror from private repository - 2026-03-24 12:45:58 UTC
This commit is contained in:
675
docs/advanced/TERRAFORM_IMPLEMENTATION_GUIDE.md
Normal file
675
docs/advanced/TERRAFORM_IMPLEMENTATION_GUIDE.md
Normal file
@@ -0,0 +1,675 @@
|
||||
# Terraform Implementation Guide for Homelab
|
||||
|
||||
## 🎯 Overview
|
||||
|
||||
This guide provides a comprehensive approach to implementing Terraform for your homelab infrastructure, focusing on practical benefits and gradual adoption.
|
||||
|
||||
## 🤔 Should You Use Terraform?
|
||||
|
||||
### Decision Matrix
|
||||
|
||||
| Factor | Your Current Setup | With Terraform | Recommendation |
|
||||
|--------|-------------------|----------------|----------------|
|
||||
| **VM Management** | Manual via Proxmox UI | Automated, version-controlled | ✅ **High Value** |
|
||||
| **Network Config** | Manual VLAN/firewall setup | Declarative networking | ✅ **High Value** |
|
||||
| **Storage Provisioning** | Manual NFS/iSCSI setup | Automated storage allocation | ✅ **Medium Value** |
|
||||
| **Service Deployment** | Docker Compose (working well) | Limited benefit | ❌ **Low Value** |
|
||||
| **Backup Management** | Scripts + manual verification | Infrastructure-level backups | ✅ **Medium Value** |
|
||||
|
||||
### Recommendation: Hybrid Approach
|
||||
- **Use Terraform for**: Infrastructure (VMs, networks, storage)
|
||||
- **Keep current approach for**: Services (Docker Compose + Ansible)
|
||||
|
||||
## 🏗️ Implementation Strategy
|
||||
|
||||
### Phase 1: Foundation Setup (Week 1)
|
||||
|
||||
#### 1.1 Directory Structure
|
||||
```
|
||||
terraform/
|
||||
├── modules/
|
||||
│ ├── proxmox-vm/
|
||||
│ │ ├── main.tf
|
||||
│ │ ├── variables.tf
|
||||
│ │ ├── outputs.tf
|
||||
│ │ └── README.md
|
||||
│ ├── synology-storage/
|
||||
│ │ ├── main.tf
|
||||
│ │ ├── variables.tf
|
||||
│ │ └── outputs.tf
|
||||
│ └── networking/
|
||||
│ ├── vlans.tf
|
||||
│ ├── firewall.tf
|
||||
│ └── dns.tf
|
||||
├── environments/
|
||||
│ ├── production/
|
||||
│ │ ├── main.tf
|
||||
│ │ ├── terraform.tfvars
|
||||
│ │ ├── backend.tf
|
||||
│ │ └── versions.tf
|
||||
│ └── staging/
|
||||
│ ├── main.tf
|
||||
│ ├── terraform.tfvars
|
||||
│ └── backend.tf
|
||||
├── scripts/
|
||||
│ ├── init-terraform.sh
|
||||
│ ├── plan-and-apply.sh
|
||||
│ └── destroy-environment.sh
|
||||
└── docs/
|
||||
├── GETTING_STARTED.md
|
||||
├── MODULES.md
|
||||
└── TROUBLESHOOTING.md
|
||||
```
|
||||
|
||||
#### 1.2 Provider Configuration
|
||||
```hcl
|
||||
# terraform/environments/production/versions.tf
|
||||
terraform {
|
||||
required_version = ">= 1.0"
|
||||
|
||||
required_providers {
|
||||
proxmox = {
|
||||
source = "telmate/proxmox"
|
||||
version = "~> 2.9"
|
||||
}
|
||||
cloudflare = {
|
||||
source = "cloudflare/cloudflare"
|
||||
version = "~> 4.0"
|
||||
}
|
||||
}
|
||||
|
||||
backend "local" {
|
||||
path = "terraform.tfstate"
|
||||
}
|
||||
}
|
||||
|
||||
provider "proxmox" {
|
||||
pm_api_url = var.proxmox_api_url
|
||||
pm_user = var.proxmox_user
|
||||
pm_password = var.proxmox_password
|
||||
pm_tls_insecure = true
|
||||
}
|
||||
|
||||
provider "cloudflare" {
|
||||
api_token = var.cloudflare_api_token
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 2: VM Module Development (Week 2)
|
||||
|
||||
#### 2.1 Proxmox VM Module
|
||||
```hcl
|
||||
# terraform/modules/proxmox-vm/main.tf
|
||||
resource "proxmox_vm_qemu" "vm" {
|
||||
name = var.vm_name
|
||||
target_node = var.proxmox_node
|
||||
vmid = var.vm_id
|
||||
|
||||
# VM Configuration
|
||||
cores = var.cpu_cores
|
||||
memory = var.memory_mb
|
||||
sockets = var.cpu_sockets
|
||||
|
||||
# Boot Configuration
|
||||
boot = "order=scsi0"
|
||||
scsihw = "virtio-scsi-pci"
|
||||
|
||||
# Disk Configuration
|
||||
disk {
|
||||
slot = 0
|
||||
size = var.disk_size
|
||||
type = "scsi"
|
||||
storage = var.storage_pool
|
||||
iothread = 1
|
||||
ssd = var.disk_ssd
|
||||
}
|
||||
|
||||
# Network Configuration
|
||||
network {
|
||||
model = "virtio"
|
||||
bridge = var.network_bridge
|
||||
tag = var.vlan_tag
|
||||
}
|
||||
|
||||
# Cloud-init Configuration
|
||||
os_type = "cloud-init"
|
||||
ipconfig0 = "ip=${var.ip_address}/${var.subnet_mask},gw=${var.gateway}"
|
||||
|
||||
# SSH Configuration
|
||||
sshkeys = var.ssh_public_keys
|
||||
|
||||
# Lifecycle Management
|
||||
lifecycle {
|
||||
ignore_changes = [
|
||||
network,
|
||||
disk,
|
||||
]
|
||||
}
|
||||
|
||||
tags = var.tags
|
||||
}
|
||||
```
|
||||
|
||||
#### 2.2 VM Module Variables
|
||||
```hcl
|
||||
# terraform/modules/proxmox-vm/variables.tf
|
||||
variable "vm_name" {
|
||||
description = "Name of the virtual machine"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "proxmox_node" {
|
||||
description = "Proxmox node to deploy VM on"
|
||||
type = string
|
||||
default = "proxmox"
|
||||
}
|
||||
|
||||
variable "vm_id" {
|
||||
description = "VM ID (must be unique)"
|
||||
type = number
|
||||
}
|
||||
|
||||
variable "cpu_cores" {
|
||||
description = "Number of CPU cores"
|
||||
type = number
|
||||
default = 2
|
||||
}
|
||||
|
||||
variable "memory_mb" {
|
||||
description = "Memory in MB"
|
||||
type = number
|
||||
default = 2048
|
||||
}
|
||||
|
||||
variable "disk_size" {
|
||||
description = "Disk size (e.g., '20G')"
|
||||
type = string
|
||||
default = "20G"
|
||||
}
|
||||
|
||||
variable "storage_pool" {
|
||||
description = "Storage pool name"
|
||||
type = string
|
||||
default = "local-lvm"
|
||||
}
|
||||
|
||||
variable "network_bridge" {
|
||||
description = "Network bridge"
|
||||
type = string
|
||||
default = "vmbr0"
|
||||
}
|
||||
|
||||
variable "vlan_tag" {
|
||||
description = "VLAN tag"
|
||||
type = number
|
||||
default = null
|
||||
}
|
||||
|
||||
variable "ip_address" {
|
||||
description = "Static IP address"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "subnet_mask" {
|
||||
description = "Subnet prefix length in CIDR notation (e.g., '24' for 255.255.255.0)"
|
||||
type = string
|
||||
default = "24"
|
||||
}
|
||||
|
||||
variable "gateway" {
|
||||
description = "Gateway IP address"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "ssh_public_keys" {
|
||||
description = "SSH public keys for access"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "tags" {
|
||||
description = "Tags for the VM"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "disk_ssd" {
|
||||
description = "Whether disk is SSD"
|
||||
type = bool
|
||||
default = true
|
||||
}
|
||||
|
||||
variable "cpu_sockets" {
|
||||
description = "Number of CPU sockets"
|
||||
type = number
|
||||
default = 1
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 3: Environment Configuration (Week 3)
|
||||
|
||||
#### 3.1 Production Environment
|
||||
```hcl
|
||||
# terraform/environments/production/main.tf
|
||||
module "atlantis_vm" {
|
||||
source = "../../modules/proxmox-vm"
|
||||
|
||||
vm_name = "atlantis"
|
||||
vm_id = 100
|
||||
proxmox_node = "proxmox-node1"
|
||||
|
||||
cpu_cores = 4
|
||||
memory_mb = 8192
|
||||
disk_size = "100G"
|
||||
|
||||
ip_address = "192.168.1.10"
|
||||
gateway = "192.168.1.1"
|
||||
network_bridge = "vmbr0"
|
||||
vlan_tag = 10
|
||||
|
||||
ssh_public_keys = file("~/.ssh/id_rsa.pub")
|
||||
tags = "homelab,synology,production"
|
||||
}
|
||||
|
||||
module "calypso_vm" {
|
||||
source = "../../modules/proxmox-vm"
|
||||
|
||||
vm_name = "calypso"
|
||||
vm_id = 101
|
||||
proxmox_node = "proxmox-node1"
|
||||
|
||||
cpu_cores = 6
|
||||
memory_mb = 16384
|
||||
disk_size = "200G"
|
||||
|
||||
ip_address = "192.168.1.11"
|
||||
gateway = "192.168.1.1"
|
||||
network_bridge = "vmbr0"
|
||||
vlan_tag = 10
|
||||
|
||||
ssh_public_keys = file("~/.ssh/id_rsa.pub")
|
||||
tags = "homelab,synology,production"
|
||||
}
|
||||
|
||||
module "homelab_vm" {
|
||||
source = "../../modules/proxmox-vm"
|
||||
|
||||
vm_name = "homelab-vm"
|
||||
vm_id = 102
|
||||
proxmox_node = "proxmox-node2"
|
||||
|
||||
cpu_cores = 2
|
||||
memory_mb = 4096
|
||||
disk_size = "50G"
|
||||
|
||||
ip_address = "192.168.1.12"
|
||||
gateway = "192.168.1.1"
|
||||
network_bridge = "vmbr0"
|
||||
vlan_tag = 20
|
||||
|
||||
ssh_public_keys = file("~/.ssh/id_rsa.pub")
|
||||
tags = "homelab,vm,production"
|
||||
}
|
||||
```
|
||||
|
||||
#### 3.2 Environment Variables
|
||||
```hcl
|
||||
# terraform/environments/production/terraform.tfvars
|
||||
proxmox_api_url = "https://proxmox.local:8006/api2/json"
|
||||
proxmox_user = "terraform@pve"
|
||||
proxmox_password = "REDACTED_PASSWORD"
|
||||
|
||||
cloudflare_api_token = "REDACTED_TOKEN"
|
||||
|
||||
# Network Configuration
|
||||
default_gateway = "192.168.1.1"
|
||||
dns_servers = ["1.1.1.1", "8.8.8.8"]
|
||||
|
||||
# Storage Configuration
|
||||
default_storage_pool = "local-lvm"
|
||||
backup_storage_pool = "backup-storage"
|
||||
|
||||
# SSH Configuration
|
||||
ssh_public_key_path = "~/.ssh/id_rsa.pub"
|
||||
```
|
||||
|
||||
### Phase 4: Advanced Features (Week 4)
|
||||
|
||||
#### 4.1 Network Module
|
||||
```hcl
|
||||
# terraform/modules/networking/vlans.tf
|
||||
resource "proxmox_vm_qemu" "pfsense" {
|
||||
count = var.deploy_pfsense ? 1 : 0
|
||||
|
||||
name = "pfsense-firewall"
|
||||
target_node = var.proxmox_node
|
||||
vmid = 150 # Proxmox VM IDs must be >= 100; pick one not used by other VMs
|
||||
|
||||
cores = 2
|
||||
memory = 2048
|
||||
|
||||
disk {
|
||||
slot = 0
|
||||
size = "20G"
|
||||
type = "scsi"
|
||||
storage = var.storage_pool
|
||||
}
|
||||
|
||||
# WAN Interface
|
||||
network {
|
||||
model = "virtio"
|
||||
bridge = "vmbr0"
|
||||
}
|
||||
|
||||
# LAN Interface
|
||||
network {
|
||||
model = "virtio"
|
||||
bridge = "vmbr1"
|
||||
}
|
||||
|
||||
# DMZ Interface
|
||||
network {
|
||||
model = "virtio"
|
||||
bridge = "vmbr2"
|
||||
}
|
||||
|
||||
tags = "firewall,network,security"
|
||||
}
|
||||
```
|
||||
|
||||
#### 4.2 Storage Module
|
||||
```hcl
|
||||
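# terraform/modules/synology-storage/main.tf (illustrative: confirm that your Proxmox provider version actually offers these storage resource types before use)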
|
||||
resource "proxmox_lvm_thinpool" "storage" {
|
||||
count = length(var.storage_pools)
|
||||
|
||||
name = var.storage_pools[count.index].name
|
||||
vgname = var.storage_pools[count.index].vg_name
|
||||
size = var.storage_pools[count.index].size
|
||||
node = var.proxmox_node
|
||||
}
|
||||
|
||||
# NFS Storage Configuration
|
||||
resource "proxmox_storage" "nfs" {
|
||||
count = length(var.nfs_shares)
|
||||
|
||||
storage_id = var.nfs_shares[count.index].id
|
||||
type = "nfs"
|
||||
server = var.nfs_shares[count.index].server
|
||||
export = var.nfs_shares[count.index].export
|
||||
content = var.nfs_shares[count.index].content
|
||||
nodes = var.nfs_shares[count.index].nodes
|
||||
}
|
||||
```
|
||||
|
||||
## 🚀 Deployment Scripts
|
||||
|
||||
### Initialization Script
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# terraform/scripts/init-terraform.sh
|
||||
|
||||
set -e
|
||||
|
||||
ENVIRONMENT=${1:-production}
|
||||
TERRAFORM_DIR="terraform/environments/$ENVIRONMENT"
|
||||
|
||||
echo "🚀 Initializing Terraform for $ENVIRONMENT environment..."
|
||||
|
||||
cd "$TERRAFORM_DIR"
|
||||
|
||||
# Initialize Terraform
|
||||
terraform init
|
||||
|
||||
# Validate configuration
|
||||
terraform validate
|
||||
|
||||
# Format code
|
||||
terraform fmt -recursive
|
||||
|
||||
echo "✅ Terraform initialized successfully!"
|
||||
echo "Next steps:"
|
||||
echo " 1. Review terraform.tfvars"
|
||||
echo " 2. Run: terraform plan"
|
||||
echo " 3. Run: terraform apply"
|
||||
```
|
||||
|
||||
### Plan and Apply Script
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# terraform/scripts/plan-and-apply.sh
|
||||
|
||||
set -e
|
||||
|
||||
ENVIRONMENT=${1:-production}
|
||||
TERRAFORM_DIR="terraform/environments/$ENVIRONMENT"
|
||||
AUTO_APPROVE=${2:-false}
|
||||
|
||||
echo "🔍 Planning Terraform deployment for $ENVIRONMENT..."
|
||||
|
||||
cd "$TERRAFORM_DIR"
|
||||
|
||||
# Create plan
|
||||
terraform plan -out=tfplan
|
||||
|
||||
echo "📋 Plan created. Review the changes above."
|
||||
|
||||
if [ "$AUTO_APPROVE" = "true" ]; then
|
||||
echo "🚀 Auto-applying changes..."
|
||||
terraform apply tfplan
|
||||
else
|
||||
echo "Apply changes? (y/N)"
|
||||
read -r response
|
||||
if [[ "$response" =~ ^[Yy]$ ]]; then
|
||||
terraform apply tfplan
|
||||
else
|
||||
echo "❌ Deployment cancelled"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Clean up plan file
|
||||
rm -f tfplan
|
||||
|
||||
echo "✅ Deployment complete!"
|
||||
```
|
||||
|
||||
## 🔧 Integration with Existing Workflow
|
||||
|
||||
### Ansible Integration
|
||||
```yaml
|
||||
# ansible/homelab/terraform-integration.yml
|
||||
---
|
||||
- name: Deploy Infrastructure with Terraform
|
||||
hosts: localhost
|
||||
tasks:
|
||||
- name: Initialize Terraform
|
||||
shell: |
|
||||
cd terraform/environments/production
|
||||
terraform init
|
||||
|
||||
- name: Plan Terraform Changes
|
||||
shell: |
|
||||
cd terraform/environments/production
|
||||
terraform plan -out=tfplan
|
||||
register: terraform_plan
|
||||
|
||||
- name: Apply Terraform Changes
|
||||
shell: |
|
||||
cd terraform/environments/production
|
||||
terraform apply tfplan
|
||||
when: terraform_plan.rc == 0
|
||||
|
||||
- name: Wait for VMs to be Ready
|
||||
wait_for:
|
||||
host: "{{ item }}"
|
||||
port: 22
|
||||
timeout: 300
|
||||
loop:
|
||||
- "192.168.1.10" # Atlantis
|
||||
- "192.168.1.11" # Calypso
|
||||
- "192.168.1.12" # Homelab VM
|
||||
```
|
||||
|
||||
### CI/CD Integration
|
||||
```yaml
|
||||
# .github/workflows/terraform.yml
|
||||
name: Terraform Infrastructure
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths: ['terraform/**']
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths: ['terraform/**']
|
||||
|
||||
jobs:
|
||||
terraform:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Terraform
|
||||
uses: hashicorp/setup-terraform@v2
|
||||
with:
|
||||
terraform_version: 1.5.0
|
||||
|
||||
- name: Terraform Init
|
||||
run: |
|
||||
cd terraform/environments/production
|
||||
terraform init
|
||||
|
||||
- name: Terraform Validate
|
||||
run: |
|
||||
cd terraform/environments/production
|
||||
terraform validate
|
||||
|
||||
- name: Terraform Plan
|
||||
run: |
|
||||
cd terraform/environments/production
|
||||
terraform plan
|
||||
|
||||
- name: Terraform Apply
|
||||
if: github.ref == 'refs/heads/main'
|
||||
run: |
|
||||
cd terraform/environments/production
|
||||
terraform apply -auto-approve
|
||||
```
|
||||
|
||||
## 📊 Benefits Analysis
|
||||
|
||||
### Quantified Benefits
|
||||
|
||||
| Aspect | Before Terraform | With Terraform | Time Saved |
|
||||
|--------|------------------|----------------|------------|
|
||||
| **VM Deployment** | 30 min manual setup | 5 min automated | 25 min/VM |
|
||||
| **Network Changes** | 45 min manual config | 10 min code change | 35 min/change |
|
||||
| **Disaster Recovery** | 4+ hours manual rebuild | 1 hour automated | 3+ hours |
|
||||
| **Environment Consistency** | Manual verification | Guaranteed identical | 2+ hours/audit |
|
||||
| **Documentation** | Separate docs (often stale) | Self-documenting code | 1+ hour/update |
|
||||
|
||||
### ROI Calculation
|
||||
```
|
||||
Annual Time Savings:
|
||||
- VM deployments: 10 VMs × 25 min = 250 min
|
||||
- Network changes: 20 changes × 35 min = 700 min
|
||||
- DR testing: 4 tests × 180 min = 720 min
|
||||
- Documentation: 12 updates × 60 min = 720 min
|
||||
|
||||
Total: 2,390 minutes = 39.8 hours annually
|
||||
At $50/hour value: $1,990 annual savings
|
||||
|
||||
Implementation cost: ~40 hours = $2,000
|
||||
Break-even: 1 year
|
||||
```
|
||||
|
||||
## ⚠️ Risks and Mitigation
|
||||
|
||||
### Risk 1: State File Corruption
|
||||
**Mitigation:**
|
||||
- Implement remote state backend (S3 + DynamoDB)
|
||||
- Regular state file backups
|
||||
- State locking to prevent concurrent modifications
|
||||
|
||||
### Risk 2: Accidental Resource Deletion
|
||||
**Mitigation:**
|
||||
- Use `prevent_destroy` lifecycle rules
|
||||
- Implement approval workflows for destructive changes
|
||||
- Regular backups before major changes
|
||||
|
||||
### Risk 3: Learning Curve
|
||||
**Mitigation:**
|
||||
- Start with simple VM deployments
|
||||
- Gradual adoption over 4-6 weeks
|
||||
- Comprehensive documentation and examples
|
||||
|
||||
## 🎯 Success Metrics
|
||||
|
||||
### Key Performance Indicators
|
||||
- **Deployment Time**: < 10 minutes for new VM
|
||||
- **Configuration Drift**: Zero manual changes
|
||||
- **Recovery Time**: < 2 hours for complete rebuild
|
||||
- **Error Rate**: < 5% failed deployments
|
||||
|
||||
### Monitoring and Alerting
|
||||
```bash
|
||||
# Add to monitoring stack
|
||||
terraform_deployment_success_rate
|
||||
terraform_plan_execution_time
|
||||
terraform_state_file_size
|
||||
infrastructure_drift_detection
|
||||
```
|
||||
|
||||
## 📚 Learning Resources
|
||||
|
||||
### Essential Reading
|
||||
1. [Terraform Proxmox Provider Documentation](https://registry.terraform.io/providers/Telmate/proxmox/latest/docs)
|
||||
2. [Terraform Best Practices](https://www.terraform-best-practices.com/)
|
||||
3. [Infrastructure as Code Patterns](https://infrastructure-as-code.com/)
|
||||
|
||||
### Hands-on Labs
|
||||
1. Deploy single VM with Terraform
|
||||
2. Create reusable VM module
|
||||
3. Implement multi-environment setup
|
||||
4. Add networking and storage modules
|
||||
|
||||
### Community Resources
|
||||
- [r/Terraform](https://reddit.com/r/Terraform)
|
||||
- [Terraform Discord](https://discord.gg/terraform)
|
||||
- [HashiCorp Developer: Terraform Tutorials](https://developer.hashicorp.com/terraform/tutorials)
|
||||
|
||||
## 🔄 Migration Strategy
|
||||
|
||||
### Week 1: Preparation
|
||||
- [ ] Install Terraform and providers
|
||||
- [ ] Create basic directory structure
|
||||
- [ ] Document current infrastructure
|
||||
|
||||
### Week 2: First VM
|
||||
- [ ] Create simple VM module
|
||||
- [ ] Deploy test VM with Terraform
|
||||
- [ ] Validate functionality
|
||||
|
||||
### Week 3: Production VMs
|
||||
- [ ] Import existing VMs to Terraform state
|
||||
- [ ] Create production environment
|
||||
- [ ] Test disaster recovery
|
||||
|
||||
### Week 4: Advanced Features
|
||||
- [ ] Add networking module
|
||||
- [ ] Implement storage management
|
||||
- [ ] Create CI/CD pipeline
|
||||
|
||||
### Week 5-6: Optimization
|
||||
- [ ] Refine modules and variables
|
||||
- [ ] Add monitoring and alerting
|
||||
- [ ] Create comprehensive documentation
|
||||
|
||||
---
|
||||
|
||||
**Next Steps:**
|
||||
1. Review this guide with your team
|
||||
2. Set up development environment
|
||||
3. Start with Phase 1 implementation
|
||||
4. Schedule weekly progress reviews
|
||||
Reference in New Issue
Block a user