HEX
Server: nginx/1.29.3
System: Linux 11979.bigscoots-wpo.com 6.8.0-88-generic #89-Ubuntu SMP PREEMPT_DYNAMIC Sat Oct 11 01:02:46 UTC 2025 x86_64
User: nginx (1068)
PHP: 7.4.33
Disabled: exec,system,passthru,shell_exec,proc_open,proc_close,popen,show_source,cmd# Do not modify this line # 1684243876
Upload Files
File: //bigscoots/lxd/bs-node-tuner.sh
#!/bin/bash
# ============================================================
# BigScoots LXD Node Tuner
# Version: 1.4.0
# Idempotent - safe to run repeatedly on the same node
# Outputs JSON for portal integration
# Usage: bash bs-node-tuner.sh [--dry-run] [--force]
#                               [--backfill-plans]
#                               [--optimize-containers]
# ============================================================

set -euo pipefail

# ── Config ───────────────────────────────────────────────────
SYSCTL_DIR="/etc/sysctl.d"
SYSCTL_FILE="${SYSCTL_DIR}/99-bigscoots.conf"
SYSCTL_NET_FILE="${SYSCTL_DIR}/99-bigscoots-net.conf"
SYSCTL_SNAPSHOT="${SYSCTL_DIR}/00-bigscoots-defaults.snapshot"
MODPROBE_FILE="/etc/modprobe.d/zfs.conf"
CRON_FILE="/etc/cron.d/bigscoots-drop-dcache"
NIC_TUNE_SCRIPT="/etc/networkd-dispatcher/routable.d/bigscoots-nic-tune.sh"
DRY_RUN=false
FORCE=false
BACKFILL_PLANS=false
OPTIMIZE_CONTAINERS=false

# Parse flags with exact matching. The previous substring test
# ([[ "$*" == *"--force"* ]]) also matched unrelated arguments such as
# --force-foo or --dry-runx; a case over "$@" accepts only the
# documented flags. Unknown arguments are still silently ignored,
# matching the original lenient behavior.
for arg in "$@"; do
    case "$arg" in
        --dry-run)             DRY_RUN=true ;;
        --force)               FORCE=true ;;
        --backfill-plans)      BACKFILL_PLANS=true ;;
        --optimize-containers) OPTIMIZE_CONTAINERS=true ;;
        *) ;;
    esac
done

# ── State Tracking ───────────────────────────────────────────
# Result buckets accumulated during the run and rendered into the
# final JSON report by output_json.
CHANGES=()
ALREADY_OK=()
ERRORS=()
WARNINGS=()
START_TIME=$(date +%s%3N)  # epoch milliseconds (GNU date %N extension)

# ── Helpers ──────────────────────────────────────────────────
# Each helper appends one human-readable message to its bucket.
log_change() {
    CHANGES+=("$1")
}
log_ok() {
    ALREADY_OK+=("$1")
}
log_error() {
    ERRORS+=("$1")
}
log_warning() {
    WARNINGS+=("$1")
}

# ── Preflight ────────────────────────────────────────────────
preflight() {
    # Refuse to run unless we are root on a ZFS-backed LXD host.
    # Failures are reported as JSON on stderr so the portal can parse them.
    [[ $EUID -eq 0 ]] || {
        echo '{"status":"error","message":"Must be run as root"}' >&2
        exit 1
    }
    command -v lxc &>/dev/null || {
        echo '{"status":"error","message":"LXD not found - this script is for LXD hosts only"}' >&2
        exit 1
    }
    command -v zpool &>/dev/null || {
        echo '{"status":"error","message":"ZFS not found"}' >&2
        exit 1
    }
}

# ── Snapshot defaults before first run ───────────────────────
snapshot_defaults() {
    # Record the kernel's pre-tuning sysctl values once, so the original
    # defaults can always be recovered later. --force re-captures them.
    if [[ -f "$SYSCTL_SNAPSHOT" && "$FORCE" != true ]]; then
        log_ok "Sysctl defaults snapshot exists"
        return
    fi
    if [[ "$DRY_RUN" = true ]]; then
        log_change "DRY-RUN: Would capture sysctl defaults snapshot"
        return
    fi
    sysctl -a 2>/dev/null > "$SYSCTL_SNAPSHOT"
    log_change "Captured sysctl defaults snapshot -> ${SYSCTL_SNAPSHOT}"
}

# ── Sysctl Tuning (VM) ────────────────────────────────────────
# Target VM/memory sysctl values. tune_sysctl compares live kernel values
# against this map to decide whether re-application is needed. Keep in sync
# with the heredoc written to $SYSCTL_FILE inside tune_sysctl — the two are
# maintained independently.
declare -A SYSCTL_TARGET=(
    ["vm.vfs_cache_pressure"]="2000"
    ["vm.swappiness"]="100"
    ["vm.dirty_background_ratio"]="1"
    ["vm.min_free_kbytes"]="1048576"
    ["vm.watermark_scale_factor"]="400"
    ["vm.watermark_boost_factor"]="0"
    ["vm.page-cluster"]="0"
    ["vm.max_map_count"]="1048576"
    ["vm.admin_reserve_kbytes"]="131072"
)

tune_sysctl() {
    # Bring VM/memory sysctls in line with SYSCTL_TARGET, both on disk
    # (drop-in under /etc/sysctl.d) and in the running kernel.
    local current_vals_ok=true
    for key in "${!SYSCTL_TARGET[@]}"; do
        local target="${SYSCTL_TARGET[$key]}"
        local current
        current=$(sysctl -n "$key" 2>/dev/null || echo "")
        if [[ "$current" != "$target" ]]; then
            current_vals_ok=false
            break
        fi
    done

    # Rewrite the drop-in when missing, stale (sentinel line absent), or forced.
    if [[ ! -f "$SYSCTL_FILE" ]] || \
       ! grep -q "vm.vfs_cache_pressure = 2000" "$SYSCTL_FILE" 2>/dev/null || \
       [[ "$FORCE" = true ]]; then
        if [[ "$DRY_RUN" = false ]]; then
            mkdir -p "$SYSCTL_DIR"
            cat > "$SYSCTL_FILE" << 'SYSCTL_EOF'
# BigScoots Node Tuning - VM/Memory
# DO NOT EDIT MANUALLY - managed by bs-node-tuner.sh

vm.vfs_cache_pressure = 2000
vm.swappiness = 100
vm.dirty_background_ratio = 1
vm.min_free_kbytes = 1048576
vm.watermark_scale_factor = 400
vm.watermark_boost_factor = 0
vm.page-cluster = 0
vm.max_map_count = 1048576
vm.admin_reserve_kbytes = 131072
SYSCTL_EOF
            log_change "Written ${SYSCTL_FILE}"
        else
            log_change "DRY-RUN: Would write ${SYSCTL_FILE}"
        fi
    else
        log_ok "Sysctl VM config file up to date"
    fi

    if [[ "$current_vals_ok" = false ]]; then
        if [[ "$DRY_RUN" = false ]]; then
            # Guarded: `sysctl -p` exits non-zero if any key is rejected,
            # which previously aborted the whole script via set -e with
            # no JSON output. Record the failure and continue instead.
            if sysctl -p "$SYSCTL_FILE" &>/dev/null; then
                log_change "Applied VM sysctl values live"
            else
                log_error "Failed to apply VM sysctl values live (sysctl -p ${SYSCTL_FILE} returned non-zero)"
            fi
        else
            log_change "DRY-RUN: Would apply VM sysctl values live"
        fi
    else
        log_ok "Sysctl VM live values correct"
    fi
}

# ── Sysctl Tuning (Network) ───────────────────────────────────
# Target network-core sysctl values. tune_sysctl_net compares live kernel
# values against this map. Keep in sync with the heredoc written to
# $SYSCTL_NET_FILE inside tune_sysctl_net — the two are maintained
# independently.
declare -A SYSCTL_NET_TARGET=(
    ["net.core.netdev_max_backlog"]="10000"
    ["net.core.rmem_max"]="16777216"
    ["net.core.wmem_max"]="16777216"
    ["net.core.rmem_default"]="1048576"
    ["net.core.wmem_default"]="1048576"
    ["net.core.netdev_budget"]="600"
)

tune_sysctl_net() {
    # Bring network-core sysctls in line with SYSCTL_NET_TARGET, both on
    # disk (drop-in under /etc/sysctl.d) and in the running kernel.
    local current_vals_ok=true
    for key in "${!SYSCTL_NET_TARGET[@]}"; do
        local target="${SYSCTL_NET_TARGET[$key]}"
        local current
        current=$(sysctl -n "$key" 2>/dev/null || echo "")
        if [[ "$current" != "$target" ]]; then
            current_vals_ok=false
            break
        fi
    done

    # Rewrite the drop-in when missing, stale (sentinel line absent), or forced.
    if [[ ! -f "$SYSCTL_NET_FILE" ]] || \
       ! grep -q "net.core.netdev_max_backlog = 10000" "$SYSCTL_NET_FILE" 2>/dev/null || \
       [[ "$FORCE" = true ]]; then
        if [[ "$DRY_RUN" = false ]]; then
            mkdir -p "$SYSCTL_DIR"
            cat > "$SYSCTL_NET_FILE" << 'NET_SYSCTL_EOF'
# BigScoots Node Tuning - Network
# DO NOT EDIT MANUALLY - managed by bs-node-tuner.sh
# Addresses NIC ring buffer overflow and softirq backlog issues
# on busy LXD nodes with many containers sharing a bonded NIC.

net.core.netdev_max_backlog = 10000
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.core.rmem_default = 1048576
net.core.wmem_default = 1048576
net.core.netdev_budget = 600
NET_SYSCTL_EOF
            log_change "Written ${SYSCTL_NET_FILE}"
        else
            log_change "DRY-RUN: Would write ${SYSCTL_NET_FILE}"
        fi
    else
        log_ok "Sysctl network config file up to date"
    fi

    if [[ "$current_vals_ok" = false ]]; then
        if [[ "$DRY_RUN" = false ]]; then
            # Guarded: `sysctl -p` exits non-zero if any key is rejected,
            # which previously aborted the whole script via set -e with
            # no JSON output. Record the failure and continue instead.
            if sysctl -p "$SYSCTL_NET_FILE" &>/dev/null; then
                log_change "Applied network sysctl values live"
            else
                log_error "Failed to apply network sysctl values live (sysctl -p ${SYSCTL_NET_FILE} returned non-zero)"
            fi
        else
            log_change "DRY-RUN: Would apply network sysctl values live"
        fi
    else
        log_ok "Sysctl network live values correct"
    fi
}

# ── NIC Hardware Tuning ───────────────────────────────────────
NIC_RX_TARGET=8160
NIC_TX_TARGET=8160
NIC_RX_USECS_TARGET=10

# Print the names of physical NICs, one per line: interfaces that have a
# backing /sys/class/net/<name>/device entry, excluding well-known virtual
# prefixes (loopback, bonds, bridges, veth pairs, container/VM devices).
get_physical_nics() {
    ip link show 2>/dev/null \
        | awk -F': ' '/^[0-9]+:/{print $2}' \
        | awk '{print $1}' \
        | grep -vE '^(lo|bond|br|veth|docker|lxc|virbr)' \
        | while read -r dev; do
            if [[ -e "/sys/class/net/${dev}/device" ]]; then
                echo "$dev"
            fi
        done
}

tune_nic() {
    # Raise ring buffer sizes and lower interrupt coalescing on every
    # physical NIC, then write the persistence script so settings survive
    # link flaps and reboots.
    local nics
    mapfile -t nics < <(get_physical_nics)

    if [[ ${#nics[@]} -eq 0 ]]; then
        log_ok "NIC tuning: no physical NICs detected (skipping)"
        return
    fi

    local all_ok=true
    local nics_tuned=()
    local nics_skipped=()

    for nic in "${nics[@]}"; do
        # Skip NICs whose driver does not expose ring settings via ethtool.
        if ! ethtool -g "$nic" &>/dev/null; then
            nics_skipped+=("$nic")
            continue
        fi

        local rx_current tx_current rx_max
        # Parse the "Current hardware settings" and "Pre-set maximums"
        # sections of `ethtool -g` output; first RX:/TX: line after each header.
        rx_current=$(ethtool -g "$nic" 2>/dev/null | awk '/Current hardware settings/{found=1} found && /^RX:/{print $2; exit}')
        tx_current=$(ethtool -g "$nic" 2>/dev/null | awk '/Current hardware settings/{found=1} found && /^TX:/{print $2; exit}')
        rx_max=$(ethtool -g "$nic" 2>/dev/null | awk '/Pre-set maximums/{found=1} found && /^RX:/{print $2; exit}')

        local rx_target tx_target
        rx_target=$NIC_RX_TARGET
        tx_target=$NIC_TX_TARGET
        # Clamp RX to the hardware maximum when it is below the target.
        # NOTE(review): tx_target is NOT clamped to the hardware TX maximum —
        # confirm all deployed NICs accept tx=8160, or add the same clamp.
        if [[ -n "$rx_max" ]] && [[ "$rx_max" -lt "$NIC_RX_TARGET" ]]; then
            rx_target=$rx_max
        fi

        local rx_usecs_current
        rx_usecs_current=$(ethtool -c "$nic" 2>/dev/null | awk '/^rx-usecs:/{print $2}')
        rx_usecs_current=${rx_usecs_current:-"unknown"}

        # Any of the three values off-target marks this NIC for (re)tuning.
        local nic_ok=true
        if [[ "$rx_current" != "$rx_target" ]] || \
           [[ "$tx_current" != "$tx_target" ]] || \
           [[ "$rx_usecs_current" != "$NIC_RX_USECS_TARGET" ]]; then
            nic_ok=false
            all_ok=false
        fi

        if [[ "$nic_ok" = false ]]; then
            if [[ "$DRY_RUN" = false ]]; then
                local failed=false
                # Ring buffer failure is an error; coalescing failure is
                # deliberately ignored (|| true) — not all drivers support -C.
                ethtool -G "$nic" rx "$rx_target" tx "$tx_target" 2>/dev/null || failed=true
                ethtool -C "$nic" rx-usecs "$NIC_RX_USECS_TARGET" 2>/dev/null || true
                if [[ "$failed" = true ]]; then
                    log_error "NIC tuning failed for ${nic}"
                else
                    nics_tuned+=("${nic}(rx=${rx_target},tx=${tx_target},rx-usecs=${NIC_RX_USECS_TARGET})")
                fi
            else
                nics_tuned+=("DRY-RUN: ${nic} rx=${rx_current}->${rx_target} tx=${tx_current}->${tx_target} rx-usecs=${rx_usecs_current}->${NIC_RX_USECS_TARGET}")
            fi
        fi
    done

    # Summarize: tuned list, all-ok, and unsupported NICs.
    if [[ ${#nics_tuned[@]} -gt 0 ]]; then
        local tuned_list
        tuned_list=$(IFS=', '; echo "${nics_tuned[*]}")
        log_change "NIC ring buffers tuned: ${tuned_list}"
    fi

    if [[ "$all_ok" = true ]] && [[ ${#nics_tuned[@]} -eq 0 ]]; then
        local nic_list
        nic_list=$(IFS=', '; echo "${nics[*]}")
        log_ok "NIC ring buffers already at target (${nic_list})"
    fi

    if [[ ${#nics_skipped[@]} -gt 0 ]]; then
        local skip_list
        skip_list=$(IFS=', '; echo "${nics_skipped[*]}")
        log_warning "NIC tuning skipped (ethtool not supported): ${skip_list}"
    fi

    # Always (re)check the persistence script, even when nothing was tuned.
    _write_nic_persistence_script "${nics[@]}"
}

_write_nic_persistence_script() {
    # Generate a networkd-dispatcher hook that re-applies ring-buffer and
    # coalescing settings whenever an interface becomes routable, so the
    # ethtool tuning survives reboots and link flaps.
    local nics=("$@")

    local apply_cmds=""
    for nic in "${nics[@]}"; do
        local rx_max
        rx_max=$(ethtool -g "$nic" 2>/dev/null | awk '/Pre-set maximums/{found=1} found && /^RX:/{print $2; exit}' || echo "$NIC_RX_TARGET")
        # Clamp RX to the hardware maximum when it is below the target.
        local rx_target=$NIC_RX_TARGET
        [[ -n "$rx_max" ]] && [[ "$rx_max" -lt "$NIC_RX_TARGET" ]] && rx_target=$rx_max
        # Each command is best-effort (|| true) so one bad NIC cannot
        # break the hook for the others. \n is expanded by printf %b below.
        apply_cmds+="ethtool -G ${nic} rx ${rx_target} tx ${NIC_TX_TARGET} 2>/dev/null || true\n"
        apply_cmds+="ethtool -C ${nic} rx-usecs ${NIC_RX_USECS_TARGET} 2>/dev/null || true\n"
    done

    # Version marker makes the write idempotent: same version + no --force
    # leaves the existing script untouched.
    local expected_marker="# bs-node-tuner v1.4.0"
    if [[ -f "$NIC_TUNE_SCRIPT" ]] && \
       grep -q "$expected_marker" "$NIC_TUNE_SCRIPT" 2>/dev/null && \
       [[ "$FORCE" = false ]]; then
        log_ok "NIC persistence script exists: ${NIC_TUNE_SCRIPT}"
        return
    fi

    if [[ "$DRY_RUN" = false ]]; then
        mkdir -p "$(dirname "$NIC_TUNE_SCRIPT")"
        cat > "$NIC_TUNE_SCRIPT" << PERSIST_EOF
#!/bin/bash
# BigScoots NIC Tuning - applied on network interface up
# DO NOT EDIT MANUALLY - managed by bs-node-tuner.sh
# bs-node-tuner v1.4.0
$(printf '%b' "$apply_cmds")
PERSIST_EOF
        chmod +x "$NIC_TUNE_SCRIPT"
        log_change "Written NIC persistence script: ${NIC_TUNE_SCRIPT}"
    else
        log_change "DRY-RUN: Would write NIC persistence script: ${NIC_TUNE_SCRIPT}"
    fi
}

# ── CPU Frequency Tuning ─────────────────────────────────────
tune_cpu_freq() {
    # Pin the CPU frequency driver to maximum performance. Handles the
    # Intel P-state and AMD P-state (EPP) sysfs interfaces; warns when
    # neither is present.
    # Intel P-state
    if [[ -f /sys/devices/system/cpu/intel_pstate/min_perf_pct ]]; then
        local current_pct current_gov
        current_pct=$(cat /sys/devices/system/cpu/intel_pstate/min_perf_pct 2>/dev/null | tr -d '[:space:]')
        current_gov=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor 2>/dev/null | tr -d '[:space:]')

        if [[ "$current_pct" == "100" ]] && [[ "$current_gov" == "performance" ]]; then
            local freq
            freq=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq 2>/dev/null | tr -d '[:space:]')
            log_ok "Intel P-state already optimal: governor=performance min_perf_pct=100 freq=${freq}kHz"
        else
            if [[ "$DRY_RUN" = false ]]; then
                # Guarded: sysfs writes can be rejected by the driver;
                # unguarded failures previously aborted the script under
                # set -e with no JSON output.
                echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor > /dev/null 2>&1 || \
                    log_warning "Failed to set scaling_governor=performance on one or more CPUs"
                if echo 100 > /sys/devices/system/cpu/intel_pstate/min_perf_pct 2>/dev/null; then
                    local freq
                    freq=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq 2>/dev/null | tr -d '[:space:]')
                    log_change "Intel P-state tuned: governor=performance min_perf_pct=100 freq=${freq}kHz"
                else
                    log_error "Failed to set Intel P-state min_perf_pct=100"
                fi
            else
                log_change "DRY-RUN: Would set Intel P-state governor=performance min_perf_pct=100"
            fi
        fi
    # AMD P-state
    elif [[ -f /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_preference ]]; then
        local current_pref
        current_pref=$(cat /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_preference 2>/dev/null | tr -d '[:space:]')

        if [[ "$current_pref" == "performance" ]]; then
            log_ok "AMD P-state already optimal: energy_performance_preference=performance"
        else
            if [[ "$DRY_RUN" = false ]]; then
                # Guarded for the same set -e reason as the Intel branch.
                echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/energy_performance_preference > /dev/null 2>&1 || \
                    log_warning "Failed to set energy_performance_preference on one or more CPUs"
                log_change "AMD P-state tuned: energy_performance_preference=performance"
            else
                log_change "DRY-RUN: Would set AMD P-state energy_performance_preference=performance"
            fi
        fi
    else
        log_warning "CPU frequency scaling not available (no intel_pstate or amd_pstate found)"
    fi
}

# ── ZFS Tuning ───────────────────────────────────────────────
# Compute the zfs_arc_max byte value for this node: roughly 1/12th of
# physical RAM (~8.3%), clamped to the [4GB, 20GB] range and rounded down
# to a whole GB. Examples: 256GB -> 20GB, 128GB -> 10GB, 64GB -> 5GB.
_calc_arc_max() {
    local mem_kb arc gib
    mem_kb=$(awk '/MemTotal/{print $2}' /proc/meminfo 2>/dev/null || echo 0)
    arc=$(( mem_kb * 1024 / 12 ))                 # ~8.3% of RAM in bytes
    local floor=$(( 4 * 1024 * 1024 * 1024 ))     # never below 4GB
    local ceiling=$(( 20 * 1024 * 1024 * 1024 ))  # never above 20GB
    if (( arc < floor )); then
        arc=$floor
    elif (( arc > ceiling )); then
        arc=$ceiling
    fi
    gib=$(( arc / 1073741824 ))                   # round down to whole GB
    echo $(( gib * 1073741824 ))
}

# Target ZFS module parameters. tune_zfs adds a node-specific
# "zfs_arc_max" entry (from _calc_arc_max) at runtime, compares every key
# against /sys/module/zfs/parameters, and persists the same values via
# /etc/modprobe.d. Keep in sync with the heredoc in tune_zfs.
declare -A ZFS_TARGET=(
    ["zfs_arc_min"]="0"
    ["zfs_arc_lotsfree_percent"]="0"
    ["zfs_arc_dnode_limit_percent"]="40"
    ["zfs_prefetch_disable"]="1"
    ["zfs_dirty_data_max"]="4294967296"
    ["zfs_async_block_max_blocks"]="3200"
    ["zfs_delete_blocks"]="3200"
    ["zfs_dmu_offset_next_sync"]="0"
    ["zfs_vdev_async_read_max_active"]="16"
    ["zfs_vdev_async_read_min_active"]="8"
    ["zfs_vdev_sync_read_max_active"]="16"
    ["zfs_vdev_sync_read_min_active"]="16"
    ["zfs_vdev_sync_write_max_active"]="14"
    ["zfs_vdev_sync_write_min_active"]="12"
)

tune_zfs() {
    local params_ok=true
    local ZFS_PARAM_DIR="/sys/module/zfs/parameters"

    # Calculate ARC max for this node
    local ARC_MAX
    ARC_MAX=$(_calc_arc_max)
    local ARC_MAX_GB=$(( ARC_MAX / 1073741824 ))
    ZFS_TARGET["zfs_arc_max"]="$ARC_MAX"

    for param in "${!ZFS_TARGET[@]}"; do
        local target="${ZFS_TARGET[$param]}"
        local current
        current=$(cat "${ZFS_PARAM_DIR}/${param}" 2>/dev/null | tr -d '[:space:]' || echo "")
        if [[ "$current" != "$target" ]]; then
            params_ok=false
            break
        fi
    done

    if [[ ! -f "$MODPROBE_FILE" ]] || \
       ! grep -q "zfs_arc_max=${ARC_MAX}" "$MODPROBE_FILE" 2>/dev/null || \
       [[ "$FORCE" = true ]]; then
        if [[ "$DRY_RUN" = false ]]; then
            cat > "$MODPROBE_FILE" << ZFS_EOF
# BigScoots ZFS Tuning - Kinsta Aligned
# DO NOT EDIT MANUALLY - managed by bs-node-tuner.sh
# ARC max calculated as ~8% of RAM, capped 4GB-20GB (${ARC_MAX_GB}GB on this node)

options zfs zfs_arc_max=${ARC_MAX}
options zfs zfs_arc_min=0
options zfs zfs_arc_lotsfree_percent=0
options zfs zfs_arc_dnode_limit_percent=40
options zfs zfs_prefetch_disable=1
options zfs zfs_dirty_data_max=4294967296
options zfs zfs_async_block_max_blocks=3200
options zfs zfs_delete_blocks=3200
options zfs zfs_dmu_offset_next_sync=0
options zfs zfs_vdev_async_read_max_active=16
options zfs zfs_vdev_async_read_min_active=8
options zfs zfs_vdev_sync_read_max_active=16
options zfs zfs_vdev_sync_read_min_active=16
options zfs zfs_vdev_sync_write_max_active=14
options zfs zfs_vdev_sync_write_min_active=12
ZFS_EOF
            log_change "Written ${MODPROBE_FILE} (ARC max: ${ARC_MAX_GB}GB)"
        else
            log_change "DRY-RUN: Would write ${MODPROBE_FILE} (ARC max: ${ARC_MAX_GB}GB)"
        fi
    else
        log_ok "ZFS modprobe.d file up to date (ARC max: ${ARC_MAX_GB}GB)"
    fi

    if [[ "$params_ok" = false ]]; then
        if [[ "$DRY_RUN" = false ]]; then
            local apply_failed=false
            for param in "${!ZFS_TARGET[@]}"; do
                local target="${ZFS_TARGET[$param]}"
                local param_path="${ZFS_PARAM_DIR}/${param}"
                if [[ -f "$param_path" ]]; then
                    if ! echo "$target" > "$param_path" 2>/dev/null; then
                        log_error "Failed to set ZFS param: ${param}=${target}"
                        apply_failed=true
                    fi
                else
                    log_warning "ZFS param not found: ${param}"
                fi
            done
            [[ "$apply_failed" = false ]] && log_change "Applied ZFS parameters live"
        else
            log_change "DRY-RUN: Would apply ZFS parameters live"
        fi
    else
        log_ok "ZFS live parameters correct"
    fi

    # Set recordsize=16k on containers dataset (optimal for MySQL/WordPress workloads)
    local containers_dataset="${HOSTNAME}/lxd/containers"
    if zfs list "$containers_dataset" &>/dev/null; then
        local current_recordsize
        current_recordsize=$(zfs get -H -o value recordsize "$containers_dataset" 2>/dev/null | tr -d '[:space:]')
        if [[ "$current_recordsize" == "16K" ]] || [[ "$current_recordsize" == "16k" ]]; then
            log_ok "ZFS containers recordsize correct: 16K (${containers_dataset})"
        else
            if [[ "$DRY_RUN" = false ]]; then
                if zfs set recordsize=16k "$containers_dataset" 2>/dev/null; then
                    log_change "Set ZFS recordsize=16k on ${containers_dataset} (was ${current_recordsize})"
                else
                    log_error "Failed to set recordsize=16k on ${containers_dataset}"
                fi
            else
                log_change "DRY-RUN: Would set recordsize=16k on ${containers_dataset} (currently ${current_recordsize})"
            fi
        fi
    else
        log_warning "ZFS dataset not found: ${containers_dataset}"
    fi
}

# ── Drop Caches Cron ─────────────────────────────────────────
tune_cron() {
    # Weekly drop of reclaimable dentry/inode caches - Sunday 04:00.
    local drop_line="0 4 * * 0 root sync && echo 2 > /proc/sys/vm/drop_caches"
    if [[ -f "$CRON_FILE" ]] && grep -qF "$drop_line" "$CRON_FILE" 2>/dev/null; then
        log_ok "Drop-caches cron exists"
    elif [[ "$DRY_RUN" = true ]]; then
        log_change "DRY-RUN: Would write drop-caches cron"
    else
        echo "$drop_line" > "$CRON_FILE"
        log_change "Written drop-caches cron -> ${CRON_FILE}"
    fi

    # Node monitor - runs every minute.
    local monitor_file="/etc/cron.d/bigscoots-node-monitor"
    local monitor_line="* * * * * root bash /bigscoots/lxd/node_monitor.sh"
    if [[ -f "$monitor_file" ]] && grep -qF "$monitor_line" "$monitor_file" 2>/dev/null; then
        log_ok "Node monitor cron exists"
    elif [[ "$DRY_RUN" = true ]]; then
        log_change "DRY-RUN: Would write node monitor cron -> ${monitor_file}"
    else
        echo "$monitor_line" > "$monitor_file"
        log_change "Written node monitor cron -> ${monitor_file}"
    fi
}

# ── ZFS Swap Zvol Tuning ─────────────────────────────────────
tune_swap_zvol() {
    # Find the first ZFS volume whose name contains "swap" and disable data
    # caching on it (primarycache=metadata, secondarycache=none) —
    # presumably to avoid double-caching swap pages in ARC/L2ARC.
    local zvol
    zvol=$(zfs list -H -o name,type 2>/dev/null | awk '$2=="volume"{print $1}' | grep -i swap | head -1 || true)

    if [[ -z "$zvol" ]]; then
        log_ok "No ZFS swap zvol found (using file-based swap)"
        return
    fi

    local pc sc
    pc=$(zfs get -H -o value primarycache "$zvol" 2>/dev/null | tr -d '[:space:]' || echo "")
    sc=$(zfs get -H -o value secondarycache "$zvol" 2>/dev/null | tr -d '[:space:]' || echo "")

    if [[ "$pc" == "metadata" ]] && [[ "$sc" == "none" ]]; then
        log_ok "ZFS swap zvol tuning correct (${zvol})"
    else
        if [[ "$DRY_RUN" = false ]]; then
            # Both properties must be set; a failure of either is an error.
            if zfs set primarycache=metadata "$zvol" 2>/dev/null && \
               zfs set secondarycache=none "$zvol" 2>/dev/null; then
                log_change "Tuned ZFS swap zvol: ${zvol}"
            else
                log_error "Failed to tune ZFS swap zvol: ${zvol}"
            fi
        else
            log_change "DRY-RUN: Would tune ZFS swap zvol: ${zvol}"
        fi
    fi
}

# ── Dentry Health Check ───────────────────────────────────────
check_dentry_health() {
    # Report dentry slab-cache fragmentation from /proc/slabinfo.
    # slabinfo v2.x row format:
    #   name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>
    #     : tunables <limit> <batchcount> <sharedfactor>
    #     : slabdata <active_slabs> <num_slabs> <sharedavail>
    # so num_objs is $3 and num_slabs is $15. The previous code read $6
    # (pagesperslab, a small per-cache constant), which made the slab
    # thresholds below unreachable and the obj/slab ratio meaningless.
    # NOTE(review): verify against the deployed kernel's slabinfo version.
    local slabs objects ratio
    slabs=$(awk '/^dentry/{print $15}' /proc/slabinfo 2>/dev/null | tr -d '[:space:]')
    objects=$(awk '/^dentry/{print $3}' /proc/slabinfo 2>/dev/null | tr -d '[:space:]')
    slabs=${slabs:-0}
    objects=${objects:-0}
    ratio=0
    [[ "$slabs" -gt 0 ]] && ratio=$(( objects / slabs ))

    if [[ "$slabs" -gt 500000 ]]; then
        log_warning "Dentry slab fragmentation CRITICAL: ${slabs} slabs (${objects} objects, ${ratio} obj/slab) - run: echo 2 > /proc/sys/vm/drop_caches"
    elif [[ "$slabs" -gt 100000 ]]; then
        log_warning "Dentry slab fragmentation elevated: ${slabs} slabs (${objects} objects, ${ratio} obj/slab)"
    else
        log_ok "Dentry slab health OK: ${slabs} slabs (${objects} objects, ${ratio} obj/slab)"
    fi
}

# ── NIC Health Check ─────────────────────────────────────────
check_nic_health() {
    # Report NIC packet-loss indicators: per-NIC RX drop stats from
    # `ip -s link`, driver rx_missed_errors from ethtool, and per-CPU
    # softnet backlog drops from /proc/net/softnet_stat.
    local nics
    mapfile -t nics < <(get_physical_nics)

    if [[ ${#nics[@]} -eq 0 ]]; then
        log_ok "NIC health: no physical NICs detected"
        return
    fi

    for nic in "${nics[@]}"; do
        local missed
        # Reads the data line following the "RX:" header.
        # NOTE(review): in iproute2 output column 4 of that line is
        # "dropped"; "missed" is typically column 5 — confirm which counter
        # is actually intended here (the message says "missed").
        missed=$(ip -s link show "$nic" 2>/dev/null | awk '/RX:/{getline; print $4}')
        missed=${missed:-0}

        if [[ "$missed" -gt 1000000 ]]; then
            log_warning "NIC ${nic}: ${missed} missed RX packets (ring buffer overflows)"
        elif [[ "$missed" -gt 0 ]]; then
            log_ok "NIC ${nic}: ${missed} missed RX packets (minor)"
        else
            log_ok "NIC ${nic}: no missed RX packets"
        fi

        local rx_missed_errors
        rx_missed_errors=$(ethtool -S "$nic" 2>/dev/null | awk '/rx_missed_errors/{print $2}' || echo "0")
        rx_missed_errors=${rx_missed_errors:-0}
        if [[ "$rx_missed_errors" -gt 100000 ]]; then
            log_warning "NIC ${nic}: ${rx_missed_errors} rx_missed_errors (hardware ring buffer drops)"
        fi
    done

    # Scan /proc/net/softnet_stat (one row per CPU, hex fields; field 2 is
    # the drop count) for the CPU with the most backlog drops.
    local worst_cpu worst_drops
    worst_cpu=0
    worst_drops=0
    local cpu_idx=0
    while IFS= read -r line; do
        local drops
        drops=$(echo "$line" | awk '{print $2}')
        # Hex -> decimal; malformed lines fall back to 0.
        drops=$(printf '%d' "0x${drops}" 2>/dev/null || echo 0)
        if [[ "$drops" -gt "$worst_drops" ]]; then
            worst_drops=$drops
            worst_cpu=$cpu_idx
        fi
        (( cpu_idx++ )) || true
    done < /proc/net/softnet_stat

    if [[ "$worst_drops" -gt 10000000 ]]; then
        log_warning "Softnet CPU${worst_cpu} drops: ${worst_drops} (softirq backlog overflow)"
    elif [[ "$worst_drops" -gt 0 ]]; then
        log_ok "Softnet max drops: ${worst_drops} on CPU${worst_cpu} (minor)"
    else
        log_ok "Softnet drops: none"
    fi
}

# ── ARC Health Check ─────────────────────────────────────────
check_arc_health() {
    # Verify the live zfs_arc_max module parameter matches the value this
    # node should have per _calc_arc_max, and report current ARC usage.
    local arc_size arc_max arc_size_gb arc_max_gb expected_arc
    # The current ARC size is the arcstats kstat row literally named
    # "size"; the old /^arc_size/ pattern matched no row, so the reported
    # "current size" was always 0GB.
    # NOTE(review): confirm the kstat name on the deployed OpenZFS version.
    arc_size=$(awk '$1=="size"{print $3}' /proc/spl/kstat/zfs/arcstats 2>/dev/null | tr -d '[:space:]')
    arc_max=$(cat /sys/module/zfs/parameters/zfs_arc_max 2>/dev/null | tr -d '[:space:]')
    arc_size=${arc_size:-0}
    arc_max=${arc_max:-0}
    arc_size_gb=$(( arc_size / 1073741824 ))
    arc_max_gb=$(( arc_max / 1073741824 ))
    expected_arc=$(_calc_arc_max)
    local expected_arc_gb=$(( expected_arc / 1073741824 ))

    if [[ "$arc_max" -ne "$expected_arc" ]]; then
        log_warning "ARC max not at expected value for this node (current: ${arc_max_gb}GB, expected: ${expected_arc_gb}GB) - check ${MODPROBE_FILE}"
    else
        log_ok "ARC max correct: ${arc_max_gb}GB (current size: ${arc_size_gb}GB)"
    fi
}

# ── Swap Health Check ─────────────────────────────────────────
check_swap_health() {
    # Report swap pressure (from `free -m`) and, when both a zvol-backed
    # and a file-backed swap device exist, check their relative priorities.
    local swap_used swap_total pct
    swap_used=$(free -m 2>/dev/null | awk '/Swap/{print $3}' | tr -d '[:space:]')
    swap_total=$(free -m 2>/dev/null | awk '/Swap/{print $2}' | tr -d '[:space:]')
    swap_used=${swap_used:-0}
    swap_total=${swap_total:-0}
    pct=0

    if [[ "$swap_total" -eq 0 ]]; then
        log_warning "No swap configured on host"
        return
    fi

    [[ "$swap_total" -gt 0 ]] && pct=$(( swap_used * 100 / swap_total ))

    if [[ "$pct" -gt 80 ]]; then
        log_warning "Swap usage critical: ${swap_used}MB / ${swap_total}MB (${pct}%)"
    elif [[ "$pct" -gt 50 ]]; then
        log_warning "Swap usage elevated: ${swap_used}MB / ${swap_total}MB (${pct}%)"
    else
        log_ok "Swap health OK: ${swap_used}MB / ${swap_total}MB (${pct}%)"
    fi

    # `swapon --show` columns: NAME TYPE SIZE USED PRIO — a zvol swap
    # device shows TYPE "partition", file swap shows TYPE "file".
    local zvol_swap file_swap
    zvol_swap=$(swapon --show --noheadings 2>/dev/null | awk '$2=="partition"{print $0}' || true)
    file_swap=$(swapon --show --noheadings 2>/dev/null | awk '$2=="file"{print $0}' || true)

    if [[ -n "$zvol_swap" ]] && [[ -n "$file_swap" ]]; then
        local zvol_prio file_prio
        # Column 5 is the swap priority.
        zvol_prio=$(echo "$zvol_swap" | awk '{print $5}' | tr -d '[:space:]')
        file_prio=$(echo "$file_swap" | awk '{print $5}' | tr -d '[:space:]')
        if [[ "$file_prio" -gt "$zvol_prio" ]]; then
            log_ok "Swap priorities correct (file prio=${file_prio} > zvol prio=${zvol_prio})"
        else
            log_warning "Swap priorities incorrect - file swap should have higher priority than zvol swap (file=${file_prio}, zvol=${zvol_prio})"
        fi
    fi
}

# ── LXD Cluster Health Check ─────────────────────────────────
check_lxd_health() {
    # Report LXD cluster member status and verify exactly one lxcfs
    # process is running.
    if ! command -v lxc &>/dev/null; then
        log_warning "LXC command not found"
        return
    fi

    local cluster_csv total offline
    cluster_csv=$(lxc cluster list --format csv 2>/dev/null || true)
    # grep -c counts non-blank lines; it prints "0" even when it exits
    # non-zero, so the || true only suppresses the set -e exit.
    total=$(echo "$cluster_csv" | grep -c '[^[:space:]]' || true)
    offline=$(echo "$cluster_csv" | grep -v "Fully operational" | grep -c '[^[:space:]]' || true)
    total=${total:-0}
    offline=${offline:-0}

    if [[ "$offline" -gt 0 ]]; then
        log_warning "LXD cluster: ${offline} node(s) not fully operational (${total} total)"
    else
        log_ok "LXD cluster healthy: ${total} nodes online"
    fi

    local lxcfs_count
    # pgrep -c prints a count even on a non-zero exit, so the fallback can
    # yield "0\n0"; tr then collapses that to "00", which still compares
    # equal to 0 in the numeric test below.
    lxcfs_count=$(pgrep -c lxcfs 2>/dev/null || echo 0)
    lxcfs_count=$(echo "$lxcfs_count" | tr -d '[:space:]')
    lxcfs_count=${lxcfs_count:-0}

    if [[ "$lxcfs_count" -eq 0 ]]; then
        log_error "lxcfs is not running"
    elif [[ "$lxcfs_count" -gt 1 ]]; then
        log_warning "Multiple lxcfs processes running (${lxcfs_count}) - possible crash/restart loop"
    else
        log_ok "lxcfs running (${lxcfs_count} process)"
    fi
}

# ── JSON Output ───────────────────────────────────────────────
output_json() {
    # Render the collected buckets into a JSON report on stdout.
    # Status escalates ok -> warning -> error based on bucket contents;
    # an explicit $1/$2 can pre-seed status and message.
    local status="${1:-ok}"
    local message="${2:-}"
    local end_time duration
    end_time=$(date +%s%3N)  # epoch ms; %N is a GNU date extension
    duration=$(( end_time - START_TIME ))

    local changes_json ok_json errors_json warnings_json
    changes_json="[]"
    ok_json="[]"
    errors_json="[]"
    warnings_json="[]"

    # Each non-empty bucket is converted to a JSON string array via
    # python3, which handles quoting/escaping of arbitrary message text.
    if [[ ${#CHANGES[@]} -gt 0 ]]; then
        changes_json=$(printf '%s\n' "${CHANGES[@]}" | \
            python3 -c "import sys,json; print(json.dumps([l.rstrip() for l in sys.stdin if l.strip()]))")
    fi
    if [[ ${#ALREADY_OK[@]} -gt 0 ]]; then
        ok_json=$(printf '%s\n' "${ALREADY_OK[@]}" | \
            python3 -c "import sys,json; print(json.dumps([l.rstrip() for l in sys.stdin if l.strip()]))")
    fi
    if [[ ${#ERRORS[@]} -gt 0 ]]; then
        errors_json=$(printf '%s\n' "${ERRORS[@]}" | \
            python3 -c "import sys,json; print(json.dumps([l.rstrip() for l in sys.stdin if l.strip()]))")
        status="error"
    fi
    if [[ ${#WARNINGS[@]} -gt 0 ]]; then
        warnings_json=$(printf '%s\n' "${WARNINGS[@]}" | \
            python3 -c "import sys,json; print(json.dumps([l.rstrip() for l in sys.stdin if l.strip()]))")
        [[ "$status" == "ok" ]] && status="warning"
    fi

    # Default message derived from the bucket counts when none was passed.
    if [[ -z "$message" ]]; then
        if [[ ${#ERRORS[@]} -gt 0 ]]; then
            message="Completed with ${#ERRORS[@]} error(s)"
        elif [[ ${#CHANGES[@]} -gt 0 ]]; then
            message="Applied ${#CHANGES[@]} change(s)"
        else
            message="All settings already optimal - no changes required"
        fi
    fi

    local n_changes=${#CHANGES[@]}
    local n_ok=${#ALREADY_OK[@]}
    local n_warnings=${#WARNINGS[@]}
    local n_errors=${#ERRORS[@]}
    local dry_run_bool hostname_val timestamp_val message_esc
    dry_run_bool=$([ "$DRY_RUN" = true ] && echo "True" || echo "False")
    hostname_val=$(hostname)
    timestamp_val=$(date -u +%Y-%m-%dT%H:%M:%SZ)
    # message is JSON-escaped separately since it can contain quotes.
    message_esc=$(printf '%s' "$message" | python3 -c "import sys,json; print(json.dumps(sys.stdin.read()))")

    # NOTE(review): status/hostname/timestamp are interpolated directly
    # into the Python source below — assumes they never contain quotes or
    # backslashes; confirm hostnames are always plain DNS labels.
    python3 - << PYEOF
import json
data = {
    "status": "${status}",
    "message": ${message_esc},
    "hostname": "${hostname_val}",
    "timestamp": "${timestamp_val}",
    "duration_ms": ${duration},
    "dry_run": ${dry_run_bool},
    "summary": {
        "changes": ${n_changes},
        "already_ok": ${n_ok},
        "warnings": ${n_warnings},
        "errors": ${n_errors}
    },
    "details": {
        "changes": ${changes_json},
        "already_ok": ${ok_json},
        "warnings": ${warnings_json},
        "errors": ${errors_json}
    }
}
print(json.dumps(data, indent=2))
PYEOF
}

# ── Main ──────────────────────────────────────────────────────
# ── Backfill user.plan from LXD profile ─────────────────────
backfill_plans() {
    # For every running container on this host, copy its first LXD profile
    # name into the user.plan config key (consumed by optimize_containers).
    # Progress goes to stderr; individual failures are counted, never fatal.
    echo "Backfilling user.plan for all running containers on ${HOSTNAME}..." >&2
    local count=0 failed=0
    while IFS=',' read -r container _status; do
        local profile
        # Guarded with || true: under set -e a vanished container (failed
        # lxc query -> failed pipeline) previously aborted the whole loop.
        profile=$(lxc query "/1.0/instances/${container}" 2>/dev/null | \
            python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('profiles',['unknown'])[0])" 2>/dev/null) || true
        if [[ -z "$profile" ]] || [[ "$profile" == "unknown" ]]; then
            echo "  SKIP ${container}: could not determine profile" >&2
            (( failed++ )) || true
            continue
        fi
        # Guarded: a single failed `lxc config set` previously killed the
        # script via set -e; count it as failed and keep going instead.
        if lxc config set "$container" user.plan "$profile" 2>/dev/null; then
            echo "  ${container} -> ${profile}" >&2
            (( count++ )) || true
        else
            echo "  SKIP ${container}: failed to set user.plan" >&2
            (( failed++ )) || true
        fi
    done < <(lxc list location="$HOSTNAME" --format csv -c n,s,t 2>/dev/null | grep RUNNING | grep CONTAINER)
    echo "Backfill complete: ${count} updated, ${failed} failed" >&2
}

# ── Optimize All Containers ──────────────────────────────────
# Known valid plans - containers on other profiles will be skipped
KNOWN_PLANS=(
    wpo-starter
    wpo-pro
    wpo-business
    wpo-enterprise
    mwp-essential75
    mwp-essential125
    mwp-essential200
    mwp-essential300
    mwp-core500
    mwp-core1000
)

# Return 0 when $1 exactly matches one of KNOWN_PLANS, 1 otherwise.
is_known_plan() {
    local candidate="$1" entry
    for entry in "${KNOWN_PLANS[@]}"; do
        if [[ "$candidate" == "$entry" ]]; then
            return 0
        fi
    done
    return 1
}

optimize_containers() {
    # Run the BigScoots optimizer suite inside every running container on
    # this host whose user.plan is one of KNOWN_PLANS. Steps run
    # independently per container; failures are collected and summarized.
    echo "Optimizing all running containers on ${HOSTNAME}..." >&2
    local skipped=0 optimized=0 failed=0
    local failed_list=()

    while IFS=',' read -r container _status _type; do
        # Check plan before running anything
        local plan
        plan=$(lxc config get "$container" user.plan 2>/dev/null | tr -d '[:space:]')

        if [[ -z "$plan" ]] || ! is_known_plan "$plan"; then
            echo "  SKIP ${container}: unrecognized plan '${plan:-NOT SET}'" >&2
            (( skipped++ )) || true
            continue
        fi

        echo "=== ${container} (${plan}) ===" >&2
        local container_failed=false
        local fail_reason=""

        # Run each step independently, track failures but continue
        # Step 1: refresh the in-container tooling repo (non-fatal).
        # NOTE(review): SSH git remote is hardcoded to a personal account —
        # confirm this is intentional and that containers hold a deploy key.
        if ! lxc exec "$container" -- bash -c "cd /bigscoots && git pull [email protected]:jcatello/bigscoots.git >/dev/null 2>&1" < /dev/null 2>/dev/null; then
            echo "  WARN ${container}: git pull failed (continuing)" >&2
        fi

        # Step 2: wpsecure conversion - best-effort (|| true).
        lxc exec "$container" -- bash -c "source /bigscoots/includes/common.sh && convert_wpsecure" < /dev/null 2>/dev/null || true

        # Steps 3-5: DB, nginx and PHP-FPM config managers; each failure
        # is tagged in fail_reason but later steps still run.
        if ! lxc exec "$container" -- bash -c "bash /bigscoots/wpo/db/zfs_mycnf_opt.sh --yes </dev/null" < /dev/null 2>/dev/null; then
            container_failed=true; fail_reason+="db_optimizer "
        fi

        if ! lxc exec "$container" -- bash -c "bash /bigscoots/wpo/nginx/cnf_manager.sh </dev/null" < /dev/null 2>/dev/null; then
            container_failed=true; fail_reason+="nginx_cnf "
        fi

        if ! lxc exec "$container" -- bash -c "bash /bigscoots/wpo/phpfpm/cnf_manager.sh </dev/null" < /dev/null 2>/dev/null; then
            container_failed=true; fail_reason+="phpfpm_cnf "
        fi

        # Step 6: reload nginx to pick up the new configs.
        if ! lxc exec "$container" -- bash -c "source /bigscoots/includes/common.sh && ngxreload_t 'node tuner updating configs' </dev/null" < /dev/null 2>/dev/null; then
            container_failed=true; fail_reason+="ngxreload "
        fi

        if [[ "$container_failed" = true ]]; then
            echo "  FAIL ${container}: ${fail_reason}" >&2
            failed_list+=("${container}(${fail_reason% })")
            (( failed++ )) || true
        else
            echo "Done: ${container}" >&2
            (( optimized++ )) || true
        fi

    done < <(lxc list location="$HOSTNAME" --format csv -c n,s,t 2>/dev/null | grep RUNNING | grep CONTAINER)

    echo "" >&2
    echo "Optimize complete: ${optimized} optimized, ${skipped} skipped, ${failed} failed" >&2
    if [[ ${#failed_list[@]} -gt 0 ]]; then
        echo "Failed containers:" >&2
        for f in "${failed_list[@]}"; do
            echo "  - ${f}" >&2
        done
    fi
}

main() {
    # Standalone container-management modes skip host tuning entirely.
    # When both flags are given, plans are backfilled before optimization.
    if [[ "$BACKFILL_PLANS" = true || "$OPTIMIZE_CONTAINERS" = true ]]; then
        preflight
        if [[ "$BACKFILL_PLANS" = true ]]; then
            backfill_plans
        fi
        if [[ "$OPTIMIZE_CONTAINERS" = true ]]; then
            optimize_containers
        fi
        exit 0
    fi

    # Default mode: apply every host tuning, then run the health checks
    # and emit the JSON report.
    preflight
    snapshot_defaults
    tune_sysctl
    tune_sysctl_net
    tune_nic
    tune_cpu_freq
    tune_zfs
    tune_cron
    tune_swap_zvol
    check_dentry_health
    check_nic_health
    check_arc_health
    check_swap_health
    check_lxd_health
    output_json
}

main