# File: //proc/1284358/root/bigscoots/lxd/bs-node-tuner.sh
#!/bin/bash
# ============================================================
# BigScoots LXD Node Tuner
# Version: 1.4.0
# Idempotent - safe to run repeatedly on the same node
# Outputs JSON for portal integration
# Usage: bash bs-node-tuner.sh [--dry-run] [--force]
# [--backfill-plans]
# [--optimize-containers]
# ============================================================
set -euo pipefail
# ── Config ───────────────────────────────────────────────────
SYSCTL_DIR="/etc/sysctl.d"
SYSCTL_FILE="${SYSCTL_DIR}/99-bigscoots.conf"
SYSCTL_NET_FILE="${SYSCTL_DIR}/99-bigscoots-net.conf"
SYSCTL_SNAPSHOT="${SYSCTL_DIR}/00-bigscoots-defaults.snapshot"
MODPROBE_FILE="/etc/modprobe.d/zfs.conf"
CRON_FILE="/etc/cron.d/bigscoots-drop-dcache"
NIC_TUNE_SCRIPT="/etc/networkd-dispatcher/routable.d/bigscoots-nic-tune.sh"
DRY_RUN=false
FORCE=false
BACKFILL_PLANS=false
OPTIMIZE_CONTAINERS=false
# Parse command-line flags by exact match. The previous substring test
# ([[ "$*" == *"--force"* ]]) also matched unrelated arguments such as
# "--force-reboot" or a flag embedded in another word.
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --dry-run) DRY_RUN=true ;;
      --force) FORCE=true ;;
      --backfill-plans) BACKFILL_PLANS=true ;;
      --optimize-containers) OPTIMIZE_CONTAINERS=true ;;
      *) ;; # unknown args are ignored, matching the prior lenient behavior
    esac
  done
}
parse_args "$@"
# ── State Tracking ───────────────────────────────────────────
# Accumulators for the final JSON report; appended to only via the
# log_* helpers below.
CHANGES=()
ALREADY_OK=()
ERRORS=()
WARNINGS=()
START_TIME=$(date +%s%3N) # epoch milliseconds (GNU date %N)
# ── Helpers ──────────────────────────────────────────────────
# Each helper records one human-readable message in its category.
log_change() {
  CHANGES+=("$1")
}
log_ok() {
  ALREADY_OK+=("$1")
}
log_error() {
  ERRORS+=("$1")
}
log_warning() {
  WARNINGS+=("$1")
}
# ── Preflight ────────────────────────────────────────────────
preflight() {
  # Abort early unless running as root on a host that has both LXD and
  # ZFS installed. Failures are reported as JSON on stderr for the portal.
  if (( EUID != 0 )); then
    echo '{"status":"error","message":"Must be run as root"}' >&2
    exit 1
  fi
  if ! command -v lxc > /dev/null 2>&1; then
    echo '{"status":"error","message":"LXD not found - this script is for LXD hosts only"}' >&2
    exit 1
  fi
  if ! command -v zpool > /dev/null 2>&1; then
    echo '{"status":"error","message":"ZFS not found"}' >&2
    exit 1
  fi
}
# ── Snapshot defaults before first run ───────────────────────
snapshot_defaults() {
  # Capture the pre-tuning sysctl state once (again with --force) so the
  # node's original defaults remain available for later comparison.
  if [[ -f "$SYSCTL_SNAPSHOT" && "$FORCE" != true ]]; then
    log_ok "Sysctl defaults snapshot exists"
    return
  fi
  if [[ "$DRY_RUN" = true ]]; then
    log_change "DRY-RUN: Would capture sysctl defaults snapshot"
    return
  fi
  sysctl -a 2>/dev/null > "$SYSCTL_SNAPSHOT"
  log_change "Captured sysctl defaults snapshot -> ${SYSCTL_SNAPSHOT}"
}
# ── Sysctl Tuning (VM) ────────────────────────────────────────
# Desired live VM/memory sysctl values. tune_sysctl() persists these to
# ${SYSCTL_FILE} and re-applies them live when any value drifts.
declare -A SYSCTL_TARGET=(
["vm.vfs_cache_pressure"]="2000" # >100 strongly favors reclaiming dentry/inode caches
["vm.swappiness"]="100" # swap readily; presumably paired with fast zvol/file swap - TODO confirm
["vm.dirty_background_ratio"]="1" # start background writeback very early
["vm.min_free_kbytes"]="1048576" # keep ~1GB free for atomic allocations
["vm.watermark_scale_factor"]="400" # widen kswapd wake/sleep watermark gap
["vm.watermark_boost_factor"]="0" # disable watermark boosting on fragmentation
["vm.page-cluster"]="0" # swap in single pages, no readaround
["vm.max_map_count"]="1048576" # allow many mmaps per process
["vm.admin_reserve_kbytes"]="131072" # reserve ~128MB for root under memory pressure
)
tune_sysctl() {
  # Apply the SYSCTL_TARGET VM/memory values: (1) rewrite the persistent
  # sysctl.d drop-in when it is missing, missing its sentinel line, or
  # --force is given; (2) apply values live only if at least one live
  # value drifts from target.
  local current_vals_ok=true
  for key in "${!SYSCTL_TARGET[@]}"; do
    local target="${SYSCTL_TARGET[$key]}"
    local current
    current=$(sysctl -n "$key" 2>/dev/null || echo "")
    if [[ "$current" != "$target" ]]; then
      current_vals_ok=false
      break
    fi
  done
  # The sentinel grep only checks one key; a file with that line but other
  # drifted lines is treated as up to date (use --force to rewrite).
  if [[ ! -f "$SYSCTL_FILE" ]] || \
     ! grep -q "vm.vfs_cache_pressure = 2000" "$SYSCTL_FILE" 2>/dev/null || \
     [[ "$FORCE" = true ]]; then
    if [[ "$DRY_RUN" = false ]]; then
      mkdir -p "$SYSCTL_DIR"
      cat > "$SYSCTL_FILE" << 'SYSCTL_EOF'
# BigScoots Node Tuning - VM/Memory
# DO NOT EDIT MANUALLY - managed by bs-node-tuner.sh
vm.vfs_cache_pressure = 2000
vm.swappiness = 100
vm.dirty_background_ratio = 1
vm.min_free_kbytes = 1048576
vm.watermark_scale_factor = 400
vm.watermark_boost_factor = 0
vm.page-cluster = 0
vm.max_map_count = 1048576
vm.admin_reserve_kbytes = 131072
SYSCTL_EOF
      log_change "Written ${SYSCTL_FILE}"
    else
      log_change "DRY-RUN: Would write ${SYSCTL_FILE}"
    fi
  else
    log_ok "Sysctl VM config file up to date"
  fi
  if [[ "$current_vals_ok" = false ]]; then
    if [[ "$DRY_RUN" = false ]]; then
      # Guard the apply: an unguarded failure here would abort the whole
      # run under `set -e` before any JSON is emitted; record it instead.
      if sysctl -p "$SYSCTL_FILE" &>/dev/null; then
        log_change "Applied VM sysctl values live"
      else
        log_error "Failed to apply VM sysctl values from ${SYSCTL_FILE}"
      fi
    else
      log_change "DRY-RUN: Would apply VM sysctl values live"
    fi
  else
    log_ok "Sysctl VM live values correct"
  fi
}
# ── Sysctl Tuning (Network) ───────────────────────────────────
# Desired live network sysctl values. tune_sysctl_net() persists these to
# ${SYSCTL_NET_FILE} and re-applies them live when any value drifts.
declare -A SYSCTL_NET_TARGET=(
["net.core.netdev_max_backlog"]="10000" # per-CPU input queue depth before drops
["net.core.rmem_max"]="16777216" # 16MB socket receive buffer ceiling
["net.core.wmem_max"]="16777216" # 16MB socket send buffer ceiling
["net.core.rmem_default"]="1048576" # 1MB default receive buffer
["net.core.wmem_default"]="1048576" # 1MB default send buffer
["net.core.netdev_budget"]="600" # packets per NAPI softirq budget cycle
)
tune_sysctl_net() {
  # Apply the SYSCTL_NET_TARGET values: rewrite the persistent drop-in
  # when missing/stale (or --force), then apply live only on drift.
  # Mirrors tune_sysctl(), including the guarded `sysctl -p`.
  local current_vals_ok=true
  for key in "${!SYSCTL_NET_TARGET[@]}"; do
    local target="${SYSCTL_NET_TARGET[$key]}"
    local current
    current=$(sysctl -n "$key" 2>/dev/null || echo "")
    if [[ "$current" != "$target" ]]; then
      current_vals_ok=false
      break
    fi
  done
  # Sentinel line check, same caveat as tune_sysctl: only one key is
  # checked; --force rewrites unconditionally.
  if [[ ! -f "$SYSCTL_NET_FILE" ]] || \
     ! grep -q "net.core.netdev_max_backlog = 10000" "$SYSCTL_NET_FILE" 2>/dev/null || \
     [[ "$FORCE" = true ]]; then
    if [[ "$DRY_RUN" = false ]]; then
      mkdir -p "$SYSCTL_DIR"
      cat > "$SYSCTL_NET_FILE" << 'NET_SYSCTL_EOF'
# BigScoots Node Tuning - Network
# DO NOT EDIT MANUALLY - managed by bs-node-tuner.sh
# Addresses NIC ring buffer overflow and softirq backlog issues
# on busy LXD nodes with many containers sharing a bonded NIC.
net.core.netdev_max_backlog = 10000
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.core.rmem_default = 1048576
net.core.wmem_default = 1048576
net.core.netdev_budget = 600
NET_SYSCTL_EOF
      log_change "Written ${SYSCTL_NET_FILE}"
    else
      log_change "DRY-RUN: Would write ${SYSCTL_NET_FILE}"
    fi
  else
    log_ok "Sysctl network config file up to date"
  fi
  if [[ "$current_vals_ok" = false ]]; then
    if [[ "$DRY_RUN" = false ]]; then
      # Guard the apply: an unguarded failure would abort the whole run
      # under `set -e` before any JSON output; record it instead.
      if sysctl -p "$SYSCTL_NET_FILE" &>/dev/null; then
        log_change "Applied network sysctl values live"
      else
        log_error "Failed to apply network sysctl values from ${SYSCTL_NET_FILE}"
      fi
    else
      log_change "DRY-RUN: Would apply network sysctl values live"
    fi
  else
    log_ok "Sysctl network live values correct"
  fi
}
# ── NIC Hardware Tuning ───────────────────────────────────────
# Target ring buffer sizes in descriptors (clamped per-NIC to the
# hardware maximum in tune_nic) and interrupt-coalescing microseconds.
NIC_RX_TARGET=8160
NIC_TX_TARGET=8160
NIC_RX_USECS_TARGET=10
get_physical_nics() {
  # Print physical NIC names, one per line: parse `ip link`, drop
  # loopback/virtual name prefixes, then keep only interfaces backed by
  # real hardware (those with a sysfs 'device' link).
  ip link show 2>/dev/null \
    | awk -F': ' '/^[0-9]+:/ { split($2, name, " "); print name[1] }' \
    | grep -vE '^(lo|bond|br|veth|docker|lxc|virbr)' \
    | while read -r nic; do
        if [[ -e "/sys/class/net/${nic}/device" ]]; then
          echo "$nic"
        fi
      done
}
tune_nic() {
# Raise NIC ring buffers toward NIC_RX/TX_TARGET (clamped to the hardware
# maximum reported by ethtool) and set rx-usecs coalescing, then (re)write
# the boot-time persistence hook for networkd-dispatcher.
local nics
mapfile -t nics < <(get_physical_nics)
if [[ ${#nics[@]} -eq 0 ]]; then
log_ok "NIC tuning: no physical NICs detected (skipping)"
return
fi
local all_ok=true
local nics_tuned=()
local nics_skipped=()
for nic in "${nics[@]}"; do
# Skip NICs whose driver does not expose ring-buffer controls.
if ! ethtool -g "$nic" &>/dev/null; then
nics_skipped+=("$nic")
continue
fi
local rx_current tx_current rx_max
# Parse the "Current hardware settings" / "Pre-set maximums" sections of
# `ethtool -g` output; assumes the stock two-section layout — TODO confirm
# this holds across drivers (some print "n/a" for unsupported rings).
rx_current=$(ethtool -g "$nic" 2>/dev/null | awk '/Current hardware settings/{found=1} found && /^RX:/{print $2; exit}')
tx_current=$(ethtool -g "$nic" 2>/dev/null | awk '/Current hardware settings/{found=1} found && /^TX:/{print $2; exit}')
rx_max=$(ethtool -g "$nic" 2>/dev/null | awk '/Pre-set maximums/{found=1} found && /^RX:/{print $2; exit}')
local rx_target tx_target
rx_target=$NIC_RX_TARGET
tx_target=$NIC_TX_TARGET
# Clamp RX target to the hardware maximum (TX target is not clamped).
if [[ -n "$rx_max" ]] && [[ "$rx_max" -lt "$NIC_RX_TARGET" ]]; then
rx_target=$rx_max
fi
local rx_usecs_current
rx_usecs_current=$(ethtool -c "$nic" 2>/dev/null | awk '/^rx-usecs:/{print $2}')
rx_usecs_current=${rx_usecs_current:-"unknown"}
local nic_ok=true
# String comparison on ethtool output; empty/"unknown" values count as drift.
if [[ "$rx_current" != "$rx_target" ]] || \
[[ "$tx_current" != "$tx_target" ]] || \
[[ "$rx_usecs_current" != "$NIC_RX_USECS_TARGET" ]]; then
nic_ok=false
all_ok=false
fi
if [[ "$nic_ok" = false ]]; then
if [[ "$DRY_RUN" = false ]]; then
local failed=false
# Ring resize failure is an error; coalescing failure is tolerated
# (many drivers do not support -C).
ethtool -G "$nic" rx "$rx_target" tx "$tx_target" 2>/dev/null || failed=true
ethtool -C "$nic" rx-usecs "$NIC_RX_USECS_TARGET" 2>/dev/null || true
if [[ "$failed" = true ]]; then
log_error "NIC tuning failed for ${nic}"
else
nics_tuned+=("${nic}(rx=${rx_target},tx=${tx_target},rx-usecs=${NIC_RX_USECS_TARGET})")
fi
else
nics_tuned+=("DRY-RUN: ${nic} rx=${rx_current}->${rx_target} tx=${tx_current}->${tx_target} rx-usecs=${rx_usecs_current}->${NIC_RX_USECS_TARGET}")
fi
fi
done
# Summarize results as comma-separated lists for the JSON report.
if [[ ${#nics_tuned[@]} -gt 0 ]]; then
local tuned_list
tuned_list=$(IFS=', '; echo "${nics_tuned[*]}")
log_change "NIC ring buffers tuned: ${tuned_list}"
fi
if [[ "$all_ok" = true ]] && [[ ${#nics_tuned[@]} -eq 0 ]]; then
local nic_list
nic_list=$(IFS=', '; echo "${nics[*]}")
log_ok "NIC ring buffers already at target (${nic_list})"
fi
if [[ ${#nics_skipped[@]} -gt 0 ]]; then
local skip_list
skip_list=$(IFS=', '; echo "${nics_skipped[*]}")
log_warning "NIC tuning skipped (ethtool not supported): ${skip_list}"
fi
# Always refresh the persistence hook so settings survive link flaps/boot.
_write_nic_persistence_script "${nics[@]}"
}
_write_nic_persistence_script() {
# Write a networkd-dispatcher routable.d hook that re-applies the ethtool
# ring/coalescing settings whenever an interface comes up. Skipped when a
# script with the current version marker already exists (unless --force).
local nics=("$@")
local apply_cmds=""
for nic in "${nics[@]}"; do
local rx_max
# NB: the `|| echo` fallback only fires if awk itself fails; when ethtool
# fails, the pipeline still ends with awk's exit 0 and rx_max is simply
# empty — that case is handled by the -n check below.
rx_max=$(ethtool -g "$nic" 2>/dev/null | awk '/Pre-set maximums/{found=1} found && /^RX:/{print $2; exit}' || echo "$NIC_RX_TARGET")
local rx_target=$NIC_RX_TARGET
[[ -n "$rx_max" ]] && [[ "$rx_max" -lt "$NIC_RX_TARGET" ]] && rx_target=$rx_max
# Literal "\n" sequences; expanded by printf '%b' inside the heredoc.
apply_cmds+="ethtool -G ${nic} rx ${rx_target} tx ${NIC_TX_TARGET} 2>/dev/null || true\n"
apply_cmds+="ethtool -C ${nic} rx-usecs ${NIC_RX_USECS_TARGET} 2>/dev/null || true\n"
done
# Version marker doubles as the idempotency sentinel.
local expected_marker="# bs-node-tuner v1.4.0"
if [[ -f "$NIC_TUNE_SCRIPT" ]] && \
grep -q "$expected_marker" "$NIC_TUNE_SCRIPT" 2>/dev/null && \
[[ "$FORCE" = false ]]; then
log_ok "NIC persistence script exists: ${NIC_TUNE_SCRIPT}"
return
fi
if [[ "$DRY_RUN" = false ]]; then
mkdir -p "$(dirname "$NIC_TUNE_SCRIPT")"
# Unquoted delimiter: the $(printf ...) below expands at write time, so
# the generated script contains the concrete ethtool commands.
cat > "$NIC_TUNE_SCRIPT" << PERSIST_EOF
#!/bin/bash
# BigScoots NIC Tuning - applied on network interface up
# DO NOT EDIT MANUALLY - managed by bs-node-tuner.sh
# bs-node-tuner v1.4.0
$(printf '%b' "$apply_cmds")
PERSIST_EOF
chmod +x "$NIC_TUNE_SCRIPT"
log_change "Written NIC persistence script: ${NIC_TUNE_SCRIPT}"
else
log_change "DRY-RUN: Would write NIC persistence script: ${NIC_TUNE_SCRIPT}"
fi
}
# ── CPU Frequency Tuning ─────────────────────────────────────
tune_cpu_freq() {
  # Pin CPUs at maximum performance. Intel (intel_pstate): performance
  # governor + min_perf_pct=100. AMD (amd-pstate EPP):
  # energy_performance_preference=performance. Sysfs writes are guarded
  # so a failed write is recorded as an error instead of aborting the
  # whole run under `set -e`.
  # Intel P-state
  if [[ -f /sys/devices/system/cpu/intel_pstate/min_perf_pct ]]; then
    local current_pct current_gov
    current_pct=$(cat /sys/devices/system/cpu/intel_pstate/min_perf_pct 2>/dev/null | tr -d '[:space:]')
    current_gov=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor 2>/dev/null | tr -d '[:space:]')
    if [[ "$current_pct" == "100" ]] && [[ "$current_gov" == "performance" ]]; then
      local freq
      freq=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq 2>/dev/null | tr -d '[:space:]')
      log_ok "Intel P-state already optimal: governor=performance min_perf_pct=100 freq=${freq}kHz"
    else
      if [[ "$DRY_RUN" = false ]]; then
        if ! echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor > /dev/null 2>&1; then
          log_error "Failed to set CPU scaling governor to performance"
        fi
        if ! echo 100 > /sys/devices/system/cpu/intel_pstate/min_perf_pct 2>/dev/null; then
          log_error "Failed to set intel_pstate min_perf_pct=100"
        fi
        local freq
        freq=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq 2>/dev/null | tr -d '[:space:]')
        log_change "Intel P-state tuned: governor=performance min_perf_pct=100 freq=${freq}kHz"
      else
        log_change "DRY-RUN: Would set Intel P-state governor=performance min_perf_pct=100"
      fi
    fi
  # AMD P-state
  elif [[ -f /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_preference ]]; then
    local current_pref
    current_pref=$(cat /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_preference 2>/dev/null | tr -d '[:space:]')
    if [[ "$current_pref" == "performance" ]]; then
      log_ok "AMD P-state already optimal: energy_performance_preference=performance"
    else
      if [[ "$DRY_RUN" = false ]]; then
        if ! echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/energy_performance_preference > /dev/null 2>&1; then
          log_error "Failed to set AMD energy_performance_preference=performance"
        fi
        log_change "AMD P-state tuned: energy_performance_preference=performance"
      else
        log_change "DRY-RUN: Would set AMD P-state energy_performance_preference=performance"
      fi
    fi
  else
    log_warning "CPU frequency scaling not available (no intel_pstate or amd_pstate found)"
  fi
}
# ── ZFS Tuning ───────────────────────────────────────────────
# Calculate ARC max as ~8% of total RAM, capped between 4GB and 20GB
# 256GB node → 20GB, 128GB node → 10GB, 64GB node → 5GB
_calc_arc_max() {
  # Prints the ARC ceiling in bytes: RAM/12 (~8.3%), clamped to
  # [4GiB, 20GiB], then rounded down to a whole GiB.
  local gib=1073741824
  local mem_kb
  mem_kb=$(awk '/MemTotal/{print $2}' /proc/meminfo 2>/dev/null || echo 0)
  local candidate=$(( mem_kb * 1024 / 12 ))
  local floor_bytes=$(( 4 * gib ))
  local ceil_bytes=$(( 20 * gib ))
  if (( candidate < floor_bytes )); then
    candidate=$floor_bytes
  elif (( candidate > ceil_bytes )); then
    candidate=$ceil_bytes
  fi
  echo $(( candidate / gib * gib ))
}
# Desired ZFS module parameters (zfs_arc_max is added per-node by
# tune_zfs() from _calc_arc_max). Applied live via
# /sys/module/zfs/parameters and persisted via modprobe.d.
declare -A ZFS_TARGET=(
["zfs_arc_min"]="0" # let the ARC shrink freely
["zfs_arc_lotsfree_percent"]="0" # don't reserve free RAM before growing ARC
["zfs_arc_dnode_limit_percent"]="40" # allow dnodes up to 40% of ARC meta
["zfs_prefetch_disable"]="1" # disable file-level prefetch
["zfs_dirty_data_max"]="4294967296" # 4GB dirty data cap
["zfs_async_block_max_blocks"]="3200" # throttle async frees per txg
["zfs_delete_blocks"]="3200" # threshold for async deletes
["zfs_dmu_offset_next_sync"]="0" # don't force txg sync for SEEK_HOLE/DATA
# vdev queue depths (sync/async read/write concurrency):
["zfs_vdev_async_read_max_active"]="16"
["zfs_vdev_async_read_min_active"]="8"
["zfs_vdev_sync_read_max_active"]="16"
["zfs_vdev_sync_read_min_active"]="16"
["zfs_vdev_sync_write_max_active"]="14"
["zfs_vdev_sync_write_min_active"]="12"
)
tune_zfs() {
# Persist ZFS module parameters to modprobe.d (so they survive module
# reload/reboot), apply them live via /sys/module/zfs/parameters, and set
# recordsize=16k on the LXD containers dataset.
local params_ok=true
local ZFS_PARAM_DIR="/sys/module/zfs/parameters"
# Calculate ARC max for this node
local ARC_MAX
ARC_MAX=$(_calc_arc_max)
local ARC_MAX_GB=$(( ARC_MAX / 1073741824 ))
# NB: mutates the global ZFS_TARGET map so the live-apply loop below also
# covers the node-specific ARC ceiling.
ZFS_TARGET["zfs_arc_max"]="$ARC_MAX"
for param in "${!ZFS_TARGET[@]}"; do
local target="${ZFS_TARGET[$param]}"
local current
current=$(cat "${ZFS_PARAM_DIR}/${param}" 2>/dev/null | tr -d '[:space:]' || echo "")
if [[ "$current" != "$target" ]]; then
params_ok=false
break
fi
done
# Rewrite modprobe.d when missing, when the ARC line is stale, or --force.
if [[ ! -f "$MODPROBE_FILE" ]] || \
! grep -q "zfs_arc_max=${ARC_MAX}" "$MODPROBE_FILE" 2>/dev/null || \
[[ "$FORCE" = true ]]; then
if [[ "$DRY_RUN" = false ]]; then
cat > "$MODPROBE_FILE" << ZFS_EOF
# BigScoots ZFS Tuning - Kinsta Aligned
# DO NOT EDIT MANUALLY - managed by bs-node-tuner.sh
# ARC max calculated as ~8% of RAM, capped 4GB-20GB (${ARC_MAX_GB}GB on this node)
options zfs zfs_arc_max=${ARC_MAX}
options zfs zfs_arc_min=0
options zfs zfs_arc_lotsfree_percent=0
options zfs zfs_arc_dnode_limit_percent=40
options zfs zfs_prefetch_disable=1
options zfs zfs_dirty_data_max=4294967296
options zfs zfs_async_block_max_blocks=3200
options zfs zfs_delete_blocks=3200
options zfs zfs_dmu_offset_next_sync=0
options zfs zfs_vdev_async_read_max_active=16
options zfs zfs_vdev_async_read_min_active=8
options zfs zfs_vdev_sync_read_max_active=16
options zfs zfs_vdev_sync_read_min_active=16
options zfs zfs_vdev_sync_write_max_active=14
options zfs zfs_vdev_sync_write_min_active=12
ZFS_EOF
log_change "Written ${MODPROBE_FILE} (ARC max: ${ARC_MAX_GB}GB)"
else
log_change "DRY-RUN: Would write ${MODPROBE_FILE} (ARC max: ${ARC_MAX_GB}GB)"
fi
else
log_ok "ZFS modprobe.d file up to date (ARC max: ${ARC_MAX_GB}GB)"
fi
# Apply drifted parameters live; missing parameter files (older/newer ZFS
# versions) are warnings, failed writes are errors.
if [[ "$params_ok" = false ]]; then
if [[ "$DRY_RUN" = false ]]; then
local apply_failed=false
for param in "${!ZFS_TARGET[@]}"; do
local target="${ZFS_TARGET[$param]}"
local param_path="${ZFS_PARAM_DIR}/${param}"
if [[ -f "$param_path" ]]; then
if ! echo "$target" > "$param_path" 2>/dev/null; then
log_error "Failed to set ZFS param: ${param}=${target}"
apply_failed=true
fi
else
log_warning "ZFS param not found: ${param}"
fi
done
[[ "$apply_failed" = false ]] && log_change "Applied ZFS parameters live"
else
log_change "DRY-RUN: Would apply ZFS parameters live"
fi
else
log_ok "ZFS live parameters correct"
fi
# Set recordsize=16k on containers dataset (optimal for MySQL/WordPress workloads)
# NOTE(review): assumes the ZFS pool is named after the host (${HOSTNAME});
# verify this naming convention holds on all nodes.
local containers_dataset="${HOSTNAME}/lxd/containers"
if zfs list "$containers_dataset" &>/dev/null; then
local current_recordsize
current_recordsize=$(zfs get -H -o value recordsize "$containers_dataset" 2>/dev/null | tr -d '[:space:]')
if [[ "$current_recordsize" == "16K" ]] || [[ "$current_recordsize" == "16k" ]]; then
log_ok "ZFS containers recordsize correct: 16K (${containers_dataset})"
else
if [[ "$DRY_RUN" = false ]]; then
if zfs set recordsize=16k "$containers_dataset" 2>/dev/null; then
log_change "Set ZFS recordsize=16k on ${containers_dataset} (was ${current_recordsize})"
else
log_error "Failed to set recordsize=16k on ${containers_dataset}"
fi
else
log_change "DRY-RUN: Would set recordsize=16k on ${containers_dataset} (currently ${current_recordsize})"
fi
fi
else
log_warning "ZFS dataset not found: ${containers_dataset}"
fi
}
# ── Drop Caches Cron ─────────────────────────────────────────
tune_cron() {
  # Install two cron.d entries idempotently: a weekly (Sunday 4am)
  # pagecache/dentry drop and a per-minute node monitor.
  local expected="0 4 * * 0 root sync && echo 2 > /proc/sys/vm/drop_caches"
  if [[ -f "$CRON_FILE" ]] && grep -qF "$expected" "$CRON_FILE" 2>/dev/null; then
    log_ok "Drop-caches cron exists"
  elif [[ "$DRY_RUN" = true ]]; then
    log_change "DRY-RUN: Would write drop-caches cron"
  else
    echo "$expected" > "$CRON_FILE"
    log_change "Written drop-caches cron -> ${CRON_FILE}"
  fi
  local monitor_cron="/etc/cron.d/bigscoots-node-monitor"
  local monitor_expected="* * * * * root bash /bigscoots/lxd/node_monitor.sh"
  if [[ -f "$monitor_cron" ]] && grep -qF "$monitor_expected" "$monitor_cron" 2>/dev/null; then
    log_ok "Node monitor cron exists"
  elif [[ "$DRY_RUN" = true ]]; then
    log_change "DRY-RUN: Would write node monitor cron -> ${monitor_cron}"
  else
    echo "$monitor_expected" > "$monitor_cron"
    log_change "Written node monitor cron -> ${monitor_cron}"
  fi
}
# ── ZFS Swap Zvol Tuning ─────────────────────────────────────
tune_swap_zvol() {
  # Locate a swap zvol (any volume whose name contains "swap") and ensure
  # it caches metadata only: primarycache=metadata, secondarycache=none.
  local zvol
  zvol=$(zfs list -H -o name,type 2>/dev/null | awk '$2=="volume"{print $1}' | grep -i swap | head -1 || true)
  if [[ -z "$zvol" ]]; then
    log_ok "No ZFS swap zvol found (using file-based swap)"
    return
  fi
  local primary secondary
  primary=$(zfs get -H -o value primarycache "$zvol" 2>/dev/null | tr -d '[:space:]' || echo "")
  secondary=$(zfs get -H -o value secondarycache "$zvol" 2>/dev/null | tr -d '[:space:]' || echo "")
  if [[ "$primary" == "metadata" && "$secondary" == "none" ]]; then
    log_ok "ZFS swap zvol tuning correct (${zvol})"
    return
  fi
  if [[ "$DRY_RUN" = true ]]; then
    log_change "DRY-RUN: Would tune ZFS swap zvol: ${zvol}"
    return
  fi
  if zfs set primarycache=metadata "$zvol" 2>/dev/null && \
     zfs set secondarycache=none "$zvol" 2>/dev/null; then
    log_change "Tuned ZFS swap zvol: ${zvol}"
  else
    log_error "Failed to tune ZFS swap zvol: ${zvol}"
  fi
}
# ── Dentry Health Check ───────────────────────────────────────
check_dentry_health() {
  # Gauge dentry slab fragmentation from /proc/slabinfo (readable only by
  # root; unreadable values fall back to 0). Warn above 100k slabs,
  # flag critical above 500k.
  local slabs objects ratio
  slabs=$(awk '/^dentry/{print $6}' /proc/slabinfo 2>/dev/null | tr -d '[:space:]')
  objects=$(awk '/^dentry/{print $3}' /proc/slabinfo 2>/dev/null | tr -d '[:space:]')
  slabs=${slabs:-0}
  objects=${objects:-0}
  ratio=0
  if (( slabs > 0 )); then
    ratio=$(( objects / slabs ))
  fi
  local detail="${slabs} slabs (${objects} objects, ${ratio} obj/slab)"
  if (( slabs > 500000 )); then
    log_warning "Dentry slab fragmentation CRITICAL: ${detail} - run: echo 2 > /proc/sys/vm/drop_caches"
  elif (( slabs > 100000 )); then
    log_warning "Dentry slab fragmentation elevated: ${detail}"
  else
    log_ok "Dentry slab health OK: ${detail}"
  fi
}
# ── NIC Health Check ─────────────────────────────────────────
check_nic_health() {
# Report dropped RX packets per NIC (software and hardware counters) and
# the worst per-CPU softnet backlog drop count.
local nics
mapfile -t nics < <(get_physical_nics)
if [[ ${#nics[@]} -eq 0 ]]; then
log_ok "NIC health: no physical NICs detected"
return
fi
for nic in "${nics[@]}"; do
local missed
# Field 4 of the line after the "RX:" header in `ip -s link` output.
# NOTE(review): in iproute2's stats layout column 4 is "dropped" and
# column 5 is "missed" — verify which counter is intended here.
missed=$(ip -s link show "$nic" 2>/dev/null | awk '/RX:/{getline; print $4}')
missed=${missed:-0}
if [[ "$missed" -gt 1000000 ]]; then
log_warning "NIC ${nic}: ${missed} missed RX packets (ring buffer overflows)"
elif [[ "$missed" -gt 0 ]]; then
log_ok "NIC ${nic}: ${missed} missed RX packets (minor)"
else
log_ok "NIC ${nic}: no missed RX packets"
fi
local rx_missed_errors
# Driver-specific counter; not all NICs expose rx_missed_errors.
rx_missed_errors=$(ethtool -S "$nic" 2>/dev/null | awk '/rx_missed_errors/{print $2}' || echo "0")
rx_missed_errors=${rx_missed_errors:-0}
if [[ "$rx_missed_errors" -gt 100000 ]]; then
log_warning "NIC ${nic}: ${rx_missed_errors} rx_missed_errors (hardware ring buffer drops)"
fi
done
# Scan /proc/net/softnet_stat (one row per CPU, hex columns; column 2 is
# the backlog drop counter) for the CPU with the most drops.
local worst_cpu worst_drops
worst_cpu=0
worst_drops=0
local cpu_idx=0
while IFS= read -r line; do
local drops
drops=$(echo "$line" | awk '{print $2}')
# Hex -> decimal; non-parsable rows count as 0.
drops=$(printf '%d' "0x${drops}" 2>/dev/null || echo 0)
if [[ "$drops" -gt "$worst_drops" ]]; then
worst_drops=$drops
worst_cpu=$cpu_idx
fi
(( cpu_idx++ )) || true
done < /proc/net/softnet_stat
if [[ "$worst_drops" -gt 10000000 ]]; then
log_warning "Softnet CPU${worst_cpu} drops: ${worst_drops} (softirq backlog overflow)"
elif [[ "$worst_drops" -gt 0 ]]; then
log_ok "Softnet max drops: ${worst_drops} on CPU${worst_cpu} (minor)"
else
log_ok "Softnet drops: none"
fi
}
# ── ARC Health Check ─────────────────────────────────────────
check_arc_health() {
  # Compare the loaded zfs_arc_max module parameter against the value
  # _calc_arc_max() expects for this node's RAM; warn on drift.
  local arc_size arc_max arc_size_gb arc_max_gb expected_arc
  # The current ARC size lives in the "size" row of arcstats; there is no
  # "arc_size" row (the previous pattern /^arc_size/ matched nothing, so
  # the reported current size was always 0GB).
  arc_size=$(awk '$1 == "size" {print $3}' /proc/spl/kstat/zfs/arcstats 2>/dev/null | tr -d '[:space:]')
  arc_max=$(cat /sys/module/zfs/parameters/zfs_arc_max 2>/dev/null | tr -d '[:space:]')
  arc_size=${arc_size:-0}
  arc_max=${arc_max:-0}
  arc_size_gb=$(( arc_size / 1073741824 ))
  arc_max_gb=$(( arc_max / 1073741824 ))
  expected_arc=$(_calc_arc_max)
  local expected_arc_gb=$(( expected_arc / 1073741824 ))
  if [[ "$arc_max" -ne "$expected_arc" ]]; then
    log_warning "ARC max not at expected value for this node (current: ${arc_max_gb}GB, expected: ${expected_arc_gb}GB) - check ${MODPROBE_FILE}"
  else
    log_ok "ARC max correct: ${arc_max_gb}GB (current size: ${arc_size_gb}GB)"
  fi
}
# ── Swap Health Check ─────────────────────────────────────────
check_swap_health() {
  # Report swap pressure (warn above 50%, critical above 80%) and, when
  # both a partition-backed and a file-backed swap are active, verify the
  # file swap has the higher priority.
  local used_mb total_mb pct
  used_mb=$(free -m 2>/dev/null | awk '/Swap/{print $3}' | tr -d '[:space:]')
  total_mb=$(free -m 2>/dev/null | awk '/Swap/{print $2}' | tr -d '[:space:]')
  used_mb=${used_mb:-0}
  total_mb=${total_mb:-0}
  if (( total_mb == 0 )); then
    log_warning "No swap configured on host"
    return
  fi
  pct=$(( used_mb * 100 / total_mb ))
  if (( pct > 80 )); then
    log_warning "Swap usage critical: ${used_mb}MB / ${total_mb}MB (${pct}%)"
  elif (( pct > 50 )); then
    log_warning "Swap usage elevated: ${used_mb}MB / ${total_mb}MB (${pct}%)"
  else
    log_ok "Swap health OK: ${used_mb}MB / ${total_mb}MB (${pct}%)"
  fi
  local part_swap file_swap
  part_swap=$(swapon --show --noheadings 2>/dev/null | awk '$2=="partition"{print $0}' || true)
  file_swap=$(swapon --show --noheadings 2>/dev/null | awk '$2=="file"{print $0}' || true)
  if [[ -n "$part_swap" && -n "$file_swap" ]]; then
    local part_prio file_prio
    part_prio=$(echo "$part_swap" | awk '{print $5}' | tr -d '[:space:]')
    file_prio=$(echo "$file_swap" | awk '{print $5}' | tr -d '[:space:]')
    if (( file_prio > part_prio )); then
      log_ok "Swap priorities correct (file prio=${file_prio} > zvol prio=${part_prio})"
    else
      log_warning "Swap priorities incorrect - file swap should have higher priority than zvol swap (file=${file_prio}, zvol=${part_prio})"
    fi
  fi
}
# ── LXD Cluster Health Check ─────────────────────────────────
check_lxd_health() {
  # Check that all LXD cluster members are fully operational and that
  # exactly one lxcfs process is serving the containers.
  if ! command -v lxc &>/dev/null; then
    log_warning "LXC command not found"
    return
  fi
  local cluster_csv total offline
  cluster_csv=$(lxc cluster list --format csv 2>/dev/null || true)
  # Count non-blank lines; offline = rows without "Fully operational".
  total=$(echo "$cluster_csv" | grep -c '[^[:space:]]' || true)
  offline=$(echo "$cluster_csv" | grep -v "Fully operational" | grep -c '[^[:space:]]' || true)
  total=${total:-0}
  offline=${offline:-0}
  if (( offline > 0 )); then
    log_warning "LXD cluster: ${offline} node(s) not fully operational (${total} total)"
  else
    log_ok "LXD cluster healthy: ${total} nodes online"
  fi
  local lxcfs_count
  lxcfs_count=$(pgrep -c lxcfs 2>/dev/null || echo 0)
  lxcfs_count=$(echo "$lxcfs_count" | tr -d '[:space:]')
  lxcfs_count=${lxcfs_count:-0}
  if (( lxcfs_count == 0 )); then
    log_error "lxcfs is not running"
  elif (( lxcfs_count > 1 )); then
    log_warning "Multiple lxcfs processes running (${lxcfs_count}) - possible crash/restart loop"
  else
    log_ok "lxcfs running (${lxcfs_count} process)"
  fi
}
# ── JSON Output ───────────────────────────────────────────────
output_json() {
# Emit the final machine-readable report for the portal on stdout.
# Status escalates: ok -> warning (any warnings) -> error (any errors);
# an explicit $1/$2 may preset status/message.
local status="${1:-ok}"
local message="${2:-}"
local end_time duration
end_time=$(date +%s%3N)
duration=$(( end_time - START_TIME ))
local changes_json ok_json errors_json warnings_json
changes_json="[]"
ok_json="[]"
errors_json="[]"
warnings_json="[]"
# Each category array is JSON-encoded via python3; blank entries dropped.
if [[ ${#CHANGES[@]} -gt 0 ]]; then
changes_json=$(printf '%s\n' "${CHANGES[@]}" | \
python3 -c "import sys,json; print(json.dumps([l.rstrip() for l in sys.stdin if l.strip()]))")
fi
if [[ ${#ALREADY_OK[@]} -gt 0 ]]; then
ok_json=$(printf '%s\n' "${ALREADY_OK[@]}" | \
python3 -c "import sys,json; print(json.dumps([l.rstrip() for l in sys.stdin if l.strip()]))")
fi
if [[ ${#ERRORS[@]} -gt 0 ]]; then
errors_json=$(printf '%s\n' "${ERRORS[@]}" | \
python3 -c "import sys,json; print(json.dumps([l.rstrip() for l in sys.stdin if l.strip()]))")
status="error"
fi
if [[ ${#WARNINGS[@]} -gt 0 ]]; then
warnings_json=$(printf '%s\n' "${WARNINGS[@]}" | \
python3 -c "import sys,json; print(json.dumps([l.rstrip() for l in sys.stdin if l.strip()]))")
[[ "$status" == "ok" ]] && status="warning"
fi
# Default message when none was supplied by the caller.
if [[ -z "$message" ]]; then
if [[ ${#ERRORS[@]} -gt 0 ]]; then
message="Completed with ${#ERRORS[@]} error(s)"
elif [[ ${#CHANGES[@]} -gt 0 ]]; then
message="Applied ${#CHANGES[@]} change(s)"
else
message="All settings already optimal - no changes required"
fi
fi
local n_changes=${#CHANGES[@]}
local n_ok=${#ALREADY_OK[@]}
local n_warnings=${#WARNINGS[@]}
local n_errors=${#ERRORS[@]}
local dry_run_bool hostname_val timestamp_val message_esc
dry_run_bool=$([ "$DRY_RUN" = true ] && echo "True" || echo "False")
hostname_val=$(hostname)
timestamp_val=$(date -u +%Y-%m-%dT%H:%M:%SZ)
# message is pre-encoded with json.dumps so quotes/backslashes are safe.
message_esc=$(printf '%s' "$message" | python3 -c "import sys,json; print(json.dumps(sys.stdin.read()))")
# The heredoc interpolates shell values directly into a Python literal.
# NOTE(review): status/hostname/timestamp are interpolated unescaped and
# assumed to contain no quotes/backslashes — confirm for hostname.
python3 - << PYEOF
import json
data = {
"status": "${status}",
"message": ${message_esc},
"hostname": "${hostname_val}",
"timestamp": "${timestamp_val}",
"duration_ms": ${duration},
"dry_run": ${dry_run_bool},
"summary": {
"changes": ${n_changes},
"already_ok": ${n_ok},
"warnings": ${n_warnings},
"errors": ${n_errors}
},
"details": {
"changes": ${changes_json},
"already_ok": ${ok_json},
"warnings": ${warnings_json},
"errors": ${errors_json}
}
}
print(json.dumps(data, indent=2))
PYEOF
}
# ── Main ──────────────────────────────────────────────────────
# ── Backfill user.plan from LXD profile ─────────────────────
backfill_plans() {
  # Copy each running container's first LXD profile name into its
  # user.plan config key so later runs can validate plans without
  # profile lookups. Progress goes to stderr; counts are summarized.
  echo "Backfilling user.plan for all running containers on ${HOSTNAME}..." >&2
  local count=0 failed=0
  while IFS=',' read -r container _status; do
    local profile
    profile=$(lxc query "/1.0/instances/${container}" 2>/dev/null | \
      python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('profiles',['unknown'])[0])" 2>/dev/null)
    if [[ -z "$profile" ]] || [[ "$profile" == "unknown" ]]; then
      echo " SKIP ${container}: could not determine profile" >&2
      (( failed++ )) || true
      continue
    fi
    # Guard the set: under `set -e` an unguarded failure here would abort
    # the whole backfill loop; count it as a failure instead.
    if lxc config set "$container" user.plan "$profile" 2>/dev/null; then
      echo " ${container} -> ${profile}" >&2
      (( count++ )) || true
    else
      echo " SKIP ${container}: failed to set user.plan" >&2
      (( failed++ )) || true
    fi
  done < <(lxc list location="$HOSTNAME" --format csv -c n,s,t 2>/dev/null | grep RUNNING | grep CONTAINER)
  echo "Backfill complete: ${count} updated, ${failed} failed" >&2
}
# ── Optimize All Containers ──────────────────────────────────
# Known valid plans - containers on other profiles will be skipped
KNOWN_PLANS=(
  wpo-starter
  wpo-pro
  wpo-business
  wpo-enterprise
  mwp-essential75
  mwp-essential125
  mwp-essential200
  mwp-essential300
  mwp-core500
  mwp-core1000
)
is_known_plan() {
  # Exit 0 iff $1 is exactly one of KNOWN_PLANS, non-zero otherwise.
  printf '%s\n' "${KNOWN_PLANS[@]}" | grep -qxF -- "$1"
}
optimize_containers() {
# Run the in-container optimization suite (DB, nginx, php-fpm configs,
# then an nginx reload) on every running container with a recognized
# user.plan. Steps run independently; per-step failures are tracked but
# do not stop the container or the loop. Progress goes to stderr.
echo "Optimizing all running containers on ${HOSTNAME}..." >&2
local skipped=0 optimized=0 failed=0
local failed_list=()
while IFS=',' read -r container _status _type; do
# Check plan before running anything
local plan
plan=$(lxc config get "$container" user.plan 2>/dev/null | tr -d '[:space:]')
if [[ -z "$plan" ]] || ! is_known_plan "$plan"; then
echo " SKIP ${container}: unrecognized plan '${plan:-NOT SET}'" >&2
(( skipped++ )) || true
continue
fi
echo "=== ${container} (${plan}) ===" >&2
local container_failed=false
local fail_reason=""
# Run each step independently, track failures but continue
# Best-effort: refresh the in-container tooling repo first.
if ! lxc exec "$container" -- bash -c "cd /bigscoots && git pull [email protected]:jcatello/bigscoots.git >/dev/null 2>&1" < /dev/null 2>/dev/null; then
echo " WARN ${container}: git pull failed (continuing)" >&2
fi
# Best-effort conversion step; failure is intentionally ignored.
lxc exec "$container" -- bash -c "source /bigscoots/includes/common.sh && convert_wpsecure" < /dev/null 2>/dev/null || true
if ! lxc exec "$container" -- bash -c "bash /bigscoots/wpo/db/zfs_mycnf_opt.sh --yes </dev/null" < /dev/null 2>/dev/null; then
container_failed=true; fail_reason+="db_optimizer "
fi
if ! lxc exec "$container" -- bash -c "bash /bigscoots/wpo/nginx/cnf_manager.sh </dev/null" < /dev/null 2>/dev/null; then
container_failed=true; fail_reason+="nginx_cnf "
fi
if ! lxc exec "$container" -- bash -c "bash /bigscoots/wpo/phpfpm/cnf_manager.sh </dev/null" < /dev/null 2>/dev/null; then
container_failed=true; fail_reason+="phpfpm_cnf "
fi
if ! lxc exec "$container" -- bash -c "source /bigscoots/includes/common.sh && ngxreload_t 'node tuner updating configs' </dev/null" < /dev/null 2>/dev/null; then
container_failed=true; fail_reason+="ngxreload "
fi
if [[ "$container_failed" = true ]]; then
echo " FAIL ${container}: ${fail_reason}" >&2
failed_list+=("${container}(${fail_reason% })")
(( failed++ )) || true
else
echo "Done: ${container}" >&2
(( optimized++ )) || true
fi
done < <(lxc list location="$HOSTNAME" --format csv -c n,s,t 2>/dev/null | grep RUNNING | grep CONTAINER)
echo "" >&2
echo "Optimize complete: ${optimized} optimized, ${skipped} skipped, ${failed} failed" >&2
if [[ ${#failed_list[@]} -gt 0 ]]; then
echo "Failed containers:" >&2
for f in "${failed_list[@]}"; do
echo " - ${f}" >&2
done
fi
}
main() {
  # Standalone container-maintenance modes skip node tuning entirely.
  # With both flags, plans are backfilled before optimization runs.
  if [[ "$BACKFILL_PLANS" = true || "$OPTIMIZE_CONTAINERS" = true ]]; then
    preflight
    if [[ "$BACKFILL_PLANS" = true ]]; then
      backfill_plans
    fi
    if [[ "$OPTIMIZE_CONTAINERS" = true ]]; then
      optimize_containers
    fi
    exit 0
  fi
  # Default: full node tuning, then health checks, then the JSON report.
  preflight
  snapshot_defaults
  tune_sysctl
  tune_sysctl_net
  tune_nic
  tune_cpu_freq
  tune_zfs
  tune_cron
  tune_swap_zvol
  check_dentry_health
  check_nic_health
  check_arc_health
  check_swap_health
  check_lxd_health
  output_json
}
main