#!/bin/bash
# File: //bigscoots/tools/sar_usage.sh
# (the shebang must be the very first line of the file; the path note above
#  was previously line 1, which both disabled the shebang and executed
#  "File:" as a command)
#################################################################
# Server Capacity Analyzer
# Analyzes SAR data to determine if server needs upgrade/downgrade
#################################################################
set -euo pipefail

# ANSI color/style escape codes used by every report section.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
MAGENTA='\033[0;35m'
NC='\033[0m'
BOLD='\033[1m'
DIM='\033[2m'

# Defaults
DAYS=${1:-14}        # analysis window in days; first positional argument
SAR_DIR=""           # resolved later by detect_sar_dir()
TEMP_DIR=$(mktemp -d)
trap 'rm -rf "$TEMP_DIR"' EXIT

# Globals set during detection
TIME_FORMAT="" # "12h" or "24h"
SAR_Q_CMD="" # "sar -q" or "sar -q LOAD"

# Thresholds (can be overridden via environment variables)
CPU_HIGH=${CPU_HIGH:-75}
CPU_LOW=${CPU_LOW:-20}
MEM_HIGH=${MEM_HIGH:-75}
MEM_LOW=${MEM_LOW:-40}
LOAD_HIGH_MULT=${LOAD_HIGH_MULT:-1.5}
LOAD_LOW_MULT=${LOAD_LOW_MULT:-0.3}
IO_HIGH=${IO_HIGH:-20}
STEAL_HIGH=${STEAL_HIGH:-10}
NET_HIGH_MBS=${NET_HIGH_MBS:-100}
DISK_HIGH_TPS=${DISK_HIGH_TPS:-500}
TREND_THRESHOLD_PCT=${TREND_THRESHOLD_PCT:-2}
TREND_THRESHOLD_LOAD=${TREND_THRESHOLD_LOAD:-0.5}
# Print the help text to stdout and exit.
# $1 - optional exit status (default 0: a *requested* help page is not an
#      error; the previous unconditional "exit 1" made "-h" report failure).
usage() {
    cat << EOF
Server Capacity Analyzer - Determine if your server needs upgrade/downgrade
Usage: $0 [DAYS]
Arguments:
DAYS Number of days to analyze (default: 14)
Environment Variables (for custom thresholds):
CPU_HIGH CPU upgrade threshold % (default: 75)
CPU_LOW CPU downgrade threshold % (default: 20)
MEM_HIGH Memory upgrade threshold % (default: 75)
MEM_LOW Memory downgrade threshold % (default: 40)
LOAD_HIGH_MULT Load threshold multiplier (default: 1.5)
LOAD_LOW_MULT Load downgrade multiplier (default: 0.3)
IO_HIGH I/O wait upgrade threshold % (default: 20)
STEAL_HIGH Steal time upgrade threshold % (default: 10)
NET_HIGH_MBS Network throughput threshold MB/s (default: 100)
DISK_HIGH_TPS Disk transactions/sec threshold (default: 500)
TREND_THRESHOLD_PCT Trend detection %/day for CPU/mem (default: 2)
TREND_THRESHOLD_LOAD Trend detection load/day (default: 0.5)
Examples:
$0 # Analyze last 14 days
$0 7 # Analyze last 7 days
$0 30 # Analyze last 30 days
CPU_HIGH=80 $0 # Use 80% as CPU upgrade threshold
EOF
    exit "${1:-0}"
}
# --- Top-level argument handling ---
# Show help when the first argument asks for it.
[[ "${1:-}" == "-h" || "${1:-}" == "--help" ]] && usage
# DAYS was seeded from $1 above; anything non-numeric is fatal.
[[ "$DAYS" =~ ^[0-9]+$ ]] || { echo "Error: DAYS must be a number"; exit 1; }
# Map each field of a SAR header line to its 1-based column position.
# $1 - the header line text
# $2 - name of an associative array to fill (field name -> column index)
parse_sar_columns() {
    local header_line="$1"
    local -n _col_map=$2
    local -a fields
    read -r -a fields <<< "$header_line"
    local idx
    for idx in "${!fields[@]}"; do
        _col_map["${fields[idx]}"]=$((idx + 1))
    done
}
# Print the data column offset: 12h-format rows carry an extra AM/PM token,
# shifting value columns right by one (2); 24h rows do not (1).
get_data_offset() {
    case "$TIME_FORMAT" in
        12h) echo 2 ;;
        *)   echo 1 ;;
    esac
}
# Print the 24-hour hour (0-23) from a SAR timestamp line, converting from
# AM/PM when the global TIME_FORMAT is "12h".
extract_hour() {
    local line="$1"
    awk -v fmt="$TIME_FORMAT" '{
        split($1, parts, ":")
        h = int(parts[1])
        if (fmt == "12h") {
            if ($2 == "PM" && h != 12) h += 12
            else if ($2 == "AM" && h == 12) h = 0
        }
        print h
    }' <<< "$line"
}
# Set the global TIME_FORMAT to "12h" or "24h" by inspecting the second
# field of the first data row in the newest available day-file. Defaults to
# "24h" when no day-file exists in the window.
detect_time_format() {
    local candidate="" d
    for ((d = 0; d < DAYS; d++)); do
        candidate="$SAR_DIR/sa$(date -d "$d days ago" +%d)"
        [[ -f "$candidate" ]] && break
        candidate=""
    done
    if [[ -z "$candidate" ]]; then
        TIME_FORMAT="24h"
        return 0
    fi
    local row
    row=$(sar -f "$candidate" -u 2>/dev/null | grep -E '^[0-9]' | head -1 || true)
    # A 12h log has AM/PM as the second token of every data row.
    if [[ "$(awk '{print $2}' <<< "$row")" =~ ^(AM|PM)$ ]]; then
        TIME_FORMAT="12h"
    else
        TIME_FORMAT="24h"
    fi
}
# Set the global SAR_Q_CMD: older sysstat prints ldavg columns for plain
# "sar -q"; newer builds (RHEL 8+) need "sar -q LOAD". Defaults to "sar -q"
# when no day-file exists in the window.
detect_sar_q_variant() {
    local candidate="" d
    for ((d = 0; d < DAYS; d++)); do
        candidate="$SAR_DIR/sa$(date -d "$d days ago" +%d)"
        [[ -f "$candidate" ]] && break
        candidate=""
    done
    if [[ -z "$candidate" ]]; then
        SAR_Q_CMD="sar -q"
        return 0
    fi
    local hdr
    hdr=$(sar -f "$candidate" -q 2>/dev/null | grep -i 'ldavg\|runq' | head -1 || true)
    if grep -qi 'ldavg' <<< "$hdr"; then
        SAR_Q_CMD="sar -q"
    else
        SAR_Q_CMD="sar -q LOAD"
    fi
}
# Set the global SAR_DIR to the first existing sysstat data directory;
# exits 1 with an error message when none of the known locations exist.
detect_sar_dir() {
    local candidate
    for candidate in /var/log/sa /var/log/sysstat /var/adm/sa; do
        if [[ -d "$candidate" ]]; then
            SAR_DIR="$candidate"
            return 0
        fi
    done
    echo -e "${RED}Error: SAR data directory not found${NC}"
    exit 1
}
# Print the CPU core count, falling back from nproc to /proc/cpuinfo to a
# worst-case assumption of a single core.
get_cores() {
    nproc 2>/dev/null || grep -c '^processor' /proc/cpuinfo 2>/dev/null || echo 1
}
# Print total RAM in GB with one decimal (from /proc/meminfo, which reports
# kB), or "?" when /proc/meminfo is unreadable.
get_total_mem_gb() {
    awk '$1 == "MemTotal:" {printf "%.1f", $2 / 1024 / 1024}' /proc/meminfo 2>/dev/null || echo "?"
}
# Nearest-rank percentile of a one-number-per-line file.
# $1 - data file, $2 - percentile (0-100). Prints "0" for an empty file.
percentile() {
    local data_file=$1 pct=$2
    if [[ ! -s "$data_file" ]]; then
        echo "0"
        return 0
    fi
    # Single pass: sort numerically, then select rank round(N * p / 100),
    # clamped to [1, N] — the classic nearest-rank rule.
    sort -n "$data_file" | awk -v p="$pct" '
        { vals[NR] = $1 }
        END {
            if (NR == 0) { print 0; exit }
            r = int(NR * p / 100 + 0.5)
            if (r < 1) r = 1
            if (r > NR) r = NR
            print vals[r]
        }'
    return 0
}
# Render a fixed-width horizontal bar chart segment to stdout.
# $1 - value (defaults 0), $2 - scale maximum (defaults 100),
# $3 - total bar width in characters (defaults 30).
# Color: red above 80, yellow above 60, green otherwise (absolute value,
# not relative to the scale).
bar() {
    local raw=${1:-0} scale=${2:-100} width=${3:-30}
    # Keep only a leading numeric token; anything else renders as 0.
    raw=$(grep -oE '^[0-9]+\.?[0-9]*' <<< "$raw" | head -1)
    [[ -z "$raw" ]] && raw=0
    local fill
    fill=$(awk -v v="$raw" -v w="$width" -v m="$scale" 'BEGIN {printf "%d", int(v * w / m)}')
    [[ "$fill" -gt "$width" ]] && fill=$width
    [[ "$fill" -lt 0 ]] && fill=0
    local rest=$((width - fill))
    local tint=$GREEN
    local band
    band=$(awk -v v="$raw" 'BEGIN {if (v > 80) print 2; else if (v > 60) print 1; else print 0}')
    [[ "$band" == "2" ]] && tint=$RED
    [[ "$band" == "1" ]] && tint=$YELLOW
    printf '%s' "$tint"
    local i
    for ((i = 0; i < fill; i++)); do printf '█'; done
    printf '%s' "$DIM"
    for ((i = 0; i < rest; i++)); do printf '░'; done
    printf '%s' "$NC"
}
# Single-pass data collection. Loops through day-files once, runs all SAR commands,
# writes parsed values to temp files for analysis functions to read.
#
# Reads globals: DAYS, SAR_DIR, TEMP_DIR, TIME_FORMAT.
# Writes one value per line into the $TEMP_DIR/*.txt aggregates, plus
# "YYYY-MM-DD value" rows into daily_*.txt and "hour value" rows into
# hourly_cpu.txt. Exits 1 when no SAR day-file exists in the window.
collect_data() {
# Initialize aggregate files
> "$TEMP_DIR/cpu_usage.txt"
> "$TEMP_DIR/iowait.txt"
> "$TEMP_DIR/steal.txt"
> "$TEMP_DIR/mem_real_pct.txt"
> "$TEMP_DIR/swap_pct.txt"
> "$TEMP_DIR/load5.txt"
> "$TEMP_DIR/net_rx_kbs.txt"
> "$TEMP_DIR/net_tx_kbs.txt"
> "$TEMP_DIR/disk_tps.txt"
> "$TEMP_DIR/disk_bread.txt"
> "$TEMP_DIR/disk_bwrtn.txt"
> "$TEMP_DIR/verdicts"
> "$TEMP_DIR/daily_cpu.txt"
> "$TEMP_DIR/daily_mem.txt"
> "$TEMP_DIR/daily_load.txt"
> "$TEMP_DIR/hourly_cpu.txt"
local collected=0
for i in $(seq 0 $((DAYS-1))); do
local day_date
day_date=$(date -d "$i days ago" +%d)
local day_str
day_str=$(date -d "$i days ago" +%Y-%m-%d)
# sysstat keeps one binary file per day-of-month: saDD.
local sar_file="$SAR_DIR/sa${day_date}"
[[ -f "$sar_file" ]] || continue
collected=$((collected + 1))
# --- CPU, iowait, steal from sar -u ---
local cpu_output
cpu_output=$(sar -f "$sar_file" -u 2>/dev/null) || continue
# Header-to-column maps are built once, from the first collected day.
# NOTE(review): if that first day happens to lack a section (e.g. no
# network data), its *_COLS map stays empty and the section is silently
# skipped for every later day as well — confirm this is acceptable.
if [[ $collected -eq 1 ]]; then
local cpu_header
cpu_header=$(echo "$cpu_output" | grep -i '%user\|%idle' | head -1)
declare -A CPU_COLS=()
parse_sar_columns "$cpu_header" CPU_COLS
fi
# Data rows start with a timestamp digit; each metric is appended to its
# own aggregate via a dedicated file descriptor (3/4/5).
# NOTE(review): the "off" awk variable is passed here and below but never
# referenced inside the program — column indexes come from the header map,
# which already accounts for the AM/PM token.
echo "$cpu_output" | grep -E '^[0-9]' | awk -v off="$(get_data_offset)" \
-v idle_col="${CPU_COLS[%idle]:-0}" \
-v iowait_col="${CPU_COLS[%iowait]:-0}" \
-v steal_col="${CPU_COLS[%steal]:-0}" \
-v day="$day_str" '
{
if (idle_col > 0 && $idle_col ~ /^[0-9]/) {
cpu = 100 - $idle_col
printf "%.2f\n", cpu >> "/dev/fd/3"
}
if (iowait_col > 0 && $iowait_col ~ /^[0-9]/) {
printf "%.2f\n", $iowait_col >> "/dev/fd/4"
}
if (steal_col > 0 && $steal_col ~ /^[0-9]/) {
printf "%.2f\n", $steal_col >> "/dev/fd/5"
}
}' 3>>"$TEMP_DIR/cpu_usage.txt" 4>>"$TEMP_DIR/iowait.txt" 5>>"$TEMP_DIR/steal.txt"
# Save per-day CPU for daily summary and peak hours
echo "$cpu_output" | grep -E '^[0-9]' | awk -v off="$(get_data_offset)" \
-v idle_col="${CPU_COLS[%idle]:-0}" \
-v fmt="$TIME_FORMAT" '
{
if (idle_col > 0 && $idle_col ~ /^[0-9]/) {
split($1, t, ":")
hour = int(t[1])
if (fmt == "12h") {
if ($2 == "PM" && hour != 12) hour += 12
if ($2 == "AM" && hour == 12) hour = 0
}
print hour, 100 - $idle_col
}
}' >> "$TEMP_DIR/hourly_cpu.txt"
# CPU daily average from the Average line
# "Average:" rows drop the timestamp; in 12h mode (where data rows carry
# an extra AM/PM token) every column index therefore shifts left by one.
local cpu_day_avg
cpu_day_avg=$(echo "$cpu_output" | grep '^Average' | awk -v idle_col="${CPU_COLS[%idle]:-0}" -v fmt="$TIME_FORMAT" '
BEGIN { if (fmt == "12h") idle_col -= 1 }
{ if (idle_col > 0 && $idle_col ~ /^[0-9]/) printf "%.2f", 100 - $idle_col }' || true)
[[ -n "$cpu_day_avg" ]] && echo "$day_str $cpu_day_avg" >> "$TEMP_DIR/daily_cpu.txt"
# --- Memory from sar -r ---
local mem_output
mem_output=$(sar -f "$sar_file" -r 2>/dev/null) || true
if [[ -n "$mem_output" ]]; then
if [[ $collected -eq 1 ]]; then
local mem_header
mem_header=$(echo "$mem_output" | grep -i 'kbmemfree\|kbmemused' | head -1)
declare -A MEM_COLS=()
parse_sar_columns "$mem_header" MEM_COLS
fi
local col_free="${MEM_COLS[kbmemfree]:-0}"
local col_used="${MEM_COLS[kbmemused]:-0}"
local col_buf="${MEM_COLS[kbbuffers]:-0}"
local col_cache="${MEM_COLS[kbcached]:-0}"
# Repeated header rows also begin with a timestamp but contain "kb"
# column names; grep -v 'kb' filters them out of the data stream.
# "Real" usage excludes buffers+cache, which the kernel reclaims on demand.
echo "$mem_output" | grep -E '^[0-9]' | grep -v 'kb' | awk \
-v cf="$col_free" -v cu="$col_used" -v cb="$col_buf" -v cc="$col_cache" '
{
if (cf > 0 && cu > 0) {
kbfree = $cf + 0
kbused = $cu + 0
kbbuf = (cb > 0) ? $cb + 0 : 0
kbcache = (cc > 0) ? $cc + 0 : 0
total = kbfree + kbused
if (total > 0) {
real_used = kbused - kbbuf - kbcache
if (real_used < 0) real_used = 0
printf "%.2f\n", (real_used / total) * 100
}
}
}' >> "$TEMP_DIR/mem_real_pct.txt"
# Memory daily average
# Same 12h column shift as the CPU Average line above.
local mem_day_avg
mem_day_avg=$(echo "$mem_output" | grep '^Average' | awk \
-v cf="$col_free" -v cu="$col_used" -v cb="$col_buf" -v cc="$col_cache" -v fmt="$TIME_FORMAT" '
BEGIN { if (fmt == "12h") { cf -= 1; cu -= 1; cb -= 1; cc -= 1 } }
{
if (cf > 0 && cu > 0) {
kbfree = $cf + 0; kbused = $cu + 0
kbbuf = (cb > 0) ? $cb + 0 : 0
kbcache = (cc > 0) ? $cc + 0 : 0
total = kbfree + kbused
if (total > 0) {
real_used = kbused - kbbuf - kbcache
if (real_used < 0) real_used = 0
printf "%.2f", (real_used / total) * 100
}
}
}' || true)
[[ -n "$mem_day_avg" ]] && echo "$day_str $mem_day_avg" >> "$TEMP_DIR/daily_mem.txt"
fi
# --- Swap from sar -S ---
# Only %swpused is recorded; analyze_memory() uses it as a pressure signal.
local swap_output
swap_output=$(sar -f "$sar_file" -S 2>/dev/null) || true
if [[ -n "$swap_output" ]]; then
if [[ $collected -eq 1 ]]; then
local swap_header
swap_header=$(echo "$swap_output" | grep -i 'kbswp\|%swp' | head -1)
if [[ -n "$swap_header" ]]; then
declare -A SWAP_COLS=()
parse_sar_columns "$swap_header" SWAP_COLS
fi
fi
local col_swpused="${SWAP_COLS[%swpused]:-0}"
if [[ "$col_swpused" -gt 0 ]]; then
echo "$swap_output" | grep -E '^[0-9]' | grep -v 'kb' | awk \
-v sc="$col_swpused" '
{
if (sc > 0 && $sc ~ /^[0-9]/) printf "%.2f\n", $sc
}' >> "$TEMP_DIR/swap_pct.txt"
fi
fi
# --- Load averages from sar -q ---
# NOTE(review): eval is used because SAR_Q_CMD may be two words
# ("sar -q LOAD"). eval re-parses the whole line, so "$sar_file" would be
# word-split again if it contained spaces — safe for the known SAR_DIR
# candidates, but keep SAR_Q_CMD trusted.
local load_output
load_output=$(eval $SAR_Q_CMD -f "$sar_file" 2>/dev/null) || true
if [[ -n "$load_output" ]]; then
if [[ $collected -eq 1 ]]; then
local load_header
load_header=$(echo "$load_output" | grep -i 'ldavg\|runq' | head -1)
declare -A LOAD_COLS=()
parse_sar_columns "$load_header" LOAD_COLS
fi
local col_ldavg5="${LOAD_COLS[ldavg-5]:-0}"
if [[ "$col_ldavg5" -gt 0 ]]; then
echo "$load_output" | grep -E '^[0-9]' | awk \
-v lc="$col_ldavg5" '
{
if (lc > 0 && $lc ~ /^[0-9]/) printf "%.2f\n", $lc
}' >> "$TEMP_DIR/load5.txt"
fi
# Load daily average
local load_day_avg
load_day_avg=$(echo "$load_output" | grep '^Average' | awk \
-v lc="$col_ldavg5" -v fmt="$TIME_FORMAT" '
BEGIN { if (fmt == "12h") lc -= 1 }
{ if (lc > 0 && $lc ~ /^[0-9]/) printf "%.2f", $lc }' || true)
[[ -n "$load_day_avg" ]] && echo "$day_str $load_day_avg" >> "$TEMP_DIR/daily_load.txt"
fi
# --- Network from sar -n DEV ---
# Loopback traffic is excluded (IFACE != "lo"); all other interfaces'
# samples are pooled together.
local net_output
net_output=$(sar -f "$sar_file" -n DEV 2>/dev/null) || true
if [[ -n "$net_output" ]]; then
if [[ $collected -eq 1 ]]; then
local net_header
net_header=$(echo "$net_output" | grep -i 'IFACE\|rxpck' | head -1)
if [[ -n "$net_header" ]]; then
declare -A NET_COLS=()
parse_sar_columns "$net_header" NET_COLS
fi
fi
local col_iface="${NET_COLS[IFACE]:-0}"
local col_rxkbs="${NET_COLS[rxkB/s]:-0}"
local col_txkbs="${NET_COLS[txkB/s]:-0}"
if [[ "$col_rxkbs" -gt 0 && "$col_txkbs" -gt 0 ]]; then
echo "$net_output" | grep -E '^[0-9]' | awk \
-v ic="$col_iface" -v rc="$col_rxkbs" -v tc="$col_txkbs" '
{
if (ic > 0 && $ic != "lo") {
if ($rc ~ /^[0-9]/) printf "%.2f\n", $rc >> "/dev/fd/3"
if ($tc ~ /^[0-9]/) printf "%.2f\n", $tc >> "/dev/fd/4"
}
}' 3>>"$TEMP_DIR/net_rx_kbs.txt" 4>>"$TEMP_DIR/net_tx_kbs.txt"
fi
fi
# --- Disk I/O from sar -b ---
local disk_output
disk_output=$(sar -f "$sar_file" -b 2>/dev/null) || true
if [[ -n "$disk_output" ]]; then
if [[ $collected -eq 1 ]]; then
local disk_header
disk_header=$(echo "$disk_output" | grep -i 'tps\|bread' | head -1)
if [[ -n "$disk_header" ]]; then
declare -A DISK_COLS=()
parse_sar_columns "$disk_header" DISK_COLS
fi
fi
local col_tps="${DISK_COLS[tps]:-0}"
local col_bread="${DISK_COLS[bread/s]:-0}"
local col_bwrtn="${DISK_COLS[bwrtn/s]:-0}"
if [[ "$col_tps" -gt 0 ]]; then
echo "$disk_output" | grep -E '^[0-9]' | awk \
-v tc="$col_tps" -v bc="$col_bread" -v wc="$col_bwrtn" '
{
if (tc > 0 && $tc ~ /^[0-9]/) printf "%.2f\n", $tc >> "/dev/fd/3"
if (bc > 0 && $bc ~ /^[0-9]/) printf "%.2f\n", $bc >> "/dev/fd/4"
if (wc > 0 && $wc ~ /^[0-9]/) printf "%.2f\n", $wc >> "/dev/fd/5"
}' 3>>"$TEMP_DIR/disk_tps.txt" 4>>"$TEMP_DIR/disk_bread.txt" 5>>"$TEMP_DIR/disk_bwrtn.txt"
fi
fi
done
if [[ $collected -eq 0 ]]; then
echo -e "${RED}Error: No SAR data files found for the last ${DAYS} days${NC}"
exit 1
fi
echo -e "${GREEN}✓${NC} Collected data from ${collected} day(s)"
}
# Summarize CPU-busy samples (100 - %idle, written by collect_data into
# cpu_usage.txt) as avg/percentiles/min/max with bar charts, then append a
# CPU_VERDICT line (UPGRADE / DOWNGRADE / OK) to "$TEMP_DIR/verdicts".
analyze_cpu() {
local cpu_file="$TEMP_DIR/cpu_usage.txt"
local count
count=$(wc -l < "$cpu_file" | tr -d ' ')
if [[ -z "$count" || "$count" -eq 0 ]]; then
echo -e "${YELLOW}No CPU data available${NC}"
return 0
fi
local avg max min p50 p95 p99
avg=$(awk '{sum+=$1} END {printf "%.1f", sum/NR}' "$cpu_file")
# awk 'NR==1' consumes the whole stream, so sort is never SIGPIPE-killed
# (which would fail the pipeline under pipefail).
max=$(sort -rn "$cpu_file" | awk 'NR==1')
min=$(sort -n "$cpu_file" | awk 'NR==1')
p50=$(percentile "$cpu_file" 50)
p95=$(percentile "$cpu_file" 95)
p99=$(percentile "$cpu_file" 99)
echo -e "${CYAN}${BOLD}CPU UTILIZATION${NC}"
echo -e " Samples: ${count} over ${DAYS} days"
echo ""
printf " %-12s %6s %s\n" "Metric" "Value" "Distribution"
echo " ────────────────────────────────────────────────────"
printf " %-12s %5.1f%% " "Average" "$avg"; bar "$avg"; echo ""
printf " %-12s %5.1f%% " "P50 (med)" "$p50"; bar "$p50"; echo ""
printf " %-12s %5.1f%% " "P95" "$p95"; bar "$p95"; echo ""
printf " %-12s %5.1f%% " "P99" "$p99"; bar "$p99"; echo ""
printf " %-12s %5.1f%% " "Maximum" "$max"; bar "$max"; echo ""
printf " %-12s %5.1f%%\n" "Minimum" "$min"
echo ""
# Share of samples beyond each threshold; bc keeps one decimal place.
local above_high below_low pct_high pct_low
above_high=$(awk -v t=$CPU_HIGH '$1>t' "$cpu_file" | wc -l)
pct_high=$(echo "scale=1; $above_high * 100 / $count" | bc)
echo -e " Time above ${CPU_HIGH}%: ${above_high} samples (${pct_high}%)"
below_low=$(awk -v t=$CPU_LOW '$1<t' "$cpu_file" | wc -l)
pct_low=$(echo "scale=1; $below_low * 100 / $count" | bc)
echo -e " Time below ${CPU_LOW}%: ${below_low} samples (${pct_low}%)"
echo ""
# Verdict: sustained load (P95) above CPU_HIGH means upgrade; P95 under
# CPU_LOW with peaks that never reach 50% suggests downgrade.
local verdict
verdict=$(awk -v p95="$p95" -v max="$max" -v high="$CPU_HIGH" -v low="$CPU_LOW" 'BEGIN {
if (p95 > high) print "UPGRADE"
else if (p95 < low && max < 50) print "DOWNGRADE"
else print "OK"
}')
case "$verdict" in
UPGRADE)
echo -e " ${RED}▲ UPGRADE RECOMMENDED${NC} - CPU regularly saturated"
echo "CPU_VERDICT=UPGRADE" >> "$TEMP_DIR/verdicts"
;;
DOWNGRADE)
echo -e " ${BLUE}▼ DOWNGRADE POSSIBLE${NC} - CPU consistently underutilized"
echo "CPU_VERDICT=DOWNGRADE" >> "$TEMP_DIR/verdicts"
;;
*)
echo -e " ${GREEN}● ADEQUATE${NC} - CPU capacity matches workload"
echo "CPU_VERDICT=OK" >> "$TEMP_DIR/verdicts"
;;
esac
}
# Report %steal statistics (hypervisor CPU contention) from steal.txt and
# append STEAL_VERDICT (UPGRADE / OK) to "$TEMP_DIR/verdicts".
analyze_steal() {
    local data_file="$TEMP_DIR/steal.txt"
    local n
    n=$(wc -l < "$data_file" | tr -d ' ')
    if [[ -z "$n" || "$n" -eq 0 ]]; then
        echo -e "${YELLOW}No steal time data available${NC}"
        return 0
    fi
    local mean peak med p95 p99
    mean=$(awk '{total += $1} END {printf "%.2f", total / NR}' "$data_file")
    peak=$(sort -rn "$data_file" | awk 'NR==1')
    med=$(percentile "$data_file" 50)
    p95=$(percentile "$data_file" 95)
    p99=$(percentile "$data_file" 99)
    echo -e "${CYAN}${BOLD}STEAL TIME${NC} ${DIM}(Hypervisor Contention)${NC}"
    echo -e " Samples: ${n} over ${DAYS} days"
    echo ""
    printf " %-12s %6s %s\n" "Metric" "Value" "Distribution"
    echo " ────────────────────────────────────────────────────"
    printf " %-12s %5.2f%% " "Average" "$mean"; bar "$mean"; echo ""
    printf " %-12s %5.2f%% " "P50 (med)" "$med"; bar "$med"; echo ""
    printf " %-12s %5.2f%% " "P95" "$p95"; bar "$p95"; echo ""
    printf " %-12s %5.2f%% " "P99" "$p99"; bar "$p99"; echo ""
    printf " %-12s %5.2f%% " "Maximum" "$peak"; bar "$peak"; echo ""
    echo ""
    # Only the sustained (P95) level drives the verdict.
    if awk -v p="$p95" -v limit="$STEAL_HIGH" 'BEGIN {exit !(p > limit)}'; then
        echo -e " ${RED}▲ UPGRADE RECOMMENDED${NC} - Hypervisor contention detected"
        echo "STEAL_VERDICT=UPGRADE" >> "$TEMP_DIR/verdicts"
    else
        echo -e " ${GREEN}● ADEQUATE${NC} - Minimal hypervisor contention"
        echo "STEAL_VERDICT=OK" >> "$TEMP_DIR/verdicts"
    fi
}
# Summarize application memory pressure: mem_real_pct.txt holds used RAM
# minus buffers/cache as a % of total (parsed by collect_data). Combines it
# with swap usage and appends MEM_VERDICT to "$TEMP_DIR/verdicts".
analyze_memory() {
local mem_file="$TEMP_DIR/mem_real_pct.txt"
local count
count=$(wc -l < "$mem_file" | tr -d ' ')
if [[ -z "$count" || "$count" -eq 0 ]]; then
echo -e "${YELLOW}No memory data available${NC}"
return 0
fi
local avg max min p50 p95 p99
avg=$(awk '{sum+=$1} END {printf "%.1f", sum/NR}' "$mem_file")
max=$(sort -rn "$mem_file" | awk 'NR==1')
min=$(sort -n "$mem_file" | awk 'NR==1')
p50=$(percentile "$mem_file" 50)
p95=$(percentile "$mem_file" 95)
p99=$(percentile "$mem_file" 99)
echo -e "${CYAN}${BOLD}MEMORY UTILIZATION${NC} ${DIM}(Application Memory - excludes cache)${NC}"
echo -e " Samples: ${count} over ${DAYS} days (Total RAM: $(get_total_mem_gb) GB)"
echo ""
printf " %-12s %6s %s\n" "Metric" "Value" "Distribution"
echo " ────────────────────────────────────────────────────"
printf " %-12s %5.1f%% " "Average" "$avg"; bar "$avg"; echo ""
printf " %-12s %5.1f%% " "P50 (med)" "$p50"; bar "$p50"; echo ""
printf " %-12s %5.1f%% " "P95" "$p95"; bar "$p95"; echo ""
printf " %-12s %5.1f%% " "P99" "$p99"; bar "$p99"; echo ""
printf " %-12s %5.1f%% " "Maximum" "$max"; bar "$max"; echo ""
printf " %-12s %5.1f%%\n" "Minimum" "$min"
echo ""
# Share of samples beyond each threshold; bc keeps one decimal place.
local above_high below_low pct_high pct_low
above_high=$(awk -v t=$MEM_HIGH '$1>t' "$mem_file" | wc -l)
pct_high=$(echo "scale=1; $above_high * 100 / $count" | bc)
echo -e " Time above ${MEM_HIGH}%: ${above_high} samples (${pct_high}%)"
below_low=$(awk -v t=$MEM_LOW '$1<t' "$mem_file" | wc -l)
pct_low=$(echo "scale=1; $below_low * 100 / $count" | bc)
echo -e " Time below ${MEM_LOW}%: ${below_low} samples (${pct_low}%)"
echo ""
# Swap pressure from pre-parsed data
local swap_file="$TEMP_DIR/swap_pct.txt"
local swap_avg="0"
local swap_p95="0"
if [[ -s "$swap_file" ]]; then
swap_avg=$(awk '{sum+=$1} END {if(NR>0) printf "%.1f", sum/NR; else print "0"}' "$swap_file")
swap_p95=$(percentile "$swap_file" 95)
fi
echo -e " ${DIM}Swap Usage: Avg ${swap_avg}%, P95 ${swap_p95}%${NC}"
echo -e " ${DIM}(High swap = real memory pressure, cache usage is normal)${NC}"
# Verdict heuristics: P95 memory over MEM_HIGH, heavy swap (P95 > 30%), or
# moderate swap (> 15%) combined with fairly high memory (P95 > 60%) all
# mean upgrade; downgrade only when memory AND swap are consistently low.
local verdict
verdict=$(awk -v p95="$p95" -v max="$max" -v swap="$swap_p95" -v high="$MEM_HIGH" -v low="$MEM_LOW" 'BEGIN {
if (p95 > high) print "UPGRADE"
else if (swap > 30) print "UPGRADE"
else if (swap > 15 && p95 > 60) print "UPGRADE"
else if (p95 < low && max < 60 && swap < 5) print "DOWNGRADE"
else print "OK"
}')
case "$verdict" in
UPGRADE)
echo -e " ${RED}▲ UPGRADE RECOMMENDED${NC} - Memory pressure detected"
echo "MEM_VERDICT=UPGRADE" >> "$TEMP_DIR/verdicts"
;;
DOWNGRADE)
echo -e " ${BLUE}▼ DOWNGRADE POSSIBLE${NC} - Memory overprovisioned"
echo "MEM_VERDICT=DOWNGRADE" >> "$TEMP_DIR/verdicts"
;;
*)
echo -e " ${GREEN}● ADEQUATE${NC} - Memory allocation appropriate"
echo "MEM_VERDICT=OK" >> "$TEMP_DIR/verdicts"
;;
esac
}
# Summarize the 5-minute load average (load5.txt) relative to core count and
# append LOAD_VERDICT to "$TEMP_DIR/verdicts". Thresholds scale with cores:
# high = cores * LOAD_HIGH_MULT, low = cores * LOAD_LOW_MULT.
analyze_load() {
local load_file="$TEMP_DIR/load5.txt"
local cores
cores=$(get_cores)
local count
count=$(wc -l < "$load_file" | tr -d ' ')
if [[ -z "$count" || "$count" -eq 0 ]]; then
echo -e "${YELLOW}No load data available${NC}"
return 0
fi
local avg max min p50 p95 p99
avg=$(awk '{sum+=$1} END {printf "%.2f", sum/NR}' "$load_file")
max=$(sort -rn "$load_file" | awk 'NR==1')
# NOTE(review): min is computed but never displayed or used below.
min=$(sort -n "$load_file" | awk 'NR==1')
p50=$(percentile "$load_file" 50)
p95=$(percentile "$load_file" 95)
p99=$(percentile "$load_file" 99)
# bc does the fractional multiply; awk reformats to one decimal place.
local high_thresh low_thresh
high_thresh=$(echo "scale=2; $cores * $LOAD_HIGH_MULT" | bc | awk '{printf "%.1f", $1}')
low_thresh=$(echo "scale=2; $cores * $LOAD_LOW_MULT" | bc | awk '{printf "%.1f", $1}')
# Load expressed as a percentage of available cores.
local avg_pct p95_pct max_pct
avg_pct=$(echo "scale=1; $avg * 100 / $cores" | bc)
p95_pct=$(echo "scale=1; $p95 * 100 / $cores" | bc)
max_pct=$(echo "scale=1; $max * 100 / $cores" | bc)
echo -e "${CYAN}${BOLD}LOAD AVERAGE${NC}"
echo -e " Samples: ${count} over ${DAYS} days (${cores} cores)"
echo ""
printf " %-12s %8s %s\n" "Metric" "Value" "Per-core"
echo " ────────────────────────────────────────────────────"
printf " %-12s %8.2f (%.0f%% of cores)\n" "Average" "$avg" "$avg_pct"
printf " %-12s %8.2f\n" "P50 (med)" "$p50"
printf " %-12s %8.2f (%.0f%% of cores)\n" "P95" "$p95" "$p95_pct"
printf " %-12s %8.2f\n" "P99" "$p99"
printf " %-12s %8.2f (%.0f%% of cores)\n" "Maximum" "$max" "$max_pct"
echo ""
echo -e " High threshold: ${high_thresh} (${cores} x ${LOAD_HIGH_MULT})"
echo -e " Low threshold: ${low_thresh} (${cores} x ${LOAD_LOW_MULT})"
echo ""
# Verdict keys off the sustained (P95) load versus the scaled thresholds.
local verdict
verdict=$(awk -v p95="$p95" -v high="$high_thresh" -v low="$low_thresh" 'BEGIN {
if (p95 > high) print "UPGRADE"
else if (p95 < low) print "DOWNGRADE"
else print "OK"
}')
case "$verdict" in
UPGRADE)
echo -e " ${RED}▲ UPGRADE RECOMMENDED${NC} - System frequently overloaded"
echo "LOAD_VERDICT=UPGRADE" >> "$TEMP_DIR/verdicts"
;;
DOWNGRADE)
echo -e " ${BLUE}▼ DOWNGRADE POSSIBLE${NC} - Load consistently low"
echo "LOAD_VERDICT=DOWNGRADE" >> "$TEMP_DIR/verdicts"
;;
*)
echo -e " ${GREEN}● ADEQUATE${NC} - Load within expected range"
echo "LOAD_VERDICT=OK" >> "$TEMP_DIR/verdicts"
;;
esac
}
# Report %iowait statistics from iowait.txt, stash the P95 for
# analyze_disk() to cross-reference, and append IO_VERDICT to verdicts.
analyze_io() {
    local data_file="$TEMP_DIR/iowait.txt"
    local n
    n=$(wc -l < "$data_file" | tr -d ' ')
    if [[ -z "$n" || "$n" -eq 0 ]]; then
        echo -e "${YELLOW}No I/O wait data available${NC}"
        return 0
    fi
    local mean peak p95 p99
    mean=$(awk '{total += $1} END {printf "%.2f", total / NR}' "$data_file")
    peak=$(sort -rn "$data_file" | awk 'NR==1')
    p95=$(percentile "$data_file" 95)
    p99=$(percentile "$data_file" 99)
    echo -e "${CYAN}${BOLD}I/O WAIT${NC}"
    echo -e " Samples: ${n} over ${DAYS} days"
    echo ""
    printf " %-12s %6s\n" "Metric" "Value"
    echo " ──────────────────────"
    printf " %-12s %5.2f%%\n" "Average" "$mean"
    printf " %-12s %5.2f%%\n" "P95" "$p95"
    printf " %-12s %5.2f%%\n" "P99" "$p99"
    printf " %-12s %5.2f%%\n" "Maximum" "$peak"
    echo ""
    # Save iowait P95 for cross-reference by analyze_disk()
    echo "$p95" > "$TEMP_DIR/io_p95"
    if awk -v p="$p95" -v limit="$IO_HIGH" 'BEGIN {exit !(p > limit)}'; then
        echo -e " ${RED}▲ DISK UPGRADE RECOMMENDED${NC} - High I/O wait indicates disk bottleneck"
        echo "IO_VERDICT=UPGRADE" >> "$TEMP_DIR/verdicts"
    elif awk -v a="$mean" 'BEGIN {exit !(a < 1)}'; then
        echo -e " ${GREEN}● EXCELLENT${NC} - Minimal I/O contention"
        echo "IO_VERDICT=OK" >> "$TEMP_DIR/verdicts"
    else
        echo -e " ${GREEN}● ADEQUATE${NC} - I/O wait acceptable"
        echo "IO_VERDICT=OK" >> "$TEMP_DIR/verdicts"
    fi
}
# Summarize non-loopback NIC throughput (net_rx_kbs.txt / net_tx_kbs.txt,
# written by collect_data) and append NET_VERDICT to "$TEMP_DIR/verdicts".
# Fix: the tx-side awk averages divided sum/NR without a row-count guard.
# If the tx file is empty while rx has data (e.g. a malformed tx column),
# awk raises a division-by-zero error, and under `set -e` the failed
# command substitution aborts the entire script. Same for an empty max
# feeding printf %f. Both paths are now guarded.
analyze_network() {
    local rx_file="$TEMP_DIR/net_rx_kbs.txt"
    local tx_file="$TEMP_DIR/net_tx_kbs.txt"
    local rx_count
    rx_count=$(wc -l < "$rx_file" | tr -d ' ')
    if [[ -z "$rx_count" || "$rx_count" -eq 0 ]]; then
        echo -e "${YELLOW}No network data available${NC}"
        return 0
    fi
    # Convert kB/s to MB/s for analysis
    awk '{printf "%.2f\n", $1/1024}' "$rx_file" > "$TEMP_DIR/net_rx_mbs.txt"
    awk '{printf "%.2f\n", $1/1024}' "$tx_file" > "$TEMP_DIR/net_tx_mbs.txt"
    local rx_mbs="$TEMP_DIR/net_rx_mbs.txt"
    local tx_mbs="$TEMP_DIR/net_tx_mbs.txt"
    local rx_avg rx_p95 rx_max tx_avg tx_p95 tx_max
    # Guard NR==0 so an empty file yields "0.00" instead of an awk error.
    rx_avg=$(awk '{sum+=$1} END {if (NR) printf "%.2f", sum/NR; else printf "0.00"}' "$rx_mbs")
    rx_p95=$(percentile "$rx_mbs" 95)
    rx_max=$(sort -rn "$rx_mbs" | awk 'NR==1')
    tx_avg=$(awk '{sum+=$1} END {if (NR) printf "%.2f", sum/NR; else printf "0.00"}' "$tx_mbs")
    tx_p95=$(percentile "$tx_mbs" 95)
    tx_max=$(sort -rn "$tx_mbs" | awk 'NR==1')
    # An empty max would make the %10.2f printf below fail (set -e abort).
    [[ -n "$rx_max" ]] || rx_max=0
    [[ -n "$tx_max" ]] || tx_max=0
    # Combined peak (max of rx or tx P95)
    local peak_p95
    peak_p95=$(awk -v rx="$rx_p95" -v tx="$tx_p95" 'BEGIN {print (rx > tx) ? rx : tx}')
    echo -e "${CYAN}${BOLD}NETWORK THROUGHPUT${NC} ${DIM}(excludes loopback)${NC}"
    echo -e " Samples: ${rx_count} over ${DAYS} days"
    echo ""
    printf " %-12s %10s %10s\n" "Metric" "RX (MB/s)" "TX (MB/s)"
    echo " ────────────────────────────────────────"
    printf " %-12s %10.2f %10.2f\n" "Average" "$rx_avg" "$tx_avg"
    printf " %-12s %10.2f %10.2f\n" "P95" "$rx_p95" "$tx_p95"
    printf " %-12s %10.2f %10.2f\n" "Maximum" "$rx_max" "$tx_max"
    echo ""
    # Verdict: sustained (P95) throughput on either direction over the limit.
    local verdict
    verdict=$(awk -v p95="$peak_p95" -v high="$NET_HIGH_MBS" 'BEGIN {
        if (p95 > high) print "UPGRADE"
        else print "OK"
    }')
    case "$verdict" in
        UPGRADE)
            echo -e " ${RED}▲ UPGRADE RECOMMENDED${NC} - Network link saturated"
            echo "NET_VERDICT=UPGRADE" >> "$TEMP_DIR/verdicts"
            ;;
        *)
            echo -e " ${GREEN}● ADEQUATE${NC} - Network throughput within capacity"
            echo "NET_VERDICT=OK" >> "$TEMP_DIR/verdicts"
            ;;
    esac
}
# Summarize disk TPS / block throughput (disk_*.txt from collect_data). The
# verdict keys off the iowait P95 saved by analyze_io(), because raw TPS
# alone is not a bottleneck signal on fast storage.
# Fix: bread/bwrtn files can legitimately be empty (those columns are absent
# from some sar builds, so collect_data writes nothing). The unguarded
# sum/NR then raised an awk division-by-zero error, and under `set -e` the
# failed command substitution aborted the whole script. NR guards added.
analyze_disk() {
    local tps_file="$TEMP_DIR/disk_tps.txt"
    local bread_file="$TEMP_DIR/disk_bread.txt"
    local bwrtn_file="$TEMP_DIR/disk_bwrtn.txt"
    local count
    count=$(wc -l < "$tps_file" | tr -d ' ')
    if [[ -z "$count" || "$count" -eq 0 ]]; then
        echo -e "${YELLOW}No disk I/O data available${NC}"
        return 0
    fi
    local tps_avg tps_p95 tps_max
    # NR > 0 is guaranteed here by the count check above.
    tps_avg=$(awk '{sum+=$1} END {printf "%.1f", sum/NR}' "$tps_file")
    tps_p95=$(percentile "$tps_file" 95)
    tps_max=$(sort -rn "$tps_file" | awk 'NR==1')
    local bread_avg bread_p95
    bread_avg=$(awk '{sum+=$1} END {if (NR) printf "%.1f", sum/NR; else printf "0.0"}' "$bread_file")
    bread_p95=$(percentile "$bread_file" 95)
    local bwrtn_avg bwrtn_p95
    bwrtn_avg=$(awk '{sum+=$1} END {if (NR) printf "%.1f", sum/NR; else printf "0.0"}' "$bwrtn_file")
    bwrtn_p95=$(percentile "$bwrtn_file" 95)
    echo -e "${CYAN}${BOLD}DISK I/O${NC}"
    echo -e " Samples: ${count} over ${DAYS} days"
    echo ""
    printf " %-12s %10s %12s %12s\n" "Metric" "TPS" "Read blk/s" "Write blk/s"
    echo " ──────────────────────────────────────────────────────"
    printf " %-12s %10.1f %12.1f %12.1f\n" "Average" "$tps_avg" "$bread_avg" "$bwrtn_avg"
    printf " %-12s %10.1f %12.1f %12.1f\n" "P95" "$tps_p95" "$bread_p95" "$bwrtn_p95"
    printf " %-12s %10.1f\n" "Maximum" "$tps_max"
    echo ""
    # Cross-reference with iowait for smarter verdict
    local io_p95=0
    [[ -f "$TEMP_DIR/io_p95" ]] && io_p95=$(cat "$TEMP_DIR/io_p95")
    # Iowait is the primary signal - TPS alone doesn't indicate a problem on SSDs
    local verdict
    verdict=$(awk -v tps_p95="$tps_p95" -v io_p95="$io_p95" -v io_high="$IO_HIGH" 'BEGIN {
        if (io_p95 > io_high && tps_p95 > 1000) print "THROUGHPUT"
        else if (io_p95 > io_high) print "LATENCY"
        else print "OK"
    }')
    case "$verdict" in
        THROUGHPUT)
            echo -e " ${RED}▲ UPGRADE RECOMMENDED${NC} - Disk saturated (iowait P95: ${io_p95}% + high TPS)"
            echo "DISK_VERDICT=UPGRADE" >> "$TEMP_DIR/verdicts"
            ;;
        LATENCY)
            echo -e " ${RED}▲ UPGRADE RECOMMENDED${NC} - Disk latency issue (iowait P95: ${io_p95}%)"
            echo -e " ${DIM}Consider SSD/NVMe migration for latency improvement${NC}"
            echo "DISK_VERDICT=UPGRADE" >> "$TEMP_DIR/verdicts"
            ;;
        *)
            echo -e " ${GREEN}● ADEQUATE${NC} - Disk I/O within capacity"
            echo "DISK_VERDICT=OK" >> "$TEMP_DIR/verdicts"
            ;;
    esac
}
# Scan PHP-FPM error logs for "max_children" warnings (worker pool
# exhaustion), break hits down by PHP version and by date within the DAYS
# window, and append PHP_VERDICT to "$TEMP_DIR/verdicts".
analyze_php_fpm() {
# Auto-detect PHP-FPM error logs
local fpm_logs=()
local search_dirs=("/var/log/php-fpm" "/var/opt/remi" "/var/log")
for dir in "${search_dirs[@]}"; do
[[ -d "$dir" ]] || continue
while IFS= read -r f; do
fpm_logs+=("$f")
done < <(find "$dir" -name "error.log" -path "*fpm*" 2>/dev/null)
while IFS= read -r f; do
fpm_logs+=("$f")
done < <(find "$dir" -name "php*-fpm*.log" 2>/dev/null)
done
if [[ ${#fpm_logs[@]} -eq 0 ]]; then
echo -e "${DIM}PHP-FPM: No log files found (checked /var/log/php-fpm, /var/opt/remi, /var/log)${NC}"
return 0
fi
echo -e "${CYAN}${BOLD}PHP-FPM WORKER LIMITS${NC}"
echo ""
# Oldest date (inclusive) of the analysis window; two date invocations to
# cover differing date(1) syntaxes.
local cutoff_date
cutoff_date=$(date -d "$DAYS days ago" +%Y-%m-%d 2>/dev/null || date -d "-${DAYS}d" +%Y-%m-%d 2>/dev/null)
# hits_file stores: date version
local hits_file="$TEMP_DIR/fpm_hits.txt"
> "$hits_file"
# Extract PHP version from log path (e.g., /var/opt/remi/php81/ -> 8.1)
# NOTE(review): these nested helpers become globally visible once this
# function has run.
extract_php_version() {
local path="$1"
local ver
ver=$(echo "$path" | grep -oE 'php[0-9]+' | head -1 | sed 's/php//')
if [[ -n "$ver" && ${#ver} -ge 2 ]]; then
echo "${ver:0:1}.${ver:1}"
else
echo "unknown"
fi
}
# Append "YYYY-MM-DD version" rows to hits_file for every max_children
# warning found in the given log.
process_log() {
local log="$1"
local php_ver
php_ver=$(extract_php_version "$log")
[[ -f "$log" ]] || return 0
# The while body runs in a pipeline subshell; results persist because
# they are appended to a file rather than a shell variable.
grep -i "max_children" "$log" 2>/dev/null | while read -r line; do
local log_date
# Try the FPM "DD-Mon-YYYY" timestamp first, then an already-ISO date.
log_date=$(echo "$line" | grep -oE '[0-9]{2}-[A-Za-z]{3}-[0-9]{4}' | head -1)
if [[ -n "$log_date" ]]; then
# NOTE(review): converting "02-Jan-2024" relies on GNU date parsing;
# other date(1) builds may emit nothing (the || true absorbs that).
log_date=$(date -d "$log_date" +%Y-%m-%d 2>/dev/null || true)
else
log_date=$(echo "$line" | grep -oE '[0-9]{4}-[0-9]{2}-[0-9]{2}' | head -1)
fi
[[ -n "$log_date" ]] && echo "$log_date $php_ver" >> "$hits_file"
done || true
}
# Scan each log plus its first rotated generation (log.1).
for log in "${fpm_logs[@]}"; do
process_log "$log"
[[ -f "${log}.1" ]] && process_log "${log}.1"
done
local total_hits
total_hits=$(wc -l < "$hits_file" | tr -d ' ')
if [[ "$total_hits" -eq 0 ]]; then
echo -e " ${GREEN}● NO WORKER LIMIT HITS${NC} - pm.max_children never reached"
echo -e " ${DIM}Checked: ${fpm_logs[*]}${NC}"
else
# Per-version breakdown
echo -e " ${BOLD}By PHP Version:${NC}"
printf " %-12s %8s\n" "Version" "Hits"
echo " ────────────────────────"
# Keep only hits inside the window; ISO dates compare correctly as strings.
local period_file="$TEMP_DIR/fpm_period.txt"
> "$period_file"
while read -r date_str ver; do
if [[ "$date_str" > "$cutoff_date" || "$date_str" == "$cutoff_date" ]]; then
echo "$date_str $ver" >> "$period_file"
fi
done < "$hits_file"
local period_hits
period_hits=$(wc -l < "$period_file" | tr -d ' ')
# Version summary
awk '{print $2}' "$period_file" | sort | uniq -c | sort -rn | while read -r count ver; do
printf " PHP %-7s %8d\n" "$ver" "$count"
done
echo ""
# Per-day breakdown
echo -e " ${BOLD}By Date:${NC}"
printf " %-12s %8s %s\n" "Date" "Hits" "Versions"
echo " ────────────────────────────────────────"
for date_str in $(awk '{print $1}' "$period_file" | sort -ru); do
local count versions
count=$(grep -c "^${date_str} " "$period_file")
versions=$(grep "^${date_str} " "$period_file" | awk '{print $2}' | sort -u | tr '\n' ',' | sed 's/,$//')
printf " %-12s %8d PHP %s\n" "$date_str" "$count" "$versions"
done
echo ""
echo -e " Total in ${DAYS}-day period: ${BOLD}${period_hits}${NC} worker limit hits"
# Verdict thresholds: > 50 hits = strong signal, > 10 = advisory.
if [[ "$period_hits" -gt 50 ]]; then
echo -e " ${RED}▲ INCREASE pm.max_children${NC} - Frequent worker exhaustion"
echo "PHP_VERDICT=UPGRADE" >> "$TEMP_DIR/verdicts"
elif [[ "$period_hits" -gt 10 ]]; then
echo -e " ${YELLOW}▲ CONSIDER increasing pm.max_children${NC} - Occasional worker exhaustion"
echo "PHP_VERDICT=UPGRADE" >> "$TEMP_DIR/verdicts"
else
echo -e " ${GREEN}● LOW FREQUENCY${NC} - Rare worker limit hits (${period_hits} in ${DAYS} days)"
echo "PHP_VERDICT=OK" >> "$TEMP_DIR/verdicts"
fi
fi
echo ""
}
daily_summary() {
  # Print a per-day CPU%/MEM%/load table for the analysis window, then run
  # least-squares trend detection per metric and project when a rising
  # metric will cross its upgrade threshold.
  # Globals (read):    TEMP_DIR, DAYS, color vars, TREND_THRESHOLD_PCT,
  #                    TREND_THRESHOLD_LOAD, CPU_HIGH, MEM_HIGH, LOAD_HIGH_MULT
  # Globals (written): appends "<METRIC>_TREND=RISING:<days>" lines to
  #                    "$TEMP_DIR/verdicts" for final_verdict to consume.
  # Helpers (defined elsewhere in this file): percentile(), get_cores().
  echo -e "${CYAN}${BOLD}DAILY TRENDS${NC}"
  echo ""
  printf " %-12s %8s %8s %8s\n" "Date" "CPU%" "MEM%" "Load"
  echo " ────────────────────────────────────────────"
  local daily_cpu="$TEMP_DIR/daily_cpu.txt"
  local daily_mem="$TEMP_DIR/daily_mem.txt"
  local daily_load="$TEMP_DIR/daily_load.txt"
  # Union of all dates seen in any metric file, newest first.
  local all_dates
  all_dates=$(cat "$daily_cpu" "$daily_mem" "$daily_load" 2>/dev/null | awk '{print $1}' | sort -ru || true)
  local date_str
  for date_str in $all_dates; do
    local cpu_val mem_val load_val
    # '|| true': any one metric file may be absent; without the guard the
    # failed awk would abort the whole script under 'set -e'. The value
    # simply renders as "-" below.
    cpu_val=$(awk -v d="$date_str" '$1==d {printf "%.1f", $2}' "$daily_cpu" 2>/dev/null || true)
    mem_val=$(awk -v d="$date_str" '$1==d {printf "%.1f", $2}' "$daily_mem" 2>/dev/null || true)
    load_val=$(awk -v d="$date_str" '$1==d {printf "%.2f", $2}' "$daily_load" 2>/dev/null || true)
    [[ -z "$cpu_val" ]] && cpu_val="-"
    [[ -z "$mem_val" ]] && mem_val="-"
    [[ -z "$load_val" ]] && load_val="-"
    printf " %-12s" "$date_str"
    if [[ "$cpu_val" != "-" ]]; then printf " %7s%%" "$cpu_val"; else printf " %8s" "-"; fi
    if [[ "$mem_val" != "-" ]]; then printf " %7s%%" "$mem_val"; else printf " %8s" "-"; fi
    printf " %8s\n" "$load_val"
  done
  echo ""
  # --- Trend detection via least-squares slope ---
  echo -e " ${BOLD}Trends (over ${DAYS} days):${NC}"
  # Args: $1 label, $2 data file ("date value" per line), $3 slope threshold
  # (per day), $4 upgrade threshold (empty = no projection), $5 display unit.
  compute_and_display_trend() {
    local label="$1" file="$2" threshold="$3" upgrade_val="$4" unit="$5"
    if [[ ! -s "$file" ]]; then
      printf " %-8s ${DIM}no data${NC}\n" "$label:"
      return
    fi
    local line_count
    line_count=$(wc -l < "$file" | tr -d ' ')
    if [[ "$line_count" -lt 3 ]]; then
      printf " %-8s ${DIM}insufficient data (need 3+ days)${NC}\n" "$label:"
      return
    fi
    # Least-squares slope: x = day index (0,1,2,...), y = metric value.
    local slope
    slope=$(awk '
      BEGIN { n=0 }
      {
        x = n; y = $2
        sx += x; sy += y; sxy += x*y; sxx += x*x
        n++
      }
      END {
        denom = n*sxx - sx*sx
        if (denom == 0) { printf "0.00"; exit }
        slope = (n*sxy - sx*sy) / denom
        printf "%.4f", slope
      }' "$file")
    local trend_label trend_color
    local is_rising is_falling
    is_rising=$(awk -v s="$slope" -v t="$threshold" 'BEGIN {print (s > t) ? 1 : 0}')
    is_falling=$(awk -v s="$slope" -v t="$threshold" 'BEGIN {print (s < -t) ? 1 : 0}')
    if [[ "$is_rising" -eq 1 ]]; then
      trend_label="RISING"
      trend_color="$YELLOW"
    elif [[ "$is_falling" -eq 1 ]]; then
      trend_label="FALLING"
      trend_color="$BLUE"
    else
      trend_label="STABLE"
      trend_color="$GREEN"
    fi
    local projection=""
    if [[ "$is_rising" -eq 1 && -n "$upgrade_val" ]]; then
      # Project days until the current p95 reaches the upgrade threshold;
      # only surfaced when it lands within the next 90 days.
      local current_p95
      case "$label" in
        CPU) current_p95=$(percentile "$TEMP_DIR/cpu_usage.txt" 95) ;;
        Memory) current_p95=$(percentile "$TEMP_DIR/mem_real_pct.txt" 95) ;;
        Load) current_p95=$(percentile "$TEMP_DIR/load5.txt" 95) ;;
      esac
      local days_to
      days_to=$(awk -v cur="$current_p95" -v tgt="$upgrade_val" -v s="$slope" 'BEGIN {
        if (s > 0 && tgt > cur) {
          d = (tgt - cur) / s
          if (d > 0 && d <= 90) printf "%.0f", d
        }
      }')
      if [[ -n "$days_to" ]]; then
        projection=" -- projected to hit ${upgrade_val}${unit} in ~${days_to} days"
        echo "${label}_TREND=RISING:${days_to}" >> "$TEMP_DIR/verdicts"
      fi
    fi
    # Drop the "+" prefix for negative slopes (printf already emits "-").
    # Replaces a fragile empty-command-substitution exit-status trick with
    # an explicit conditional on awk's exit code.
    local sign="+"
    if awk -v s="$slope" 'BEGIN {exit (s < 0) ? 0 : 1}'; then
      sign=""
    fi
    printf " %-8s ${trend_color}%-8s${NC} %s%.2f%s/day%s\n" \
      "$label:" "$trend_label" "$sign" "$slope" "$unit" "$projection"
  }
  compute_and_display_trend "CPU" "$daily_cpu" "$TREND_THRESHOLD_PCT" "$CPU_HIGH" "%"
  compute_and_display_trend "Memory" "$daily_mem" "$TREND_THRESHOLD_PCT" "$MEM_HIGH" "%"
  # For load, the upgrade threshold is cores * LOAD_HIGH_MULT.
  local load_high
  load_high=$(echo "scale=2; $(get_cores) * $LOAD_HIGH_MULT" | bc)
  compute_and_display_trend "Load" "$daily_load" "$TREND_THRESHOLD_LOAD" "$load_high" ""
  echo ""
}
peak_hours() {
  # Print the average CPU% for each hour of day (0-23) with a bar chart,
  # so recurring peak-traffic windows are easy to spot. Hours with no
  # samples are skipped.
  # Globals (read): TEMP_DIR (expects "$TEMP_DIR/hourly_cpu.txt" with
  # "hour value" rows), CYAN/BOLD/NC colors.
  echo -e "${CYAN}${BOLD}PEAK USAGE HOURS${NC}"
  echo ""
  local hourly_file="$TEMP_DIR/hourly_cpu.txt"
  if [[ ! -s "$hourly_file" ]]; then
    echo " No hourly data available"
    return 0
  fi
  echo " Hour Avg CPU% Samples"
  echo " ────────────────────────"
  local h samples avg bar_len hbar
  for ((h = 0; h <= 23; h++)); do
    # One awk pass per hour yields "count avg" (previously two passes:
    # a filter piped to wc -l plus a second awk for the average).
    read -r samples avg < <(awk -v h="$h" '
      $1 == h { sum += $2; c++ }
      END { if (c > 0) printf "%d %.1f\n", c, sum / c; else print "0 0" }' "$hourly_file")
    [[ "$samples" -eq 0 ]] && continue
    # Bar: one block per 5% average CPU, clamped to the range [1, 20].
    # awk's %d truncation matches the old 'bc' with scale=0 and saves a fork.
    bar_len=$(awk -v a="$avg" 'BEGIN { printf "%d", a / 5 }')
    [[ "$bar_len" -gt 20 ]] && bar_len=20
    [[ "$bar_len" -lt 1 ]] && bar_len=1
    hbar=$(printf '█%.0s' $(seq 1 "$bar_len") 2>/dev/null || true)
    printf " %02d:00 %6.1f%% %5d %s\n" "$h" "$avg" "$samples" "$hbar"
  done
  echo ""
}
final_verdict() {
# Summarize all per-resource findings into a single UPGRADE / DOWNGRADE /
# RIGHT-SIZED verdict by tallying the "<RES>_VERDICT=..." and
# "<RES>_TREND=RISING:<days>" lines that earlier analyzers appended to
# "$TEMP_DIR/verdicts".
# Globals (read): TEMP_DIR, DAYS, color variables; calls get_cores() and
# get_total_mem_gb(), which are defined elsewhere in this file.
echo ""
echo -e "${BOLD}════════════════════════════════════════════════════════${NC}"
echo -e "${BOLD} CAPACITY VERDICT${NC}"
echo -e "${BOLD}════════════════════════════════════════════════════════${NC}"
echo ""
local upgrades=0 downgrades=0 ok_count=0
local has_trend_warning=0
if [[ -f "$TEMP_DIR/verdicts" && -s "$TEMP_DIR/verdicts" ]]; then
# '|| true': grep -c prints "0" but exits 1 when nothing matches; the guard
# keeps the printed count while surviving 'set -e'/'pipefail'.
upgrades=$(grep -c "=UPGRADE" "$TEMP_DIR/verdicts" 2>/dev/null || true)
downgrades=$(grep -c "=DOWNGRADE" "$TEMP_DIR/verdicts" 2>/dev/null || true)
ok_count=$(grep -c "=OK" "$TEMP_DIR/verdicts" 2>/dev/null || true)
has_trend_warning=$(grep -c "_TREND=RISING" "$TEMP_DIR/verdicts" 2>/dev/null || true)
fi
# Defaults cover the missing/empty verdicts file, where the counters above
# were never assigned.
upgrades=${upgrades:-0}
downgrades=${downgrades:-0}
ok_count=${ok_count:-0}
has_trend_warning=${has_trend_warning:-0}
echo -e " Analysis Period: ${DAYS} days"
echo -e " CPU Cores: $(get_cores)"
echo -e " Total RAM: $(get_total_mem_gb) GB"
echo ""
# Check for steal+CPU interaction: when both fire, the CPU advice below
# mentions hypervisor contention and the separate STEAL line is suppressed.
local steal_upgrade=0 cpu_upgrade=0
grep -q "STEAL_VERDICT=UPGRADE" "$TEMP_DIR/verdicts" 2>/dev/null && steal_upgrade=1
grep -q "CPU_VERDICT=UPGRADE" "$TEMP_DIR/verdicts" 2>/dev/null && cpu_upgrade=1
if [[ "$upgrades" -gt 0 || "$has_trend_warning" -gt 0 ]]; then
echo -e " ${RED}${BOLD}▲ UPGRADE RECOMMENDED${NC}"
echo ""
if [[ "$upgrades" -gt 0 ]]; then
echo -e " Resource constraints detected:"
# NOTE: the piped while-loop runs in a subshell; that is fine here because
# nothing assigned inside it is read back afterwards.
grep "=UPGRADE" "$TEMP_DIR/verdicts" 2>/dev/null | grep -v "_TREND" | while read -r line; do
local resource
# "CPU_VERDICT=UPGRADE" -> "CPU" (token before the first underscore).
resource=$(echo "$line" | cut -d'_' -f1)
case $resource in
CPU)
if [[ $steal_upgrade -eq 1 ]]; then
echo -e " * CPU: Move to dedicated or larger instance (hypervisor contention compounding CPU pressure)"
else
echo -e " * CPU: Add more CPU cores or upgrade to faster processor"
fi
;;
MEM) echo -e " * Memory: Increase RAM allocation" ;;
LOAD) echo -e " * Load: Scale horizontally or upgrade server tier" ;;
IO) echo -e " * I/O Wait: Upgrade to SSD/NVMe or add more IOPS" ;;
STEAL)
# Skip dedicated steal advice when the CPU line above already covers it.
if [[ $cpu_upgrade -eq 0 ]]; then
echo -e " * Steal: Move to dedicated instance or larger VM to reduce hypervisor contention"
fi
;;
NET) echo -e " * Network: Upgrade NIC or add bonding" ;;
DISK) echo -e " * Disk: Upgrade to SSD/NVMe or add more IOPS" ;;
PHP) echo -e " * PHP Workers: Increase pm.max_children in PHP-FPM config" ;;
esac
done
echo ""
fi
if [[ "$has_trend_warning" -gt 0 ]]; then
echo -e " Approaching capacity:"
grep "_TREND=RISING" "$TEMP_DIR/verdicts" 2>/dev/null | while read -r line; do
local resource days_to
resource=$(echo "$line" | cut -d'_' -f1)
# "CPU_TREND=RISING:12" -> "12" (projected days until threshold).
days_to=$(echo "$line" | sed 's/.*RISING://')
echo -e " * ${resource}: rising -- projected to hit threshold in ~${days_to} days"
done
echo ""
fi
elif [[ "$downgrades" -ge 3 ]]; then
# Require at least three independent downgrade signals before suggesting a
# smaller instance, to avoid downsizing on a single quiet metric.
echo -e " ${BLUE}${BOLD}▼ DOWNGRADE POSSIBLE${NC}"
echo ""
echo -e " Server appears overprovisioned. Consider:"
echo -e " * Moving to a smaller instance size"
echo -e " * Reducing CPU/RAM allocation"
echo -e " * Potential cost savings available"
echo ""
else
echo -e " ${GREEN}${BOLD}● RIGHT-SIZED${NC}"
echo ""
echo -e " Current capacity matches workload demands."
echo -e " No immediate changes recommended."
echo ""
fi
# List OK resources
local ok_resources=""
for resource in CPU MEM LOAD IO STEAL NET DISK PHP; do
if grep -q "${resource}_VERDICT=OK" "$TEMP_DIR/verdicts" 2>/dev/null; then
case $resource in
CPU) ok_resources="${ok_resources}CPU, " ;;
MEM) ok_resources="${ok_resources}Memory, " ;;
LOAD) ok_resources="${ok_resources}Load, " ;;
IO) ok_resources="${ok_resources}I/O Wait, " ;;
STEAL) ok_resources="${ok_resources}Steal Time, " ;;
NET) ok_resources="${ok_resources}Network, " ;;
DISK) ok_resources="${ok_resources}Disk, " ;;
PHP) ok_resources="${ok_resources}PHP Workers, " ;;
esac
fi
done
if [[ -n "$ok_resources" ]]; then
ok_resources="${ok_resources%, }" # trim trailing comma
echo -e " ${DIM}No action needed: ${ok_resources}${NC}"
echo ""
fi
echo -e "${DIM} Generated: $(date)${NC}"
echo ""
}
main() {
  # Orchestrate the full run: banner, dependency checks, SAR environment
  # detection, data collection, per-resource analyses, trends, peak hours,
  # and the final capacity verdict.
  echo ""
  echo -e "${BOLD}╔════════════════════════════════════════════════════════╗${NC}"
  printf "${BOLD}║ Server Capacity Analyzer ║${NC}\n"
  printf "${BOLD}║ Analyzing %-3s days of performance data ║${NC}\n" "$DAYS"
  echo -e "${BOLD}╚════════════════════════════════════════════════════════╝${NC}"
  echo ""
  # Hard requirements: sar (sysstat) supplies the data, bc does the math.
  if ! command -v sar &>/dev/null; then
    echo -e "${RED}Error: sar not found. Install sysstat.${NC}"
    exit 1
  fi
  if ! command -v bc &>/dev/null; then
    echo -e "${RED}Error: bc not found.${NC}"
    exit 1
  fi
  # Probe the local sysstat layout/format before touching any data.
  detect_sar_dir
  detect_time_format
  detect_sar_q_variant
  echo -e "${GREEN}✓${NC} SAR data: ${SAR_DIR}"
  echo -e "${GREEN}✓${NC} Time format: ${TIME_FORMAT}"
  echo -e "${GREEN}✓${NC} Load cmd: ${SAR_Q_CMD}"
  echo -e "${GREEN}✓${NC} System: $(hostname) ($(get_cores) cores, $(get_total_mem_gb) GB RAM)"
  echo ""
  collect_data
  echo ""
  # Each analyzer prints its own section; a blank line separates sections.
  local section
  for section in analyze_cpu analyze_steal analyze_memory analyze_load \
    analyze_io analyze_network analyze_disk; do
    "$section"
    echo ""
  done
  analyze_php_fpm
  daily_summary
  peak_hours
  final_verdict
}
# Entry point: forward the script's arguments so main can parse them itself
# if it ever grows argument handling (standard bash idiom).
main "$@"