HEX
Server: nginx/1.29.3
System: Linux 11979.bigscoots-wpo.com 6.8.0-88-generic #89-Ubuntu SMP PREEMPT_DYNAMIC Sat Oct 11 01:02:46 UTC 2025 x86_64
User: nginx (1068)
PHP: 7.4.33
Disabled: exec,system,passthru,shell_exec,proc_open,proc_close,popen,show_source,cmd# Do not modify this line # 1684243876
Upload Files
File: //bigscoots/wpo/extras/wp_cron_debugger.sh
#!/usr/bin/env bash
#
# Runs the WordPress cron hooks that are currently due for one site, one hook
# at a time, while sampling the memory used by each run.
#
# Usage: <script> <domain.com> [single_hook_name]
#   If no second argument is given, the HOOK environment variable (when set)
#   is used as the single hook name.
set -euo pipefail

### --- CONFIG --- ###
COMMON_SH="/bigscoots/includes/common.sh"        # wpcli() + send_slack_alert()
LOG_BASE="/root/.bigscoots/logs/wpcron"

# Thresholds as % of total system RAM
ALERT_PCT=25
KILL_PCT=50

# Monitor sampling / grace time (seconds)
SAMPLE_SEC=1
GRACE_SEC=10

# Slack settings
SLACK_CHANNEL="#engineering"
SLACK_TAG="wpcron-monitor"
### ------------- ###

# ---- Args ----
if [[ $# -lt 1 ]]; then
  # Usage errors are diagnostics, so they go to stderr.
  printf 'Usage: %s <domain.com> [single_hook_name]\n' "$0" >&2
  exit 1
fi
DOMAIN="$1"
SINGLE_HOOK="${2:-${HOOK:-}}"

# DOMAIN is embedded in the log directory, the lock file name and the site
# path below, so refuse anything that is not a plain host name (no slashes,
# quotes or whitespace).
DOMAIN_RE='^[A-Za-z0-9]([A-Za-z0-9._-]*[A-Za-z0-9])?$'
if [[ ! "${DOMAIN}" =~ ${DOMAIN_RE} ]]; then
  printf 'Invalid domain: %s\n' "${DOMAIN}" >&2
  exit 1
fi

WP_PATH="/home/nginx/domains/${DOMAIN}/public"
LOG_DIR="${LOG_BASE}/${DOMAIN}"
HOOK_LOG_DIR="${LOG_DIR}/hooks"
LOCK_FILE="/var/lock/wpcron_runner_${DOMAIN}.lock"

mkdir -p "${HOOK_LOG_DIR}"

# Print the current local time as "YYYY-MM-DD HH:MM:SS +ZZZZ".
timestamp() {
  printf '%(%Y-%m-%d %H:%M:%S %z)T\n' -1
}
# Print the current local date as "YYYY-MM-DD".
today() {
  printf '%(%Y-%m-%d)T\n' -1
}
# Print the path of the per-day log file inside LOG_DIR ("<LOG_DIR>/<date>.log").
logfile() {
  local day
  day="$(today)"
  printf '%s/%s.log\n' "${LOG_DIR}" "${day}"
}

# Append one line "<timestamp> | <DOMAIN> | <message>" to today's log file.
# Arguments: $1 - message text
# Outputs:   nothing on stdout; the line goes only to the file named by logfile().
log() {
  local text="$1"
  local entry
  entry="$(timestamp) | ${DOMAIN} | ${text}"
  printf '%s\n' "${entry}" >> "$(logfile)"
}

#######################################
# Record an alert in the log and forward it to Slack.
# Globals:   SLACK_CHANNEL, SLACK_TAG, DOMAIN (read)
# Arguments: $1 - level (INFO/WARN/CRIT)
#            $2 - emoji passed to Slack
#            $3 - hook name
#            $4 - run id
#            $5 - message text
# Returns:   status of the log calls; a failed Slack delivery is logged but
#            does not make this function fail, so a caller running under
#            `set -e` is not aborted just because an alert could not be sent.
#######################################
send_alert() {
  local level="$1"   # INFO/WARN/CRIT
  local emoji="$2"
  local hook="$3"
  local run_id="$4"
  local msg="$5"

  log "[ALERT:${level}] ${msg}"

  # send_slack_alert is not defined in this file (the CONFIG comment lists it
  # as coming from COMMON_SH). Call shape:
  #   send_slack_alert(channel, emoji, tag, domain, message)
  if ! send_slack_alert "${SLACK_CHANNEL}" "${emoji}" "${SLACK_TAG}" "${DOMAIN}" \
      "(${hook} / ${run_id}) ${msg}"; then
    log "[ALERT:${level}] Slack delivery failed for (${hook} / ${run_id})"
  fi
}

# Print the second whitespace-separated field of every /proc/meminfo line that
# contains "MemTotal:" (the total RAM figure reported by the kernel).
mem_total_kb() {
  local line key value rest
  while IFS= read -r line; do
    if [[ "${line}" == *MemTotal:* ]]; then
      read -r key value rest <<< "${line}"
      printf '%s\n' "${value}"
    fi
  done < /proc/meminfo
}
#######################################
# Sum the RSS column that `ps -o rss= -g <id>` reports for the given id.
# Arguments: $1 - id handed to `ps -g`
#            NOTE(review): procps documents -g as selecting by session (or
#            group name); callers pass the id of a process started with
#            setsid - confirm this matches the intended process set.
# Outputs:   the sum on stdout, or 0 when ps reports nothing
# Returns:   0 even when ps finds no matching process. Under `pipefail` a
#            failing ps would otherwise fail the whole function and abort a
#            `set -e` caller when the processes exit between two samples.
#######################################
pg_rss_kb() {
  local pgid="$1"
  { ps -o rss= -g "${pgid}" 2>/dev/null || true; } \
    | awk '{ sum += $1 } END { print sum + 0 }'
}
#######################################
# Express a part as a percentage of a total, with one decimal place.
# Arguments: $1 - part
#            $2 - total
# Outputs:   "0" when the total is zero, empty or non-numeric (avoids an awk
#            division by zero); otherwise the percentage formatted "%.1f".
#######################################
pct_of_total() {
  awk -v p="$1" -v t="$2" 'BEGIN {
    # t + 0 forces a numeric comparison so an empty total also counts as zero.
    if (t + 0 == 0) {
      print 0
    } else {
      printf "%.1f", (p * 100.0) / t
    }
  }'
}

#######################################
# Run one WP-Cron hook in its own session and watch its memory use.
#
# The hook is started with `wp cron event run <hook>` through setsid. While it
# is alive, the memory reported by pg_rss_kb is sampled every SAMPLE_SEC
# seconds. Crossing ALERT_PCT sends one warning; crossing KILL_PCT sends a
# critical alert, signals the process group with TERM, waits GRACE_SEC and
# then sends KILL if the child is still alive.
#
# Globals:   COMMON_SH, WP_PATH, HOOK_LOG_DIR, ALERT_PCT, KILL_PCT,
#            SAMPLE_SEC, GRACE_SEC (read)
# Arguments: $1 - hook name
# Outputs:   START/END lines plus the hook's prefixed stdout/stderr appended to
#            "<HOOK_LOG_DIR>/<hook>.log" ('/' in the hook name becomes '_');
#            summary lines via log(); alerts via send_alert().
# Returns:   0 always - the hook's own exit status is logged and alerted on,
#            not propagated, so one failing hook does not stop the caller.
#######################################
run_hook() {
  local hook="$1"

  local start_ts end_ts duration
  local run_id hook_log child_script
  local child_pid pgid total_kb current_kb current_pct peak_pct
  local peak_kb=0 exceeded_alert=0 exit_code=0

  start_ts=$(date +%s)
  run_id="${hook}-$(date +%Y%m%dT%H%M%S)"
  # A '/' in the hook name would point the log path at a missing directory.
  hook_log="${HOOK_LOG_DIR}/${hook//\//_}.log"

  log "Starting hook='${hook}' run_id='${run_id}'"
  echo "$(timestamp) | START ${run_id}" >> "${hook_log}"

  # The hook name and paths are passed as positional parameters instead of
  # being pasted into the command string, so quotes or shell metacharacters in
  # them cannot change the command that is executed. The parameters are
  # cleared again before COMMON_SH is sourced so it sees none, as before.
  # shellcheck disable=SC2016
  child_script='
    __wpcron_lib=$1; __wpcron_hook=$2; __wpcron_path=$3
    set --
    source "${__wpcron_lib}"
    wp cron event run "${__wpcron_hook}" --allow-root --path="${__wpcron_path}"
  '

  setsid /bin/bash -lc "${child_script}" /bin/bash "${COMMON_SH}" "${hook}" "${WP_PATH}" \
    > >(awk -v r="${run_id}" '{print strftime("%Y-%m-%d %H:%M:%S %z"), "|", r, "| STDOUT |", $0}' | tee -a "${hook_log}") \
    2> >(awk -v r="${run_id}" '{print strftime("%Y-%m-%d %H:%M:%S %z"), "|", r, "| STDERR |", $0}' | tee -a "${hook_log}" >&2) &
  child_pid=$!

  # Brief pause before asking ps for the child's process group id; fall back
  # to the child's pid when ps reports nothing.
  sleep 0.05
  pgid=$(ps -o pgid= -p "${child_pid}" 2>/dev/null | tr -d ' ' || echo "")
  if [[ -z "${pgid}" ]]; then
    pgid="${child_pid}"
  fi

  total_kb=$(mem_total_kb)

  while kill -0 "${child_pid}" 2>/dev/null; do
    # The child may exit between the liveness check and the sample; treat a
    # failed or empty sample as 0 instead of aborting the monitor.
    current_kb=$(pg_rss_kb "${pgid}") || current_kb=0
    current_kb="${current_kb:-0}"
    if (( current_kb > peak_kb )); then
      peak_kb="${current_kb}"
    fi
    current_pct=$(pct_of_total "${current_kb}" "${total_kb}")

    # Alerts are best-effort: a delivery failure must not end the monitoring
    # of a hook that is still running.
    if (( exceeded_alert == 0 )) && awk -v c="${current_pct}" -v t="${ALERT_PCT}" 'BEGIN{exit !(c>=t)}'; then
      send_alert "WARN" ":warning:" "${hook}" "${run_id}" "exceeded ${ALERT_PCT}% RAM (current ~${current_pct}%)." || true
      exceeded_alert=1
    fi

    if awk -v c="${current_pct}" -v t="${KILL_PCT}" 'BEGIN{exit !(c>=t)}'; then
      send_alert "CRIT" ":rotating_light:" "${hook}" "${run_id}" "KILLING for exceeding ${KILL_PCT}% RAM (current ~${current_pct}%)." || true
      pkill -TERM -g "${pgid}" || true
      sleep "${GRACE_SEC}"
      if kill -0 "${child_pid}" 2>/dev/null; then
        pkill -KILL -g "${pgid}" || true
      fi
      break
    fi

    sleep "${SAMPLE_SEC}"
  done

  # Capture the child's real status; `wait ... || true` followed by $? would
  # always record 0.
  wait "${child_pid}" 2>/dev/null || exit_code=$?

  end_ts=$(date +%s)
  duration=$(( end_ts - start_ts ))
  peak_pct=$(pct_of_total "${peak_kb}" "${total_kb}")

  log "Finished hook='${hook}' run_id='${run_id}' exit=${exit_code} duration=${duration}s peak_mem=${peak_kb}KB (~${peak_pct}%)"
  echo "$(timestamp) | END   ${run_id} | exit=${exit_code} | duration=${duration}s | peak=${peak_kb}KB (~${peak_pct}%)" >> "${hook_log}"

  if (( exit_code != 0 )); then
    send_alert "WARN" ":x:" "${hook}" "${run_id}" "finished with non-zero exit=${exit_code} (duration ${duration}s, peak ~${peak_pct}%)." || true
  fi

  # Explicit success: a trailing `[[ ... ]] && cmd` would return 1 for a
  # successful hook and stop a `set -e` caller after the first hook.
  return 0
}

#######################################
# Entry point: take the per-domain lock, find the hooks that are due now and
# run them one after another through run_hook.
#
# Globals:   LOG_DIR, LOCK_FILE, WP_PATH, COMMON_SH, DOMAIN, SINGLE_HOOK (read)
# Outputs:   progress lines via log()
# Exits:     0 when another instance holds the lock, when nothing is due, or
#            when SINGLE_HOOK is set but not due; 1 when WP_PATH is missing or
#            no `wp` command is available after sourcing COMMON_SH.
#######################################
main() {
  mkdir -p "${LOG_DIR}"

  # Per-domain lock to prevent overlap for the same site. File descriptor 9
  # stays open for the life of the shell, which is what holds the lock.
  exec 9>"${LOCK_FILE}"
  if ! flock -n 9; then
    log "Another instance is running for ${DOMAIN}; exiting."
    exit 0
  fi

  # Ensure the path exists (basic sanity)
  if [[ ! -d "${WP_PATH}" ]]; then
    log "WP_PATH not found: ${WP_PATH}"
    exit 1
  fi

  # Make sure common.sh is loaded in this shell too
  # shellcheck disable=SC1090
  source "${COMMON_SH}"
  if ! type -t wp >/dev/null 2>&1; then
    log "wp command not found in PATH; aborting."
    exit 1
  fi

  # List the hooks whose next run is 'now'. Paths travel as positional
  # parameters (cleared before COMMON_SH is sourced) rather than being pasted
  # into the command string, so unusual characters cannot alter the command.
  local list_script due_output list_rc=0
  # shellcheck disable=SC2016
  list_script='
    __wpcron_lib=$1; __wpcron_path=$2
    set --
    source "${__wpcron_lib}"
    wpcli cron event list --next_run_relative=now --field=hook --path="${__wpcron_path}"
  '
  due_output=$(/bin/bash -lc "${list_script}" /bin/bash "${COMMON_SH}" "${WP_PATH}" 2>/dev/null) \
    || list_rc=$?
  if (( list_rc != 0 )); then
    # Best-effort, as before: keep going with whatever was printed, but leave
    # a trace so a broken listing is not mistaken for "nothing due".
    log "Listing due hooks failed (exit=${list_rc}); continuing with any output received."
  fi

  # One hook per line; blank lines are dropped so an empty hook is never run.
  local -a due_hooks=()
  local line
  while IFS= read -r line; do
    if [[ -n "${line}" ]]; then
      due_hooks+=("${line}")
    fi
  done <<< "${due_output}"

  local -a hooks=()
  if [[ -n "${SINGLE_HOOK}" ]]; then
    # Only run this hook if it's actually due now. The captured list is
    # compared directly: piping into `grep -q` under pipefail can report a
    # false "not due" when grep exits early and the producer gets SIGPIPE.
    local candidate is_due=0
    if (( ${#due_hooks[@]} > 0 )); then
      for candidate in "${due_hooks[@]}"; do
        if [[ "${candidate}" == "${SINGLE_HOOK}" ]]; then
          is_due=1
          break
        fi
      done
    fi
    if (( is_due == 0 )); then
      log "Single-hook '${SINGLE_HOOK}' is not due now; skipping."
      exit 0
    fi
    hooks=("${SINGLE_HOOK}")
  elif (( ${#due_hooks[@]} > 0 )); then
    hooks=("${due_hooks[@]}")
  fi

  if [[ ${#hooks[@]} -eq 0 ]]; then
    log "No hooks due now."
    exit 0
  fi

  log "Hooks due now: ${hooks[*]}"
  local hook
  for hook in "${hooks[@]}"; do
    run_hook "${hook}"
  done
}

# Script entry point: hand all command-line arguments to main.
main "$@"