HEX
Server: nginx/1.29.3
System: Linux 11979.bigscoots-wpo.com 6.8.0-88-generic #89-Ubuntu SMP PREEMPT_DYNAMIC Sat Oct 11 01:02:46 UTC 2025 x86_64
User: nginx (1068)
PHP: 7.4.33
Disabled: exec,system,passthru,shell_exec,proc_open,proc_close,popen,show_source,cmd# Do not modify this line # 1684243876
Upload Files
File: //bigscoots/ovz/node/lxdbackup.sh
#!/bin/bash
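#
# LXD full-instance backup for ZFS-backed containers.
# Finalized Production Version
#
# Snapshots an instance, streams a zstd-compressed tar of the snapshot's
# rootfs to S3-compatible storage, and prunes older backups of that instance.
#
# Usage:
#   Backup: lxdbackup.sh --s3 INSTANCE_NAME
#   List:   lxdbackup.sh --list INSTANCE_NAME [--from-node=NODE]
#
# Environment:
#   LXD_BACKUP_DEBUG=1   enable shell command tracing (set -x)
#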

# Strict mode: stop on any unhandled error, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Optional command tracing, switched on by exporting LXD_BACKUP_DEBUG=1.
if [[ "${LXD_BACKUP_DEBUG:-0}" == "1" ]]; then
  set -o xtrace
fi

# 1. --- GLOBAL STATE (initialised up front so the EXIT trap is safe) ---
# cleanup_on_exit reads these; giving them values here means the handler can
# run even when the script stops before a backup has started.
LOG_FILE='/var/log/lxd-backup.log'
INSTANCE=''
SNAP_NAME=''
SNAP_MNT=''

# Global logging: make sure the log file exists, send stdout through a tee
# that appends to it, then fold stderr into that same stream.
touch "${LOG_FILE}"
exec 1> >(tee -a "${LOG_FILE}")
exec 2>&1

# 2. --- CONFIGURATION ---
# Local state directory and the settings file kept inside it. The settings
# file is sourced by get_keep_value when it is non-empty.
BSPATH="/root/.bigscoots"
BACKUPINFO="${BSPATH}/backupinfo"

# Number of backups to retain unless the settings file overrides it.
KEEP_DEFAULT=3

# S3-compatible object store that receives the backups.
S3_ENDPOINT='https://s3-egv.bscoots.dev'
S3_BUCKET='scoots-egv-vps'
# Extra AWS CLI flags. aws_s3 expands this unquoted, so separate multiple
# flags with whitespace.
S3_OPTS='--no-verify-ssl'

# Create the state directory and settings file before anything reads them.
mkdir -p "${BSPATH}"
touch "${BACKUPINFO}"

#######################################
# Helpers
#######################################

# Write one timestamped line to stderr.
# Arguments: $* - message words (joined by the first character of IFS)
# Outputs:   "[YYYY-MM-DD HH:MM:SS] message" on stderr; nothing on stdout
log() {
  printf '[%s] %s\n' "$(date '+%F %T')" "$*" >&2
}

# --- THE GLOBAL EXIT TRAP ---
#######################################
# EXIT handler: reports a failed run, then releases the snapshot mount and
# removes the temporary LXD snapshot.
# Every step is best-effort. The handler runs with errexit/pipefail still in
# effect, so an unguarded failing command here would end the handler early and
# skip the remaining cleanup.
# Globals:   INSTANCE, SNAP_NAME, SNAP_MNT, LOG_FILE (read)
# Arguments: none (captures $? of whatever triggered the exit)
# Outputs:   progress messages via log; Slack alert when send_slack_alert
#            is available
#######################################
cleanup_on_exit() {
  local exit_code=$?
  local snapshot_listing=""

  # Log and alert only when the script is exiting with a failure.
  if [[ "$exit_code" -ne 0 ]]; then
    log "PROCESS FAILED (Code: $exit_code). Sending Alert..."
    if command -v send_slack_alert >/dev/null 2>&1; then
      # A failed alert must not prevent the unmount/snapshot cleanup below.
      send_slack_alert "#node-alerts" ":fire:" "LXD Backup FAILED" "danger" \
        "Instance: ${INSTANCE:-unknown} on $(hostname -s) failed. Check ${LOG_FILE}" \
        || log "WARNING: Slack alert could not be sent."
    fi
  fi

  # Release ZFS lock by unmounting (using :- fallback to satisfy set -u)
  if [[ -n "${SNAP_MNT:-}" && -d "${SNAP_MNT:-}" ]]; then
    log "Trap Cleanup: Unmounting ${SNAP_MNT}"
    umount -l "${SNAP_MNT}" 2>/dev/null || true
    rmdir "${SNAP_MNT}" 2>/dev/null || true
  fi

  # Delete the LXD snapshot, if it was actually created.
  if [[ -n "${INSTANCE:-}" && -n "${SNAP_NAME:-}" ]]; then
    # Capture the listing first: piping straight into `grep -q` can report
    # failure under pipefail when grep exits before lxc finishes writing.
    snapshot_listing=$(lxc info "${INSTANCE}" 2>/dev/null) || snapshot_listing=""
    if grep -qF -- "${SNAP_NAME}" <<< "${snapshot_listing}"; then
      log "Trap Cleanup: Deleting snapshot ${INSTANCE}/${SNAP_NAME}"
      lxc delete "${INSTANCE}/${SNAP_NAME}" || true
    fi
  fi
}
# Register trap globally
trap cleanup_on_exit EXIT

#######################################
# Abort the run when the root filesystem has less than 10G available.
# Globals:   none
# Arguments: none
# Outputs:   log messages; Slack alert when send_slack_alert is available
# Exits:     1 when free space is below the threshold or cannot be determined
#######################################
check_disk_space() {
  local -r min_gb=10
  local free_gb

  # df prints a header line followed by a value such as " 42G"; keep only the
  # digits of the last line.
  free_gb=$(df / --output=avail -BG | tail -n1 | tr -d ' G')

  # A non-numeric value would make the numeric comparison below error out and
  # be treated as "enough space", so reject it explicitly.
  if [[ ! "$free_gb" =~ ^[0-9]+$ ]]; then
    log "CRITICAL: Could not determine free space on / (df reported '${free_gb}'). Aborting."
    exit 1
  fi

  if [ "$free_gb" -lt "$min_gb" ]; then
    log "CRITICAL: Only ${free_gb}GB left on / partition. Aborting."
    if command -v send_slack_alert >/dev/null 2>&1; then
      # Best-effort: the exit code below must stay 1 even if the alert fails.
      send_slack_alert "#node-alerts" ":warning:" "LXD Backup ABORTED" "danger" \
        "Host $(hostname -s) has only ${free_gb}GB free. Backup skipped." || true
    fi
    exit 1
  fi
}

#######################################
# Verify the AWS CLI is available and export the credentials it has configured.
# Globals:   AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY (written and exported)
# Arguments: none
# Exits:     1 when the CLI is missing or either credential is empty
#######################################
ensure_aws_ready() {
  command -v aws >/dev/null 2>&1 || {
    log "ERROR: AWS CLI not installed."
    exit 1
  }

  # A failed lookup is tolerated here and yields an empty value, which the
  # check below rejects.
  AWS_ACCESS_KEY_ID=$(aws configure get aws_access_key_id 2>/dev/null || true)
  export AWS_ACCESS_KEY_ID
  AWS_SECRET_ACCESS_KEY=$(aws configure get aws_secret_access_key 2>/dev/null || true)
  export AWS_SECRET_ACCESS_KEY

  if [[ -n "$AWS_ACCESS_KEY_ID" && -n "$AWS_SECRET_ACCESS_KEY" ]]; then
    return 0
  fi

  log "ERROR: AWS credentials missing."
  exit 1
}

#######################################
# Run an `aws s3` subcommand against the configured endpoint.
# Globals:   S3_ENDPOINT, S3_OPTS (read)
# Arguments: "$@" - the s3 subcommand and its arguments
# Outputs:   aws stdout unchanged; aws stderr with lines containing
#            'InsecureRequestWarning' removed
# Returns:   exit status of the aws command
#######################################
aws_s3() {
  # S3_OPTS is deliberately unquoted so it splits into separate flags.
  # shellcheck disable=SC2206
  local -a aws_cmd=(aws --endpoint-url "$S3_ENDPOINT" $S3_OPTS s3 "$@")

  AWS_EC2_METADATA_DISABLED=true PYTHONWARNINGS="ignore" "${aws_cmd[@]}" \
    2> >(grep -v 'InsecureRequestWarning' >&2)
}

#######################################
# Print how many backups to retain.
# Starts from KEEP_DEFAULT and sources BACKUPINFO (when non-empty), which may
# assign `keep`. A value that is not a positive integer is discarded in favour
# of the default: the pruning step would otherwise delete every backup
# (keep=0) or fail in its arithmetic (non-numeric, leading zeros).
# Globals:   KEEP_DEFAULT, BACKUPINFO (read)
# Arguments: none
# Outputs:   the retention count on stdout; a warning via log when the
#            configured value is rejected
#######################################
get_keep_value() {
  local fallback="$KEEP_DEFAULT"
  local keep="$fallback"

  if [[ -s "$BACKUPINFO" ]]; then
    # shellcheck disable=SC1090
    source "$BACKUPINFO"
  fi

  if [[ ! "$keep" =~ ^[0-9]+$ ]] || (( 10#$keep < 1 )); then
    log "WARNING: Invalid keep value '${keep}' in ${BACKUPINFO}; using ${fallback}."
    keep="$fallback"
  fi

  # Force base 10 so a value such as "08" is not read as invalid octal.
  printf '%s\n' "$(( 10#$keep ))"
}

#######################################
# Delete the oldest S3 backups of an instance, keeping the newest N.
# Backups are the objects named lxdbackup_<inst>_<timestamp>.tar.zst under the
# node prefix; each has a companion lxdconfig_<inst>_<timestamp>.yaml that is
# removed together with it. Deletion is best-effort and never fails the run.
# Globals:   S3_BUCKET (read)
# Arguments: $1 - instance name
#            $2 - node prefix (top-level folder in the bucket)
#            $3 - number of backups to keep (positive integer)
# Returns:   0 always
#######################################
cleanup_s3_backups() {
  local inst="$1"
  local node_prefix="$2"
  local keep_count="$3"
  local base_url="s3://${S3_BUCKET}/${node_prefix}"
  local key_prefix="lxdbackup_${inst}_"

  # A count of 0 would remove every backup, including the one just uploaded;
  # a non-numeric count would break the arithmetic below. Refuse both.
  if [[ ! "$keep_count" =~ ^[0-9]+$ ]] || (( 10#$keep_count < 1 )); then
    log "WARNING: Invalid retention count '${keep_count}'; skipping S3 cleanup."
    return 0
  fi
  keep_count=$(( 10#$keep_count ))

  log "Cleaning up old S3 backups (KEEP=${keep_count})"

  # Collect this instance's backup keys, oldest first. The timestamp in the
  # name sorts chronologically as plain text. A failed listing yields an empty
  # list, in which case nothing is deleted.
  local -a keys=()
  mapfile -t keys < <(
    aws_s3 ls "${base_url}/" 2>/dev/null \
      | awk -v prefix="$key_prefix" \
          'index($4, prefix) == 1 && $4 ~ /\.tar\.zst$/ { print $4 }' \
      | LC_ALL=C sort
  )

  local total=${#keys[@]}
  if (( total <= keep_count )); then
    return 0
  fi

  local excess=$(( total - keep_count ))
  local key ts
  for key in "${keys[@]:0:excess}"; do
    log "S3: Deleting old backup: ${key}"
    if ! aws_s3 rm "${base_url}/${key}"; then
      # Keep the config next to a tarball that could not be removed.
      log "WARNING: Could not delete ${key}; leaving its config in place."
      continue
    fi
    # Strip the fixed prefix and suffix to recover the timestamp.
    ts=${key#"$key_prefix"}
    ts=${ts%.tar.zst}
    aws_s3 rm "${base_url}/lxdconfig_${inst}_${ts}.yaml" 2>/dev/null || true
  done
  return 0
}

#######################################
# ACTIONS
#######################################

#######################################
# List the backups stored for an instance under a node prefix.
# Globals:   INSTANCE (written, so the exit trap can name the instance)
#            S3_BUCKET (read)
# Arguments: $1 - instance name
#            $2 - node prefix (top-level folder in the bucket)
# Outputs:   matching listing lines on stdout, or "No backups found."
# Exits:     1 when the listing itself fails
#######################################
do_list() {
  local inst="$1"
  local node_prefix="$2"
  # Set global INSTANCE so trap logging works if this fails
  INSTANCE="$inst"
  ensure_aws_ready
  log "Listing backups for ${inst} on node ${node_prefix}..."

  local listing
  local rc=0
  listing=$(aws_s3 ls "s3://${S3_BUCKET}/${node_prefix}/") || rc=$?

  # The AWS CLI exits with 1 when `s3 ls` finds nothing under the prefix;
  # higher codes are real failures (network, auth, bad arguments) and must not
  # be reported as an empty result.
  if (( rc > 1 )); then
    log "ERROR: Could not list s3://${S3_BUCKET}/${node_prefix}/ (aws exit code ${rc})."
    exit 1
  fi

  # Fixed-string match: the instance name is data, not a pattern.
  grep -F -- "lxdbackup_${inst}_" <<< "$listing" || echo "No backups found."
}

#######################################
# Back up one LXD instance to S3.
# Takes an LXD snapshot, mounts its ZFS dataset, uploads the expanded instance
# config, streams the snapshot's rootfs as a zstd-compressed tar through
# rclone, then prunes old backups. The mount and the snapshot are released by
# the script's EXIT trap (cleanup_on_exit), on success and on failure alike.
# Globals:   INSTANCE, SNAP_NAME, SNAP_MNT (written; read by the EXIT trap)
#            S3_BUCKET, S3_ENDPOINT (read)
#            AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY (read after
#            ensure_aws_ready has exported them)
#            RCLONE_CONFIG_CEPH_* (exported for rclone)
# Arguments: $1 - instance name
#            $2 - "yes" when --s3 was given; anything else aborts
# Exits:     1 on validation failures; any other failing step ends the script
#            through errexit
#######################################
do_backup() {
  INSTANCE="$1" # Assigned to global
  local use_s3="$2"

  if [[ "$use_s3" != "yes" ]]; then
    log "ERROR: --s3 flag required for backup."
    exit 1
  fi

  check_disk_space
  ensure_aws_ready

  log "Checking LXD instance: ${INSTANCE}"
  if ! lxc info "${INSTANCE}" &>/dev/null; then
    log "ERROR: Instance '${INSTANCE}' not found."
    exit 1
  fi

  local NODE
  NODE=$(hostname -s)
  local TIMESTAMP
  TIMESTAMP=$(date +%Y%m%d%H%M%S)
  local KEEP
  KEEP=$(get_keep_value)

  # Set before the snapshot is taken so the EXIT trap knows what to remove.
  SNAP_NAME="bkp-${TIMESTAMP}"

  log "Creating LXD snapshot ${INSTANCE}/${SNAP_NAME}"
  lxc snapshot "${INSTANCE}" "${SNAP_NAME}"

  # Find the ZFS dataset backing the snapshot. `|| true` absorbs grep's
  # no-match status; the empty result is handled just below.
  local SNAP_DATASET
  SNAP_DATASET=$(zfs list -t snapshot -Ho name | grep "/containers/${INSTANCE}@snapshot-${SNAP_NAME}$" | head -n1 || true)

  if [[ -z "$SNAP_DATASET" ]]; then
    log "ERROR: ZFS snapshot dataset not found."
    exit 1
  fi

  SNAP_MNT=$(mktemp -d "/mnt/lxd-snap-${INSTANCE}-${TIMESTAMP}-XXXX")
  log "Mounting ZFS dataset at ${SNAP_MNT}"
  mount -t zfs "$SNAP_DATASET" "$SNAP_MNT"

  local SNAP_ROOT="${SNAP_MNT}/rootfs"
  if [[ ! -d "$SNAP_ROOT" ]]; then
    log "ERROR: rootfs missing."
    exit 1
  fi

  # Export the instance config next to the tarball. mktemp gives a private,
  # unpredictable file instead of a guessable name in /tmp, and the file is
  # removed whether or not the export/upload succeeds.
  local CFG_TMP
  CFG_TMP=$(mktemp "/tmp/lxdconfig_${INSTANCE}_${TIMESTAMP}.XXXXXX")
  if ! lxc config show "${INSTANCE}" --expanded > "$CFG_TMP" \
    || ! aws_s3 cp "$CFG_TMP" "s3://${S3_BUCKET}/${NODE}/lxdconfig_${INSTANCE}_${TIMESTAMP}.yaml"; then
    rm -f -- "$CFG_TMP"
    log "ERROR: Could not export or upload the instance configuration."
    exit 1
  fi
  rm -f -- "$CFG_TMP"

  # Configure an rclone remote named "ceph" purely through the environment.
  export RCLONE_CONFIG_CEPH_TYPE="s3"
  export RCLONE_CONFIG_CEPH_PROVIDER="Ceph"
  export RCLONE_CONFIG_CEPH_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID"
  export RCLONE_CONFIG_CEPH_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY"
  export RCLONE_CONFIG_CEPH_ENDPOINT="$S3_ENDPOINT"

  log "Streaming to S3..."
  # Subshell keeps the `cd` local. pipefail makes a failure in tar or zstd
  # fail the whole upload instead of only rclone's status counting.
  (
    set -o pipefail
    cd "$SNAP_ROOT"
    tar -cpf - . 2> >(grep -v 'socket ignored' >&2) \
      | zstd -T0 \
      | rclone rcat "ceph:${S3_BUCKET}/${NODE}/lxdbackup_${INSTANCE}_${TIMESTAMP}.tar.zst" \
          --no-check-certificate \
          --s3-chunk-size=64M \
          --s3-upload-concurrency=4
  )

  log "S3 upload successful."
  cleanup_s3_backups "$INSTANCE" "$NODE" "$KEEP"
  log "LXD BACKUP COMPLETE for ${INSTANCE}"
}

#######################################
# CLI PARSING & ROUTING
#######################################

# Defaults; overridden by the command-line flags parsed below.
ACTION="backup"
USE_S3="no"
TARGET=""
FROM_NODE="$(hostname -s)"

# Print the command-line synopsis through log (stderr).
print_usage() {
  log "Usage:"
  log "  Backup: $0 --s3 INSTANCE_NAME"
  log "  List:   $0 --list INSTANCE_NAME [--from-node=NODE]"
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --s3)
      USE_S3="yes"
      ;;
    --list)
      ACTION="list"
      ;;
    --from-node=*)
      FROM_NODE="${1#*=}"
      if [[ -z "$FROM_NODE" ]]; then
        log "ERROR: --from-node requires a node name."
        print_usage
        exit 1
      fi
      ;;
    -h | --help)
      print_usage
      exit 0
      ;;
    -*)
      # Reject unknown flags instead of treating them as the instance name.
      log "ERROR: Unknown option: $1"
      print_usage
      exit 1
      ;;
    *)
      # Exactly one instance is supported; a second one used to replace the
      # first without any warning.
      if [[ -n "$TARGET" ]]; then
        log "ERROR: Only one instance may be given (got '${TARGET}' and '$1')."
        print_usage
        exit 1
      fi
      TARGET="$1"
      ;;
  esac
  shift
done

if [[ -z "$TARGET" ]]; then
  print_usage
  exit 1
fi

case "$ACTION" in
  list)
    do_list "$TARGET" "$FROM_NODE"
    ;;
  backup)
    do_backup "$TARGET" "$USE_S3"
    ;;
esac