# File: //bigscoots/ovz/node/lxdbackup.sh
#!/bin/bash
#
# LXD full-instance backup for ZFS-backed containers.
# Finalized Production Version
#
# Streams a mounted ZFS snapshot of a container to S3 (Ceph) as a
# zstd-compressed tarball, uploads the expanded LXD config beside it,
# and prunes old copies per the retention setting.
#
# Usage:
#   lxdbackup.sh --s3 INSTANCE_NAME
#   lxdbackup.sh --list INSTANCE_NAME [--from-node=NODE]
#
# Set LXD_BACKUP_DEBUG=1 to enable xtrace output.
set -euo pipefail
[[ "${LXD_BACKUP_DEBUG:-0}" == "1" ]] && set -x
# 1. --- GLOBAL INITIALIZATION (For Trap Safety) ---
# Pre-declare the globals the EXIT trap reads so `set -u` cannot fire
# inside the trap before do_backup has assigned them.
SNAP_MNT=""   # temp mountpoint of the ZFS snapshot (set by do_backup)
SNAP_NAME=""  # LXD snapshot name, "bkp-<timestamp>"
INSTANCE=""   # instance currently being processed
LOG_FILE="/var/log/lxd-backup.log"
# Setup Global Logging: mirror all stdout/stderr into LOG_FILE via tee.
touch "$LOG_FILE"
exec > >(tee -a "$LOG_FILE") 2>&1
# 2. --- CONFIGURATION ---
BSPATH=/root/.bigscoots
BACKUPINFO="${BSPATH}/backupinfo"  # optional overrides; may set keep=N
S3_BUCKET="scoots-egv-vps"
S3_ENDPOINT="https://s3-egv.bscoots.dev"
S3_OPTS="--no-verify-ssl"          # extra aws-cli flags (word-split in aws_s3)
KEEP_DEFAULT=3                     # backups kept per instance by default
mkdir -p "$BSPATH"
touch "$BACKUPINFO"
#######################################
# Helpers
#######################################
log() {
  # Timestamped diagnostic line on stderr (stdout stays free for data).
  printf '[%s] %s\n' "$(date '+%F %T')" "$*" >&2
}
# --- THE GLOBAL EXIT TRAP ---
cleanup_on_exit() {
  # Runs on every exit path: alert on failure, then release the ZFS
  # mount and drop the working LXD snapshot if either still exists.
  local exit_code=$?  # must be captured first; any other command clobbers $?

  # Failure alert (only when the script is exiting non-zero).
  if (( exit_code != 0 )); then
    log "PROCESS FAILED (Code: $exit_code). Sending Alert..."
    if command -v send_slack_alert >/dev/null 2>&1; then
      send_slack_alert "#node-alerts" ":fire:" "LXD Backup FAILED" "danger" \
        "Instance: ${INSTANCE:-unknown} on $(hostname -s) failed. Check ${LOG_FILE}"
    fi
  fi

  # Release the snapshot mount first; a mounted dataset would block the
  # snapshot delete below.  ${VAR:-} fallbacks keep `set -u` quiet.
  if [[ -n "${SNAP_MNT:-}" && -d "${SNAP_MNT:-}" ]]; then
    log "Trap Cleanup: Unmounting ${SNAP_MNT}"
    umount -l "${SNAP_MNT}" 2>/dev/null || true
    rmdir "${SNAP_MNT}" 2>/dev/null || true
  fi

  # Best-effort delete of the working LXD snapshot, if it still exists.
  if [[ -n "${INSTANCE:-}" && -n "${SNAP_NAME:-}" ]]; then
    if lxc info "${INSTANCE}" 2>/dev/null | grep -q "${SNAP_NAME}"; then
      log "Trap Cleanup: Deleting snapshot ${INSTANCE}/${SNAP_NAME}"
      lxc delete "${INSTANCE}/${SNAP_NAME}" || true
    fi
  fi
}
# Fire on every exit path, success or failure.
trap cleanup_on_exit EXIT
check_disk_space() {
  # Abort (exit 1, with optional Slack alert) when the root filesystem
  # has less free space than the threshold in GiB.
  # $1 - optional threshold override; falls back to $LXD_BACKUP_MIN_FREE_GB,
  #      then to the historical default of 10.
  local min_gb="${1:-${LXD_BACKUP_MIN_FREE_GB:-10}}"
  local free_gb
  # df prints e.g. "  12G"; strip padding and the unit to a bare integer.
  free_gb=$(df / --output=avail -BG | tail -n1 | tr -d ' G')
  if (( free_gb < min_gb )); then
    log "CRITICAL: Only ${free_gb}GB left on / partition. Aborting."
    if command -v send_slack_alert >/dev/null 2>&1; then
      send_slack_alert "#node-alerts" ":warning:" "LXD Backup ABORTED" "danger" \
        "Host $(hostname -s) has only ${free_gb}GB free. Backup skipped."
    fi
    exit 1
  fi
}
ensure_aws_ready() {
  # Verify the AWS CLI is installed and export the credentials it has
  # configured (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY).
  # Exits 1 when the CLI or either credential is missing.
  if ! command -v aws >/dev/null 2>&1; then
    log "ERROR: AWS CLI not installed."
    exit 1
  fi
  # Assign before exporting so a failing `aws configure get` is not
  # masked by `export` (SC2155); `|| true` keeps set -e quiet because
  # missing keys are reported explicitly below.
  local access_key secret_key
  access_key=$(aws configure get aws_access_key_id 2>/dev/null || true)
  secret_key=$(aws configure get aws_secret_access_key 2>/dev/null || true)
  export AWS_ACCESS_KEY_ID="$access_key"
  export AWS_SECRET_ACCESS_KEY="$secret_key"
  if [[ -z "$AWS_ACCESS_KEY_ID" || -z "$AWS_SECRET_ACCESS_KEY" ]]; then
    log "ERROR: AWS credentials missing."
    exit 1
  fi
}
aws_s3() {
# Wrapper for `aws ... s3 "$@"` against the configured endpoint.
# AWS_EC2_METADATA_DISABLED: skip the (absent) EC2 metadata service.
# $S3_OPTS is intentionally unquoted so it can expand to several flags.
# Stderr is filtered through a process substitution to drop urllib3's
# InsecureRequestWarning noise (from --no-verify-ssl); all other
# diagnostics still reach the caller's stderr.
AWS_EC2_METADATA_DISABLED=true PYTHONWARNINGS="ignore" \
aws --endpoint-url "$S3_ENDPOINT" $S3_OPTS s3 "$@" \
2> >(grep -v 'InsecureRequestWarning' >&2)
}
get_keep_value() {
  # Print the backup retention count: KEEP_DEFAULT unless the operator
  # settings file ($BACKUPINFO) overrides it by assigning keep=N.
  local keep
  keep="$KEEP_DEFAULT"
  if [[ -s "$BACKUPINFO" ]]; then
    # shellcheck disable=SC1090  # operator-managed file, dynamic path
    . "$BACKUPINFO"
  fi
  printf '%s\n' "$keep"
}
cleanup_s3_backups() {
# Enforce the retention policy: keep only the newest $3 backups for
# instance $1 under s3://$S3_BUCKET/$2/, deleting older tarballs and
# their matching lxdconfig_*.yaml sidecars.
# $1 - instance name, $2 - node prefix, $3 - number of backups to keep.
local inst="$1"
local node_prefix="$2"
local keep_count="$3"
log "Cleaning up old S3 backups (KEEP=${keep_count})"
# Sorted object names; timestamped names sort oldest-first.  The
# trailing `|| true` keeps set -e quiet when grep matches nothing.
local backups
backups=$(aws_s3 ls "s3://${S3_BUCKET}/${node_prefix}/" 2>/dev/null \
| awk '{print $4}' \
| grep "^lxdbackup_${inst}_" \
| sort || true)
# Strip blank lines before counting so an empty result counts as 0,
# not 1 (echo of an empty string still emits one newline).
local total
total=$(echo "$backups" | sed '/^\s*$/d' | wc -l)
if (( total <= keep_count )); then return 0; fi
# Everything before the newest keep_count entries gets deleted.
local to_delete
to_delete=$(echo "$backups" | head -n "$(( total - keep_count ))")
while IFS= read -r key; do
[[ -z "$key" ]] && continue
log "S3: Deleting old backup: ${key}"
aws_s3 rm "s3://${S3_BUCKET}/${node_prefix}/${key}" || true
# Derive the timestamp from the tarball name to delete the paired
# config object; best-effort, since the sidecar may already be gone.
local ts
ts=$(echo "$key" | sed -n 's/^lxdbackup_'"$inst"'_\(.*\)\.tar\.zst$/\1/p')
aws_s3 rm "s3://${S3_BUCKET}/${node_prefix}/lxdconfig_${inst}_${ts}.yaml" 2>/dev/null || true
done <<< "$to_delete"
}
#######################################
# ACTIONS
#######################################
do_list() {
  # Print the stored backups for one instance, or a friendly message
  # when none exist.
  # $1 - instance name, $2 - node prefix (S3 "directory") to search.
  INSTANCE="$1"  # global on purpose: the EXIT trap reports this name
  local node_prefix="$2"
  ensure_aws_ready
  log "Listing backups for ${INSTANCE} on node ${node_prefix}..."
  aws_s3 ls "s3://${S3_BUCKET}/${node_prefix}/" \
    | grep "lxdbackup_${INSTANCE}_" \
    || echo "No backups found."
}
do_backup() {
  # Snapshot an LXD/ZFS instance and stream it to S3 as a zstd tarball,
  # uploading the expanded instance config alongside and pruning old
  # copies afterwards.
  # $1 - instance name, $2 - "yes" when --s3 was passed (required).
  # Globals written: INSTANCE, SNAP_NAME, SNAP_MNT (consumed by the
  # EXIT trap, which unmounts and deletes the snapshot on every exit).
  INSTANCE="$1"
  local use_s3="$2"

  if [[ "$use_s3" != "yes" ]]; then
    log "ERROR: --s3 flag required for backup."
    exit 1
  fi

  check_disk_space
  ensure_aws_ready

  # Fail early if the streaming tools are missing, before we snapshot.
  local tool
  for tool in zstd rclone; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      log "ERROR: required tool '${tool}' not installed."
      exit 1
    fi
  done

  log "Checking LXD instance: ${INSTANCE}"
  if ! lxc info "${INSTANCE}" &>/dev/null; then
    log "ERROR: Instance '${INSTANCE}' not found."
    exit 1
  fi

  local NODE TIMESTAMP KEEP
  NODE=$(hostname -s)
  TIMESTAMP=$(date +%Y%m%d%H%M%S)
  KEEP=$(get_keep_value)

  SNAP_NAME="bkp-${TIMESTAMP}"
  log "Creating LXD snapshot ${INSTANCE}/${SNAP_NAME}"
  lxc snapshot "${INSTANCE}" "${SNAP_NAME}"

  # Locate the ZFS dataset backing the snapshot we just created.
  local SNAP_DATASET
  SNAP_DATASET=$(zfs list -t snapshot -Ho name | grep "/containers/${INSTANCE}@snapshot-${SNAP_NAME}$" | head -n1 || true)
  if [[ -z "$SNAP_DATASET" ]]; then
    log "ERROR: ZFS snapshot dataset not found."
    exit 1
  fi

  # Mount the snapshot; the EXIT trap unmounts and removes SNAP_MNT.
  SNAP_MNT=$(mktemp -d "/mnt/lxd-snap-${INSTANCE}-${TIMESTAMP}-XXXX")
  log "Mounting ZFS dataset at ${SNAP_MNT}"
  mount -t zfs "$SNAP_DATASET" "$SNAP_MNT"

  local SNAP_ROOT="${SNAP_MNT}/rootfs"
  if [[ ! -d "$SNAP_ROOT" ]]; then
    log "ERROR: rootfs missing."
    exit 1
  fi

  # Upload the expanded instance config alongside the data.  mktemp
  # avoids the previous predictable /tmp name (symlink/clobber hazard);
  # the S3 object name is unchanged.
  local CFG_TMP
  CFG_TMP=$(mktemp /tmp/lxdconfig.XXXXXX)
  lxc config show "${INSTANCE}" --expanded > "$CFG_TMP"
  aws_s3 cp "$CFG_TMP" "s3://${S3_BUCKET}/${NODE}/lxdconfig_${INSTANCE}_${TIMESTAMP}.yaml"
  rm -f "$CFG_TMP"

  # rclone is configured entirely via environment (remote name "ceph").
  export RCLONE_CONFIG_CEPH_TYPE="s3"
  export RCLONE_CONFIG_CEPH_PROVIDER="Ceph"
  export RCLONE_CONFIG_CEPH_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID"
  export RCLONE_CONFIG_CEPH_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY"
  export RCLONE_CONFIG_CEPH_ENDPOINT="$S3_ENDPOINT"

  log "Streaming to S3..."
  (
    # Subshell keeps the cwd change local; pipefail makes a tar or zstd
    # failure abort the upload instead of leaving a truncated object.
    set -o pipefail
    cd "$SNAP_ROOT"
    tar -cpf - . 2> >(grep -v 'socket ignored' >&2) \
      | zstd -T0 \
      | rclone rcat "ceph:${S3_BUCKET}/${NODE}/lxdbackup_${INSTANCE}_${TIMESTAMP}.tar.zst" \
          --no-check-certificate \
          --s3-chunk-size=64M \
          --s3-upload-concurrency=4
  )
  log "S3 upload successful."

  cleanup_s3_backups "$INSTANCE" "$NODE" "$KEEP"
  log "LXD BACKUP COMPLETE for ${INSTANCE}"
}
#######################################
# CLI PARSING & ROUTING
#######################################
# Defaults: back up on the local node unless --list/--from-node say otherwise.
ACTION="backup"
USE_S3="no"
TARGET=""
FROM_NODE="$(hostname -s)"

while [[ $# -gt 0 ]]; do
  case "$1" in
    --s3) USE_S3="yes"; shift ;;
    --list) ACTION="list"; shift ;;
    --from-node=*) FROM_NODE="${1#*=}"; shift ;;
    -*)
      # Reject unknown flags instead of silently treating them as the
      # instance name (a typo'd option would otherwise become TARGET).
      log "ERROR: Unknown option: $1"
      exit 1
      ;;
    *) TARGET="$1"; shift ;;
  esac
done

if [[ -z "$TARGET" ]]; then
  log "Usage:"
  log " Backup: $0 --s3 INSTANCE_NAME"
  log " List: $0 --list INSTANCE_NAME [--from-node=NODE]"
  exit 1
fi

case "$ACTION" in
  list)
    do_list "$TARGET" "$FROM_NODE"
    ;;
  backup)
    do_backup "$TARGET" "$USE_S3"
    ;;
esac