File: //bigscoots/wpo/nginx/blockbots.sh
#!/bin/bash
source /bigscoots/includes/common.sh
# Version marker for the generated global bad-bots config, defined once here.
# create_bigscoots_bad_bots_conf writes it as the first comment line of that
# file, and check_and_update_conf greps for it: when the marker is not found
# the file is regenerated.
VERSION="e1b411ce-5962-463e-961d-17bdb54bbfb8"
# Print the command-line synopsis on stdout and terminate with status 1.
# Never returns to the caller.
usage() {
  printf '%s\n' "Usage: $0 {init|modify|block} --domain DOMAIN [--user-agent 'User Agent String']"
  exit 1
}
# Parse arguments
#
# blockbots_parse_args ARG...
#   Walks the command line and records the result in globals:
#     ACTION      - "init", "modify" or "block" (the last one given wins)
#     DOMAIN      - the word following --domain
#     USER_AGENT  - the word following --user-agent
#   Any other word, or an option that is the final word and therefore has no
#   value after it, prints a message and leaves through usage.
blockbots_parse_args() {
  local key
  while [[ $# -gt 0 ]]; do
    key="$1"
    case "$key" in
      init | modify | block)
        ACTION="$key"
        shift
        ;;
      --domain | --user-agent)
        # Only a truly absent value is rejected. An explicitly empty value
        # ("") is still accepted, exactly as before, so callers that pass an
        # empty variable keep working.
        if [[ $# -lt 2 ]]; then
          echo "Error: $key requires a value."
          usage
        fi
        if [[ "$key" == "--domain" ]]; then
          DOMAIN="$2"
        else
          USER_AGENT="$2"
        fi
        shift 2
        ;;
      *)
        echo "Unknown option: $key"
        usage
        ;;
    esac
  done
}
blockbots_parse_args "$@"
# A "block" request needs both a domain and a user agent; when either one is
# empty, report it and leave through usage.
if [[ "$ACTION" == "block" ]]; then
  if [[ -z "$DOMAIN" || -z "$USER_AGENT" ]]; then
    echo "Error: --domain and --user-agent are required for blocking a bot."
    usage
  fi
fi
# Locations used throughout the script: the global bad-bots map, plus the two
# per-domain files that live under wpincludes/<DOMAIN>/.
conf_file=/usr/local/nginx/conf/bigscoots_bad_bots.conf
printf -v badbots_custom_conf '/usr/local/nginx/conf/wpincludes/%s/badbots_custom.conf' "$DOMAIN"
printf -v redirects_conf '/usr/local/nginx/conf/wpincludes/%s/redirects.conf' "$DOMAIN"
# Function to create the global bad bots configuration file
#
# Overwrites $conf_file with:
#   - a first comment line carrying $VERSION (the marker check_and_update_conf
#     greps for), and
#   - an nginx map on $http_user_agent that sets $bad_bot to 1 for each listed
#     pattern and to 0 by default. The "~*" prefix is nginx's case-insensitive
#     regex match.
# Globals: conf_file (read), VERSION (read)
# Returns: the status of the write (non-zero when $conf_file cannot be written)
create_bigscoots_bad_bots_conf() {
  {
    printf '# %s\n' "$VERSION"
    # Quoted delimiter: the map body is emitted literally, so the nginx
    # variables need no escaping. Each pattern is listed exactly once.
    cat <<'EOF'
map $http_user_agent $bad_bot {
default 0;
"~*Barkrowler" 1;
"~*Bytespider" 1;
"~*ClaudeBot" 1;
"~*FemtosearchBot" 1;
"~*MJ12bot" 1;
"~*PetalBot" 1;
"~*SeznamBot" 1;
"~*SirdataBot" 1;
"~*VelenPublicWebCrawler" 1;
"~*YandexBot" 1;
"~*keys-so-bot" 1;
"~*trendictionbot" 1;
"~*weborama-fetcher" 1;
"~*ImagesiftBot" 1;
"~*Scrapy" 1;
"~*PerplexityBot" 1;
"~*Recipe Seeker Chef" 1;
}
EOF
  } > "$conf_file"
}
# Function to check and update the global configuration file
#
# When $conf_file exists and already mentions $VERSION nothing happens.
# Otherwise the file is regenerated and a detached background job is started
# that waits 10 seconds and then calls ngxreload_t (defined outside this file;
# presumably a reload helper - confirm in common.sh).
check_and_update_conf() {
  # Guard clause: file present and stamped with the current version.
  if [ -f "$conf_file" ] && grep -q "$VERSION" "$conf_file"; then
    return 0
  fi

  create_bigscoots_bad_bots_conf

  # Delayed call, fully silenced and pushed to the background so the caller
  # is not held up for the 10 seconds.
  {
    sleep 10
    ngxreload_t check_and_update_conf
  } >/dev/null 2>&1 &

  # Detach the job so an interactive shell exiting does not SIGHUP it.
  # Harmless when there is nothing to disown, hence the ignored failure.
  disown 2>/dev/null || true
}
# Function to update domain-specific redirects.conf
#
# Walks DIR for files named redirects.conf and makes sure each one contains
# the rule  if ($bad_bot) {return 403;}  - files that already mention $bad_bot
# are left untouched.
# Arguments: $1 - directory to scan (optional; defaults to
#                 /usr/local/nginx/conf/wpincludes/)
# Returns:   0 when the directory does not exist; otherwise the status of find
update_redirects_conf() {
  local dir="${1:-/usr/local/nginx/conf/wpincludes/}"

  # Nothing to do when the tree is absent.
  if [ ! -d "$dir" ]; then
    return 0
  fi

  # The inner script is single-quoted, so \$bad_bot reaches grep/printf as the
  # literal text $bad_bot. grep -F keeps it from being read as a regex.
  find "$dir" -name 'redirects.conf' -exec sh -c '
    for file do
      # Rule (or any other use of the variable) already present.
      if grep -qF "\$bad_bot" "$file"; then
        continue
      fi
      # When the last byte is not a newline, start a fresh line first;
      # otherwise the rule would be glued onto (or commented out by) the
      # existing last line.
      if [ -s "$file" ] && [ -n "$(tail -c 1 "$file")" ]; then
        printf "\n" >> "$file"
      fi
      printf "%s\n" "if (\$bad_bot) {return 403;}" >> "$file"
    done
  ' sh {} +
}
# Function to block a bot for a specific domain
#
# Adds  "~*<user agent>" 1;  to the map in $badbots_custom_conf (creating the
# file with an empty map first when it does not exist) and then calls
# ngxreload. The user agent is written as given, so it acts as an nginx regex.
# Arguments: $1 - user agent pattern to block
# Globals:   badbots_custom_conf (read), DOMAIN (read, for the message only)
# Outputs:   a status line on stdout; errors on stderr
# Returns:   0 when the entry is already present; 1 when the config file
#            cannot be created or updated (no reload is attempted then);
#            otherwise the status of ngxreload
block_bot() {
  local user_agent="$1"
  local custom_conf="$badbots_custom_conf"
  # Exact text of the map key as it appears in the file.
  local entry="\"~*$user_agent\""
  local sed_text

  # Ensure the domain-specific configuration file exists
  if [ ! -f "$custom_conf" ]; then
    if ! cat <<'EOF' > "$custom_conf"
map $http_user_agent $bad_bot {
default 0;
}
EOF
    then
      echo "Error: could not create $custom_conf" >&2
      return 1
    fi
  fi

  # Check if the user agent is already blocked. This has to be a fixed-string
  # search: as a regex, the "~*" in the key means "zero or more ~" and never
  # matches the literal "~*" stored in the file.
  if grep -qF -- "$entry" "$custom_conf"; then
    echo "Bot '$user_agent' is already blocked."
    return 0
  fi

  # sed consumes backslashes in the text of an insert command; double them so
  # the line written to the file is exactly the pattern that was passed in.
  sed_text=${entry//\\/\\\\}

  # Insert the new entry before the closing brace of the map, then confirm it
  # really landed (sed succeeds silently when no line starts with "}").
  if ! sed -i "/^}/i\ $sed_text 1;" "$custom_conf" ||
    ! grep -qF -- "$entry" "$custom_conf"; then
    echo "Error: could not add '$user_agent' to $custom_conf" >&2
    return 1
  fi

  echo "Blocked bot: $user_agent for $DOMAIN"
  # Reload Nginx to apply changes
  ngxreload
}
# Main execution logic

# blockbots_ensure_global_include [NGINX_CONF]
#   Makes sure NGINX_CONF (default /usr/local/nginx/conf/nginx.conf) contains
#     include /usr/local/nginx/conf/bigscoots_bad_bots.conf;
#   The directive goes directly below the last line mentioning "map_hash_",
#   or, when there is no such line, directly below the "http {" line.
#   Does nothing when the include is already present.
blockbots_ensure_global_include() {
  local nginx_conf="${1:-/usr/local/nginx/conf/nginx.conf}"
  local last_map_hash_line

  # Check if the include directive already exists in nginx.conf
  if grep -q "/usr/local/nginx/conf/bigscoots_bad_bots.conf;" "$nginx_conf"; then
    return 0
  fi

  # Locate the last map_hash_* line by NUMBER. Using the line's text as a sed
  # /regex/ address breaks as soon as it contains "/" or other regex
  # metacharacters, and could match more than one line.
  last_map_hash_line=$(grep -n "map_hash_" "$nginx_conf" | tail -1 | cut -d: -f1)

  if [[ -n "$last_map_hash_line" ]]; then
    # Append the include directive below the last map_hash_ option
    sed -i "${last_map_hash_line}a \ include /usr/local/nginx/conf/bigscoots_bad_bots.conf;" "$nginx_conf"
  else
    # If neither map_hash option exists, append the include directive below "http {"
    sed -i '/^http {/a \ include /usr/local/nginx/conf/bigscoots_bad_bots.conf;' "$nginx_conf"
  fi
}

if [[ "$ACTION" == "init" ]]; then
  # Server-wide setup: global include, per-domain 403 rule, global map file.
  blockbots_ensure_global_include
  # Update redirects.conf if the line doesn't exist
  update_redirects_conf
  # Run the check and update function
  check_and_update_conf
elif [[ "$ACTION" == "modify" && -n "$DOMAIN" ]]; then
  # Per-domain setup. A failed domain validation is reported as JSON and ends
  # the script with status 1.
  validate_domain_in_path "$DOMAIN" blockbots.sh || { echo "{\"success\": false, \"error\": \"Domain has failed validation!\"}"; exit 1; }
  # Run the server-wide setup first, in a separate process.
  bash /bigscoots/wpo/nginx/blockbots.sh init
  ssl_conf="/usr/local/nginx/conf/conf.d/$DOMAIN.ssl.conf"
  # Put the include of the domain's custom map on the first line of its SSL
  # vhost file, unless the file already references it.
  # NOTE(review): the include is added even when the custom map file is not
  # created below (redirects.conf missing or without $bad_bot) - confirm that
  # nginx is never reloaded in that state.
  if ! grep -q badbots_custom.conf "$ssl_conf"; then
    sed -i "1i include $badbots_custom_conf;" "$ssl_conf"
  fi
  # Only when the domain's redirects.conf uses $bad_bot: make sure the custom
  # map exists (seeded with an empty map) and tell the operator what to edit.
  if grep -q '\$bad_bot' "$redirects_conf"; then
    if [ ! -f "$badbots_custom_conf" ]; then
      cat <<'EOF' > "$badbots_custom_conf"
map $http_user_agent $bad_bot {
default 0;
}
EOF
    fi
    echo "Modify $badbots_custom_conf and reload nginx"
  fi
elif [[ "$ACTION" == "block" ]]; then
  # NOTE(review): unlike "modify", this path does not call
  # validate_domain_in_path before touching the domain's files - confirm
  # whether that is intentional.
  block_bot "$USER_AGENT"
fi