books/refresh_libgen
2020-08-05 11:00:49 +00:00

388 lines
9.6 KiB
Bash
Executable file

#!/usr/bin/env bash
#
# refresh libgen databases from dump files
# version string (printed by the help text) and release stamp
version='0.6.1'
release='20200805'
# exit_with_error sends TERM to this PID, so remember the PID of the
# top-level shell and export it to child processes
TOP_PID=$$
export TOP_PID
# TERM -> clean up and exit 1, any exit -> clean up
trap trap_error TERM
trap trap_exit EXIT
# Parse the command line, then refresh every selected database from the most
# recent suitable dump file.
#
# Globals written: config, max_age, dbhost, dbport, dbuser, base, tmpdir,
#                  verbose, no_action, force_refresh, keep_downloaded_files,
#                  password, dbpass, db, database
# Arguments:       the script's command line options (see help)
main () {
  local OPTION OPTIND=1
  local archive dump_file drop_tables db_dump
  local -a dbs=()
  # PREFERENCES
  config=${XDG_CONFIG_HOME:-$HOME/.config}/books.conf
  # maximum age (in days) of database dump file to use
  max_age=5
  # database server to use
  dbhost="localhost"
  dbport="3306"
  dbuser="libgen"
  # where to get updates. A change here probably necessitates a change in the urls array
  # as dump file names can be site-specific.
  base="http://gen.lib.rus.ec/dbdumps/"
  #base="https://lgdumps.xyz/dumps/"
  # database names
  declare -A databases=(
    [libgen]=libgen
    [compact]=libgen_compact
    [fiction]=libgen_fiction
  )
  # source config file if it exists
  [[ -f ${config} ]] && source "${config}"
  # (mostly) END OF PREFERENCES
  # sql to get time last modified for database
  declare -A lastmodified=(
    [libgen]="select max(timelastmodified) from updated;"
    [compact]="select max(timelastmodified) from updated;"
    [fiction]="select max(timelastmodified) from fiction;"
  )
  # sed script applied to a dump before import: strips the DEFINER=... clause
  # in front of '*', PROCEDURE and FUNCTION
  local definer_filter='s/DEFINER[ ]*=[ ]*[^*]*\*/\*/;s/DEFINER[ ]*=[ ]*[^*]*PROCEDURE/PROCEDURE/;s/DEFINER[ ]*=[ ]*[^*]*FUNCTION/FUNCTION/'
  declare -A filter=(
    [libgen]="${definer_filter}"
    [compact]="${definer_filter}"
  )
  # sql to run BEFORE update
  declare -A before_update=(
  )
  # sql to run AFTER update
  declare -A after_update=(
    [compact]="drop trigger updated_edited;create table description (id int(11) not null auto_increment, md5 varchar(32) not null default '', descr varchar(20000) not null default '', toc mediumtext not null, TimeLastModified timestamp not null default current_timestamp on update current_timestamp, primary key (id), unique key md5_unique (md5) using btree, key time (timelastmodified) using btree, key md5_hash (md5) using hash);"
  )
  declare -A options=(
    [wget]="-nv"
    [wget_verbose]=""
    [unrar]="-inul"
    [unrar_verbose]=""
  )
  tmpdir=$(mktemp -d /var/tmp/libgen.XXXXXX) \
    || exit_with_error "could not create temporary directory"
  while getopts "cd:efhH:knp:P:R:u:U:v@" OPTION; do
    case $OPTION in
      n)
        no_action=1
        ;;
      f)
        force_refresh=1
        ;;
      d)
        max_age=${OPTARG}
        ;;
      u)
        if [[ -v "databases[${OPTARG}]" ]]; then
          dbs+=("${OPTARG}")
        else
          exit_with_error "-u ${OPTARG}: no such database"
        fi
        ;;
      v)
        verbose="_verbose"
        ;;
      H)
        dbhost="${OPTARG}"
        ;;
      P)
        dbport="${OPTARG}"
        ;;
      U)
        dbuser="${OPTARG}"
        ;;
      p)
        password="${OPTARG}"
        if [[ -z $password ]]; then
          password=$(read_password)
          echo
        fi
        ;;
      c)
        if [[ ! -f "${config}" ]]; then
          cat <<-EOT > "${config}"
dbhost=${dbhost}
dbport=${dbport}
dbuser=${dbuser}
base=${base}
EOT
        else
          exit_with_error "-c: config file ${config} exists, either remove it or edit it directly"
        fi
        exit
        ;;
      e)
        if [[ -f "$config" ]]; then
          if [[ "$VISUAL" ]]; then "$VISUAL" "$config";
          elif [[ "$EDITOR" ]]; then "$EDITOR" "$config";
          else exit_with_error "-e: no editor configured, can not edit $config"
          fi
        else
          exit_with_error "-e: config file does not exist, create it first (see -c)"
        fi
        exit
        ;;
      R)
        # the option string and the help text both call this option -R; the
        # handler used to be filed under 'a' and could never be reached
        if url_available "${OPTARG}"; then
          base="${OPTARG}"
        else
          exit_with_error "-R ${OPTARG}: repository not available"
        fi
        ;;
      @)
        use_torsocks=1
        source "$(command -v torsocks)" on
        ;;
      k)
        keep_downloaded_files=1
        ;;
      h)
        help
        exit
        ;;
      *)
        # getopts has already reported the offending option; do not go on to
        # modify databases after a mistyped command line
        exit_with_error "invalid command line, see -h for usage"
        ;;
    esac
  done
  # urls for dump files (minus datestamp and extension); built only now so
  # that a repository chosen with -R is actually used
  declare -A urls=(
    [libgen]="${base}/libgen"
    [compact]="${base}/libgen_compact"
    [fiction]="${base}/fiction"
  )
  check_sanity
  [[ -n ${password} ]] && dbpass="-p${password}"
  # skip credential check when using password prompt option - password will be asked often enough as it is...
  [[ ${dbpass} != "-p" ]] && check_credentials
  (( ${#dbs[@]} == 0 )) && dbs=("${!databases[@]}")
  pushd "$tmpdir" >/dev/null || exit_with_error "can not enter ${tmpdir}"
  for db in "${dbs[@]}"; do
    database=${databases[$db]}
    if [[ -z $(db_exists "$database") ]]; then
      echo "database '$database' does not exist, please create it before attempting to refresh" >&2
      continue
    fi
    db_dump=$(is_available "${db}" "${max_age}")
    if [[ -z $db_dump ]]; then
      [[ -n $verbose ]] && echo "no update available for ${db}"
      continue
    fi
    [[ -n $verbose ]] && echo "update available for ${db}: ${db_dump}"
    [[ -n ${no_action} ]] && continue
    archive=$(basename "${db_dump}")
    # the tool options are deliberately unquoted: an empty option string has
    # to vanish instead of becoming an empty argument
    # shellcheck disable=SC2086
    if ! wget ${options[wget${verbose}]} "${db_dump}"; then
      echo "download of ${db_dump} failed, ${database} left untouched" >&2
      continue
    fi
    # shellcheck disable=SC2086
    if ! unrar ${options[unrar${verbose}]} x "${archive}"; then
      echo "unpacking ${archive} failed, ${database} left untouched" >&2
      continue
    fi
    dump_file=$(unrar lb "${archive}")
    # never drop the existing tables unless there is something to import
    if [[ ! -s ${dump_file} ]]; then
      echo "no dump file found in ${archive}, ${database} left untouched" >&2
      continue
    fi
    [[ -n "${filter[$db]}" ]] && run_filter "${dump_file}" "${filter[$db]}"
    drop_tables=$(drop_table_sql "${database}")
    [[ -n $drop_tables ]] && dbx "${database}" "${drop_tables}"
    [[ -n ${before_update[$db]} ]] && dbx "${database}" "${before_update[$db]}"
    if [[ -n $verbose ]]; then
      echo "importing ${archive} into ${database}"
      pv "${dump_file}" | dbx "${database}"
    else
      dbx "${database}" < "${dump_file}"
    fi
    [[ -n ${after_update[$db]} ]] && dbx "${database}" "${after_update[$db]}"
  done
  popd >/dev/null
}
# Run the mysql client against the configured server.
# Arguments: $1     - database name (may be empty: connect without selecting one)
#            $2...  - optional SQL; all remaining words are joined into a
#                     single statement. Without SQL, mysql reads from stdin.
# Globals:   dbhost, dbport, dbuser, dbpass (read)
dbx () {
  local database=$1
  shift
  local -a mysql_args=(-Bsssss -h "${dbhost}" -P "${dbport}" -u "${dbuser}")
  # optional arguments are added only when set, so that each one can be
  # quoted: a password containing blanks or wildcards stays one word
  [[ -n ${dbpass} ]] && mysql_args+=("${dbpass}")
  [[ -n ${database} ]] && mysql_args+=("${database}")
  if (( $# > 0 )); then
    mysql_args+=(-e "$*")
  fi
  mysql "${mysql_args[@]}"
}
# check whether there is a dump file which is more recent than the current database and no older
# than $max_age
# Arguments: $1 - database key (index into urls)
#            $2 - maximum dump age in days
# Globals:   base, urls (read)
# Outputs:   url of the newest matching dump file, nothing if there is none
# Returns:   0 when a dump was found, 1 otherwise
is_available () {
  local db="$1"
  # db_age reads max_age when a refresh is forced, so set it before the call
  local max_age="$2"
  local db_age age=0 now listing wanted timestamp result=""
  db_age=$(db_age "$db")
  now=$(date +%s)
  # the listing does not change while we look, fetch it only once
  listing=$(w3m -dump "${base}" | awk '{ print $1 }')
  while [[ $age -lt ${db_age:-0} && $age -lt ${max_age:-0} ]]; do
    timestamp=$(date -d "@$(( now - 60 * 60 * 24 * age ))" +%Y-%m-%d)
    wanted="$(basename "${urls[$db]}")_${timestamp}.rar"
    # exact, literal match: 'name.rar.md5' must not pass for 'name.rar'
    result=$(grep -Fx -m 1 -- "${wanted}" <<< "${listing}")
    [[ -n $result ]] && break
    age=$(( age + 1 ))
  done
  [[ -n $result ]] || return 1
  echo "$(dirname "${urls[$db]}")/${result}"
}
# drop tables to prepare database for refresh
# Arguments: $1 - database (schema) name
# Outputs:   one 'DROP TABLE IF EXISTS <table>;' statement per table found in
#            that schema, as returned by dbx
drop_table_sql () {
  local database="$1"
  # the schema filter has to use the name passed in; it used to reference a
  # variable that is never set, so the query could not match any table
  dbx "$database" "SELECT concat('DROP TABLE IF EXISTS ', table_name, ';') FROM information_schema.tables WHERE table_schema = '${database}';"
}
# returns database name if it exists, nothing otherwise
# Arguments: $1 - database (schema) name
# Outputs:   whatever dbx prints for the lookup; error output is discarded
db_exists () {
  local database="$1"
  # quoted, so the name always reaches dbx as exactly one argument
  dbx "$database" "select schema_name from information_schema.schemata where schema_name='$database';" 2>/dev/null
}
# print the age of a database in whole days (no trailing newline)
# Arguments: $1 - database key (index into lastmodified)
# Globals:   database, lastmodified, force_refresh, max_age (read)
db_age () {
  db="$1"
  now=$(date +%s)
  age=0
  if [[ "$force_refresh" -le 0 ]]; then
    # ask the database when it was last modified and convert that to epoch seconds
    db_last_modified=$(dbx $database ${lastmodified[$db]})
    db_last_modified=$(date -d "$db_last_modified" +%s)
    age=$(( (${now} - ${db_last_modified}) / 60 / 60 / 24 ))
  else
    # forced refresh: report the database as being as old as max_age
    age=$max_age
  fi
  echo -n $age
}
# run filter on dump
# Arguments: $1 - dump file, edited in place
#            $2 - sed script to apply
# Globals:   verbose (read)
run_filter () {
  local dump_file="$1"
  # not called 'filter': the caller keeps an associative array of that name,
  # and a plain assignment here would add a stray element to it
  local sed_script="$2"
  if [[ -n $verbose ]]; then
    echo "running '$sed_script' on '$dump_file'"
  fi
  sed -i -e "$sed_script" -- "$dump_file"
}
# find tool, returns the first|one|found, exit with error message if none found
# Arguments: $1 - program name, or several alternatives separated by '|'
# Outputs:   the first alternative that is available
# Returns:   0 when a tool was found; 1 (after calling exit_with_error) otherwise
find_tool () {
  local -a tools
  local tool
  IFS='|' read -ra tools <<< "$*"
  for tool in "${tools[@]}"; do
    # shell builtin lookup: no dependency on an external 'which'
    if command -v "$tool" > /dev/null 2>&1; then
      echo "$tool"
      return 0
    fi
  done
  if (( ${#tools[@]} > 1 )); then
    exit_with_error "missing programs: $*; install at least one of these: ${tools[*]} and try again"
  else
    exit_with_error "missing program: $1; please install and try again"
  fi
  # exit_with_error signals the top-level shell; when we run in a subshell we
  # are still alive here and must not report a tool that does not exist
  return 1
}
# prompt for a password and print it on stdout
# Input is read silently, one character at a time, until an empty read
# (end of line) or end of input; after the first character the prompt
# becomes '*'.
read_password () {
  password=""
  prompt="Enter database password:"
  while IFS= read -r -s -n 1 -p "$prompt" char; do
    # an empty read marks the end of the line
    [[ -z $char ]] && break
    password+="$char"
    prompt='*'
  done
  echo "$password"
}
# make sure the server accepts the configured credentials: run a trivial query
# without selecting a database and abort when it produces no output
check_credentials () {
  local reply
  reply=$(dbx "" "select true;" 2>/dev/null)
  [[ -n $reply ]] && return 0
  exit_with_error "database connection error, bad username or password?"
}
# succeed when the given url can be reached
# Arguments: $1 - url to probe
# Returns:   exit status of wget
url_available () {
  url="$1"
  # spider mode: wget only checks the url, it does not fetch the content
  wget --spider -q "${url}"
}
# verify that the external programs this script relies on are installed;
# pv is only looked up when verbose is set
check_sanity () {
  local required
  for required in w3m wget unrar; do
    find_tool "$required" > /dev/null
  done
  [[ -n $verbose ]] && find_tool "pv" > /dev/null
}
# remove the temporary download directory, unless keep_downloaded_files is set
# (in which case only a notice is printed)
# Globals: tmpdir, keep_downloaded_files (read)
cleanup () {
  if [[ -v keep_downloaded_files ]]; then
    echo "-k option active, temporary directory ${tmpdir} not removed"
    return
  fi
  # quoted and guarded: remove exactly this one directory, and nothing at all
  # when tmpdir was never set
  if [[ -n ${tmpdir} ]]; then
    rm -rf -- "${tmpdir}"
  fi
}
# handler for the TERM trap (raised by exit_with_error): clean up and
# terminate with exit status 1
trap_error () {
  # the script also installs trap_exit on EXIT, which calls cleanup as well;
  # disarm it so cleanup (and its -k notice) runs only once on this path
  trap - EXIT
  cleanup
  exit 1
}
# handler for the EXIT trap: run cleanup, then let the shell terminate
trap_exit () {
cleanup
# NOTE(review): bare exit, no explicit status is given; presumably meant to
# keep the status the script was already ending with - confirm against bash's
# rules for exit inside an EXIT trap
exit
}
# print an error message, prefixed with the script name, on stderr and send
# TERM to the top-level shell
# Arguments: $@ - message text
# Globals:   TOP_PID (read)
exit_with_error () {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  # signalling TOP_PID instead of calling exit also stops the script when we
  # are running inside a subshell (e.g. a command substitution)
  kill -s TERM "$TOP_PID"
}
# print the program name with its version, followed by the usage text; the
# current settings are shown as defaults
help () {
  local self
  # resolve symlinks so the real script name is shown
  self=$(basename $(readlink -f $0))
  echo $self "version $version"
  cat <<- EOT
Usage: refresh_libgen OPTIONS
Performs a refresh from a database dump file for the chosen libgen databases.
-n do not refresh database
use together with '-v' to check if recent dumps are available
-f force refresh, use this on first install
-v be verbose about what is being updated
-d DAYS only use database dump files no older than DAYS days (default: ${max_age})
-u DBS refresh DBS databases (default: ${!databases[@]})
-H DBHOST database host (${dbhost})
-P DBPORT database port (${dbport})
-U DBUSER database user (${dbuser})
-R REPO dump repository (${base})
-c create a config file using current settings (see -H, -P, -U, -R)
-e edit config file
-p DBPASS database password (cache password for this session)
use empty string ("") to get password prompt
-@ use tor (through torsocks) to connect to libgen server
-k keep downloaded files after exit
-h this help message
EOT
}
# entry point: hand the complete command line to main
main "$@"