#!/usr/bin/env bash
# shellcheck disable=SC2034,SC1090
#
# refresh libgen databases from dump files

version="0.6.2"
release="20210512"

# trap_error and trap_clean are not defined in this file; they are expected to
# be provided by the books_functions library sourced below.
trap "trap_error" TERM
trap "trap_clean" EXIT
# exported so child processes can see the PID of the top-level shell
# NOTE(review): presumably used by helpers in books_functions - confirm there
export TOP_PID=$$

# shared helper library, expected next to this script
functions="$(dirname "$0")/books_functions"
if [ -f "$functions" ]; then
    source "$functions"
else
    # diagnostics belong on stderr
    echo "$functions not found" >&2
    exit 1
fi

# Entry point: set defaults, read the optional config file, parse command line
# options and refresh every selected database for which a recent dump exists.
#
# Arguments: the script's command line options (see help)
# Helpers used but not defined in this file: exlock, find_tool,
# exit_with_error, dbx (expected to come from books_functions).
#
# Most variables are deliberately left global: the other functions in this
# file (is_available, db_age, cleanup, help, ...) read them.
main () {
    local definer_filter archive sql_file wget_opts unrar_opts

    exlock now || exit 1

    # PREFERENCES
    config=${XDG_CONFIG_HOME:-$HOME/.config}/books.conf

    # maximum age (in days) of database dump file to use
    max_age=5

    # database server to use
    dbhost="localhost"
    dbport="3306"
    dbuser="libgen"

    # where to get updates. A change here probably necessitates a change in the urls array
    # as dump file names can be site-specific.
    base="http://libgen.rs/dbdumps/"

    # database names
    declare -A databases=(
        [libgen]=libgen
        [compact]=libgen_compact
        [fiction]=libgen_fiction
    )

    # source config file if it exists
    [[ -f ${config} ]] && source "${config}"

    # (mostly) END OF PREFERENCES

    # sql to get time last modified for database
    declare -A lastmodified=(
        [libgen]="select max(timelastmodified) from updated;"
        [compact]="select max(timelastmodified) from updated;"
        [fiction]="select max(timelastmodified) from fiction;"
    )

    # sed script applied to a dump before import: strips DEFINER clauses
    definer_filter='s/DEFINER[ ]*=[ ]*[^*]*\*/\*/;s/DEFINER[ ]*=[ ]*[^*]*PROCEDURE/PROCEDURE/;s/DEFINER[ ]*=[ ]*[^*]*FUNCTION/FUNCTION/'
    declare -A filter=(
        [libgen]="${definer_filter}"
        [compact]="${definer_filter}"
    )

    # sql to run BEFORE update
    declare -A before_update=(
    )

    # sql to run AFTER update
    declare -A after_update=(
        [compact]="drop trigger updated_edited;create table description (id int(11) not null auto_increment, md5 varchar(32) not null default '', descr varchar(20000) not null default '', toc mediumtext not null, TimeLastModified timestamp not null default current_timestamp on update current_timestamp, primary key (id), unique key md5_unique (md5) using btree, key time (timelastmodified) using btree, key md5_hash (md5) using hash);"
    )

    # extra tool options, keyed by tool name plus "_verbose" when -v is active
    declare -A options=(
        [wget]="-nv"
        [wget_verbose]=""
        [unrar]="-inul"
        [unrar_verbose]=""
    )

    tmpdir=$(mktemp -d /var/tmp/libgen.XXXXXX) \
        || exit_with_error "can not create temporary directory"

    unrar=$(find_tool "unrar")
    wget=$(find_tool "wget")
    w3m=$(find_tool "w3m")

    while getopts "a:cd:efhH:knP:u:U:v@" OPTION
    do
        case $OPTION in
            n)
                no_action=1
                ;;
            f)
                force_refresh=1
                ;;
            d)
                max_age=${OPTARG}
                ;;
            u)
                if [[ -v "databases[${OPTARG}]" ]]; then
                    dbs+=" ${OPTARG}"
                else
                    exit_with_error "-u ${OPTARG}: no such database"
                fi
                ;;
            v)
                pv=$(find_tool "pv")
                verbose="_verbose"
                ;;
            H)
                dbhost="${OPTARG}"
                ;;
            P)
                dbport="${OPTARG}"
                ;;
            U)
                dbuser="${OPTARG}"
                ;;
            c)
                if [[ ! -f "${config}" ]]; then
                    # write the current settings, one assignment per line
                    printf '%s\n' \
                        "dbhost=${dbhost}" \
                        "dbport=${dbport}" \
                        "dbuser=${dbuser}" \
                        "base=${base}" > "${config}"
                else
                    exit_with_error "-c: config file ${config} exists, either remove it or edit it directly"
                fi
                exit
                ;;
            e)
                if [[ -f "$config" ]]; then
                    if [[ "$VISUAL" ]]; then "$VISUAL" "$config";
                    elif [[ "$EDITOR" ]]; then "$EDITOR" "$config";
                    else exit_with_error "-e: no editor configured, can not edit $config"
                    fi
                else
                    exit_with_error "-e: config file does not exist, create is first (see -c)"
                fi
                exit
                ;;
            a)
                if url_available "${OPTARG}"; then
                    base="${OPTARG}"
                else
                    exit_with_error "-a ${OPTARG}: repository not available"
                fi
                ;;
            @)
                torsocks=$(find_tool "torsocks")
                ;;
            k)
                keep_downloaded_files=1
                ;;
            h)
                help
                exit
                ;;
            *)
                exit_with_error "unknown option: $OPTION"
                ;;
        esac
    done

    # urls for dump files (minus datestamp and extension)
    # built AFTER option parsing so that a repository chosen with -a is used
    # for the download as well as for the directory listing
    declare -A urls=(
        [libgen]="${base}/libgen"
        [compact]="${base}/libgen_compact"
        [fiction]="${base}/fiction"
    )

    # default to all known databases when -u was not given
    [[ -z ${dbs} ]] && dbs="${!databases[*]}"

    pushd "$tmpdir" >/dev/null || exit_with_error "can not change directory to $tmpdir"
    # $dbs is a space separated list, word splitting is intended here
    for db in ${dbs}; do
        database=${databases[$db]}
        if [[ -z $(db_exists "$database") ]]; then
            echo "database '$database' does not exist, please create it before attempting to refresh" >&2
            continue
        fi

        db_dump=$(is_available "${db}" "${max_age}")
        if [[ -z $db_dump ]]; then
            [[ -n $verbose ]] && echo "no update available for ${db}"
            continue
        fi

        [[ -n $verbose ]] && echo "update available for ${db}: ${db_dump}"
        # -n: only report, do not touch the database
        [[ -n ${no_action} ]] && continue

        archive=$(basename "${db_dump}")
        wget_opts=${options[wget${verbose}]}
        unrar_opts=${options[unrar${verbose}]}

        # the ${var:+...} form drops the argument entirely when the option is
        # empty instead of handing the tool an empty string
        if ! $torsocks "$wget" ${wget_opts:+"$wget_opts"} "${db_dump}"; then
            echo "download of ${db_dump} failed, skipping ${db}" >&2
            continue
        fi
        $unrar ${unrar_opts:+"$unrar_opts"} x "${archive}"

        # name of the dump file inside the archive, as listed by unrar
        sql_file=$($unrar lb "${archive}")
        if [[ -z $sql_file || ! -f $sql_file ]]; then
            # never drop tables when there is nothing to import afterwards
            echo "no dump file extracted from ${archive}, skipping ${db}" >&2
            continue
        fi

        [[ -n "${filter[$db]}" ]] && run_filter "${sql_file}" "${filter[$db]}"
        drop_tables=$(drop_table_sql "${database}")
        [[ -n $drop_tables ]] && dbx "${database}" "${drop_tables}"
        [[ -n ${before_update[$db]} ]] && dbx "${database}" "${before_update[$db]}"
        # NOTE(review): filter_command is not read anywhere in this file; kept
        # in case a sourced helper relies on it - confirm and remove if unused
        [[ -n ${filter[$db]} ]] && filter_command="|sed -e '${filter[$db]}'"
        if [[ -n $verbose ]]; then
            echo "importing ${archive} into ${database}"
            $pv "${sql_file}" | dbx "${database}"
        else
            dbx "${database}" < "${sql_file}"
        fi
        [[ -n ${after_update[$db]} ]] && dbx "${database}" "${after_update[$db]}"
    done
    popd >/dev/null || exit_with_error "popd failed?"
}

# check whether there is a dump file which is more recent than the current database and no older
# than $max_age
#
# Arguments: $1 - database key (index into the urls array)
#            $2 - maximum dump age in days
# Globals:   base, urls, w3m (read); db_age is called to get the database age
# Outputs:   url of the most recent matching dump file, nothing if none found
# Returns:   0 when a dump was found, non-zero otherwise
is_available () {
    local db="$1"
    local max_age="$2"
    local database_age now listing timestamp wanted
    local age=0
    # start empty so a value inherited from the caller is never reported
    local result=""

    database_age=$(db_age "$db")

    # nothing to look for: do not even fetch the listing
    [[ $age -lt $database_age && $age -lt $max_age ]] || return 1

    # the directory listing does not change between iterations, fetch it once
    listing=$($w3m -dump "${base}" | awk '{ print $1 }')
    now=$(date +%s)

    # walk back one day at a time, newest dump first
    while [[ $age -lt $database_age && $age -lt $max_age ]]; do
        timestamp=$(date -d "@$((now - 60*60*24*age))" +%Y-%m-%d)
        wanted="$(basename "${urls[$db]}")_${timestamp}.rar"
        # -F: the file name is a literal string, not a regular expression
        result=$(grep -F -- "${wanted}" <<< "${listing}")
        [[ -n $result ]] && break
        age=$((age + 1))
    done

    [[ -n $result ]] && echo "$(dirname "${urls[$db]}")/${result}"
}

# drop tables to prepare database for refresh
#
# Runs a query (through dbx, which is not defined in this file) that builds one
# "DROP TABLE IF EXISTS ..." statement per table found in the given schema.
#
# Arguments: $1 - database (schema) name
# Outputs:   whatever dbx prints for the query
drop_table_sql () {
    # local, so the caller's $database is left alone
    local database="$1"
    local query="SELECT concat('DROP TABLE IF EXISTS ', table_name, ';') FROM information_schema.tables WHERE table_schema = '${database}';"

    dbx "${database}" "${query}"
}

# returns database name if it exists, nothing otherwise
#
# Arguments: $1 - database (schema) name
# Outputs:   whatever dbx prints for the schema lookup; errors from dbx are
#            discarded so a failing query simply yields no output
db_exists () {
    # local, so the caller's $database is left alone
    local database="$1"

    dbx "${database}" "select schema_name from information_schema.schemata where schema_name='${database}';" 2>/dev/null
}

# return database age in days
#
# Arguments: $1 - database key (index into the lastmodified array)
# Globals:   force_refresh, max_age, database, lastmodified (read)
# Outputs:   the age in whole days, without a trailing newline
#
# With force_refresh set the age is reported as $max_age. Otherwise the
# last-modified query for $1 is run against $database (the schema name set by
# the caller) and the result is converted to days.
db_age () {
    local db="$1"
    local now age=0 db_last_modified

    now=$(date +%s)
    if [[ "$force_refresh" -gt 0 ]]; then
        age=$max_age
    else
        db_last_modified=$(date -d "$(dbx "$database" "${lastmodified[$db]}")" +%s)
        age=$(((now-db_last_modified)/60/60/24))
    fi

    printf '%s' "${age}"
}

# run filter on dump
#
# Arguments: $1 - path of the dump file, edited in place
#            $2 - sed script to apply
# Globals:   verbose (read) - when non-empty the action is announced on stdout
run_filter () {
    local dump_file="$1"
    local flt="$2"

    if [[ -n $verbose ]]; then
        echo "running '$flt' on '$dump_file'"
    fi

    # -- keeps a file name starting with '-' from being read as an option
    sed -i -e "$flt" -- "$dump_file"
}

# verify that the database server accepts the configured credentials
#
# Runs a trivial query through dbx with an empty database argument; when that
# yields no output the script is aborted through exit_with_error. Both helpers
# are not defined in this file.
check_credentials () {
    if [[ -z $(dbx "" "select true;" 2>/dev/null) ]]; then
        exit_with_error "database connection error, bad username or password?"
    fi
}

# check whether a url can be reached
#
# Arguments: $1 - url to probe
# Globals:   wget (read), torsocks (read, optional command prefix)
# Returns:   exit status of the wget --spider probe
url_available () {
    local url="$1"

    # $torsocks is left unquoted on purpose: it expands to nothing when unset
    $torsocks "$wget" -q --spider "$url"
}

# remove the temporary working directory unless -k was given
#
# Globals: keep_downloaded_files (tested for being set), tmpdir (read)
cleanup () {
    if [[ ! -v keep_downloaded_files ]]; then
        # only remove when a directory name is actually known
        if [[ -n ${tmpdir:-} ]]; then
            rm -rf -- "${tmpdir}"
        fi
    else
        echo "-k option active, temporary directory ${tmpdir} not removed"
    fi
}

# print program name, version and usage information on stdout
#
# Globals: version, max_age, databases, dbhost, dbport, dbuser, base (read);
#          the text shows their current values as defaults
help () {
    echo "$(basename "$(readlink -f "$0")")" "version $version"
    # the option letters listed here match the getopts string used in main;
    # the repository option is -a
    cat <<EOT

Usage: refresh_libgen OPTIONS

Performs a refresh from a database dump file for the chosen libgen databases.

Make sure the database credentials are configured (in \$HOME/.my.cnf) before
using this tool.

    -n          do not refresh database
                use together with '-v' to check if recent dumps are available
    -f          force refresh, use this on first install
    -v          be verbose about what is being updated
    -d DAYS     only use database dump files no older than DAYS days (default: ${max_age})
    -u DBS      refresh DBS databases (default: ${!databases[*]})

    -H DBHOST   database host (${dbhost})
    -P DBPORT   database port (${dbport})
    -U DBUSER   database user (${dbuser})
    -a REPO     dump repository (${base})
    -c          create a config file using current settings (see -H, -P, -U, -a)
    -e          edit config file

    -@          use tor (through torsocks) to connect to libgen server
    -k          keep downloaded files after exit
    -h          this help message

EOT
}

# exlock is not defined in this file (expected from books_functions); give up
# when its "prepare" step fails
exlock prepare || exit 1

main "$@"