books/refresh_libgen

336 lines
8.8 KiB
Bash
Executable file

#!/usr/bin/env bash
# shellcheck disable=SC2034,SC1090
#
# refresh libgen databases from dump files
#
# Helper functions are loaded from a file named "books_functions" which is
# expected to sit in the same directory as this script.

version="0.7.0"
release="20210701"

# trap_error and trap_clean are not defined in this file
# (presumably they come from books_functions - verify there)
trap "trap_error" TERM
trap "trap_clean" EXIT

export TOP_PID=$$

# refuse to continue without the helper library
functions="$(dirname "$0")/books_functions"
if [ ! -f "$functions" ]; then
  echo "$functions not found"
  exit 1
fi
source "$functions"
# Entry point: load preferences, parse the command line and, for every selected
# database that exists, import the most recent suitable dump file.
#
# Arguments: the script's command line options (see help)
# Globals:   sets the configuration (config, max_age, dbhost, dbport, dbuser, base),
#            tool locations (unrar, wget, w3m, pv, torsocks) and tmpdir; the
#            associative arrays declared here are local to main but are read by
#            the helper functions it calls
# Returns:   exits through exit_with_error on fatal problems
main () {
  local -a wget_opts unrar_opts

  # NOTE(review): exlock is defined outside this file; presumably it takes the run lock
  exlock now || exit 1

  # PREFERENCES
  config=${XDG_CONFIG_HOME:-$HOME/.config}/books.conf

  # maximum age (in days) of database dump file to use
  max_age=5

  # database server to use
  dbhost="localhost"
  dbport="3306"
  dbuser="libgen"

  # where to get updates. A change here probably necessitates a change in the urls array
  # as dump file names can be site-specific.
  base="http://libgen.rs/dbdumps/"

  # database names
  declare -A databases=(
    [libgen]=libgen
    [compact]=libgen_compact
    [fiction]=libgen_fiction
  )

  # source config file if it exists
  [[ -f ${config} ]] && source "${config}"
  # (mostly) END OF PREFERENCES

  # sql to get time last modified for database
  declare -A lastmodified=(
    [libgen]="select max(timelastmodified) from updated;"
    [compact]="select max(timelastmodified) from updated;"
    [fiction]="select max(timelastmodified) from fiction;"
  )

  # stream filters can be applied during extraction, saving time and storage space
  declare -A stream_filter=(
    [libgen]='s/DEFINER[ ]*=[ ]*[^*]*\*/\*/;s/DEFINER[ ]*=[ ]*[^*]*PROCEDURE/PROCEDURE/;s/DEFINER[ ]*=[ ]*[^*]*FUNCTION/FUNCTION/;s/^INSERT INTO `updated_edited`.*$/-- skip/;s/^INSERT INTO `description_edited`.*$/-- skip/'
    [compact]='s/DEFINER[ ]*=[ ]*[^*]*\*/\*/;s/DEFINER[ ]*=[ ]*[^*]*PROCEDURE/PROCEDURE/;s/DEFINER[ ]*=[ ]*[^*]*FUNCTION/FUNCTION/;s/^INSERT INTO `updated_edited`.*$/-- skip/'
  )

  # file filters are used in case the whole file needs to be extracted before the filter can be applied
  # if both a stream filter as well as a file filter is defined, only the file filter is used
  declare -A file_filter=(
  )

  # sql to run BEFORE update
  declare -A before_update=(
  )

  # sql to run AFTER update
  declare -A after_update=(
    [libgen]="drop trigger libgen_description_update_all"
    [compact]="drop trigger libgen_description_update_all;create table description (id int(11) not null auto_increment, md5 varchar(32) not null default '', descr varchar(20000) not null default '', toc mediumtext not null, TimeLastModified timestamp not null default current_timestamp on update current_timestamp, primary key (id), unique key md5_unique (md5) using btree, key time (timelastmodified) using btree, key md5_hash (md5) using hash);"
  )

  # extra command line options for the external tools; the "_verbose" variants are
  # selected when -v is given
  declare -A options=(
    [wget]="-nv"
    [wget_verbose]=""
    [unrar]="-inul"
    [unrar_verbose]=""
  )

  tmpdir=$(mktemp -d /var/tmp/libgen.XXXXXX) \
    || exit_with_error "can not create temporary directory in /var/tmp"

  unrar=$(find_tool "unrar")
  wget=$(find_tool "wget")
  w3m=$(find_tool "w3m")

  # '@' takes an argument (the tor port), hence the trailing colon
  while getopts "a:cd:efhH:knP:u:U:v@:" OPTION
  do
    case $OPTION in
      n)
        no_action=1
        ;;
      f)
        force_refresh=1
        ;;
      d)
        max_age=${OPTARG}
        ;;
      u)
        if [[ -v "databases[${OPTARG}]" ]]; then
          dbs+=" ${OPTARG}"
        else
          exit_with_error "-u ${OPTARG}: no such database"
        fi
        ;;
      v)
        pv=$(find_tool "pv")
        verbose="_verbose"
        ;;
      H)
        dbhost="${OPTARG}"
        ;;
      P)
        dbport="${OPTARG}"
        ;;
      U)
        dbuser="${OPTARG}"
        ;;
      c)
        if [[ ! -f "${config}" ]]; then
          # heredoc body is kept at column 0: '<<-' strips leading tabs only
          cat <<-EOT > "${config}"
dbhost=${dbhost}
dbport=${dbport}
dbuser=${dbuser}
base=${base}
EOT
        else
          exit_with_error "-c: config file ${config} exists, either remove it or edit it directly"
        fi
        exit
        ;;
      e)
        if [[ -f "$config" ]]; then
          if [[ "$VISUAL" ]]; then "$VISUAL" "$config";
          elif [[ "$EDITOR" ]]; then "$EDITOR" "$config";
          else exit_with_error "-e: no editor configured, can not edit $config"
          fi
        else
          exit_with_error "-e: config file does not exist, create it first (see -c)"
        fi
        exit
        ;;
      a)
        if url_available "${OPTARG}"; then
          base="${OPTARG}"
        else
          exit_with_error "-a ${OPTARG}: repository not available"
        fi
        ;;
      @)
        torsocks=$(find_tool "torsocks")
        export TORSOCKS_TOR_PORT=$OPTARG
        ;;
      k)
        keep_downloaded_files=1
        ;;
      h)
        help
        exit
        ;;
      *)
        exit_with_error "unknown option: $OPTION"
        ;;
    esac
  done

  # urls for dump files (minus datestamp and extension)
  # built only after option parsing so that a repository chosen with -a is honoured
  declare -A urls=(
    [libgen]="${base}/libgen"
    [compact]="${base}/libgen_compact"
    [fiction]="${base}/fiction"
  )

  # without -u every configured database is refreshed
  [[ -z ${dbs} ]] && dbs="${!databases[*]}"

  pushd "$tmpdir" >/dev/null || exit_with_error "can not change directory to $tmpdir"

  # ${dbs} is a space separated list, word splitting is intended here
  for db in ${dbs}; do
    database=${databases[$db]}
    if [[ $(db_exists "$database") ]]; then
      db_dump=$(is_available "${db}" "${max_age}")
      if [[ -n $db_dump ]]; then
        [[ -n $verbose ]] && echo "update available for ${db}: ${db_dump}"
        if [[ -z ${no_action} ]]; then
          # split the option strings into words: an empty string then yields no
          # argument at all instead of a stray empty one
          read -r -a wget_opts <<< "${options[wget${verbose}]}"
          read -r -a unrar_opts <<< "${options[unrar${verbose}]}"
          dbfilename="$(basename "${db_dump}")"

          # $torsocks is unquoted on purpose, it is empty unless -@ was given.
          # Without the dump file there is nothing to import, so bail out before
          # any table is dropped.
          if ! $torsocks "$wget" "${wget_opts[@]}" "${db_dump}"; then
            echo "download of ${db_dump} failed, skipping ${db}" >&2
            continue
          fi

          # dumpcmd writes the (filtered) sql dump to stdout
          if [[ -z "${file_filter[$db]}" ]]; then
            if [[ -n "${stream_filter[$db]}" ]]; then
              dumpcmd() { "$unrar" p -ierr "$dbfilename"|run_filter "${stream_filter[$db]}"; }
            else
              dumpcmd() { "$unrar" p -ierr "$dbfilename"; }
            fi
          else
            dumpname="$("$unrar" lb "$dbfilename")"
            "$unrar" "${unrar_opts[@]}" x "$dbfilename"
            run_filter "${file_filter[$db]}" "$dumpname"
            dumpcmd() { cat "$dumpname"; }
          fi

          drop_tables=$(drop_table_sql "$database")
          [[ -n $drop_tables ]] && dbx "$database" "$drop_tables"
          [[ -n ${before_update[$db]} ]] && dbx "$database" "${before_update[$db]}"
          if [[ -n $verbose ]]; then
            echo "importing $dbfilename into $database"
            dumpcmd | "$pv" | dbx "$database"
          else
            dumpcmd | dbx "$database"
          fi
          [[ -n ${after_update[$db]} ]] && dbx "$database" "${after_update[$db]}"
        fi
      else
        [[ -n $verbose ]] && echo "no update available for ${db}"
      fi
    else
      echo "database '$database' does not exist, please create it before attempting to refresh" >&2
    fi
  done

  popd >/dev/null || exit_with_error "popd failed?"
}
# check whether there is a dump file which is more recent than the current database and no older
# than $max_age
#
# Arguments: $1 - database key (index into the urls array)
#            $2 - maximum age of the dump file, in days
# Globals:   base, urls, w3m (read)
# Outputs:   the url of the newest matching dump file; nothing when there is none
# Returns:   0 when a dump file was found, non-zero otherwise
is_available () {
  local db="$1"
  local max_age="$2"
  local db_age age=0 listing="" result="" timestamp

  db_age=$(db_age "$db")

  while [[ $age -lt $db_age && $age -lt $max_age ]]; do
    # the repository index is fetched once (on the first pass only) and reused
    # for every candidate date instead of being downloaded again per day
    if [[ $age -eq 0 ]]; then
      listing=$("$w3m" -dump "${base}" | awk '{ print $1 }')
    fi

    # date stamp for "age" days ago
    timestamp=$(date -d "@$(($(date +%s) - $((60*60*24*age))))" +%Y-%m-%d)

    # -F: the file name is a literal string, not a regular expression
    result=$(grep -F -- "$(basename "${urls[$db]}_${timestamp}.rar")" <<< "$listing")
    [[ -n $result ]] && break
    ((age++))
  done

  [[ -n $result ]] && echo "$(dirname "${urls[$db]}")"/"${result}"
}
# prepare a database for refresh: ask dbx for one 'DROP TABLE IF EXISTS' statement
# per table which information_schema lists for the database named in $1
drop_table_sql () {
  database="$1"
  local query
  query="SELECT concat('DROP TABLE IF EXISTS ', table_name, ';')"
  query+=" FROM information_schema.tables WHERE table_schema = '$database';"
  dbx "$database" "$query"
}
# look up the database named in $1 in information_schema.schemata
# prints whatever dbx returns for that query (the database name if it exists,
# nothing otherwise); error output from dbx is discarded
db_exists () {
  database="$1"
  local query="select schema_name from information_schema.schemata where schema_name='$database';"
  dbx "$database" "$query" 2>/dev/null
}
# return database age in days
#
# Arguments: $1 - database key (index into the databases and lastmodified arrays)
# Globals:   databases, lastmodified, force_refresh, max_age (read);
#            database (read, used when $1 has no entry in databases)
# Outputs:   whole days since the time returned by the lastmodified query for $1,
#            or $max_age when force_refresh is set
db_age () {
  local db="$1"
  local dbname="$database"
  local now age=0 db_last_modified

  # derive the database name from the key that was passed in rather than from
  # whatever the caller happened to leave in $database
  [[ -n $db && -n ${databases[$db]} ]] && dbname="${databases[$db]}"

  now=$(date +%s)

  if [[ "$force_refresh" -gt 0 ]]; then
    # pretend the database is as old as the oldest acceptable dump
    age=$max_age
  else
    db_last_modified=$(date -d "$(dbx "$dbname" "${lastmodified[$db]}")" +%s)
    age=$(((now-db_last_modified)/60/60/24))
  fi

  printf '%s' "$age"
}
# apply a sed script to a dump
# $1: the sed script
# $2: (optional) file to filter in place; without it the filter is applied to
#     standard input and the result is written to standard output
run_filter () {
  flt="$1"
  file="$2"

  # no file given: act as a stream filter
  if [[ -z "$file" ]]; then
    sed -e "$flt"
    return
  fi

  sed -i -e "$flt" "$file"
}
# verify the database connection by running a trivial query through dbx
# bails out through exit_with_error when that query produces no output
check_credentials () {
  local reply
  reply=$(dbx "" "select true;" 2>/dev/null)
  [[ -n $reply ]] || exit_with_error "database connection error, bad username or password?"
}
# probe the url in $1 with 'wget -q --spider' (run through $torsocks when that is set)
# the exit status is that of the probe command
url_available () {
  url="$1"
  # $torsocks stays unquoted so that it disappears from the command when empty
  local -a probe=($torsocks "$wget" -q --spider "$url")
  "${probe[@]}"
}
# remove the temporary directory, unless keep_downloaded_files is set (-k option)
# in which case only a notice naming the directory is printed
cleanup () {
  if [[ -v keep_downloaded_files ]]; then
    echo "-k option active, temporary directory ${tmpdir} not removed"
  else
    rm -rf "${tmpdir}"
  fi
}
# print program name, version and usage information to standard output
# the defaults shown are taken from the current values of max_age, databases,
# dbhost, dbport, dbuser and base
help () {
  echo "$(basename "$(readlink -f "$0")")" "version $version"
  # heredoc body is kept at column 0: '<<-' strips leading tabs only
  cat <<- EOT
Usage: refresh_libgen OPTIONS
Performs a refresh from a database dump file for the chosen libgen databases.
Make sure the database credentials are configured (in \$HOME/.my.cnf) before
using this tool.
-n do not refresh database
use together with '-v' to check if recent dumps are available
-f force refresh, use this on first install
-v be verbose about what is being updated
-d DAYS only use database dump files no older than DAYS days (default: ${max_age})
-u DBS refresh DBS databases (default: ${!databases[@]})
-H DBHOST database host (${dbhost})
-P DBPORT database port (${dbport})
-U DBUSER database user (${dbuser})
-a REPO dump repository (${base})
-c create a config file using current settings (see -H, -P, -U, -a)
-e edit config file
-@ TORPORT use tor (through torsocks) to connect to libgen server
-k keep downloaded files after exit
-h this help message
EOT
}
# exlock is defined outside this file; give up when its 'prepare' step fails,
# otherwise hand the command line to main
if ! exlock prepare; then
  exit 1
fi
main "$@"