#!/usr/bin/env bash
#
# refresh libgen databases from dump files

version="0.6.1"
release="20200805"

# exit_with_error signals the top-level shell with TERM so that errors raised
# inside command substitutions (subshells) still terminate the whole script
trap "trap_error" TERM
trap "trap_exit" EXIT
export TOP_PID=$$

# Entry point: parse options, then refresh every selected database for which
# a sufficiently recent dump file is available.
# Arguments: the command line options (see help)
# Globals:   sets the configuration variables read by the helper functions
#            (dbhost, dbport, dbuser, dbpass, base, urls, database, tmpdir, ...)
main () {
    # PREFERENCES
    config=${XDG_CONFIG_HOME:-$HOME/.config}/books.conf

    # maximum age (in days) of database dump file to use
    max_age=5

    # database server to use
    dbhost="localhost"
    dbport="3306"
    dbuser="libgen"

    # where to get updates. A change here probably necessitates a change in the urls array
    # as dump file names can be site-specific.
    base="http://gen.lib.rus.ec/dbdumps/"
    #base="https://lgdumps.xyz/dumps/"

    # database names
    declare -A databases=(
        [libgen]=libgen
        [compact]=libgen_compact
        [fiction]=libgen_fiction
    )

    # source config file if it exists
    [[ -f ${config} ]] && source "${config}"

    # (mostly) END OF PREFERENCES

    # sql to get time last modified for database
    declare -A lastmodified=(
        [libgen]="select max(timelastmodified) from updated;"
        [compact]="select max(timelastmodified) from updated;"
        [fiction]="select max(timelastmodified) from fiction;"
    )

    # sed script applied to the dump before import (strips DEFINER clauses)
    declare -A filter=(
        [libgen]='s/DEFINER[ ]*=[ ]*[^*]*\*/\*/;s/DEFINER[ ]*=[ ]*[^*]*PROCEDURE/PROCEDURE/;s/DEFINER[ ]*=[ ]*[^*]*FUNCTION/FUNCTION/'
        [compact]='s/DEFINER[ ]*=[ ]*[^*]*\*/\*/;s/DEFINER[ ]*=[ ]*[^*]*PROCEDURE/PROCEDURE/;s/DEFINER[ ]*=[ ]*[^*]*FUNCTION/FUNCTION/'
    )

    # sql to run BEFORE update
    declare -A before_update=(
    )

    # sql to run AFTER update
    declare -A after_update=(
        [compact]="drop trigger updated_edited;create table description (id int(11) not null auto_increment, md5 varchar(32) not null default '', descr varchar(20000) not null default '', toc mediumtext not null, TimeLastModified timestamp not null default current_timestamp on update current_timestamp, primary key (id), unique key md5_unique (md5) using btree, key time (timelastmodified) using btree, key md5_hash (md5) using hash);"
    )

    # tool options, keyed by tool name plus the (possibly empty) verbose suffix
    declare -A options=(
        [wget]="-nv"
        [wget_verbose]=""
        [unrar]="-inul"
        [unrar_verbose]=""
    )

    tmpdir=$(mktemp -d /var/tmp/libgen.XXXXXX) \
        || exit_with_error "could not create temporary directory"

    while getopts "cd:efhH:knp:P:R:u:U:v@" OPTION; do
        case $OPTION in
            n)
                no_action=1
                ;;
            f)
                force_refresh=1
                ;;
            d)
                max_age=${OPTARG}
                ;;
            u)
                if [[ -v "databases[${OPTARG}]" ]]; then
                    dbs+=" ${OPTARG}"
                else
                    exit_with_error "-u ${OPTARG}: no such database"
                fi
                ;;
            v)
                verbose="_verbose"
                ;;
            H)
                dbhost="${OPTARG}"
                ;;
            P)
                dbport="${OPTARG}"
                ;;
            U)
                dbuser="${OPTARG}"
                ;;
            p)
                password="${OPTARG}"
                if [[ -z $password ]]; then
                    password=$(read_password)
                    echo
                fi
                ;;
            c)
                if [[ ! -f "${config}" ]]; then
                    # %q keeps the file safe to source even when a value
                    # contains whitespace or shell metacharacters
                    {
                        printf 'dbhost=%q\n' "${dbhost}"
                        printf 'dbport=%q\n' "${dbport}"
                        printf 'dbuser=%q\n' "${dbuser}"
                        printf 'base=%q\n' "${base}"
                    } > "${config}"
                else
                    exit_with_error "-c: config file ${config} exists, either remove it or edit it directly"
                fi
                exit
                ;;
            e)
                if [[ -f "$config" ]]; then
                    if [[ "$VISUAL" ]]; then
                        "$VISUAL" "$config"
                    elif [[ "$EDITOR" ]]; then
                        "$EDITOR" "$config"
                    else
                        exit_with_error "-e: no editor configured, can not edit $config"
                    fi
                else
                    exit_with_error "-e: config file does not exist, create it first (see -c)"
                fi
                exit
                ;;
            R)
                # the option string and the help text both advertise -R; the
                # handler used to be filed under 'a' and was never reached
                if url_available "${OPTARG}"; then
                    base="${OPTARG}"
                else
                    exit_with_error "-R ${OPTARG}: repository not available"
                fi
                ;;
            @)
                use_torsocks=1
                source "$(command -v torsocks)" on
                ;;
            k)
                keep_downloaded_files=1
                ;;
            h)
                help
                exit
                ;;
            *)
                # do not go on to refresh databases after a mistyped option
                exit_with_error "invalid or incomplete option, use -h for help"
                ;;
        esac
    done

    # urls for dump files (minus datestamp and extension); built after option
    # parsing so that a repository given with -R is actually used
    declare -A urls=(
        [libgen]="${base}/libgen"
        [compact]="${base}/libgen_compact"
        [fiction]="${base}/fiction"
    )

    check_sanity

    [[ -n ${password} ]] && dbpass="-p${password}"

    # skip credential check when using password prompt option - password will be asked often enough as it is...
    [[ ${dbpass} != "-p" ]] && check_credentials

    # database keys never contain whitespace, so a space separated list suffices
    [[ -z ${dbs} ]] && dbs="${!databases[@]}"

    pushd "${tmpdir}" >/dev/null || exit_with_error "can not enter ${tmpdir}"

    local archive dump_file wget_opts unrar_opts

    for db in ${dbs}; do
        database=${databases[$db]}

        if [[ -z $(db_exists "$database") ]]; then
            echo "database '$database' does not exist, please create it before attempting to refresh" >&2
            continue
        fi

        db_dump=$(is_available "${db}" "${max_age}")
        if [[ -z $db_dump ]]; then
            [[ -n $verbose ]] && echo "no update available for ${db}"
            continue
        fi

        [[ -n $verbose ]] && echo "update available for ${db}: ${db_dump}"
        [[ -n ${no_action} ]] && continue

        archive=$(basename "${db_dump}")
        wget_opts=${options[wget${verbose}]}
        unrar_opts=${options[unrar${verbose}]}

        # never touch the existing tables unless there is a dump to import
        if ! wget ${wget_opts:+"${wget_opts}"} "${db_dump}"; then
            echo "download of ${db_dump} failed, skipping ${db}" >&2
            continue
        fi
        unrar ${unrar_opts:+"${unrar_opts}"} x "${archive}"
        dump_file=$(unrar lb "${archive}")
        if [[ ! -s ${dump_file} ]]; then
            echo "could not extract a dump file from ${archive}, skipping ${db}" >&2
            continue
        fi

        [[ -n "${filter[$db]}" ]] && run_filter "${dump_file}" "${filter[$db]}"

        drop_tables=$(drop_table_sql "${database}")
        [[ -n $drop_tables ]] && dbx "${database}" "${drop_tables}"
        [[ -n ${before_update[$db]} ]] && dbx "${database}" "${before_update[$db]}"

        if [[ -n $verbose ]]; then
            echo "importing ${archive} into ${database}"
            pv "${dump_file}" | dbx "${database}"
        else
            dbx "${database}" < "${dump_file}"
        fi

        [[ -n ${after_update[$db]} ]] && dbx "${database}" "${after_update[$db]}"
    done

    popd >/dev/null
}

# Run the mysql client against the configured server.
# Arguments: $1   - database name (may be empty for a server-level query)
#            $2.. - optional sql; when absent, sql is read from stdin
# Globals:   dbhost, dbport, dbuser, dbpass (read)
# Returns:   the exit status of mysql
dbx () {
    local database=$1
    shift

    # an argument array keeps a password containing whitespace in one piece
    # and leaves out the password and database arguments when they are empty
    local -a mysql_args=(-Bsssss -h "${dbhost}" -P "${dbport}" -u "${dbuser}")
    [[ -n ${dbpass} ]] && mysql_args+=("${dbpass}")
    [[ -n ${database} ]] && mysql_args+=("${database}")

    if (( $# > 0 )); then
        mysql "${mysql_args[@]}" -e "$*"
    else
        mysql "${mysql_args[@]}"
    fi
}

# check whether there is a dump file which is more recent than the current database and no older
# than $max_age
# Arguments: $1 - database key (index into urls), $2 - maximum dump age in days
# Outputs:   the url of the most recent matching dump file, nothing if none found
is_available () {
    local db="$1"
    local max_age="$2"
    local current_age now age=0 timestamp wanted listing="" result=""

    current_age=$(db_age "$db")
    now=$(date +%s)

    while [[ $age -lt $current_age && $age -lt $max_age ]]; do
        timestamp=$(date -d "@$(( now - 60 * 60 * 24 * age ))" +%Y-%m-%d)
        wanted=$(basename "${urls[$db]}_${timestamp}.rar")
        # the directory listing does not change between iterations, fetch it once
        [[ -n $listing ]] || listing=$(w3m -dump "${base}")
        # first listing entry whose name contains the wanted file name (literal match)
        result=$(awk -v want="$wanted" 'index($1, want) { print $1; exit }' <<< "$listing")
        [[ -n $result ]] && break
        age=$(( age + 1 ))
    done

    [[ -n $result ]] && printf '%s/%s\n' "$(dirname "${urls[$db]}")" "$result"
}

# drop tables to prepare database for refresh
# Arguments: $1 - database (schema) name
# Outputs:   one 'DROP TABLE IF EXISTS ...;' statement per table in the database
drop_table_sql () {
    local database="$1"
    # filter on the database passed in; the former '$dbname' was never set,
    # so the query matched no tables at all
    dbx "$database" "SELECT concat('DROP TABLE IF EXISTS ', table_name, ';') FROM information_schema.tables WHERE table_schema = '$database';"
}

# returns database name if it exists, nothing otherwise
# Arguments: $1 - database (schema) name
db_exists () {
    local database="$1"
    dbx "$database" "select schema_name from information_schema.schemata where schema_name='$database';" 2>/dev/null
}

# return database age in days
# Arguments: $1 - database key (index into lastmodified)
# Globals:   force_refresh, max_age, database, lastmodified (read)
# Outputs:   age in days, without trailing newline; $max_age when a refresh is forced
db_age () {
    local db="$1"
    local now age=0 db_last_modified
    now=$(date +%s)

    if [[ "$force_refresh" -gt 0 ]]; then
        age=$max_age
    else
        db_last_modified=$(date -d "$(dbx "$database" "${lastmodified[$db]}")" +%s)
        # an unparsable timestamp counts as "just modified" (age 0) rather
        # than producing an arithmetic syntax error
        [[ $db_last_modified =~ ^[0-9]+$ ]] || db_last_modified=$now
        age=$(( (now - db_last_modified) / 60 / 60 / 24 ))
    fi

    printf '%s' "$age"
}

# run filter on dump
# Arguments: $1 - dump file (edited in place), $2 - sed script
run_filter () {
    local dump_file="$1"
    # a local with its own name: assigning to 'filter' here would write into
    # the caller's associative array of the same name
    local sed_script="$2"

    if [[ -n $verbose ]]; then
        echo "running '$sed_script' on '$dump_file'"
    fi

    sed -i -e "$sed_script" -- "$dump_file"
}

# find tool, returns the first|one|found, exit with error message if none found
# Arguments: $1 - tool name, or several alternatives separated by '|'
find_tool () {
    local -a tools
    local tool found=0

    IFS='|' read -ra tools <<< "$*"

    for tool in "${tools[@]}"; do
        # type -P searches PATH for an executable without needing 'which'
        if type -P "$tool" >/dev/null 2>&1; then
            found=1
            break
        fi
    done

    if [[ $found -eq 0 ]]; then
        if [[ ${#tools[@]} -gt 1 ]]; then
            exit_with_error "missing programs: $*; install at least one of these: ${tools[*]} and try again"
        else
            exit_with_error "missing program: $1; please install and try again"
        fi
    fi

    echo "$tool"
}

# read password from command line
# Reads one character at a time without echo, showing a '*' prompt per
# character typed; an empty read (end of line) finishes the input.
# Outputs: the password on stdout
read_password () {
    local password="" prompt="Enter database password:" char

    while IFS= read -p "$prompt" -r -s -n 1 char; do
        [[ -z $char ]] && break
        prompt='*'
        password+="$char"
    done

    printf '%s\n' "$password"
}

# terminate with an error unless a trivial query succeeds on the server
check_credentials () {
    if [[ -z $(dbx "" "select true;" 2>/dev/null) ]]; then
        exit_with_error "database connection error, bad username or password?"
    fi
}

# succeeds when wget can reach the given url
# Arguments: $1 - url to probe
url_available () {
    local url="$1"
    wget -q --spider "$url"
}

# make sure the required external tools are installed
check_sanity () {
    find_tool "w3m" > /dev/null
    find_tool "wget" > /dev/null
    find_tool "unrar" > /dev/null
    # pv is only used to show import progress in verbose mode
    if [[ -n $verbose ]]; then
        find_tool "pv" > /dev/null
    fi
}

# remove the temporary directory unless -k was given
cleanup () {
    if [[ ! -v keep_downloaded_files ]]; then
        # nothing to remove when the script ended before tmpdir was created
        [[ -n ${tmpdir:-} ]] && rm -rf -- "${tmpdir}"
    else
        echo "-k option active, temporary directory ${tmpdir} not removed"
    fi
}

# TERM handler: clean up and exit with failure status
trap_error () {
    cleanup
    # cleanup has been done, keep the EXIT handler from repeating it
    trap - EXIT
    exit 1
}

# EXIT handler: clean up, keep the pending exit status
trap_exit () {
    cleanup
    exit
}

# echo error message to stderr and terminate main
# Arguments: the message
# Globals:   TOP_PID (read) - pid of the top-level shell, which receives TERM
exit_with_error () {
    printf '%s: %s\n' "$(basename "$0")" "$*" >&2
    kill -s TERM "$TOP_PID"
}

# print version and usage information
help () {
    printf '%s version %s\n' "$(basename "$(readlink -f "$0")")" "$version"
    cat <<EOT

Usage: refresh_libgen OPTIONS

Performs a refresh from a database dump file for the chosen libgen databases.

    -n          do not refresh database
                use together with '-v' to check if recent dumps are available
    -f          force refresh, use this on first install
    -v          be verbose about what is being updated
    -d DAYS     only use database dump files no older than DAYS days (default: ${max_age})
    -u DBS      refresh DBS databases (default: ${!databases[@]})

    -H DBHOST   database host (${dbhost})
    -P DBPORT   database port (${dbport})
    -U DBUSER   database user (${dbuser})
    -R REPO     dump repository (${base})
    -c          create a config file using current settings (see -H, -P, -U, -R)
    -e          edit config file

    -p DBPASS   database password (cache password for this session)
                use empty string ("") to get password prompt
    -@          use tor (through torsocks) to connect to libgen server
    -k          keep downloaded files after exit
    -h          this help message

EOT
}

main "$@"