#!/usr/bin/env bash
# shellcheck disable=SC2034,SC1090
#
# refresh libgen databases from dump files
#
# Helper functions that are not defined in this file (exlock, find_tool, dbx,
# exit_with_error, trap_error, trap_clean) are expected to come from the
# sourced 'books_functions' file which lives next to this script.

version="0.7.0"
release="20210701"

trap "trap_error" TERM
trap "trap_clean" EXIT
export TOP_PID=$$

functions="$(dirname "$0")/books_functions"
if [ -f "$functions" ]; then
	source "$functions"
else
	echo "$functions not found"
	exit 1
fi

# Entry point: parse options, then for every selected database check whether
# a sufficiently recent dump is available, download it and import it.
# Arguments: the script's command line ("$@")
main () {

	exlock now || exit 1

	# PREFERENCES
	config=${XDG_CONFIG_HOME:-$HOME/.config}/books.conf

	# maximum age (in days) of database dump file to use
	max_age=5

	# database server to use
	dbhost="localhost"
	dbport="3306"
	dbuser="libgen"

	# where to get updates. A change here probably necessitates a change in the urls array
	# as dump file names can be site-specific.
	base="http://libgen.rs/dbdumps/"

	# database names
	declare -A databases=(
		[libgen]=libgen
		[compact]=libgen_compact
		[fiction]=libgen_fiction
	)

	# source config file if it exists
	[[ -f ${config} ]] && source "${config}"

	# (mostly) END OF PREFERENCES

	# sql to get time last modified for database
	declare -A lastmodified=(
		[libgen]="select max(timelastmodified) from updated;"
		[compact]="select max(timelastmodified) from updated;"
		[fiction]="select max(timelastmodified) from fiction;"
	)

	# stream filters can be applied during extraction, saving time and storage space
	declare -A stream_filter=(
		[libgen]='s/DEFINER[ ]*=[ ]*[^*]*\*/\*/;s/DEFINER[ ]*=[ ]*[^*]*PROCEDURE/PROCEDURE/;s/DEFINER[ ]*=[ ]*[^*]*FUNCTION/FUNCTION/;s/^INSERT INTO `updated_edited`.*$/-- skip/;s/^INSERT INTO `description_edited`.*$/-- skip/'
		[compact]='s/DEFINER[ ]*=[ ]*[^*]*\*/\*/;s/DEFINER[ ]*=[ ]*[^*]*PROCEDURE/PROCEDURE/;s/DEFINER[ ]*=[ ]*[^*]*FUNCTION/FUNCTION/;s/^INSERT INTO `updated_edited`.*$/-- skip/'
	)

	# file filters are used in case the whole file needs to be extracted before the filter can be applied
	# if both a stream filter as well as a file filter is defined, only the file filter is used
	declare -A file_filter=(
	)

	# sql to run BEFORE update
	declare -A before_update=(
	)

	# sql to run AFTER update
	declare -A after_update=(
		[libgen]="drop trigger libgen_description_update_all"
		[compact]="drop trigger libgen_description_update_all;create table description (id int(11) not null auto_increment, md5 varchar(32) not null default '', descr varchar(20000) not null default '', toc mediumtext not null, TimeLastModified timestamp not null default current_timestamp on update current_timestamp, primary key (id), unique key md5_unique (md5) using btree, key time (timelastmodified) using btree, key md5_hash (md5) using hash);"
	)

	# extra command line options per tool, keyed by tool name plus an
	# optional "_verbose" suffix (see -v)
	declare -A options=(
		[wget]="-nv"
		[wget_verbose]=""
		[unrar]="-inul"
		[unrar_verbose]=""
	)

	# tmpdir stays global on purpose: cleanup() needs it after main returns
	tmpdir=$(mktemp -d /var/tmp/libgen.XXXXXX) \
		|| exit_with_error "can not create temporary directory"

	unrar=$(find_tool "unrar")
	wget=$(find_tool "wget")
	w3m=$(find_tool "w3m")

	# '@' takes an argument (the tor port), hence the trailing colon
	while getopts "a:cd:efhH:knP:u:U:v@:" OPTION
	do
		case $OPTION in
			n)
				no_action=1
				;;
			f)
				force_refresh=1
				;;
			d)
				max_age=${OPTARG}
				;;
			u)
				if [[ -v "databases[${OPTARG}]" ]]; then
					dbs+=" ${OPTARG}"
				else
					exit_with_error "-u ${OPTARG}: no such database"
				fi
				;;
			v)
				pv=$(find_tool "pv")
				verbose="_verbose"
				;;
			H)
				dbhost="${OPTARG}"
				;;
			P)
				dbport="${OPTARG}"
				;;
			U)
				dbuser="${OPTARG}"
				;;
			c)
				if [[ ! -f "${config}" ]]; then
					cat <<-EOT > "${config}"
					dbhost=${dbhost}
					dbport=${dbport}
					dbuser=${dbuser}
					base=${base}
					EOT
				else
					exit_with_error "-c: config file ${config} exists, either remove it or edit it directly"
				fi
				exit
				;;
			e)
				if [[ -f "$config" ]]; then
					if [[ "$VISUAL" ]]; then
						"$VISUAL" "$config"
					elif [[ "$EDITOR" ]]; then
						"$EDITOR" "$config"
					else
						exit_with_error "-e: no editor configured, can not edit $config"
					fi
				else
					exit_with_error "-e: config file does not exist, create it first (see -c)"
				fi
				exit
				;;
			a)
				if url_available "${OPTARG}"; then
					base="${OPTARG}"
				else
					exit_with_error "-a ${OPTARG}: repository not available"
				fi
				;;
			@)
				torsocks=$(find_tool "torsocks")
				export TORSOCKS_TOR_PORT=$OPTARG
				;;
			k)
				keep_downloaded_files=1
				;;
			h)
				help
				exit
				;;
			*)
				exit_with_error "unknown option: $OPTION"
				;;
		esac
	done

	# urls for dump files (minus datestamp and extension)
	# built AFTER option parsing so a repository given with -a is used for
	# the download urls as well as for the directory listing
	declare -A urls=(
		[libgen]="${base}/libgen"
		[compact]="${base}/libgen_compact"
		[fiction]="${base}/fiction"
	)

	[[ -z ${dbs} ]] && dbs="${!databases[*]}"

	# split the per-tool option strings into words once; an empty option
	# string yields an empty array so no empty argument is ever passed on.
	# The keys are the literal tool names, not the values found by find_tool.
	local -a wget_opts=() unrar_opts=()
	read -r -a wget_opts <<<"${options[wget${verbose}]}"
	read -r -a unrar_opts <<<"${options[unrar${verbose}]}"

	pushd "$tmpdir" >/dev/null || exit_with_error "can not change directory to $tmpdir"

	# word splitting on $dbs is intentional, it is a space-separated list
	for db in ${dbs}; do
		database=${databases[$db]}

		if [[ -z $(db_exists "$database") ]]; then
			echo "database '$database' does not exist, please create it before attempting to refresh" >&2
			continue
		fi

		db_dump=$(is_available "${db}" "${max_age}")
		if [[ -z $db_dump ]]; then
			[[ -n $verbose ]] && echo "no update available for ${db}"
			continue
		fi

		[[ -n $verbose ]] && echo "update available for ${db}: ${db_dump}"
		[[ -n ${no_action} ]] && continue

		dbfilename="$(basename "${db_dump}")"

		# never touch the database unless the dump actually arrived: the
		# next steps drop all tables
		if ! $torsocks "$wget" "${wget_opts[@]}" "${db_dump}" || [[ ! -s $dbfilename ]]; then
			echo "download of ${db_dump} failed, skipping ${db}" >&2
			continue
		fi

		# dumpcmd writes the (filtered) sql dump to stdout
		if [[ -z "${file_filter[$db]}" ]]; then
			if [[ -n "${stream_filter[$db]}" ]]; then
				dumpcmd() { "$unrar" p -ierr "$dbfilename" | run_filter "${stream_filter[$db]}"; }
			else
				dumpcmd() { "$unrar" p -ierr "$dbfilename"; }
			fi
		else
			# extract first, then filter the extracted file in place
			dumpname="$("$unrar" lb "$dbfilename")"
			"$unrar" x "${unrar_opts[@]}" "$dbfilename"
			run_filter "${file_filter[$db]}" "$dumpname"
			dumpcmd() { cat "$dumpname"; }
		fi

		drop_tables=$(drop_table_sql "$database")
		[[ -n $drop_tables ]] && dbx "$database" "$drop_tables"
		[[ -n ${before_update[$db]} ]] && dbx "$database" "${before_update[$db]}"

		if [[ -n $verbose ]]; then
			echo "importing $dbfilename into $database"
			dumpcmd | $pv | dbx "$database"
		else
			dumpcmd | dbx "$database"
		fi

		[[ -n ${after_update[$db]} ]] && dbx "$database" "${after_update[$db]}"
	done

	popd >/dev/null || exit_with_error "popd failed?"
}

# check whether there is a dump file which is more recent than the current database and no older
# than $max_age
# Arguments: $1 - database key (index into urls/lastmodified), $2 - maximum age in days
# Outputs:   url of the dump file on stdout, nothing if none was found
is_available () {
	local db="$1"
	local max_age="$2"
	local db_age age=0 timestamp listing="" result=""

	db_age=$(db_age "$db")

	while [[ $age -lt $db_age && $age -lt $max_age ]]; do
		timestamp=$(date -d "@$(($(date +%s) - 60*60*24*age))" +%Y-%m-%d)
		# the repository listing does not depend on the day being probed:
		# fetch it on first use only instead of once per day
		[[ -z $listing ]] && listing=$("$w3m" -dump "${base}" | awk '{ print $1 }')
		# fixed-string match, the file name contains regex metacharacters (.)
		result=$(grep -F -m 1 -- "$(basename "${urls[$db]}_${timestamp}.rar")" <<<"$listing")
		[[ -n $result ]] && break
		((age++))
	done

	[[ -n $result ]] && echo "$(dirname "${urls[$db]}")/${result}"
}

# drop tables to prepare database for refresh
# Arguments: $1 - database name
# Outputs:   whatever dbx returns for a query generating one DROP TABLE
#            statement per table in the database
drop_table_sql () {
	local database="$1"
	dbx "$database" "SELECT concat('DROP TABLE IF EXISTS ', table_name, ';') FROM information_schema.tables WHERE table_schema = '$database';"
}

# returns database name if it exists, nothing otherwise
# Arguments: $1 - database name
db_exists () {
	local database="$1"
	dbx "$database" "select schema_name from information_schema.schemata where schema_name='$database';" 2>/dev/null
}

# return database age in days
# Arguments: $1 - database key (index into databases/lastmodified)
# Outputs:   age in days on stdout (without trailing newline); $max_age when
#            a refresh is forced (-f)
db_age () {
	local db="$1"
	local now age=0 db_last_modified

	now=$(date +%s)
	if [[ "$force_refresh" -gt 0 ]]; then
		age=$max_age
	else
		# derive the database name from the key instead of relying on the
		# caller having set a global $database
		db_last_modified=$(date -d "$(dbx "${databases[$db]}" "${lastmodified[$db]}")" +%s)
		age=$(((now-db_last_modified)/60/60/24))
	fi

	echo -n "$age"
}

# run filter on dump
# Arguments: $1 - sed script, $2 - (optional) file to filter in place;
#            without $2 the filter runs on stdin -> stdout
run_filter () {
	local flt="$1"
	local file="$2"

	if [[ -n "$file" ]]; then
		sed -i -e "$flt" "$file"
	else
		sed -e "$flt"
	fi
}

# exit with an error when a trivial query can not be run against the server
check_credentials () {
	if [[ ! $(dbx "" "select true;" 2>/dev/null) ]]; then
		exit_with_error "database connection error, bad username or password?"
	fi
}

# check whether a url can be reached (through tor when -@ was given earlier)
# Arguments: $1 - url
# Returns:   exit status of 'wget --spider'
url_available () {
	local url="$1"
	$torsocks "$wget" -q --spider "$url"
}

# remove the temporary directory unless -k was given
# NOTE(review): not called from this file, presumably invoked by trap_clean
cleanup () {
	if [[ ! -v keep_downloaded_files ]]; then
		rm -rf "${tmpdir}"
	else
		echo "-k option active, temporary directory ${tmpdir} not removed"
	fi
}

# show version and usage; reads the current preferences so it reports the
# defaults in effect
help () {
	echo "$(basename "$(readlink -f "$0")")" "version $version"
	cat <<- EOT

	Usage: refresh_libgen OPTIONS

	Performs a refresh from a database dump file for the chosen libgen databases.
	Make sure the database credentials are configured (in \$HOME/.my.cnf) before
	using this tool.

	    -n          do not refresh database
	                use together with '-v' to check if recent dumps are available
	    -f          force refresh, use this on first install
	    -v          be verbose about what is being updated
	    -d DAYS     only use database dump files no older than DAYS days (default: ${max_age})
	    -u DBS      refresh DBS databases (default: ${!databases[*]})

	    -H DBHOST   database host (${dbhost})
	    -P DBPORT   database port (${dbport})
	    -U DBUSER   database user (${dbuser})
	    -a REPO     dump repository (${base})
	    -c          create a config file using current settings (see -H, -P, -U, -a)
	    -e          edit config file

	    -@ TORPORT  use tor (through torsocks) to connect to libgen server
	    -k          keep downloaded files after exit
	    -h          this help message

	EOT
}

exlock prepare || exit 1

main "$@"