diff --git a/books b/books index 19e3e3e..2857bee 100755 --- a/books +++ b/books @@ -7,8 +7,8 @@ trap "trap_error" TERM trap "trap_clean" EXIT export TOP_PID=$$ -version="0.7.1" -release="20200812" +version="0.7.2" +release="20210601" functions="$(dirname "$0")/books_functions" if [ -f "$functions" ]; then @@ -336,6 +336,7 @@ main () { ;; @) source "$(which torsocks)" on + export TORSOCKS_TOR_PORT=$OPTARG ;; =) if [ -d "${OPTARG}" ]; then @@ -1448,7 +1449,7 @@ help () { -x skip database update (currently only the 'libgen' database can be updated) - -@ use torsocks to connect to the libgen server(s). You'll need to install + -@ TORPORT use torsocks to connect to the libgen server(s). You'll need to install torsocks before using this option; try this in case your ISP (or a transit provider somewhere en-route) blocks access to libgen diff --git a/classify b/classify index 1bf8071..fb3d6ad 100755 --- a/classify +++ b/classify @@ -8,8 +8,8 @@ trap "trap_error" TERM trap "trap_clean" EXIT export TOP_PID=$$ -version="0.5.0" -release="20210516" +version="0.5.1" +release="20210601" functions="$(dirname "$0")/books_functions" if [ -f "$functions" ]; then @@ -59,7 +59,7 @@ main () { # source config file if it exists [[ -f ${config} ]] && source "${config}" - while getopts "owdlnfatVAD:C:X:G@h" OPTION; do + while getopts "owdlnfatVAD:C:X:G@:h" OPTION; do case $OPTION in o) request="$request owi" @@ -111,6 +111,7 @@ main () { ;; @) torsocks=$(find_tool "torsocks") + export TORSOCKS_TOR_PORT=${OPTARG} ;; h) help @@ -132,7 +133,7 @@ main () { [[ -n "$debug" ]] && echo "trying $ident..." - get_xml "$xml" "stdnbr=$ident" + get_xml "$xml" "stdnbr=${ident// }" response=$(get "response" "$xml") case "$response" in @@ -249,7 +250,7 @@ help () { Use: classify [OPTIONS] identifier[,identifier...] Queries OCLC classification service for available data - Supports: DDC, LCC, NLM, FAST, Author and Title + Supports: DDC, LCC, NLM, Author and Title Valid identifiers are ISBN, ISSN, UPC and OCLC/OWI @@ -277,7 +278,7 @@ help () { -V show labels - -@ use torsocks to connect to the OCLC classify service. + -@ PORT use torsocks to connect to the OCLC classify service. use this to avoid getting your IP blocked by OCLC -h show this help message @@ -319,7 +320,7 @@ help () { the resulting file in ~1000 line sections and feed these to this tool, preferably with a random pause between requests to keep OCLC's intrusion detection systems from triggering too early. It is advisable to use - this tool through Tor (using -@ to enable torsocks, make sure it + this tool through Tor (using -@ TORPORT to enable torsocks, make sure it is configured correctly for your Tor instance) to avoid having too many requests from your IP to be registered, this again to avoid your IP being blocked. The OCLC classification service is not diff --git a/refresh_libgen b/refresh_libgen index b080cf5..6a910cb 100755 --- a/refresh_libgen +++ b/refresh_libgen @@ -4,7 +4,7 @@ # refresh libgen databases from dump files version="0.6.2" -release="20210512" +release="20210601" trap "trap_error" TERM trap "trap_clean" EXIT @@ -156,6 +156,7 @@ main () { ;; @) torsocks=$(find_tool "torsocks") + export TORSOCKS_TOR_PORT=$OPTARG ;; k) keep_downloaded_files=1 @@ -305,7 +306,7 @@ help () { -c create a config file using current settings (see -H, -P, -U, -R) -e edit config file - -@ use tor (through torsocks) to connect to libgen server + -@ TORPORT use tor (through torsocks) to connect to libgen server -k keep downloaded files after exit -h this help message diff --git a/update_libgen b/update_libgen index a0a9a05..24893a6 100755 --- a/update_libgen +++ b/update_libgen @@ -66,7 +66,7 @@ main () { declare -A values=() declare -A upsert=() - while getopts "a:D:j:hH:i:l:nP:U:qs:t:u:v@" OPTION + while getopts "a:D:j:hH:i:l:nP:U:qs:ct:u:v@:" OPTION do case $OPTION in j) @@ -75,6 +75,11 @@ main () { s) sql_dump="${OPTARG}" ;; + c) + classify=$(find_tool "classify") + import_metadata=$(find_tool "import_metadata") + classifile="${tmpdir}/classifile" + ;; v) ((verbose++)) ;; @@ -116,7 +121,8 @@ main () { fi ;; @) - torsocks=$(find_toolo "torsocks") + torsocks=$(find_tool "torsocks") + export TORSOCKS_TOR_PORT=${OPTARG} ;; q) quiet=1 @@ -197,6 +203,10 @@ main () { echo fi + if [[ -n "$classifile" && -n "${record['identifierwodash']}" ]]; then + echo "${record['md5']}" >> "$classifile" + fi + keys=${!record[*]} md5="${record[md5]}" @@ -230,6 +240,7 @@ main () { sql+="insert into $table (${columns[$table]%?}) values(${values[$table]%?}) on duplicate key update ${upsert[$table]%?};" fi done + echo "${sql}" >> "${update_sql}" [[ -n $sql_dump ]] && echo "${sql}" >> "${sql_dump}" @@ -249,6 +260,28 @@ main () { [[ $no_action == 0 ]] && dbx "$db" < "${update_sql}" done + + # optionally add classification data to new records + # this will use tor and round-robin through TOR ports if these are + # defined in classify_tor_ports in the config file + if [[ -n "$classifile" && -f $classifile ]]; then + now=$(date +%Y%m%d%H%M) + csvfile="${classify_csv:+$classify_csv/}${now}.csv" + IFS=',' read -ra torports <<< "$classify_tor_ports" + if [[ ${#torports[*]} -gt 0 ]]; then + torpc=${#torports[*]} + fi + upc=0 + while read md5;do + $classify ${torpc:+-@ ${torports[$upc%$torpc]}} -D "$db" ${classify_xml:+-X $classify_xml} -C "$md5" >> "${csvfile}" + ((upc++)) + done < <(cat "$classifile") + + if [[ -f ${csvfile} ]]; then + $import_metadata -d "$db" -f "${classify_fields:-ddc,lcc,fast}" ${classify_sql:+-s $classify_sql/$now.sql} -F "${csvfile}" + fi + fi + } get_current_fields () { @@ -417,7 +450,7 @@ help () { -D DATABASE database name -a APIHOST use APIHOST as API server - -@ use tor (through torsocks) to connect to libgen API server + -@ TORPORT use tor (through torsocks) to connect to libgen API server -q don't warn about missing fields in database or api response -h this help message