update_libgen: add -c option to integrate classify into the update process
config file: add these fields (values for demonstration purposes):
```
classify_xml="/home/username/Project/libgen_classify/xml"
classify_csv="/home/username/Project/libgen_classify/csv"
classify_sql="/home/username/Project/libgen_classify/sql"
classify_fields="ddc,lcc,nlm,fast,title,author"
classify_tor_ports="9100,9102,9104,9106,9108"
```
These fields are used by update_libgen to configure classify and import_metadata.
All programs: the -@ switch now takes a mandatory parameter: -@ TORPORT
This commit is contained in:
parent
b5af8887b3
commit
391b0189fb
4 changed files with 51 additions and 15 deletions
7
books
7
books
|
@ -7,8 +7,8 @@ trap "trap_error" TERM
|
||||||
trap "trap_clean" EXIT
|
trap "trap_clean" EXIT
|
||||||
export TOP_PID=$$
|
export TOP_PID=$$
|
||||||
|
|
||||||
version="0.7.1"
|
version="0.7.2"
|
||||||
release="20200812"
|
release="20210601"
|
||||||
|
|
||||||
functions="$(dirname "$0")/books_functions"
|
functions="$(dirname "$0")/books_functions"
|
||||||
if [ -f "$functions" ]; then
|
if [ -f "$functions" ]; then
|
||||||
|
@ -336,6 +336,7 @@ main () {
|
||||||
;;
|
;;
|
||||||
@)
|
@)
|
||||||
source "$(which torsocks)" on
|
source "$(which torsocks)" on
|
||||||
|
export TORSOCKS_TOR_PORT=$OPTARG
|
||||||
;;
|
;;
|
||||||
=)
|
=)
|
||||||
if [ -d "${OPTARG}" ]; then
|
if [ -d "${OPTARG}" ]; then
|
||||||
|
@ -1448,7 +1449,7 @@ help () {
|
||||||
-x skip database update
|
-x skip database update
|
||||||
(currently only the 'libgen' database can be updated)
|
(currently only the 'libgen' database can be updated)
|
||||||
|
|
||||||
-@ use torsocks to connect to the libgen server(s). You'll need to install
|
-@ TORPORT use torsocks to connect to the libgen server(s). You'll need to install
|
||||||
torsocks before using this option; try this in case your ISP
|
torsocks before using this option; try this in case your ISP
|
||||||
(or a transit provider somewhere en-route) blocks access to libgen
|
(or a transit provider somewhere en-route) blocks access to libgen
|
||||||
|
|
||||||
|
|
15
classify
15
classify
|
@ -8,8 +8,8 @@ trap "trap_error" TERM
|
||||||
trap "trap_clean" EXIT
|
trap "trap_clean" EXIT
|
||||||
export TOP_PID=$$
|
export TOP_PID=$$
|
||||||
|
|
||||||
version="0.5.0"
|
version="0.5.1"
|
||||||
release="20210516"
|
release="20210601"
|
||||||
|
|
||||||
functions="$(dirname "$0")/books_functions"
|
functions="$(dirname "$0")/books_functions"
|
||||||
if [ -f "$functions" ]; then
|
if [ -f "$functions" ]; then
|
||||||
|
@ -59,7 +59,7 @@ main () {
|
||||||
# source config file if it exists
|
# source config file if it exists
|
||||||
[[ -f ${config} ]] && source "${config}"
|
[[ -f ${config} ]] && source "${config}"
|
||||||
|
|
||||||
while getopts "owdlnfatVAD:C:X:G@h" OPTION; do
|
while getopts "owdlnfatVAD:C:X:G@:h" OPTION; do
|
||||||
case $OPTION in
|
case $OPTION in
|
||||||
o)
|
o)
|
||||||
request="$request owi"
|
request="$request owi"
|
||||||
|
@ -111,6 +111,7 @@ main () {
|
||||||
;;
|
;;
|
||||||
@)
|
@)
|
||||||
torsocks=$(find_tool "torsocks")
|
torsocks=$(find_tool "torsocks")
|
||||||
|
export TORSOCKS_TOR_PORT=${OPTARG}
|
||||||
;;
|
;;
|
||||||
h)
|
h)
|
||||||
help
|
help
|
||||||
|
@ -132,7 +133,7 @@ main () {
|
||||||
|
|
||||||
[[ -n "$debug" ]] && echo "trying $ident..."
|
[[ -n "$debug" ]] && echo "trying $ident..."
|
||||||
|
|
||||||
get_xml "$xml" "stdnbr=$ident"
|
get_xml "$xml" "stdnbr=${ident// }"
|
||||||
response=$(get "response" "$xml")
|
response=$(get "response" "$xml")
|
||||||
|
|
||||||
case "$response" in
|
case "$response" in
|
||||||
|
@ -249,7 +250,7 @@ help () {
|
||||||
Use: classify [OPTIONS] identifier[,identifier...]
|
Use: classify [OPTIONS] identifier[,identifier...]
|
||||||
|
|
||||||
Queries OCLC classification service for available data
|
Queries OCLC classification service for available data
|
||||||
Supports: DDC, LCC, NLM, FAST, Author and Title
|
Supports: DDC, LCC, NLM, Author and Title
|
||||||
|
|
||||||
Valid identifiers are ISBN, ISSN, UPC and OCLC/OWI
|
Valid identifiers are ISBN, ISSN, UPC and OCLC/OWI
|
||||||
|
|
||||||
|
@ -277,7 +278,7 @@ help () {
|
||||||
|
|
||||||
-V show labels
|
-V show labels
|
||||||
|
|
||||||
-@ use torsocks to connect to the OCLC classify service.
|
-@ PORT use torsocks to connect to the OCLC classify service.
|
||||||
use this to avoid getting your IP blocked by OCLC
|
use this to avoid getting your IP blocked by OCLC
|
||||||
|
|
||||||
-h show this help message
|
-h show this help message
|
||||||
|
@ -319,7 +320,7 @@ help () {
|
||||||
the resulting file in ~1000 line sections and feed these to this tool,
|
the resulting file in ~1000 line sections and feed these to this tool,
|
||||||
preferably with a random pause between requests to keep OCLC's intrusion
|
preferably with a random pause between requests to keep OCLC's intrusion
|
||||||
detection systems from triggering too early. It is advisable to use
|
detection systems from triggering too early. It is advisable to use
|
||||||
this tool through Tor (using -@ to enable torsocks, make sure it
|
this tool through Tor (using -@ TORPORT to enable torsocks, make sure it
|
||||||
is configured correctly for your Tor instance) to avoid having too
|
is configured correctly for your Tor instance) to avoid having too
|
||||||
many requests from your IP to be registered, this again to avoid
|
many requests from your IP to be registered, this again to avoid
|
||||||
your IP being blocked. The OCLC classification service is not
|
your IP being blocked. The OCLC classification service is not
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
# refresh libgen databases from dump files
|
# refresh libgen databases from dump files
|
||||||
|
|
||||||
version="0.6.2"
|
version="0.6.2"
|
||||||
release="20210512"
|
release="20210601"
|
||||||
|
|
||||||
trap "trap_error" TERM
|
trap "trap_error" TERM
|
||||||
trap "trap_clean" EXIT
|
trap "trap_clean" EXIT
|
||||||
|
@ -156,6 +156,7 @@ main () {
|
||||||
;;
|
;;
|
||||||
@)
|
@)
|
||||||
torsocks=$(find_tool "torsocks")
|
torsocks=$(find_tool "torsocks")
|
||||||
|
export TORSOCKS_TOR_PORT=$OPTARG
|
||||||
;;
|
;;
|
||||||
k)
|
k)
|
||||||
keep_downloaded_files=1
|
keep_downloaded_files=1
|
||||||
|
@ -305,7 +306,7 @@ help () {
|
||||||
-c create a config file using current settings (see -H, -P, -U, -R)
|
-c create a config file using current settings (see -H, -P, -U, -R)
|
||||||
-e edit config file
|
-e edit config file
|
||||||
|
|
||||||
-@ use tor (through torsocks) to connect to libgen server
|
-@ TORPORT use tor (through torsocks) to connect to libgen server
|
||||||
-k keep downloaded files after exit
|
-k keep downloaded files after exit
|
||||||
-h this help message
|
-h this help message
|
||||||
|
|
||||||
|
|
|
@ -66,7 +66,7 @@ main () {
|
||||||
declare -A values=()
|
declare -A values=()
|
||||||
declare -A upsert=()
|
declare -A upsert=()
|
||||||
|
|
||||||
while getopts "a:D:j:hH:i:l:nP:U:qs:t:u:v@" OPTION
|
while getopts "a:D:j:hH:i:l:nP:U:qs:ct:u:v@:" OPTION
|
||||||
do
|
do
|
||||||
case $OPTION in
|
case $OPTION in
|
||||||
j)
|
j)
|
||||||
|
@ -75,6 +75,11 @@ main () {
|
||||||
s)
|
s)
|
||||||
sql_dump="${OPTARG}"
|
sql_dump="${OPTARG}"
|
||||||
;;
|
;;
|
||||||
|
c)
|
||||||
|
classify=$(find_tool "classify")
|
||||||
|
import_metadata=$(find_tool "import_metadata")
|
||||||
|
classifile="${tmpdir}/classifile"
|
||||||
|
;;
|
||||||
v)
|
v)
|
||||||
((verbose++))
|
((verbose++))
|
||||||
;;
|
;;
|
||||||
|
@ -116,7 +121,8 @@ main () {
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
@)
|
@)
|
||||||
torsocks=$(find_toolo "torsocks")
|
torsocks=$(find_tool "torsocks")
|
||||||
|
export TORSOCKS_TOR_PORT=${OPTARG}
|
||||||
;;
|
;;
|
||||||
q)
|
q)
|
||||||
quiet=1
|
quiet=1
|
||||||
|
@ -197,6 +203,10 @@ main () {
|
||||||
echo
|
echo
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [[ -n "$classifile" && -n "${record['identifierwodash']}" ]]; then
|
||||||
|
echo "${record['md5']}" >> "$classifile"
|
||||||
|
fi
|
||||||
|
|
||||||
keys=${!record[*]}
|
keys=${!record[*]}
|
||||||
|
|
||||||
md5="${record[md5]}"
|
md5="${record[md5]}"
|
||||||
|
@ -230,6 +240,7 @@ main () {
|
||||||
sql+="insert into $table (${columns[$table]%?}) values(${values[$table]%?}) on duplicate key update ${upsert[$table]%?};"
|
sql+="insert into $table (${columns[$table]%?}) values(${values[$table]%?}) on duplicate key update ${upsert[$table]%?};"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
echo "${sql}" >> "${update_sql}"
|
echo "${sql}" >> "${update_sql}"
|
||||||
[[ -n $sql_dump ]] && echo "${sql}" >> "${sql_dump}"
|
[[ -n $sql_dump ]] && echo "${sql}" >> "${sql_dump}"
|
||||||
|
|
||||||
|
@ -249,6 +260,28 @@ main () {
|
||||||
|
|
||||||
[[ $no_action == 0 ]] && dbx "$db" < "${update_sql}"
|
[[ $no_action == 0 ]] && dbx "$db" < "${update_sql}"
|
||||||
done
|
done
|
||||||
|
|
||||||
|
# optionally add classification data to new records
|
||||||
|
# this will use tor and round-robin through TOR ports if these are
|
||||||
|
# defined in classify_tor_ports in the config file
|
||||||
|
if [[ -n "$classifile" && -f $classifile ]]; then
|
||||||
|
now=$(date +%Y%m%d%H%M)
|
||||||
|
csvfile="${classify_csv:+$classify_csv/}${now}.csv"
|
||||||
|
IFS=',' read -ra torports <<< "$classify_tor_ports"
|
||||||
|
if [[ ${#torports[*]} -gt 0 ]]; then
|
||||||
|
torpc=${#torports[*]}
|
||||||
|
fi
|
||||||
|
upc=0
|
||||||
|
while read md5;do
|
||||||
|
$classify ${torpc:+-@ ${torports[$upc%$torpc]}} -D "$db" ${classify_xml:+-X $classify_xml} -C "$md5" >> "${csvfile}"
|
||||||
|
((upc++))
|
||||||
|
done < <(cat "$classifile")
|
||||||
|
|
||||||
|
if [[ -f ${csvfile} ]]; then
|
||||||
|
$import_metadata -d "$db" -f "${classify_fields:-ddc,lcc,fast}" ${classify_sql:+-s $classify_sql/$now.sql} -F "${csvfile}"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
get_current_fields () {
|
get_current_fields () {
|
||||||
|
@ -417,7 +450,7 @@ help () {
|
||||||
-D DATABASE database name
|
-D DATABASE database name
|
||||||
|
|
||||||
-a APIHOST use APIHOST as API server
|
-a APIHOST use APIHOST as API server
|
||||||
-@ use tor (through torsocks) to connect to libgen API server
|
-@ TORPORT use tor (through torsocks) to connect to libgen API server
|
||||||
-q don't warn about missing fields in database or api response
|
-q don't warn about missing fields in database or api response
|
||||||
-h this help message
|
-h this help message
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue