update_libgen: add -c option to integrate classify into the update process
config file: add these fields (values are for demonstration purposes only):

```
classify_xml="/home/username/Project/libgen_classify/xml"
classify_csv="/home/username/Project/libgen_classify/csv"
classify_sql="/home/username/Project/libgen_classify/sql"
classify_fields="ddc,lcc,nlm,fast,title,author"
classify_tor_ports="9100,9102,9104,9106,9108"
```

These fields are used by update_libgen to configure classify and import_metadata.

All programs: the -@ switch now takes a mandatory parameter: -@ TORPORT
This commit is contained in:
parent
b5af8887b3
commit
391b0189fb
4 changed files with 51 additions and 15 deletions
7
books
7
books
|
@ -7,8 +7,8 @@ trap "trap_error" TERM
|
|||
trap "trap_clean" EXIT
|
||||
export TOP_PID=$$
|
||||
|
||||
version="0.7.1"
|
||||
release="20200812"
|
||||
version="0.7.2"
|
||||
release="20210601"
|
||||
|
||||
functions="$(dirname "$0")/books_functions"
|
||||
if [ -f "$functions" ]; then
|
||||
|
@ -336,6 +336,7 @@ main () {
|
|||
;;
|
||||
@)
|
||||
source "$(which torsocks)" on
|
||||
export TORSOCKS_TOR_PORT=$OPTARG
|
||||
;;
|
||||
=)
|
||||
if [ -d "${OPTARG}" ]; then
|
||||
|
@ -1448,7 +1449,7 @@ help () {
|
|||
-x skip database update
|
||||
(currently only the 'libgen' database can be updated)
|
||||
|
||||
-@ use torsocks to connect to the libgen server(s). You'll need to install
|
||||
-@ TORPORT use torsocks to connect to the libgen server(s). You'll need to install
|
||||
torsocks before using this option; try this in case your ISP
|
||||
(or a transit provider somewhere en-route) blocks access to libgen
|
||||
|
||||
|
|
15
classify
15
classify
|
@ -8,8 +8,8 @@ trap "trap_error" TERM
|
|||
trap "trap_clean" EXIT
|
||||
export TOP_PID=$$
|
||||
|
||||
version="0.5.0"
|
||||
release="20210516"
|
||||
version="0.5.1"
|
||||
release="20210601"
|
||||
|
||||
functions="$(dirname "$0")/books_functions"
|
||||
if [ -f "$functions" ]; then
|
||||
|
@ -59,7 +59,7 @@ main () {
|
|||
# source config file if it exists
|
||||
[[ -f ${config} ]] && source "${config}"
|
||||
|
||||
while getopts "owdlnfatVAD:C:X:G@h" OPTION; do
|
||||
while getopts "owdlnfatVAD:C:X:G@:h" OPTION; do
|
||||
case $OPTION in
|
||||
o)
|
||||
request="$request owi"
|
||||
|
@ -111,6 +111,7 @@ main () {
|
|||
;;
|
||||
@)
|
||||
torsocks=$(find_tool "torsocks")
|
||||
export TORSOCKS_TOR_PORT=${OPTARG}
|
||||
;;
|
||||
h)
|
||||
help
|
||||
|
@ -132,7 +133,7 @@ main () {
|
|||
|
||||
[[ -n "$debug" ]] && echo "trying $ident..."
|
||||
|
||||
get_xml "$xml" "stdnbr=$ident"
|
||||
get_xml "$xml" "stdnbr=${ident// }"
|
||||
response=$(get "response" "$xml")
|
||||
|
||||
case "$response" in
|
||||
|
@ -249,7 +250,7 @@ help () {
|
|||
Use: classify [OPTIONS] identifier[,identifier...]
|
||||
|
||||
Queries OCLC classification service for available data
|
||||
Supports: DDC, LCC, NLM, FAST, Author and Title
|
||||
Supports: DDC, LCC, NLM, Author and Title
|
||||
|
||||
Valid identifiers are ISBN, ISSN, UPC and OCLC/OWI
|
||||
|
||||
|
@ -277,7 +278,7 @@ help () {
|
|||
|
||||
-V show labels
|
||||
|
||||
-@ use torsocks to connect to the OCLC classify service.
|
||||
-@ PORT use torsocks to connect to the OCLC classify service.
|
||||
use this to avoid getting your IP blocked by OCLC
|
||||
|
||||
-h show this help message
|
||||
|
@ -319,7 +320,7 @@ help () {
|
|||
the resulting file in ~1000 line sections and feed these to this tool,
|
||||
preferably with a random pause between requests to keep OCLC's intrusion
|
||||
detection systems from triggering too early. It is advisable to use
|
||||
this tool through Tor (using -@ to enable torsocks, make sure it
|
||||
this tool through Tor (using -@ TORPORT to enable torsocks, make sure it
|
||||
is configured correctly for your Tor instance) to avoid having too
|
||||
many requests from your IP to be registered, this again to avoid
|
||||
your IP being blocked. The OCLC classification service is not
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
# refresh libgen databases from dump files
|
||||
|
||||
version="0.6.2"
|
||||
release="20210512"
|
||||
release="20210601"
|
||||
|
||||
trap "trap_error" TERM
|
||||
trap "trap_clean" EXIT
|
||||
|
@ -156,6 +156,7 @@ main () {
|
|||
;;
|
||||
@)
|
||||
torsocks=$(find_tool "torsocks")
|
||||
export TORSOCKS_TOR_PORT=$OPTARG
|
||||
;;
|
||||
k)
|
||||
keep_downloaded_files=1
|
||||
|
@ -305,7 +306,7 @@ help () {
|
|||
-c create a config file using current settings (see -H, -P, -U, -R)
|
||||
-e edit config file
|
||||
|
||||
-@ use tor (through torsocks) to connect to libgen server
|
||||
-@ TORPORT use tor (through torsocks) to connect to libgen server
|
||||
-k keep downloaded files after exit
|
||||
-h this help message
|
||||
|
||||
|
|
|
@ -66,7 +66,7 @@ main () {
|
|||
declare -A values=()
|
||||
declare -A upsert=()
|
||||
|
||||
while getopts "a:D:j:hH:i:l:nP:U:qs:t:u:v@" OPTION
|
||||
while getopts "a:D:j:hH:i:l:nP:U:qs:ct:u:v@:" OPTION
|
||||
do
|
||||
case $OPTION in
|
||||
j)
|
||||
|
@ -75,6 +75,11 @@ main () {
|
|||
s)
|
||||
sql_dump="${OPTARG}"
|
||||
;;
|
||||
c)
|
||||
classify=$(find_tool "classify")
|
||||
import_metadata=$(find_tool "import_metadata")
|
||||
classifile="${tmpdir}/classifile"
|
||||
;;
|
||||
v)
|
||||
((verbose++))
|
||||
;;
|
||||
|
@ -116,7 +121,8 @@ main () {
|
|||
fi
|
||||
;;
|
||||
@)
|
||||
torsocks=$(find_toolo "torsocks")
|
||||
torsocks=$(find_tool "torsocks")
|
||||
export TORSOCKS_TOR_PORT=${OPTARG}
|
||||
;;
|
||||
q)
|
||||
quiet=1
|
||||
|
@ -197,6 +203,10 @@ main () {
|
|||
echo
|
||||
fi
|
||||
|
||||
if [[ -n "$classifile" && -n "${record['identifierwodash']}" ]]; then
|
||||
echo "${record['md5']}" >> "$classifile"
|
||||
fi
|
||||
|
||||
keys=${!record[*]}
|
||||
|
||||
md5="${record[md5]}"
|
||||
|
@ -230,6 +240,7 @@ main () {
|
|||
sql+="insert into $table (${columns[$table]%?}) values(${values[$table]%?}) on duplicate key update ${upsert[$table]%?};"
|
||||
fi
|
||||
done
|
||||
|
||||
echo "${sql}" >> "${update_sql}"
|
||||
[[ -n $sql_dump ]] && echo "${sql}" >> "${sql_dump}"
|
||||
|
||||
|
@ -249,6 +260,28 @@ main () {
|
|||
|
||||
[[ $no_action == 0 ]] && dbx "$db" < "${update_sql}"
|
||||
done
|
||||
|
||||
# optionally add classification data to new records
|
||||
# this will use tor and round-robin through TOR ports if these are
|
||||
# defined in classify_tor_ports in the config file
|
||||
if [[ -n "$classifile" && -f $classifile ]]; then
|
||||
now=$(date +%Y%m%d%H%M)
|
||||
csvfile="${classify_csv:+$classify_csv/}${now}.csv"
|
||||
IFS=',' read -ra torports <<< "$classify_tor_ports"
|
||||
if [[ ${#torports[*]} -gt 0 ]]; then
|
||||
torpc=${#torports[*]}
|
||||
fi
|
||||
upc=0
|
||||
while read md5;do
|
||||
$classify ${torpc:+-@ ${torports[$upc%$torpc]}} -D "$db" ${classify_xml:+-X $classify_xml} -C "$md5" >> "${csvfile}"
|
||||
((upc++))
|
||||
done < <(cat "$classifile")
|
||||
|
||||
if [[ -f ${csvfile} ]]; then
|
||||
$import_metadata -d "$db" -f "${classify_fields:-ddc,lcc,fast}" ${classify_sql:+-s $classify_sql/$now.sql} -F "${csvfile}"
|
||||
fi
|
||||
fi
|
||||
|
||||
}
|
||||
|
||||
get_current_fields () {
|
||||
|
@ -417,7 +450,7 @@ help () {
|
|||
-D DATABASE database name
|
||||
|
||||
-a APIHOST use APIHOST as API server
|
||||
-@ use tor (through torsocks) to connect to libgen API server
|
||||
-@ TORPORT use tor (through torsocks) to connect to libgen API server
|
||||
-q don't warn about missing fields in database or api response
|
||||
-h this help message
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue