From c98bdb0edd5ebf5b222e1df1470a038a68f288f6 Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes
Date: Fri, 8 Apr 2016 10:24:52 +0200
Subject: [PATCH] converted rcldjvu to python
---
src/Makefile.am | 2 +-
src/filters/rcldjvu | 180 ----------------------------------------
src/filters/rcldjvu.py | 107 ++++++++++++++++++++++++
src/sampleconf/mimeconf | 2 +-
4 files changed, 109 insertions(+), 182 deletions(-)
delete mode 100755 src/filters/rcldjvu
create mode 100755 src/filters/rcldjvu.py
diff --git a/src/Makefile.am b/src/Makefile.am
index 6b0f25b7..72133dd7 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -566,7 +566,7 @@ filters/rclcheckneedretry.sh \
filters/rclchm \
filters/rclconfig.py \
filters/rcldia \
-filters/rcldjvu \
+filters/rcldjvu.py \
filters/rcldoc.py \
filters/rcldvi \
filters/rclepub \
diff --git a/src/filters/rcldjvu b/src/filters/rcldjvu
deleted file mode 100755
index 93210a71..00000000
--- a/src/filters/rcldjvu
+++ /dev/null
@@ -1,180 +0,0 @@
-#!/bin/sh
-# @(#$Id: rcldjvu,v 1.6 2008-10-08 08:27:34 dockes Exp $ (C) 2005 J.F.Dockes
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-#================================================================
-# Extract text from a djvu file by executing djvused and djvutxt
-#
-# We use djvused to extract a possible title, djvutxt for the text
-#
-# Of course this only means anything if the djvu document actually has
-# a text layer !
-#
-# djvu utilities (04-2010) have a bug in which they try to interpret
-# and convert file paths as character data, and fail miserably if the
-# locale is not consistent with the actual encoding of the path (which
-# could be arbitrary binary for all they know). We use a temporary
-# symbolic link to get around this.
-#
-#================================================================
-
-progname="rcldjvu"
-filetype=dejavu
-
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
- echo RECFILTERROR $*
- # Also alert on stderr just in case
- echo ":2:$progname::: $*" 1>&2
- exit 1
-}
-
-iscmd()
-{
- cmd=$1
- case $cmd in
- */*)
- if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
- *)
- oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
- for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
- return 1 ;;
- esac
-}
-
-checkcmds()
-{
- for cmd in $*;do
- if iscmd $cmd
- then
- a=1
- else
- senderror HELPERNOTFOUND $cmd
- fi
- done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help"
-then
- echo "Convert a $filetype file to HTML text for Recoll indexing."
- echo "Usage: $progname [infile]"
- exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
- senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds djvutxt djvused awk
-
-# We need a temporary symlink to avoid path encoding issues
-if test z"$RECOLL_TMPDIR" != z; then
- ttdir=$RECOLL_TMPDIR
-elif test z"$TMPDIR" != z ; then
- ttdir=$TMPDIR
-else
- ttdir=/tmp
-fi
-tmplink=$ttdir/rcldjvu_tmp$$.djvu
-rm -f $tmplink
-ln -s "$infile" $tmplink || exit 1
-
-cleanup()
-{
- rm -f $tmplink
-}
-
-trap cleanup EXIT HUP QUIT INT TERM
-
-# Title: we try to extract it from the annotations. djvused outputs string
-# in C/awk \-escaped notation. Awk can only process this in string
-# constants, so we have a first awk pass to create an awk program to parse
-# the string as a constant (...). This is not exactly robust or nice
-title=`djvused "$tmplink" -e 'select 1;output-ant' | \
-grep ' (title ' | sed -e 's/^.* (title //' -e 's/)$//' |\
-awk '
-{
- printf("BEGIN" " {s = %s; print s}\n", $0)
-}' | awk -f -`
-
-
-cat <
-
- $title
-
-
-
-
-EOF
-
-# The strange 'BEGIN' setup is to prevent 'file' from thinking this file
-# is an awk program
-djvutxt "$tmplink" | sed -e 's/[ ][ ]*$//' | \
-awk 'BEGIN'\
-' {
- cont = ""
-}
-{
- $0 = cont $0
- cont = ""
-
- if ($0 == "\f") {
- print "
\n
\n"
- next
- } else if ($0 ~ /[-]$/) {
- # Break at last whitespace
- match($0, "[ \t][^ \t]+$")
- line = substr($0, 0, RSTART)
- cont = substr($0, RSTART, RLENGTH)
- $0 = line
- gsub("-", "", cont)
- }
- gsub(/&/, "\\&", $0)
- gsub(/, "\\<", $0)
- gsub(/>/, "\\>", $0)
- print $0
-}'
-
-cat <
-
-