Added contributed rcltar filter

This commit is contained in:
Jean-Francois Dockes 2012-05-25 17:04:22 +02:00
parent 8cc33a60b4
commit 07a4cc832c
6 changed files with 107 additions and 2 deletions

69
src/filters/rcltar Executable file
View file

@ -0,0 +1,69 @@
#!/usr/bin/env python
# Tar-file filter for Recoll
# Thanks to Recoll user Martin Ziegler
# This is a modified version of /usr/share/recoll/filters/rclzip
# It works not only for tar-files, but automatically for gzipped and
# bzipped tar-files as well.
import sys

import rclexecm

# NOTE: this deliberately imports tarfile's open(), which shadows the builtin
# open() for the rest of this module.
try:
    from tarfile import TarFile, open
except ImportError:
    # Report the missing helper on stdout and give up. sys must be imported
    # above for this exit to work.
    print("RECFILTERROR HELPERNOTFOUND python:tarfile")
    sys.exit(1)
class TarExtractor:
    """Document extractor giving access to the regular files of a tar archive.

    The archive is opened with tarfile's open() in mode 'r', so plain,
    gzipped and bzipped tar files are all accepted. Every method returning a
    document yields a tuple (ok, data, ipath, eof-flag) where the eof flag is
    one of the rclexecm.RclExecM constants noteof / eofnext / eofnow.
    """

    def __init__(self, em):
        """Store the protocol object `em` and start with no archive open."""
        self.currentindex = 0
        self.em = em
        self.namen = []
        # Handle on the currently opened archive, None until openfile() succeeds.
        self.tar = None

    def _closetar(self):
        """Close the currently opened archive, if any, and forget it."""
        tar = self.tar
        self.tar = None
        if tar is not None:
            tar.close()

    def extractone(self, ipath):
        """Read the archive member named `ipath`.

        Returns (ok, data, ipath, iseof). `ok` is False and `data` is "" when
        the member cannot be read for any reason (unknown name, member with
        no data, no archive open). `ipath` is returned UTF-8 encoded when it
        was passed in as a unicode object.
        """
        docdata = ""
        try:
            docdata = self.tar.extractfile(ipath).read()
            ok = True
        except Exception:
            # Best effort: any failure is reported through the ok flag.
            ok = False
        iseof = rclexecm.RclExecM.noteof
        # getnext() increments currentindex only after this call, so an index
        # at (or beyond) the last name means no further entry follows.
        if self.currentindex >= len(self.namen) - 1:
            iseof = rclexecm.RclExecM.eofnext
        if isinstance(ipath, unicode):
            ipath = ipath.encode("utf-8")
        return (ok, docdata, ipath, iseof)

    def openfile(self, params):
        """Open the archive named by params["filename:"] and list its files.

        Any previously opened archive is closed first so that handles do not
        accumulate when several archives are processed in sequence. Only
        regular-file members are recorded in self.namen.

        Returns True on success. On failure returns False and leaves the
        extractor with no archive open and an empty name list.
        """
        self.currentindex = 0
        self.namen = []
        try:
            self._closetar()
            self.tar = open(name=params["filename:"], mode='r')
            self.namen = [member.name for member in self.tar.getmembers()
                          if member.isfile()]
            return True
        except Exception:
            # Do not keep a half-read archive or stale names around.
            self.namen = []
            self._closetar()
            return False

    def getipath(self, params):
        """Extract the member designated by params["ipath:"].

        If extraction with the path as received fails, the path is decoded
        from UTF-8 and extraction is tried once more. When decoding itself
        fails, the result of the first attempt is returned.
        """
        ipath = params["ipath:"]
        ok, data, ipath, eof = self.extractone(ipath)
        if ok:
            return (ok, data, ipath, eof)
        try:
            ipath = ipath.decode("utf-8")
            return self.extractone(ipath)
        except Exception:
            return (ok, data, ipath, eof)

    def getnext(self, params):
        """Extract the next regular file of the archive, in member order.

        Returns (False, "", "", eofnow) once all names have been consumed;
        the name list is reset at that point.
        """
        if self.currentindex >= len(self.namen):
            self.namen = []
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret = self.extractone(self.namen[self.currentindex])
            self.currentindex += 1
            return ret
# Script entry: create the rclexecm protocol object, build a tar extractor
# holding a reference to it, and pass both to rclexecm.main().
protocol = rclexecm.RclExecM()
rclexecm.main(protocol, TarExtractor(protocol))

View file

@ -86,6 +86,7 @@ application/x-perl = internal text/plain
application/x-rar = execm rclrar;charset=default application/x-rar = execm rclrar;charset=default
application/x-scribus = exec rclscribus application/x-scribus = exec rclscribus
application/x-shellscript = internal text/plain application/x-shellscript = internal text/plain
#application/x-tar = execm rcltar
application/x-tex = exec rcltex application/x-tex = exec rcltex
application/x-webarchive = execm rclwar application/x-webarchive = execm rclwar
application/zip = execm rclzip;charset=default application/zip = execm rclzip;charset=default
@ -299,6 +300,7 @@ other = application/vnd.sun.xml.draw \
application/x-fsdirectory \ application/x-fsdirectory \
application/x-mimehtml \ application/x-mimehtml \
application/x-rar \ application/x-rar \
application/x-tar \
application/x-webarchive \ application/x-webarchive \
application/zip \ application/zip \

View file

@ -53,6 +53,16 @@
#.Z = application/x-compress #.Z = application/x-compress
.zip = application/zip .zip = application/zip
# The rcltar module can handle compressed tar formats internally, so we
# use application/x-tar for all tar files, compressed or not. Note that tar
# file indexing is disabled by default: to enable it, copy the commented-out
# application/x-tar line from mimeconf into your personal config and uncomment it.
.tar = application/x-tar
.tar.gz = application/x-tar
.tgz = application/x-tar
.tbz = application/x-tar
.tar.bz2 = application/x-tar
.doc = application/msword .doc = application/msword
.ppt = application/vnd.ms-powerpoint .ppt = application/vnd.ms-powerpoint
.xls = application/vnd.ms-excel .xls = application/vnd.ms-excel
@ -135,9 +145,9 @@
# indexallfilenames is set (so this is different from skippedNames). It's a # indexallfilenames is set (so this is different from skippedNames). It's a
# bit inconsistent to have it listed among the suffix translations, but no # bit inconsistent to have it listed among the suffix translations, but no
# problem in practice. # problem in practice.
recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz .log.gz .md5 .map \ recoll_noindex = .md5 .map \
.o .lib .dll .a .sys .exe .com \ .o .lib .dll .a .sys .exe .com \
.dat .bak .rdf .log .db .msf .pid \ .dat .bak .rdf .log.gz .log .db .msf .pid \
,v ~ # ,v ~ #
# Special handling of .txt files inside ~/.gaim and ~/.purple directories # Special handling of .txt files inside ~/.gaim and ~/.purple directories

View file

@ -7,3 +7,5 @@
# with the same name in the central directory. The syntax for setting # with the same name in the central directory. The syntax for setting
# values is identical. # values is identical.
[index]
application/x-tar = execm rcltar

16
tests/tar/tar.sh Executable file
View file

@ -0,0 +1,16 @@
#!/bin/sh

# Regression test for the tar filter: query the index for a term and compare
# the result list with the reference output stored next to this script.

# Locate and source the shared test helpers, one directory above this one.
# Expansions are quoted so that a checkout path containing spaces works.
topdir=$(dirname "$0")/..
. "$topdir/shared.sh"

# NOTE(review): myname, mystdout, mystderr and mydiffs are not set in this
# script; presumably initvariables (from shared.sh) defines them - confirm.
initvariables "$0"

(
    recollq TARUNIQUETERM2
) 2> "$mystderr" | grep -E -v '^Recoll query: ' > "$mystdout"

# Whitespace-insensitive comparison against the expected results.
diff -w "${myname}.txt" "$mystdout" > "$mydiffs" 2>&1

checkresult

6
tests/tar/tar.txt Normal file
View file

@ -0,0 +1,6 @@
5 results
text/plain [file:///home/dockes/projets/fulltext/testrecoll/tar/tarfile.tar] [tarfile.tar] 15 bytes
text/plain [file:///home/dockes/projets/fulltext/testrecoll/tar/tarfile.tar.gz] [tarfile.tar.gz] 15 bytes
text/plain [file:///home/dockes/projets/fulltext/testrecoll/tar/tarfile.tgz] [tarfile.tgz] 15 bytes
text/plain [file:///home/dockes/projets/fulltext/testrecoll/tar/tarfile.tbz] [tarfile.tbz] 15 bytes
text/plain [file:///home/dockes/projets/fulltext/testrecoll/tar/tarfile.tar.bz2] [tarfile.tar.bz2] 15 bytes