Added contributed rcltar filter
This commit is contained in:
parent
8cc33a60b4
commit
07a4cc832c
6 changed files with 107 additions and 2 deletions
69
src/filters/rcltar
Executable file
69
src/filters/rcltar
Executable file
|
@ -0,0 +1,69 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
# Tar-file filter for Recoll
|
||||
# Thanks to Recoll user Martin Ziegler
|
||||
# This is a modified version of /usr/share/recoll/filters/rclzip
|
||||
# It works not only for tar-files, but automatically for gzipped and
|
||||
# bzipped tar-files as well.
|
||||
|
||||
import rclexecm
|
||||
|
||||
# sys is needed for the error exit below; it was never imported by this script.
import sys

# NOTE: this deliberately rebinds the module-level name `open` to tarfile.open;
# TarExtractor.openfile() relies on that binding.
try:
    from tarfile import TarFile, open
except ImportError:
    # Emit the helper-not-found message on stdout, then stop with a non-zero
    # status.
    print("RECFILTERROR HELPERNOTFOUND python:tarfile")
    sys.exit(1)
|
||||
|
||||
class TarExtractor:
    """Expose the regular-file members of a tar archive, one document each.

    The object is handed to rclexecm.main() together with the protocol
    object and provides the openfile / getipath / getnext entry points.
    Archives are opened with tarfile's mode 'r', which selects the
    decompression transparently.

    Every data-returning method yields a 4-tuple
    ``(ok, docdata, ipath, iseof)`` where ``iseof`` is one of the
    rclexecm.RclExecM end-of-file markers (noteof / eofnext / eofnow).
    """

    def __init__(self, em):
        """Remember the protocol object and start with no archive open."""
        self.currentindex = 0
        self.em = em
        # Names of the regular-file members of the currently open archive.
        self.namen = []
        # Currently open TarFile, or None when nothing is open.
        self.tar = None

    def _closetar(self):
        """Close the currently open archive, if any, and forget it."""
        tar, self.tar = self.tar, None
        if tar is not None:
            try:
                tar.close()
            except Exception:
                # Best effort: a failure to close the old archive must not
                # prevent the next one from being processed.
                pass

    def extractone(self, ipath):
        """Read the member named *ipath* from the open archive.

        Returns (ok, docdata, ipath, iseof). ok is False (and docdata empty)
        when the member cannot be read for any reason, including no archive
        being open. ipath is returned utf-8 encoded if it came in as unicode.
        """
        docdata = ""
        try:
            docdata = self.tar.extractfile(ipath).read()
            ok = True
        except Exception:
            ok = False
        # Announce end-of-file one step ahead when this is the last member.
        iseof = rclexecm.RclExecM.noteof
        if self.currentindex >= len(self.namen) - 1:
            iseof = rclexecm.RclExecM.eofnext
        if isinstance(ipath, unicode):
            ipath = ipath.encode("utf-8")
        return (ok, docdata, ipath, iseof)

    def openfile(self, params):
        """Open the archive named by params["filename:"] and list its files.

        Any previously opened archive is closed first so that handles do not
        accumulate as files are processed one after another. Returns True on
        success; on any failure the extractor is left with no archive open and
        an empty member list, and False is returned.
        """
        self.currentindex = 0
        self.namen = []
        self._closetar()
        try:
            # `open` is tarfile.open here (see the import at the top of the file).
            self.tar = open(name=params["filename:"], mode='r')
            self.namen = [m.name for m in self.tar.getmembers() if m.isfile()]
            return True
        except Exception:
            # Deliberately broad: a missing parameter or an unreadable or
            # corrupt archive should be reported as a failed open, not crash
            # the filter.
            self._closetar()
            self.namen = []
            return False

    def getipath(self, params):
        """Return the member designated by params["ipath:"].

        The path is first tried as received; if that fails it is decoded from
        utf-8 and tried again. If decoding itself fails, the result of the
        first attempt is returned.
        """
        ipath = params["ipath:"]
        ok, data, ipath, eof = self.extractone(ipath)
        if ok:
            return (ok, data, ipath, eof)
        try:
            ipath = ipath.decode("utf-8")
            return self.extractone(ipath)
        except Exception:
            return (ok, data, ipath, eof)

    def getnext(self, params):
        """Return the next member in archive order, or an eofnow marker.

        Once every member has been returned the member list is cleared and
        (False, "", "", eofnow) is returned.
        """
        if self.currentindex >= len(self.namen):
            self.namen = []
            return (False, "", "", rclexecm.RclExecM.eofnow)
        ret = self.extractone(self.namen[self.currentindex])
        self.currentindex += 1
        return ret
|
||||
|
||||
|
||||
# Script entry point: build the rclexecm protocol object, give it to a
# TarExtractor, and pass both to rclexecm.main().
# NOTE(review): presumably main() then serves requests until the indexer
# closes the connection -- confirm against rclexecm.
proto = rclexecm.RclExecM()
|
||||
extract = TarExtractor(proto)
|
||||
rclexecm.main(proto, extract)
|
|
@ -86,6 +86,7 @@ application/x-perl = internal text/plain
|
|||
application/x-rar = execm rclrar;charset=default
|
||||
application/x-scribus = exec rclscribus
|
||||
application/x-shellscript = internal text/plain
|
||||
#application/x-tar = execm rcltar
|
||||
application/x-tex = exec rcltex
|
||||
application/x-webarchive = execm rclwar
|
||||
application/zip = execm rclzip;charset=default
|
||||
|
@ -299,6 +300,7 @@ other = application/vnd.sun.xml.draw \
|
|||
application/x-fsdirectory \
|
||||
application/x-mimehtml \
|
||||
application/x-rar \
|
||||
application/x-tar \
|
||||
application/x-webarchive \
|
||||
application/zip \
|
||||
|
||||
|
|
|
@ -53,6 +53,16 @@
|
|||
#.Z = application/x-compress
|
||||
.zip = application/zip
|
||||
|
||||
# The rcltar module can handle compressed tar formats internally so we
|
||||
# use application/x-tar for all tar files, compressed or not. Note that tar
|
||||
# file indexing is disabled by default: to enable it, copy and uncomment
|
||||
# the application/x-tar commented line from mimeconf into your personal config
|
||||
.tar = application/x-tar
|
||||
.tar.gz = application/x-tar
|
||||
.tgz = application/x-tar
|
||||
.tbz = application/x-tar
|
||||
.tar.bz2 = application/x-tar
|
||||
|
||||
.doc = application/msword
|
||||
.ppt = application/vnd.ms-powerpoint
|
||||
.xls = application/vnd.ms-excel
|
||||
|
@ -135,9 +145,9 @@
|
|||
# indexallfilenames is set (so this is different from skippedNames). It's a
|
||||
# bit inconsistent to have it listed among the suffix translations, but no
|
||||
# problem in practice.
|
||||
recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz .log.gz .md5 .map \
|
||||
recoll_noindex = .md5 .map \
|
||||
.o .lib .dll .a .sys .exe .com \
|
||||
.dat .bak .rdf .log .db .msf .pid \
|
||||
.dat .bak .rdf .log.gz .log .db .msf .pid \
|
||||
,v ~ #
|
||||
|
||||
# Special handling of .txt files inside ~/.gaim and ~/.purple directories
|
||||
|
|
|
@ -7,3 +7,5 @@
|
|||
# with the same name in the central directory. The syntax for setting
|
||||
# values is identical.
|
||||
|
||||
[index]
|
||||
application/x-tar = execm rcltar
|
||||
|
|
16
tests/tar/tar.sh
Executable file
16
tests/tar/tar.sh
Executable file
|
@ -0,0 +1,16 @@
|
|||
#!/bin/sh

# Regression test for the tar filter: query the index for a term and compare
# the result list with the reference output stored next to this script.
# NOTE(review): initvariables and checkresult, and the my* variables, are
# presumably provided by shared.sh -- confirm there.

topdir=$(dirname $0)/..
. ${topdir}/shared.sh

initvariables $0

# Run the query; stderr is kept aside, and the "Recoll query: " echo line is
# dropped from stdout before comparison.
(
    recollq TARUNIQUETERM2
) 2> ${mystderr} | egrep -v '^Recoll query: ' > ${mystdout}

# Whitespace-insensitive comparison against the expected results.
diff -w ${myname}.txt ${mystdout} > ${mydiffs} 2>&1

checkresult
|
6
tests/tar/tar.txt
Normal file
6
tests/tar/tar.txt
Normal file
|
@ -0,0 +1,6 @@
|
|||
5 results
|
||||
text/plain [file:///home/dockes/projets/fulltext/testrecoll/tar/tarfile.tar] [tarfile.tar] 15 bytes
|
||||
text/plain [file:///home/dockes/projets/fulltext/testrecoll/tar/tarfile.tar.gz] [tarfile.tar.gz] 15 bytes
|
||||
text/plain [file:///home/dockes/projets/fulltext/testrecoll/tar/tarfile.tgz] [tarfile.tgz] 15 bytes
|
||||
text/plain [file:///home/dockes/projets/fulltext/testrecoll/tar/tarfile.tbz] [tarfile.tbz] 15 bytes
|
||||
text/plain [file:///home/dockes/projets/fulltext/testrecoll/tar/tarfile.tar.bz2] [tarfile.tar.bz2] 15 bytes
|
Loading…
Add table
Add a link
Reference in a new issue