Added contributed rcltar filter
This commit is contained in:
parent
8cc33a60b4
commit
07a4cc832c
6 changed files with 107 additions and 2 deletions
69
src/filters/rcltar
Executable file
69
src/filters/rcltar
Executable file
|
@ -0,0 +1,69 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# Tar-file filter for Recoll
|
||||||
|
# Thanks to Recoll user Martin Ziegler
|
||||||
|
# This is a modified version of /usr/share/recoll/filters/rclzip
|
||||||
|
# It works not only for tar-files, but automatically for gzipped and
|
||||||
|
# bzipped tar-files as well.
|
||||||
|
|
||||||
|
import rclexecm
|
||||||
|
|
||||||
|
# sys is needed for the error exit below; the original called sys.exit()
# without ever importing sys, which turned a missing helper into a NameError.
import sys

try:
    # NOTE: this deliberately binds the module-level name `open` to
    # tarfile.open (shadowing the builtin); the rest of the script relies
    # on that binding to open archives.
    from tarfile import TarFile, open
except ImportError:
    # Report the missing helper on stdout and give up. The message text is
    # presumably parsed by the Recoll indexer -- keep it byte-identical.
    # The parenthesized form prints the same line under Python 2 and 3.
    print("RECFILTERROR HELPERNOTFOUND python:tarfile")
    sys.exit(1)
|
||||||
|
|
||||||
|
class TarExtractor:
    """Handler object giving rclexecm access to the regular files of a tar archive.

    The object is handed to rclexecm.main() together with the RclExecM
    protocol object. It opens an archive (plain or compressed, tarfile
    detects the compression itself because mode 'r' is used), lists the
    regular-file members, and returns their contents either one after the
    other (getnext) or by member name (getipath).

    Every extraction result is a 4-tuple ``(ok, data, ipath, eof)`` where
    ``eof`` is one of the RclExecM.noteof / eofnext / eofnow constants.

    NOTE(review): this is Python 2 code (it relies on the ``unicode`` type).
    """

    def __init__(self, em):
        """Remember the protocol object *em* and start with no archive open."""
        self.currentindex = 0
        self.em = em
        self.namen = []
        # Currently opened tarfile.TarFile, or None before the first openfile().
        self.tar = None

    def extractone(self, ipath):
        """Read the member named *ipath* from the currently opened archive.

        Returns ``(ok, docdata, ipath, iseof)``. ``ok`` is False and
        ``docdata`` is empty if the member could not be read for any reason
        (no archive open, unknown name, member that is not a regular file).
        A unicode *ipath* is returned UTF-8 encoded.
        """
        docdata = ""
        try:
            docdata = self.tar.extractfile(ipath).read()
            ok = True
        except Exception:
            # Best effort: a member that cannot be read is reported as
            # failed, it must not abort the whole run.
            ok = False
        iseof = rclexecm.RclExecM.noteof
        # getnext() increments currentindex only after this call, so an
        # index on (or past) the last name means nothing follows.
        if self.currentindex >= len(self.namen) - 1:
            iseof = rclexecm.RclExecM.eofnext
        if isinstance(ipath, unicode):
            ipath = ipath.encode("utf-8")
        return (ok, docdata, ipath, iseof)

    def openfile(self, params):
        """Open the archive named by ``params["filename:"]`` and list its files.

        Any previously opened archive is closed first. On success the names
        of all regular-file members are stored in ``self.namen`` and True is
        returned. On any failure False is returned, no archive is left open
        and ``self.namen`` is empty.
        """
        self.currentindex = 0
        self.namen = []
        if self.tar is not None:
            # Release the previous archive instead of leaking its handle.
            try:
                self.tar.close()
            except Exception:
                pass
            self.tar = None
        try:
            # `open` is tarfile.open here (imported by name at file level).
            archive = open(name=params["filename:"], mode='r')
        except Exception:
            return False
        try:
            names = [member.name for member in archive.getmembers()
                     if member.isfile()]
        except Exception:
            # Unreadable/corrupt archive: do not keep a half-usable handle.
            archive.close()
            return False
        self.tar = archive
        self.namen = names
        return True

    def getipath(self, params):
        """Extract the member whose name is ``params["ipath:"]``.

        The name is tried as received first; if that fails it is decoded
        from UTF-8 and tried again. If decoding fails, the result of the
        first attempt is returned.
        """
        ipath = params["ipath:"]
        ok, data, ipath, eof = self.extractone(ipath)
        if ok:
            return (ok, data, ipath, eof)
        try:
            ipath = ipath.decode("utf-8")
            return self.extractone(ipath)
        except Exception:
            return (ok, data, ipath, eof)

    def getnext(self, params):
        """Extract the next regular file of the archive, in listing order.

        Once all names have been handed out, the name list is cleared and
        ``(False, "", "", RclExecM.eofnow)`` is returned.
        """
        if self.currentindex >= len(self.namen):
            self.namen = []
            return (False, "", "", rclexecm.RclExecM.eofnow)
        ret = self.extractone(self.namen[self.currentindex])
        self.currentindex += 1
        return ret
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry: build the rclexecm protocol object, attach a tar extractor
# to it, and hand both to rclexecm.main().
proto = rclexecm.RclExecM()
extract = TarExtractor(em=proto)
rclexecm.main(proto, extract)
|
|
@ -86,6 +86,7 @@ application/x-perl = internal text/plain
|
||||||
application/x-rar = execm rclrar;charset=default
|
application/x-rar = execm rclrar;charset=default
|
||||||
application/x-scribus = exec rclscribus
|
application/x-scribus = exec rclscribus
|
||||||
application/x-shellscript = internal text/plain
|
application/x-shellscript = internal text/plain
|
||||||
|
#application/x-tar = execm rcltar
|
||||||
application/x-tex = exec rcltex
|
application/x-tex = exec rcltex
|
||||||
application/x-webarchive = execm rclwar
|
application/x-webarchive = execm rclwar
|
||||||
application/zip = execm rclzip;charset=default
|
application/zip = execm rclzip;charset=default
|
||||||
|
@ -299,6 +300,7 @@ other = application/vnd.sun.xml.draw \
|
||||||
application/x-fsdirectory \
|
application/x-fsdirectory \
|
||||||
application/x-mimehtml \
|
application/x-mimehtml \
|
||||||
application/x-rar \
|
application/x-rar \
|
||||||
|
application/x-tar \
|
||||||
application/x-webarchive \
|
application/x-webarchive \
|
||||||
application/zip \
|
application/zip \
|
||||||
|
|
||||||
|
|
|
@ -53,6 +53,16 @@
|
||||||
#.Z = application/x-compress
|
#.Z = application/x-compress
|
||||||
.zip = application/zip
|
.zip = application/zip
|
||||||
|
|
||||||
|
# The rcltar module can handle compressed tar formats internally so we
|
||||||
|
# use application/x-tar for all tar files compressed or not. Note that tar
|
||||||
|
# file indexing is disabled by default; you'll need to copy and uncomment
|
||||||
|
# the application/x-tar commented line from mimeconf into your personal config
|
||||||
|
.tar = application/x-tar
|
||||||
|
.tar.gz = application/x-tar
|
||||||
|
.tgz = application/x-tar
|
||||||
|
.tbz = application/x-tar
|
||||||
|
.tar.bz2 = application/x-tar
|
||||||
|
|
||||||
.doc = application/msword
|
.doc = application/msword
|
||||||
.ppt = application/vnd.ms-powerpoint
|
.ppt = application/vnd.ms-powerpoint
|
||||||
.xls = application/vnd.ms-excel
|
.xls = application/vnd.ms-excel
|
||||||
|
@ -135,9 +145,9 @@
|
||||||
# indexallfilenames is set (so this is different from skippedNames). It's a
|
# indexallfilenames is set (so this is different from skippedNames). It's a
|
||||||
# bit inconsistent to have it listed among the suffix translations, but no
|
# bit inconsistent to have it listed among the suffix translations, but no
|
||||||
# problem in practice.
|
# problem in practice.
|
||||||
recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz .log.gz .md5 .map \
|
recoll_noindex = .md5 .map \
|
||||||
.o .lib .dll .a .sys .exe .com \
|
.o .lib .dll .a .sys .exe .com \
|
||||||
.dat .bak .rdf .log .db .msf .pid \
|
.dat .bak .rdf .log.gz .log .db .msf .pid \
|
||||||
,v ~ #
|
,v ~ #
|
||||||
|
|
||||||
# Special handling of .txt files inside ~/.gaim and ~/.purple directories
|
# Special handling of .txt files inside ~/.gaim and ~/.purple directories
|
||||||
|
|
|
@ -7,3 +7,5 @@
|
||||||
# with the same name in the central directory. The syntax for setting
|
# with the same name in the central directory. The syntax for setting
|
||||||
# values is identical.
|
# values is identical.
|
||||||
|
|
||||||
|
[index]
|
||||||
|
application/x-tar = execm rcltar
|
||||||
|
|
16
tests/tar/tar.sh
Executable file
16
tests/tar/tar.sh
Executable file
|
@ -0,0 +1,16 @@
|
||||||
|
#!/bin/sh

# Query the index for a term and compare the result list with the
# reference output stored next to this script (<myname>.txt).

# Quote every expansion so the test also works from a path with spaces.
topdir=$(dirname "$0")/..
. "$topdir/shared.sh"

# NOTE(review): mystderr, mystdout, mydiffs and myname are not set in this
# script; presumably initvariables (from shared.sh) defines them -- confirm.
initvariables "$0"

(
    recollq TARUNIQUETERM2

) 2> "$mystderr" | grep -E -v '^Recoll query: ' > "$mystdout"

# -w: ignore whitespace differences when comparing with the reference.
diff -w "${myname}.txt" "$mystdout" > "$mydiffs" 2>&1

checkresult
|
6
tests/tar/tar.txt
Normal file
6
tests/tar/tar.txt
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
5 results
|
||||||
|
text/plain [file:///home/dockes/projets/fulltext/testrecoll/tar/tarfile.tar] [tarfile.tar] 15 bytes
|
||||||
|
text/plain [file:///home/dockes/projets/fulltext/testrecoll/tar/tarfile.tar.gz] [tarfile.tar.gz] 15 bytes
|
||||||
|
text/plain [file:///home/dockes/projets/fulltext/testrecoll/tar/tarfile.tgz] [tarfile.tgz] 15 bytes
|
||||||
|
text/plain [file:///home/dockes/projets/fulltext/testrecoll/tar/tarfile.tbz] [tarfile.tbz] 15 bytes
|
||||||
|
text/plain [file:///home/dockes/projets/fulltext/testrecoll/tar/tarfile.tar.bz2] [tarfile.tar.bz2] 15 bytes
|
Loading…
Add table
Add a link
Reference in a new issue