added bogus minimum sample execm handler, indexing text lines as docs
This commit is contained in:
parent
4cf10c4778
commit
b8cc6f8c27
1 changed files with 110 additions and 0 deletions
110
src/filters/rcltxtlines.py
Executable file
110
src/filters/rcltxtlines.py
Executable file
|
@ -0,0 +1,110 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
"""Index text lines as document (execm handler sample). This exists
|
||||||
|
to demonstrate the execm interface and is not meant to be useful or
|
||||||
|
efficient"""
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
import rclexecm
|
||||||
|
|
||||||
|
# Here try to import your document module if you need one. There is
|
||||||
|
# not much risk of 'sys' missing, but this shows what you should do if
|
||||||
|
# something is not there: the data will go to the 'missing' file, which
|
||||||
|
# can be displayed by the GUI as a list of MIME type and missing
|
||||||
|
# helpers.
|
||||||
|
# Sample dependency check: import the module(s) the handler needs and, if one
# is not installed, print the missing-helper message and exit with an error
# status. Only ImportError is caught, so that interrupts or unrelated failures
# are not misreported as a missing helper.
try:
    import sys
except ImportError:
    print("RECFILTERROR HELPERNOTFOUND python:sys")
    sys.exit(1)
|
||||||
|
|
||||||
|
# Our class.
|
||||||
|
class rclTXTLINES:
    """Sample execm handler exposing every line of a text file as a document.

    The object keeps two pieces of state, both reset by openfile():
    ``lines`` (the list of text lines of the current file) and
    ``currentindex`` (-1 before the 'self' document has been returned,
    then the index of the next line that getnext() will return).
    """

    def __init__(self, em):
        """Remember the execm protocol object and start with empty state."""
        # Store a ref to our execm object so that we can use its services.
        self.em = em
        # Defined here so that the other methods never see missing
        # attributes, even if openfile() was not called or failed.
        self.lines = []
        self.currentindex = -1

    # This is called once for every processed file during indexing, or
    # query preview. It usually creates some kind of table of
    # contents, and resets the current index in it, because we don't
    # know at this point if this is for indexing (will walk all
    # entries) or previewing (will request one). Note that there is no
    # close call, openfile() will just be called repeatedly during
    # indexing, and should clear any existing state.
    def openfile(self, params):
        """Open the text file, create a contents array.

        params["filename:"] is decoded as UTF-8 and the file is read in
        full. Returns True on success. Any failure to open or read the
        file is logged through the execm object and reported as False,
        with the contents array left empty.
        """
        self.currentindex = -1
        # Drop the previous file's lines first: a failed open must not
        # leave stale contents behind.
        self.lines = []
        try:
            # 'with' guarantees the file is closed, and reading inside
            # the try turns read errors into a logged failure too.
            with open(params["filename:"].decode('UTF-8'), "r") as f:
                self.lines = f.readlines()
        except Exception as err:
            self.em.rclog("openfile: open failed: [%s]" % err)
            return False
        return True

    # This is called for query preview to request one specific
    # entry. Here our internal paths are stringified line numbers, but
    # they could be tar archive paths or whatever we returned during
    # indexing.
    def getipath(self, params):
        """Return the document for the line number in params["ipath:"].

        Returns the same 4-tuple as extractone(). An ipath which is
        missing or not a number is logged and reported as a failure,
        using the same failure tuple as getnext() at end of file.
        """
        try:
            lno = int(params["ipath:"])
        except (KeyError, TypeError, ValueError) as err:
            self.em.rclog("getipath: bad ipath: [%s]" % err)
            return (False, "", "", rclexecm.RclExecM.eofnow)
        return self.extractone(lno)

    # This is called during indexing to walk the contents. The first
    # time, we return a 'self' document, which may be empty (e.g. for
    # a tar file), or might contain data (e.g. for an email body,
    # further docs being the attachments).
    def getnext(self, params):
        """Return the next document of the current file.

        The first call after openfile() returns the empty 'self'
        document, following calls return one line each, and a call past
        the last line returns a failure tuple flagged eofnow.
        """
        # Self doc. Here empty.
        # !Note that the self doc has an *empty* ipath
        if self.currentindex == -1:
            self.currentindex = 0
            if not self.lines:
                eof = rclexecm.RclExecM.eofnext
            else:
                eof = rclexecm.RclExecM.noteof
            return (True, "", "", eof)

        if self.currentindex >= len(self.lines):
            return (False, "", "", rclexecm.RclExecM.eofnow)

        ret = self.extractone(self.currentindex)
        self.currentindex += 1
        return ret

    # Most handlers factorize common code from getipath() and
    # getnext() in an extractone() method, but this is not part of the
    # interface.
    def extractone(self, lno):
        """Extract one line from the text file.

        lno is the 0-based line index. Returns a tuple
        (ok, data, ipath, eof): the line text and its stringified index
        on success, or (False, "", ipath, eof) with a logged message if
        lno is outside the file.
        """
        # Need to specify the MIME type here. This would not be
        # necessary if the ipath was a file name with a usable
        # extension.
        self.em.setmimetype("text/plain")

        # Warning of upcoming eof saves one roundtrip
        iseof = rclexecm.RclExecM.noteof
        if lno == len(self.lines) - 1:
            iseof = rclexecm.RclExecM.eofnext

        # Explicit range check: a negative index would otherwise be
        # accepted by the list and silently return a line counted from
        # the end of the file.
        if not 0 <= lno < len(self.lines):
            self.em.rclog("extractone: failed: [line index %d out of range]"
                          % lno)
            # The ipath is always returned as a string, like on success.
            return (False, "", str(lno), iseof)

        # Return the doc data and internal path (here stringified
        # line number). If we're called from getipath(), the
        # returned ipath is not that useful of course.
        return (True, self.lines[lno], str(lno), iseof)
|
||||||
|
|
||||||
|
|
||||||
|
# Startup: build the execm protocol object, hand it to the handler for this
# file type, then let rclexecm.main() run with both of them.
proto = rclexecm.RclExecM()
extract = rclTXTLINES(em=proto)
rclexecm.main(proto, extract)
|
Loading…
Add table
Add a link
Reference in a new issue