From b8cc6f8c2751f10e0259f318e75b031dc8c2a55d Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Mon, 23 May 2016 18:59:00 +0200 Subject: [PATCH] added bogus minimum sample execm handler, indexing text lines as docs --- src/filters/rcltxtlines.py | 110 +++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100755 src/filters/rcltxtlines.py diff --git a/src/filters/rcltxtlines.py b/src/filters/rcltxtlines.py new file mode 100755 index 00000000..4dbf7436 --- /dev/null +++ b/src/filters/rcltxtlines.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python +"""Index text lines as document (execm handler sample). This exists +to demonstrate the execm interface and is not meant to be useful or +efficient""" +from __future__ import print_function + +import sys +import os + +import rclexecm + +# Here try to import your document module if you need one. There is +# not much risk of 'sys' missing, but this shows what you should do if +# something is not there: the data will go to the 'missing' file, which +# can be displayed by the GUI as a list of MIME type and missing +# helpers. +try: + import sys +except: + print("RECFILTERROR HELPERNOTFOUND python:sys") + sys.exit(1); + +# Our class. +class rclTXTLINES: + def __init__(self, em): + # Store a ref to our execm object so that we can use its services. + self.em = em + + # This is called once for every processed file during indexing, or + # query preview. It usually creates some kind of table of + # contents, and resets the current index in it, because we don't + # know at this point if this is for indexing (will walk all + # entries) or previewing (will request one). Actually we could + # know from the environment but it's just simpler this way in + # general. Note that there is no close call, openfile() will just + # be called repeatedly during indexing, and should clear any + # existing state + def openfile(self, params): + """Open the text file, create a contents array""" + self.currentindex = -1 + try: + f = open(params["filename:"].decode('UTF-8'), "r") + except Exception as err: + self.em.rclog("openfile: open failed: [%s]" % err) + return False + self.lines = f.readlines() + return True + + # This is called for query preview to request one specific + # entry. Here our internal paths are stringified line numbers, but + # they could be tar archive paths or whatever we returned during + # indexing. + def getipath(self, params): + return self.extractone(int(params["ipath:"])) + + # This is called during indexing to walk the contents. The first + # time, we return a 'self' document, which may be empty (e.g. for + # a tar file), or might contain data (e.g. for an email body, + # further docs being the attachments). + def getnext(self, params): + + # Self doc. Here empty. + # !Note that the self doc has an *empty* ipath + if self.currentindex == -1: + self.currentindex = 0 + if len(self.lines) == 0: + eof = rclexecm.RclExecM.eofnext + else: + eof = rclexecm.RclExecM.noteof + return (True, "", "", eof) + + + if self.currentindex >= len(self.lines): + return (False, "", "", rclexecm.RclExecM.eofnow) + else: + ret= self.extractone(self.currentindex) + self.currentindex += 1 + return ret + + # Most handlers factorize common code from getipath() and + # getnext() in an extractone() method, but this is not part of the + # interface. + def extractone(self, lno): + """Extract one line from the text file""" + + # Need to specify the MIME type here. This would not be + # necessary if the ipath was a file name with a usable + # extension. + self.em.setmimetype("text/plain") + + # Warning of upcoming eof saves one roundtrip + iseof = rclexecm.RclExecM.noteof + if lno == len(self.lines) - 1: + iseof = rclexecm.RclExecM.eofnext + + try: + # Return the doc data and internal path (here stringified + # line number). If we're called from getipath(), the + # returned ipath is not that useful of course. + return (True, self.lines[lno], str(lno), iseof) + except Exception as err: + self.em.rclog("extractone: failed: [%s]" % err) + return (False, "", lno, iseof) + + +# Initialize: create our protocol handler, the filetype-specific +# object, link them and run. +proto = rclexecm.RclExecM() +extract = rclTXTLINES(proto) +rclexecm.main(proto, extract)