From b8cc6f8c2751f10e0259f318e75b031dc8c2a55d Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes <jfd@recoll.org>
Date: Mon, 23 May 2016 18:59:00 +0200
Subject: [PATCH] added bogus minimum sample execm handler, indexing text lines
 as docs

---
 src/filters/rcltxtlines.py | 110 +++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100755 src/filters/rcltxtlines.py

diff --git a/src/filters/rcltxtlines.py b/src/filters/rcltxtlines.py
new file mode 100755
index 00000000..4dbf7436
--- /dev/null
+++ b/src/filters/rcltxtlines.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+"""Index text lines as document (execm handler sample). This exists
+to demonstrate the execm interface and is not meant to be useful or
+efficient"""
+from __future__ import print_function
+
+import sys
+import os
+
+import rclexecm
+
+# Here try to import your document module if you need one. There is
+# not much risk of 'sys' missing, but this shows what you should do if
+# something is not there: the data will go to the 'missing' file, which
+# can be displayed by the GUI as a list of MIME type and missing
+# helpers.
+try:
+    import sys
+except:
+    print("RECFILTERROR HELPERNOTFOUND python:sys")
+    sys.exit(1);
+
+# Our class.
+class rclTXTLINES:
+    def __init__(self, em):
+        # Store a ref to our execm object so that we can use its services.
+        self.em = em
+
+    # This is called once for every processed file during indexing, or
+    # query preview. It usually creates some kind of table of
+    # contents, and resets the current index in it, because we don't
+    # know at this point if this is for indexing (will walk all
+    # entries) or previewing (will request one). Actually we could
+    # know from the environment but it's just simpler this way in
+    # general. Note that there is no close call, openfile() will just
+    # be called repeatedly during indexing, and should clear any
+    # existing state
+    def openfile(self, params):
+        """Open the text file, create a contents array"""
+        self.currentindex = -1
+        try:
+            f = open(params["filename:"].decode('UTF-8'), "r")
+        except Exception as err:
+            self.em.rclog("openfile: open failed: [%s]" % err)
+            return False
+        self.lines = f.readlines()
+        return True
+
+    # This is called for query preview to request one specific
+    # entry. Here our internal paths are stringified line numbers, but
+    # they could be tar archive paths or whatever we returned during
+    # indexing.
+    def getipath(self, params):
+        return self.extractone(int(params["ipath:"]))
+
+    # This is called during indexing to walk the contents. The first
+    # time, we return a 'self' document, which may be empty (e.g. for
+    # a tar file), or might contain data (e.g. for an email body,
+    # further docs being the attachments).
+    def getnext(self, params):
+
+        # Self doc. Here empty.
+        # !Note that the self doc has an *empty* ipath
+        if self.currentindex == -1:
+            self.currentindex = 0
+            if len(self.lines) == 0:
+                eof = rclexecm.RclExecM.eofnext
+            else:
+                eof = rclexecm.RclExecM.noteof
+            return (True, "", "", eof)
+
+
+        if self.currentindex >= len(self.lines):
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        else:
+            ret= self.extractone(self.currentindex)
+            self.currentindex += 1
+            return ret
+
+    # Most handlers factorize common code from getipath() and
+    # getnext() in an extractone() method, but this is not part of the
+    # interface.
+    def extractone(self, lno):
+        """Extract one line from the text file"""
+
+        # Need to specify the MIME type here. This would not be
+        # necessary if the ipath was a file name with a usable
+        # extension.
+        self.em.setmimetype("text/plain")
+
+        # Warning of upcoming eof saves one roundtrip
+        iseof = rclexecm.RclExecM.noteof
+        if lno == len(self.lines) - 1:
+            iseof = rclexecm.RclExecM.eofnext
+
+        try:
+            # Return the doc data and internal path (here stringified
+            # line number). If we're called from getipath(), the
+            # returned ipath is not that useful of course.
+            return (True, self.lines[lno], str(lno), iseof)
+        except Exception as err:
+            self.em.rclog("extractone: failed: [%s]" % err)
+            return (False, "", lno, iseof)
+
+
+# Initialize: create our protocol handler, the filetype-specific
+# object, link them and run.
+proto = rclexecm.RclExecM()
+extract = rclTXTLINES(proto)
+rclexecm.main(proto, extract)