comments

2016-05-23 19:16:31 +02:00 · 2016-05-23 19:16:31 +02:00 · 8d836ca39a
commit 8d836ca39a
parent b8cc6f8c27
1 changed files with 21 additions and 16 deletions
--- a/src/filters/rcltxtlines.py
+++ b/src/filters/rcltxtlines.py
@ -27,14 +27,14 @@ class rclTXTLINES:
        self.em = em
    # This is called once for every processed file during indexing, or
-    # query preview. It usually creates some kind of table of
+    # query preview. For multi-document files, it usually creates some
-    # contents, and resets the current index in it, because we don't
+    # kind of table of contents, and resets the current index in it,
-    # know at this point if this is for indexing (will walk all
+    # because we don't know at this point if this is for indexing
-    # entries) or previewing (will request one). Actually we could
+    # (will walk all entries) or previewing (will request
-    # know from the environment but it's just simpler this way in
+    # one). Actually we could know from the environment but it's just
-    # general. Note that there is no close call, openfile() will just
+    # simpler this way in general. Note that there is no close call,
-    # be called repeatedly during indexing, and should clear any
+    # openfile() will just be called repeatedly during indexing, and
-    # existing state
+    # should clear any existing state
    def openfile(self, params):
        """Open the text file, create a contents array"""
        self.currentindex = -1
@ -46,20 +46,18 @@ class rclTXTLINES:
        self.lines = f.readlines()
        return True
    # This is called for query preview to request one specific
    # entry. Here our internal paths are stringified line numbers, but
    # they could be tar archive paths or whatever we returned during
    # indexing.
    def getipath(self, params):
        """Return the entry designated by params["ipath:"].

        The "ipath:" value is converted with int() and handed to
        self.extractone(); whatever that call returns is returned
        unchanged. int() raises ValueError if the value is not a valid
        integer string, and the lookup raises KeyError if "ipath:" is
        missing from params.
        """
        return self.extractone(int(params["ipath:"]))
    # This is called during indexing to walk the contents. The first
    # time, we return a 'self' document, which may be empty (e.g. for
    # a tar file), or might contain data (e.g. for an email body,
-    # further docs being the attachments).
+    # further docs being the attachments), and may also be the only
    # document returned (for single document files).
    def getnext(self, params):
        # Self doc. Here empty.
        #
        # This could also be the only entry if this file type holds a
        # single document. We return eofnext in this case
        #
        # !Note that the self doc has an *empty* ipath
        if self.currentindex == -1:
            self.currentindex = 0
@ -77,6 +75,13 @@ class rclTXTLINES:
            self.currentindex += 1
            return ret
    # This is called for query preview to request one specific (or the
    # only) entry. Here our internal paths are stringified line
    # numbers, but they could be tar archive paths or whatever we
    # returned during indexing.
    def getipath(self, params):
        """Return the entry designated by params["ipath:"].

        The "ipath:" value is converted with int() and handed to
        self.extractone(); whatever that call returns is returned
        unchanged. int() raises ValueError if the value is not a valid
        integer string, and the lookup raises KeyError if "ipath:" is
        missing from params.
        """
        return self.extractone(int(params["ipath:"]))
    # Most handlers factorize common code from getipath() and
    # getnext() in an extractone() method, but this is not part of the
    # interface.