diff --git a/src/filters/rclrtf.py b/src/filters/rclrtf.py old mode 100644 new mode 100755 diff --git a/src/filters/rcltext.py b/src/filters/rcltext.py new file mode 100755 index 00000000..d55249d2 --- /dev/null +++ b/src/filters/rcltext.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python + +import rclexecm +import sys + +# Wrapping a text file. Recoll does it internally in most cases, but +# there is a reason this exists, just can't remember it ... +class TxtDump: + def __init__(self, em): + self.em = em + + def extractone(self, params): + #self.em.rclog("extractone %s %s" % (params["filename:"], \ + #params["mimetype:"])) + if not params.has_key("filename:"): + self.em.rclog("extractone: no file name") + return (False, "", "", rclexecm.RclExecM.eofnow) + + fn = params["filename:"] + # No charset, so recoll will have to use its config to guess it + txt = '
'
+        try:
+            f = open(fn, "r")
+            txt += self.em.htmlescape(f.read())
+        except Exception as err:
+            self.em.rclog("TxtDump: %s : %s" % (fn, err))
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+            
+        txt += '
' + return (True, txt, "", rclexecm.RclExecM.eofnext) + + ###### File type handler api, used by rclexecm ----------> + def openfile(self, params): + self.currentindex = 0 + return True + + def getipath(self, params): + return self.extractone(params) + + def getnext(self, params): + if self.currentindex >= 1: + return (False, "", "", rclexecm.RclExecM.eofnow) + else: + ret= self.extractone(params) + self.currentindex += 1 + return ret + +if __name__ == '__main__': + proto = rclexecm.RclExecM() + extract = TxtDump(proto) + rclexecm.main(proto, extract)