diff --git a/src/filters/rclinfo b/src/filters/rclinfo
index 203e7e57..8047c842 100755
--- a/src/filters/rclinfo
+++ b/src/filters/rclinfo
@@ -32,13 +32,15 @@ class InfoExtractor:
         nodename, docdata = self.contents[index]
         nodename = self.em.htmlescape(nodename)
         docdata = self.em.htmlescape(docdata)
         # strange whitespace to avoid changing the module tests (same as old)
-        docdata = b'\n<html>\n  <head>\n      <title>' + nodename + \
+        docdata = b'\n<html>\n  <head>\n      <title>' + \
+                  nodename + \
                   b'</title>\n' + \
-                  '      <meta name="rclaptg" content="gnuinfo">\n' + \
+                  b'      <meta name="rclaptg" content="gnuinfo">\n' + \
                   b'   </head>\n   <body>\n' + \
                   b'   <pre style="white-space: pre-wrap">\n   ' + \
-                  docdata + b'\n   </pre></body>\n</html>\n'
+                  docdata + \
+                  b'\n   </pre></body>\n</html>\n'
 
         iseof = rclexecm.RclExecM.noteof
         if self.currentindex >= len(self.contents) -1:
diff --git a/src/filters/rclkar b/src/filters/rclkar
index 00432b15..940f13d0 100755
--- a/src/filters/rclkar
+++ b/src/filters/rclkar
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
 
 # Read a .kar midi karaoke file and translate to recoll indexable format
 # This does not work with Python3 yet because python:midi doesn't 
diff --git a/src/filters/rclmpdf.py b/src/filters/rclmpdf.py
index 0b5ba836..4d7e9634 100755
--- a/src/filters/rclmpdf.py
+++ b/src/filters/rclmpdf.py
@@ -120,24 +120,24 @@ class PDFExtractor:
         inheader = False
         inbody = False
         didcs = False
-        output = ''
-        cont = ''
-        for line in input.split('\n'):
+        output = b''
+        cont = b''
+        for line in input.split(b'\n'):
             line = cont + line
-            cont = ''
-            if re.search('</head>', line):
+            cont = b''
+            if re.search(b'</head>', line):
                 inheader = False
-            if re.search('</pre>', line):
+            if re.search(b'</pre>', line):
                 inbody = False
             if inheader:
                 if not didcs:
-                    output += '<meta http-equiv="Content-Type"' + \
-                              'content="text/html; charset=UTF-8">\n'
+                    output += b'<meta http-equiv="Content-Type"' + \
+                              b'content="text/html; charset=UTF-8">\n'
                     didcs = True
 
-                m = re.search(r'(.*<title>)(.*)(<\/title>.*)', line)
+                m = re.search(br'(.*<title>)(.*)(<\/title>.*)', line)
                 if not m:
-                    m = re.search(r'(.*content=")(.*)(".*/>.*)', line)
+                    m = re.search(br'(.*content=")(.*)(".*/>.*)', line)
                 if m:
                     line = m.group(1) + self.em.htmlescape(m.group(2)) + \
                            m.group(3)
@@ -145,7 +145,7 @@ class PDFExtractor:
                 # Recoll treats "Subject" as a "title" element
                 # (based on emails). The PDF "Subject" metadata
                 # field is more like an HTML "description"
-                line = re.sub('name="Subject"', 'name="Description"', line, 1)
+                line = re.sub(b'name="Subject"', b'name="Description"', line, 1)
 
             elif inbody:
                 # Remove end-of-line hyphenation. It's not clear that
@@ -158,12 +158,12 @@ class PDFExtractor:
                         #cont = m.group(2).rstrip('-')
                 line = self.em.htmlescape(line)
                 
-            if re.search('<head>', line):
+            if re.search(b'<head>', line):
                 inheader = True
-            if re.search('<pre>', line):
+            if re.search(b'<pre>', line):
                 inbody = True
 
-            output += line + '\n'
+            output += line + b'\n'
 
         return output
             
diff --git a/src/filters/rcltar b/src/filters/rcltar
index f597bb79..74aaecbd 100755
--- a/src/filters/rcltar
+++ b/src/filters/rcltar
@@ -23,15 +23,15 @@ class TarExtractor:
         self.namen = []
 
     def extractone(self, ipath):
-        docdata = ""
+        docdata = b''
         try:
             info = self.tar.getmember(ipath)
             if info.size > self.em.maxmembersize:
                 # skip
-                docdata = ""
+                docdata = b''
                 self.em.rclog("extractone: entry %s size %d too big" %
                               (ipath, info.size))
-                docdata = "" # raise TarError("Member too big")
+                docdata = b'' # raise TarError("Member too big")
             else:
                 docdata = self.tar.extractfile(ipath).read()
             ok = True
@@ -45,7 +45,7 @@ class TarExtractor:
     def openfile(self, params):
         self.currentindex = -1
         try:
-            self.tar = tarfile.open(name=params["filename:"],mode='r')
+            self.tar = tarfile.open(name=params["filename:"], mode='r')
             #self.namen = [ y.name for y in filter(lambda z:z.isfile(),self.tar.getmembers())]
             self.namen = [ y.name for y in [z for z in self.tar.getmembers() if z.isfile()]]
 
diff --git a/src/filters/rcltext.py b/src/filters/rcltext.py
index 847a80b2..f449dfe6 100755
--- a/src/filters/rcltext.py
+++ b/src/filters/rcltext.py
@@ -21,7 +21,7 @@ class TxtDump:
 
         fn = params["filename:"]
         # No charset, so recoll will have to use its config to guess it
-        txt = '<html><head><title></title></head><body><pre>'
+        txt = b'<html><head><title></title></head><body><pre>'
         try:
             f = open(fn, "rb")
             txt += self.em.htmlescape(f.read())
@@ -29,7 +29,7 @@ class TxtDump:
             self.em.rclog("TxtDump: %s : %s" % (fn, err))
             return (False, "", "", rclexecm.RclExecM.eofnow)
             
-        txt += '</pre></body></html>'
+        txt += b'</pre></body></html>'
         return (True, txt, "", rclexecm.RclExecM.eofnext)
         
     ###### File type handler api, used by rclexecm ---------->
diff --git a/src/filters/xlsxmltocsv.py b/src/filters/xlsxmltocsv.py
index cfc39304..7fa12e58 100755
--- a/src/filters/xlsxmltocsv.py
+++ b/src/filters/xlsxmltocsv.py
@@ -1,14 +1,33 @@
 #!/usr/bin/env python2
+# Copyright (C) 2015 J.F.Dockes
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the
+#   Free Software Foundation, Inc.,
+#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 
 # Transform XML output from xls-dump.py into csv format.
-# Note: msodumper is not compatible with python3.
+#
+# Note: this would be difficult to make compatible with Python 3 <= 3.4
+# because of the use of % interpolation on what should be bytes.
+# The Python 2 restriction is not a big issue at this point because
+# msodumper is not compatible with Python 3 anyway.
+# % interpolation for bytes is planned for Python 3.5 (PEP 461), at which
+# point porting this module will become trivial.
 
 from __future__ import print_function
 
 import sys
 import xml.sax
-sys.path.append(sys.path[0]+"/msodump.zip")
-from msodumper.globals import error
 
 dtt = True
 
@@ -62,7 +81,7 @@ if __name__ == '__main__':
         xml.sax.parse(sys.stdin, handler)
         print(handler.output)
     except BaseException as err:
-        error("xml-parse: %s\n" % (str(sys.exc_info()[:2]),))
+        print("xml-parse: %s\n" % (str(sys.exc_info()[:2]),), file=sys.stderr)
         sys.exit(1)
 
     sys.exit(0)