rcldia fix from the author

This commit is contained in:
Jean-Francois Dockes 2012-04-21 20:48:44 +02:00
parent c123da6428
commit 99f20c32c4
2 changed files with 8 additions and 22 deletions

View file

@ -4,7 +4,7 @@
# stefan.friedel@iwr.uni-heidelberg.de 2012 # stefan.friedel@iwr.uni-heidelberg.de 2012
# #
# add the following to ~/.recoll/mimeconf into the [index] section: # add the following to ~/.recoll/mimeconf into the [index] section:
# application/x-dia-diagram = execm rcldia;mimetype=text/html;charset=utf-8 # application/x-dia-diagram = execm rcldia;mimetype=text/plain;charset=utf-8
# and into the [icons] section: # and into the [icons] section:
# application/x-dia-diagram = drawing # application/x-dia-diagram = drawing
# and finally under [categories]: # and finally under [categories]:
@ -15,8 +15,6 @@
# .dia = application/x-dia-diagram # .dia = application/x-dia-diagram
# Small fixes from jfd: dia files are sometimes not compressed. # Small fixes from jfd: dia files are sometimes not compressed.
# And a note: this file actually has no reason to return HTML as there is
# no metadata. We could just as well and more simply return text/plain
import rclexecm import rclexecm
import re import re
from gzip import GzipFile from gzip import GzipFile
@ -24,20 +22,10 @@ import xml.parsers.expat
# some regexps to parse/format the xml data: delete #/spaces at the b/eol and # some regexps to parse/format the xml data: delete #/spaces at the b/eol and
# ignore empty lines # ignore empty lines
rhs = re.compile(r'^[#|\s+](.*)') rhs = re.compile(r'^#\s*(.*)')
rhe = re.compile(r'(.*)[#|\s+]$') rhe = re.compile(r'(.*)\s*#$')
rempty = re.compile(r'^#?\s*#?$') rempty = re.compile(r'^#?\s*#?$')
htmltemplate = '''
<html><head>
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
</head>
<body>
{0}
</body>
</html>
'''
# xml parser for dia xml file # xml parser for dia xml file
class Parser: class Parser:
def __init__(self,rclem): def __init__(self,rclem):
@ -58,10 +46,8 @@ class Parser:
def chardata(self,data): def chardata(self,data):
if self.handlethis: if self.handlethis:
# check if line is not empty and replace hashes/spaces # check if line is not empty and replace hashes/spaces
# tricky: after htmlescape check also for umlauts
if not rempty.search(data): if not rempty.search(data):
self.string.append(self.rclem.htmlescape( self.string.append(rhe.sub(r'\1',rhs.sub(r'\1',data)))
rhe.sub(r'\1',rhs.sub(r'\1',data))).encode('ascii', 'xmlcharrefreplace'))
def endelement(self,name): def endelement(self,name):
self.handlethis = False self.handlethis = False
@ -83,7 +69,7 @@ class DiaExtractor:
except Exception, err: except Exception, err:
ok = False ok = False
iseof = rclexecm.RclExecM.eofnext iseof = rclexecm.RclExecM.eofnext
self.em.setmimetype("text/html") self.em.setmimetype("text/plain")
return (ok, docdata, ipath, iseof) return (ok, docdata, ipath, iseof)
###### File type handler api, used by rclexecm ----------> ###### File type handler api, used by rclexecm ---------->
@ -116,7 +102,7 @@ class DiaExtractor:
def ExtractDiaText(self): def ExtractDiaText(self):
diap = Parser(self.em) diap = Parser(self.em)
diap.feed(self.dia) diap.feed(self.dia)
return htmltemplate.format('\n'.join(diap.string)) return '\n'.join(diap.string)
# Main program: create protocol handler and extractor and run them # Main program: create protocol handler and extractor and run them
proto = rclexecm.RclExecM() proto = rclexecm.RclExecM()

View file

@ -71,7 +71,7 @@ application/vnd.wordperfect = exec wpd2html;mimetype=text/html
application/x-abiword = exec rclabw application/x-abiword = exec rclabw
application/x-awk = internal text/plain application/x-awk = internal text/plain
application/x-chm = execm rclchm application/x-chm = execm rclchm
application/x-dia-diagram = execm rcldia;mimetype=text/html;charset=utf-8 application/x-dia-diagram = execm rcldia;mimetype=text/plain;charset=utf-8
application/x-dvi = exec rcldvi application/x-dvi = exec rcldvi
application/x-flac = execm rclaudio application/x-flac = execm rclaudio
application/x-gnuinfo = execm rclinfo application/x-gnuinfo = execm rclinfo