rcldia: fix from the author (return text/plain instead of HTML; correct the '#'-stripping regexps)
This commit is contained in:
parent
c123da6428
commit
99f20c32c4
2 changed files with 8 additions and 22 deletions
|
@ -4,7 +4,7 @@
|
||||||
# stefan.friedel@iwr.uni-heidelberg.de 2012
|
# stefan.friedel@iwr.uni-heidelberg.de 2012
|
||||||
#
|
#
|
||||||
# add the following to ~/.recoll/mimeconf into the [index] section:
|
# add the following to ~/.recoll/mimeconf into the [index] section:
|
||||||
# application/x-dia-diagram = execm rcldia;mimetype=text/html;charset=utf-8
|
# application/x-dia-diagram = execm rcldia;mimetype=text/plain;charset=utf-8
|
||||||
# and into the [icons] section:
|
# and into the [icons] section:
|
||||||
# application/x-dia-diagram = drawing
|
# application/x-dia-diagram = drawing
|
||||||
# and finally under [categories]:
|
# and finally under [categories]:
|
||||||
|
@ -15,8 +15,6 @@
|
||||||
# .dia = application/x-dia-diagram
|
# .dia = application/x-dia-diagram
|
||||||
|
|
||||||
# Small fixes from jfd: dia files are sometimes not compressed.
|
# Small fixes from jfd: dia files are sometimes not compressed.
|
||||||
# And a note: this file actually has no reason to return HTML as there is
|
|
||||||
# no metadata. We could just as well and more simply return text/plain
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import re
|
import re
|
||||||
from gzip import GzipFile
|
from gzip import GzipFile
|
||||||
|
@ -24,20 +22,10 @@ import xml.parsers.expat
|
||||||
|
|
||||||
# some regexps to parse/format the xml data: delete #/spaces at the b/eol and
|
# some regexps to parse/format the xml data: delete #/spaces at the b/eol and
|
||||||
# ignore empty lines
|
# ignore empty lines
|
||||||
rhs = re.compile(r'^[#|\s+](.*)')
|
rhs = re.compile(r'^#\s*(.*)')
|
||||||
rhe = re.compile(r'(.*)[#|\s+]$')
|
rhe = re.compile(r'(.*)\s*#$')
|
||||||
rempty = re.compile(r'^#?\s*#?$')
|
rempty = re.compile(r'^#?\s*#?$')
|
||||||
|
|
||||||
htmltemplate = '''
|
|
||||||
<html><head>
|
|
||||||
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
{0}
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
'''
|
|
||||||
|
|
||||||
# xml parser for dia xml file
|
# xml parser for dia xml file
|
||||||
class Parser:
|
class Parser:
|
||||||
def __init__(self,rclem):
|
def __init__(self,rclem):
|
||||||
|
@ -58,10 +46,8 @@ class Parser:
|
||||||
def chardata(self,data):
|
def chardata(self,data):
|
||||||
if self.handlethis:
|
if self.handlethis:
|
||||||
# check if line is not empty and replace hashes/spaces
|
# check if line is not empty and replace hashes/spaces
|
||||||
# tricky: after htmlescape check also for umlauts
|
|
||||||
if not rempty.search(data):
|
if not rempty.search(data):
|
||||||
self.string.append(self.rclem.htmlescape(
|
self.string.append(rhe.sub(r'\1',rhs.sub(r'\1',data)))
|
||||||
rhe.sub(r'\1',rhs.sub(r'\1',data))).encode('ascii', 'xmlcharrefreplace'))
|
|
||||||
|
|
||||||
def endelement(self,name):
|
def endelement(self,name):
|
||||||
self.handlethis = False
|
self.handlethis = False
|
||||||
|
@ -83,7 +69,7 @@ class DiaExtractor:
|
||||||
except Exception, err:
|
except Exception, err:
|
||||||
ok = False
|
ok = False
|
||||||
iseof = rclexecm.RclExecM.eofnext
|
iseof = rclexecm.RclExecM.eofnext
|
||||||
self.em.setmimetype("text/html")
|
self.em.setmimetype("text/plain")
|
||||||
return (ok, docdata, ipath, iseof)
|
return (ok, docdata, ipath, iseof)
|
||||||
|
|
||||||
###### File type handler api, used by rclexecm ---------->
|
###### File type handler api, used by rclexecm ---------->
|
||||||
|
@ -116,7 +102,7 @@ class DiaExtractor:
|
||||||
def ExtractDiaText(self):
|
def ExtractDiaText(self):
|
||||||
diap = Parser(self.em)
|
diap = Parser(self.em)
|
||||||
diap.feed(self.dia)
|
diap.feed(self.dia)
|
||||||
return htmltemplate.format('\n'.join(diap.string))
|
return '\n'.join(diap.string)
|
||||||
|
|
||||||
# Main program: create protocol handler and extractor and run them
|
# Main program: create protocol handler and extractor and run them
|
||||||
proto = rclexecm.RclExecM()
|
proto = rclexecm.RclExecM()
|
||||||
|
|
|
@ -71,7 +71,7 @@ application/vnd.wordperfect = exec wpd2html;mimetype=text/html
|
||||||
application/x-abiword = exec rclabw
|
application/x-abiword = exec rclabw
|
||||||
application/x-awk = internal text/plain
|
application/x-awk = internal text/plain
|
||||||
application/x-chm = execm rclchm
|
application/x-chm = execm rclchm
|
||||||
application/x-dia-diagram = execm rcldia;mimetype=text/html;charset=utf-8
|
application/x-dia-diagram = execm rcldia;mimetype=text/plain;charset=utf-8
|
||||||
application/x-dvi = exec rcldvi
|
application/x-dvi = exec rcldvi
|
||||||
application/x-flac = execm rclaudio
|
application/x-flac = execm rclaudio
|
||||||
application/x-gnuinfo = execm rclinfo
|
application/x-gnuinfo = execm rclinfo
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue