Make the Python recoll extension a package with two modules: recoll and rclextract
This commit is contained in:
parent
fc68d31151
commit
66e8817f77
7 changed files with 51 additions and 15 deletions
|
@ -4,7 +4,7 @@ include $(depth)/mk/sysconf
|
||||||
|
|
||||||
LIBS = librcl.a
|
LIBS = librcl.a
|
||||||
|
|
||||||
all: $(LIBS)
|
all: $(LIBS)
|
||||||
|
|
||||||
OBJS = rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o bglfetcher.o fetcher.o fsfetcher.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o mimehandler.o myhtmlparse.o txtdcode.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o daterange.o expansiondbs.o rclabstract.o rcldb.o rcldoc.o rclquery.o searchdata.o searchdataxml.o stemdb.o stoplist.o synfamily.o unac.o base64.o circache.o closefrom.o conftree.o copyfile.o debuglog.o ecrontab.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime.o convert.o iodevice.o iofactory.o
|
OBJS = rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o bglfetcher.o fetcher.o fsfetcher.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o mimehandler.o myhtmlparse.o txtdcode.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o daterange.o expansiondbs.o rclabstract.o rcldb.o rcldoc.o rclquery.o searchdata.o searchdataxml.o stemdb.o stoplist.o synfamily.o unac.o base64.o circache.o closefrom.o conftree.o copyfile.o debuglog.o ecrontab.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime.o convert.o iodevice.o iofactory.o
|
||||||
DEPS = rclaspell.dep.stamp beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp bglfetcher.dep.stamp fetcher.dep.stamp fsfetcher.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp mimehandler.dep.stamp myhtmlparse.dep.stamp txtdcode.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp daterange.dep.stamp expansiondbs.dep.stamp rclabstract.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp searchdataxml.dep.stamp stemdb.dep.stamp stoplist.dep.stamp synfamily.dep.stamp unac.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp ecrontab.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp
|
DEPS = rclaspell.dep.stamp beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp bglfetcher.dep.stamp fetcher.dep.stamp fsfetcher.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp mimehandler.dep.stamp myhtmlparse.dep.stamp txtdcode.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp daterange.dep.stamp expansiondbs.dep.stamp rclabstract.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp searchdataxml.dep.stamp stemdb.dep.stamp stoplist.dep.stamp synfamily.dep.stamp unac.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp ecrontab.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp
|
||||||
|
@ -13,6 +13,11 @@ librcl.a : $(DEPS) $(OBJS)
|
||||||
ar ru librcl.a $(OBJS)
|
ar ru librcl.a $(OBJS)
|
||||||
$(RANLIB) librcl.a
|
$(RANLIB) librcl.a
|
||||||
|
|
||||||
|
# Future
|
||||||
|
#all: librecoll.so
|
||||||
|
#librecoll.so : $(DEPS) $(OBJS)
|
||||||
|
# g++ -shared -Wl,--no-undefined # -Wl,-soname=librecoll.so.1 -o librecoll.so $(OBJS) # -lxapian -lz -lX11 -lpthread -ldl
|
||||||
|
|
||||||
rclaspell.o : ../aspell/rclaspell.cpp $(depth)/mk/localdefs
|
rclaspell.o : ../aspell/rclaspell.cpp $(depth)/mk/localdefs
|
||||||
$(CXX) $(ALL_CXXFLAGS) -c ../aspell/rclaspell.cpp
|
$(CXX) $(ALL_CXXFLAGS) -c ../aspell/rclaspell.cpp
|
||||||
beaglequeuecache.o : ../common/beaglequeuecache.cpp $(depth)/mk/localdefs
|
beaglequeuecache.o : ../common/beaglequeuecache.cpp $(depth)/mk/localdefs
|
||||||
|
|
|
@ -117,7 +117,7 @@ include \$(depth)/mk/sysconf
|
||||||
|
|
||||||
LIBS = librcl.a
|
LIBS = librcl.a
|
||||||
|
|
||||||
all: \$(LIBS)
|
all: \$(LIBS)
|
||||||
|
|
||||||
OBJS = $OBJS
|
OBJS = $OBJS
|
||||||
DEPS = $DEPS
|
DEPS = $DEPS
|
||||||
|
@ -126,6 +126,13 @@ librcl.a : \$(DEPS) \$(OBJS)
|
||||||
ar ru librcl.a \$(OBJS)
|
ar ru librcl.a \$(OBJS)
|
||||||
\$(RANLIB) librcl.a
|
\$(RANLIB) librcl.a
|
||||||
|
|
||||||
|
# Future
|
||||||
|
#all: librecoll.so
|
||||||
|
#librecoll.so : \$(DEPS) \$(OBJS)
|
||||||
|
# g++ -shared -Wl,--no-undefined \
|
||||||
|
# -Wl,-soname=librecoll.so.1 -o librecoll.so \$(OBJS) \
|
||||||
|
# -lxapian -lz -lX11 -lpthread -ldl
|
||||||
|
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
for c in $SRC_CPP;do
|
for c in $SRC_CPP;do
|
||||||
|
|
|
@ -178,9 +178,15 @@ Extractor_idoctofile(rclx_ExtractorObject* self, PyObject *args,
|
||||||
outfile.assign(soutfile);
|
outfile.assign(soutfile);
|
||||||
|
|
||||||
if (self->xtr == 0) {
|
if (self->xtr == 0) {
|
||||||
PyErr_SetString(PyExc_AttributeError, "extract: null object");
|
PyErr_SetString(PyExc_AttributeError, "idoctofile: null object");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
if (ipath.empty()) {
|
||||||
|
PyErr_SetString(PyExc_ValueError, "idoctofile: null ipath");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
self->xtr->setTargetMType(mimetype);
|
||||||
TempFile temp;
|
TempFile temp;
|
||||||
bool status = self->xtr->interntofile(temp, outfile, ipath, mimetype);
|
bool status = self->xtr->interntofile(temp, outfile, ipath, mimetype);
|
||||||
if (!status) {
|
if (!status) {
|
||||||
|
@ -285,5 +291,5 @@ initrclextract(void)
|
||||||
Py_INCREF(&rclx_ExtractorType);
|
Py_INCREF(&rclx_ExtractorType);
|
||||||
PyModule_AddObject(m, "Extractor", (PyObject *)&rclx_ExtractorType);
|
PyModule_AddObject(m, "Extractor", (PyObject *)&rclx_ExtractorType);
|
||||||
|
|
||||||
recoll_DocType = (PyObject*)PyCapsule_Import("recoll.doctypeptr", 0);
|
recoll_DocType = (PyObject*)PyCapsule_Import(PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1703,6 +1703,6 @@ initrecoll(void)
|
||||||
|
|
||||||
// Export a few pointers for the benefit of other recoll python modules
|
// Export a few pointers for the benefit of other recoll python modules
|
||||||
PyObject* doctypecapsule =
|
PyObject* doctypecapsule =
|
||||||
PyCapsule_New(&recoll_DocType, "recoll.doctypeptr", 0);
|
PyCapsule_New(&recoll_DocType, PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
|
||||||
PyModule_AddObject(m, "doctypeptr", doctypecapsule);
|
PyModule_AddObject(m, "doctypeptr", doctypecapsule);
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,4 +28,6 @@ typedef struct {
|
||||||
RclConfig *rclconfig;
|
RclConfig *rclconfig;
|
||||||
} recoll_DocObject;
|
} recoll_DocObject;
|
||||||
|
|
||||||
|
#define PYRECOLL_PACKAGE "recoll."
|
||||||
|
|
||||||
#endif // _PYRECOLL_H_INCLUDED_
|
#endif // _PYRECOLL_H_INCLUDED_
|
||||||
|
|
|
@ -50,8 +50,7 @@ module1 = Extension('recoll',
|
||||||
],
|
],
|
||||||
libraries = libs,
|
libraries = libs,
|
||||||
library_dirs = libdirs,
|
library_dirs = libdirs,
|
||||||
sources = ['pyrecoll.cpp',
|
sources = ['pyrecoll.cpp'])
|
||||||
])
|
|
||||||
|
|
||||||
module2 = Extension('rclextract',
|
module2 = Extension('rclextract',
|
||||||
define_macros = [('MAJOR_VERSION', '1'),
|
define_macros = [('MAJOR_VERSION', '1'),
|
||||||
|
@ -67,14 +66,17 @@ module2 = Extension('rclextract',
|
||||||
],
|
],
|
||||||
libraries = libs,
|
libraries = libs,
|
||||||
library_dirs = libdirs,
|
library_dirs = libdirs,
|
||||||
sources = ['pyrclextract.cpp',
|
sources = ['pyrclextract.cpp'])
|
||||||
])
|
|
||||||
|
|
||||||
setup (name = 'Recoll',
|
setup (name = 'Recoll',
|
||||||
version = '1.0',
|
version = '1.0',
|
||||||
description = 'Query/Augment a Recoll full text index',
|
description = 'Query/Augment a Recoll full text index',
|
||||||
author = 'J.F. Dockes',
|
author = 'J.F. Dockes',
|
||||||
author_email = 'jfd@recoll.org',
|
author_email = 'jfd@recoll.org',
|
||||||
|
url = 'http://www.recoll.org',
|
||||||
|
license = 'GPL',
|
||||||
long_description = '''
|
long_description = '''
|
||||||
''',
|
''',
|
||||||
|
packages = ['recoll'],
|
||||||
|
ext_package = 'recoll',
|
||||||
ext_modules = [module1, module2])
|
ext_modules = [module1, module2])
|
||||||
|
|
|
@ -6,9 +6,15 @@ This could actually be useful for something after some customization
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
from getopt import getopt
|
from getopt import getopt
|
||||||
import recoll
|
|
||||||
import rclextract
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
from recoll import recoll
|
||||||
|
from recoll import rclextract
|
||||||
|
hasextract = True
|
||||||
|
except:
|
||||||
|
import recoll
|
||||||
|
hasextract = False
|
||||||
|
|
||||||
allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
|
allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
|
||||||
"ipath", "fbytes", "dbytes", "relevancyrating")
|
"ipath", "fbytes", "dbytes", "relevancyrating")
|
||||||
|
|
||||||
|
@ -27,9 +33,15 @@ class ptrmeths:
|
||||||
|
|
||||||
def extract(doc):
|
def extract(doc):
|
||||||
extractor = rclextract.Extractor(doc)
|
extractor = rclextract.Extractor(doc)
|
||||||
newdoc = extractor.extract(doc.ipath)
|
newdoc = extractor.textextract(doc.ipath)
|
||||||
return newdoc
|
return newdoc
|
||||||
|
|
||||||
|
def extractofile(doc, outfilename=""):
|
||||||
|
extractor = rclextract.Extractor(doc)
|
||||||
|
outfilename = extractor.idoctofile(doc.ipath, doc.mimetype, \
|
||||||
|
ofilename=outfilename)
|
||||||
|
return outfilename
|
||||||
|
|
||||||
def doquery(db, q):
|
def doquery(db, q):
|
||||||
# Get query object
|
# Get query object
|
||||||
query = db.query()
|
query = db.query()
|
||||||
|
@ -48,9 +60,11 @@ def doquery(db, q):
|
||||||
while query.next >= 0 and query.next < nres:
|
while query.next >= 0 and query.next < nres:
|
||||||
doc = query.fetchone()
|
doc = query.fetchone()
|
||||||
print query.next, ":",
|
print query.next, ":",
|
||||||
# for k,v in doc.items().items():
|
#for k,v in doc.items().items():
|
||||||
# print "KEY:", k.encode('utf-8'), "VALUE", v.encode('utf-8')
|
#print "KEY:", k.encode('utf-8'), "VALUE", v.encode('utf-8')
|
||||||
# continue
|
#continue
|
||||||
|
#outfile = extractofile(doc)
|
||||||
|
#print "outfile:", outfile, "url", doc.url.encode("utf-8")
|
||||||
for k in ("title", "mtime", "author"):
|
for k in ("title", "mtime", "author"):
|
||||||
value = getattr(doc, k)
|
value = getattr(doc, k)
|
||||||
# value = doc.get(k)
|
# value = doc.get(k)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue