From 66e8817f7799b87dac2f0846e71b85cd3f41f5e6 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Thu, 20 Dec 2012 17:05:34 +0100 Subject: [PATCH] make python recoll a package with 2 modules recoll and rclextract --- src/lib/Makefile | 7 ++++++- src/lib/mkMake | 9 ++++++++- src/python/recoll/pyrclextract.cpp | 10 ++++++++-- src/python/recoll/pyrecoll.cpp | 2 +- src/python/recoll/pyrecoll.h | 2 ++ src/python/recoll/setup.py.in | 10 ++++++---- src/python/samples/recollq.py | 26 ++++++++++++++++++++------ 7 files changed, 51 insertions(+), 15 deletions(-) diff --git a/src/lib/Makefile b/src/lib/Makefile index c9ab11fd..00172524 100644 --- a/src/lib/Makefile +++ b/src/lib/Makefile @@ -4,7 +4,7 @@ include $(depth)/mk/sysconf LIBS = librcl.a -all: $(LIBS) +all: $(LIBS) OBJS = rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o bglfetcher.o fetcher.o fsfetcher.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o mimehandler.o myhtmlparse.o txtdcode.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o daterange.o expansiondbs.o rclabstract.o rcldb.o rcldoc.o rclquery.o searchdata.o searchdataxml.o stemdb.o stoplist.o synfamily.o unac.o base64.o circache.o closefrom.o conftree.o copyfile.o debuglog.o ecrontab.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime.o convert.o iodevice.o iofactory.o DEPS = rclaspell.dep.stamp beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp bglfetcher.dep.stamp fetcher.dep.stamp fsfetcher.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp mimehandler.dep.stamp myhtmlparse.dep.stamp txtdcode.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp daterange.dep.stamp expansiondbs.dep.stamp rclabstract.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp searchdataxml.dep.stamp stemdb.dep.stamp stoplist.dep.stamp synfamily.dep.stamp unac.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp ecrontab.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp @@ -13,6 +13,11 @@ librcl.a : $(DEPS) $(OBJS) ar ru librcl.a $(OBJS) $(RANLIB) librcl.a +# Future +#all: librecoll.so +#librecoll.so : $(DEPS) $(OBJS) +# g++ -shared -Wl,--no-undefined # -Wl,-soname=librecoll.so.1 -o librecoll.so $(OBJS) # -lxapian -lz -lX11 -lpthread -ldl + rclaspell.o : ../aspell/rclaspell.cpp $(depth)/mk/localdefs $(CXX) $(ALL_CXXFLAGS) -c ../aspell/rclaspell.cpp beaglequeuecache.o : ../common/beaglequeuecache.cpp $(depth)/mk/localdefs diff --git a/src/lib/mkMake b/src/lib/mkMake index 7a9f3586..3b6d55f7 100755 --- a/src/lib/mkMake +++ b/src/lib/mkMake @@ -117,7 +117,7 @@ include \$(depth)/mk/sysconf LIBS = librcl.a -all: \$(LIBS) +all: \$(LIBS) OBJS = $OBJS DEPS = $DEPS @@ -126,6 +126,13 @@ librcl.a : \$(DEPS) \$(OBJS) ar ru librcl.a \$(OBJS) \$(RANLIB) librcl.a +# Future +#all: librecoll.so +#librecoll.so : \$(DEPS) \$(OBJS) +# g++ -shared -Wl,--no-undefined \ +# -Wl,-soname=librecoll.so.1 -o librecoll.so \$(OBJS) \ +# -lxapian -lz -lX11 -lpthread -ldl + EOF for c in $SRC_CPP;do diff --git a/src/python/recoll/pyrclextract.cpp b/src/python/recoll/pyrclextract.cpp index dbe5a22c..852b9614 100644 --- a/src/python/recoll/pyrclextract.cpp +++ b/src/python/recoll/pyrclextract.cpp @@ -178,9 +178,15 @@ Extractor_idoctofile(rclx_ExtractorObject* self, PyObject *args, outfile.assign(soutfile); if (self->xtr == 0) { - PyErr_SetString(PyExc_AttributeError, "extract: null object"); + PyErr_SetString(PyExc_AttributeError, "idoctofile: null object"); return 0; } + if (ipath.empty()) { + PyErr_SetString(PyExc_ValueError, "idoctofile: null ipath"); + return 0; + } + + self->xtr->setTargetMType(mimetype); TempFile temp; bool status = self->xtr->interntofile(temp, outfile, ipath, mimetype); if (!status) { @@ -285,5 +291,5 @@ initrclextract(void) Py_INCREF(&rclx_ExtractorType); PyModule_AddObject(m, "Extractor", (PyObject *)&rclx_ExtractorType); - recoll_DocType = (PyObject*)PyCapsule_Import("recoll.doctypeptr", 0); + recoll_DocType = (PyObject*)PyCapsule_Import(PYRECOLL_PACKAGE "recoll.doctypeptr", 0); } diff --git a/src/python/recoll/pyrecoll.cpp b/src/python/recoll/pyrecoll.cpp index 7667acff..aa702dcf 100644 --- a/src/python/recoll/pyrecoll.cpp +++ b/src/python/recoll/pyrecoll.cpp @@ -1703,6 +1703,6 @@ initrecoll(void) // Export a few pointers for the benefit of other recoll python modules PyObject* doctypecapsule = - PyCapsule_New(&recoll_DocType, "recoll.doctypeptr", 0); + PyCapsule_New(&recoll_DocType, PYRECOLL_PACKAGE "recoll.doctypeptr", 0); PyModule_AddObject(m, "doctypeptr", doctypecapsule); } diff --git a/src/python/recoll/pyrecoll.h b/src/python/recoll/pyrecoll.h index 53a12ed5..217a6a16 100644 --- a/src/python/recoll/pyrecoll.h +++ b/src/python/recoll/pyrecoll.h @@ -28,4 +28,6 @@ typedef struct { RclConfig *rclconfig; } recoll_DocObject; +#define PYRECOLL_PACKAGE "recoll." + #endif // _PYRECOLL_H_INCLUDED_ diff --git a/src/python/recoll/setup.py.in b/src/python/recoll/setup.py.in index 18568cdd..92ee076b 100644 --- a/src/python/recoll/setup.py.in +++ b/src/python/recoll/setup.py.in @@ -50,8 +50,7 @@ module1 = Extension('recoll', ], libraries = libs, library_dirs = libdirs, - sources = ['pyrecoll.cpp', - ]) + sources = ['pyrecoll.cpp']) module2 = Extension('rclextract', define_macros = [('MAJOR_VERSION', '1'), @@ -67,14 +66,17 @@ module2 = Extension('rclextract', ], libraries = libs, library_dirs = libdirs, - sources = ['pyrclextract.cpp', - ]) + sources = ['pyrclextract.cpp']) setup (name = 'Recoll', version = '1.0', description = 'Query/Augment a Recoll full text index', author = 'J.F. Dockes', author_email = 'jfd@recoll.org', + url = 'http://www.recoll.org', + license = 'GPL', long_description = ''' ''', + packages = ['recoll'], + ext_package = 'recoll', ext_modules = [module1, module2]) diff --git a/src/python/samples/recollq.py b/src/python/samples/recollq.py index 6a119eb4..67008d29 100755 --- a/src/python/samples/recollq.py +++ b/src/python/samples/recollq.py @@ -6,9 +6,15 @@ This could actually be useful for something after some customization import sys from getopt import getopt -import recoll -import rclextract +try: + from recoll import recoll + from recoll import rclextract + hasextract = True +except: + import recoll + hasextract = False + allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime", "ipath", "fbytes", "dbytes", "relevancyrating") @@ -27,9 +33,15 @@ class ptrmeths: def extract(doc): extractor = rclextract.Extractor(doc) - newdoc = extractor.extract(doc.ipath) + newdoc = extractor.textextract(doc.ipath) return newdoc +def extractofile(doc, outfilename=""): + extractor = rclextract.Extractor(doc) + outfilename = extractor.idoctofile(doc.ipath, doc.mimetype, \ + ofilename=outfilename) + return outfilename + def doquery(db, q): # Get query object query = db.query() @@ -48,9 +60,11 @@ def doquery(db, q): while query.next >= 0 and query.next < nres: doc = query.fetchone() print query.next, ":", -# for k,v in doc.items().items(): -# print "KEY:", k.encode('utf-8'), "VALUE", v.encode('utf-8') -# continue + #for k,v in doc.items().items(): + #print "KEY:", k.encode('utf-8'), "VALUE", v.encode('utf-8') + #continue + #outfile = extractofile(doc) + #print "outfile:", outfile, "url", doc.url.encode("utf-8") for k in ("title", "mtime", "author"): value = getattr(doc, k) # value = doc.get(k)