Make the Python recoll extension a package with two modules: recoll and rclextract

2012-12-20 17:05:34 +01:00 · 2012-12-20 17:05:34 +01:00 · 66e8817f77
commit 66e8817f77
parent fc68d31151
7 changed files with 51 additions and 15 deletions
--- a/src/lib/Makefile
+++ b/src/lib/Makefile
@@ -4,7 +4,7 @@ include $(depth)/mk/sysconf

 LIBS = librcl.a

-all: $(LIBS)
+all: $(LIBS) 

 OBJS =  rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o bglfetcher.o fetcher.o fsfetcher.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o mimehandler.o myhtmlparse.o txtdcode.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o daterange.o expansiondbs.o rclabstract.o rcldb.o rcldoc.o rclquery.o searchdata.o searchdataxml.o stemdb.o stoplist.o synfamily.o unac.o base64.o circache.o closefrom.o conftree.o copyfile.o debuglog.o ecrontab.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime.o convert.o iodevice.o iofactory.o
 DEPS =  rclaspell.dep.stamp beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp bglfetcher.dep.stamp fetcher.dep.stamp fsfetcher.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp mimehandler.dep.stamp myhtmlparse.dep.stamp txtdcode.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp daterange.dep.stamp expansiondbs.dep.stamp rclabstract.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp searchdataxml.dep.stamp stemdb.dep.stamp stoplist.dep.stamp synfamily.dep.stamp unac.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp ecrontab.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp
@@ -13,6 +13,11 @@ librcl.a : $(DEPS) $(OBJS)
 	ar ru librcl.a $(OBJS)
 	$(RANLIB) librcl.a

+# Future 
+#all: librecoll.so
+#librecoll.so : $(DEPS) $(OBJS)
+#	g++ -shared -Wl,--no-undefined #          -Wl,-soname=librecoll.so.1 -o librecoll.so $(OBJS) #           -lxapian -lz -lX11 -lpthread -ldl
+
 rclaspell.o : ../aspell/rclaspell.cpp $(depth)/mk/localdefs
 	$(CXX) $(ALL_CXXFLAGS) -c ../aspell/rclaspell.cpp
 beaglequeuecache.o : ../common/beaglequeuecache.cpp $(depth)/mk/localdefs
--- a/src/lib/mkMake
+++ b/src/lib/mkMake
@@ -117,7 +117,7 @@ include \$(depth)/mk/sysconf

 LIBS = librcl.a

-all: \$(LIBS)
+all: \$(LIBS) 

 OBJS = $OBJS
 DEPS = $DEPS
@@ -126,6 +126,13 @@ librcl.a : \$(DEPS) \$(OBJS)
 	ar ru librcl.a \$(OBJS)
 	\$(RANLIB) librcl.a

+# Future 
+#all: librecoll.so
+#librecoll.so : \$(DEPS) \$(OBJS)
+#	g++ -shared -Wl,--no-undefined \\
+#          -Wl,-soname=librecoll.so.1 -o librecoll.so \$(OBJS) \\
+#           -lxapian -lz -lX11 -lpthread -ldl
+
 EOF

 for c in $SRC_CPP;do
--- a/src/python/recoll/pyrclextract.cpp
+++ b/src/python/recoll/pyrclextract.cpp
@@ -178,9 +178,15 @@ Extractor_idoctofile(rclx_ExtractorObject* self, PyObject *args,
 	outfile.assign(soutfile); 
    
    if (self->xtr == 0) {
-        PyErr_SetString(PyExc_AttributeError, "extract: null object");
+        PyErr_SetString(PyExc_AttributeError, "idoctofile: null object");
 	return 0;
    }
+    if (ipath.empty()) {
+        PyErr_SetString(PyExc_ValueError, "idoctofile: null ipath");
+	return 0;
+    }
+	
+    self->xtr->setTargetMType(mimetype);
    TempFile temp;
    bool status = self->xtr->interntofile(temp, outfile, ipath, mimetype);
    if (!status) {
@@ -285,5 +291,5 @@ initrclextract(void)
    Py_INCREF(&rclx_ExtractorType);
    PyModule_AddObject(m, "Extractor", (PyObject *)&rclx_ExtractorType);

-    recoll_DocType = (PyObject*)PyCapsule_Import("recoll.doctypeptr", 0);
+    recoll_DocType = (PyObject*)PyCapsule_Import(PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
 }
--- a/src/python/recoll/pyrecoll.cpp
+++ b/src/python/recoll/pyrecoll.cpp
@@ -1703,6 +1703,6 @@ initrecoll(void)

    // Export a few pointers for the benefit of other recoll python modules
    PyObject* doctypecapsule = 
-	PyCapsule_New(&recoll_DocType, "recoll.doctypeptr", 0);
+	PyCapsule_New(&recoll_DocType, PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
    PyModule_AddObject(m, "doctypeptr", doctypecapsule);
 }
--- a/src/python/recoll/pyrecoll.h
+++ b/src/python/recoll/pyrecoll.h
@@ -28,4 +28,6 @@ typedef struct {
    RclConfig *rclconfig; 
 } recoll_DocObject;

+#define PYRECOLL_PACKAGE "recoll."
+
 #endif // _PYRECOLL_H_INCLUDED_
--- a/src/python/recoll/setup.py.in
+++ b/src/python/recoll/setup.py.in
@@ -50,8 +50,7 @@ module1 = Extension('recoll',
                                    ],
                    libraries = libs,
                    library_dirs = libdirs,
-                    sources = ['pyrecoll.cpp',
-                               ])
+                    sources = ['pyrecoll.cpp'])

 module2 = Extension('rclextract',
                    define_macros = [('MAJOR_VERSION', '1'),
@@ -67,14 +66,17 @@ module2 = Extension('rclextract',
                                    ],
                    libraries = libs,
                    library_dirs = libdirs,
-                    sources = ['pyrclextract.cpp',
-                               ])
+                    sources = ['pyrclextract.cpp'])

 setup (name = 'Recoll',
       version = '1.0',
       description = 'Query/Augment a Recoll full text index',
       author = 'J.F. Dockes',
       author_email = 'jfd@recoll.org',
+       url = 'http://www.recoll.org',
+       license = 'GPL',
       long_description = '''
 ''',
+    packages = ['recoll'],
+    ext_package = 'recoll',
       ext_modules = [module1, module2])
--- a/src/python/samples/recollq.py
+++ b/src/python/samples/recollq.py
@@ -6,9 +6,15 @@ This could actually be useful for something after some customization

 import sys
 from getopt import getopt
-import recoll
-import rclextract

+try:
+    from recoll import recoll
+    from recoll import rclextract
+    hasextract = True
+except ImportError:  # package layout not importable: fall back to the top-level recoll module
+    import recoll
+    hasextract = False
+    
 allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
           "ipath", "fbytes", "dbytes", "relevancyrating")

@@ -27,9 +33,15 @@ class ptrmeths:
    
 def extract(doc):
    extractor = rclextract.Extractor(doc)
-    newdoc = extractor.extract(doc.ipath)
+    newdoc = extractor.textextract(doc.ipath)
    return newdoc

+def extractofile(doc, outfilename=""):
+    # Build an Extractor for doc and hand back whatever idoctofile() returns.
+    xtr = rclextract.Extractor(doc)
+    return xtr.idoctofile(doc.ipath, doc.mimetype,
+                          ofilename=outfilename)
+
 def doquery(db, q):
    # Get query object
    query = db.query()
@@ -48,9 +60,11 @@ def doquery(db, q):
    while query.next >= 0 and query.next < nres: 
        doc = query.fetchone()
        print query.next, ":",
-#        for k,v in doc.items().items():
-#            print "KEY:", k.encode('utf-8'), "VALUE", v.encode('utf-8')
-#        continue
+        #for k,v in doc.items().items():
+        #print "KEY:", k.encode('utf-8'), "VALUE", v.encode('utf-8')
+        #continue
+        #outfile = extractofile(doc)
+        #print "outfile:", outfile, "url", doc.url.encode("utf-8")
        for k in ("title", "mtime", "author"):
            value = getattr(doc, k)
 #            value = doc.get(k)