Make python recoll a package with 2 modules: recoll and rclextract

2012-12-20 17:05:34 +01:00 · 2012-12-20 17:05:34 +01:00 · 66e8817f77
commit 66e8817f77
parent fc68d31151
7 changed files with 51 additions and 15 deletions
--- a/src/lib/Makefile
+++ b/src/lib/Makefile
@ -13,6 +13,11 @@ librcl.a : $(DEPS) $(OBJS)
 	ar ru librcl.a $(OBJS)
 	$(RANLIB) librcl.a

+# Future 
+#all: librecoll.so
+#librecoll.so : $(DEPS) $(OBJS)
+#	g++ -shared -Wl,--no-undefined #          -Wl,-soname=librecoll.so.1 -o librecoll.so $(OBJS) #           -lxapian -lz -lX11 -lpthread -ldl
+
 rclaspell.o : ../aspell/rclaspell.cpp $(depth)/mk/localdefs
 	$(CXX) $(ALL_CXXFLAGS) -c ../aspell/rclaspell.cpp
 beaglequeuecache.o : ../common/beaglequeuecache.cpp $(depth)/mk/localdefs
--- a/src/lib/mkMake
+++ b/src/lib/mkMake
@ -126,6 +126,13 @@ librcl.a : \$(DEPS) \$(OBJS)
 	ar ru librcl.a \$(OBJS)
 	\$(RANLIB) librcl.a

+# Future 
+#all: librecoll.so
+#librecoll.so : \$(DEPS) \$(OBJS)
+#	g++ -shared -Wl,--no-undefined \
+#          -Wl,-soname=librecoll.so.1 -o librecoll.so \$(OBJS) \
+#           -lxapian -lz -lX11 -lpthread -ldl
+
 EOF

 for c in $SRC_CPP;do
--- a/src/python/recoll/pyrclextract.cpp
+++ b/src/python/recoll/pyrclextract.cpp
@ -178,9 +178,15 @@ Extractor_idoctofile(rclx_ExtractorObject* self, PyObject *args,
 	outfile.assign(soutfile); 
    
    if (self->xtr == 0) {
-        PyErr_SetString(PyExc_AttributeError, "extract: null object");
+        PyErr_SetString(PyExc_AttributeError, "idoctofile: null object");
 	return 0;
    }
+    if (ipath.empty()) {
+        PyErr_SetString(PyExc_ValueError, "idoctofile: null ipath");
+	return 0;
+    }
+	
+    self->xtr->setTargetMType(mimetype);
    TempFile temp;
    bool status = self->xtr->interntofile(temp, outfile, ipath, mimetype);
    if (!status) {
@ -285,5 +291,5 @@ initrclextract(void)
    Py_INCREF(&rclx_ExtractorType);
    PyModule_AddObject(m, "Extractor", (PyObject *)&rclx_ExtractorType);

-    recoll_DocType = (PyObject*)PyCapsule_Import("recoll.doctypeptr", 0);
+    recoll_DocType = (PyObject*)PyCapsule_Import(PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
 }
--- a/src/python/recoll/pyrecoll.cpp
+++ b/src/python/recoll/pyrecoll.cpp
@ -1703,6 +1703,6 @@ initrecoll(void)

    // Export a few pointers for the benefit of other recoll python modules
    PyObject* doctypecapsule = 
-	PyCapsule_New(&recoll_DocType, "recoll.doctypeptr", 0);
+	PyCapsule_New(&recoll_DocType, PYRECOLL_PACKAGE "recoll.doctypeptr", 0);
    PyModule_AddObject(m, "doctypeptr", doctypecapsule);
 }
--- a/src/python/recoll/pyrecoll.h
+++ b/src/python/recoll/pyrecoll.h
@ -28,4 +28,6 @@ typedef struct {
    RclConfig *rclconfig; 
 } recoll_DocObject;

+#define PYRECOLL_PACKAGE "recoll."
+
 #endif // _PYRECOLL_H_INCLUDED_
--- a/src/python/recoll/setup.py.in
+++ b/src/python/recoll/setup.py.in
@ -50,8 +50,7 @@ module1 = Extension('recoll',
                                    ],
                    libraries = libs,
                    library_dirs = libdirs,
-                    sources = ['pyrecoll.cpp',
-                               ])
+                    sources = ['pyrecoll.cpp'])

 module2 = Extension('rclextract',
                    define_macros = [('MAJOR_VERSION', '1'),
@ -67,14 +66,17 @@ module2 = Extension('rclextract',
                                    ],
                    libraries = libs,
                    library_dirs = libdirs,
-                    sources = ['pyrclextract.cpp',
-                               ])
+                    sources = ['pyrclextract.cpp'])

 setup (name = 'Recoll',
       version = '1.0',
       description = 'Query/Augment a Recoll full text index',
       author = 'J.F. Dockes',
       author_email = 'jfd@recoll.org',
+       url = 'http://www.recoll.org',
+       license = 'GPL',
       long_description = '''
 ''',
+    packages = ['recoll'],
+    ext_package = 'recoll',
       ext_modules = [module1, module2])
--- a/src/python/samples/recollq.py
+++ b/src/python/samples/recollq.py
@ -6,8 +6,14 @@ This could actually be useful for something after some customization

 import sys
 from getopt import getopt
+
+try:
+    from recoll import recoll
+    from recoll import rclextract
+    hasextract = True
+except:
    import recoll
-import rclextract
+    hasextract = False
    
 allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
           "ipath", "fbytes", "dbytes", "relevancyrating")
@ -27,9 +33,15 @@ class ptrmeths:
    
 def extract(doc):
    extractor = rclextract.Extractor(doc)
-    newdoc = extractor.extract(doc.ipath)
+    newdoc = extractor.textextract(doc.ipath)
    return newdoc

+def extractofile(doc, outfilename=""):
+    extractor = rclextract.Extractor(doc)
+    outfilename = extractor.idoctofile(doc.ipath, doc.mimetype, \
+                                       ofilename=outfilename)
+    return outfilename
+
 def doquery(db, q):
    # Get query object
    query = db.query()
@ -51,6 +63,8 @@ def doquery(db, q):
        #for k,v in doc.items().items():
        #print "KEY:", k.encode('utf-8'), "VALUE", v.encode('utf-8')
        #continue
+        #outfile = extractofile(doc)
+        #print "outfile:", outfile, "url", doc.url.encode("utf-8")
        for k in ("title", "mtime", "author"):
            value = getattr(doc, k)
 #            value = doc.get(k)