Also index non-html files from the web queue and fix the Open operation for them

This commit is contained in:
Jean-Francois Dockes 2015-07-24 16:30:13 +02:00
parent 7cc5f9e039
commit be51bf5ef0
8 changed files with 204 additions and 142 deletions

View file

@ -217,12 +217,6 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi)
// Just index the dotdoc // Just index the dotdoc
dotdoc.meta[Rcl::Doc::keybcknd] = "BGL"; dotdoc.meta[Rcl::Doc::keybcknd] = "BGL";
return m_db->addOrUpdate(udi, cstr_null, dotdoc); return m_db->addOrUpdate(udi, cstr_null, dotdoc);
} else if (stringlowercmp("webhistory", dotdoc.meta[Rcl::Doc::keybght]) ||
(dotdoc.mimetype.compare("text/html") &&
dotdoc.mimetype.compare(cstr_textplain))) {
LOGDEB(("BeagleQueueIndexer: skipping: hittype %s mimetype %s\n",
dotdoc.meta[Rcl::Doc::keybght].c_str(), dotdoc.mimetype.c_str()));
return true;
} else { } else {
Rcl::Doc doc; Rcl::Doc doc;
FileInterner interner(data, m_config, FileInterner interner(data, m_config,
@ -404,7 +398,6 @@ BeagleQueueIndexer::processone(const string &path,
char ascdate[30]; char ascdate[30];
sprintf(ascdate, "%ld", long(stp->st_mtime)); sprintf(ascdate, "%ld", long(stp->st_mtime));
// We only process bookmarks or text/html and text/plain files.
if (!stringlowercmp("bookmark", dotdoc.meta[Rcl::Doc::keybght])) { if (!stringlowercmp("bookmark", dotdoc.meta[Rcl::Doc::keybght])) {
// For bookmarks, we just index the doc that was built from the // For bookmarks, we just index the doc that was built from the
// metadata. // metadata.
@ -422,14 +415,6 @@ BeagleQueueIndexer::processone(const string &path,
if (!m_db->addOrUpdate(udi, cstr_null, dotdoc)) if (!m_db->addOrUpdate(udi, cstr_null, dotdoc))
return FsTreeWalker::FtwError; return FsTreeWalker::FtwError;
} else if (stringlowercmp("webhistory", dotdoc.meta[Rcl::Doc::keybght]) ||
(dotdoc.mimetype.compare("text/html") &&
dotdoc.mimetype.compare(cstr_textplain))) {
LOGDEB(("BeagleQueueIndexer: skipping: hittype %s mimetype %s\n",
dotdoc.meta[Rcl::Doc::keybght].c_str(), dotdoc.mimetype.c_str()));
// Unlink them anyway
dounlink = true;
goto out;
} else { } else {
Rcl::Doc doc; Rcl::Doc doc;
// Store the dotdoc fields in the future doc. In case someone wants // Store the dotdoc fields in the future doc. In case someone wants

View file

@ -380,20 +380,12 @@ TempFile FileInterner::dataToTempFile(const string& dt, const string& mt)
temp->getreason().c_str())); temp->getreason().c_str()));
return TempFile(); return TempFile();
} }
string reason;
int fd = open(temp->filename(), O_WRONLY); if (!stringtofile(dt, temp->filename(), reason)) {
if (fd < 0) { LOGERR(("FileInterner::dataToTempFile: stringtofile: %s\n",
LOGERR(("FileInterner::dataToTempFile: open(%s) failed errno %d\n", reason.c_str()));
temp->filename(), errno));
return TempFile(); return TempFile();
} }
if (write(fd, dt.c_str(), dt.length()) != (int)dt.length()) {
close(fd);
LOGERR(("FileInterner::dataToTempFile: write to %s failed errno %d\n",
temp->filename(), errno));
return TempFile();
}
close(fd);
return temp; return temp;
} }
@ -892,7 +884,20 @@ static string urltolocalpath(string url)
return url.substr(7, string::npos); return url.substr(7, string::npos);
} }
// Extract subdoc out of multidoc into temporary file. bool FileInterner::tempFileForMT(TempFile& otemp, RclConfig* cnf,
const string& mimetype)
{
TempFile temp(new TempFileInternal(
cnf->getSuffixFromMimeType(mimetype)));
if (!temp->ok()) {
LOGERR(("FileInterner::interntofile: can't create temp file\n"));
return false;
}
otemp = temp;
return true;
}
// Extract document (typically subdoc of multidoc) into temporary file.
// We do the usual internfile stuff: create a temporary directory, // We do the usual internfile stuff: create a temporary directory,
// then create an interner and call internfile. The target mtype is set to // then create an interner and call internfile. The target mtype is set to
// the input mtype, so that no data conversion is performed. // the input mtype, so that no data conversion is performed.
@ -901,22 +906,20 @@ static string urltolocalpath(string url)
// - The internfile temporary directory gets destroyed by its destructor // - The internfile temporary directory gets destroyed by its destructor
// - The output temporary file which is held in a reference-counted // - The output temporary file which is held in a reference-counted
// object and will be deleted when done with. // object and will be deleted when done with.
// This DOES NOT work with a non-internal file (because at least one conversion //
// is always performed). // If the ipath is null, maybe we're called because the file is not
// stored in the regular file system. We use the docfetcher to get a
// copy (in topdocToFile())
//
// We currently don't handle the case of an internal doc of a non-fs document.
bool FileInterner::idocToFile(TempFile& otemp, const string& tofile, bool FileInterner::idocToFile(TempFile& otemp, const string& tofile,
RclConfig *cnf, const Rcl::Doc& idoc) RclConfig *cnf, const Rcl::Doc& idoc)
{ {
LOGDEB(("FileInterner::idocToFile\n")); LOGDEB(("FileInterner::idocToFile\n"));
// idoc.dump();
if (idoc.ipath.empty()) { if (idoc.ipath.empty()) {
LOGDEB(("FileInterner::idocToFile: not a sub-document !\n")); return topdocToFile(otemp, tofile, cnf, idoc);
// We could do a copy here but it's much more complicated than
// it seems because the source is not necessarily a simple
// depending on the backend. Until we fix the Internfile
// constructor to not do the first conversion, it's much saner
// to just return an error
return false;
} }
// We set FIF_forPreview for consistency with the previous version // We set FIF_forPreview for consistency with the previous version
@ -927,6 +930,54 @@ bool FileInterner::idocToFile(TempFile& otemp, const string& tofile,
return interner.interntofile(otemp, tofile, idoc.ipath, idoc.mimetype); return interner.interntofile(otemp, tofile, idoc.ipath, idoc.mimetype);
} }
// Copy a top-level document (one with an empty ipath) to a file.
//
// The raw document is obtained through the fetcher returned by
// docFetcherMake() for idoc, then written out unchanged (no format
// conversion):
//  - RDK_FILENAME: rawdoc.data is a path, the file is copied.
//  - RDK_DATA: rawdoc.data is the document content, it is saved as is.
//
// @param otemp  set to the new temporary file, only when tofile is empty.
// @param tofile output file path. If empty, a temporary file with a suffix
//               derived from idoc.mimetype is created and returned in otemp.
// @param cnf    configuration, used by the fetcher and for the temp suffix.
// @param idoc   the document to extract.
// @return true if the output file was written, false on any error
//         (including an unknown raw document kind).
bool FileInterner::topdocToFile(TempFile& otemp, const string& tofile,
                                RclConfig *cnf, const Rcl::Doc& idoc)
{
    // NOTE(review): the fetcher is a raw pointer which is never released
    // here. If docFetcherMake() transfers ownership to the caller this is
    // a leak on every call -- confirm against the docfetcher interface.
    DocFetcher *fetcher = docFetcherMake(idoc);
    if (fetcher == 0) {
        LOGERR(("FileInterner::idocToFile no backend\n"));
        return false;
    }
    DocFetcher::RawDoc rawdoc;
    if (!fetcher->fetch(cnf, idoc, rawdoc)) {
        LOGERR(("FileInterner::idocToFile fetcher failed\n"));
        return false;
    }

    // Choose the output: caller-supplied path, or a new temporary file.
    const char *filename = "";
    TempFile temp;
    if (tofile.empty()) {
        if (!tempFileForMT(temp, cnf, idoc.mimetype)) {
            return false;
        }
        filename = temp->filename();
    } else {
        filename = tofile.c_str();
    }

    string reason;
    switch (rawdoc.kind) {
    case DocFetcher::RawDoc::RDK_FILENAME:
        if (!copyfile(rawdoc.data.c_str(), filename, reason)) {
            LOGERR(("FileInterner::idocToFile: copyfile: %s\n",
                    reason.c_str()));
            return false;
        }
        break;
    case DocFetcher::RawDoc::RDK_DATA:
        if (!stringtofile(rawdoc.data, filename, reason)) {
            LOGERR(("FileInterner::idocToFile: stringtofile: %s\n",
                    reason.c_str()));
            return false;
        }
        break;
    default:
        // Nothing was written: report the failure instead of returning
        // success with an empty output file.
        LOGERR(("FileInterner::topdocToFile: bad rawdoc kind ??\n"));
        return false;
    }

    if (tofile.empty())
        otemp = temp;
    return true;
}
bool FileInterner::interntofile(TempFile& otemp, const string& tofile, bool FileInterner::interntofile(TempFile& otemp, const string& tofile,
const string& ipath, const string& mimetype) const string& ipath, const string& mimetype)
{ {
@ -952,35 +1003,22 @@ bool FileInterner::interntofile(TempFile& otemp, const string& tofile,
doc.mimetype = "text/html"; doc.mimetype = "text/html";
} }
string filename; const char *filename;
TempFile temp; TempFile temp;
if (tofile.empty()) { if (tofile.empty()) {
TempFile temp1(new TempFileInternal( if (!tempFileForMT(temp, m_cfg, mimetype)) {
m_cfg->getSuffixFromMimeType(mimetype))); return false;
temp = temp1; }
if (!temp->ok()) {
LOGERR(("FileInterner::interntofile: can't create temp file\n"));
return false;
}
filename = temp->filename(); filename = temp->filename();
} else { } else {
filename = tofile; filename = tofile.c_str();
} }
string reason;
int fd = open(filename.c_str(), O_WRONLY|O_CREAT, 0600); if (!stringtofile(doc.text, filename, reason)) {
if (fd < 0) { LOGERR(("FileInterner::interntofile: stringtofile : %s\n",
LOGERR(("FileInterner::interntofile: open(%s) failed errno %d\n", reason.c_str()));
filename.c_str(), errno));
return false; return false;
} }
const string& dt = doc.text;
if (write(fd, dt.c_str(), dt.length()) != (int)dt.length()) {
close(fd);
LOGERR(("FileInterner::interntofile: write to %s failed errno %d\n",
filename.c_str(), errno));
return false;
}
close(fd);
if (tofile.empty()) if (tofile.empty())
otemp = temp; otemp = temp;

View file

@ -287,6 +287,10 @@ class FileInterner {
int addHandler(); int addHandler();
void checkExternalMissing(const string& msg, const string& mt); void checkExternalMissing(const string& msg, const string& mt);
void processNextDocError(Rcl::Doc &doc); void processNextDocError(Rcl::Doc &doc);
static bool tempFileForMT(TempFile& otemp, RclConfig *cnf,
const std::string& mimetype);
static bool topdocToFile(TempFile& otemp, const std::string& tofile,
RclConfig *cnf, const Rcl::Doc& idoc);
}; };

View file

@ -31,6 +31,7 @@
#include "readfile.h" #include "readfile.h"
#include "xmltosd.h" #include "xmltosd.h"
#include "searchdata.h" #include "searchdata.h"
#include "copyfile.h"
using namespace std; using namespace std;
using namespace Rcl; using namespace Rcl;
@ -90,23 +91,12 @@ void RclMain::saveLastQuery()
string tofile((const char *)s.toLocal8Bit()); string tofile((const char *)s.toLocal8Bit());
LOGDEB(("RclMain::saveLastQuery: XML: [%s]\n", xml.c_str())); LOGDEB(("RclMain::saveLastQuery: XML: [%s]\n", xml.c_str()));
string reason;
int fd = ::open(tofile.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0600); if (!stringtofile(xml, tofile.c_str(), reason)) {
if (fd < 0) {
QMessageBox::warning(this, tr("Open failed"),
tr("Could not open/create file"));
return;
}
if (::write(fd, xml.c_str(), xml.size()) != int(xml.size())) {
::close(fd);
QMessageBox::warning(this, tr("Write failed"), QMessageBox::warning(this, tr("Write failed"),
tr("Could not write to file")); tr("Could not write to file"));
return;
}
if (::close(fd) != 0) {
QMessageBox::warning(this, tr("Close failed"), tr("File close error"));
return;
} }
return;
} }

View file

@ -291,11 +291,20 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
LOGDEB(("RclMain::startNV: groksipath %d wantsf %d wantsparentf %d\n", LOGDEB(("RclMain::startNV: groksipath %d wantsf %d wantsparentf %d\n",
groksipath, wantsfile, wantsparentfile)); groksipath, wantsfile, wantsparentfile));
bool notinfs = false;
{
string backend;
doc.getmeta(Rcl::Doc::keybcknd, &backend);
if (!backend.empty() && backend.compare("FS"))
notinfs = true;
}
// If the command wants a file but this is not a file url, or // If the command wants a file but this is not a file url, or
// there is an ipath that it won't understand, we need a temp file: // there is an ipath that it won't understand, we need a temp file:
theconfig->setKeyDir(path_getfather(fn)); theconfig->setKeyDir(path_getfather(fn));
if (((wantsfile || wantsparentfile) && fn.empty()) || if (notinfs ||
(!groksipath && !doc.ipath.empty())) { ((wantsfile || wantsparentfile) && fn.empty()) ||
(!groksipath && !doc.ipath.empty()) ) {
TempFile temp; TempFile temp;
Rcl::Doc& thedoc = wantsparentfile ? pdoc : doc; Rcl::Doc& thedoc = wantsparentfile ? pdoc : doc;
if (!FileInterner::idocToFile(temp, string(), theconfig, thedoc)) { if (!FileInterner::idocToFile(temp, string(), theconfig, thedoc)) {

View file

@ -111,17 +111,13 @@ void RTIToolW::accept()
dir = path_cat(dir, "autostart"); dir = path_cat(dir, "autostart");
mkdir(dir.c_str(), 0700); mkdir(dir.c_str(), 0700);
int fd = ::open(autostartfile.c_str(), O_WRONLY|O_CREAT, 0644); string reason;
if (fd < 0 || ::write(fd, text.c_str(), size_t(text.size())) if (!stringtofile(text, autostartfile.c_str(), reason)) {
!= ssize_t(text.size()) || ::close(fd) != 0) {
if (fd >=0)
::close(fd);
QString msg = tr("Can't create: ") + QString msg = tr("Can't create: ") +
QString::fromLocal8Bit(autostartfile.c_str()); QString::fromLocal8Bit(autostartfile.c_str());
QMessageBox::warning(0, tr("Warning"), msg, QMessageBox::Ok); QMessageBox::warning(0, tr("Warning"), msg, QMessageBox::Ok);
return; return;
} }
::close(fd);
if (nowCB->isChecked()) { if (nowCB->isChecked()) {
ExecCmd cmd; ExecCmd cmd;

View file

@ -35,55 +35,91 @@ using namespace std;
bool copyfile(const char *src, const char *dst, string &reason, int flags) bool copyfile(const char *src, const char *dst, string &reason, int flags)
{ {
int sfd = -1; int sfd = -1;
int dfd = -1; int dfd = -1;
bool ret = false; bool ret = false;
char buf[CPBSIZ]; char buf[CPBSIZ];
int oflags = O_WRONLY|O_CREAT|O_TRUNC; int oflags = O_WRONLY|O_CREAT|O_TRUNC;
LOGDEB(("copyfile: %s to %s\n", src, dst)); LOGDEB(("copyfile: %s to %s\n", src, dst));
if ((sfd = open(src, O_RDONLY)) < 0) { if ((sfd = ::open(src, O_RDONLY)) < 0) {
reason += string("open ") + src + ": " + strerror(errno); reason += string("open ") + src + ": " + strerror(errno);
goto out; goto out;
} }
if (flags & COPYFILE_EXCL) { if (flags & COPYFILE_EXCL) {
oflags |= O_EXCL; oflags |= O_EXCL;
} }
if ((dfd = open(dst, oflags, 0644)) < 0) { if ((dfd = ::open(dst, oflags, 0644)) < 0) {
reason += string("open/creat ") + dst + ": " + strerror(errno); reason += string("open/creat ") + dst + ": " + strerror(errno);
// If we fail because of an open/truncate error, we do not want to unlink // If we fail because of an open/truncate error, we do not
// the file, we might succeed... // want to unlink the file, we might succeed...
flags |= COPYFILE_NOERRUNLINK; flags |= COPYFILE_NOERRUNLINK;
goto out; goto out;
} }
for (;;) { for (;;) {
int didread; int didread;
didread = read(sfd, buf, CPBSIZ); didread = ::read(sfd, buf, CPBSIZ);
if (didread < 0) { if (didread < 0) {
reason += string("read src ") + src + ": " + strerror(errno); reason += string("read src ") + src + ": " + strerror(errno);
goto out; goto out;
} }
if (didread == 0) if (didread == 0)
break; break;
if (write(dfd, buf, didread) != didread) { if (::write(dfd, buf, didread) != didread) {
reason += string("write dst ") + src + ": " + strerror(errno); reason += string("write dst ") + src + ": " + strerror(errno);
goto out; goto out;
} }
} }
ret = true; ret = true;
out: out:
if (ret == false && !(flags&COPYFILE_NOERRUNLINK)) if (ret == false && !(flags&COPYFILE_NOERRUNLINK))
unlink(dst); ::unlink(dst);
if (sfd >= 0) if (sfd >= 0)
close(sfd); ::close(sfd);
if (dfd >= 0) if (dfd >= 0)
close(dfd); ::close(dfd);
return ret; return ret;
}
bool stringtofile(const string& dt, const char *dst, string& reason,
int flags)
{
LOGDEB(("stringtofile:\n"));
int dfd = -1;
bool ret = false;
int oflags = O_WRONLY|O_CREAT|O_TRUNC;
LOGDEB(("stringtofile: %u bytes to %s\n", (unsigned int)dt.size(), dst));
if (flags & COPYFILE_EXCL) {
oflags |= O_EXCL;
}
if ((dfd = ::open(dst, oflags, 0644)) < 0) {
reason += string("open/creat ") + dst + ": " + strerror(errno);
// If we fail because of an open/truncate error, we do not
// want to unlink the file, we might succeed...
flags |= COPYFILE_NOERRUNLINK;
goto out;
}
if (::write(dfd, dt.c_str(), size_t(dt.size())) != ssize_t(dt.size())) {
reason += string("write dst ") + ": " + strerror(errno);
goto out;
}
ret = true;
out:
if (ret == false && !(flags&COPYFILE_NOERRUNLINK))
::unlink(dst);
if (dfd >= 0)
::close(dfd);
return ret;
} }
bool renameormove(const char *src, const char *dst, string &reason) bool renameormove(const char *src, const char *dst, string &reason)
@ -117,13 +153,13 @@ bool renameormove(const char *src, const char *dst, string &reason)
// of reasons // of reasons
if ((st1.st_mode & 0777) != (st.st_mode & 0777)) { if ((st1.st_mode & 0777) != (st.st_mode & 0777)) {
if (chmod(dst, st.st_mode&0777) != 0) { if (chmod(dst, st.st_mode&0777) != 0) {
reason += string("Chmod ") + dst + "Error : " + strerror(errno); reason += string("Chmod ") + dst + "Error : " + strerror(errno);
} }
} }
if (st.st_uid != st1.st_uid || st.st_gid != st1.st_gid) { if (st.st_uid != st1.st_uid || st.st_gid != st1.st_gid) {
if (chown(dst, st.st_uid, st.st_gid) != 0) { if (chown(dst, st.st_uid, st.st_gid) != 0) {
reason += string("Chown ") + dst + "Error : " + strerror(errno); reason += string("Chown ") + dst + "Error : " + strerror(errno);
} }
} }
struct timeval times[2]; struct timeval times[2];
times[0].tv_sec = st.st_atime; times[0].tv_sec = st.st_atime;
@ -161,11 +197,11 @@ static int op_flags;
static const char *thisprog; static const char *thisprog;
static char usage [] = static char usage [] =
"trcopyfile [-m] src dst\n" "trcopyfile [-m] src dst\n"
" -m : move instead of copying\n" " -m : move instead of copying\n"
" -e : fail if dest exists (only for copy)\n" " -e : fail if dest exists (only for copy)\n"
"\n" "\n"
; ;
static void static void
Usage(void) Usage(void)
{ {
@ -185,9 +221,9 @@ int main(int argc, const char **argv)
Usage(); Usage();
while (**argv) while (**argv)
switch (*(*argv)++) { switch (*(*argv)++) {
case 'm': op_flags |= OPT_m; break; case 'm': op_flags |= OPT_m; break;
case 'e': op_flags |= OPT_e; break; case 'e': op_flags |= OPT_e; break;
default: Usage(); break; default: Usage(); break;
} }
argc--; argv++; argc--; argv++;
} }
@ -211,11 +247,11 @@ int main(int argc, const char **argv)
cerr << reason << endl; cerr << reason << endl;
exit(1); exit(1);
} else { } else {
cout << "Succeeded" << endl; cout << "Succeeded" << endl;
if (!reason.empty()) { if (!reason.empty()) {
cout << "Warnings: " << reason << endl; cout << "Warnings: " << reason << endl;
} }
exit(0); exit(0);
} }
} }

View file

@ -34,6 +34,10 @@ enum CopyfileFlags {COPYFILE_NONE = 0,
extern bool copyfile(const char *src, const char *dst, std::string &reason, extern bool copyfile(const char *src, const char *dst, std::string &reason,
int flags = 0); int flags = 0);
/** Save c++ string to file */
extern bool stringtofile(const std::string& dt, const char *dst,
std::string& reason, int flags = 0);
/** Try to rename src. If this fails (different devices) copy then unlink src */ /** Try to rename src. If this fails (different devices) copy then unlink src */
extern bool renameormove(const char *src, const char *dst, std::string &reason); extern bool renameormove(const char *src, const char *dst, std::string &reason);