/* Copyright (C) 2005 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the * Free Software Foundation, Inc., * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #ifndef TEST_IDFILE #include "autoconfig.h" #include #include #include #include #include #include "idfile.h" #include "debuglog.h" using namespace std; // Bogus code to avoid bogus valgrind mt warnings about the // initialization of treat_mbox_... which I can't even remember the // use of (it's not documented or ever set) static int treat_mbox_as_rfc822; class InitTMAR { public: InitTMAR() { treat_mbox_as_rfc822 = getenv("RECOLL_TREAT_MBOX_AS_RFC822") ? 1 : -1; } }; static InitTMAR initTM; /** * This code is currently ONLY used to identify mbox and mail message files * which are badly handled by standard mime type identifiers * There is a very old (circa 1990) mbox format using blocks of ^A (0x01) chars * to separate messages, that we don't recognize currently */ // Mail headers we compare to: static const char *mailhs[] = {"From: ", "Received: ", "Message-Id: ", "To: ", "Date: ", "Subject: ", "Status: ", "In-Reply-To: "}; static const int mailhsl[] = {6, 10, 12, 4, 6, 9, 8, 13}; static const int nmh = sizeof(mailhs) / sizeof(char *); const int wantnhead = 3; // fn is for message printing static string idFileInternal(istream& input, const char *fn) { bool line1HasFrom = false; bool gotnonempty = false; int lookslikemail = 0; // emacs VM sometimes inserts very long lines with continuations or // not (for folder information). This forces us to look at many // lines and long ones int lnum = 1; for (int loop = 1; loop < 200; loop++, lnum++) { #define LL 2*1024 char cline[LL+1]; cline[LL] = 0; input.getline(cline, LL-1); if (input.fail()) { if (input.bad()) { LOGERR(("idfile: error while reading [%s]\n", fn)); return string(); } // Must be eof ? break; } // gcount includes the \n std::streamsize ll = input.gcount() - 1; if (ll > 0) gotnonempty = true; LOGDEB2(("idfile: lnum %d ll %u: [%s]\n", lnum, (unsigned int)ll, cline)); // Check for a few things that can't be found in a mail file, // (optimization to get a quick negative) // Empty lines if (ll <= 0) { // Accept a few empty lines at the beginning of the file, // otherwise this is the end of headers if (gotnonempty || lnum > 10) { LOGDEB2(("Got empty line\n")); break; } else { // Don't increment the line counter for initial empty lines. lnum--; continue; } } // emacs vm can insert VERY long header lines. if (ll > LL - 20) { LOGDEB2(("idFile: Line too long\n")); return string(); } // Check for mbox 'From ' line if (lnum == 1 && !strncmp("From ", cline, 5)) { if (treat_mbox_as_rfc822 == -1) { line1HasFrom = true; LOGDEB2(("idfile: line 1 has From_\n")); } continue; } // Except for a possible first line with 'From ', lines must // begin with whitespace or have a colon // (hope no one comes up with a longer header name ! if (!isspace(cline[0])) { char *cp = strchr(cline, ':'); if (cp == 0 || (cp - cline) > 70) { LOGDEB2(("idfile: can't be mail header line: [%s]\n", cline)); break; } } // Compare to known headers for (int i = 0; i < nmh; i++) { if (!strncasecmp(mailhs[i], cline, mailhsl[i])) { //fprintf(stderr, "Got [%s]\n", mailhs[i]); lookslikemail++; break; } } if (lookslikemail >= wantnhead) break; } if (line1HasFrom) lookslikemail++; if (lookslikemail >= wantnhead) return line1HasFrom ? string("text/x-mail") : string("message/rfc822"); return string(); } string idFile(const char *fn) { ifstream input; input.open(fn, ios::in); if (!input.is_open()) { LOGERR(("idFile: could not open [%s]\n", fn)); return string(); } return idFileInternal(input, fn); } string idFileMem(const string& data) { stringstream s(data, stringstream::in); return idFileInternal(s, ""); } #else #include #include #include #include #include using namespace std; #include "debuglog.h" #include "idfile.h" int main(int argc, char **argv) { if (argc < 2) { cerr << "Usage: idfile filename" << endl; exit(1); } DebugLog::getdbl()->setloglevel(DEBDEB1); DebugLog::setfilename("stderr"); for (int i = 1; i < argc; i++) { string mime = idFile(argv[i]); cout << argv[i] << " : " << mime << endl; } exit(0); } #endif