diff --git a/src/filters/rclpurple b/src/filters/rclpurple index 39a8a784..b991e78b 100755 --- a/src/filters/rclpurple +++ b/src/filters/rclpurple @@ -82,41 +82,79 @@ umask 77 # !! Leave the following line unmodified ! #ENDRECFILTCOMMONCODE -checkcmds awk iconv +checkcmds awk awk ' # First line: parse from, to , output html header NR == 1 { - if (NF != 13) { + if (NF != 14 && NF != 13 && NF != 9) { printf("Bad format: (NF %d) %s\n", NF, $0) exit 1 } to = $3 - from = $12 - proto = $13 - date = $5 " " $6 " " $7 " " $8 " " $9 " " $10 - #printf("from [%s] to [%s] proto [%s] date [%s]\n", from, to, proto, date) + if (NF == 14 || NF == 13) { + mon_i["Jan"] = "01" + mon_i["Feb"] = "02" + mon_i["Mar"] = "03" + mon_i["Apr"] = "04" + mon_i["May"] = "05" + mon_i["Jun"] = "06" + mon_i["Jul"] = "07" + mon_i["Aug"] = "08" + mon_i["Sep"] = "09" + mon_i["Oct"] = "10" + mon_i["Nov"] = "11" + mon_i["Dec"] = "12" + date = $8 "-" mon_i[$7] "-" $6 "T" $9 + if (NF == 14) { + from = $13 + } + if (NF == 13) { + from = $12 + } + } + + if (NF == 9) { + from = $8 + date = $5 + } + + #printf("from [%s] to [%s] date [%s]\n", from, to, date) + print "
" print "" + + if (ENVIRON["RECOLL_FILTER_FORPREVIEW"] == "yes") { + printf("%s\n", $0) + } + + # Remember who the main persons are. This is so that we output + # them once while indexing the conversation body, to avoid giving + # excessive weight by repeated indexing to the term. authors[from] = "yes" authors[to] = "yes" next } -# Message first line. We strip from/to and time when indexing + /^\([0-2][0-9]:[0-5][0-9]:[0-5][0-9]\)/ { + # Conversation element 1st line. We strip from/to (except 1st + # occurrence) and time when indexing. Time is not interesting and + # repeated from/to indexing would give excessive weight if (ENVIRON["RECOLL_FILTER_FORPREVIEW"] == "yes") { - # Preview: output everything - print $0 " " "
" + # Preview: output everything + print $0 } else { - # Index: output only text, except each new author once - #printf("INDEX: NF %d [%s] [%s] [%s] ", NF, $1, $2, $3); + # Index: output only text, except each new author once. Unfortunately, + # it is too late to add them to the "author" field. from = $2 sub(":$", "", from); if (authors[from] == "") { @@ -126,16 +164,16 @@ NR == 1 { for (idx = 3; idx <= NR; idx++) { printf("%s ", $idx) } - printf("
\n") + printf("\n") } next } -# Continuation line: print it +# Conversation element continuation line: print it { - printf("%s
\n", $0) + printf("%s\n", $0) } END { - printf("\n") + printf("