From c28fe16c8545a16b63f456740b7f7c23c2bccfbd Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Wed, 1 Oct 2014 11:37:20 +0200 Subject: [PATCH] rclpurple: fix for current log format --- src/filters/rclpurple | 80 ++++++++++++++++++++++++++++++----------- tests/purple/purple.sh | 15 ++++++++ tests/purple/purple.txt | 4 +++ 3 files changed, 78 insertions(+), 21 deletions(-) create mode 100755 tests/purple/purple.sh create mode 100644 tests/purple/purple.txt diff --git a/src/filters/rclpurple b/src/filters/rclpurple index 39a8a784..b991e78b 100755 --- a/src/filters/rclpurple +++ b/src/filters/rclpurple @@ -82,41 +82,79 @@ umask 77 # !! Leave the following line unmodified ! #ENDRECFILTCOMMONCODE -checkcmds awk iconv +checkcmds awk awk ' # First line: parse from, to , output html header NR == 1 { - if (NF != 13) { + if (NF != 14 && NF != 13 && NF != 9) { printf("Bad format: (NF %d) %s\n", NF, $0) exit 1 } to = $3 - from = $12 - proto = $13 - date = $5 " " $6 " " $7 " " $8 " " $9 " " $10 - #printf("from [%s] to [%s] proto [%s] date [%s]\n", from, to, proto, date) + if (NF == 14 || NF == 13) { + mon_i["Jan"] = "01" + mon_i["Feb"] = "02" + mon_i["Mar"] = "03" + mon_i["Apr"] = "04" + mon_i["May"] = "05" + mon_i["Jun"] = "06" + mon_i["Jul"] = "07" + mon_i["Aug"] = "08" + mon_i["Sep"] = "09" + mon_i["Oct"] = "10" + mon_i["Nov"] = "11" + mon_i["Dec"] = "12" + date = $8 "-" mon_i[$7] "-" $6 "T" $9 + if (NF == 14) { + from = $13 + } + if (NF == 13) { + from = $12 + } + } + + if (NF == 9) { + from = $8 + date = $5 + } + + #printf("from [%s] to [%s] date [%s]\n", from, to, date) + print "" print " " $0 "" - print "" - # Yes there is no such thing as a "date" meta tag. This probably should - # be http-equiv=last-modified or such - printf("\n", date) - print "" - # Remember who the main persons are. + # Yes there is no such thing as a "date" meta tag. This probably should + # be http-equiv=last-modified or such, but recollindex understands "date" + printf("\n", date) + + printf("\n", from) + printf("\n", to) + print "" + print "
"
+
+    if (ENVIRON["RECOLL_FILTER_FORPREVIEW"] == "yes") {
+        printf("%s\n", $0)
+    }
+
+    # Remember who the main persons are. This is so that we output
+    # them once while indexing the conversation body, to avoid giving
+    # excessive weight by repeated indexing to the term.
     authors[from] = "yes"
     authors[to] = "yes"
     next
 }
-# Message first line. We strip from/to and time when indexing
+
 /^\([0-2][0-9]:[0-5][0-9]:[0-5][0-9]\)/ {
+    # Conversation element 1st line. We strip from/to (except 1st
+    # occurrence) and time when indexing.  Time is not interesting and
+    # repeated from/to indexing would give excessive weight
     if (ENVIRON["RECOLL_FILTER_FORPREVIEW"] == "yes") {
-       # Preview: output everything
-        print $0 " " "
" + # Preview: output everything + print $0 } else { - # Index: output only text, except each new author once - #printf("INDEX: NF %d [%s] [%s] [%s] ", NF, $1, $2, $3); + # Index: output only text, except each new author once. Unfortunately, + # it is too late to add them to the "author" field. from = $2 sub(":$", "", from); if (authors[from] == "") { @@ -126,16 +164,16 @@ NR == 1 { for (idx = 3; idx <= NR; idx++) { printf("%s ", $idx) } - printf("
\n") + printf("\n") } next } -# Continuation line: print it +# Conversation element continuation line: print it { - printf("%s
\n", $0) + printf("%s\n", $0) } END { - printf("\n") + printf("
\n") } ' < "$infile" diff --git a/tests/purple/purple.sh b/tests/purple/purple.sh new file mode 100755 index 00000000..93474f1e --- /dev/null +++ b/tests/purple/purple.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +topdir=`dirname $0`/.. +. $topdir/shared.sh + +initvariables $0 + +( +recollq -S url '"Hallo friend"' date:2011-08-25 +recollq -S url '"I like the G+ post you gave a"' date:2014-09-22 +) 2> $mystderr | egrep -v '^Recoll query: ' > $mystdout + +diff -w ${myname}.txt $mystdout > $mydiffs 2>&1 + +checkresult diff --git a/tests/purple/purple.txt b/tests/purple/purple.txt new file mode 100644 index 00000000..5cd220d0 --- /dev/null +++ b/tests/purple/purple.txt @@ -0,0 +1,4 @@ +1 results +text/x-purple-log [file:///home/dockes/projets/fulltext/testrecoll/purple/2011-08-25.160747+0200CEST.txt] [Conversation with 666666 at Thu 25 Aug 2011 04:07:47 PM CEST on 333333 (icq)] 316 bytes +1 results +text/x-purple-log [file:///home/dockes/projets/fulltext/testrecoll/purple/2014-09-22.104527+0200CEST.txt] [Conversation with friend at 2014-09-22T10:45:27 CEST on Hell-G/G (jabber)] 363 bytes