* src/compose.c
author Alfons Hoogervorst <alfons@proteus.demon.nl>
Thu, 12 Jun 2003 00:29:17 +0000 (00:29 +0000)
committer Alfons Hoogervorst <alfons@proteus.demon.nl>
Thu, 12 Jun 2003 00:29:17 +0000 (00:29 +0000)
* src/procmsg.c
* src/common/utils.[ch]
handle reply subject prefixes better (initial
suggestion by Robert Story adapted and enhanced
using regexps)

ChangeLog.claws
configure.ac
src/common/utils.c
src/common/utils.h
src/compose.c
src/procmsg.c

index 23b2635..6eaebdc 100644 (file)
@@ -1,3 +1,12 @@
+2003-06-12 [alfons]    0.9.0claws30
+
+       * src/compose.c
+       * src/procmsg.c
+       * src/common/utils.[ch]
+               handle reply subject prefixes better (initial 
+               suggestion by Robert Story adapted and enhanced 
+               using regexps)
+
 2003-06-12 [melvin]    0.9.0claws29
 
        * src/compose.c
index 51b6af8..1ff83c2 100644 (file)
@@ -11,7 +11,7 @@ MINOR_VERSION=9
 MICRO_VERSION=0
 INTERFACE_AGE=0
 BINARY_AGE=0
-EXTRA_VERSION=claws29
+EXTRA_VERSION=claws30
 VERSION=$MAJOR_VERSION.$MINOR_VERSION.$MICRO_VERSION$EXTRA_VERSION
 
 dnl set $target
index 77162e3..e67da63 100644 (file)
@@ -41,6 +41,7 @@
 #include <sys/wait.h>
 #include <dirent.h>
 #include <time.h>
+#include <regex.h>
 
 #include "intl.h"
 #include "utils.h"
@@ -575,16 +576,15 @@ gint subject_compare_for_sort(const gchar *s1, const gchar *s2)
 void trim_subject_for_compare(gchar *str)
 {
        gchar *srcp;
+       int skip;
 
        eliminate_parenthesis(str, '[', ']');
        eliminate_parenthesis(str, '(', ')');
        g_strstrip(str);
 
-       while (!strncasecmp(str, "Re:", 3)) {
-               srcp = str + 3;
-               while (isspace(*srcp)) srcp++;
+       srcp = str + subject_get_reply_prefix_length(str);
+       if (srcp != str)
                memmove(str, srcp, strlen(srcp) + 1);
-       }
 }
 
 void trim_subject_for_sort(gchar *str)
@@ -593,11 +593,9 @@ void trim_subject_for_sort(gchar *str)
 
        g_strstrip(str);
 
-       while (!strncasecmp(str, "Re:", 3)) {
-               srcp = str + 3;
-               while (isspace(*srcp)) srcp++;
+       srcp = str + subject_get_reply_prefix_length(str);
+       if (srcp != str)        
                memmove(str, srcp, strlen(srcp) + 1);
-       }
 }
 
 void trim_subject(gchar *str)
@@ -606,11 +604,7 @@ void trim_subject(gchar *str)
        gchar op, cl;
        gint in_brace;
 
-       destp = str;
-       while (!strncasecmp(destp, "Re:", 3)) {
-               destp += 3;
-               while (isspace(*destp)) destp++;
-       }
+       destp = str + subject_get_reply_prefix_length(str);
 
        if (*destp == '[') {
                op = '[';
@@ -3259,29 +3253,19 @@ void * subject_table_lookup(GHashTable *subject_table, gchar * subject)
 {
        if (subject == NULL)
                subject = "";
-
-       if (g_strncasecmp(subject, "Re: ", 4) == 0)
-               return g_hash_table_lookup(subject_table, subject + 4);
        else
-               return g_hash_table_lookup(subject_table, subject);
+               subject += subject_get_reply_prefix_length(subject);
+
+       return g_hash_table_lookup(subject_table, subject);
 }
 
 void subject_table_insert(GHashTable *subject_table, gchar * subject,
                          void * data)
 {
-       if (subject == NULL)
-               return;
-       if (* subject == 0)
-               return;
-       if (g_strcasecmp(subject, "Re:") == 0)
+       if (subject == NULL || *subject == 0)
                return;
-       if (g_strcasecmp(subject, "Re: ") == 0)
-               return;
-
-       if (g_strncasecmp(subject, "Re: ", 4) == 0)
-               g_hash_table_insert(subject_table, subject + 4, data);
-       else
-               g_hash_table_insert(subject_table, subject, data);
+       subject += subject_get_reply_prefix_length(subject);
+       g_hash_table_insert(subject_table, subject, data);
 }
 
 void subject_table_remove(GHashTable *subject_table, gchar * subject)
@@ -3289,19 +3273,71 @@ void subject_table_remove(GHashTable *subject_table, gchar * subject)
        if (subject == NULL)
                return;
 
-       if (g_strncasecmp(subject, "Re: ", 4) == 0)
-               g_hash_table_remove(subject_table, subject + 4);
-       else
-               g_hash_table_remove(subject_table, subject);
+       subject += subject_get_reply_prefix_length(subject);    
+       g_hash_table_remove(subject_table, subject);
 }
 
-gboolean subject_is_reply(const gchar *subject)
-{
-       /* XXX: just simply here so someone can handle really
-        * advanced Re: detection like "Re[4]", "ANTW:" or
-        * Re: Re: Re: Re: Re: Re: Re: Re:" stuff. */
-       if (subject == NULL) return FALSE;
-       else return 0 == g_strncasecmp(subject, "Re: ", 4);
+/*!
+ *\brief       Check if a string is prefixed with known (combinations of)
+ *             reply prefixes. The function assumes that each prefix
+ *             is terminated by zero or exactly _one_ space.
+ *
+ *\param       subject String to check for reply prefixes
+ *
+ *\return      int Number of chars in the prefix that should be skipped 
+ *             for a "clean" subject line. If no prefix was found, 0
+ *             is returned.
+ */            
+int subject_get_reply_prefix_length(const gchar *subject)
+{
+       /*!< Array with allowable reply prefixes regexps. */
+       static const gchar * const reply_prefixes[] = {
+               "[Rr][Ee]\\:",                  /* "Re:" */
+               "[Rr][Ee]\\[[1-9][0-9]*\\]\\:", /* Intelligent but stupidly non-conforming Re[XXX]:*/
+               "[Aa][Nn][Tt][Ww]\\:"           /* Overactive i18n / translation teams             */
+               /* add more */
+       };
+       const int REPLY_PREFIXES = sizeof reply_prefixes / sizeof reply_prefixes[0];
+       int n;
+       regmatch_t pos;
+       static regex_t regex;
+       static gboolean init_;
+
+       if (!subject) return 0;
+       if (!*subject) return 0;
+
+       if (!init_) {
+               GString *s = g_string_new("");
+               
+               for (n = 0; n < REPLY_PREFIXES; n++)
+                       /* Terminate each prefix regexpression by a
+                        * "\ ?" (zero or ONE space), and OR them */
+                       g_string_sprintfa(s, "(%s\\ ?)%s",
+                                         reply_prefixes[n],
+                                         n < REPLY_PREFIXES - 1 ? 
+                                         "|" : "");
+               
+               g_string_prepend(s, "(");
+               g_string_append(s, ")+");       /* match at least once */
+               g_string_prepend(s, "^\\ *");   /* from beginning of line */
+               
+
+               /* We now have something like "^\ *((PREFIX1\ ?)|(PREFIX2\ ?))+" 
+                * TODO: Should this be       "^\ *(((PREFIX1)|(PREFIX2))\ ?)+" ??? */
+               if (regcomp(&regex, s->str, REG_EXTENDED)) { 
+                       debug_print("Error compiling regexp %s\n", s->str);
+                       g_string_free(s, TRUE);
+                       return 0;
+               } else {
+                       init_ = TRUE;
+                       g_string_free(s, TRUE);
+               }
+       }
+       
+       if (!regexec(&regex, subject, 1, &pos, 0) && pos.rm_so != -1)
+               return pos.rm_eo;
+       else
+               return 0;
 }
 
 FILE *get_tmpfile_in_dir(const gchar *dir, gchar **filename)
index 42e3611..9f23446 100644 (file)
@@ -411,7 +411,25 @@ void * subject_table_lookup(GHashTable *subject_table, gchar * subject);
 void subject_table_insert(GHashTable *subject_table, gchar * subject,
                          void * data);
 void subject_table_remove(GHashTable *subject_table, gchar * subject);
-gboolean subject_is_reply(const gchar *subject);
+gint subject_get_reply_prefix_length (const gchar *subject);
+
+/* The following macros have the same preconditions as the non-_clean
+ * functions above, but work with clean subjects (subject lines whose
+ * reply prefixes have already been stripped). */
+#define subject_table_lookup_clean(t, s) \
+       g_hash_table_lookup((t), (s) ? (s) : "")
+       
+#define subject_table_insert_clean(t, s, d) \
+       do { \
+               if ((s) != NULL && (*(s)) != 0) \
+                       g_hash_table_insert((t), (s), (d)); \
+       } while (0)     
+
+#define subject_table_remove_clean(t, s) \
+       do { \
+               if ((s) != NULL) \
+                       g_hash_table_remove((t), (s)); \
+       } while (0)                     
 
 /* quoting recognition */
 const gchar * line_has_quote_char      (const gchar *str,
index 9c2caea..729469a 100644 (file)
@@ -1789,12 +1789,9 @@ static void compose_reply_set_entry(Compose *compose, MsgInfo *msginfo,
        if (msginfo->subject && *msginfo->subject) {
                gchar *buf, *buf2, *p;
 
-               buf = g_strdup(msginfo->subject);
-               while (!strncasecmp(buf, "Re:", 3)) {
-                       p = buf + 3;
-                       while (isspace(*p)) p++;
-                       memmove(buf, p, strlen(p) + 1);
-               }
+               buf = p = g_strdup(msginfo->subject);
+               p += subject_get_reply_prefix_length(p);
+               memmove(buf, p, strlen(p) + 1);
 
                buf2 = g_strdup_printf("Re: %s", buf);
                gtk_entry_set_text(GTK_ENTRY(compose->subject_entry), buf2);
index 7b76353..2cb62f3 100644 (file)
@@ -175,19 +175,20 @@ GNode *procmsg_get_thread_tree(GSList *mlist)
                        g_hash_table_insert(msgid_table, (gchar *)msgid, node);
 
                if (prefs_common.thread_by_subject) {
-                       subject = msginfo->subject;
-                       found_subject = subject_table_lookup(subject_table,
-                                                            (gchar *) subject);
+                       subject  = msginfo->subject;
+                       subject += subject_get_reply_prefix_length(subject);
+                       found_subject = subject_table_lookup_clean(subject_table,
+                                                                  (gchar *) subject);
                        if (found_subject == NULL)
-                               subject_table_insert(subject_table, (gchar *) subject,
-                                                    node);
+                               subject_table_insert_clean(subject_table, (gchar *) subject,
+                                                          node);
                        else {
                                 /* replace if msg in table has a later date_t than the current one;
                                  * can add here more stuff. */
                                if ( ((MsgInfo*)(found_subject->data))->date_t >
                                     ((MsgInfo*)(node->data))->date_t )  {
-                                       subject_table_remove(subject_table, (gchar *) subject);
-                                       subject_table_insert(subject_table, (gchar *) subject, node);
+                                       subject_table_remove_clean(subject_table, (gchar *) subject);
+                                       subject_table_insert_clean(subject_table, (gchar *) subject, node);
                                }       
                        }
                }
@@ -220,28 +221,24 @@ GNode *procmsg_get_thread_tree(GSList *mlist)
                for (node = root->children; node != NULL; ) {
                        next = node->next;
                        msginfo = (MsgInfo *) node->data;
-                       parent = NULL;
-                       if (subject_is_reply(msginfo->subject)) {
-                               parent = subject_table_lookup(subject_table,
-                                                             msginfo->subject);
-                               /* the node may already be threaded by IN-REPLY-TO,
-                                  so go up in the tree to find the parent node */
-                               if (parent != NULL) {
-                                       if (g_node_is_ancestor(node, parent))
-                                               parent = NULL;
-                                       if (parent == node)
-                                               parent = NULL;
-                               }
+                       parent = subject_table_lookup(subject_table, msginfo->subject);
+                       /* the node may already be threaded by IN-REPLY-TO,
+                          so go up in the tree to find the parent node */
+                       if (parent != NULL) {
+                               if (g_node_is_ancestor(node, parent))
+                                       parent = NULL;
+                               if (parent == node)
+                                       parent = NULL;
+                       }
 
-                               if (parent) {
-                                       g_node_unlink(node);
-                                       g_node_append(parent, node);
-                                       /* CLAWS: ignore thread */
-                                       if (MSG_IS_IGNORE_THREAD(((MsgInfo *)parent->data)->flags) && !MSG_IS_IGNORE_THREAD(msginfo->flags)) {
-                                               g_node_traverse(node, G_PRE_ORDER, G_TRAVERSE_ALL, -1, procmsg_ignore_node, NULL);
-                                       }
+                       if (parent) {
+                               g_node_unlink(node);
+                               g_node_append(parent, node);
+                               /* CLAWS: ignore thread */
+                               if (MSG_IS_IGNORE_THREAD(((MsgInfo *)parent->data)->flags) && !MSG_IS_IGNORE_THREAD(msginfo->flags)) {
+                                       g_node_traverse(node, G_PRE_ORDER, G_TRAVERSE_ALL, -1, procmsg_ignore_node, NULL);
                                }
-                       }                                       
+                       }
                        node = next;
                }       
        }