#include <sys/wait.h>
#include <dirent.h>
#include <time.h>
+#include <regex.h>
#include "intl.h"
#include "utils.h"
+/*!
+ *\brief Reduce a subject to a form suitable for comparison, in place.
+ *
+ * Runs eliminate_parenthesis() for '[' ']' and for '(' ')' (presumably
+ * dropping the bracketed / parenthesised parts -- confirm against that
+ * helper), strips surrounding whitespace with g_strstrip(), and then
+ * removes the leading reply prefix(es) whose length is reported by
+ * subject_get_reply_prefix_length().
+ *
+ *\param str Subject string; modified in place
+ */
void trim_subject_for_compare(gchar *str)
{
gchar *srcp;
eliminate_parenthesis(str, '[', ']');
eliminate_parenthesis(str, '(', ')');
g_strstrip(str);
- while (!strncasecmp(str, "Re:", 3)) {
- srcp = str + 3;
- while (isspace(*srcp)) srcp++;
+ srcp = str + subject_get_reply_prefix_length(str);
+ /* only shift when a prefix was found; the + 1 moves the NUL too */
+ if (srcp != str)
memmove(str, srcp, strlen(srcp) + 1);
- }
}
void trim_subject_for_sort(gchar *str)
g_strstrip(str);
- while (!strncasecmp(str, "Re:", 3)) {
- srcp = str + 3;
- while (isspace(*srcp)) srcp++;
+ srcp = str + subject_get_reply_prefix_length(str);
+ if (srcp != str)
memmove(str, srcp, strlen(srcp) + 1);
- }
}
void trim_subject(gchar *str)
gchar op, cl;
gint in_brace;
- destp = str;
- while (!strncasecmp(destp, "Re:", 3)) {
- destp += 3;
- while (isspace(*destp)) destp++;
- }
+ destp = str + subject_get_reply_prefix_length(str);
if (*destp == '[') {
op = '[';
{
if (subject == NULL)
subject = "";
-
- if (g_strncasecmp(subject, "Re: ", 4) == 0)
- return g_hash_table_lookup(subject_table, subject + 4);
else
- return g_hash_table_lookup(subject_table, subject);
+ subject += subject_get_reply_prefix_length(subject);
+
+ return g_hash_table_lookup(subject_table, subject);
}
+/*!
+ *\brief Insert data into the subject table, keyed by the subject with
+ * its leading reply prefix(es) skipped.
+ *
+ * Nothing is inserted when the subject is NULL, empty, or consists of
+ * reply prefixes only (e.g. "Re:" or "Re: "), so that no entry is ever
+ * stored under an empty key. The key handed to the hash table is a
+ * pointer into the caller's string, not a copy.
+ *
+ *\param subject_table Hash table to insert into
+ *\param subject Subject line, possibly carrying reply prefixes
+ *\param data Value to associate with the cleaned subject
+ */
void subject_table_insert(GHashTable *subject_table, gchar * subject,
void * data)
{
- if (subject == NULL)
- return;
- if (* subject == 0)
- return;
- if (g_strcasecmp(subject, "Re:") == 0)
+ if (subject == NULL || *subject == 0)
return;
- if (g_strcasecmp(subject, "Re: ") == 0)
- return;
-
- if (g_strncasecmp(subject, "Re: ", 4) == 0)
- g_hash_table_insert(subject_table, subject + 4, data);
- else
- g_hash_table_insert(subject_table, subject, data);
+ subject += subject_get_reply_prefix_length(subject);
+ /* a subject made only of reply prefixes is empty now; the old code
+ * refused "Re:" / "Re: " as well, so keep rejecting it */
+ if (*subject == 0)
+ return;
+ g_hash_table_insert(subject_table, subject, data);
}
void subject_table_remove(GHashTable *subject_table, gchar * subject)
if (subject == NULL)
return;
- if (g_strncasecmp(subject, "Re: ", 4) == 0)
- g_hash_table_remove(subject_table, subject + 4);
- else
- g_hash_table_remove(subject_table, subject);
+ subject += subject_get_reply_prefix_length(subject);
+ g_hash_table_remove(subject_table, subject);
}
-gboolean subject_is_reply(const gchar *subject)
-{
- /* XXX: just simply here so someone can handle really
- * advanced Re: detection like "Re[4]", "ANTW:" or
- * Re: Re: Re: Re: Re: Re: Re: Re:" stuff. */
- if (subject == NULL) return FALSE;
- else return 0 == g_strncasecmp(subject, "Re: ", 4);
+/*!
+ *\brief Check if a string is prefixed with known (combinations)
+ * of reply prefixes. The function assumes that each prefix
+ * is terminated by zero or exactly _one_ space.
+ *
+ * The prefixes are combined into one extended regular expression
+ * that is compiled on the first successful call and kept in a
+ * function-static regex_t for all later calls (it is never freed).
+ * Leading spaces in front of the first prefix are part of the
+ * match and therefore included in the returned length.
+ *
+ *\param subject String to check for reply prefixes; may be NULL
+ *
+ *\return int Number of chars in the prefix that should be skipped
+ * for a "clean" subject line. If no prefix was found, if
+ * subject is NULL or empty, or if the regular expression
+ * could not be compiled, 0 is returned.
+ */
+int subject_get_reply_prefix_length(const gchar *subject)
+{
+ /*!< Array with allowable reply prefixes regexps. Only characters
+ * that are special in an ERE are escaped; escaping ordinary
+ * characters such as ':' or ' ' is undefined by POSIX. */
+ static const gchar * const reply_prefixes[] = {
+ "[Rr][Ee]:", /* "Re:" */
+ "[Rr][Ee]\\[[1-9][0-9]*\\]:", /* Intelligent but stupidly non-conforming Re[XXX]:*/
+ "[Aa][Nn][Tt][Ww]:" /* Overactive i18n / translation teams */
+ /* add more */
+ };
+ const int REPLY_PREFIXES = sizeof reply_prefixes / sizeof reply_prefixes[0];
+ int n;
+ regmatch_t pos;
+ static regex_t regex;
+ static gboolean init_;
+
+ if (!subject) return 0;
+ if (!*subject) return 0;
+
+ if (!init_) {
+ GString *s = g_string_new("");
+
+ for (n = 0; n < REPLY_PREFIXES; n++)
+ /* Terminate each prefix regexpression by a
+ * " ?" (zero or ONE space), and OR them */
+ g_string_sprintfa(s, "(%s ?)%s",
+ reply_prefixes[n],
+ n < REPLY_PREFIXES - 1 ?
+ "|" : "");
+
+ g_string_prepend(s, "(");
+ g_string_append(s, ")+"); /* match at least once */
+ g_string_prepend(s, "^ *"); /* from beginning of line */
+
+
+ /* We now have something like "^ *((PREFIX1 ?)|(PREFIX2 ?))+"
+ * TODO: Should this be "^ *(((PREFIX1)|(PREFIX2)) ?)+" ??? */
+ if (regcomp(&regex, s->str, REG_EXTENDED)) {
+ /* init_ stays FALSE, so the next call tries again */
+ debug_print("Error compiling regexp %s\n", s->str);
+ g_string_free(s, TRUE);
+ return 0;
+ } else {
+ init_ = TRUE;
+ g_string_free(s, TRUE);
+ }
+ }
+
+ /* rm_eo is the offset just past the whole match, i.e. the number
+ * of leading characters to skip */
+ if (!regexec(&regex, subject, 1, &pos, 0) && pos.rm_so != -1)
+ return pos.rm_eo;
+ else
+ return 0;
}
FILE *get_tmpfile_in_dir(const gchar *dir, gchar **filename)
void subject_table_insert(GHashTable *subject_table, gchar * subject,
void * data);
void subject_table_remove(GHashTable *subject_table, gchar * subject);
-gboolean subject_is_reply(const gchar *subject);
+gint subject_get_reply_prefix_length (const gchar *subject);
+
+/* The following macros have the same preconditions as the functions
+ * above that lack the "_clean" suffix, but work with clean subjects
+ * (subject lines already corrected for the reply prefixes), so they
+ * never call subject_get_reply_prefix_length() themselves:
+ *
+ *  - subject_table_lookup_clean(t, s): looks up s in t; a NULL s is
+ *    looked up as the empty string "".
+ *  - subject_table_insert_clean(t, s, d): inserts d under key s;
+ *    does nothing when s is NULL or empty.
+ *  - subject_table_remove_clean(t, s): removes key s; does nothing
+ *    when s is NULL.
+ *
+ * NOTE: these are macros and the s argument is evaluated more than
+ * once, so pass only expressions without side effects. */
+#define subject_table_lookup_clean(t, s) \
+ g_hash_table_lookup((t), (s) ? (s) : "")
+
+#define subject_table_insert_clean(t, s, d) \
+ do { \
+ if ((s) != NULL && (*(s)) != 0) \
+ g_hash_table_insert((t), (s), (d)); \
+ } while (0)
+
+#define subject_table_remove_clean(t, s) \
+ do { \
+ if ((s) != NULL) \
+ g_hash_table_remove((t), (s)); \
+ } while (0)
/* quoting recognition */
const gchar * line_has_quote_char (const gchar *str,
g_hash_table_insert(msgid_table, (gchar *)msgid, node);
if (prefs_common.thread_by_subject) {
- subject = msginfo->subject;
- found_subject = subject_table_lookup(subject_table,
- (gchar *) subject);
+ subject = msginfo->subject;
+ subject += subject_get_reply_prefix_length(subject);
+ found_subject = subject_table_lookup_clean(subject_table,
+ (gchar *) subject);
if (found_subject == NULL)
- subject_table_insert(subject_table, (gchar *) subject,
- node);
+ subject_table_insert_clean(subject_table, (gchar *) subject,
+ node);
else {
/* replace if msg in table is older than current one
* can add here more stuff. */
if ( ((MsgInfo*)(found_subject->data))->date_t >
((MsgInfo*)(node->data))->date_t ) {
- subject_table_remove(subject_table, (gchar *) subject);
- subject_table_insert(subject_table, (gchar *) subject, node);
+ subject_table_remove_clean(subject_table, (gchar *) subject);
+ subject_table_insert_clean(subject_table, (gchar *) subject, node);
}
}
}
for (node = root->children; node != NULL; ) {
next = node->next;
msginfo = (MsgInfo *) node->data;
- parent = NULL;
- if (subject_is_reply(msginfo->subject)) {
- parent = subject_table_lookup(subject_table,
- msginfo->subject);
- /* the node may already be threaded by IN-REPLY-TO,
- so go up in the tree to find the parent node */
- if (parent != NULL) {
- if (g_node_is_ancestor(node, parent))
- parent = NULL;
- if (parent == node)
- parent = NULL;
- }
+ parent = subject_table_lookup(subject_table, msginfo->subject);
+ /* the node may already be threaded by IN-REPLY-TO,
+ so go up in the tree to find the parent node */
+ if (parent != NULL) {
+ if (g_node_is_ancestor(node, parent))
+ parent = NULL;
+ if (parent == node)
+ parent = NULL;
+ }
- if (parent) {
- g_node_unlink(node);
- g_node_append(parent, node);
- /* CLAWS: ignore thread */
- if (MSG_IS_IGNORE_THREAD(((MsgInfo *)parent->data)->flags) && !MSG_IS_IGNORE_THREAD(msginfo->flags)) {
- g_node_traverse(node, G_PRE_ORDER, G_TRAVERSE_ALL, -1, procmsg_ignore_node, NULL);
- }
+ if (parent) {
+ g_node_unlink(node);
+ g_node_append(parent, node);
+ /* CLAWS: ignore thread */
+ if (MSG_IS_IGNORE_THREAD(((MsgInfo *)parent->data)->flags) && !MSG_IS_IGNORE_THREAD(msginfo->flags)) {
+ g_node_traverse(node, G_PRE_ORDER, G_TRAVERSE_ALL, -1, procmsg_ignore_node, NULL);
}
- }
+ }
node = next;
}
}