sync with 0.9.5cvs2
[claws.git] / src / common / utils.c
index 0cd9ba6d0b6ab735bc88f061d66d0d33ad3a1844..1b717267a88d15e2ae8f6fef3d42a0b6e6be65df 100644 (file)
@@ -41,6 +41,7 @@
 #include <sys/wait.h>
 #include <dirent.h>
 #include <time.h>
+#include <regex.h>
 
 #include "intl.h"
 #include "utils.h"
@@ -109,6 +110,25 @@ void hash_free_value_mem(GHashTable *table)
        g_hash_table_foreach(table, hash_free_value_mem_func, NULL);
 }
 
+/* Case-insensitive equality test for two NUL-terminated strings passed
+ * as generic pointers.  Returns non-zero when strcasecmp() reports the
+ * strings equal and zero otherwise. */
+gint str_case_equal(gconstpointer v, gconstpointer v2)
+{
+       const gchar *lhs = v;
+       const gchar *rhs = v2;
+
+       return !strcasecmp(lhs, rhs);
+}
+
+/* Case-insensitive string hash.  Every byte of KEY is folded through
+ * tolower() and combined as h = h * 31 + c, so strings that differ only
+ * in case produce the same value.  Returns 0 for the empty string. */
+guint str_case_hash(gconstpointer key)
+{
+       /* Read the bytes as unsigned: handing tolower() a negative
+        * plain-char value (any byte >= 0x80 where char is signed) is
+        * undefined behaviour, and would also sign-extend into h. */
+       const guchar *p = key;
+       guint h = 0;
+
+       for (; *p != '\0'; p++)
+               h = (h << 5) - h + (guint)tolower(*p);
+
+       return h;
+}
+
 void ptr_array_free_strings(GPtrArray *array)
 {
        gint i;
@@ -469,6 +489,30 @@ wchar_t *wcscasestr(const wchar_t *haystack, const wchar_t *needle)
        return NULL;
 }
 
+/* Count the multibyte characters in S, measuring each one with mblen().
+ * Returns the character count, or -1 when S is NULL or mblen() reports
+ * an invalid sequence. */
+gint get_mbs_len(const gchar *s)
+{
+       const gchar *cur;
+       gint count = 0;
+
+       if (s == NULL)
+               return -1;
+
+       for (cur = s; *cur != '\0'; cur += 0) {
+               gint width = mblen(cur, MB_LEN_MAX);
+
+               if (width < 0)
+                       return -1;
+               if (width == 0)
+                       break;
+
+               /* One complete character consumed; step over its bytes. */
+               count++;
+               cur += width;
+       }
+
+       return count;
+}
+
 /* Examine if next block is non-ASCII string */
 gboolean is_next_nonascii(const guchar *s)
 {
@@ -506,15 +550,30 @@ gint subject_compare(const gchar *s1, const gchar *s2)
        Xstrdup_a(str1, s1, return -1);
        Xstrdup_a(str2, s2, return -1);
 
-       trim_subject(str1);
-       trim_subject(str2);
+       trim_subject_for_compare(str1);
+       trim_subject_for_compare(str2);
 
        if (!*str1 || !*str2) return -1;
 
        return strcmp(str1, str2);
 }
 
-void trim_subject(gchar *str)
+/* Ordering helper for sorting by subject: runs trim_subject_for_sort()
+ * on scratch copies of both subjects and compares the results with
+ * strcasecmp().  Returns -1 when either argument is NULL or when
+ * Xstrdup_a() takes its failure action; otherwise the strcasecmp()
+ * result. */
+gint subject_compare_for_sort(const gchar *s1, const gchar *s2)
+{
+       gchar *lhs;
+       gchar *rhs;
+
+       if (s1 == NULL || s2 == NULL)
+               return -1;
+
+       /* Trim copies so the caller's strings are left untouched. */
+       Xstrdup_a(lhs, s1, return -1);
+       Xstrdup_a(rhs, s2, return -1);
+
+       trim_subject_for_sort(lhs);
+       trim_subject_for_sort(rhs);
+
+       return strcasecmp(lhs, rhs);
+}
+
+void trim_subject_for_compare(gchar *str)
 {
        gchar *srcp;
 
@@ -522,11 +581,52 @@ void trim_subject(gchar *str)
        eliminate_parenthesis(str, '(', ')');
        g_strstrip(str);
 
-       while (!strncasecmp(str, "Re:", 3)) {
-               srcp = str + 3;
-               while (isspace(*srcp)) srcp++;
+       srcp = str + subject_get_prefix_length(str);
+       if (srcp != str)
                memmove(str, srcp, strlen(srcp) + 1);
+}
+
+/* In-place trimming used when sorting: strips surrounding whitespace
+ * with g_strstrip(), then removes the leading span reported by
+ * subject_get_prefix_length() by shifting the remainder (including its
+ * terminating NUL) to the front of STR. */
+void trim_subject_for_sort(gchar *str)
+{
+       gint skip;
+
+       g_strstrip(str);
+
+       skip = subject_get_prefix_length(str);
+       if (skip != 0) {
+               gchar *rest = str + skip;
+
+               memmove(str, rest, strlen(rest) + 1);
+       }
+}
+
+/* Remove one bracketed tag ("[...]" or "(...)") that directly follows
+ * the prefix measured by subject_get_prefix_length(), editing STR in
+ * place.  The prefix itself is kept, nested brackets of the same kind
+ * are balanced, and whitespace after the tag is dropped as well.  STR
+ * is left unchanged when the text after the prefix does not start with
+ * '[' or '('. */
+void trim_subject(gchar *str)
+{
+       register gchar *srcp, *destp;
+       gchar op, cl;
+       gint in_brace;
+
+       destp = str + subject_get_prefix_length(str);
+
+       if (*destp == '[') {
+               op = '[';
+               cl = ']';
+       } else if (*destp == '(') {
+               op = '(';
+               cl = ')';
+       } else
+               return;
+
+       /* Advance past the matching closing bracket, or to the end of
+        * the string if the bracket is never closed. */
+       srcp = destp + 1;
+       in_brace = 1;
+       while (*srcp) {
+               if (*srcp == op)
+                       in_brace++;
+               else if (*srcp == cl)
+                       in_brace--;
+               srcp++;
+               if (in_brace == 0)
+                       break;
        }
+       /* Cast before classifying: isspace() on a negative plain-char
+        * value (bytes >= 0x80 where char is signed) is undefined. */
+       while (isspace((guchar)*srcp)) srcp++;
+       memmove(destp, srcp, strlen(srcp) + 1);
 }
 
 void eliminate_parenthesis(gchar *str, gchar op, gchar cl)
@@ -580,35 +680,6 @@ void extract_parenthesis(gchar *str, gchar op, gchar cl)
        *destp = '\0';
 }
 
-void extract_one_parenthesis_with_skip_quote(gchar *str, gchar quote_chr,
-                                            gchar op, gchar cl)
-{
-       register gchar *srcp, *destp;
-       gint in_brace;
-       gboolean in_quote = FALSE;
-
-       srcp = destp = str;
-
-       if ((srcp = strchr_with_skip_quote(destp, quote_chr, op))) {
-               memmove(destp, srcp + 1, strlen(srcp));
-               in_brace = 1;
-               while(*destp) {
-                       if (*destp == op && !in_quote)
-                               in_brace++;
-                       else if (*destp == cl && !in_quote)
-                               in_brace--;
-                       else if (*destp == quote_chr)
-                               in_quote ^= TRUE;
-
-                       if (in_brace == 0)
-                               break;
-
-                       destp++;
-               }
-       }
-       *destp = '\0';
-}
-
 void extract_parenthesis_with_skip_quote(gchar *str, gchar quote_chr,
                                         gchar op, gchar cl)
 {
@@ -756,7 +827,14 @@ void extract_address(gchar *str)
        g_strstrip(str);
 }
 
-GSList *address_list_append(GSList *addr_list, const gchar *str)
+/* Reduce STR in place to its angle-bracketed part (presumably the
+ * identifier of a List-Id header -- confirm against the caller).  When
+ * strchr_with_skip_quote() finds a '<' (with '"' as the quote
+ * character), extract_parenthesis_with_skip_quote() is applied for
+ * '<' ... '>'.  Surrounding whitespace is stripped in either case. */
+void extract_list_id_str(gchar *str)
+{
+       const gchar quote = '"';
+
+       if (strchr_with_skip_quote(str, quote, '<') != NULL) {
+               extract_parenthesis_with_skip_quote(str, quote, '<', '>');
+       }
+
+       g_strstrip(str);
+}
+
+static GSList *address_list_append_real(GSList *addr_list, const gchar *str, gboolean removecomments)
 {
        gchar *work;
        gchar *workp;
@@ -765,7 +843,8 @@ GSList *address_list_append(GSList *addr_list, const gchar *str)
 
        Xstrdup_a(work, str, return addr_list);
 
-       eliminate_address_comment(work);
+       if (removecomments)
+               eliminate_address_comment(work);
        workp = work;
 
        while (workp && *workp) {
@@ -777,7 +856,7 @@ GSList *address_list_append(GSList *addr_list, const gchar *str)
                } else
                        next = NULL;
 
-               if (strchr_with_skip_quote(workp, '"', '<'))
+               if (removecomments && strchr_with_skip_quote(workp, '"', '<'))
                        extract_parenthesis_with_skip_quote
                                (workp, '"', '<', '>');
 
@@ -791,6 +870,16 @@ GSList *address_list_append(GSList *addr_list, const gchar *str)
        return addr_list;
 }
 
+/* Append the addresses parsed from STR to ADDR_LIST with comment
+ * removal switched on, returning whatever list
+ * address_list_append_real() hands back. */
+GSList *address_list_append(GSList *addr_list, const gchar *str)
+{
+       const gboolean remove_comments = TRUE;
+
+       return address_list_append_real(addr_list, str, remove_comments);
+}
+
+/* Append the addresses parsed from STR to ADDR_LIST with comment
+ * removal switched off, returning whatever list
+ * address_list_append_real() hands back. */
+GSList *address_list_append_with_comments(GSList *addr_list, const gchar *str)
+{
+       const gboolean remove_comments = FALSE;
+
+       return address_list_append_real(addr_list, str, remove_comments);
+}
+
 GSList *references_list_append(GSList *msgid_list, const gchar *str)
 {
        const gchar *strp;
@@ -965,6 +1054,8 @@ gboolean is_header_line(const gchar *str)
 
 gboolean is_ascii_str(const guchar *str)
 {
+       g_return_val_if_fail(str, FALSE);
+
        while (*str != '\0') {
                if (*str != '\t' && *str != ' ' &&
                    *str != '\r' && *str != '\n' &&
@@ -1249,6 +1340,8 @@ gchar *get_abbrev_newsgroup_name(const gchar *group, gint len)
        const gchar *p = group;
        const gchar *last;
 
+       g_return_val_if_fail(group != NULL, NULL);
+
        last = group + strlen(group);
        abbrev_group = ap = g_malloc(strlen(group) + 1);
 
@@ -2527,6 +2620,122 @@ gint uncanonicalize_file_replace(const gchar *file)
        return 0;
 }
 
+/* Return a newly allocated copy of STR in which every line ending is a
+ * single '\n': the '\r' of a "\r\n" pair is dropped and a lone '\r' is
+ * turned into '\n'.  The result comes from g_malloc() and belongs to
+ * the caller.  Returns NULL when STR is NULL. */
+gchar *normalize_newlines(const gchar *str)
+{
+       const gchar *p;
+       gchar *out, *outp;
+
+       /* strlen(NULL) below would crash; bail out the same way the
+        * other guarded string helpers in this file do. */
+       g_return_val_if_fail(str != NULL, NULL);
+
+       /* The output is never longer than the input. */
+       out = outp = g_malloc(strlen(str) + 1);
+       for (p = str; *p != '\0'; ++p) {
+               if (*p == '\r') {
+                       /* "\r\n": skip the '\r', the '\n' is copied on
+                        * the next iteration. */
+                       if (*(p + 1) != '\n')
+                               *outp++ = '\n';
+               } else
+                       *outp++ = *p;
+       }
+
+       *outp = '\0';
+
+       return out;
+}
+
+/* Read a message from FP and return it as one newly allocated string
+ * with every line terminated by "\r\n".  In the header part, lines
+ * starting with "Bcc:" (any case) and their folded continuation lines
+ * are left out; the header part ends at the first empty line.  In the
+ * body, lines beginning with '.' get an extra leading '.'.  The caller
+ * owns the returned buffer.
+ *
+ * NOTE(review): input is consumed in chunks of at most BUFFSIZE - 1
+ * bytes, and each chunk is treated as a full line -- confirm that
+ * longer lines cannot occur here. */
+gchar *get_outgoing_rfc2822_str(FILE *fp)
+{
+       gchar line[BUFFSIZE];
+       gchar *result;
+       GString *msg = g_string_new(NULL);
+
+       /* header part */
+       while (fgets(line, sizeof(line), fp) != NULL) {
+               strretchomp(line);
+
+               if (g_strncasecmp(line, "Bcc:", 4) != 0) {
+                       g_string_append(msg, line);
+                       g_string_append(msg, "\r\n");
+                       /* the empty line separates header and body */
+                       if (line[0] == '\0')
+                               break;
+                       continue;
+               }
+
+               /* Bcc: swallow each following line that starts with a
+                * space or tab (a folded continuation); the first other
+                * character is pushed back for the next fgets(). */
+               while (TRUE) {
+                       gint ch = fgetc(fp);
+
+                       if (ch == EOF)
+                               break;
+                       if (ch != ' ' && ch != '\t') {
+                               ungetc(ch, fp);
+                               break;
+                       }
+                       if (fgets(line, sizeof(line), fp) == NULL)
+                               break;
+               }
+       }
+
+       /* body part */
+       while (fgets(line, sizeof(line), fp) != NULL) {
+               strretchomp(line);
+               /* double a leading dot */
+               if (line[0] == '.')
+                       g_string_append_c(msg, '.');
+               g_string_append(msg, line);
+               g_string_append(msg, "\r\n");
+       }
+
+       /* Keep the character data, release only the GString wrapper. */
+       result = msg->str;
+       g_string_free(msg, FALSE);
+
+       return result;
+}
+
+/*
+ * Create a new boundary in a way that it is very unlikely that this
+ * will occur in the following text.  It would be easy to ensure
+ * uniqueness if everything is either quoted-printable or base64
+ * encoded (note that conversion is allowed), but because MIME bodies
+ * may be nested, it may happen that the same boundary has already
+ * been used. We avoid scanning the message for conflicts and hope for
+ * the best.
+ *
+ *   boundary := 0*69<bchars> bcharsnospace
+ *   bchars := bcharsnospace / " "
+ *   bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" /
+ *                    "+" / "_" / "," / "-" / "." /
+ *                    "/" / ":" / "=" / "?"
+ *
+ * some special characters removed because of buggy MTAs
+ */
+
+/* Return a newly allocated boundary string of the form
+ * "<prefix>=_<date>_<16 random characters>".  PREFIX defaults to
+ * "Multipart" when NULL.  The date comes from get_rfc822_date() with
+ * spaces, commas and colons replaced by '_'.  The caller owns the
+ * result. */
+gchar *generate_mime_boundary(const gchar *prefix)
+{
+       static gchar tbl[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                            "abcdefghijklmnopqrstuvwxyz"
+                            "1234567890+_./=";
+       gchar uniq[17];
+       gchar date[64];
+       gchar *cur;
+       gchar *end = uniq + sizeof(uniq) - 1;
+       gint pid = getpid();
+
+       /* Mixing the pid into every pick makes concurrently running
+        * processes produce different values even if they were started
+        * within the same second and seeded with srandom(time(NULL)).
+        * It may not be a real advantage, but it does no harm. */
+       for (cur = uniq; cur < end; cur++)
+               *cur = tbl[(random() ^ pid) % (sizeof(tbl) - 1)];
+       *cur = '\0';
+
+       get_rfc822_date(date, sizeof(date));
+       subst_char(date, ' ', '_');
+       subst_char(date, ',', '_');
+       subst_char(date, ':', '_');
+
+       if (prefix == NULL)
+               prefix = "Multipart";
+
+       return g_strdup_printf("%s=_%s_%s", prefix, date, uniq);
+}
+
 gint change_file_mode_rw(FILE *fp, const gchar *file)
 {
 #if HAVE_FCHMOD
@@ -2639,10 +2848,7 @@ gint str_write_to_file(const gchar *str, const gchar *file)
 
 gchar *file_read_to_str(const gchar *file)
 {
-       GByteArray *array;
        FILE *fp;
-       gchar buf[BUFSIZ];
-       gint n_read;
        gchar *str;
 
        g_return_val_if_fail(file != NULL, NULL);
@@ -2652,6 +2858,22 @@ gchar *file_read_to_str(const gchar *file)
                return NULL;
        }
 
+       str = file_read_stream_to_str(fp);
+
+       fclose(fp);
+
+       return str;
+}
+
+gchar *file_read_stream_to_str(FILE *fp)
+{
+       GByteArray *array;
+       gchar buf[BUFSIZ];
+       gint n_read;
+       gchar *str;
+
+       g_return_val_if_fail(fp != NULL, NULL);
+
        array = g_byte_array_new();
 
        while ((n_read = fread(buf, sizeof(gchar), sizeof(buf), fp)) > 0) {
@@ -2661,14 +2883,11 @@ gchar *file_read_to_str(const gchar *file)
        }
 
        if (ferror(fp)) {
-               FILE_OP_ERROR(file, "fread");
-               fclose(fp);
+               FILE_OP_ERROR("file stream", "fread");
                g_byte_array_free(array, TRUE);
                return NULL;
        }
 
-       fclose(fp);
-
        buf[0] = '\0';
        g_byte_array_append(array, buf, 1);
        str = (gchar *)array->data;
@@ -2746,6 +2965,33 @@ gint execute_command_line(const gchar *cmdline, gboolean async)
        return ret;
 }
 
+/* Run CMDLINE through popen() and return everything read from its
+ * output stream as one newly allocated string (empty if nothing was
+ * read).  Returns NULL when CMDLINE is NULL or popen() fails.  The
+ * caller owns the returned buffer. */
+gchar *get_command_output(const gchar *cmdline)
+{
+       gchar chunk[BUFFSIZE];
+       GString *output;
+       gchar *result;
+       FILE *pipe_fp;
+
+       g_return_val_if_fail(cmdline != NULL, NULL);
+
+       pipe_fp = popen(cmdline, "r");
+       if (pipe_fp == NULL) {
+               FILE_OP_ERROR(cmdline, "popen");
+               return NULL;
+       }
+
+       output = g_string_new("");
+       for (;;) {
+               if (fgets(chunk, sizeof(chunk), pipe_fp) == NULL)
+                       break;
+               g_string_append(output, chunk);
+       }
+       pclose(pipe_fp);
+
+       /* Keep the character data, release only the GString wrapper. */
+       result = output->str;
+       g_string_free(output, FALSE);
+
+       return result;
+}
+
 static gint is_unchanged_uri_char(char c)
 {
        switch (c) {
@@ -2903,11 +3149,16 @@ time_t remote_tzoffset_sec(const gchar *zone)
        } else if (!strncmp(zone, "UT" , 2) ||
                   !strncmp(zone, "GMT", 2)) {
                remoteoffset = 0;
-       } else if (strlen(zone3) == 3 &&
-                  (p = strstr(ustzstr, zone3)) != NULL &&
-                  (p - ustzstr) % 3 == 0) {
-               iustz = ((gint)(p - ustzstr) / 3 + 1) / 2 - 8;
-               remoteoffset = iustz * 3600;
+       } else if (strlen(zone3) == 3) {
+               for (p = ustzstr; *p != '\0'; p += 3) {
+                       if (!strncasecmp(p, zone3, 3)) {
+                               iustz = ((gint)(p - ustzstr) / 3 + 1) / 2 - 8;
+                               remoteoffset = iustz * 3600;
+                               break;
+                       }
+               }
+               if (*p == '\0')
+                       return -1;
        } else if (strlen(zone3) == 1) {
                switch (zone[0]) {
                case 'Z': remoteoffset =   0; break;
@@ -2938,7 +3189,8 @@ time_t remote_tzoffset_sec(const gchar *zone)
                default:  remoteoffset =   0; break;
                }
                remoteoffset = remoteoffset * 3600;
-       }
+       } else
+               return -1;
 
        return remoteoffset;
 }
@@ -3026,7 +3278,7 @@ void debug_set_mode(gboolean mode)
        debug_mode = mode;
 }
 
-gboolean debug_get_mode()
+/* Return the current value of debug_mode, i.e. the flag last stored by
+ * debug_set_mode(). */
+gboolean debug_get_mode(void)
 {
        return debug_mode;
 }
@@ -3049,29 +3301,19 @@ void * subject_table_lookup(GHashTable *subject_table, gchar * subject)
 {
        if (subject == NULL)
                subject = "";
-
-       if (g_strncasecmp(subject, "Re: ", 4) == 0)
-               return g_hash_table_lookup(subject_table, subject + 4);
        else
-               return g_hash_table_lookup(subject_table, subject);
+               subject += subject_get_prefix_length(subject);
+
+       return g_hash_table_lookup(subject_table, subject);
 }
 
+/* Store DATA in SUBJECT_TABLE under SUBJECT with its leading prefix (as
+ * measured by subject_get_prefix_length()) skipped.  The key handed to
+ * g_hash_table_insert() points into SUBJECT itself, so presumably the
+ * caller must keep that string alive while the entry exists.  Nothing
+ * is inserted for a NULL or empty subject, nor for a subject that is
+ * empty once the prefix has been skipped. */
 void subject_table_insert(GHashTable *subject_table, gchar * subject,
                          void * data)
 {
-       if (subject == NULL)
-               return;
-       if (* subject == 0)
-               return;
-       if (g_strcasecmp(subject, "Re:") == 0)
-               return;
-       if (g_strcasecmp(subject, "Re: ") == 0)
+       if (subject == NULL || *subject == 0)
                return;
-
-       if (g_strncasecmp(subject, "Re: ", 4) == 0)
-               g_hash_table_insert(subject_table, subject + 4, data);
-       else
-               g_hash_table_insert(subject_table, subject, data);
+       subject += subject_get_prefix_length(subject);
+       /* A subject that is nothing but a prefix (e.g. a bare "Re:") must
+        * not become the empty key: subject_table_lookup() maps a NULL
+        * subject to "", so such an entry would be returned for every
+        * message without a subject. */
+       if (*subject == 0)
+               return;
+       g_hash_table_insert(subject_table, subject, data);
 }
 
 void subject_table_remove(GHashTable *subject_table, gchar * subject)
@@ -3079,19 +3321,76 @@ void subject_table_remove(GHashTable *subject_table, gchar * subject)
        if (subject == NULL)
                return;
 
-       if (g_strncasecmp(subject, "Re: ", 4) == 0)
-               g_hash_table_remove(subject_table, subject + 4);
-       else
-               g_hash_table_remove(subject_table, subject);
+       subject += subject_get_prefix_length(subject);  
+       g_hash_table_remove(subject_table, subject);
 }
 
-gboolean subject_is_reply(const gchar *subject)
-{
-       /* XXX: just simply here so someone can handle really
-        * advanced Re: detection like "Re[4]", "ANTW:" or
-        * Re: Re: Re: Re: Re: Re: Re: Re:" stuff. */
-       if (subject == NULL) return FALSE;
-       else return 0 == g_strncasecmp(subject, "Re: ", 4);
+/*!
+ *\brief       Measure the run of known reply / forward prefixes
+ *             ("Re:", "Fw:", "Aw:", ...) at the start of a subject.
+ *             Leading spaces belong to the match, prefixes may be
+ *             repeated or combined, and each prefix is followed by
+ *             zero or exactly _one_ space.  Matching ignores case.
+ *
+ *\param       subject String to check for prefixes; may be NULL
+ *
+ *\return      int Number of chars that should be skipped to reach
+ *             the "clean" subject line.  0 is returned if no prefix
+ *             was found, if the subject is NULL or empty, or if the
+ *             regexp could not be compiled.
+ */
+int subject_get_prefix_length(const gchar *subject)
+{
+       /*!< Array with allowable reply prefixes regexps. */
+       static const gchar * const prefixes[] = {
+               "Re\\:",                        /* "Re:" */
+               "Re\\[[1-9][0-9]*\\]\\:",       /* "Re[XXX]:" (non-conforming news mail clients) */
+               "Antw\\:",                      /* "Antw:" (Dutch / German Outlook) */
+               "Aw\\:",                        /* "Aw:"   (German) */
+               "Antwort\\:",                   /* "Antwort:" (German Lotus Notes) */
+               "Res\\:",                       /* "Res:" (Brazilian Outlook) */
+               "Fw\\:",                        /* "Fw:" Forward */
+               "Enc\\:"                        /* "Enc:" Forward (Brazilian Outlook) */
+               /* add more */
+       };
+       const int n_prefixes = sizeof prefixes / sizeof prefixes[0];
+       static regex_t prefix_re;
+       static gboolean compiled;
+       regmatch_t match;
+
+       if (subject == NULL || *subject == '\0')
+               return 0;
+
+       if (!compiled) {
+               /* Assemble "^\ *((PREFIX1\ ?)|(PREFIX2\ ?))+" front to
+                * back: anchored at the start of the line, alternatives
+                * ORed together, matched at least once.
+                * TODO: Should this be "^\ *(((PREFIX1)|(PREFIX2))\ ?)+" ??? */
+               GString *pattern = g_string_new("^\\ *(");
+               int i;
+               int rc;
+
+               for (i = 0; i < n_prefixes; i++) {
+                       if (i > 0)
+                               g_string_append(pattern, "|");
+                       /* each prefix ends in "\ ?" (zero or ONE space) */
+                       g_string_sprintfa(pattern, "(%s\\ ?)", prefixes[i]);
+               }
+               g_string_append(pattern, ")+");
+
+               rc = regcomp(&prefix_re, pattern->str,
+                            REG_EXTENDED | REG_ICASE);
+               if (rc != 0) {
+                       debug_print("Error compiling regexp %s\n", pattern->str);
+               } else {
+                       compiled = TRUE;
+               }
+               g_string_free(pattern, TRUE);
+
+               /* Not marked as compiled, so the next call tries again. */
+               if (rc != 0)
+                       return 0;
+       }
+
+       if (regexec(&prefix_re, subject, 1, &match, 0) == 0 &&
+           match.rm_so != -1)
+               return match.rm_eo;
+
+       return 0;
+}
 
 FILE *get_tmpfile_in_dir(const gchar *dir, gchar **filename)
@@ -3311,3 +3610,36 @@ gint g_stricase_equal(gconstpointer gptr1, gconstpointer gptr2)
        return !strcasecmp(str1, str2);
 }
 
+/* Three-way comparison of two integers carried in pointers (unpacked
+ * with GPOINTER_TO_INT()).  Returns a negative value, zero or a
+ * positive value when A is less than, equal to or greater than B. */
+gint g_int_compare(gconstpointer a, gconstpointer b)
+{
+       const gint lhs = GPOINTER_TO_INT(a);
+       const gint rhs = GPOINTER_TO_INT(b);
+
+       /* Compare instead of subtracting: lhs - rhs overflows (undefined
+        * behaviour) when the operands are far apart, e.g. INT_MIN and 1,
+        * and could then report the wrong order. */
+       return (lhs > rhs) - (lhs < rhs);
+}
+
+/* Write a message-id of the form
+ * "YYYYMMDDhhmmss.<8 hex digits from random()>.<address>" into BUF
+ * using g_snprintf() with size LEN, and return BUF.  The timestamp is
+ * the current local time.  An ADDRESS without '@' gets
+ * "@" get_domain_name() appended; a NULL or empty ADDRESS is replaced
+ * by g_get_user_name() "@" get_domain_name(). */
+gchar *generate_msgid(const gchar *address, gchar *buf, gint len)
+{
+       /* steal from compose.c::compose_generate_msgid() */
+       time_t now;
+       struct tm *tm_now;
+       gchar *addr;
+
+       now = time(NULL);
+       tm_now = localtime(&now);
+
+       if (!address || !*address)
+               addr = g_strconcat(g_get_user_name(), "@", get_domain_name(),
+                                  NULL);
+       else if (strchr(address, '@') != NULL)
+               addr = g_strdup(address);
+       else
+               addr = g_strconcat(address, "@", get_domain_name(), NULL);
+
+       g_snprintf(buf, len, "%04d%02d%02d%02d%02d%02d.%08x.%s",
+                  tm_now->tm_year + 1900, tm_now->tm_mon + 1,
+                  tm_now->tm_mday, tm_now->tm_hour,
+                  tm_now->tm_min, tm_now->tm_sec,
+                  (guint)random(), addr);
+
+       g_free(addr);
+       return buf;
+}