2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2012 Hiroyuki Yamamoto and the Claws Mail team
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "claws-features.h"
28 #include <glib/gi18n.h>
41 #include "quoted-printable.h"
43 #include "prefs_common.h"
45 /* For unknown reasons the iconv.m4 macro undefs that macro if no
46 const is needed. This would break the code below so we define it. */
/* Byte (0x5f, '_') stored in place of input that cannot be shown or
   converted: conv_unreadable_8bit() writes it over non-ASCII bytes and
   conv_iconv_strdup_with_cd() emits it when iconv() reports EILSEQ.
   The definition deliberately carries no trailing semicolon so that the
   macro is a plain expression and can appear inside larger expressions. */
#define SUBST_CHAR 0x5f
/*
 * Byte classifiers used by the EUC handling code.  Only the low 8 bits
 * of the argument are examined, so signed and unsigned character values
 * classify identically.  These are functions rather than macros so that
 * the argument is evaluated exactly once.
 */

/* Non-zero when the low byte of c lies in 0xa1..0xfe. */
static inline int iseuckanji(unsigned int c)
{
	const unsigned int byte = c & 0xff;

	return byte >= 0xa1 && byte <= 0xfe;
}

/* Non-zero when the low byte of c is 0x8e (tested on the first byte of a
   two-byte half-width kana sequence). */
static inline int iseuchwkana1(unsigned int c)
{
	return (c & 0xff) == 0x8e;
}

/* Non-zero when the low byte of c lies in 0xa1..0xdf (tested on the byte
   that follows an iseuchwkana1() byte). */
static inline int iseuchwkana2(unsigned int c)
{
	const unsigned int byte = c & 0xff;

	return byte >= 0xa1 && byte <= 0xdf;
}
69 (((c) & 0xff) == 0x8f)
/*
 * Byte classifiers used by the Shift_JIS handling code.  Only the low
 * 8 bits of the argument are examined.  These are functions rather than
 * macros so that the argument is evaluated exactly once.
 */

/* Non-zero when the low byte of c lies in 0x81..0x9f or 0xe0..0xfc
   (tested on the first byte of a two-byte character). */
static inline int issjiskanji1(unsigned int c)
{
	const unsigned int byte = c & 0xff;

	return (byte >= 0x81 && byte <= 0x9f) ||
	       (byte >= 0xe0 && byte <= 0xfc);
}

/* Non-zero when the low byte of c lies in 0x40..0x7e or 0x80..0xfc
   (tested on the byte that follows an issjiskanji1() byte). */
static inline int issjiskanji2(unsigned int c)
{
	const unsigned int byte = c & 0xff;

	return (byte >= 0x40 && byte <= 0x7e) ||
	       (byte >= 0x80 && byte <= 0xfc);
}

/* Non-zero when the low byte of c lies in 0xa1..0xdf (single-byte
   half-width kana range). */
static inline int issjishwkana(unsigned int c)
{
	const unsigned int byte = c & 0xff;

	return byte >= 0xa1 && byte <= 0xdf;
}
80 if (state != JIS_KANJI) { \
88 if (state != JIS_ASCII) { \
96 if (state != JIS_HWKANA) { \
100 state = JIS_HWKANA; \
104 if (state != JIS_AUXKANJI) { \
109 state = JIS_AUXKANJI; \
/* Forward declarations of the file-local helpers defined further down. */

/* Selects the converter function for a (source, destination) charset
   name pair. */
112 static CodeConvFunc conv_get_code_conv_func (const gchar *src_charset_str,
113 const gchar *dest_charset_str);
/* iconv-based conversions returning a newly allocated string. */
115 static gchar *conv_iconv_strdup_with_cd (const gchar *inbuf,
118 static gchar *conv_iconv_strdup (const gchar *inbuf,
119 const gchar *src_code,
120 const gchar *dest_code);
/* Locale / charset queries. */
122 static CharSet conv_get_locale_charset (void);
123 static CharSet conv_get_outgoing_charset (void);
124 static CharSet conv_guess_ja_encoding(const gchar *str);
125 static gboolean conv_is_ja_locale (void);
/* Buffer-to-buffer converters.  All share one signature: the
   NUL-terminated inbuf is converted into outbuf; callers pass the size
   of outbuf as outlen. */
127 static gint conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
128 static gint conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf);
129 static gint conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
131 static gint conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
132 static gint conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
133 static gint conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
134 static gint conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
136 static gint conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
137 static gint conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf);
/* In-place replacement of non-ASCII bytes. */
139 static void conv_unreadable_8bit(gchar *str);
/* Converters producing the form used for display. */
141 static gint conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
142 static gint conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
143 static gint conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
145 static gint conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
146 static gint conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
147 static gint conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf);
/* Read by conv_codeset_strdup(): while this is FALSE the buffer filled
   by a built-in converter is returned whatever status that converter
   reported; while TRUE the converter must have returned 0. */
149 static gboolean strict_mode = FALSE;
/* conv_localetodisp() calls this with TRUE before, and FALSE after, each
   of its conv_iconv_strdup() attempts.
   NOTE(review): the body is not visible in this excerpt -- presumably it
   stores `mode` in strict_mode; confirm. */
151 void codeconv_set_strict(gboolean mode)
/*
 * conv_jistoeuc: convert JIS (7-bit, escape-sequence driven) text in
 * inbuf to EUC in outbuf.
 *
 * inbuf is scanned up to its terminating NUL.  `state` records the
 * character set currently selected (JIS_ASCII at the start) and is
 * updated from the bytes following an escape and from the 0x0e / 0x0f
 * control bytes.  Data bytes are written with the high bit set
 * (| 0x80).
 *
 * NOTE(review): the statements that limit writes to outlen and the
 * return statement are not visible in this excerpt -- confirm both
 * before relying on them.
 */
156 static gint conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
158 const guchar *in = inbuf;
159 guchar *out = outbuf;
160 JISState state = JIS_ASCII;
162 while (*in != '\0') {
166 if (*(in + 1) == '@' || *(in + 1) == 'B') {
169 } else if (*(in + 1) == '(' &&
171 state = JIS_AUXKANJI;
174 /* unknown escape sequence */
177 } else if (*in == '(') {
178 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
181 } else if (*(in + 1) == 'I') {
185 /* unknown escape sequence */
189 /* unknown escape sequence */
192 } else if (*in == 0x0e) {
195 } else if (*in == 0x0f) {
204 *out++ = *in++ | 0x80;
/* stop if the input ends after the first byte of a two-byte character */
205 if (*in == '\0') break;
206 *out++ = *in++ | 0x80;
210 *out++ = *in++ | 0x80;
214 *out++ = *in++ | 0x80;
/* stop if the input ends after the first byte of a two-byte character */
215 if (*in == '\0') break;
216 *out++ = *in++ | 0x80;
/* Codes that conv_jis_hantozen() compares with its sound_sym argument to
   recognise a following dakuten / handakuten sound mark. */
226 #define JIS_HWDAKUTEN 0x5e
227 #define JIS_HWHANDAKUTEN 0x5f
/*
 * conv_jis_hantozen: table lookup from a single-byte kana code to a
 * two-byte code, stored as outbuf[0] (high byte) and outbuf[1] (low
 * byte).
 *
 *   jis_code  - code to convert; values outside 0x21..0x5f are caught by
 *               the range check.
 *   sound_sym - code of the character that follows.  When it equals
 *               JIS_HWDAKUTEN (jis_code in 0x36..0x4e) or
 *               JIS_HWHANDAKUTEN (jis_code in 0x4a..0x4e) the combined
 *               form is read from dakuten_tbl / handakuten_tbl;
 *               otherwise the plain mapping in h2z_tbl is used.
 *
 * NOTE(review): the return statements and the handling of 0x0000 table
 * entries are not visible in this excerpt; conv_euctojis() stores the
 * result in a variable named `len` -- confirm what it counts.
 */
229 static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
/* plain mapping, indexed by jis_code - 0x20 */
231 static guint16 h2z_tbl[] = {
233 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
234 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
236 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
237 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
239 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
240 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
242 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
243 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
/* forms combined with JIS_HWDAKUTEN, indexed by jis_code - 0x30 */
246 static guint16 dakuten_tbl[] = {
248 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
249 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
251 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
252 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
/* forms combined with JIS_HWHANDAKUTEN, indexed by jis_code - 0x4a */
255 static guint16 handakuten_tbl[] = {
257 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
265 if (jis_code < 0x21 || jis_code > 0x5f)
268 if (sound_sym == JIS_HWDAKUTEN &&
269 jis_code >= 0x36 && jis_code <= 0x4e) {
270 out_code = dakuten_tbl[jis_code - 0x30];
272 *outbuf = out_code >> 8;
273 *(outbuf + 1) = out_code & 0xff;
278 if (sound_sym == JIS_HWHANDAKUTEN &&
279 jis_code >= 0x4a && jis_code <= 0x4e) {
280 out_code = handakuten_tbl[jis_code - 0x4a];
281 *outbuf = out_code >> 8;
282 *(outbuf + 1) = out_code & 0xff;
286 out_code = h2z_tbl[jis_code - 0x20];
287 *outbuf = out_code >> 8;
288 *(outbuf + 1) = out_code & 0xff;
/*
 * conv_euctojis: convert EUC text in inbuf to JIS in outbuf.
 *
 * inbuf is scanned up to its terminating NUL with `state` (initially
 * JIS_ASCII) tracking the character set currently selected in the
 * output.  Visible cases:
 *   - two consecutive iseuckanji() bytes: both are written with the
 *     high bit cleared (& 0x7f);
 *   - an iseuchwkana1() byte followed by an iseuchwkana2() byte: copied
 *     with the high bit cleared when
 *     prefs_common.allow_jisx0201_kana is set, otherwise passed through
 *     conv_jis_hantozen();
 *   - an iseucaux() byte followed by two iseuckanji() bytes: written
 *     with the high bit cleared.
 *
 * NOTE(review): escape-sequence output, the use of outlen and the
 * return value are not visible in this excerpt -- confirm.
 */
292 static gint conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
294 const guchar *in = inbuf;
295 guchar *out = outbuf;
296 JISState state = JIS_ASCII;
298 while (*in != '\0') {
302 } else if (iseuckanji(*in)) {
303 if (iseuckanji(*(in + 1))) {
305 *out++ = *in++ & 0x7f;
306 *out++ = *in++ & 0x7f;
311 if (*in != '\0' && !IS_ASCII(*in)) {
316 } else if (iseuchwkana1(*in)) {
317 if (iseuchwkana2(*(in + 1))) {
318 if (prefs_common.allow_jisx0201_kana) {
321 *out++ = *in++ & 0x7f;
/* look ahead: when another iseuchwkana1/iseuchwkana2 pair follows, its
   second byte is handed to conv_jis_hantozen() as the sound symbol */
326 if (iseuchwkana1(*(in + 2)) &&
327 iseuchwkana2(*(in + 3)))
328 len = conv_jis_hantozen
330 *(in + 1), *(in + 3));
332 len = conv_jis_hantozen
347 if (*in != '\0' && !IS_ASCII(*in)) {
352 } else if (iseucaux(*in)) {
354 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
356 *out++ = *in++ & 0x7f;
357 *out++ = *in++ & 0x7f;
360 if (*in != '\0' && !IS_ASCII(*in)) {
363 if (*in != '\0' && !IS_ASCII(*in)) {
/*
 * conv_sjistoeuc: convert Shift_JIS text in inbuf to EUC in outbuf.
 *
 * For a two-byte character (issjiskanji1() byte followed by an
 * issjiskanji2() byte) the pair is remapped arithmetically into
 * out1/out2 and both are written with the high bit set.  Single
 * issjishwkana() bytes are handled in their own branch.
 *
 * NOTE(review): the ASCII branch, the half-width kana output, the use
 * of outlen and the return value are not visible in this excerpt.
 */
381 static gint conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
383 const guchar *in = inbuf;
384 guchar *out = outbuf;
386 while (*in != '\0') {
389 } else if (issjiskanji1(*in)) {
390 if (issjiskanji2(*(in + 1))) {
392 guchar out2 = *(in + 1);
/* lead bytes below 0xa0 and those from 0xe0 up use different offsets */
395 row = out1 < 0xa0 ? 0x70 : 0xb0;
397 out1 = (out1 - row) * 2 - 1;
398 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
400 out1 = (out1 - row) * 2;
404 *out++ = out1 | 0x80;
405 *out++ = out2 | 0x80;
410 if (*in != '\0' && !IS_ASCII(*in)) {
415 } else if (issjishwkana(*in)) {
/*
 * conv_jistoutf8: JIS -> UTF-8 in two steps, going through a temporary
 * EUC string: conv_jistoeuc() fills `eucstr` (outlen bytes obtained with
 * Xalloca), then conv_euctoutf8() converts that into outbuf.  A negative
 * result from either step is treated as failure.
 * NOTE(review): the values returned on failure/success are not visible
 * in this excerpt.
 */
428 static gint conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
432 Xalloca(eucstr, outlen, return -1);
434 if (conv_jistoeuc(eucstr, outlen, inbuf) <0)
436 if (conv_euctoutf8(outbuf, outlen, eucstr) < 0)
/*
 * conv_sjistoutf8: Shift_JIS -> UTF-8 through conv_iconv_strdup().  The
 * converted string is copied into outbuf (bounded by outlen); the other
 * visible path copies inbuf through unchanged.
 * NOTE(review): the condition choosing between the two copies and the
 * return values are not visible here -- presumably the verbatim copy is
 * the fallback for a failed conversion.
 */
441 static gint conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
445 tmpstr = conv_iconv_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);
447 strncpy2(outbuf, tmpstr, outlen);
451 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_euctoutf8: EUC-JP -> UTF-8 through iconv.
 *
 * The conversion descriptor is kept in a function-static variable, so it
 * is opened at most once and reused by later calls.  CS_EUC_JP_MS is
 * tried first and CS_EUC_JP second; if neither can be opened a warning
 * is logged and inbuf is copied to outbuf unchanged.  Otherwise the
 * result of conv_iconv_strdup_with_cd() is copied into outbuf.
 * NOTE(review): how iconv_ok is updated and the return values are not
 * visible in this excerpt.
 */
456 static gint conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
458 static iconv_t cd = (iconv_t)-1;
459 static gboolean iconv_ok = TRUE;
462 if (cd == (iconv_t)-1) {
464 strncpy2(outbuf, inbuf, outlen);
467 cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
468 if (cd == (iconv_t)-1) {
469 cd = iconv_open(CS_UTF_8, CS_EUC_JP);
470 if (cd == (iconv_t)-1) {
471 g_warning("conv_euctoutf8(): %s\n",
474 strncpy2(outbuf, inbuf, outlen);
480 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
482 strncpy2(outbuf, tmpstr, outlen);
486 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_anytoutf8: convert text of unknown Japanese encoding to UTF-8.
 * The encoding is guessed with conv_guess_ja_encoding() and the work is
 * delegated to conv_jistoutf8(), conv_sjistoutf8() or conv_euctoutf8();
 * in the remaining case inbuf is copied to outbuf unchanged.
 * NOTE(review): the case labels and the return statement are not
 * visible in this excerpt.
 */
491 static gint conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
494 switch (conv_guess_ja_encoding(inbuf)) {
496 r = conv_jistoutf8(outbuf, outlen, inbuf);
499 r = conv_sjistoutf8(outbuf, outlen, inbuf);
502 r = conv_euctoutf8(outbuf, outlen, inbuf);
506 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_utf8toeuc: UTF-8 -> EUC-JP through iconv; the counterpart of
 * conv_euctoutf8().
 *
 * The conversion descriptor lives in a function-static variable and is
 * reused across calls.  CS_EUC_JP_MS is tried as the target first, then
 * CS_EUC_JP; if neither can be opened a warning is logged and inbuf is
 * copied to outbuf unchanged.  Otherwise the result of
 * conv_iconv_strdup_with_cd() is copied into outbuf.
 * NOTE(review): how iconv_ok is updated and the return values are not
 * visible in this excerpt.
 */
513 static gint conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
515 static iconv_t cd = (iconv_t)-1;
516 static gboolean iconv_ok = TRUE;
519 if (cd == (iconv_t)-1) {
521 strncpy2(outbuf, inbuf, outlen);
524 cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
525 if (cd == (iconv_t)-1) {
526 cd = iconv_open(CS_EUC_JP, CS_UTF_8);
527 if (cd == (iconv_t)-1) {
528 g_warning("conv_utf8toeuc(): %s\n",
531 strncpy2(outbuf, inbuf, outlen);
537 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
539 strncpy2(outbuf, tmpstr, outlen);
543 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_utf8tojis: UTF-8 -> JIS in two steps, going through a temporary
 * EUC string: conv_utf8toeuc() fills `eucstr` (outlen bytes obtained
 * with Xalloca), then conv_euctojis() converts that into outbuf.  A
 * negative result from either step is treated as failure.
 * NOTE(review): the values returned on failure/success are not visible
 * in this excerpt.
 */
548 static gint conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf)
552 Xalloca(eucstr, outlen, return -1);
554 if (conv_utf8toeuc(eucstr, outlen, inbuf) < 0)
556 if (conv_euctojis(outbuf, outlen, eucstr) < 0)
/*
 * conv_unreadable_8bit: sanitise str in place.  Each CR+LF pair is
 * reduced to LF and every non-ASCII byte is overwritten with
 * SUBST_CHAR.  The string never grows.
 */
562 static void conv_unreadable_8bit(gchar *str)
564 register guchar *p = str;
567 /* convert CR+LF -> LF */
568 if (*p == '\r' && *(p + 1) == '\n')
/* shift the tail left by one; strlen(p) bytes starting at p + 1 include
   the terminating NUL */
569 memmove(p, p + 1, strlen(p));
570 else if (!IS_ASCII(*p)) *p = SUBST_CHAR;
/*
 * conv_guess_ja_encoding: heuristic guess of the Japanese encoding used
 * by str, based on byte patterns.
 *
 * `guessed` starts as C_US_ASCII.  An ESC followed by '$' or '(' seen
 * while the guess is still C_US_ASCII returns C_ISO_2022_JP at once.
 * Byte pairs satisfying the iseuc*() / issjis*() classifiers move the
 * guess; the visible assignments set it to C_SHIFT_JIS.
 * NOTE(review): the remaining assignments, the pointer advancement and
 * the final return are not visible in this excerpt.
 */
575 static CharSet conv_guess_ja_encoding(const gchar *str)
577 const guchar *p = str;
578 CharSet guessed = C_US_ASCII;
581 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
582 if (guessed == C_US_ASCII)
583 return C_ISO_2022_JP;
585 } else if (IS_ASCII(*p)) {
587 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
588 if (*p >= 0xfd && *p <= 0xfe)
590 else if (guessed == C_SHIFT_JIS) {
591 if ((issjiskanji1(*p) &&
592 issjiskanji2(*(p + 1))) ||
594 guessed = C_SHIFT_JIS;
600 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
601 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
602 guessed = C_SHIFT_JIS;
606 } else if (issjishwkana(*p)) {
607 guessed = C_SHIFT_JIS;
617 static gint conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
619 return conv_jistoutf8(outbuf, outlen, inbuf);
622 static gint conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
624 return conv_sjistoutf8(outbuf, outlen, inbuf);
627 static gint conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
629 return conv_euctoutf8(outbuf, outlen, inbuf);
/*
 * conv_utf8todisp: copy inbuf to outbuf (bounded by outlen) when it is
 * valid UTF-8; the other visible path hands the input to
 * conv_ustodisp(), which replaces every non-ASCII byte.
 */
632 void conv_utf8todisp(gchar *outbuf, gint outlen, const gchar *inbuf)
634 if (g_utf8_validate(inbuf, -1, NULL) == TRUE)
635 strncpy2(outbuf, inbuf, outlen);
637 conv_ustodisp(outbuf, outlen, inbuf);
/*
 * conv_anytodisp: display-form converter with encoding auto-detection.
 * Runs conv_anytoutf8(); if the result in outbuf is not valid UTF-8 its
 * non-ASCII bytes are replaced in place by conv_unreadable_8bit().
 * NOTE(review): what happens when conv_anytoutf8() reports a negative
 * status, and the return value, are not visible in this excerpt.
 */
640 static gint conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
643 if (conv_anytoutf8(outbuf, outlen, inbuf) < 0)
645 if (g_utf8_validate(outbuf, -1, NULL) != TRUE)
646 conv_unreadable_8bit(outbuf);
/*
 * conv_ustodisp: copy inbuf to outbuf (bounded by outlen), then replace
 * every non-ASCII byte of the copy via conv_unreadable_8bit().
 * NOTE(review): the return statement is not visible in this excerpt.
 */
650 static gint conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
652 strncpy2(outbuf, inbuf, outlen);
653 conv_unreadable_8bit(outbuf);
/*
 * conv_localetodisp: convert text in the locale's charset to the display
 * form.
 *
 * 1. With strict mode switched on for the duration of the call, convert
 *    from conv_get_locale_charset_str(); a valid UTF-8 result is copied
 *    to outbuf.
 * 2. If that attempt produced a string that is not valid UTF-8, retry
 *    the same way from conv_get_locale_charset_str_no_utf8().
 * 3. The remaining visible path hands the original input to
 *    conv_utf8todisp().
 * NOTE(review): the frees of tmpstr and the exact nesting of step 3 are
 * not visible in this excerpt.
 */
658 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
662 codeconv_set_strict(TRUE);
663 tmpstr = conv_iconv_strdup(inbuf, conv_get_locale_charset_str(),
665 codeconv_set_strict(FALSE);
666 if (tmpstr && g_utf8_validate(tmpstr, -1, NULL)) {
667 strncpy2(outbuf, tmpstr, outlen);
670 } else if (tmpstr && !g_utf8_validate(tmpstr, -1, NULL)) {
672 codeconv_set_strict(TRUE);
673 tmpstr = conv_iconv_strdup(inbuf,
674 conv_get_locale_charset_str_no_utf8(),
676 codeconv_set_strict(FALSE);
678 if (tmpstr && g_utf8_validate(tmpstr, -1, NULL)) {
679 strncpy2(outbuf, tmpstr, outlen);
684 conv_utf8todisp(outbuf, outlen, inbuf);
/*
 * conv_noconv: identity converter -- copies inbuf to outbuf, bounded by
 * outlen.  Its address doubles as the "no built-in converter" marker
 * that conv_convert() and conv_codeset_strdup() compare against.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
688 static gint conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
690 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_get_fallback_for_private_encoding: inspect charset names that
 * start with "X-" / "x-"; such a name is compared case-insensitively
 * with CS_X_GBK.  Callers assign the result back to their charset name
 * variable.
 * NOTE(review): the return type line and all return statements are not
 * visible in this excerpt -- presumably a replacement name is returned
 * for CS_X_GBK and the argument itself otherwise; confirm.
 */
695 conv_get_fallback_for_private_encoding(const gchar *encoding)
697 if (encoding && (encoding[0] == 'X' || encoding[0] == 'x') &&
698 encoding[1] == '-') {
699 if (!g_ascii_strcasecmp(encoding, CS_X_GBK))
/*
 * conv_code_converter_new: allocate a zero-initialised CodeConverter for
 * the given source charset name.
 *
 * The name is first passed through
 * conv_get_fallback_for_private_encoding().  The converter then records
 * the function chosen by conv_get_code_conv_func(src_charset, NULL), a
 * private copy of the charset name, and the matching CharSet value.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
706 CodeConverter *conv_code_converter_new(const gchar *src_charset)
710 src_charset = conv_get_fallback_for_private_encoding(src_charset);
712 conv = g_new0(CodeConverter, 1);
713 conv->code_conv_func = conv_get_code_conv_func(src_charset, NULL);
714 conv->charset_str = g_strdup(src_charset);
715 conv->charset = conv_get_charset_from_str(src_charset);
/*
 * conv_code_converter_destroy: release the charset name owned by conv.
 * NOTE(review): the release of conv itself is not visible in this
 * excerpt -- confirm.
 */
720 void conv_code_converter_destroy(CodeConverter *conv)
722 g_free(conv->charset_str);
/*
 * conv_convert: convert inbuf into outbuf (bounded by outlen) with the
 * given converter.
 *
 * When the converter holds a built-in function (anything other than
 * conv_noconv) that function does the work and its status is returned.
 * Otherwise the text is converted with conv_iconv_strdup() from
 * conv->charset_str and the result is copied into outbuf.
 * NOTE(review): the handling of a failed iconv conversion and the final
 * return value are not visible in this excerpt.
 */
726 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
729 if (conv->code_conv_func != conv_noconv)
730 return conv->code_conv_func(outbuf, outlen, inbuf);
/* no built-in converter: go through iconv with the default destination */
734 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
738 strncpy2(outbuf, str, outlen);
/*
 * conv_codeset_strdup: return a newly allocated copy of inbuf converted
 * from src_code to dest_code.
 *
 * - Charset names that strcmp2() reports as equal: plain g_strdup().
 * - A built-in converter (anything but conv_noconv) is run into a
 *   scratch buffer of (strlen(inbuf) + 1) * 3 bytes; the buffer, shrunk
 *   to fit, is returned when the converter returned 0 or strict_mode is
 *   off.
 * - Otherwise the conversion is left to conv_iconv_strdup().
 * NOTE(review): the strict-mode rejection paths and the allocation of
 * buf are not visible in this excerpt.
 */
746 gchar *conv_codeset_strdup(const gchar *inbuf,
747 const gchar *src_code, const gchar *dest_code)
751 CodeConvFunc conv_func;
753 if (!strcmp2(src_code, dest_code))
754 return g_strdup(inbuf);
756 src_code = conv_get_fallback_for_private_encoding(src_code);
757 conv_func = conv_get_code_conv_func(src_code, dest_code);
758 if (conv_func == conv_ustodisp && strict_mode && !is_ascii_str(inbuf))
761 if (conv_func != conv_noconv) {
762 len = (strlen(inbuf) + 1) * 3;
764 if (!buf) return NULL;
766 if (conv_func(buf, len, inbuf) == 0 || !strict_mode)
767 return g_realloc(buf, strlen(buf) + 1);
774 return conv_iconv_strdup(inbuf, src_code, dest_code);
/*
 * conv_get_code_conv_func: choose the built-in converter for a charset
 * name pair.  conv_noconv is the initial value and therefore the answer
 * for every combination not listed below.
 *
 *   src_charset_str  - source name; NULL means the locale charset.
 *   dest_charset_str - destination name; NULL maps to C_AUTO through
 *                      conv_get_charset_from_str().
 *
 * Both NULL selects auto-detection: conv_anytodisp in a Japanese
 * locale.  A C_US_ASCII destination always yields conv_ustodisp.
 * Otherwise the switch on the source charset picks one of the
 * JIS / Shift_JIS / EUC / UTF-8 converters according to the destination.
 * NOTE(review): several case labels and the non-Japanese auto-detection
 * result are not visible in this excerpt.
 */
777 static CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
778 const gchar *dest_charset_str)
780 CodeConvFunc code_conv = conv_noconv;
782 CharSet dest_charset;
784 if (!src_charset_str)
785 src_charset = conv_get_locale_charset();
787 src_charset = conv_get_charset_from_str(src_charset_str);
789 /* auto detection mode */
790 if (!src_charset_str && !dest_charset_str) {
791 if (conv_is_ja_locale())
792 return conv_anytodisp;
797 dest_charset = conv_get_charset_from_str(dest_charset_str);
799 if (dest_charset == C_US_ASCII)
800 return conv_ustodisp;
802 switch (src_charset) {
820 case C_ISO_2022_JP_2:
821 case C_ISO_2022_JP_3:
822 if (dest_charset == C_AUTO)
823 code_conv = conv_jistodisp;
824 else if (dest_charset == C_EUC_JP)
825 code_conv = conv_jistoeuc;
826 else if (dest_charset == C_UTF_8)
827 code_conv = conv_jistoutf8;
830 if (dest_charset == C_AUTO)
831 code_conv = conv_sjistodisp;
832 else if (dest_charset == C_EUC_JP)
833 code_conv = conv_sjistoeuc;
834 else if (dest_charset == C_UTF_8)
835 code_conv = conv_sjistoutf8;
838 if (dest_charset == C_AUTO)
839 code_conv = conv_euctodisp;
840 else if (dest_charset == C_ISO_2022_JP ||
841 dest_charset == C_ISO_2022_JP_2 ||
842 dest_charset == C_ISO_2022_JP_3)
843 code_conv = conv_euctojis;
844 else if (dest_charset == C_UTF_8)
845 code_conv = conv_euctoutf8;
848 if (dest_charset == C_EUC_JP)
849 code_conv = conv_utf8toeuc;
850 else if (dest_charset == C_ISO_2022_JP ||
851 dest_charset == C_ISO_2022_JP_2 ||
852 dest_charset == C_ISO_2022_JP_3)
853 code_conv = conv_utf8tojis;
/*
 * conv_iconv_strdup: return a newly allocated copy of inbuf converted
 * from src_code to dest_code with iconv.
 *
 * Shortcuts that return an unconverted g_strdup() of the input:
 *   - both names NULL and the input already valid UTF-8;
 *   - source and destination names equal (case-insensitive);
 *   - either side is CS_US_ASCII.
 * The visible defaults are conv_get_outgoing_charset_str() for the
 * source and CS_INTERNAL for the destination.
 * NOTE(review): the guards around those defaults, the result when
 * iconv_open() fails and the iconv_close() call are not visible in this
 * excerpt.
 */
862 static gchar *conv_iconv_strdup(const gchar *inbuf,
863 const gchar *src_code, const gchar *dest_code)
868 if (!src_code && !dest_code &&
869 g_utf8_validate(inbuf, -1, NULL))
870 return g_strdup(inbuf);
873 src_code = conv_get_outgoing_charset_str();
875 dest_code = CS_INTERNAL;
877 /* don't convert if src and dest codeset are identical */
878 if (!strcasecmp(src_code, dest_code))
879 return g_strdup(inbuf);
881 /* don't convert if src codeset is US-ASCII */
882 if (!strcasecmp(src_code, CS_US_ASCII))
883 return g_strdup(inbuf);
885 /* don't convert if dest codeset is US-ASCII */
886 if (!strcasecmp(dest_code, CS_US_ASCII))
887 return g_strdup(inbuf);
889 cd = iconv_open(dest_code, src_code);
890 if (cd == (iconv_t)-1)
893 outbuf = conv_iconv_strdup_with_cd(inbuf, cd);
/*
 * conv_iconv_strdup_with_cd: convert the NUL-terminated inbuf with an
 * already opened iconv descriptor and return the result in a newly
 * allocated buffer.
 *
 * The output buffer starts at (strlen(inbuf) + 1) * 2 bytes and is grown
 * through EXPAND_BUF() whenever iconv() reports E2BIG.  On EILSEQ a
 * SUBST_CHAR is written to the output.  After the input has been
 * consumed, iconv() is called once more with a NULL input, and the
 * buffer is finally shrunk to the converted length plus one byte.
 * NOTE(review): how the input pointer is advanced after EILSEQ, the
 * EINVAL handling, the strict-mode behaviour and the NUL termination
 * are not visible in this excerpt.
 */
900 gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd)
902 const gchar *inbuf_p;
913 in_size = strlen(inbuf);
915 out_size = (in_size + 1) * 2;
916 outbuf = g_malloc(out_size);
/* grows outbuf and re-derives the write pointer and remaining space */
920 #define EXPAND_BUF() \
922 len = outbuf_p - outbuf; \
924 outbuf = g_realloc(outbuf, out_size); \
925 outbuf_p = outbuf + len; \
926 out_left = out_size - len; \
929 while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
930 &outbuf_p, &out_left)) == (size_t)-1) {
931 if (EILSEQ == errno) {
936 //g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
942 *outbuf_p++ = SUBST_CHAR;
944 } else if (EINVAL == errno) {
946 } else if (E2BIG == errno) {
949 g_warning("conv_iconv_strdup(): %s\n",
/* final call with a NULL input, repeated while it fails */
955 while ((n_conv = iconv(cd, NULL, NULL, &outbuf_p, &out_left)) ==
957 if (E2BIG == errno) {
960 g_warning("conv_iconv_strdup(): %s\n",
968 len = outbuf_p - outbuf;
969 outbuf = g_realloc(outbuf, len + 1);
/*
 * Table pairing each CharSet value with a CS_* name string.  A CharSet
 * may be listed more than once (C_US_ASCII, C_EUC_JP, C_SHIFT_JIS), one
 * row per name.  conv_get_charset_to_str_table() and
 * conv_get_charset_from_str_table() build their hash tables from the
 * .charset and .name members of these rows.
 */
975 static const struct {
979 {C_US_ASCII, CS_US_ASCII},
980 {C_US_ASCII, CS_ANSI_X3_4_1968},
983 {C_ISO_8859_1, CS_ISO_8859_1},
984 {C_ISO_8859_2, CS_ISO_8859_2},
985 {C_ISO_8859_3, CS_ISO_8859_3},
986 {C_ISO_8859_4, CS_ISO_8859_4},
987 {C_ISO_8859_5, CS_ISO_8859_5},
988 {C_ISO_8859_6, CS_ISO_8859_6},
989 {C_ISO_8859_7, CS_ISO_8859_7},
990 {C_ISO_8859_8, CS_ISO_8859_8},
991 {C_ISO_8859_9, CS_ISO_8859_9},
992 {C_ISO_8859_10, CS_ISO_8859_10},
993 {C_ISO_8859_11, CS_ISO_8859_11},
994 {C_ISO_8859_13, CS_ISO_8859_13},
995 {C_ISO_8859_14, CS_ISO_8859_14},
996 {C_ISO_8859_15, CS_ISO_8859_15},
997 {C_BALTIC, CS_BALTIC},
998 {C_CP1250, CS_CP1250},
999 {C_CP1251, CS_CP1251},
1000 {C_CP1252, CS_CP1252},
1001 {C_CP1253, CS_CP1253},
1002 {C_CP1254, CS_CP1254},
1003 {C_CP1255, CS_CP1255},
1004 {C_CP1256, CS_CP1256},
1005 {C_CP1257, CS_CP1257},
1006 {C_CP1258, CS_CP1258},
1007 {C_WINDOWS_1250, CS_WINDOWS_1250},
1008 {C_WINDOWS_1251, CS_WINDOWS_1251},
1009 {C_WINDOWS_1252, CS_WINDOWS_1252},
1010 {C_WINDOWS_1253, CS_WINDOWS_1253},
1011 {C_WINDOWS_1254, CS_WINDOWS_1254},
1012 {C_WINDOWS_1255, CS_WINDOWS_1255},
1013 {C_WINDOWS_1256, CS_WINDOWS_1256},
1014 {C_WINDOWS_1257, CS_WINDOWS_1257},
1015 {C_WINDOWS_1258, CS_WINDOWS_1258},
1016 {C_KOI8_R, CS_KOI8_R},
1017 {C_KOI8_T, CS_KOI8_T},
1018 {C_KOI8_U, CS_KOI8_U},
1019 {C_ISO_2022_JP, CS_ISO_2022_JP},
1020 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
1021 {C_ISO_2022_JP_3, CS_ISO_2022_JP_3},
1022 {C_EUC_JP, CS_EUC_JP},
1023 {C_EUC_JP, CS_EUCJP},
1024 {C_EUC_JP_MS, CS_EUC_JP_MS},
1025 {C_SHIFT_JIS, CS_SHIFT_JIS},
1026 {C_SHIFT_JIS, CS_SHIFT__JIS},
1027 {C_SHIFT_JIS, CS_SJIS},
1028 {C_ISO_2022_KR, CS_ISO_2022_KR},
1029 {C_EUC_KR, CS_EUC_KR},
1030 {C_ISO_2022_CN, CS_ISO_2022_CN},
1031 {C_EUC_CN, CS_EUC_CN},
1032 {C_GB18030, CS_GB18030},
1033 {C_GB2312, CS_GB2312},
1035 {C_EUC_TW, CS_EUC_TW},
1037 {C_BIG5_HKSCS, CS_BIG5_HKSCS},
1038 {C_TIS_620, CS_TIS_620},
1039 {C_WINDOWS_874, CS_WINDOWS_874},
1040 {C_GEORGIAN_PS, CS_GEORGIAN_PS},
1041 {C_TCVN5712_1, CS_TCVN5712_1},
/*
 * Locale name -> charset table.  Each row holds a locale string, the
 * charset read through .charset by the locale-charset lookups, and the
 * charset read through .out_charset by conv_get_outgoing_charset().
 *
 * The lookups compare the current locale against each row's locale
 * string case-insensitively over the length of the row's string, going
 * through the rows in order; more specific names (such as
 * "ja_JP.eucJP") are listed before the bare language_COUNTRY form.
 * NOTE(review): the repeated "ja_JP" and "ru_RU" rows are presumably
 * alternatives selected by preprocessor conditionals that are not
 * visible in this excerpt -- confirm.
 */
1044 static const struct {
1045 gchar *const locale;
1047 CharSet out_charset;
1048 } locale_table[] = {
1049 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
1050 {"ja_JP.EUC-JP" , C_EUC_JP , C_ISO_2022_JP},
1051 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
1052 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
1053 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
1054 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
1056 {"ja_JP" , C_SHIFT_JIS , C_ISO_2022_JP},
1058 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
1060 {"ko_KR.EUC-KR" , C_EUC_KR , C_EUC_KR},
1061 {"ko_KR" , C_EUC_KR , C_EUC_KR},
1062 {"zh_CN.GB18030" , C_GB18030 , C_GB18030},
1063 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
1064 {"zh_CN.GBK" , C_GBK , C_GBK},
1065 {"zh_CN" , C_GB18030 , C_GB18030},
1066 {"zh_HK" , C_BIG5_HKSCS , C_BIG5_HKSCS},
1067 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
1068 {"zh_TW.EUC-TW" , C_EUC_TW , C_BIG5},
1069 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
1070 {"zh_TW" , C_BIG5 , C_BIG5},
1072 {"ru_RU.KOI8-R" , C_KOI8_R , C_KOI8_R},
1073 {"ru_RU.KOI8R" , C_KOI8_R , C_KOI8_R},
1074 {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
1076 {"ru_RU" , C_WINDOWS_1251, C_KOI8_R},
1078 {"ru_RU" , C_ISO_8859_5 , C_KOI8_R},
1080 {"tg_TJ" , C_KOI8_T , C_KOI8_T},
1081 {"ru_UA" , C_KOI8_U , C_KOI8_U},
1082 {"uk_UA.CP1251" , C_WINDOWS_1251, C_KOI8_U},
1083 {"uk_UA" , C_KOI8_U , C_KOI8_U},
1085 {"be_BY" , C_WINDOWS_1251, C_WINDOWS_1251},
1086 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
1088 {"yi_US" , C_WINDOWS_1255, C_WINDOWS_1255},
1090 {"af_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1091 {"br_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1092 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1093 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1094 {"de_AT" , C_ISO_8859_1 , C_ISO_8859_1},
1095 {"de_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1096 {"de_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1097 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
1098 {"de_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1099 {"en_AU" , C_ISO_8859_1 , C_ISO_8859_1},
1100 {"en_BW" , C_ISO_8859_1 , C_ISO_8859_1},
1101 {"en_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1102 {"en_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1103 {"en_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1104 {"en_HK" , C_ISO_8859_1 , C_ISO_8859_1},
1105 {"en_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1106 {"en_NZ" , C_ISO_8859_1 , C_ISO_8859_1},
1107 {"en_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1108 {"en_SG" , C_ISO_8859_1 , C_ISO_8859_1},
1109 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
1110 {"en_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1111 {"en_ZW" , C_ISO_8859_1 , C_ISO_8859_1},
1112 {"es_AR" , C_ISO_8859_1 , C_ISO_8859_1},
1113 {"es_BO" , C_ISO_8859_1 , C_ISO_8859_1},
1114 {"es_CL" , C_ISO_8859_1 , C_ISO_8859_1},
1115 {"es_CO" , C_ISO_8859_1 , C_ISO_8859_1},
1116 {"es_CR" , C_ISO_8859_1 , C_ISO_8859_1},
1117 {"es_DO" , C_ISO_8859_1 , C_ISO_8859_1},
1118 {"es_EC" , C_ISO_8859_1 , C_ISO_8859_1},
1119 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1120 {"es_GT" , C_ISO_8859_1 , C_ISO_8859_1},
1121 {"es_HN" , C_ISO_8859_1 , C_ISO_8859_1},
1122 {"es_MX" , C_ISO_8859_1 , C_ISO_8859_1},
1123 {"es_NI" , C_ISO_8859_1 , C_ISO_8859_1},
1124 {"es_PA" , C_ISO_8859_1 , C_ISO_8859_1},
1125 {"es_PE" , C_ISO_8859_1 , C_ISO_8859_1},
1126 {"es_PR" , C_ISO_8859_1 , C_ISO_8859_1},
1127 {"es_PY" , C_ISO_8859_1 , C_ISO_8859_1},
1128 {"es_SV" , C_ISO_8859_1 , C_ISO_8859_1},
1129 {"es_US" , C_ISO_8859_1 , C_ISO_8859_1},
1130 {"es_UY" , C_ISO_8859_1 , C_ISO_8859_1},
1131 {"es_VE" , C_ISO_8859_1 , C_ISO_8859_1},
1132 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
1133 {"eu_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1134 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1135 {"fo_FO" , C_ISO_8859_1 , C_ISO_8859_1},
1136 {"fr_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1137 {"fr_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1138 {"fr_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1139 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1140 {"fr_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1141 {"ga_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1142 {"gl_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1143 {"gv_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1144 {"id_ID" , C_ISO_8859_1 , C_ISO_8859_1},
1145 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
1146 {"it_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1147 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
1148 {"kl_GL" , C_ISO_8859_1 , C_ISO_8859_1},
1149 {"kw_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1150 {"ms_MY" , C_ISO_8859_1 , C_ISO_8859_1},
1151 {"nl_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1152 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
1153 {"nb_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1154 {"nn_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1155 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1156 {"oc_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1157 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
1158 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
1159 {"sq_AL" , C_ISO_8859_1 , C_ISO_8859_1},
1160 {"sv_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1161 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
1162 {"tl_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1163 {"uz_UZ" , C_ISO_8859_1 , C_ISO_8859_1},
1164 {"wa_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1166 {"bs_BA" , C_ISO_8859_2 , C_ISO_8859_2},
1167 {"cs_CZ" , C_ISO_8859_2 , C_ISO_8859_2},
1168 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
1169 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
1170 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
1171 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
1172 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
1173 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
1175 {"sr_YU@cyrillic" , C_ISO_8859_5 , C_ISO_8859_5},
1176 {"sr_YU" , C_ISO_8859_2 , C_ISO_8859_2},
1178 {"mt_MT" , C_ISO_8859_3 , C_ISO_8859_3},
1180 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
1181 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1182 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1183 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
1185 {"mk_MK" , C_ISO_8859_5 , C_ISO_8859_5},
1187 {"ar_AE" , C_ISO_8859_6 , C_ISO_8859_6},
1188 {"ar_BH" , C_ISO_8859_6 , C_ISO_8859_6},
1189 {"ar_DZ" , C_ISO_8859_6 , C_ISO_8859_6},
1190 {"ar_EG" , C_ISO_8859_6 , C_ISO_8859_6},
1191 {"ar_IQ" , C_ISO_8859_6 , C_ISO_8859_6},
1192 {"ar_JO" , C_ISO_8859_6 , C_ISO_8859_6},
1193 {"ar_KW" , C_ISO_8859_6 , C_ISO_8859_6},
1194 {"ar_LB" , C_ISO_8859_6 , C_ISO_8859_6},
1195 {"ar_LY" , C_ISO_8859_6 , C_ISO_8859_6},
1196 {"ar_MA" , C_ISO_8859_6 , C_ISO_8859_6},
1197 {"ar_OM" , C_ISO_8859_6 , C_ISO_8859_6},
1198 {"ar_QA" , C_ISO_8859_6 , C_ISO_8859_6},
1199 {"ar_SA" , C_ISO_8859_6 , C_ISO_8859_6},
1200 {"ar_SD" , C_ISO_8859_6 , C_ISO_8859_6},
1201 {"ar_SY" , C_ISO_8859_6 , C_ISO_8859_6},
1202 {"ar_TN" , C_ISO_8859_6 , C_ISO_8859_6},
1203 {"ar_YE" , C_ISO_8859_6 , C_ISO_8859_6},
1205 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
1206 {"he_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1207 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1208 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
1210 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
1211 {"mi_NZ" , C_ISO_8859_13 , C_ISO_8859_13},
1213 {"cy_GB" , C_ISO_8859_14 , C_ISO_8859_14},
1215 {"ar_IN" , C_UTF_8 , C_UTF_8},
1216 {"en_IN" , C_UTF_8 , C_UTF_8},
1217 {"se_NO" , C_UTF_8 , C_UTF_8},
1218 {"ta_IN" , C_UTF_8 , C_UTF_8},
1219 {"te_IN" , C_UTF_8 , C_UTF_8},
1220 {"ur_PK" , C_UTF_8 , C_UTF_8},
1222 {"th_TH" , C_TIS_620 , C_TIS_620},
1223 /* {"th_TH" , C_WINDOWS_874}, */
1224 /* {"th_TH" , C_ISO_8859_11}, */
1226 {"ka_GE" , C_GEORGIAN_PS , C_GEORGIAN_PS},
1227 {"vi_VN.TCVN" , C_TCVN5712_1 , C_TCVN5712_1},
1229 {"C" , C_US_ASCII , C_US_ASCII},
1230 {"POSIX" , C_US_ASCII , C_US_ASCII},
1231 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
/*
 * conv_get_charset_to_str_table: build the CharSet -> name hash table
 * from charsets[], keyed directly by the CharSet value; the table
 * pointer is kept in a function-static variable.
 * Each row is looked up before the insert.
 * NOTE(review): the comparison applied to that lookup and the caching
 * guard are not visible in this excerpt -- presumably a CharSet that is
 * already present is skipped, so the first name listed wins; confirm.
 */
1234 static GHashTable *conv_get_charset_to_str_table(void)
1236 static GHashTable *table;
1242 table = g_hash_table_new(NULL, g_direct_equal);
1244 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1245 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1248 (table, GUINT_TO_POINTER(charsets[i].charset),
/*
 * conv_get_charset_from_str_table: build the name -> CharSet hash table
 * from every row of charsets[].  Keys are hashed and compared with
 * str_case_hash / str_case_equal; the table pointer is kept in a
 * function-static variable.
 * NOTE(review): the caching guard and the return statement are not
 * visible in this excerpt.
 */
1256 static GHashTable *conv_get_charset_from_str_table(void)
1258 static GHashTable *table;
1264 table = g_hash_table_new(str_case_hash, str_case_equal);
1266 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1267 g_hash_table_insert(table, charsets[i].name,
1268 GUINT_TO_POINTER(charsets[i].charset));
/*
 * conv_get_charset_str: name string registered for `charset`, or NULL
 * when the CharSet -> name table has no entry for it.
 */
1274 const gchar *conv_get_charset_str(CharSet charset)
1278 table = conv_get_charset_to_str_table();
1279 return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
/*
 * conv_get_charset_from_str: CharSet value for a charset name.
 * A NULL name yields C_AUTO.  A name missing from the table makes the
 * lookup return NULL, which converts to the CharSet value 0.
 */
1282 CharSet conv_get_charset_from_str(const gchar *charset)
1286 if (!charset) return C_AUTO;
1288 table = conv_get_charset_from_str_table();
1289 return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
/*
 * conv_get_locale_charset: CharSet implied by the current locale.  The
 * answer is stored in a function-static variable (initially -1) that is
 * checked on entry.
 *
 * Order of the visible checks on the string from
 * conv_get_current_locale():
 *   1. contains "UTF-8" or "utf8" (any case)   -> C_UTF_8
 *   2. ends with "@euro" (any case)            -> C_ISO_8859_15
 *   3. a locale_table row whose locale string matches the start of the
 *      current locale, or, for a two-character locale, a row without a
 *      codeset suffix sharing its first two characters
 *                                              -> that row's .charset
 *   4. nothing matched                         -> C_AUTO
 * NOTE(review): the condition under which C_US_ASCII is assigned and
 * the return statements are not visible in this excerpt.
 */
1292 static CharSet conv_get_locale_charset(void)
1294 static CharSet cur_charset = -1;
1295 const gchar *cur_locale;
1299 if (cur_charset != -1)
1302 cur_locale = conv_get_current_locale();
1304 cur_charset = C_US_ASCII;
1308 if (strcasestr(cur_locale, "UTF-8") ||
1309 strcasestr(cur_locale, "utf8")) {
1310 cur_charset = C_UTF_8;
1314 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1315 cur_charset = C_ISO_8859_15;
1319 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1322 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1323 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1324 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1325 strlen(locale_table[i].locale))) {
1326 cur_charset = locale_table[i].charset;
1328 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1329 !strchr(p + 1, '.')) {
1330 if (strlen(cur_locale) == 2 &&
1331 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1332 cur_charset = locale_table[i].charset;
1338 cur_charset = C_AUTO;
/*
 * conv_get_locale_charset_no_utf8: variant of the locale charset lookup
 * used by conv_get_locale_charset_str_no_utf8().
 *
 * When prefs_common.broken_are_utf8 is set the answer is C_UTF_8.
 * Otherwise the visible checks mirror conv_get_locale_charset():
 * "UTF-8"/"utf8" substring, trailing "@euro", then the locale_table
 * scan, with C_AUTO when nothing matches.
 * NOTE(review): despite the name, the visible code still assigns
 * C_UTF_8 for UTF-8 locales, and no check of the cached value is
 * visible -- confirm against the full source.
 */
1342 static CharSet conv_get_locale_charset_no_utf8(void)
1344 static CharSet cur_charset = -1;
1345 const gchar *cur_locale;
1349 if (prefs_common.broken_are_utf8) {
1350 cur_charset = C_UTF_8;
1354 cur_locale = conv_get_current_locale();
1356 cur_charset = C_US_ASCII;
1360 if (strcasestr(cur_locale, "UTF-8") ||
1361 strcasestr(cur_locale, "utf8")) {
1362 cur_charset = C_UTF_8;
1366 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1367 cur_charset = C_ISO_8859_15;
1371 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1374 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1375 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1376 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1377 strlen(locale_table[i].locale))) {
1378 cur_charset = locale_table[i].charset;
1380 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1381 !strchr(p + 1, '.')) {
1382 if (strlen(cur_locale) == 2 &&
1383 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1384 cur_charset = locale_table[i].charset;
1390 cur_charset = C_AUTO;
/*
 * conv_get_locale_charset_str: name of the locale charset, kept in a
 * function-static pointer; CS_INTERNAL is returned when no name is
 * available.
 */
1394 const gchar *conv_get_locale_charset_str(void)
1396 static const gchar *codeset = NULL;
1399 codeset = conv_get_charset_str(conv_get_locale_charset());
1401 return codeset ? codeset : CS_INTERNAL;
/*
 * conv_get_locale_charset_str_no_utf8: name of the charset reported by
 * conv_get_locale_charset_no_utf8(), kept in a function-static pointer;
 * CS_INTERNAL is returned when no name is available.
 */
1404 const gchar *conv_get_locale_charset_str_no_utf8(void)
1406 static const gchar *codeset = NULL;
1409 codeset = conv_get_charset_str(conv_get_locale_charset_no_utf8());
1411 return codeset ? codeset : CS_INTERNAL;
/*
 * conv_get_outgoing_charset: CharSet to use for outgoing text, derived
 * from the current locale.  The answer is stored in a function-static
 * variable (initially -1) that is checked on entry.
 *
 * The visible checks follow the same order as the locale charset
 * lookup -- "UTF-8"/"utf8" substring -> C_UTF_8, trailing "@euro" ->
 * C_ISO_8859_15, then the locale_table scan -- but the scan reads the
 * row's .out_charset member.
 * NOTE(review): the condition under which C_AUTO is assigned and the
 * return statements are not visible in this excerpt.
 */
1414 static CharSet conv_get_outgoing_charset(void)
1416 static CharSet out_charset = -1;
1417 const gchar *cur_locale;
1421 if (out_charset != -1)
1424 cur_locale = conv_get_current_locale();
1426 out_charset = C_AUTO;
1430 if (strcasestr(cur_locale, "UTF-8") ||
1431 strcasestr(cur_locale, "utf8")) {
1432 out_charset = C_UTF_8;
1436 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1437 out_charset = C_ISO_8859_15;
1441 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1444 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1445 strlen(locale_table[i].locale))) {
1446 out_charset = locale_table[i].out_charset;
1448 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1449 !strchr(p + 1, '.')) {
1450 if (strlen(cur_locale) == 2 &&
1451 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1452 out_charset = locale_table[i].out_charset;
/*
 * conv_get_outgoing_charset_str: name of the outgoing charset, or
 * CS_UTF_8 when conv_get_charset_str() has no name for it.
 */
1461 const gchar *conv_get_outgoing_charset_str(void)
1463 CharSet out_charset;
1466 out_charset = conv_get_outgoing_charset();
1467 str = conv_get_charset_str(out_charset);
1469 return str ? str : CS_UTF_8;
/*
 * conv_get_current_locale: locale name used by the charset lookups.
 * On Windows builds it comes from g_win32_getlocale().  Elsewhere the
 * first value found wins, in this order: the LC_ALL, LC_CTYPE and LANG
 * environment variables, then setlocale(LC_CTYPE, NULL).
 * The value is written to the debug log, with "(none)" standing in for
 * a NULL result.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1472 const gchar *conv_get_current_locale(void)
1474 const gchar *cur_locale;
1477 cur_locale = g_win32_getlocale();
1479 cur_locale = g_getenv("LC_ALL");
1480 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1481 if (!cur_locale) cur_locale = g_getenv("LANG");
1482 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1483 #endif /* G_OS_WIN32 */
1485 debug_print("current locale: %s\n",
1486 cur_locale ? cur_locale : "(none)");
/*
 * conv_is_ja_locale: TRUE when the current locale name starts with "ja"
 * (case-insensitive).  The answer is cached in a function-static
 * tri-state: -1 means not yet determined, otherwise zero / non-zero.
 * NOTE(review): the assignments to is_ja_locale and any NULL check on
 * cur_locale are not visible in this excerpt.
 */
1491 static gboolean conv_is_ja_locale(void)
1493 static gint is_ja_locale = -1;
1494 const gchar *cur_locale;
1496 if (is_ja_locale != -1)
1497 return is_ja_locale != 0;
1500 cur_locale = conv_get_current_locale();
1502 if (g_ascii_strncasecmp(cur_locale, "ja", 2) == 0)
1506 return is_ja_locale != 0;
/*
 * conv_unmime_header: decode a MIME-encoded header value.
 *
 * - Pure ASCII input goes straight to unmime_header().
 * - With a default_encoding, the raw header is first converted from
 *   that encoding to CS_INTERNAL and the converted text is decoded.
 * - Otherwise the header is brought to display form in a BUFFSIZE stack
 *   buffer (conv_anytodisp() in a Japanese locale, conv_localetodisp()
 *   elsewhere) and then decoded; longer input is cut to the buffer
 *   size by those converters' outlen bound.
 * NOTE(review): the handling of a failed conversion from
 * default_encoding and the freeing of utf8_buf are not visible in this
 * excerpt.
 */
1509 gchar *conv_unmime_header(const gchar *str, const gchar *default_encoding,
1510 gboolean addr_field)
1512 gchar buf[BUFFSIZE];
1514 if (is_ascii_str(str))
1515 return unmime_header(str, addr_field);
1517 if (default_encoding) {
1520 utf8_buf = conv_codeset_strdup
1521 (str, default_encoding, CS_INTERNAL);
1525 decoded_str = unmime_header(utf8_buf, addr_field);
1531 if (conv_is_ja_locale())
1532 conv_anytodisp(buf, sizeof(buf), str);
1534 conv_localetodisp(buf, sizeof(buf), str);
1536 return unmime_header(buf, addr_field);
/* Preferred width of an encoded header line: conv_encode_header_full()
   starts with MAX_LINELEN columns minus the header length and folds the
   line when they run out. */
1539 #define MAX_LINELEN 76
/* Hard limit used while copying a plain ASCII word: inside such a word a
   break is only forced once this many columns have been used. */
1540 #define MAX_HARD_LINELEN 996
/* Opening and closing delimiters placed around each MIME-encoded block
   (written as MIMESEP_BEGIN charset encoding-marker text MIMESEP_END). */
1541 #define MIMESEP_BEGIN "=?"
1542 #define MIMESEP_END "?="
/*
 * LBREAK_IF_REQUIRED(cond, is_plain_text)
 *
 * Line-folding helper for conv_encode_header_full().  It is not
 * self-contained: it reads and updates the enclosing function's locals
 *   dest, len  - output buffer and its size
 *   destp      - current write position inside dest
 *   srcp       - current read position in the source string
 *   left       - columns still available on the current output line
 *
 * Step 1: if fewer than MAX_LINELEN + 2 bytes remain in dest, terminate the
 *         output and return from the ENCLOSING function.
 * Step 2: if `cond` holds and input remains, fold the line by writing
 *         "\n " and resetting `left` to MAX_LINELEN - 1.  One whitespace
 *         character next to the fold point is swallowed first: the one just
 *         written, or -- for plain text only -- the one about to be read.
 *
 * When nothing has been written yet (destp == dest) there is no previous
 * output character, so that case must not look at destp[-1]; doing so read
 * (and could subsequently write) one byte in front of the buffer.
 *
 * The macro deliberately expands to a bare brace block instead of the usual
 * do { } while (0): at least one call site invokes it without a trailing
 * semicolon.
 */
#define LBREAK_IF_REQUIRED(cond, is_plain_text)				\
{									\
	if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) {		\
		*destp = '\0';						\
		return;							\
	}								\
									\
	if ((cond) && *srcp) {						\
		if (destp > (guchar *)dest && left < MAX_LINELEN - 1) {	\
			if (isspace(*(destp - 1)))			\
				destp--;				\
			else if ((is_plain_text) && isspace(*srcp))	\
				srcp++;					\
			if (*srcp) {					\
				*destp++ = '\n';			\
				*destp++ = ' ';				\
				left = MAX_LINELEN - 1;			\
			}						\
		} else if (destp == (guchar *)dest && left < 7) {	\
			if ((is_plain_text) && isspace(*srcp))		\
				srcp++;					\
			if (*srcp) {					\
				*destp++ = '\n';			\
				*destp++ = ' ';				\
				left = MAX_LINELEN - 1;			\
			}						\
		}							\
	}								\
}
/*
 * conv_encode_header_full: copy the header value `src` into `dest`, leaving
 * ASCII words as they are and wrapping runs that contain non-ASCII text in
 * MIME-encoded blocks (MIMESEP_BEGIN charset marker text MIMESEP_END),
 * folding lines through LBREAK_IF_REQUIRED as it goes.
 *
 * dest, len:     output buffer and its size; LBREAK_IF_REQUIRED uses `len`
 *                to stop before the buffer is exhausted.
 * src:           input text; it is checked with g_utf8_validate() first.
 * header_len:    subtracted from MAX_LINELEN to obtain the room available
 *                on the first line.
 * addr_field:    when set, '(' , ')' and '"' are kept outside of the
 *                encoded blocks.
 * out_encoding_: name of the target charset.  conv_encode_header() passes
 *                NULL here; the value of conv_get_outgoing_charset_str() is
 *                then presumably used instead -- confirm against the
 *                selecting condition.
 *
 * Each candidate part is converted from CS_INTERNAL to the output charset
 * with conv_codeset_strdup(); when that fails a warning is logged,
 * conv_unreadable_8bit() is applied to the part and a plain copy of it is
 * encoded.  The encoded length (B64LEN() or qp_get_q_encoding_len()) plus
 * the fixed wrapper length is compared with the space left on the line to
 * decide how much text goes into one block.
 */
1576 void conv_encode_header_full(gchar *dest, gint len, const gchar *src,
1577 gint header_len, gboolean addr_field,
1578 const gchar *out_encoding_)
1580 const gchar *cur_encoding;
1581 const gchar *out_encoding;
1585 const guchar *srcp = src;
1586 guchar *destp = dest;
1587 gboolean use_base64;
/* preconditions: valid UTF-8 input and a non-NULL output buffer */
1589 cm_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
1590 cm_return_if_fail(destp != NULL);
/* multibyte locales use the "?B?" marker, the others "?Q?" */
1592 if (MB_CUR_MAX > 1) {
1594 mimesep_enc = "?B?";
1597 mimesep_enc = "?Q?";
/* the input is always in the internal charset */
1600 cur_encoding = CS_INTERNAL;
1603 out_encoding = out_encoding_;
1605 out_encoding = conv_get_outgoing_charset_str();
/* US-ASCII is never used as the label of an encoded block; ISO-8859-1 is
   substituted for it */
1607 if (!strcmp(out_encoding, CS_US_ASCII))
1608 out_encoding = CS_ISO_8859_1;
/* fixed overhead of one encoded block: both delimiters, the charset name
   and the encoding marker */
1610 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1611 strlen(mimesep_enc) + strlen(MIMESEP_END);
/* columns still free on the first line */
1613 left = MAX_LINELEN - header_len;
1616 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1618 while (isspace(*srcp)) {
1621 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1624 /* output as it is if the next word is ASCII string */
1625 if (!is_next_nonascii(srcp)) {
1628 word_len = get_next_word_len(srcp);
1629 LBREAK_IF_REQUIRED(left < word_len, TRUE);
1630 while (word_len > 0) {
/* inside a word a break is only forced once MAX_HARD_LINELEN columns are
   used up.  NOTE: this invocation has no trailing semicolon, so it only
   compiles while the macro expands to a complete statement. */
1631 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1640 /* don't include parentheses and quotes in encoded strings */
1641 if (addr_field && (*srcp == '(' || *srcp == ')' || *srcp == '"')) {
1642 LBREAK_IF_REQUIRED(left < 2, FALSE);
1653 const guchar *p = srcp;
1655 gint out_enc_str_len;
1656 gint mime_block_len;
1657 gboolean cont = FALSE;
1659 while (*p != '\0') {
1660 if (isspace(*p) && !is_next_nonascii(p + 1))
1662 /* don't include parentheses in encoded
1664 if (addr_field && (*p == '(' || *p == ')' || *p == '"'))
1667 mb_len = g_utf8_skip[*p];
1669 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1670 out_str = conv_codeset_strdup
1671 (part_str, cur_encoding, out_encoding);
1677 g_warning("conv_encode_header(): code conversion failed\n");
1678 conv_unreadable_8bit(part_str);
1679 out_str = g_strdup(part_str);
1682 out_str_len = strlen(out_str);
1685 out_enc_str_len = B64LEN(out_str_len);
1688 qp_get_q_encoding_len(out_str);
1692 if (mimestr_len + out_enc_str_len <= left) {
1695 } else if (cur_len == 0) {
1697 LBREAK_IF_REQUIRED(1, FALSE);
1706 Xstrndup_a(part_str, srcp, cur_len, );
1707 out_str = conv_codeset_strdup
1708 (part_str, cur_encoding, out_encoding);
1710 g_warning("conv_encode_header(): code conversion failed\n");
1711 conv_unreadable_8bit(part_str);
1712 out_str = g_strdup(part_str);
1714 out_str_len = strlen(out_str);
1717 out_enc_str_len = B64LEN(out_str_len);
1720 qp_get_q_encoding_len(out_str);
1722 Xalloca(enc_str, out_enc_str_len + 1, );
1724 base64_encode(enc_str, out_str, out_str_len);
1726 qp_q_encode(enc_str, out_str);
1730 /* output MIME-encoded string block */
1731 mime_block_len = mimestr_len + strlen(enc_str);
1732 g_snprintf(destp, mime_block_len + 1,
1733 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1734 out_encoding, mimesep_enc, enc_str);
1735 destp += mime_block_len;
1738 left -= mime_block_len;
1741 LBREAK_IF_REQUIRED(cont, FALSE);
1751 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1752 gint header_len, gboolean addr_field)
1754 conv_encode_header_full(dest,len,src,header_len,addr_field,NULL);
1757 #undef LBREAK_IF_REQUIRED
1758 gchar *conv_filename_from_utf8(const gchar *utf8_file)
1761 GError *error = NULL;
1763 fs_file = g_filename_from_utf8(utf8_file, -1, NULL, NULL, &error);
1765 debug_print("failed to convert encoding of file name: %s\n",
1767 g_error_free(error);
1770 fs_file = g_strdup(utf8_file);
1775 gchar *conv_filename_to_utf8(const gchar *fs_file)
1777 gchar *utf8_file = NULL;
1778 GError *error = NULL;
1780 utf8_file = g_filename_to_utf8(fs_file, -1, NULL, NULL, &error);
1782 g_warning("failed to convert encoding of file name: %s\n",
1784 g_error_free(error);
1787 if (!utf8_file || !g_utf8_validate(utf8_file, -1, NULL)) {
1789 utf8_file = g_strdup(fs_file);
1790 conv_unreadable_8bit(utf8_file);