2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2012 Hiroyuki Yamamoto and the Claws Mail team
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "claws-features.h"
28 #include <glib/gi18n.h>
40 #include "quoted-printable.h"
42 #include "prefs_common.h"
44 /* For unknown reasons the iconv.m4 macro undefs that macro if no
45 const is needed. This would break the code below so we define it. */
/* Replacement byte ('_') stored in place of bytes that cannot be displayed
 * or converted.  The definition carries no trailing semicolon so that the
 * macro expands to a plain constant and can be used inside expressions. */
#define SUBST_CHAR 0x5f
/*
 * EUC-JP byte classifiers.
 *
 * Each helper looks only at the low 8 bits of its argument, so it may be
 * called with a (possibly sign-extended) char value.  They are functions
 * rather than macros so the argument is evaluated exactly once.
 */

/* Non-zero when the byte lies in 0xa1..0xfe. */
static inline int iseuckanji(int c)
{
	c &= 0xff;
	return c >= 0xa1 && c <= 0xfe;
}

/* Non-zero when the byte is 0x8e (first byte of a half-width kana pair). */
static inline int iseuchwkana1(int c)
{
	return (c & 0xff) == 0x8e;
}

/* Non-zero when the byte lies in 0xa1..0xdf (second byte of that pair). */
static inline int iseuchwkana2(int c)
{
	c &= 0xff;
	return c >= 0xa1 && c <= 0xdf;
}
68 (((c) & 0xff) == 0x8f)
/*
 * Shift_JIS byte classifiers.
 *
 * Each helper looks only at the low 8 bits of its argument, so it may be
 * called with a (possibly sign-extended) char value.  They are functions
 * rather than macros so the argument is evaluated exactly once.
 */

/* Non-zero for a double-byte lead byte: 0x81..0x9f or 0xe0..0xfc. */
static inline int issjiskanji1(int c)
{
	c &= 0xff;
	return (c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc);
}

/* Non-zero for a double-byte trail byte: 0x40..0x7e or 0x80..0xfc. */
static inline int issjiskanji2(int c)
{
	c &= 0xff;
	return (c >= 0x40 && c <= 0x7e) || (c >= 0x80 && c <= 0xfc);
}

/* Non-zero for a single-byte half-width kana: 0xa1..0xdf. */
static inline int issjishwkana(int c)
{
	c &= 0xff;
	return c >= 0xa1 && c <= 0xdf;
}
79 if (state != JIS_KANJI) { \
87 if (state != JIS_ASCII) { \
95 if (state != JIS_HWKANA) { \
103 if (state != JIS_AUXKANJI) { \
108 state = JIS_AUXKANJI; \
111 static CodeConvFunc conv_get_code_conv_func (const gchar *src_charset_str,
112 const gchar *dest_charset_str);
114 static gchar *conv_iconv_strdup_with_cd (const gchar *inbuf,
117 static gchar *conv_iconv_strdup (const gchar *inbuf,
118 const gchar *src_code,
119 const gchar *dest_code);
121 static CharSet conv_get_locale_charset (void);
122 static CharSet conv_get_outgoing_charset (void);
123 static CharSet conv_guess_ja_encoding(const gchar *str);
124 static gboolean conv_is_ja_locale (void);
126 static gint conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
127 static gint conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf);
128 static gint conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
130 static gint conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
131 static gint conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
132 static gint conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
133 static gint conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
135 static gint conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
136 static gint conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf);
138 static void conv_unreadable_8bit(gchar *str);
140 static gint conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
141 static gint conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
142 static gint conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
144 static gint conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
145 static gint conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
146 static gint conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf);
/* File-wide switch read by conv_codeset_strdup(): when set, that function
 * validates same-charset UTF-8 input and returns a converter's output only
 * if the converter reported success (returned 0).  conv_localetodisp()
 * brackets its conversions with codeconv_set_strict(TRUE)/(FALSE) --
 * presumably the setter for this flag; its body is not visible here. */
148 static gboolean strict_mode = FALSE;
150 void codeconv_set_strict(gboolean mode)
/*
 * Convert the NUL-terminated ISO-2022-JP ("JIS") string inbuf into outbuf.
 *
 * The visible lines walk the input while tracking a JISState (starting at
 * JIS_ASCII), examine the bytes that follow an escape introducer
 * ('@'/'B', '(' plus a further byte, "(B"/"(J", "(I") and the 0x0e/0x0f
 * shift characters, and copy character bytes to the output with the high
 * bit (0x80) set.
 *
 * NOTE(review): how outlen bounds the output and what value is returned
 * cannot be seen in this excerpt.
 */
155 static gint conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
157 const guchar *in = inbuf;
158 guchar *out = outbuf;
159 JISState state = JIS_ASCII;
161 while (*in != '\0') {
165 if (*(in + 1) == '@' || *(in + 1) == 'B') {
168 } else if (*(in + 1) == '(' &&
170 state = JIS_AUXKANJI;
173 /* unknown escape sequence */
176 } else if (*in == '(') {
177 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
180 } else if (*(in + 1) == 'I') {
184 /* unknown escape sequence */
188 /* unknown escape sequence */
191 } else if (*in == 0x0e) {
194 } else if (*in == 0x0f) {
/* two-byte character: set the high bit on both bytes, but stop if the
   input ends after the first one */
203 *out++ = *in++ | 0x80;
204 if (*in == '\0') break;
205 *out++ = *in++ | 0x80;
/* single byte emitted with the high bit set */
209 *out++ = *in++ | 0x80;
/* another two-byte case, again guarding against a truncated pair */
213 *out++ = *in++ | 0x80;
214 if (*in == '\0') break;
215 *out++ = *in++ | 0x80;
/* Codes of the two half-width sound marks; conv_jis_hantozen() compares its
 * sound_sym argument against them to choose the dakuten or handakuten
 * lookup table. */
225 #define JIS_HWDAKUTEN 0x5e
226 #define JIS_HWHANDAKUTEN 0x5f
/*
 * Translate one half-width code (jis_code), optionally combined with a
 * following sound mark (sound_sym), into a two-byte code that is stored
 * big-endian in outbuf[0] and outbuf[1].
 *
 * Three lookup tables are used:
 *   - dakuten_tbl when sound_sym is JIS_HWDAKUTEN and jis_code is in
 *     0x36..0x4e (indexed by jis_code - 0x30);
 *   - handakuten_tbl when sound_sym is JIS_HWHANDAKUTEN and jis_code is in
 *     0x4a..0x4e (indexed by jis_code - 0x4a);
 *   - h2z_tbl otherwise (indexed by jis_code - 0x20).
 * Codes outside 0x21..0x5f are rejected by the range check.
 *
 * NOTE(review): the return statements are not visible here; conv_euctojis()
 * stores the result in a variable named `len`, so it presumably reports how
 * many input characters were consumed or bytes produced -- confirm.
 */
228 static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
/* plain half-width -> two-byte mapping, first entry corresponds to 0x20 */
230 static guint16 h2z_tbl[] = {
232 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
233 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
235 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
236 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
238 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
239 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
241 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
242 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
/* mapping for code + dakuten, first entry corresponds to 0x30;
   0x0000 marks combinations that have no entry */
245 static guint16 dakuten_tbl[] = {
247 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
248 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
250 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
251 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
/* mapping for code + handakuten, first entry corresponds to 0x4a */
254 static guint16 handakuten_tbl[] = {
256 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
/* only 0x21..0x5f can be looked up in h2z_tbl */
264 if (jis_code < 0x21 || jis_code > 0x5f)
267 if (sound_sym == JIS_HWDAKUTEN &&
268 jis_code >= 0x36 && jis_code <= 0x4e) {
269 out_code = dakuten_tbl[jis_code - 0x30];
271 *outbuf = out_code >> 8;
272 *(outbuf + 1) = out_code & 0xff;
277 if (sound_sym == JIS_HWHANDAKUTEN &&
278 jis_code >= 0x4a && jis_code <= 0x4e) {
279 out_code = handakuten_tbl[jis_code - 0x4a];
280 *outbuf = out_code >> 8;
281 *(outbuf + 1) = out_code & 0xff;
/* no (usable) sound mark: plain table lookup */
285 out_code = h2z_tbl[jis_code - 0x20];
286 *outbuf = out_code >> 8;
287 *(outbuf + 1) = out_code & 0xff;
/*
 * Convert the NUL-terminated EUC-JP string inbuf into outbuf as JIS.
 *
 * The visible lines classify each input byte with the iseuc*() helpers:
 *   - a pair of iseuckanji() bytes is copied with the high bit cleared;
 *   - a half-width kana pair (iseuchwkana1 + iseuchwkana2) is either copied
 *     with the high bit cleared when prefs_common.allow_jisx0201_kana is
 *     set, or passed to conv_jis_hantozen(), which also receives the
 *     following kana byte when one is present;
 *   - an iseucaux() prefix followed by two iseuckanji() bytes is likewise
 *     copied with the high bit cleared.
 * The repeated "*in != '\0' && !IS_ASCII(*in)" tests sit on the paths taken
 * for malformed sequences; what they do is not visible in this excerpt.
 *
 * NOTE(review): escape-sequence emission, outlen handling and the return
 * value are on lines not shown here.
 */
291 static gint conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
293 const guchar *in = inbuf;
294 guchar *out = outbuf;
295 JISState state = JIS_ASCII;
297 while (*in != '\0') {
301 } else if (iseuckanji(*in)) {
302 if (iseuckanji(*(in + 1))) {
304 *out++ = *in++ & 0x7f;
305 *out++ = *in++ & 0x7f;
310 if (*in != '\0' && !IS_ASCII(*in)) {
315 } else if (iseuchwkana1(*in)) {
316 if (iseuchwkana2(*(in + 1))) {
317 if (prefs_common.allow_jisx0201_kana) {
320 *out++ = *in++ & 0x7f;
/* look ahead for a second half-width pair so a sound mark can be merged */
325 if (iseuchwkana1(*(in + 2)) &&
326 iseuchwkana2(*(in + 3)))
327 len = conv_jis_hantozen
329 *(in + 1), *(in + 3));
331 len = conv_jis_hantozen
346 if (*in != '\0' && !IS_ASCII(*in)) {
351 } else if (iseucaux(*in)) {
353 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
355 *out++ = *in++ & 0x7f;
356 *out++ = *in++ & 0x7f;
359 if (*in != '\0' && !IS_ASCII(*in)) {
362 if (*in != '\0' && !IS_ASCII(*in)) {
/*
 * Convert the NUL-terminated Shift_JIS string inbuf into outbuf as EUC-JP.
 *
 * For a valid lead/trail pair (issjiskanji1 + issjiskanji2) the visible
 * arithmetic rebases the lead byte by 0x70 or 0xb0 (depending on whether it
 * is below 0xa0), doubles it, adjusts the trail byte, and writes both bytes
 * with the high bit set.  Half-width kana (issjishwkana) get their own
 * branch, whose body is not shown.
 *
 * NOTE(review): outlen handling and the return value are on lines that are
 * not visible here.
 */
380 static gint conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
382 const guchar *in = inbuf;
383 guchar *out = outbuf;
385 while (*in != '\0') {
388 } else if (issjiskanji1(*in)) {
389 if (issjiskanji2(*(in + 1))) {
391 guchar out2 = *(in + 1);
/* lead bytes 0x81..0x9f and 0xe0..0xfc use different bases */
394 row = out1 < 0xa0 ? 0x70 : 0xb0;
396 out1 = (out1 - row) * 2 - 1;
397 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
399 out1 = (out1 - row) * 2;
403 *out++ = out1 | 0x80;
404 *out++ = out2 | 0x80;
409 if (*in != '\0' && !IS_ASCII(*in)) {
414 } else if (issjishwkana(*in)) {
/*
 * Convert JIS input to UTF-8 in two steps: conv_jistoeuc() into a temporary
 * buffer of outlen bytes obtained with Xalloca(), then conv_euctoutf8()
 * from that buffer into outbuf.  A negative result from either step is
 * tested; the Xalloca() invocation carries `return -1` as its third
 * argument (presumably the action taken when the allocation fails).
 */
427 static gint conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
431 Xalloca(eucstr, outlen, return -1);
433 if (conv_jistoeuc(eucstr, outlen, inbuf) <0)
435 if (conv_euctoutf8(outbuf, outlen, eucstr) < 0)
/*
 * Convert Shift_JIS input to UTF-8 through conv_iconv_strdup() and copy the
 * result into outbuf (at most outlen, via strncpy2()).  The second
 * strncpy2() copies inbuf unchanged -- presumably the fallback taken when
 * the conversion returns NULL; the branch condition is not visible here.
 */
440 static gint conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
444 tmpstr = conv_iconv_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);
446 strncpy2(outbuf, tmpstr, outlen);
450 strncpy2(outbuf, inbuf, outlen);
/*
 * Convert EUC-JP input to UTF-8 using an iconv descriptor that is kept in a
 * function-local static, so it is opened at most once and then reused.
 *
 * On first use the descriptor is opened for CS_EUC_JP_MS and, if that
 * fails, for CS_EUC_JP; if both fail a warning is logged and inbuf is
 * copied to outbuf unconverted.  The static iconv_ok flag accompanies the
 * descriptor (presumably remembering a failed open so it is not retried).
 * After a conversion, the result -- or inbuf, on the other visible path --
 * is copied into outbuf with strncpy2().
 */
455 static gint conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
457 static iconv_t cd = (iconv_t)-1;
458 static gboolean iconv_ok = TRUE;
461 if (cd == (iconv_t)-1) {
463 strncpy2(outbuf, inbuf, outlen);
/* prefer the Microsoft variant, fall back to plain EUC-JP */
466 cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
467 if (cd == (iconv_t)-1) {
468 cd = iconv_open(CS_UTF_8, CS_EUC_JP);
469 if (cd == (iconv_t)-1) {
470 g_warning("conv_euctoutf8(): %s\n",
473 strncpy2(outbuf, inbuf, outlen);
479 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
481 strncpy2(outbuf, tmpstr, outlen);
485 strncpy2(outbuf, inbuf, outlen);
/*
 * Convert Japanese text of unknown encoding to UTF-8: the encoding is
 * guessed with conv_guess_ja_encoding() and the matching converter
 * (conv_jistoutf8, conv_sjistoutf8 or conv_euctoutf8) is called, its result
 * kept in r.  The remaining visible branch copies inbuf unchanged.  The
 * case labels themselves are not visible in this excerpt.
 */
490 static gint conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
493 switch (conv_guess_ja_encoding(inbuf)) {
495 r = conv_jistoutf8(outbuf, outlen, inbuf);
498 r = conv_sjistoutf8(outbuf, outlen, inbuf);
501 r = conv_euctoutf8(outbuf, outlen, inbuf);
505 strncpy2(outbuf, inbuf, outlen);
/*
 * Convert UTF-8 input to EUC-JP using an iconv descriptor that is kept in a
 * function-local static, so it is opened at most once and then reused.
 *
 * On first use the descriptor is opened with CS_EUC_JP_MS as the target
 * and, if that fails, CS_EUC_JP; if both fail a warning is logged and inbuf
 * is copied to outbuf unconverted.  The static iconv_ok flag accompanies
 * the descriptor (presumably remembering a failed open).  After a
 * conversion, the result -- or inbuf, on the other visible path -- is
 * copied into outbuf with strncpy2().
 */
512 static gint conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
514 static iconv_t cd = (iconv_t)-1;
515 static gboolean iconv_ok = TRUE;
518 if (cd == (iconv_t)-1) {
520 strncpy2(outbuf, inbuf, outlen);
/* prefer the Microsoft variant, fall back to plain EUC-JP */
523 cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
524 if (cd == (iconv_t)-1) {
525 cd = iconv_open(CS_EUC_JP, CS_UTF_8);
526 if (cd == (iconv_t)-1) {
527 g_warning("conv_utf8toeuc(): %s\n",
530 strncpy2(outbuf, inbuf, outlen);
536 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
538 strncpy2(outbuf, tmpstr, outlen);
542 strncpy2(outbuf, inbuf, outlen);
/*
 * Convert UTF-8 input to JIS in two steps: conv_utf8toeuc() into a
 * temporary buffer of outlen bytes obtained with Xalloca(), then
 * conv_euctojis() from that buffer into outbuf.  A negative result from
 * either step is tested; the Xalloca() invocation carries `return -1` as
 * its third argument (presumably the action taken when allocation fails).
 */
547 static gint conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf)
551 Xalloca(eucstr, outlen, return -1);
553 if (conv_utf8toeuc(eucstr, outlen, inbuf) < 0)
555 if (conv_euctojis(outbuf, outlen, eucstr) < 0)
/*
 * Sanitize str in place: a CR immediately followed by LF is removed by
 * shifting the rest of the string (terminator included) one byte to the
 * left, and every other non-ASCII byte is overwritten with SUBST_CHAR.
 * The loop that advances p is on lines not visible in this excerpt.
 */
561 static void conv_unreadable_8bit(gchar *str)
563 register guchar *p = str;
566 /* convert CR+LF -> LF */
567 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes starting at p + 1 == remaining text plus its NUL */
568 memmove(p, p + 1, strlen(p));
569 else if (!IS_ASCII(*p)) *p = SUBST_CHAR;
/*
 * Heuristically guess which Japanese encoding str uses.
 *
 * The guess starts as C_US_ASCII.  While it is still C_US_ASCII, an ESC
 * followed by '$' or '(' makes the function return C_ISO_2022_JP at once.
 * Byte pairs are then tested against the EUC-JP and Shift_JIS classifier
 * helpers; the visible assignments move the guess to C_SHIFT_JIS when a
 * pair or a single half-width kana byte matches the Shift_JIS patterns.
 *
 * NOTE(review): the branches that select other charsets, the pointer
 * advance and the final return are on lines not shown here.
 */
574 static CharSet conv_guess_ja_encoding(const gchar *str)
576 const guchar *p = str;
577 CharSet guessed = C_US_ASCII;
580 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
581 if (guessed == C_US_ASCII)
582 return C_ISO_2022_JP;
584 } else if (IS_ASCII(*p)) {
586 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
587 if (*p >= 0xfd && *p <= 0xfe)
589 else if (guessed == C_SHIFT_JIS) {
590 if ((issjiskanji1(*p) &&
591 issjiskanji2(*(p + 1))) ||
593 guessed = C_SHIFT_JIS;
599 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
600 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
601 guessed = C_SHIFT_JIS;
605 } else if (issjishwkana(*p)) {
606 guessed = C_SHIFT_JIS;
/* JIS -> display encoding: forwards to conv_jistoutf8() and returns its
 * result unchanged. */
616 static gint conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
618 return conv_jistoutf8(outbuf, outlen, inbuf);
/* Shift_JIS -> display encoding: forwards to conv_sjistoutf8() and returns
 * its result unchanged. */
621 static gint conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
623 return conv_sjistoutf8(outbuf, outlen, inbuf);
/* EUC-JP -> display encoding: forwards to conv_euctoutf8() and returns its
 * result unchanged. */
626 static gint conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
628 return conv_euctoutf8(outbuf, outlen, inbuf);
/*
 * Prepare a string that is expected to be UTF-8 for display.  Input that
 * passes g_utf8_validate() is copied to outbuf as is; the other visible
 * statement hands the input to conv_ustodisp(), which replaces non-ASCII
 * bytes (the `else` joining the two is on a line not shown here).
 */
631 void conv_utf8todisp(gchar *outbuf, gint outlen, const gchar *inbuf)
633 if (g_utf8_validate(inbuf, -1, NULL) == TRUE)
634 strncpy2(outbuf, inbuf, outlen);
636 conv_ustodisp(outbuf, outlen, inbuf);
/*
 * Convert text of unknown Japanese encoding for display: runs
 * conv_anytoutf8(), checks it for a negative result, and if the output is
 * still not valid UTF-8 sanitizes it in place with conv_unreadable_8bit().
 * The return statement is not visible in this excerpt.
 */
639 static gint conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
642 if (conv_anytoutf8(outbuf, outlen, inbuf) < 0)
644 if (g_utf8_validate(outbuf, -1, NULL) != TRUE)
645 conv_unreadable_8bit(outbuf);
/*
 * US-ASCII display conversion: copies inbuf into outbuf (bounded by outlen
 * through strncpy2()) and then replaces every non-ASCII byte of the copy
 * via conv_unreadable_8bit().  The return statement is not visible here.
 */
649 static gint conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
651 strncpy2(outbuf, inbuf, outlen);
652 conv_unreadable_8bit(outbuf);
/*
 * Convert a locale-encoded string for display.
 *
 * First attempt: conv_iconv_strdup() from the locale charset, with strict
 * mode switched on for the duration of the call.  If that yields valid
 * UTF-8 it is copied to outbuf.  If it yields something that is not valid
 * UTF-8, a second strict conversion is tried from the charset reported by
 * conv_get_locale_charset_str_no_utf8().  The last visible statement falls
 * back to conv_utf8todisp() on the original input.
 *
 * NOTE(review): freeing of tmpstr and the exact nesting of the fallback are
 * on lines not shown in this excerpt.
 */
657 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
661 codeconv_set_strict(TRUE);
662 tmpstr = conv_iconv_strdup(inbuf, conv_get_locale_charset_str(),
664 codeconv_set_strict(FALSE);
665 if (tmpstr && g_utf8_validate(tmpstr, -1, NULL)) {
666 strncpy2(outbuf, tmpstr, outlen);
669 } else if (tmpstr && !g_utf8_validate(tmpstr, -1, NULL)) {
/* retry with the non-UTF-8 locale charset */
671 codeconv_set_strict(TRUE);
672 tmpstr = conv_iconv_strdup(inbuf,
673 conv_get_locale_charset_str_no_utf8(),
675 codeconv_set_strict(FALSE);
677 if (tmpstr && g_utf8_validate(tmpstr, -1, NULL)) {
678 strncpy2(outbuf, tmpstr, outlen);
683 conv_utf8todisp(outbuf, outlen, inbuf);
/* Identity "conversion": copies inbuf into outbuf, bounded by outlen via
 * strncpy2().  Its address also serves as the marker for "no built-in
 * converter" (see the comparisons in conv_convert() and
 * conv_codeset_strdup()).  The return statement is not visible here. */
687 static gint conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
689 strncpy2(outbuf, inbuf, outlen);
/*
 * Map a private ("X-" / "x-" prefixed) charset name to a replacement.
 * Only names starting with that prefix are examined, and the one visible
 * comparison is a case-insensitive match against CS_X_GBK.
 *
 * NOTE(review): the return type, the value substituted for CS_X_GBK and
 * the default return are on lines not shown; callers assign the result back
 * to their charset-name variable, so the input is presumably returned
 * unchanged when nothing matches -- confirm.
 */
694 conv_get_fallback_for_private_encoding(const gchar *encoding)
696 if (encoding && (encoding[0] == 'X' || encoding[0] == 'x') &&
697 encoding[1] == '-') {
698 if (!g_ascii_strcasecmp(encoding, CS_X_GBK))
/*
 * Allocate a zero-initialized CodeConverter for text in src_charset.
 *
 * The charset name is first passed through
 * conv_get_fallback_for_private_encoding(); the converter then records the
 * conversion function chosen by conv_get_code_conv_func() (destination
 * NULL), a g_strdup() copy of the name, and the CharSet value parsed from
 * it.  The copy is released again by conv_code_converter_destroy().
 */
705 CodeConverter *conv_code_converter_new(const gchar *src_charset)
709 src_charset = conv_get_fallback_for_private_encoding(src_charset);
711 conv = g_new0(CodeConverter, 1);
712 conv->code_conv_func = conv_get_code_conv_func(src_charset, NULL);
713 conv->charset_str = g_strdup(src_charset);
714 conv->charset = conv_get_charset_from_str(src_charset);
/* Release a converter created by conv_code_converter_new(): frees the
 * duplicated charset name.  (Freeing of the structure itself is presumably
 * on a line not shown in this excerpt.) */
719 void conv_code_converter_destroy(CodeConverter *conv)
721 g_free(conv->charset_str);
/*
 * Convert inbuf into outbuf using the given converter.
 *
 * When the converter holds a built-in function (anything other than
 * conv_noconv) that function does the work and its result is returned.
 * Otherwise the text is converted with conv_iconv_strdup() from the
 * converter's charset name (destination NULL) and the result is copied
 * into outbuf, bounded by outlen.  Handling of a NULL result and the final
 * return value are on lines not shown here.
 */
725 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
728 if (conv->code_conv_func != conv_noconv)
729 return conv->code_conv_func(outbuf, outlen, inbuf);
733 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
737 strncpy2(outbuf, str, outlen);
/*
 * Return a newly allocated copy of inbuf converted from src_code to
 * dest_code.
 *
 * - Identical source and destination names: the input is duplicated as is;
 *   in strict mode with a UTF-8 destination it is validated first.
 * - Otherwise the source name goes through
 *   conv_get_fallback_for_private_encoding() and a built-in converter is
 *   looked up.  If one exists it runs into a buffer of
 *   (strlen(inbuf) + 1) * 3 bytes, which is then shrunk to fit; the result
 *   is returned when the converter returned 0 or strict mode is off.
 * - With no built-in converter, conv_iconv_strdup() does the conversion.
 *
 * NOTE(review): what happens when strict validation or a strict conversion
 * fails is on lines not shown here (presumably NULL is returned).
 */
745 gchar *conv_codeset_strdup(const gchar *inbuf,
746 const gchar *src_code, const gchar *dest_code)
750 CodeConvFunc conv_func;
752 if (!strcmp2(src_code, dest_code)) {
753 CharSet dest_charset = conv_get_charset_from_str(dest_code);
754 if (strict_mode && dest_charset == C_UTF_8) {
755 /* ensure valid UTF-8 if target is UTF-8 */
756 if (!g_utf8_validate(inbuf, -1, NULL)) {
760 /* otherwise, try for a lucky day */
761 return g_strdup(inbuf);
764 src_code = conv_get_fallback_for_private_encoding(src_code);
765 conv_func = conv_get_code_conv_func(src_code, dest_code);
/* the ASCII converter would mangle non-ASCII input; strict mode checks for that */
766 if (conv_func == conv_ustodisp && strict_mode && !is_ascii_str(inbuf))
769 if (conv_func != conv_noconv) {
/* output buffer sized at three bytes per input byte, plus terminator */
770 len = (strlen(inbuf) + 1) * 3;
772 if (!buf) return NULL;
774 if (conv_func(buf, len, inbuf) == 0 || !strict_mode)
775 return g_realloc(buf, strlen(buf) + 1);
782 return conv_iconv_strdup(inbuf, src_code, dest_code);
/*
 * Pick the built-in conversion routine for a source/destination charset
 * pair given by name.
 *
 * - A NULL source name means the locale charset.
 * - Both names NULL ("auto detection mode"): conv_anytodisp is returned
 *   under a Japanese locale.
 * - Destination US-ASCII: conv_ustodisp is returned.
 * - Otherwise the switch on the source charset selects one of the Japanese
 *   converters according to the destination.
 * The initial choice is conv_noconv, which callers treat as "no built-in
 * converter".
 *
 * NOTE(review): most case labels and the final return are on lines not
 * shown in this excerpt.
 */
785 static CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
786 const gchar *dest_charset_str)
788 CodeConvFunc code_conv = conv_noconv;
790 CharSet dest_charset;
792 if (!src_charset_str)
793 src_charset = conv_get_locale_charset();
795 src_charset = conv_get_charset_from_str(src_charset_str);
797 /* auto detection mode */
798 if (!src_charset_str && !dest_charset_str) {
799 if (conv_is_ja_locale())
800 return conv_anytodisp;
805 dest_charset = conv_get_charset_from_str(dest_charset_str);
807 if (dest_charset == C_US_ASCII)
808 return conv_ustodisp;
810 switch (src_charset) {
/* ISO-2022-JP family source */
828 case C_ISO_2022_JP_2:
829 case C_ISO_2022_JP_3:
830 if (dest_charset == C_AUTO)
831 code_conv = conv_jistodisp;
832 else if (dest_charset == C_EUC_JP)
833 code_conv = conv_jistoeuc;
834 else if (dest_charset == C_UTF_8)
835 code_conv = conv_jistoutf8;
/* the sjis* converters below serve a Shift_JIS source */
838 if (dest_charset == C_AUTO)
839 code_conv = conv_sjistodisp;
840 else if (dest_charset == C_EUC_JP)
841 code_conv = conv_sjistoeuc;
842 else if (dest_charset == C_UTF_8)
843 code_conv = conv_sjistoutf8;
/* the euc* converters below serve an EUC-JP source */
846 if (dest_charset == C_AUTO)
847 code_conv = conv_euctodisp;
848 else if (dest_charset == C_ISO_2022_JP ||
849 dest_charset == C_ISO_2022_JP_2 ||
850 dest_charset == C_ISO_2022_JP_3)
851 code_conv = conv_euctojis;
852 else if (dest_charset == C_UTF_8)
853 code_conv = conv_euctoutf8;
/* the utf8* converters below serve a UTF-8 source */
856 if (dest_charset == C_EUC_JP)
857 code_conv = conv_utf8toeuc;
858 else if (dest_charset == C_ISO_2022_JP ||
859 dest_charset == C_ISO_2022_JP_2 ||
860 dest_charset == C_ISO_2022_JP_3)
861 code_conv = conv_utf8tojis;
/*
 * Convert inbuf from src_code to dest_code with iconv and return the result
 * as a newly allocated string.
 *
 * The input is simply duplicated when
 *   - both charset names are NULL and the input is already valid UTF-8,
 *   - the two names are equal (case-insensitive), or
 *   - either side is US-ASCII.
 * The visible defaults are conv_get_outgoing_charset_str() for the source
 * and CS_INTERNAL for the destination (the conditions guarding them are on
 * lines not shown; presumably they apply when the name is NULL).
 * Otherwise a descriptor is opened with iconv_open() and the work is done
 * by conv_iconv_strdup_with_cd().
 *
 * NOTE(review): the action taken when iconv_open() fails and the closing of
 * the descriptor are on lines not visible in this excerpt.
 */
870 static gchar *conv_iconv_strdup(const gchar *inbuf,
871 const gchar *src_code, const gchar *dest_code)
876 if (!src_code && !dest_code &&
877 g_utf8_validate(inbuf, -1, NULL))
878 return g_strdup(inbuf);
881 src_code = conv_get_outgoing_charset_str();
883 dest_code = CS_INTERNAL;
885 /* don't convert if src and dest codeset are identical */
886 if (!strcasecmp(src_code, dest_code))
887 return g_strdup(inbuf);
889 /* don't convert if src codeset is US-ASCII */
890 if (!strcasecmp(src_code, CS_US_ASCII))
891 return g_strdup(inbuf);
893 /* don't convert if dest codeset is US-ASCII */
894 if (!strcasecmp(dest_code, CS_US_ASCII))
895 return g_strdup(inbuf);
897 cd = iconv_open(dest_code, src_code);
898 if (cd == (iconv_t)-1)
901 outbuf = conv_iconv_strdup_with_cd(inbuf, cd);
/*
 * Run inbuf through the already-opened iconv descriptor cd and return the
 * converted text in a buffer allocated with g_malloc()/g_realloc().
 *
 * The output buffer starts at (strlen(inbuf) + 1) * 2 bytes and is enlarged
 * through the local EXPAND_BUF() macro.  The first loop repeats iconv()
 * while it fails:
 *   - EILSEQ: a SUBST_CHAR is written to the output;
 *   - EINVAL and E2BIG have their own branches (bodies not shown; E2BIG
 *     presumably triggers EXPAND_BUF());
 *   - anything else is reported with g_warning().
 * The second loop calls iconv() with a NULL input to flush any pending
 * shift state, again handling E2BIG.  Finally the buffer is trimmed to the
 * produced length plus one byte.
 *
 * NOTE(review): termination of the result, in-strict-mode behaviour and the
 * return statement are on lines not visible in this excerpt.
 */
908 gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd)
910 const gchar *inbuf_p;
921 in_size = strlen(inbuf);
923 out_size = (in_size + 1) * 2;
924 outbuf = g_malloc(out_size);
/* grow outbuf while keeping the write position and remaining-space count
   consistent with the (possibly moved) buffer */
928 #define EXPAND_BUF() \
930 len = outbuf_p - outbuf; \
932 outbuf = g_realloc(outbuf, out_size); \
933 outbuf_p = outbuf + len; \
934 out_left = out_size - len; \
937 while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
938 &outbuf_p, &out_left)) == (size_t)-1) {
939 if (EILSEQ == errno) {
944 //g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
950 *outbuf_p++ = SUBST_CHAR;
952 } else if (EINVAL == errno) {
954 } else if (E2BIG == errno) {
957 g_warning("conv_iconv_strdup(): %s\n",
/* flush: a NULL input asks iconv() to emit any pending reset sequence */
963 while ((n_conv = iconv(cd, NULL, NULL, &outbuf_p, &out_left)) ==
965 if (E2BIG == errno) {
968 g_warning("conv_iconv_strdup(): %s\n",
976 len = outbuf_p - outbuf;
977 outbuf = g_realloc(outbuf, len + 1);
983 static const struct {
987 {C_US_ASCII, CS_US_ASCII},
988 {C_US_ASCII, CS_ANSI_X3_4_1968},
991 {C_ISO_8859_1, CS_ISO_8859_1},
992 {C_ISO_8859_2, CS_ISO_8859_2},
993 {C_ISO_8859_3, CS_ISO_8859_3},
994 {C_ISO_8859_4, CS_ISO_8859_4},
995 {C_ISO_8859_5, CS_ISO_8859_5},
996 {C_ISO_8859_6, CS_ISO_8859_6},
997 {C_ISO_8859_7, CS_ISO_8859_7},
998 {C_ISO_8859_8, CS_ISO_8859_8},
999 {C_ISO_8859_9, CS_ISO_8859_9},
1000 {C_ISO_8859_10, CS_ISO_8859_10},
1001 {C_ISO_8859_11, CS_ISO_8859_11},
1002 {C_ISO_8859_13, CS_ISO_8859_13},
1003 {C_ISO_8859_14, CS_ISO_8859_14},
1004 {C_ISO_8859_15, CS_ISO_8859_15},
1005 {C_BALTIC, CS_BALTIC},
1006 {C_CP1250, CS_CP1250},
1007 {C_CP1251, CS_CP1251},
1008 {C_CP1252, CS_CP1252},
1009 {C_CP1253, CS_CP1253},
1010 {C_CP1254, CS_CP1254},
1011 {C_CP1255, CS_CP1255},
1012 {C_CP1256, CS_CP1256},
1013 {C_CP1257, CS_CP1257},
1014 {C_CP1258, CS_CP1258},
1015 {C_WINDOWS_1250, CS_WINDOWS_1250},
1016 {C_WINDOWS_1251, CS_WINDOWS_1251},
1017 {C_WINDOWS_1252, CS_WINDOWS_1252},
1018 {C_WINDOWS_1253, CS_WINDOWS_1253},
1019 {C_WINDOWS_1254, CS_WINDOWS_1254},
1020 {C_WINDOWS_1255, CS_WINDOWS_1255},
1021 {C_WINDOWS_1256, CS_WINDOWS_1256},
1022 {C_WINDOWS_1257, CS_WINDOWS_1257},
1023 {C_WINDOWS_1258, CS_WINDOWS_1258},
1024 {C_KOI8_R, CS_KOI8_R},
1025 {C_KOI8_T, CS_KOI8_T},
1026 {C_KOI8_U, CS_KOI8_U},
1027 {C_ISO_2022_JP, CS_ISO_2022_JP},
1028 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
1029 {C_ISO_2022_JP_3, CS_ISO_2022_JP_3},
1030 {C_EUC_JP, CS_EUC_JP},
1031 {C_EUC_JP, CS_EUCJP},
1032 {C_EUC_JP_MS, CS_EUC_JP_MS},
1033 {C_SHIFT_JIS, CS_SHIFT_JIS},
1034 {C_SHIFT_JIS, CS_SHIFT__JIS},
1035 {C_SHIFT_JIS, CS_SJIS},
1036 {C_ISO_2022_KR, CS_ISO_2022_KR},
1037 {C_EUC_KR, CS_EUC_KR},
1038 {C_ISO_2022_CN, CS_ISO_2022_CN},
1039 {C_EUC_CN, CS_EUC_CN},
1040 {C_GB18030, CS_GB18030},
1041 {C_GB2312, CS_GB2312},
1043 {C_EUC_TW, CS_EUC_TW},
1045 {C_BIG5_HKSCS, CS_BIG5_HKSCS},
1046 {C_TIS_620, CS_TIS_620},
1047 {C_WINDOWS_874, CS_WINDOWS_874},
1048 {C_GEORGIAN_PS, CS_GEORGIAN_PS},
1049 {C_TCVN5712_1, CS_TCVN5712_1},
1052 static const struct {
1053 gchar *const locale;
1055 CharSet out_charset;
1056 } locale_table[] = {
1057 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
1058 {"ja_JP.EUC-JP" , C_EUC_JP , C_ISO_2022_JP},
1059 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
1060 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
1061 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
1062 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
1064 {"ja_JP" , C_SHIFT_JIS , C_ISO_2022_JP},
1066 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
1068 {"ko_KR.EUC-KR" , C_EUC_KR , C_EUC_KR},
1069 {"ko_KR" , C_EUC_KR , C_EUC_KR},
1070 {"zh_CN.GB18030" , C_GB18030 , C_GB18030},
1071 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
1072 {"zh_CN.GBK" , C_GBK , C_GBK},
1073 {"zh_CN" , C_GB18030 , C_GB18030},
1074 {"zh_HK" , C_BIG5_HKSCS , C_BIG5_HKSCS},
1075 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
1076 {"zh_TW.EUC-TW" , C_EUC_TW , C_BIG5},
1077 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
1078 {"zh_TW" , C_BIG5 , C_BIG5},
1080 {"ru_RU.KOI8-R" , C_KOI8_R , C_KOI8_R},
1081 {"ru_RU.KOI8R" , C_KOI8_R , C_KOI8_R},
1082 {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
1084 {"ru_RU" , C_WINDOWS_1251, C_KOI8_R},
1086 {"ru_RU" , C_ISO_8859_5 , C_KOI8_R},
1088 {"tg_TJ" , C_KOI8_T , C_KOI8_T},
1089 {"ru_UA" , C_KOI8_U , C_KOI8_U},
1090 {"uk_UA.CP1251" , C_WINDOWS_1251, C_KOI8_U},
1091 {"uk_UA" , C_KOI8_U , C_KOI8_U},
1093 {"be_BY" , C_WINDOWS_1251, C_WINDOWS_1251},
1094 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
1096 {"yi_US" , C_WINDOWS_1255, C_WINDOWS_1255},
1098 {"af_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1099 {"br_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1100 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1101 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1102 {"de_AT" , C_ISO_8859_1 , C_ISO_8859_1},
1103 {"de_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1104 {"de_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1105 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
1106 {"de_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1107 {"en_AU" , C_ISO_8859_1 , C_ISO_8859_1},
1108 {"en_BW" , C_ISO_8859_1 , C_ISO_8859_1},
1109 {"en_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1110 {"en_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1111 {"en_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1112 {"en_HK" , C_ISO_8859_1 , C_ISO_8859_1},
1113 {"en_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1114 {"en_NZ" , C_ISO_8859_1 , C_ISO_8859_1},
1115 {"en_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1116 {"en_SG" , C_ISO_8859_1 , C_ISO_8859_1},
1117 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
1118 {"en_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1119 {"en_ZW" , C_ISO_8859_1 , C_ISO_8859_1},
1120 {"es_AR" , C_ISO_8859_1 , C_ISO_8859_1},
1121 {"es_BO" , C_ISO_8859_1 , C_ISO_8859_1},
1122 {"es_CL" , C_ISO_8859_1 , C_ISO_8859_1},
1123 {"es_CO" , C_ISO_8859_1 , C_ISO_8859_1},
1124 {"es_CR" , C_ISO_8859_1 , C_ISO_8859_1},
1125 {"es_DO" , C_ISO_8859_1 , C_ISO_8859_1},
1126 {"es_EC" , C_ISO_8859_1 , C_ISO_8859_1},
1127 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1128 {"es_GT" , C_ISO_8859_1 , C_ISO_8859_1},
1129 {"es_HN" , C_ISO_8859_1 , C_ISO_8859_1},
1130 {"es_MX" , C_ISO_8859_1 , C_ISO_8859_1},
1131 {"es_NI" , C_ISO_8859_1 , C_ISO_8859_1},
1132 {"es_PA" , C_ISO_8859_1 , C_ISO_8859_1},
1133 {"es_PE" , C_ISO_8859_1 , C_ISO_8859_1},
1134 {"es_PR" , C_ISO_8859_1 , C_ISO_8859_1},
1135 {"es_PY" , C_ISO_8859_1 , C_ISO_8859_1},
1136 {"es_SV" , C_ISO_8859_1 , C_ISO_8859_1},
1137 {"es_US" , C_ISO_8859_1 , C_ISO_8859_1},
1138 {"es_UY" , C_ISO_8859_1 , C_ISO_8859_1},
1139 {"es_VE" , C_ISO_8859_1 , C_ISO_8859_1},
1140 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
1141 {"eu_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1142 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1143 {"fo_FO" , C_ISO_8859_1 , C_ISO_8859_1},
1144 {"fr_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1145 {"fr_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1146 {"fr_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1147 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1148 {"fr_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1149 {"ga_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1150 {"gl_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1151 {"gv_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1152 {"id_ID" , C_ISO_8859_1 , C_ISO_8859_1},
1153 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
1154 {"it_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1155 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
1156 {"kl_GL" , C_ISO_8859_1 , C_ISO_8859_1},
1157 {"kw_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1158 {"ms_MY" , C_ISO_8859_1 , C_ISO_8859_1},
1159 {"nl_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1160 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
1161 {"nb_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1162 {"nn_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1163 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1164 {"oc_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1165 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
1166 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
1167 {"sq_AL" , C_ISO_8859_1 , C_ISO_8859_1},
1168 {"sv_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1169 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
1170 {"tl_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1171 {"uz_UZ" , C_ISO_8859_1 , C_ISO_8859_1},
1172 {"wa_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1174 {"bs_BA" , C_ISO_8859_2 , C_ISO_8859_2},
1175 {"cs_CZ" , C_ISO_8859_2 , C_ISO_8859_2},
1176 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
1177 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
1178 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
1179 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
1180 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
1181 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
1183 {"sr_YU@cyrillic" , C_ISO_8859_5 , C_ISO_8859_5},
1184 {"sr_YU" , C_ISO_8859_2 , C_ISO_8859_2},
1186 {"mt_MT" , C_ISO_8859_3 , C_ISO_8859_3},
1188 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
1189 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1190 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1191 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
1193 {"mk_MK" , C_ISO_8859_5 , C_ISO_8859_5},
1195 {"ar_AE" , C_ISO_8859_6 , C_ISO_8859_6},
1196 {"ar_BH" , C_ISO_8859_6 , C_ISO_8859_6},
1197 {"ar_DZ" , C_ISO_8859_6 , C_ISO_8859_6},
1198 {"ar_EG" , C_ISO_8859_6 , C_ISO_8859_6},
1199 {"ar_IQ" , C_ISO_8859_6 , C_ISO_8859_6},
1200 {"ar_JO" , C_ISO_8859_6 , C_ISO_8859_6},
1201 {"ar_KW" , C_ISO_8859_6 , C_ISO_8859_6},
1202 {"ar_LB" , C_ISO_8859_6 , C_ISO_8859_6},
1203 {"ar_LY" , C_ISO_8859_6 , C_ISO_8859_6},
1204 {"ar_MA" , C_ISO_8859_6 , C_ISO_8859_6},
1205 {"ar_OM" , C_ISO_8859_6 , C_ISO_8859_6},
1206 {"ar_QA" , C_ISO_8859_6 , C_ISO_8859_6},
1207 {"ar_SA" , C_ISO_8859_6 , C_ISO_8859_6},
1208 {"ar_SD" , C_ISO_8859_6 , C_ISO_8859_6},
1209 {"ar_SY" , C_ISO_8859_6 , C_ISO_8859_6},
1210 {"ar_TN" , C_ISO_8859_6 , C_ISO_8859_6},
1211 {"ar_YE" , C_ISO_8859_6 , C_ISO_8859_6},
1213 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
1214 {"he_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1215 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1216 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
1218 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
1219 {"mi_NZ" , C_ISO_8859_13 , C_ISO_8859_13},
1221 {"cy_GB" , C_ISO_8859_14 , C_ISO_8859_14},
1223 {"ar_IN" , C_UTF_8 , C_UTF_8},
1224 {"en_IN" , C_UTF_8 , C_UTF_8},
1225 {"se_NO" , C_UTF_8 , C_UTF_8},
1226 {"ta_IN" , C_UTF_8 , C_UTF_8},
1227 {"te_IN" , C_UTF_8 , C_UTF_8},
1228 {"ur_PK" , C_UTF_8 , C_UTF_8},
1230 {"th_TH" , C_TIS_620 , C_TIS_620},
1231 /* {"th_TH" , C_WINDOWS_874}, */
1232 /* {"th_TH" , C_ISO_8859_11}, */
1234 {"ka_GE" , C_GEORGIAN_PS , C_GEORGIAN_PS},
1235 {"vi_VN.TCVN" , C_TCVN5712_1 , C_TCVN5712_1},
1237 {"C" , C_US_ASCII , C_US_ASCII},
1238 {"POSIX" , C_US_ASCII , C_US_ASCII},
1239 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
/*
 * Return the CharSet -> name hash table, which is held in a function-local
 * static.  The table is keyed by the CharSet value itself (direct hashing,
 * g_direct_equal) and is filled from charsets[].
 *
 * NOTE(review): each entry is looked up before the insert, so the first
 * name listed for a charset appears to be the one that is kept; the
 * comparison and the "already built" check are on lines not shown here.
 */
1242 static GHashTable *conv_get_charset_to_str_table(void)
1244 static GHashTable *table;
1250 table = g_hash_table_new(NULL, g_direct_equal);
1252 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1253 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1256 (table, GUINT_TO_POINTER(charsets[i].charset),
/*
 * Return the name -> CharSet hash table, which is held in a function-local
 * static.  Keys are the names from charsets[], hashed and compared with
 * str_case_hash/str_case_equal; values are the CharSet constants stored as
 * pointers.  The "already built" check and the return are on lines not
 * shown in this excerpt.
 */
1264 static GHashTable *conv_get_charset_from_str_table(void)
1266 static GHashTable *table;
1272 table = g_hash_table_new(str_case_hash, str_case_equal);
1274 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1275 g_hash_table_insert(table, charsets[i].name,
1276 GUINT_TO_POINTER(charsets[i].charset));
/* Return the name registered for charset in the CharSet -> name table, or
 * NULL when the lookup finds nothing. */
1282 const gchar *conv_get_charset_str(CharSet charset)
1286 table = conv_get_charset_to_str_table();
1287 return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
/* Translate a charset name into its CharSet constant.  A NULL name yields
 * C_AUTO; a name missing from the table yields the value obtained from a
 * NULL lookup result, i.e. 0. */
1290 CharSet conv_get_charset_from_str(const gchar *charset)
1294 if (!charset) return C_AUTO;
1296 table = conv_get_charset_from_str_table();
1297 return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
/*
 * Determine the charset implied by the current locale and cache it in a
 * function-local static (-1 means "not computed yet").
 *
 * Order of the visible checks:
 *   1. C_US_ASCII is assigned on an early path (presumably when no locale
 *      is available);
 *   2. a locale name containing "UTF-8" or "utf8" gives C_UTF_8;
 *   3. a name ending in "@euro" gives C_ISO_8859_15;
 *   4. locale_table[] is scanned: an entry matches when the locale name
 *      starts with the entry's name, or -- for entries without a ".codeset"
 *      part -- when the locale is a bare two-letter language code equal to
 *      the entry's first two letters;
 *   5. C_AUTO is assigned after the scan.
 */
1300 static CharSet conv_get_locale_charset(void)
1302 static CharSet cur_charset = -1;
1303 const gchar *cur_locale;
1307 if (cur_charset != -1)
1310 cur_locale = conv_get_current_locale();
1312 cur_charset = C_US_ASCII;
1316 if (strcasestr(cur_locale, "UTF-8") ||
1317 strcasestr(cur_locale, "utf8")) {
1318 cur_charset = C_UTF_8;
/* "@euro" must be the very end of the locale name */
1322 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1323 cur_charset = C_ISO_8859_15;
1327 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1330 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1331 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1332 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1333 strlen(locale_table[i].locale))) {
1334 cur_charset = locale_table[i].charset;
1336 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1337 !strchr(p + 1, '.')) {
1338 if (strlen(cur_locale) == 2 &&
1339 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1340 cur_charset = locale_table[i].charset;
1346 cur_charset = C_AUTO;
/*
 * Variant of the locale-charset lookup used by conv_localetodisp() for its
 * second conversion attempt.
 *
 * When prefs_common.broken_are_utf8 is set the answer is C_UTF_8.
 * Otherwise the visible checks are: C_US_ASCII on an early path
 * (presumably when no locale is available), a "UTF-8"/"utf8" test on the
 * locale name, C_ISO_8859_15 for names ending in "@euro", a scan of
 * locale_table[] using the same prefix / two-letter matching rules as
 * conv_get_locale_charset(), and C_AUTO after the scan.
 *
 * NOTE(review): despite the function name, the visible line inside the
 * "UTF-8" branch assigns C_UTF_8; the rest of that branch is not shown, so
 * confirm what is actually returned for UTF-8 locales.
 */
1350 static CharSet conv_get_locale_charset_no_utf8(void)
1352 static CharSet cur_charset = -1;
1353 const gchar *cur_locale;
1357 if (prefs_common.broken_are_utf8) {
1358 cur_charset = C_UTF_8;
1362 cur_locale = conv_get_current_locale();
1364 cur_charset = C_US_ASCII;
1368 if (strcasestr(cur_locale, "UTF-8") ||
1369 strcasestr(cur_locale, "utf8")) {
1370 cur_charset = C_UTF_8;
1374 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1375 cur_charset = C_ISO_8859_15;
1379 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1382 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1383 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1384 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1385 strlen(locale_table[i].locale))) {
1386 cur_charset = locale_table[i].charset;
1388 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1389 !strchr(p + 1, '.')) {
1390 if (strlen(cur_locale) == 2 &&
1391 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1392 cur_charset = locale_table[i].charset;
1398 cur_charset = C_AUTO;
/* Name of the locale charset, held in a function-local static.  Falls back
 * to CS_INTERNAL when the charset has no registered name.  (The condition
 * guarding the lookup is on a line not shown here.) */
1402 const gchar *conv_get_locale_charset_str(void)
1404 static const gchar *codeset = NULL;
1407 codeset = conv_get_charset_str(conv_get_locale_charset());
1409 return codeset ? codeset : CS_INTERNAL;
/* Name of the charset reported by conv_get_locale_charset_no_utf8(), held
 * in a function-local static.  Falls back to CS_INTERNAL when the charset
 * has no registered name.  (The condition guarding the lookup is on a line
 * not shown here.) */
1412 const gchar *conv_get_locale_charset_str_no_utf8(void)
1414 static const gchar *codeset = NULL;
1417 codeset = conv_get_charset_str(conv_get_locale_charset_no_utf8());
1419 return codeset ? codeset : CS_INTERNAL;
/*
 * Determine the default charset for outgoing text from the current locale
 * and cache it in a function-local static (-1 means "not computed yet").
 *
 * The visible checks mirror conv_get_locale_charset(), except that C_AUTO
 * is assigned on the early path and matches in locale_table[] yield the
 * entry's out_charset column:
 *   - "UTF-8"/"utf8" in the locale name gives C_UTF_8;
 *   - a name ending in "@euro" gives C_ISO_8859_15;
 *   - otherwise the table is scanned with the prefix / two-letter rules.
 */
1422 static CharSet conv_get_outgoing_charset(void)
1424 static CharSet out_charset = -1;
1425 const gchar *cur_locale;
1429 if (out_charset != -1)
1432 cur_locale = conv_get_current_locale();
1434 out_charset = C_AUTO;
1438 if (strcasestr(cur_locale, "UTF-8") ||
1439 strcasestr(cur_locale, "utf8")) {
1440 out_charset = C_UTF_8;
1444 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1445 out_charset = C_ISO_8859_15;
1449 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1452 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1453 strlen(locale_table[i].locale))) {
1454 out_charset = locale_table[i].out_charset;
1456 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1457 !strchr(p + 1, '.')) {
1458 if (strlen(cur_locale) == 2 &&
1459 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1460 out_charset = locale_table[i].out_charset;
/* Name of the outgoing charset; CS_UTF_8 when that charset has no
 * registered name. */
1469 const gchar *conv_get_outgoing_charset_str(void)
1471 CharSet out_charset;
1474 out_charset = conv_get_outgoing_charset();
1475 str = conv_get_charset_str(out_charset);
1477 return str ? str : CS_UTF_8;
/*
 * Return the name of the current locale.
 *
 * On Windows (the G_OS_WIN32 section) it comes from g_win32_getlocale().
 * Elsewhere the first value found among the LC_ALL, LC_CTYPE and LANG
 * environment variables is used, with setlocale(LC_CTYPE, NULL) as the last
 * resort.  The chosen value is logged with debug_print(); the logging code
 * itself allows for it being NULL.
 */
1480 const gchar *conv_get_current_locale(void)
1482 const gchar *cur_locale;
1485 cur_locale = g_win32_getlocale();
1487 cur_locale = g_getenv("LC_ALL");
1488 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1489 if (!cur_locale) cur_locale = g_getenv("LANG");
1490 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1491 #endif /* G_OS_WIN32 */
1493 debug_print("current locale: %s\n",
1494 cur_locale ? cur_locale : "(none)");
/*
 * Report whether the current locale is Japanese, i.e. whether its name
 * starts with "ja" (case-insensitive).  The answer is cached in a
 * function-local static, where -1 means "not determined yet".
 *
 * NOTE(review): conv_get_current_locale() can yield NULL; the guard for
 * that case is presumably on a line not shown here -- confirm.
 */
1499 static gboolean conv_is_ja_locale(void)
1501 static gint is_ja_locale = -1;
1502 const gchar *cur_locale;
1504 if (is_ja_locale != -1)
1505 return is_ja_locale != 0;
1508 cur_locale = conv_get_current_locale();
1510 if (g_ascii_strncasecmp(cur_locale, "ja", 2) == 0)
1514 return is_ja_locale != 0;
/*
 * conv_unmime_header: run a header string through unmime_header(), first
 * converting it when it is not plain ASCII.
 *
 * str              - the header text to process.
 * default_encoding - optional charset name; when non-NULL, str is converted
 *                    from it to CS_INTERNAL with conv_codeset_strdup().
 * addr_field       - passed unchanged to unmime_header().
 *
 * Returns whatever unmime_header() returns for the selected input.
 *
 * NOTE(review): every line of this listing begins with the line number of
 * the original file and some numbers are skipped (for example 1519, 1521,
 * 1524, 1526-1527, 1530-1532, 1534-1538, 1541, 1543).  What happens to
 * utf8_buf and decoded_str after the visible assignments (presumably a
 * NULL check, a free and a return) is on those hidden lines.
 */
1517 gchar *conv_unmime_header(const gchar *str, const gchar *default_encoding,
1518 gboolean addr_field)
1520 gchar buf[BUFFSIZE];
/* Input that is_ascii_str() accepts is handed over without conversion. */
1522 if (is_ascii_str(str))
1523 return unmime_header(str, addr_field);
/* A caller-supplied encoding is tried first. */
1525 if (default_encoding) {
1528 utf8_buf = conv_codeset_strdup
1529 (str, default_encoding, CS_INTERNAL);
1533 decoded_str = unmime_header(utf8_buf, addr_field);
/* Otherwise the text is converted into the fixed-size stack buffer:
   conv_anytodisp() when conv_is_ja_locale() is true; conv_localetodisp()
   is the other visible conversion (presumably the else branch, whose
   keyword is on a hidden line). */
1539 if (conv_is_ja_locale())
1540 conv_anytodisp(buf, sizeof(buf), str);
1542 conv_localetodisp(buf, sizeof(buf), str);
1544 return unmime_header(buf, addr_field);
/* Constants used by the header-encoding code that follows. */
/* Column budget for one output line; conv_encode_header_full() starts its
   "left" counter at MAX_LINELEN minus the header length. */
1547 #define MAX_LINELEN 76
/* Larger limit; the only visible use adds (MAX_HARD_LINELEN - MAX_LINELEN)
   to "left" before deciding whether to break inside an ASCII word. */
1548 #define MAX_HARD_LINELEN 996
/* Opening and closing delimiters of an encoded block, which is written as
   MIMESEP_BEGIN charset "?B?" or "?Q?" data MIMESEP_END (the RFC 2047
   encoded-word layout). */
1549 #define MIMESEP_BEGIN "=?"
1550 #define MIMESEP_END "?="
/*
 * LBREAK_IF_REQUIRED(cond, is_plain_text): line-break helper for
 * conv_encode_header_full().
 *
 * The macro is not self-contained: it reads and updates the variables
 * len, dest, destp, srcp and left of the function it is expanded in.
 *
 * Visible behaviour:
 *   - it first tests whether fewer than MAX_LINELEN + 2 bytes of dest
 *     remain unused (the action taken is on a hidden line);
 *   - when cond is true and *srcp is not the terminator, it looks at
 *     whether the last byte written or (for plain text) the next source
 *     byte is whitespace, and resets left to MAX_LINELEN - 1.
 *
 * NOTE(review): in the second branch the condition requires
 * destp == dest, yet the next visible line reads *(destp - 1), i.e. the
 * byte in front of the output buffer.  That looks like an out-of-bounds
 * read - confirm against the complete file.
 *
 * NOTE(review): every line of this listing begins with the line number of
 * the original file and many numbers are skipped (1553, 1555-1558,
 * 1562, 1564-1567, 1569, 1572, 1574-1577, 1579-1583), so the statements
 * that actually emit the break are not visible here.
 */
1552 #define LBREAK_IF_REQUIRED(cond, is_plain_text) \
1554 if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) { \
1559 if ((cond) && *srcp) { \
1560 if (destp > (guchar *)dest && left < MAX_LINELEN - 1) { \
1561 if (isspace(*(destp - 1))) \
1563 else if (is_plain_text && isspace(*srcp)) \
1568 left = MAX_LINELEN - 1; \
1570 } else if (destp == (guchar *)dest && left < 7) { \
1571 if (isspace(*(destp - 1))) \
1573 else if (is_plain_text && isspace(*srcp)) \
1578 left = MAX_LINELEN - 1; \
/*
 * Number of characters in the Base64 encoding of `len` input bytes: four
 * output characters for every group of three input bytes, with a partial
 * final group rounded up to a whole four-character group.
 *
 * `len` appears exactly once in the expansion, so an argument with side
 * effects is evaluated only once.  Intended for non-negative lengths.
 */
#define B64LEN(len) (((len) + 2) / 3 * 4)
/*
 * conv_encode_header_full: write an encoded form of the header text src
 * into dest.
 *
 * dest, len     - output buffer and its size; LBREAK_IF_REQUIRED compares
 *                 the space still unused in it against MAX_LINELEN + 2.
 * src           - input text, checked with g_utf8_validate() through
 *                 cm_return_if_fail.
 * header_len    - subtracted from MAX_LINELEN to obtain the initial value
 *                 of the column counter "left".
 * addr_field    - when TRUE, '(' , ')' and '"' are kept out of the encoded
 *                 strings.
 * out_encoding_ - charset name assigned to out_encoding;
 *                 conv_get_outgoing_charset_str() is the other visible
 *                 source (the selecting condition is on a hidden line,
 *                 presumably a NULL test - TODO confirm).  A value equal
 *                 to CS_US_ASCII is replaced by CS_ISO_8859_1.
 *
 * Visible flow: words for which is_next_nonascii() is false are handled by
 * the "output as it is" branch; other runs are converted from CS_INTERNAL
 * to out_encoding with conv_codeset_strdup(), encoded with
 * g_base64_encode() or qp_q_encode(), and written as
 * MIMESEP_BEGIN charset "?B?" or "?Q?" data MIMESEP_END.
 * LBREAK_IF_REQUIRED is invoked between the pieces.
 *
 * NOTE(review): every line of this listing begins with the line number of
 * the original file and many numbers are skipped; declarations such as
 * mimesep_enc, mimestr_len, left, part_str, out_str, enc_str, cur_len and
 * mb_len, and most braces and else keywords, are on hidden lines.  The
 * comment opened on the line numbered 1672 also has no visible terminator
 * in this listing.
 */
1586 void conv_encode_header_full(gchar *dest, gint len, const gchar *src,
1587 gint header_len, gboolean addr_field,
1588 const gchar *out_encoding_)
1590 const gchar *cur_encoding;
1591 const gchar *out_encoding;
1595 const guchar *srcp = src;
1596 guchar *destp = dest;
1597 gboolean use_base64;
1599 cm_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
1600 cm_return_if_fail(destp != NULL);
/* Pick the marker placed between charset and data: "?B?" or "?Q?".  The
   choice is tied to MB_CUR_MAX > 1; use_base64 is presumably set on the
   hidden lines next to each assignment. */
1602 if (MB_CUR_MAX > 1) {
1604 mimesep_enc = "?B?";
1607 mimesep_enc = "?Q?";
/* The input is treated as being in the internal charset. */
1610 cur_encoding = CS_INTERNAL;
1613 out_encoding = out_encoding_;
1615 out_encoding = conv_get_outgoing_charset_str();
/* US-ASCII is never announced as the charset of an encoded block. */
1617 if (!strcmp(out_encoding, CS_US_ASCII))
1618 out_encoding = CS_ISO_8859_1;
/* Fixed overhead of one encoded block: both delimiters, the charset name
   and the encoding marker. */
1620 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1621 strlen(mimesep_enc) + strlen(MIMESEP_END);
/* Columns still available on the first line after the header name. */
1623 left = MAX_LINELEN - header_len;
1626 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1628 while (isspace(*srcp)) {
1631 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1634 /* output as it is if the next word is ASCII string */
1635 if (!is_next_nonascii(srcp)) {
1638 word_len = get_next_word_len(srcp);
1639 LBREAK_IF_REQUIRED(left < word_len, TRUE);
1640 while (word_len > 0) {
1641 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1650 /* don't include parentheses and quotes in encoded strings */
1651 if (addr_field && (*srcp == '(' || *srcp == ')' || *srcp == '"')) {
1652 LBREAK_IF_REQUIRED(left < 2, FALSE);
/* Non-ASCII run: p scans ahead from srcp, growing the candidate piece one
   character at a time (g_utf8_skip gives the byte length of the character
   at p) while its encoded form still fits in "left". */
1663 const guchar *p = srcp;
1665 gint out_enc_str_len;
1666 gint mime_block_len;
1667 gboolean cont = FALSE;
1669 while (*p != '\0') {
1670 if (isspace(*p) && !is_next_nonascii(p + 1))
1672 /* don't include parentheses in encoded
1674 if (addr_field && (*p == '(' || *p == ')' || *p == '"'))
1677 mb_len = g_utf8_skip[*p];
1679 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1680 out_str = conv_codeset_strdup
1681 (part_str, cur_encoding, out_encoding);
1687 g_warning("conv_encode_header(): code conversion failed\n");
1688 conv_unreadable_8bit(part_str);
1689 out_str = g_strdup(part_str);
1692 out_str_len = strlen(out_str);
1695 out_enc_str_len = B64LEN(out_str_len);
1698 qp_get_q_encoding_len(out_str);
1702 if (mimestr_len + out_enc_str_len <= left) {
1705 } else if (cur_len == 0) {
1707 LBREAK_IF_REQUIRED(1, FALSE);
1716 Xstrndup_a(part_str, srcp, cur_len, );
1717 out_str = conv_codeset_strdup
1718 (part_str, cur_encoding, out_encoding);
1720 g_warning("conv_encode_header(): code conversion failed\n");
1721 conv_unreadable_8bit(part_str);
1722 out_str = g_strdup(part_str);
1724 out_str_len = strlen(out_str);
1727 out_enc_str_len = B64LEN(out_str_len);
1730 qp_get_q_encoding_len(out_str);
1733 enc_str = g_base64_encode(out_str, out_str_len);
1735 Xalloca(enc_str, out_enc_str_len + 1, );
1736 qp_q_encode(enc_str, out_str);
1741 /* output MIME-encoded string block */
1742 mime_block_len = mimestr_len + strlen(enc_str);
1743 g_snprintf(destp, mime_block_len + 1,
1744 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1745 out_encoding, mimesep_enc, enc_str);
/* Account for the block just written: advance the output cursor and
   shrink the remaining column budget by the same amount. */
1750 destp += mime_block_len;
1753 left -= mime_block_len;
1756 LBREAK_IF_REQUIRED(cont, FALSE);
1766 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1767 gint header_len, gboolean addr_field)
1769 conv_encode_header_full(dest,len,src,header_len,addr_field,NULL);
1772 #undef LBREAK_IF_REQUIRED
1775 gchar *conv_filename_from_utf8(const gchar *utf8_file)
1778 GError *error = NULL;
1780 fs_file = g_filename_from_utf8(utf8_file, -1, NULL, NULL, &error);
1782 debug_print("failed to convert encoding of file name: %s\n",
1784 g_error_free(error);
1787 fs_file = g_strdup(utf8_file);
1792 gchar *conv_filename_to_utf8(const gchar *fs_file)
1794 gchar *utf8_file = NULL;
1795 GError *error = NULL;
1797 utf8_file = g_filename_to_utf8(fs_file, -1, NULL, NULL, &error);
1799 g_warning("failed to convert encoding of file name: %s\n",
1801 g_error_free(error);
1804 if (!utf8_file || !g_utf8_validate(utf8_file, -1, NULL)) {
1806 utf8_file = g_strdup(fs_file);
1807 conv_unreadable_8bit(utf8_file);