2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2012 Hiroyuki Yamamoto and the Claws Mail team
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "claws-features.h"
28 #include <glib/gi18n.h>
40 #include "quoted-printable.h"
42 #include "prefs_common.h"
44 /* For unknown reasons the iconv.m4 macro undefs that macro if no
45 const is needed. This would break the code below so we define it. */
/* Byte ('_') stored in place of a character that cannot be shown or
   converted.  The definition deliberately carries no trailing semicolon so
   the macro can be used inside expressions as well as in the existing
   "x = SUBST_CHAR;" assignments. */
#define SUBST_CHAR 0x5f
/* Non-zero when the low eight bits of c fall in 0xa1..0xfe. */
#define iseuckanji(c) \
	((unsigned char)(c) >= 0xa1 && (unsigned char)(c) <= 0xfe)
/* Non-zero when the low eight bits of c equal 0x8e. */
#define iseuchwkana1(c) \
	((unsigned char)(c) == 0x8e)
/* Non-zero when the low eight bits of c fall in 0xa1..0xdf. */
#define iseuchwkana2(c) \
	((unsigned char)(c) >= 0xa1 && (unsigned char)(c) <= 0xdf)
68 (((c) & 0xff) == 0x8f)
/* Non-zero when the low eight bits of c fall in 0x81..0x9f or 0xe0..0xfc. */
#define issjiskanji1(c) \
	(((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x9f) || \
	 ((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xfc))
/* Non-zero when the low eight bits of c fall in 0x40..0xfc, 0x7f excluded
   (i.e. 0x40..0x7e or 0x80..0xfc). */
#define issjiskanji2(c) \
	((unsigned char)(c) >= 0x40 && (unsigned char)(c) <= 0xfc && \
	 (unsigned char)(c) != 0x7f)
/* Non-zero when the low eight bits of c fall in 0xa1..0xdf. */
#define issjishwkana(c) \
	((unsigned char)(c) >= 0xa1 && (unsigned char)(c) <= 0xdf)
79 if (state != JIS_KANJI) { \
87 if (state != JIS_ASCII) { \
95 if (state != JIS_HWKANA) { \
103 if (state != JIS_AUXKANJI) { \
108 state = JIS_AUXKANJI; \
111 static CodeConvFunc conv_get_code_conv_func (const gchar *src_charset_str,
112 const gchar *dest_charset_str);
114 static gchar *conv_iconv_strdup_with_cd (const gchar *inbuf,
117 static gchar *conv_iconv_strdup (const gchar *inbuf,
118 const gchar *src_code,
119 const gchar *dest_code);
121 static CharSet conv_get_locale_charset (void);
122 static CharSet conv_get_outgoing_charset (void);
123 static CharSet conv_guess_ja_encoding(const gchar *str);
124 static gboolean conv_is_ja_locale (void);
126 static gint conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
127 static gint conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf);
128 static gint conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
130 static gint conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
131 static gint conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
132 static gint conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
133 static gint conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
135 static gint conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
136 static gint conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf);
138 static void conv_unreadable_8bit(gchar *str);
140 static gint conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
141 static gint conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
142 static gint conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
144 static gint conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
145 static gint conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
146 static gint conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf);
148 static gboolean strict_mode = FALSE;
150 void codeconv_set_strict(gboolean mode)
/*
 * conv_jistoeuc: converter selected for ISO-2022-JP sources with an EUC-JP
 * destination (see conv_get_code_conv_func).  Reads the NUL-terminated
 * inbuf and writes into outbuf, which holds outlen bytes.
 * NOTE(review): parts of this function (the escape-byte test, most state
 * updates and the return statement) are not present in the text under
 * review; the comments below cover only the visible statements.
 */
155 static gint conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
157 const guchar *in = inbuf;
159 JISState state = JIS_ASCII;
162 * Loop outputs up to 3 bytes in each pass (aux kanji) and we
163 * need 1 byte to terminate the output
/* stop at end of input, or once fewer than four bytes of room remain */
165 while (*in != '\0' && (out - outbuf) < outlen - 4) {
/* the byte following the current one selects the character set */
169 if (*(in + 1) == '@' || *(in + 1) == 'B') {
172 } else if (*(in + 1) == '(' &&
174 state = JIS_AUXKANJI;
177 /* unknown escape sequence */
180 } else if (*in == '(') {
181 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
184 } else if (*(in + 1) == 'I') {
188 /* unknown escape sequence */
192 /* unknown escape sequence */
/* 0x0e and 0x0f are the ASCII shift-out / shift-in control codes */
195 } else if (*in == 0x0e) {
198 } else if (*in == 0x0f) {
/* copied bytes get their high bit set; a two-byte copy stops early if the
   input ends after the first byte */
207 *out++ = *in++ | 0x80;
208 if (*in == '\0') break;
209 *out++ = *in++ | 0x80;
213 *out++ = *in++ | 0x80;
217 *out++ = *in++ | 0x80;
218 if (*in == '\0') break;
219 *out++ = *in++ | 0x80;
/* Sound-mark codes compared against the sound_sym argument of
   conv_jis_hantozen() below. */
229 #define JIS_HWDAKUTEN 0x5e
230 #define JIS_HWHANDAKUTEN 0x5f
/*
 * conv_jis_hantozen: map the one-byte code jis_code, optionally combined
 * with the sound mark sound_sym, to a two-byte code and store it high byte
 * first in outbuf[0] and outbuf[1].  (Presumably half-width to full-width
 * kana, going by the name - confirm.)
 *
 * Visible lookups:
 *   - sound_sym == JIS_HWDAKUTEN and jis_code in 0x36..0x4e: dakuten_tbl
 *   - sound_sym == JIS_HWHANDAKUTEN and jis_code in 0x4a..0x4e:
 *     handakuten_tbl
 *   - otherwise: h2z_tbl
 *
 * NOTE(review): the return statements are not visible here; the caller in
 * conv_euctojis stores the result in a variable named len - confirm what it
 * counts.
 */
232 static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
/* indexed by jis_code - 0x20 */
234 static guint16 h2z_tbl[] = {
236 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
237 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
239 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
240 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
242 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
243 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
245 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
246 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
/* indexed by jis_code - 0x30; 0x0000 marks codes without an entry */
249 static guint16 dakuten_tbl[] = {
251 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
252 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
254 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
255 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
/* indexed by jis_code - 0x4a */
258 static guint16 handakuten_tbl[] = {
260 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
/* codes outside 0x21..0x5f are not covered by h2z_tbl */
268 if (jis_code < 0x21 || jis_code > 0x5f)
271 if (sound_sym == JIS_HWDAKUTEN &&
272 jis_code >= 0x36 && jis_code <= 0x4e) {
273 out_code = dakuten_tbl[jis_code - 0x30];
275 *outbuf = out_code >> 8;
276 *(outbuf + 1) = out_code & 0xff;
281 if (sound_sym == JIS_HWHANDAKUTEN &&
282 jis_code >= 0x4a && jis_code <= 0x4e) {
283 out_code = handakuten_tbl[jis_code - 0x4a];
284 *outbuf = out_code >> 8;
285 *(outbuf + 1) = out_code & 0xff;
/* plain lookup, no sound mark combined */
289 out_code = h2z_tbl[jis_code - 0x20];
290 *outbuf = out_code >> 8;
291 *(outbuf + 1) = out_code & 0xff;
/*
 * conv_euctojis: converter selected for EUC-JP sources with an
 * ISO-2022-JP(-2/-3) destination (see conv_get_code_conv_func).  Reads the
 * NUL-terminated inbuf and writes into outbuf, which holds outlen bytes.
 * NOTE(review): the statements that emit escape sequences, skip invalid
 * bytes and return are not present in the text under review.
 */
295 static gint conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
297 const guchar *in = inbuf;
299 JISState state = JIS_ASCII;
302 * Loop outputs up to 6 bytes in each pass (aux shift + aux
303 * kanji) and we need up to 4 bytes to terminate the output
304 * (ASCII shift + null)
/* stop at end of input, or once fewer than ten bytes of room remain */
306 while (*in != '\0' && (out - outbuf) < outlen - 10) {
310 } else if (iseuckanji(*in)) {
311 if (iseuckanji(*(in + 1))) {
/* two-byte character: both bytes are copied with the high bit cleared */
313 *out++ = *in++ & 0x7f;
314 *out++ = *in++ & 0x7f;
319 if (*in != '\0' && !IS_ASCII(*in)) {
/* 0x8e lead byte followed by a byte in 0xa1..0xdf */
324 } else if (iseuchwkana1(*in)) {
325 if (iseuchwkana2(*(in + 1))) {
/* user preference decides between copying the byte through (high bit
   cleared) and converting it with conv_jis_hantozen() */
326 if (prefs_common.allow_jisx0201_kana) {
329 *out++ = *in++ & 0x7f;
/* when a second 0x8e-prefixed byte follows, it is passed along as the
   sound-mark argument */
334 if (iseuchwkana1(*(in + 2)) &&
335 iseuchwkana2(*(in + 3)))
336 len = conv_jis_hantozen
338 *(in + 1), *(in + 3));
340 len = conv_jis_hantozen
355 if (*in != '\0' && !IS_ASCII(*in)) {
360 } else if (iseucaux(*in)) {
362 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
364 *out++ = *in++ & 0x7f;
365 *out++ = *in++ & 0x7f;
368 if (*in != '\0' && !IS_ASCII(*in)) {
371 if (*in != '\0' && !IS_ASCII(*in)) {
/*
 * conv_sjistoeuc: converter selected for Shift_JIS sources with an EUC-JP
 * destination (see conv_get_code_conv_func).  Reads the NUL-terminated
 * inbuf and writes into outbuf, which holds outlen bytes.
 * NOTE(review): the ASCII and half-width-kana branches, the declaration of
 * out1/row and the return statement are not present in the text under
 * review.
 */
389 static gint conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
391 const guchar *in = inbuf;
395 * Loop outputs up to 2 bytes in each pass and we need 1 byte
396 * to terminate the output
398 while (*in != '\0' && (out - outbuf) < outlen - 3) {
/* lead byte in 0x81..0x9f / 0xe0..0xfc followed by a valid second byte */
401 } else if (issjiskanji1(*in)) {
402 if (issjiskanji2(*(in + 1))) {
404 guchar out2 = *(in + 1);
/* lead bytes below 0xa0 and those from 0xe0 up use different offsets */
407 row = out1 < 0xa0 ? 0x70 : 0xb0;
409 out1 = (out1 - row) * 2 - 1;
410 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
412 out1 = (out1 - row) * 2;
/* both result bytes are emitted with the high bit set */
416 *out++ = out1 | 0x80;
417 *out++ = out2 | 0x80;
422 if (*in != '\0' && !IS_ASCII(*in)) {
427 } else if (issjishwkana(*in)) {
/*
 * conv_jistoutf8: two-step conversion.  inbuf is first run through
 * conv_jistoeuc() into a temporary buffer of outlen bytes obtained with
 * Xalloca (which is handed "return -1" as its failure action), and that
 * intermediate string is then run through conv_euctoutf8() into outbuf.
 * A negative result from either step is tested for; the statements taken
 * in that case are not visible here.
 */
440 static gint conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
444 Xalloca(eucstr, outlen, return -1);
446 if (conv_jistoeuc(eucstr, outlen, inbuf) <0)
448 if (conv_euctoutf8(outbuf, outlen, eucstr) < 0)
/*
 * conv_sjistoutf8: convert inbuf from CS_SHIFT_JIS to CS_UTF_8 through
 * conv_iconv_strdup() and copy the result into outbuf (at most outlen
 * bytes, via strncpy2).  A second visible path copies inbuf to outbuf
 * unchanged - presumably the fallback when the conversion yields nothing;
 * the condition itself is not visible here.
 */
453 static gint conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
457 tmpstr = conv_iconv_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);
459 strncpy2(outbuf, tmpstr, outlen);
463 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_euctoutf8: convert inbuf to UTF-8 with an iconv descriptor that is
 * kept in a function-static variable between calls.  The descriptor is
 * opened for CS_EUC_JP_MS first and, if that fails, for CS_EUC_JP; if both
 * fail a warning is logged and inbuf is copied to outbuf unchanged.
 * The converted string comes from conv_iconv_strdup_with_cd() and is copied
 * into outbuf with strncpy2 (at most outlen bytes).
 * NOTE(review): the uses of iconv_ok and the return values are not visible
 * here.
 */
468 static gint conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
470 static iconv_t cd = (iconv_t)-1;
471 static gboolean iconv_ok = TRUE;
/* descriptor not opened yet */
474 if (cd == (iconv_t)-1) {
476 strncpy2(outbuf, inbuf, outlen);
479 cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
480 if (cd == (iconv_t)-1) {
/* second attempt with the plain EUC-JP name */
481 cd = iconv_open(CS_UTF_8, CS_EUC_JP);
482 if (cd == (iconv_t)-1) {
483 g_warning("conv_euctoutf8(): %s",
486 strncpy2(outbuf, inbuf, outlen);
492 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
494 strncpy2(outbuf, tmpstr, outlen);
498 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_anytoutf8: guess the encoding of inbuf with
 * conv_guess_ja_encoding() and dispatch to conv_jistoutf8(),
 * conv_sjistoutf8() or conv_euctoutf8(); one further branch copies inbuf
 * to outbuf unchanged.  The case labels are not visible in the text under
 * review.
 */
503 static gint conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
506 switch (conv_guess_ja_encoding(inbuf)) {
508 r = conv_jistoutf8(outbuf, outlen, inbuf);
511 r = conv_sjistoutf8(outbuf, outlen, inbuf);
514 r = conv_euctoutf8(outbuf, outlen, inbuf);
518 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_utf8toeuc: convert UTF-8 inbuf with an iconv descriptor that is kept
 * in a function-static variable between calls.  The descriptor is opened
 * with CS_EUC_JP_MS as target first and, if that fails, with CS_EUC_JP; if
 * both fail a warning is logged and inbuf is copied to outbuf unchanged.
 * The converted string comes from conv_iconv_strdup_with_cd() and is copied
 * into outbuf with strncpy2 (at most outlen bytes).
 * NOTE(review): the uses of iconv_ok and the return values are not visible
 * here.
 */
525 static gint conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
527 static iconv_t cd = (iconv_t)-1;
528 static gboolean iconv_ok = TRUE;
/* descriptor not opened yet */
531 if (cd == (iconv_t)-1) {
533 strncpy2(outbuf, inbuf, outlen);
536 cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
537 if (cd == (iconv_t)-1) {
/* second attempt with the plain EUC-JP name */
538 cd = iconv_open(CS_EUC_JP, CS_UTF_8);
539 if (cd == (iconv_t)-1) {
540 g_warning("conv_utf8toeuc(): %s",
543 strncpy2(outbuf, inbuf, outlen);
549 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
551 strncpy2(outbuf, tmpstr, outlen);
555 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_utf8tojis: two-step conversion.  inbuf is first run through
 * conv_utf8toeuc() into a temporary buffer of outlen bytes obtained with
 * Xalloca (which is handed "return -1" as its failure action), and that
 * intermediate string is then run through conv_euctojis() into outbuf.
 * A negative result from either step is tested for; the statements taken
 * in that case are not visible here.
 */
560 static gint conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf)
564 Xalloca(eucstr, outlen, return -1);
566 if (conv_utf8toeuc(eucstr, outlen, inbuf) < 0)
568 if (conv_euctojis(outbuf, outlen, eucstr) < 0)
/*
 * conv_unreadable_8bit: clean up str in place.  A CR immediately followed
 * by LF is removed by shifting the rest of the string (terminator
 * included) one byte to the left; any other byte for which IS_ASCII() is
 * false is overwritten with SUBST_CHAR.
 * NOTE(review): the loop header that advances p is not visible here.
 */
574 static void conv_unreadable_8bit(gchar *str)
576 register guchar *p = str;
579 /* convert CR+LF -> LF */
580 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes starting at p + 1 cover the remaining text and its NUL */
581 memmove(p, p + 1, strlen(p));
582 else if (!IS_ASCII(*p)) *p = SUBST_CHAR;
/*
 * conv_guess_ja_encoding: scan str and guess which encoding it uses.  The
 * guess starts as C_US_ASCII.  An ESC followed by '$' or '(' makes the
 * function return C_ISO_2022_JP at once as long as the guess is still
 * C_US_ASCII; byte pairs are then tested against the EUC and Shift_JIS
 * range macros, and several branches set the guess to C_SHIFT_JIS.
 * NOTE(review): the loop header, the pointer advances, the remaining
 * assignments to guessed and the final return are not visible in the text
 * under review.
 */
587 static CharSet conv_guess_ja_encoding(const gchar *str)
589 const guchar *p = str;
590 CharSet guessed = C_US_ASCII;
593 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
594 if (guessed == C_US_ASCII)
595 return C_ISO_2022_JP;
597 } else if (IS_ASCII(*p)) {
/* two bytes that are both in 0xa1..0xfe */
599 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
600 if (*p >= 0xfd && *p <= 0xfe)
602 else if (guessed == C_SHIFT_JIS) {
603 if ((issjiskanji1(*p) &&
604 issjiskanji2(*(p + 1))) ||
606 guessed = C_SHIFT_JIS;
/* valid Shift_JIS lead/trail pair that did not match the test above */
612 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
613 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
614 guessed = C_SHIFT_JIS;
/* single byte in 0xa1..0xdf */
618 } else if (issjishwkana(*p)) {
619 guessed = C_SHIFT_JIS;
/* conv_jistodisp: thin wrapper; forwards its arguments to conv_jistoutf8()
   and returns that function's result. */
629 static gint conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
631 return conv_jistoutf8(outbuf, outlen, inbuf);
/* conv_sjistodisp: thin wrapper; forwards its arguments to
   conv_sjistoutf8() and returns that function's result. */
634 static gint conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
636 return conv_sjistoutf8(outbuf, outlen, inbuf);
/* conv_euctodisp: thin wrapper; forwards its arguments to conv_euctoutf8()
   and returns that function's result. */
639 static gint conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
641 return conv_euctoutf8(outbuf, outlen, inbuf);
/*
 * conv_utf8todisp: when inbuf is valid UTF-8 (g_utf8_validate), copy it to
 * outbuf with strncpy2 (at most outlen bytes); the other visible path hands
 * the input to conv_ustodisp(), which replaces non-ASCII bytes.
 */
644 void conv_utf8todisp(gchar *outbuf, gint outlen, const gchar *inbuf)
646 if (g_utf8_validate(inbuf, -1, NULL) == TRUE)
647 strncpy2(outbuf, inbuf, outlen);
649 conv_ustodisp(outbuf, outlen, inbuf);
/*
 * conv_anytodisp: convert inbuf with conv_anytoutf8() and, if the result in
 * outbuf is still not valid UTF-8, scrub it with conv_unreadable_8bit().
 * NOTE(review): what happens when conv_anytoutf8() reports a negative
 * result, and the return value, are not visible here.
 */
652 static gint conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
655 if (conv_anytoutf8(outbuf, outlen, inbuf) < 0)
657 if (g_utf8_validate(outbuf, -1, NULL) != TRUE)
658 conv_unreadable_8bit(outbuf);
/*
 * conv_ustodisp: copy inbuf to outbuf with strncpy2 (at most outlen bytes)
 * and then run conv_unreadable_8bit() over the copy, which replaces every
 * non-ASCII byte.  The return statement is not visible here.
 */
662 static gint conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
664 strncpy2(outbuf, inbuf, outlen);
665 conv_unreadable_8bit(outbuf);
/*
 * conv_localetodisp: convert inbuf, assumed to be in the locale charset,
 * for display and store the result in outbuf (at most outlen bytes).
 *
 * First attempt: conv_iconv_strdup() from conv_get_locale_charset_str(),
 * with strict mode switched on for the duration of the call.  A valid
 * UTF-8 result is copied out.  If the result exists but is not valid UTF-8,
 * a second attempt uses conv_get_locale_charset_str_no_utf8().  The last
 * visible statement falls back to conv_utf8todisp() on the original input.
 * NOTE(review): the target-charset arguments, the g_free() calls and the
 * early returns are not visible in the text under review.
 */
670 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
674 codeconv_set_strict(TRUE);
675 tmpstr = conv_iconv_strdup(inbuf, conv_get_locale_charset_str(),
677 codeconv_set_strict(FALSE);
678 if (tmpstr && g_utf8_validate(tmpstr, -1, NULL)) {
679 strncpy2(outbuf, tmpstr, outlen);
/* conversion produced something, but it is not valid UTF-8: retry */
682 } else if (tmpstr && !g_utf8_validate(tmpstr, -1, NULL)) {
684 codeconv_set_strict(TRUE);
685 tmpstr = conv_iconv_strdup(inbuf,
686 conv_get_locale_charset_str_no_utf8(),
688 codeconv_set_strict(FALSE);
690 if (tmpstr && g_utf8_validate(tmpstr, -1, NULL)) {
691 strncpy2(outbuf, tmpstr, outlen);
/* last resort */
696 conv_utf8todisp(outbuf, outlen, inbuf);
/*
 * conv_noconv: identity converter; copies inbuf to outbuf with strncpy2 (at
 * most outlen bytes).  It also serves as the "no special converter" marker
 * that conv_convert() and conv_codeset_strdup() compare against.
 */
700 static gint conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
702 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_get_fallback_for_private_encoding: map a charset name that iconv
 * may not know to a usable substitute.  Names beginning with "X-" / "x-"
 * are compared (case-insensitively) with CS_X_MACCYR and CS_X_GBK, and
 * CS_ISO_8859_8_I is mapped to CS_ISO_8859_8.
 * NOTE(review): the return-type line, the values returned for the two
 * "X-" names and the default return are not visible in the text under
 * review.  encoding is dereferenced without a NULL check.
 */
707 conv_get_fallback_for_private_encoding(const gchar *encoding)
710 if ((encoding[0] == 'X' || encoding[0] == 'x') &&
711 encoding[1] == '-') {
712 if (!g_ascii_strcasecmp(encoding, CS_X_MACCYR))
714 if (!g_ascii_strcasecmp(encoding, CS_X_GBK))
717 else if(!g_ascii_strcasecmp(encoding, CS_ISO_8859_8_I)) {
719 * ISO-8859-8-I is a variant which fully
720 * agrees with ISO-8859-8 on character
721 * codings, and differs only in directionality
722 * implications, which are ignored here
723 * anyway; and is not recognized by iconv
725 return CS_ISO_8859_8;
/*
 * conv_code_converter_new: allocate a zero-filled CodeConverter for
 * src_charset.  The name is first passed through
 * conv_get_fallback_for_private_encoding(); the converter then records the
 * conversion function chosen for that source (destination NULL), a
 * g_strdup'ed copy of the name and the matching CharSet id.
 * The return statement is not visible here.
 */
732 CodeConverter *conv_code_converter_new(const gchar *src_charset)
736 src_charset = conv_get_fallback_for_private_encoding(src_charset);
738 conv = g_new0(CodeConverter, 1);
739 conv->code_conv_func = conv_get_code_conv_func(src_charset, NULL);
740 conv->charset_str = g_strdup(src_charset);
741 conv->charset = conv_get_charset_from_str(src_charset);
/*
 * conv_code_converter_destroy: release the charset name owned by conv.
 * NOTE(review): freeing of conv itself is not visible in the text under
 * review - confirm.
 */
746 void conv_code_converter_destroy(CodeConverter *conv)
748 g_free(conv->charset_str);
/*
 * conv_convert: convert inbuf into outbuf (outlen bytes) using conv.
 * When the converter carries a dedicated conversion function (anything
 * other than conv_noconv) that function does the work and its result is
 * returned.  Otherwise the text goes through conv_iconv_strdup() from
 * conv->charset_str to the default destination (NULL) and the result is
 * copied into outbuf with strncpy2.
 * NOTE(review): the handling of a NULL result from conv_iconv_strdup() and
 * the remaining return statements are not visible here.
 */
752 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
755 if (conv->code_conv_func != conv_noconv)
756 return conv->code_conv_func(outbuf, outlen, inbuf);
760 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
764 strncpy2(outbuf, str, outlen);
/*
 * conv_codeset_strdup: return a newly allocated copy of inbuf converted
 * from src_code to dest_code.
 *
 *  - Identical source and destination names (strcmp2() returns 0): the
 *    input is duplicated as is; in strict mode with a UTF-8 destination the
 *    input is validated first.
 *  - Otherwise the source name is passed through
 *    conv_get_fallback_for_private_encoding() and a conversion function is
 *    looked up.  A dedicated function converts into a buffer of
 *    (strlen(inbuf) + 1) * 3 bytes, which is shrunk to fit and returned
 *    when the conversion reported 0 or strict mode is off.
 *  - With no dedicated function, conv_iconv_strdup() does the work.
 *
 * NOTE(review): the statements taken on validation failure, on the
 * conv_ustodisp strict-mode test and on a failed strict conversion are not
 * visible in the text under review.
 */
772 gchar *conv_codeset_strdup(const gchar *inbuf,
773 const gchar *src_code, const gchar *dest_code)
777 CodeConvFunc conv_func;
779 if (!strcmp2(src_code, dest_code)) {
780 CharSet dest_charset = conv_get_charset_from_str(dest_code);
781 if (strict_mode && dest_charset == C_UTF_8) {
782 /* ensure valid UTF-8 if target is UTF-8 */
783 if (!g_utf8_validate(inbuf, -1, NULL)) {
787 /* otherwise, try for a lucky day */
788 return g_strdup(inbuf);
791 src_code = conv_get_fallback_for_private_encoding(src_code);
792 conv_func = conv_get_code_conv_func(src_code, dest_code);
793 if (conv_func == conv_ustodisp && strict_mode && !is_ascii_str(inbuf))
796 if (conv_func != conv_noconv) {
/* three output bytes are reserved per input byte */
797 len = (strlen(inbuf) + 1) * 3;
800 if (conv_func(buf, len, inbuf) == 0 || !strict_mode)
801 return g_realloc(buf, strlen(buf) + 1);
808 return conv_iconv_strdup(inbuf, src_code, dest_code);
/*
 * conv_get_code_conv_func: choose the built-in conversion function for a
 * source/destination charset pair; conv_noconv is the initial value and
 * therefore the result when no assignment below applies.
 *
 *  - A NULL source name means the locale charset.
 *  - Both names NULL ("auto detection"): conv_anytodisp in a Japanese
 *    locale.
 *  - Destination US-ASCII: conv_ustodisp.
 *  - Otherwise the switch on the source charset picks one of the
 *    JIS / Shift_JIS / EUC-JP / UTF-8 helpers according to the destination.
 *
 * NOTE(review): most case labels, the non-Japanese branch of the
 * auto-detection path and the final return are not visible in the text
 * under review.
 */
811 static CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
812 const gchar *dest_charset_str)
814 CodeConvFunc code_conv = conv_noconv;
816 CharSet dest_charset;
818 if (!src_charset_str)
819 src_charset = conv_get_locale_charset();
821 src_charset = conv_get_charset_from_str(src_charset_str);
823 /* auto detection mode */
824 if (!src_charset_str && !dest_charset_str) {
825 if (conv_is_ja_locale())
826 return conv_anytodisp;
/* a NULL destination name yields C_AUTO here */
831 dest_charset = conv_get_charset_from_str(dest_charset_str);
833 if (dest_charset == C_US_ASCII)
834 return conv_ustodisp;
836 switch (src_charset) {
/* ISO-2022-JP family sources */
854 case C_ISO_2022_JP_2:
855 case C_ISO_2022_JP_3:
856 if (dest_charset == C_AUTO)
857 code_conv = conv_jistodisp;
858 else if (dest_charset == C_EUC_JP)
859 code_conv = conv_jistoeuc;
860 else if (dest_charset == C_UTF_8)
861 code_conv = conv_jistoutf8;
/* Shift_JIS helpers (case label not visible) */
864 if (dest_charset == C_AUTO)
865 code_conv = conv_sjistodisp;
866 else if (dest_charset == C_EUC_JP)
867 code_conv = conv_sjistoeuc;
868 else if (dest_charset == C_UTF_8)
869 code_conv = conv_sjistoutf8;
/* EUC-JP helpers (case label not visible) */
872 if (dest_charset == C_AUTO)
873 code_conv = conv_euctodisp;
874 else if (dest_charset == C_ISO_2022_JP ||
875 dest_charset == C_ISO_2022_JP_2 ||
876 dest_charset == C_ISO_2022_JP_3)
877 code_conv = conv_euctojis;
878 else if (dest_charset == C_UTF_8)
879 code_conv = conv_euctoutf8;
/* UTF-8 helpers (case label not visible) */
882 if (dest_charset == C_EUC_JP)
883 code_conv = conv_utf8toeuc;
884 else if (dest_charset == C_ISO_2022_JP ||
885 dest_charset == C_ISO_2022_JP_2 ||
886 dest_charset == C_ISO_2022_JP_3)
887 code_conv = conv_utf8tojis;
/*
 * conv_iconv_strdup: return a newly allocated copy of inbuf converted from
 * src_code to dest_code with iconv.
 *
 * Shortcuts that return a plain g_strdup() of the input:
 *   - both names NULL and the input already valid UTF-8;
 *   - source and destination names equal (case-insensitive);
 *   - either name equal to CS_US_ASCII.
 * A missing name is replaced by the outgoing charset (source) or by
 * CS_INTERNAL (destination).  Otherwise a descriptor is opened and the work
 * is delegated to conv_iconv_strdup_with_cd().
 * NOTE(review): the NULL tests guarding the two defaults, the action taken
 * when iconv_open() fails, iconv_close() and the return statement are not
 * visible in the text under review.
 */
896 static gchar *conv_iconv_strdup(const gchar *inbuf,
897 const gchar *src_code, const gchar *dest_code)
902 if (!src_code && !dest_code &&
903 g_utf8_validate(inbuf, -1, NULL))
904 return g_strdup(inbuf);
907 src_code = conv_get_outgoing_charset_str();
909 dest_code = CS_INTERNAL;
911 /* don't convert if src and dest codeset are identical */
912 if (!strcasecmp(src_code, dest_code))
913 return g_strdup(inbuf);
915 /* don't convert if src codeset is US-ASCII */
916 if (!strcasecmp(src_code, CS_US_ASCII))
917 return g_strdup(inbuf);
919 /* don't convert if dest codeset is US-ASCII */
920 if (!strcasecmp(dest_code, CS_US_ASCII))
921 return g_strdup(inbuf);
/* note the argument order: destination first, then source */
923 cd = iconv_open(dest_code, src_code);
924 if (cd == (iconv_t)-1)
927 outbuf = conv_iconv_strdup_with_cd(inbuf, cd);
/*
 * conv_iconv_strdup_with_cd: convert the NUL-terminated inbuf with the
 * already opened iconv descriptor cd into a heap buffer.
 *
 * The output buffer starts at (strlen(inbuf) + 1) * 2 bytes and is enlarged
 * through the local EXPAND_BUF() macro.  The first loop repeats iconv()
 * while it reports an error and branches on errno (EILSEQ, EINVAL, E2BIG,
 * anything else logs a warning); SUBST_CHAR is written to the output in the
 * EILSEQ branch.  The second loop calls iconv() with a NULL input, which
 * asks it to emit any pending output / reset sequence.  Finally the buffer
 * is shrunk to the produced length plus one byte.
 * NOTE(review): how much input is skipped after EILSEQ, the strict-mode
 * handling, the terminator write and the return statement are not visible
 * in the text under review.
 */
934 gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd)
936 const gchar *inbuf_p;
947 in_size = strlen(inbuf);
949 out_size = (in_size + 1) * 2;
950 outbuf = g_malloc(out_size);
/* grow outbuf while keeping outbuf_p / out_left consistent with it */
954 #define EXPAND_BUF() \
956 len = outbuf_p - outbuf; \
958 outbuf = g_realloc(outbuf, out_size); \
959 outbuf_p = outbuf + len; \
960 out_left = out_size - len; \
963 while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
964 &outbuf_p, &out_left)) == (size_t)-1) {
965 if (EILSEQ == errno) {
970 //g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
976 *outbuf_p++ = SUBST_CHAR;
978 } else if (EINVAL == errno) {
980 } else if (E2BIG == errno) {
983 g_warning("conv_iconv_strdup(): %s",
/* flush: NULL input makes iconv() write out whatever is still pending */
989 while ((n_conv = iconv(cd, NULL, NULL, &outbuf_p, &out_left)) ==
991 if (E2BIG == errno) {
994 g_warning("conv_iconv_strdup(): %s",
/* trim the allocation to the bytes produced plus one */
1002 len = outbuf_p - outbuf;
1003 outbuf = g_realloc(outbuf, len + 1);
1009 static const struct {
1013 {C_US_ASCII, CS_US_ASCII},
1014 {C_US_ASCII, CS_ANSI_X3_4_1968},
1015 {C_UTF_8, CS_UTF_8},
1016 {C_UTF_7, CS_UTF_7},
1017 {C_ISO_8859_1, CS_ISO_8859_1},
1018 {C_ISO_8859_2, CS_ISO_8859_2},
1019 {C_ISO_8859_3, CS_ISO_8859_3},
1020 {C_ISO_8859_4, CS_ISO_8859_4},
1021 {C_ISO_8859_5, CS_ISO_8859_5},
1022 {C_ISO_8859_6, CS_ISO_8859_6},
1023 {C_ISO_8859_7, CS_ISO_8859_7},
1024 {C_ISO_8859_8, CS_ISO_8859_8},
1025 {C_ISO_8859_9, CS_ISO_8859_9},
1026 {C_ISO_8859_10, CS_ISO_8859_10},
1027 {C_ISO_8859_11, CS_ISO_8859_11},
1028 {C_ISO_8859_13, CS_ISO_8859_13},
1029 {C_ISO_8859_14, CS_ISO_8859_14},
1030 {C_ISO_8859_15, CS_ISO_8859_15},
1031 {C_BALTIC, CS_BALTIC},
1032 {C_CP1250, CS_CP1250},
1033 {C_CP1251, CS_CP1251},
1034 {C_CP1252, CS_CP1252},
1035 {C_CP1253, CS_CP1253},
1036 {C_CP1254, CS_CP1254},
1037 {C_CP1255, CS_CP1255},
1038 {C_CP1256, CS_CP1256},
1039 {C_CP1257, CS_CP1257},
1040 {C_CP1258, CS_CP1258},
1041 {C_WINDOWS_1250, CS_WINDOWS_1250},
1042 {C_WINDOWS_1251, CS_WINDOWS_1251},
1043 {C_WINDOWS_1252, CS_WINDOWS_1252},
1044 {C_WINDOWS_1253, CS_WINDOWS_1253},
1045 {C_WINDOWS_1254, CS_WINDOWS_1254},
1046 {C_WINDOWS_1255, CS_WINDOWS_1255},
1047 {C_WINDOWS_1256, CS_WINDOWS_1256},
1048 {C_WINDOWS_1257, CS_WINDOWS_1257},
1049 {C_WINDOWS_1258, CS_WINDOWS_1258},
1050 {C_KOI8_R, CS_KOI8_R},
1051 {C_MACCYR, CS_MACCYR},
1052 {C_KOI8_T, CS_KOI8_T},
1053 {C_KOI8_U, CS_KOI8_U},
1054 {C_ISO_2022_JP, CS_ISO_2022_JP},
1055 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
1056 {C_ISO_2022_JP_3, CS_ISO_2022_JP_3},
1057 {C_EUC_JP, CS_EUC_JP},
1058 {C_EUC_JP, CS_EUCJP},
1059 {C_EUC_JP_MS, CS_EUC_JP_MS},
1060 {C_SHIFT_JIS, CS_SHIFT_JIS},
1061 {C_SHIFT_JIS, CS_SHIFT__JIS},
1062 {C_SHIFT_JIS, CS_SJIS},
1063 {C_ISO_2022_KR, CS_ISO_2022_KR},
1064 {C_EUC_KR, CS_EUC_KR},
1065 {C_ISO_2022_CN, CS_ISO_2022_CN},
1066 {C_EUC_CN, CS_EUC_CN},
1067 {C_GB18030, CS_GB18030},
1068 {C_GB2312, CS_GB2312},
1070 {C_EUC_TW, CS_EUC_TW},
1072 {C_BIG5_HKSCS, CS_BIG5_HKSCS},
1073 {C_TIS_620, CS_TIS_620},
1074 {C_WINDOWS_874, CS_WINDOWS_874},
1075 {C_GEORGIAN_PS, CS_GEORGIAN_PS},
1076 {C_TCVN5712_1, CS_TCVN5712_1},
1079 static const struct {
1080 gchar *const locale;
1082 CharSet out_charset;
1083 } locale_table[] = {
1084 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
1085 {"ja_JP.EUC-JP" , C_EUC_JP , C_ISO_2022_JP},
1086 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
1087 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
1088 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
1089 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
1091 {"ja_JP" , C_SHIFT_JIS , C_ISO_2022_JP},
1093 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
1095 {"ko_KR.EUC-KR" , C_EUC_KR , C_EUC_KR},
1096 {"ko_KR" , C_EUC_KR , C_EUC_KR},
1097 {"zh_CN.GB18030" , C_GB18030 , C_GB18030},
1098 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
1099 {"zh_CN.GBK" , C_GBK , C_GBK},
1100 {"zh_CN" , C_GB18030 , C_GB18030},
1101 {"zh_HK" , C_BIG5_HKSCS , C_BIG5_HKSCS},
1102 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
1103 {"zh_TW.EUC-TW" , C_EUC_TW , C_BIG5},
1104 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
1105 {"zh_TW" , C_BIG5 , C_BIG5},
1107 {"ru_RU.KOI8-R" , C_KOI8_R , C_KOI8_R},
1108 {"ru_RU.KOI8R" , C_KOI8_R , C_KOI8_R},
1109 {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
1111 {"ru_RU" , C_WINDOWS_1251, C_KOI8_R},
1113 {"ru_RU" , C_ISO_8859_5 , C_KOI8_R},
1115 {"tg_TJ" , C_KOI8_T , C_KOI8_T},
1116 {"ru_UA" , C_KOI8_U , C_KOI8_U},
1117 {"uk_UA.CP1251" , C_WINDOWS_1251, C_KOI8_U},
1118 {"uk_UA" , C_KOI8_U , C_KOI8_U},
1120 {"be_BY" , C_WINDOWS_1251, C_WINDOWS_1251},
1121 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
1123 {"yi_US" , C_WINDOWS_1255, C_WINDOWS_1255},
1125 {"af_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1126 {"br_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1127 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1128 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1129 {"de_AT" , C_ISO_8859_1 , C_ISO_8859_1},
1130 {"de_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1131 {"de_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1132 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
1133 {"de_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1134 {"en_AU" , C_ISO_8859_1 , C_ISO_8859_1},
1135 {"en_BW" , C_ISO_8859_1 , C_ISO_8859_1},
1136 {"en_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1137 {"en_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1138 {"en_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1139 {"en_HK" , C_ISO_8859_1 , C_ISO_8859_1},
1140 {"en_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1141 {"en_NZ" , C_ISO_8859_1 , C_ISO_8859_1},
1142 {"en_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1143 {"en_SG" , C_ISO_8859_1 , C_ISO_8859_1},
1144 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
1145 {"en_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1146 {"en_ZW" , C_ISO_8859_1 , C_ISO_8859_1},
1147 {"es_AR" , C_ISO_8859_1 , C_ISO_8859_1},
1148 {"es_BO" , C_ISO_8859_1 , C_ISO_8859_1},
1149 {"es_CL" , C_ISO_8859_1 , C_ISO_8859_1},
1150 {"es_CO" , C_ISO_8859_1 , C_ISO_8859_1},
1151 {"es_CR" , C_ISO_8859_1 , C_ISO_8859_1},
1152 {"es_DO" , C_ISO_8859_1 , C_ISO_8859_1},
1153 {"es_EC" , C_ISO_8859_1 , C_ISO_8859_1},
1154 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1155 {"es_GT" , C_ISO_8859_1 , C_ISO_8859_1},
1156 {"es_HN" , C_ISO_8859_1 , C_ISO_8859_1},
1157 {"es_MX" , C_ISO_8859_1 , C_ISO_8859_1},
1158 {"es_NI" , C_ISO_8859_1 , C_ISO_8859_1},
1159 {"es_PA" , C_ISO_8859_1 , C_ISO_8859_1},
1160 {"es_PE" , C_ISO_8859_1 , C_ISO_8859_1},
1161 {"es_PR" , C_ISO_8859_1 , C_ISO_8859_1},
1162 {"es_PY" , C_ISO_8859_1 , C_ISO_8859_1},
1163 {"es_SV" , C_ISO_8859_1 , C_ISO_8859_1},
1164 {"es_US" , C_ISO_8859_1 , C_ISO_8859_1},
1165 {"es_UY" , C_ISO_8859_1 , C_ISO_8859_1},
1166 {"es_VE" , C_ISO_8859_1 , C_ISO_8859_1},
1167 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
1168 {"eu_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1169 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1170 {"fo_FO" , C_ISO_8859_1 , C_ISO_8859_1},
1171 {"fr_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1172 {"fr_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1173 {"fr_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1174 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1175 {"fr_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1176 {"ga_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1177 {"gl_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1178 {"gv_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1179 {"id_ID" , C_ISO_8859_1 , C_ISO_8859_1},
1180 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
1181 {"it_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1182 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
1183 {"kl_GL" , C_ISO_8859_1 , C_ISO_8859_1},
1184 {"kw_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1185 {"ms_MY" , C_ISO_8859_1 , C_ISO_8859_1},
1186 {"nl_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1187 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
1188 {"nb_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1189 {"nn_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1190 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1191 {"oc_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1192 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
1193 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
1194 {"sq_AL" , C_ISO_8859_1 , C_ISO_8859_1},
1195 {"sv_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1196 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
1197 {"tl_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1198 {"uz_UZ" , C_ISO_8859_1 , C_ISO_8859_1},
1199 {"wa_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1201 {"bs_BA" , C_ISO_8859_2 , C_ISO_8859_2},
1202 {"cs_CZ" , C_ISO_8859_2 , C_ISO_8859_2},
1203 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
1204 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
1205 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
1206 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
1207 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
1208 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
1210 {"sr_YU@cyrillic" , C_ISO_8859_5 , C_ISO_8859_5},
1211 {"sr_YU" , C_ISO_8859_2 , C_ISO_8859_2},
1213 {"mt_MT" , C_ISO_8859_3 , C_ISO_8859_3},
1215 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
1216 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1217 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1218 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
1220 {"mk_MK" , C_ISO_8859_5 , C_ISO_8859_5},
1222 {"ar_AE" , C_ISO_8859_6 , C_ISO_8859_6},
1223 {"ar_BH" , C_ISO_8859_6 , C_ISO_8859_6},
1224 {"ar_DZ" , C_ISO_8859_6 , C_ISO_8859_6},
1225 {"ar_EG" , C_ISO_8859_6 , C_ISO_8859_6},
1226 {"ar_IQ" , C_ISO_8859_6 , C_ISO_8859_6},
1227 {"ar_JO" , C_ISO_8859_6 , C_ISO_8859_6},
1228 {"ar_KW" , C_ISO_8859_6 , C_ISO_8859_6},
1229 {"ar_LB" , C_ISO_8859_6 , C_ISO_8859_6},
1230 {"ar_LY" , C_ISO_8859_6 , C_ISO_8859_6},
1231 {"ar_MA" , C_ISO_8859_6 , C_ISO_8859_6},
1232 {"ar_OM" , C_ISO_8859_6 , C_ISO_8859_6},
1233 {"ar_QA" , C_ISO_8859_6 , C_ISO_8859_6},
1234 {"ar_SA" , C_ISO_8859_6 , C_ISO_8859_6},
1235 {"ar_SD" , C_ISO_8859_6 , C_ISO_8859_6},
1236 {"ar_SY" , C_ISO_8859_6 , C_ISO_8859_6},
1237 {"ar_TN" , C_ISO_8859_6 , C_ISO_8859_6},
1238 {"ar_YE" , C_ISO_8859_6 , C_ISO_8859_6},
1240 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
1241 {"he_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1242 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1243 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
1245 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
1246 {"mi_NZ" , C_ISO_8859_13 , C_ISO_8859_13},
1248 {"cy_GB" , C_ISO_8859_14 , C_ISO_8859_14},
1250 {"ar_IN" , C_UTF_8 , C_UTF_8},
1251 {"en_IN" , C_UTF_8 , C_UTF_8},
1252 {"se_NO" , C_UTF_8 , C_UTF_8},
1253 {"ta_IN" , C_UTF_8 , C_UTF_8},
1254 {"te_IN" , C_UTF_8 , C_UTF_8},
1255 {"ur_PK" , C_UTF_8 , C_UTF_8},
1257 {"th_TH" , C_TIS_620 , C_TIS_620},
1258 /* {"th_TH" , C_WINDOWS_874}, */
1259 /* {"th_TH" , C_ISO_8859_11}, */
1261 {"ka_GE" , C_GEORGIAN_PS , C_GEORGIAN_PS},
1262 {"vi_VN.TCVN" , C_TCVN5712_1 , C_TCVN5712_1},
1264 {"C" , C_US_ASCII , C_US_ASCII},
1265 {"POSIX" , C_US_ASCII , C_US_ASCII},
1266 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
/*
 * conv_get_charset_to_str_table: return the hash table that maps a CharSet
 * id (stored as a pointer-sized integer key, compared with g_direct_equal)
 * to a charset name, filling it from the charsets[] array.  The table
 * pointer lives in a function-static variable.  Each id is looked up before
 * insertion - presumably so that the first name listed for an id is the one
 * kept; the rest of that condition is not visible here.
 */
1269 static GHashTable *conv_get_charset_to_str_table(void)
1271 static GHashTable *table;
1277 table = g_hash_table_new(NULL, g_direct_equal);
1279 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1280 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1283 (table, GUINT_TO_POINTER(charsets[i].charset),
/*
 * conv_get_charset_from_str_table: return the hash table that maps a
 * charset name to its CharSet id, filling it from every entry of the
 * charsets[] array.  Keys are hashed and compared with str_case_hash /
 * str_case_equal; the id is stored as a pointer-sized integer.  The table
 * pointer lives in a function-static variable.
 */
1291 static GHashTable *conv_get_charset_from_str_table(void)
1293 static GHashTable *table;
1299 table = g_hash_table_new(str_case_hash, str_case_equal);
1301 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1302 g_hash_table_insert(table, charsets[i].name,
1303 GUINT_TO_POINTER(charsets[i].charset));
/*
 * conv_get_charset_str: return the name registered for charset in the
 * id-to-name table, or NULL when g_hash_table_lookup() finds no entry.
 */
1309 const gchar *conv_get_charset_str(CharSet charset)
1313 table = conv_get_charset_to_str_table();
1314 return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
/*
 * conv_get_charset_from_str: return the CharSet id registered for the name
 * charset.  A NULL name yields C_AUTO; a name missing from the table yields
 * the integer value of a NULL lookup result, i.e. 0.
 */
1317 CharSet conv_get_charset_from_str(const gchar *charset)
1321 if (!charset) return C_AUTO;
1323 table = conv_get_charset_from_str_table();
1324 return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
/*
 * conv_get_locale_charset: derive the CharSet of the current locale and
 * remember it in a function-static variable (-1 means "not computed yet").
 *
 * Order of the visible decisions:
 *   1. a locale name containing "UTF-8" or "utf8"  -> C_UTF_8
 *   2. a locale name ending in "@euro"             -> C_ISO_8859_15
 *   3. first matching locale_table[] entry         -> that entry's charset
 *   4. C_AUTO
 * C_US_ASCII is assigned on an earlier path whose condition is not visible
 * (presumably when no locale name is available).
 * NOTE(review): the return statements and loop exits are not visible in the
 * text under review.
 */
1327 static CharSet conv_get_locale_charset(void)
1329 static CharSet cur_charset = -1;
1330 const gchar *cur_locale;
1334 if (cur_charset != -1)
1337 cur_locale = conv_get_current_locale();
1339 cur_charset = C_US_ASCII;
1343 if (strcasestr(cur_locale, "UTF-8") ||
1344 strcasestr(cur_locale, "utf8")) {
1345 cur_charset = C_UTF_8;
/* "@euro" must be the very end of the locale name */
1349 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1350 cur_charset = C_ISO_8859_15;
1354 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1357 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1358 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
/* prefix match of the table entry against the locale name */
1359 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1360 strlen(locale_table[i].locale))) {
1361 cur_charset = locale_table[i].charset;
/* table entries of the form "xx_YY" (no ".codeset" part) also match a bare
   two-letter locale name */
1363 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1364 !strchr(p + 1, '.')) {
1365 if (strlen(cur_locale) == 2 &&
1366 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1367 cur_charset = locale_table[i].charset;
1373 cur_charset = C_AUTO;
/*
 * conv_get_locale_charset_no_utf8: variant of conv_get_locale_charset()
 * that first consults the prefs_common.broken_are_utf8 preference and picks
 * C_UTF_8 when it is set.  The remaining visible decisions are the same as
 * in conv_get_locale_charset(): "UTF-8"/"utf8" in the locale name, a
 * trailing "@euro", the locale_table[] scan, then C_AUTO.
 * NOTE(review): the statements between these assignments (returns, loop
 * exits, and whatever makes the UTF-8 case differ from the sibling
 * function) are not visible in the text under review.
 */
1377 static CharSet conv_get_locale_charset_no_utf8(void)
1379 static CharSet cur_charset = -1;
1380 const gchar *cur_locale;
1384 if (prefs_common.broken_are_utf8) {
1385 cur_charset = C_UTF_8;
1389 cur_locale = conv_get_current_locale();
1391 cur_charset = C_US_ASCII;
1395 if (strcasestr(cur_locale, "UTF-8") ||
1396 strcasestr(cur_locale, "utf8")) {
1397 cur_charset = C_UTF_8;
/* "@euro" must be the very end of the locale name */
1401 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1402 cur_charset = C_ISO_8859_15;
1406 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1409 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1410 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
/* prefix match of the table entry against the locale name */
1411 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1412 strlen(locale_table[i].locale))) {
1413 cur_charset = locale_table[i].charset;
/* table entries of the form "xx_YY" (no ".codeset" part) also match a bare
   two-letter locale name */
1415 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1416 !strchr(p + 1, '.')) {
1417 if (strlen(cur_locale) == 2 &&
1418 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1419 cur_charset = locale_table[i].charset;
1425 cur_charset = C_AUTO;
/*
 * conv_get_locale_charset_str: return the name of the locale charset, kept
 * in a function-static pointer; CS_INTERNAL is returned when no name is
 * known for it.
 */
1429 const gchar *conv_get_locale_charset_str(void)
1431 static const gchar *codeset = NULL;
1434 codeset = conv_get_charset_str(conv_get_locale_charset());
1436 return codeset ? codeset : CS_INTERNAL;
/*
 * conv_get_locale_charset_str_no_utf8: return the name of the charset
 * reported by conv_get_locale_charset_no_utf8(), kept in a function-static
 * pointer; CS_INTERNAL is returned when no name is known for it.
 */
1439 const gchar *conv_get_locale_charset_str_no_utf8(void)
1441 static const gchar *codeset = NULL;
1444 codeset = conv_get_charset_str(conv_get_locale_charset_no_utf8());
1446 return codeset ? codeset : CS_INTERNAL;
/*
 * conv_get_outgoing_charset: derive the charset used for outgoing text from
 * the current locale and remember it in a function-static variable (-1
 * means "not computed yet").
 *
 * Order of the visible decisions:
 *   1. a locale name containing "UTF-8" or "utf8"  -> C_UTF_8
 *   2. a locale name ending in "@euro"             -> C_ISO_8859_15
 *   3. first matching locale_table[] entry         -> its out_charset
 * C_AUTO is assigned on an earlier path whose condition is not visible
 * (presumably when no locale name is available).
 * NOTE(review): the return statements and loop exits are not visible in the
 * text under review.
 */
1449 static CharSet conv_get_outgoing_charset(void)
1451 static CharSet out_charset = -1;
1452 const gchar *cur_locale;
1456 if (out_charset != -1)
1459 cur_locale = conv_get_current_locale();
1461 out_charset = C_AUTO;
1465 if (strcasestr(cur_locale, "UTF-8") ||
1466 strcasestr(cur_locale, "utf8")) {
1467 out_charset = C_UTF_8;
/* "@euro" must be the very end of the locale name */
1471 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1472 out_charset = C_ISO_8859_15;
1476 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
/* prefix match of the table entry against the locale name */
1479 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1480 strlen(locale_table[i].locale))) {
1481 out_charset = locale_table[i].out_charset;
/* table entries of the form "xx_YY" (no ".codeset" part) also match a bare
   two-letter locale name */
1483 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1484 !strchr(p + 1, '.')) {
1485 if (strlen(cur_locale) == 2 &&
1486 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1487 out_charset = locale_table[i].out_charset;
1496 const gchar *conv_get_outgoing_charset_str(void)
1498 CharSet out_charset;
1501 out_charset = conv_get_outgoing_charset();
1502 str = conv_get_charset_str(out_charset);
1504 return str ? str : CS_UTF_8;
1507 const gchar *conv_get_current_locale(void)
1509 const gchar *cur_locale;
1512 cur_locale = g_win32_getlocale();
1514 cur_locale = g_getenv("LC_ALL");
1515 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1516 if (!cur_locale) cur_locale = g_getenv("LANG");
1517 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1518 #endif /* G_OS_WIN32 */
1520 debug_print("current locale: %s\n",
1521 cur_locale ? cur_locale : "(none)");
1526 static gboolean conv_is_ja_locale(void)
1528 static gint is_ja_locale = -1;
1529 const gchar *cur_locale;
1531 if (is_ja_locale != -1)
1532 return is_ja_locale != 0;
1535 cur_locale = conv_get_current_locale();
1537 if (g_ascii_strncasecmp(cur_locale, "ja", 2) == 0)
1541 return is_ja_locale != 0;
1544 gchar *conv_unmime_header(const gchar *str, const gchar *default_encoding,
1545 gboolean addr_field)
1547 gchar buf[BUFFSIZE];
1549 if (is_ascii_str(str))
1550 return unmime_header(str, addr_field);
1552 if (default_encoding) {
1555 utf8_buf = conv_codeset_strdup
1556 (str, default_encoding, CS_INTERNAL);
1560 decoded_str = unmime_header(utf8_buf, addr_field);
1566 if (conv_is_ja_locale())
1567 conv_anytodisp(buf, sizeof(buf), str);
1569 conv_localetodisp(buf, sizeof(buf), str);
1571 return unmime_header(buf, addr_field);
/* Constants for the header encoder that follows.
   MAX_LINELEN is the line width the encoder starts from when computing
   how many columns are left; MAX_HARD_LINELEN is the larger limit that is
   applied while copying a plain ASCII word. MIMESEP_BEGIN and MIMESEP_END
   are the "=?" and "?=" markers written around each encoded block. */
1574 #define MAX_LINELEN 76
1575 #define MAX_HARD_LINELEN 996
1576 #define MIMESEP_BEGIN "=?"
1577 #define MIMESEP_END "?="
/* LBREAK_IF_REQUIRED(cond, is_plain_text) is a statement macro that
   relies on the locals of the function expanding it: len, dest, destp,
   srcp and left.
   - It first tests whether fewer than MAX_LINELEN + 2 bytes remain in
     dest.
   - When cond holds and the source is not exhausted, it distinguishes
     output that already has content (and left < MAX_LINELEN - 1) from
     output that is still empty (and left < 7). White space at the end of
     the output or, for plain text, at the start of the remaining source
     is examined, and left is reset to MAX_LINELEN - 1.
   NOTE(review): the statements executed inside these branches are not
   visible in this copy of the file; presumably they terminate the output
   early and insert a folded line break. Confirm against upstream.

   B64LEN(len) is the length of the base64 encoding of len bytes: four
   characters per three input bytes, with a final partial group rounded up
   to four characters. */
1579 #define LBREAK_IF_REQUIRED(cond, is_plain_text) \
1581 if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) { \
1586 if ((cond) && *srcp) { \
1587 if (destp > (guchar *)dest && left < MAX_LINELEN - 1) { \
1588 if (isspace(*(destp - 1))) \
1590 else if (is_plain_text && isspace(*srcp)) \
1595 left = MAX_LINELEN - 1; \
1597 } else if (destp == (guchar *)dest && left < 7) { \
1598 if (is_plain_text && isspace(*srcp)) \
1603 left = MAX_LINELEN - 1; \
1609 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/* Encode the header value src into dest, writing non-ASCII runs as MIME
   encoded blocks of the form "=?charset?B?data?=" or "=?charset?Q?data?=".

   dest          output buffer (must not be NULL)
   len           size of dest; LBREAK_IF_REQUIRED compares it with the
                 number of bytes already written
   src           input text; must be valid UTF-8
   header_len    subtracted from MAX_LINELEN to obtain the columns left on
                 the first line
   addr_field    when true, '(' , ')' and '"' are kept out of encoded
                 blocks
   out_encoding_ charset name for the encoded blocks; conv_encode_header()
                 passes NULL here. NOTE(review): the test choosing between
                 this argument and conv_get_outgoing_charset_str() is not
                 visible in this copy; presumably NULL selects the latter.

   NOTE(review): several lines of this function (some declarations, branch
   bodies and closing braces) are missing from this copy; the comments
   below describe only what the visible lines establish. */
1611 void conv_encode_header_full(gchar *dest, gint len, const gchar *src,
1612 gint header_len, gboolean addr_field,
1613 const gchar *out_encoding_)
1615 const gchar *cur_encoding;
1616 const gchar *out_encoding;
1620 const guchar *srcp = src;
1621 guchar *destp = dest;
1622 gboolean use_base64;
/* Reject input that is not valid UTF-8 and a NULL output buffer. */
1624 cm_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
1625 cm_return_if_fail(destp != NULL);
/* Multi-byte locales get the "?B?" marker, others "?Q?". */
1627 if (MB_CUR_MAX > 1) {
1629 mimesep_enc = "?B?";
1632 mimesep_enc = "?Q?";
/* The source text is in the internal encoding. */
1635 cur_encoding = CS_INTERNAL;
1638 out_encoding = out_encoding_;
1640 out_encoding = conv_get_outgoing_charset_str();
/* US-ASCII is replaced by ISO-8859-1 as the target charset. */
1642 if (!strcmp(out_encoding, CS_US_ASCII))
1643 out_encoding = CS_ISO_8859_1;
/* Fixed per-block overhead: both delimiters, the charset name and the
   encoding marker. */
1645 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1646 strlen(mimesep_enc) + strlen(MIMESEP_END);
/* Columns still available on the current line. */
1648 left = MAX_LINELEN - header_len;
1651 LBREAK_IF_REQUIRED(left <= 0, TRUE);
/* Handle leading white space in the source. */
1653 while (isspace(*srcp)) {
1656 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1659 /* output as it is if the next word is ASCII string */
1660 if (!is_next_nonascii(srcp)) {
1663 word_len = get_next_word_len(srcp);
1664 LBREAK_IF_REQUIRED(left < word_len, TRUE);
1665 while (word_len > 0) {
/* Inside a word the break condition uses the MAX_HARD_LINELEN limit
   instead of MAX_LINELEN. */
1666 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1675 /* don't include parentheses and quotes in encoded strings */
1676 if (addr_field && (*srcp == '(' || *srcp == ')' || *srcp == '"')) {
1677 LBREAK_IF_REQUIRED(left < 2, FALSE);
/* Encoding of a non-ASCII run. The scan below walks the source one UTF-8
   character at a time (g_utf8_skip), stopping at white space that is not
   followed by further non-ASCII text or, for address fields, at a
   parenthesis or quote. For each candidate prefix it converts the text
   with conv_codeset_strdup() and computes the encoded length (B64LEN or
   qp_get_q_encoding_len) to test whether the block still fits in the
   columns left. The accepted prefix is then converted again, encoded with
   g_base64_encode() or qp_q_encode(), and written to the output. If a
   conversion fails, a warning is logged and the text is passed through
   conv_unreadable_8bit() and copied instead. */
1688 const guchar *p = srcp;
1690 gint out_enc_str_len;
1691 gint mime_block_len;
1692 gboolean cont = FALSE;
1694 while (*p != '\0') {
1695 if (isspace(*p) && !is_next_nonascii(p + 1))
1697 /* don't include parentheses in encoded
1699 if (addr_field && (*p == '(' || *p == ')' || *p == '"'))
1702 mb_len = g_utf8_skip[*p];
1704 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1705 out_str = conv_codeset_strdup
1706 (part_str, cur_encoding, out_encoding);
1712 g_warning("conv_encode_header_full(): code conversion failed");
1713 conv_unreadable_8bit(part_str);
1714 out_str = g_strdup(part_str);
1717 out_str_len = strlen(out_str);
1720 out_enc_str_len = B64LEN(out_str_len);
1723 qp_get_q_encoding_len(out_str);
1727 if (mimestr_len + out_enc_str_len <= left) {
1730 } else if (cur_len == 0) {
1732 LBREAK_IF_REQUIRED(1, FALSE);
1741 Xstrndup_a(part_str, srcp, cur_len, );
1742 out_str = conv_codeset_strdup
1743 (part_str, cur_encoding, out_encoding);
1745 g_warning("conv_encode_header_full(): code conversion failed");
1746 conv_unreadable_8bit(part_str);
1747 out_str = g_strdup(part_str);
1749 out_str_len = strlen(out_str);
1752 out_enc_str_len = B64LEN(out_str_len);
1755 qp_get_q_encoding_len(out_str);
1758 enc_str = g_base64_encode(out_str, out_str_len);
1760 Xalloca(enc_str, out_enc_str_len + 1, );
1761 qp_q_encode(enc_str, out_str);
1766 /* output MIME-encoded string block */
1767 mime_block_len = mimestr_len + strlen(enc_str);
1768 g_snprintf(destp, mime_block_len + 1,
1769 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1770 out_encoding, mimesep_enc, enc_str);
/* Advance the output cursor past the block and charge its length to the
   current line. */
1775 destp += mime_block_len;
1778 left -= mime_block_len;
/* Break the line when the scan stopped because the block did not fit. */
1781 LBREAK_IF_REQUIRED(cont, FALSE);
1791 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1792 gint header_len, gboolean addr_field)
1794 conv_encode_header_full(dest,len,src,header_len,addr_field,NULL);
/* The line-break helper is only meant for the header encoder above, so
   its definition is dropped here. */
1797 #undef LBREAK_IF_REQUIRED
1800 gchar *conv_filename_from_utf8(const gchar *utf8_file)
1803 GError *error = NULL;
1805 fs_file = g_filename_from_utf8(utf8_file, -1, NULL, NULL, &error);
1807 debug_print("failed to convert encoding of file name: %s\n",
1809 g_error_free(error);
1812 fs_file = g_strdup(utf8_file);
1817 gchar *conv_filename_to_utf8(const gchar *fs_file)
1819 gchar *utf8_file = NULL;
1820 GError *error = NULL;
1822 utf8_file = g_filename_to_utf8(fs_file, -1, NULL, NULL, &error);
1824 g_warning("failed to convert encoding of file name: %s",
1826 g_error_free(error);
1829 if (!utf8_file || !g_utf8_validate(utf8_file, -1, NULL)) {
1831 utf8_file = g_strdup(fs_file);
1832 conv_unreadable_8bit(utf8_file);