2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2012 Hiroyuki Yamamoto and the Claws Mail team
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "claws-features.h"
28 #include <glib/gi18n.h>
40 #include "quoted-printable.h"
42 #include "prefs_common.h"
44 /* For unknown reasons the iconv.m4 macro undefs that macro if no
45 const is needed. This would break the code below so we define it. */
/*
 * Substitute byte (0x5f, ASCII '_') stored in place of a byte or sequence
 * that cannot be displayed or converted: conv_unreadable_8bit() writes it
 * over non-ASCII bytes and conv_iconv_strdup_with_cd() emits it in its
 * EILSEQ branch.
 * The definition deliberately carries no trailing semicolon, so the macro
 * is a plain constant expression and can be used anywhere a value is
 * expected; every visible use already supplies its own ';'.
 */
#define SUBST_CHAR 0x5f
/*
 * Byte-class predicates used by the Japanese converters below.  Every
 * macro masks its argument with 0xff, so only the low eight bits are
 * compared.
 *
 * EUC group: iseuckanji() accepts 0xa1..0xfe, iseuchwkana1() accepts the
 * single value 0x8e, and iseuchwkana2() accepts 0xa1..0xdf.
 * NOTE(review): the "== 0x8f" expression that follows iseuchwkana2() has
 * no visible #define line in this listing; it is presumably the body of
 * iseucaux(), which conv_euctojis() uses -- confirm against the full file.
 */
61 #define iseuckanji(c) \
62 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
63 #define iseuchwkana1(c) \
64 (((c) & 0xff) == 0x8e)
65 #define iseuchwkana2(c) \
66 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
68 (((c) & 0xff) == 0x8f)
/*
 * SJIS group: issjiskanji1() accepts 0x81..0x9f or 0xe0..0xfc,
 * issjiskanji2() accepts 0x40..0x7e or 0x80..0xfc, and issjishwkana()
 * accepts 0xa1..0xdf.  At the call sites issjiskanji1() is applied to a
 * byte and issjiskanji2() to the byte that follows it.
 */
69 #define issjiskanji1(c) \
70 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
71 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
72 #define issjiskanji2(c) \
73 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
74 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
75 #define issjishwkana(c) \
76 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
79 if (state != JIS_KANJI) { \
87 if (state != JIS_ASCII) { \
95 if (state != JIS_HWKANA) { \
103 if (state != JIS_AUXKANJI) { \
108 state = JIS_AUXKANJI; \
/*
 * Forward declarations of the file-local helpers.  All of the
 * conv_XtoY() converters share one shape: (outbuf, outlen, inbuf) -> gint,
 * which is the shape stored in CodeConverter::code_conv_func and returned
 * by conv_get_code_conv_func().
 */

/* Picks the converter for a (source, destination) charset-name pair. */
111 static CodeConvFunc conv_get_code_conv_func (const gchar *src_charset_str,
112 const gchar *dest_charset_str);
/* iconv-backed conversions that hand back a gchar * result. */
114 static gchar *conv_iconv_strdup_with_cd (const gchar *inbuf,
117 static gchar *conv_iconv_strdup (const gchar *inbuf,
118 const gchar *src_code,
119 const gchar *dest_code);
/* Locale / charset discovery. */
121 static CharSet conv_get_locale_charset (void);
122 static CharSet conv_get_outgoing_charset (void);
123 static CharSet conv_guess_ja_encoding(const gchar *str);
124 static gboolean conv_is_ja_locale (void);
/* Hand-written converters between the Japanese byte encodings. */
126 static gint conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
127 static gint conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf);
128 static gint conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
/* Converters whose destination is UTF-8. */
130 static gint conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
131 static gint conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
132 static gint conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
133 static gint conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
/* Converters whose source is UTF-8. */
135 static gint conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
136 static gint conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf);
/* In-place replacement of non-ASCII bytes. */
138 static void conv_unreadable_8bit(gchar *str);
/* "todisp" converters; the visible ones forward to the UTF-8 converters. */
140 static gint conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
141 static gint conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
142 static gint conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
144 static gint conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
145 static gint conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
146 static gint conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf);
/*
 * File-wide flag consulted by conv_codeset_strdup(): when set, that
 * function validates UTF-8 input and only accepts a converter result
 * whose return value is 0.  Starts out FALSE.
 */
148 static gboolean strict_mode = FALSE;
/*
 * Public setter taking the new mode.  conv_localetodisp() brackets each of
 * its conversions with codeconv_set_strict(TRUE) / codeconv_set_strict(FALSE).
 * NOTE(review): the body is not visible in this listing; presumably it
 * stores `mode` into strict_mode -- confirm.
 */
150 void codeconv_set_strict(gboolean mode)
/*
 * Convert the NUL-terminated string inbuf into outbuf (capacity outlen
 * bytes).  By its name this is the JIS (ISO-2022-JP) -> EUC-JP direction.
 *
 * What the visible lines show:
 *  - a JISState variable starts at JIS_ASCII and is switched by escape
 *    handling that inspects the characters '@', 'B', '(', 'J' and 'I'
 *    after the introducer, plus the single bytes 0x0e and 0x0f;
 *  - payload bytes are copied with the high bit set (| 0x80);
 *  - the loop ends at the input NUL or once the write position reaches
 *    outlen - 4.
 * NOTE(review): the statements that terminate outbuf and produce the
 * return value are not visible in this listing.
 */
155 static gint conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
157 const guchar *in = inbuf;
159 JISState state = JIS_ASCII;
162 * Loop outputs up to 3 bytes in each pass (aux kanji) and we
163 * need 1 byte to terminate the output
165 while (*in != '\0' && (out - outbuf) < outlen - 4) {
169 if (*(in + 1) == '@' || *(in + 1) == 'B') {
172 } else if (*(in + 1) == '(' &&
174 state = JIS_AUXKANJI;
177 /* unknown escape sequence */
180 } else if (*in == '(') {
181 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
184 } else if (*(in + 1) == 'I') {
188 /* unknown escape sequence */
192 /* unknown escape sequence */
195 } else if (*in == 0x0e) {
198 } else if (*in == 0x0f) {
207 *out++ = *in++ | 0x80;
/* stop if the input ends between the two bytes of a pair */
208 if (*in == '\0') break;
209 *out++ = *in++ | 0x80;
213 *out++ = *in++ | 0x80;
217 *out++ = *in++ | 0x80;
/* stop if the input ends between the two bytes of a pair */
218 if (*in == '\0') break;
219 *out++ = *in++ | 0x80;
/* Values that conv_jis_hantozen() compares its sound_sym argument with. */
229 #define JIS_HWDAKUTEN 0x5e
230 #define JIS_HWHANDAKUTEN 0x5f
/*
 * Look up a 16-bit code for jis_code in one of three static tables and
 * store it in outbuf[0] (high byte) and outbuf[1] (low byte).
 *
 *  - sound_sym == JIS_HWDAKUTEN and jis_code in 0x36..0x4e:
 *    dakuten_tbl[jis_code - 0x30]
 *  - sound_sym == JIS_HWHANDAKUTEN and jis_code in 0x4a..0x4e:
 *    handakuten_tbl[jis_code - 0x4a]
 *  - the last visible lookup: h2z_tbl[jis_code - 0x20]
 *
 * A range check on jis_code (below 0x21 or above 0x5f) precedes the
 * lookups.  By its name ("han to zen") this presumably maps half-width
 * kana to full-width kana.
 * NOTE(review): the return statements and the conditions between the
 * lookups are not visible here.  conv_euctojis() stores the result in a
 * variable named len, so it presumably reports how many input characters
 * or output bytes were handled -- confirm.
 */
232 static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
/* base table, indexed by jis_code - 0x20 */
234 static guint16 h2z_tbl[] = {
236 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
237 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
239 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
240 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
242 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
243 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
245 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
246 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
/* indexed by jis_code - 0x30; entries of 0x0000 mark codes with no mapping */
249 static guint16 dakuten_tbl[] = {
251 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
252 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
254 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
255 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
/* indexed by jis_code - 0x4a */
258 static guint16 handakuten_tbl[] = {
260 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
268 if (jis_code < 0x21 || jis_code > 0x5f)
271 if (sound_sym == JIS_HWDAKUTEN &&
272 jis_code >= 0x36 && jis_code <= 0x4e) {
273 out_code = dakuten_tbl[jis_code - 0x30];
275 *outbuf = out_code >> 8;
276 *(outbuf + 1) = out_code & 0xff;
281 if (sound_sym == JIS_HWHANDAKUTEN &&
282 jis_code >= 0x4a && jis_code <= 0x4e) {
283 out_code = handakuten_tbl[jis_code - 0x4a];
284 *outbuf = out_code >> 8;
285 *(outbuf + 1) = out_code & 0xff;
289 out_code = h2z_tbl[jis_code - 0x20];
290 *outbuf = out_code >> 8;
291 *(outbuf + 1) = out_code & 0xff;
/*
 * Convert the NUL-terminated string inbuf into outbuf (capacity outlen
 * bytes).  By its name this is the EUC-JP -> JIS (ISO-2022-JP) direction.
 *
 * What the visible lines show:
 *  - two consecutive iseuckanji() bytes are emitted with the high bit
 *    cleared (& 0x7f);
 *  - a 0x8e byte followed by an iseuchwkana2() byte is either emitted
 *    with the high bit cleared (when prefs_common.allow_jisx0201_kana is
 *    set) or handed to conv_jis_hantozen(); when the next two bytes are
 *    another such pair, the second kana byte is passed as sound_sym;
 *  - after an iseucaux() byte, a pair of iseuckanji() bytes is emitted
 *    the same way as ordinary kanji;
 *  - the loop ends at the input NUL or once the write position reaches
 *    outlen - 10.
 * NOTE(review): state switching, error branches and the return value are
 * on lines not visible in this listing.
 */
295 static gint conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
297 const guchar *in = inbuf;
299 JISState state = JIS_ASCII;
302 * Loop outputs up to 6 bytes in each pass (aux shift + aux
303 * kanji) and we need up to 4 bytes to terminate the output
304 * (ASCII shift + null)
306 while (*in != '\0' && (out - outbuf) < outlen - 10) {
310 } else if (iseuckanji(*in)) {
311 if (iseuckanji(*(in + 1))) {
313 *out++ = *in++ & 0x7f;
314 *out++ = *in++ & 0x7f;
319 if (*in != '\0' && !IS_ASCII(*in)) {
324 } else if (iseuchwkana1(*in)) {
325 if (iseuchwkana2(*(in + 1))) {
326 if (prefs_common.allow_jisx0201_kana) {
329 *out++ = *in++ & 0x7f;
334 if (iseuchwkana1(*(in + 2)) &&
335 iseuchwkana2(*(in + 3)))
336 len = conv_jis_hantozen
338 *(in + 1), *(in + 3));
340 len = conv_jis_hantozen
355 if (*in != '\0' && !IS_ASCII(*in)) {
360 } else if (iseucaux(*in)) {
362 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
364 *out++ = *in++ & 0x7f;
365 *out++ = *in++ & 0x7f;
368 if (*in != '\0' && !IS_ASCII(*in)) {
371 if (*in != '\0' && !IS_ASCII(*in)) {
/*
 * Convert the NUL-terminated string inbuf into outbuf (capacity outlen
 * bytes).  By its name this is the Shift_JIS -> EUC-JP direction.
 *
 * For a byte pair accepted by issjiskanji1()/issjiskanji2() the visible
 * arithmetic is:
 *  - row is 0x70 when the first byte is below 0xa0, otherwise 0xb0;
 *  - the first byte becomes (out1 - row) * 2 - 1 or (out1 - row) * 2,
 *    depending on a condition that is not visible here;
 *  - in the "- 1" case the second byte is reduced by 0x20 when it is
 *    above 0x7f, otherwise by 0x1f;
 *  - both bytes are then written with the high bit set.
 * Bytes accepted by issjishwkana() have their own branch.
 * The loop ends at the input NUL or once the write position reaches
 * outlen - 3.
 * NOTE(review): the return value is produced on lines not visible here.
 */
389 static gint conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
391 const guchar *in = inbuf;
395 * Loop outputs up to 2 bytes in each pass and we need 1 byte
396 * to terminate the output
398 while (*in != '\0' && (out - outbuf) < outlen - 3) {
401 } else if (issjiskanji1(*in)) {
402 if (issjiskanji2(*(in + 1))) {
404 guchar out2 = *(in + 1);
407 row = out1 < 0xa0 ? 0x70 : 0xb0;
409 out1 = (out1 - row) * 2 - 1;
410 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
412 out1 = (out1 - row) * 2;
416 *out++ = out1 | 0x80;
417 *out++ = out2 | 0x80;
422 if (*in != '\0' && !IS_ASCII(*in)) {
427 } else if (issjishwkana(*in)) {
/*
 * Two-step conversion: inbuf is first run through conv_jistoeuc() into a
 * scratch buffer of outlen bytes, and that buffer is then run through
 * conv_euctoutf8() into outbuf.  A negative result from either step is
 * checked for.
 * NOTE(review): Xalloca() is a project macro; its third argument
 * (`return -1`) is presumably the statement executed when the allocation
 * fails -- confirm.
 */
440 static gint conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
444 Xalloca(eucstr, outlen, return -1);
446 if (conv_jistoeuc(eucstr, outlen, inbuf) <0)
448 if (conv_euctoutf8(outbuf, outlen, eucstr) < 0)
/*
 * Convert inbuf from CS_SHIFT_JIS to CS_UTF_8 with conv_iconv_strdup() and
 * copy the result into outbuf, limited to outlen by strncpy2().  The
 * second strncpy2() copies inbuf unchanged; presumably that is the path
 * taken when the conversion yields no string -- the branch condition is
 * not visible in this listing.
 */
453 static gint conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
457 tmpstr = conv_iconv_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);
459 strncpy2(outbuf, tmpstr, outlen);
463 strncpy2(outbuf, inbuf, outlen);
/*
 * Convert inbuf to UTF-8 through an iconv descriptor that is kept in a
 * function-local static and therefore opened at most once per successful
 * open.  The descriptor is opened from CS_EUC_JP_MS first and, if that
 * fails, from CS_EUC_JP; when both fail a warning is logged and inbuf is
 * copied to outbuf unchanged.  The iconv_ok flag starts TRUE; presumably
 * it is cleared after a failed open so later calls skip straight to the
 * plain copy (the lines that test and clear it are not visible).
 *
 * With a usable descriptor the text goes through
 * conv_iconv_strdup_with_cd() and the result is copied into outbuf with
 * strncpy2(); the last strncpy2() copies inbuf unchanged instead.
 */
468 static gint conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
470 static iconv_t cd = (iconv_t)-1;
471 static gboolean iconv_ok = TRUE;
474 if (cd == (iconv_t)-1) {
476 strncpy2(outbuf, inbuf, outlen);
479 cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
480 if (cd == (iconv_t)-1) {
481 cd = iconv_open(CS_UTF_8, CS_EUC_JP);
482 if (cd == (iconv_t)-1) {
483 g_warning("conv_euctoutf8(): %s",
486 strncpy2(outbuf, inbuf, outlen);
492 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
494 strncpy2(outbuf, tmpstr, outlen);
498 strncpy2(outbuf, inbuf, outlen);
/*
 * Guess the encoding of inbuf with conv_guess_ja_encoding() and dispatch
 * to conv_jistoutf8(), conv_sjistoutf8() or conv_euctoutf8(), keeping the
 * chosen converter's result in r.  The remaining branch copies inbuf to
 * outbuf unchanged.  The case labels themselves are not visible in this
 * listing.
 */
503 static gint conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
506 switch (conv_guess_ja_encoding(inbuf)) {
508 r = conv_jistoutf8(outbuf, outlen, inbuf);
511 r = conv_sjistoutf8(outbuf, outlen, inbuf);
514 r = conv_euctoutf8(outbuf, outlen, inbuf);
518 strncpy2(outbuf, inbuf, outlen);
/*
 * Mirror image of conv_euctoutf8(): convert inbuf from UTF-8 through an
 * iconv descriptor kept in a function-local static.  The descriptor is
 * opened towards CS_EUC_JP_MS first and, if that fails, towards
 * CS_EUC_JP; when both fail a warning is logged and inbuf is copied to
 * outbuf unchanged.  The iconv_ok flag starts TRUE; the lines that test
 * and clear it are not visible in this listing.
 *
 * With a usable descriptor the text goes through
 * conv_iconv_strdup_with_cd() and the result is copied into outbuf with
 * strncpy2(); the last strncpy2() copies inbuf unchanged instead.
 */
525 static gint conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
527 static iconv_t cd = (iconv_t)-1;
528 static gboolean iconv_ok = TRUE;
531 if (cd == (iconv_t)-1) {
533 strncpy2(outbuf, inbuf, outlen);
536 cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
537 if (cd == (iconv_t)-1) {
538 cd = iconv_open(CS_EUC_JP, CS_UTF_8);
539 if (cd == (iconv_t)-1) {
540 g_warning("conv_utf8toeuc(): %s",
543 strncpy2(outbuf, inbuf, outlen);
549 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
551 strncpy2(outbuf, tmpstr, outlen);
555 strncpy2(outbuf, inbuf, outlen);
/*
 * Two-step conversion: inbuf is first run through conv_utf8toeuc() into a
 * scratch buffer of outlen bytes, and that buffer is then run through
 * conv_euctojis() into outbuf.  A negative result from either step is
 * checked for.
 * NOTE(review): Xalloca() is a project macro; its third argument
 * (`return -1`) is presumably the statement executed when the allocation
 * fails -- confirm.
 */
560 static gint conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf)
564 Xalloca(eucstr, outlen, return -1);
566 if (conv_utf8toeuc(eucstr, outlen, inbuf) < 0)
568 if (conv_euctojis(outbuf, outlen, eucstr) < 0)
/*
 * Sanitise str in place:
 *  - a CR immediately followed by LF is removed by shifting the rest of
 *    the string (terminator included) one byte to the left;
 *  - any other byte that fails IS_ASCII() is overwritten with SUBST_CHAR.
 * The loop construct that advances p is not visible in this listing.
 */
574 static void conv_unreadable_8bit(gchar *str)
576 register guchar *p = str;
579 /* convert CR+LF -> LF */
580 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes starting at p + 1 include the trailing NUL */
581 memmove(p, p + 1, strlen(p));
582 else if (!IS_ASCII(*p)) *p = SUBST_CHAR;
/*
 * Heuristically classify the bytes of str.  The running guess starts at
 * C_US_ASCII.
 *
 * What the visible lines show:
 *  - ESC followed by '$' or '(' returns C_ISO_2022_JP at once, provided
 *    the guess is still C_US_ASCII;
 *  - bytes passing IS_ASCII() have their own branch;
 *  - a pair of iseuckanji() bytes is examined further: a first byte of
 *    0xfd or 0xfe is singled out, and when the guess is already
 *    C_SHIFT_JIS the pair is re-tested with the SJIS predicates;
 *  - a pair passing issjiskanji1()/issjiskanji2() that also looks like an
 *    EUC half-width kana pair sets the guess to C_SHIFT_JIS;
 *  - a byte passing issjishwkana() sets the guess to C_SHIFT_JIS.
 * NOTE(review): the loop header, the pointer increments, the other
 * assignments to `guessed` and the final return are on lines not visible
 * in this listing.
 */
587 static CharSet conv_guess_ja_encoding(const gchar *str)
589 const guchar *p = str;
590 CharSet guessed = C_US_ASCII;
593 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
594 if (guessed == C_US_ASCII)
595 return C_ISO_2022_JP;
597 } else if (IS_ASCII(*p)) {
599 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
600 if (*p >= 0xfd && *p <= 0xfe)
602 else if (guessed == C_SHIFT_JIS) {
603 if ((issjiskanji1(*p) &&
604 issjiskanji2(*(p + 1))) ||
606 guessed = C_SHIFT_JIS;
612 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
613 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
614 guessed = C_SHIFT_JIS;
618 } else if (issjishwkana(*p)) {
619 guessed = C_SHIFT_JIS;
/* Thin wrapper: forwards all three arguments to conv_jistoutf8() and
 * returns its result. */
629 static gint conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
631 return conv_jistoutf8(outbuf, outlen, inbuf);
/* Thin wrapper: forwards all three arguments to conv_sjistoutf8() and
 * returns its result. */
634 static gint conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
636 return conv_sjistoutf8(outbuf, outlen, inbuf);
/* Thin wrapper: forwards all three arguments to conv_euctoutf8() and
 * returns its result. */
639 static gint conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
641 return conv_euctoutf8(outbuf, outlen, inbuf);
/*
 * Copy inbuf to outbuf (limited to outlen by strncpy2()) when it is valid
 * UTF-8 according to g_utf8_validate(); the other visible statement hands
 * the text to conv_ustodisp(), which replaces non-ASCII bytes.
 */
644 void conv_utf8todisp(gchar *outbuf, gint outlen, const gchar *inbuf)
646 if (g_utf8_validate(inbuf, -1, NULL) == TRUE)
647 strncpy2(outbuf, inbuf, outlen);
649 conv_ustodisp(outbuf, outlen, inbuf);
/*
 * Convert inbuf with conv_anytoutf8() (encoding guessed), checking for a
 * negative result, then verify the output: if outbuf is not valid UTF-8
 * its non-ASCII bytes are replaced in place by conv_unreadable_8bit().
 */
652 static gint conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
655 if (conv_anytoutf8(outbuf, outlen, inbuf) < 0)
657 if (g_utf8_validate(outbuf, -1, NULL) != TRUE)
658 conv_unreadable_8bit(outbuf);
/*
 * Copy inbuf to outbuf (limited to outlen by strncpy2()) and then replace
 * every non-ASCII byte of the copy via conv_unreadable_8bit().
 */
662 static gint conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
664 strncpy2(outbuf, inbuf, outlen);
665 conv_unreadable_8bit(outbuf);
/*
 * Convert inbuf, assumed to be in the locale's charset, for display.
 *
 * Attempt 1: conv_iconv_strdup() from conv_get_locale_charset_str(), run
 * with strict mode switched on for the duration of the call.  A non-NULL,
 * valid-UTF-8 result is copied into outbuf.
 * Attempt 2 (result present but not valid UTF-8): the conversion is
 * repeated from conv_get_locale_charset_str_no_utf8(), again under strict
 * mode, and a valid result is copied into outbuf.
 * Fallback: conv_utf8todisp() on the original inbuf.
 * NOTE(review): the destination charset argument of both conversions and
 * the g_free() calls are on lines not visible in this listing.
 */
670 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
674 codeconv_set_strict(TRUE);
675 tmpstr = conv_iconv_strdup(inbuf, conv_get_locale_charset_str(),
677 codeconv_set_strict(FALSE);
678 if (tmpstr && g_utf8_validate(tmpstr, -1, NULL)) {
679 strncpy2(outbuf, tmpstr, outlen);
682 } else if (tmpstr && !g_utf8_validate(tmpstr, -1, NULL)) {
684 codeconv_set_strict(TRUE);
685 tmpstr = conv_iconv_strdup(inbuf,
686 conv_get_locale_charset_str_no_utf8(),
688 codeconv_set_strict(FALSE);
690 if (tmpstr && g_utf8_validate(tmpstr, -1, NULL)) {
691 strncpy2(outbuf, tmpstr, outlen);
696 conv_utf8todisp(outbuf, outlen, inbuf);
/*
 * Identity "converter": copies inbuf to outbuf, limited to outlen by
 * strncpy2().  Its address doubles as the "no dedicated converter" marker
 * that conv_convert() and conv_codeset_strdup() compare against.
 */
700 static gint conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
702 strncpy2(outbuf, inbuf, outlen);
/*
 * Map a private charset name to a replacement name.  Only names that
 * start with "X-" or "x-" are examined; within those, CS_X_MACCYR and
 * CS_X_GBK are matched case-insensitively.  A NULL encoding fails the
 * first test and is not dereferenced.
 * NOTE(review): the returned replacement names and the default return are
 * on lines not visible in this listing; callers treat the result as the
 * charset name to use from then on.
 */
707 conv_get_fallback_for_private_encoding(const gchar *encoding)
709 if (encoding && (encoding[0] == 'X' || encoding[0] == 'x') &&
710 encoding[1] == '-') {
711 if (!g_ascii_strcasecmp(encoding, CS_X_MACCYR))
713 if (!g_ascii_strcasecmp(encoding, CS_X_GBK))
/*
 * Allocate a zero-filled CodeConverter for text arriving in src_charset.
 * The charset name is first passed through
 * conv_get_fallback_for_private_encoding(); the converter function is
 * chosen with a NULL destination, and the object keeps its own g_strdup()
 * copy of the name plus the matching CharSet value.
 */
720 CodeConverter *conv_code_converter_new(const gchar *src_charset)
724 src_charset = conv_get_fallback_for_private_encoding(src_charset);
726 conv = g_new0(CodeConverter, 1);
727 conv->code_conv_func = conv_get_code_conv_func(src_charset, NULL);
728 conv->charset_str = g_strdup(src_charset);
729 conv->charset = conv_get_charset_from_str(src_charset);
/*
 * Release a converter created by conv_code_converter_new(): frees the
 * charset name it owns.
 * NOTE(review): freeing of the CodeConverter itself is presumably on the
 * following line, which is not visible in this listing.
 */
734 void conv_code_converter_destroy(CodeConverter *conv)
736 g_free(conv->charset_str);
/*
 * Convert inbuf into outbuf (capacity outlen) using conv.
 * When the converter holds a dedicated function (anything other than
 * conv_noconv) that function does the work and its result is returned.
 * Otherwise the text goes through conv_iconv_strdup() from
 * conv->charset_str with a NULL destination, and the resulting string is
 * copied into outbuf with strncpy2().
 */
740 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
743 if (conv->code_conv_func != conv_noconv)
744 return conv->code_conv_func(outbuf, outlen, inbuf);
748 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
752 strncpy2(outbuf, str, outlen);
/*
 * Return a newly allocated copy of inbuf converted from src_code to
 * dest_code.
 *
 * Steps visible here:
 *  1. Same source and destination (strcmp2() == 0): in strict mode with a
 *     UTF-8 destination the input is validated first; otherwise the input
 *     is simply duplicated.
 *  2. src_code is replaced by its private-encoding fallback and a
 *     converter function is looked up.
 *  3. In strict mode, conv_ustodisp on non-ASCII input is singled out.
 *  4. For a dedicated converter a work buffer of (strlen(inbuf) + 1) * 3
 *     bytes is used; the result is accepted when the converter returns 0
 *     or strict mode is off, and shrunk to fit with g_realloc().
 *  5. Everything else falls through to conv_iconv_strdup().
 * NOTE(review): the statements executed on validation failure and on a
 * rejected converter result are not visible in this listing.
 */
760 gchar *conv_codeset_strdup(const gchar *inbuf,
761 const gchar *src_code, const gchar *dest_code)
765 CodeConvFunc conv_func;
767 if (!strcmp2(src_code, dest_code)) {
768 CharSet dest_charset = conv_get_charset_from_str(dest_code);
769 if (strict_mode && dest_charset == C_UTF_8) {
770 /* ensure valid UTF-8 if target is UTF-8 */
771 if (!g_utf8_validate(inbuf, -1, NULL)) {
775 /* otherwise, try for a lucky day */
776 return g_strdup(inbuf);
779 src_code = conv_get_fallback_for_private_encoding(src_code);
780 conv_func = conv_get_code_conv_func(src_code, dest_code);
781 if (conv_func == conv_ustodisp && strict_mode && !is_ascii_str(inbuf))
784 if (conv_func != conv_noconv) {
785 len = (strlen(inbuf) + 1) * 3;
788 if (conv_func(buf, len, inbuf) == 0 || !strict_mode)
789 return g_realloc(buf, strlen(buf) + 1);
796 return conv_iconv_strdup(inbuf, src_code, dest_code);
/*
 * Choose the converter function for a (source, destination) pair of
 * charset names.  The default answer is conv_noconv, which callers read
 * as "no dedicated converter, use iconv".
 *
 *  - A NULL source name means the locale charset.
 *  - Both names NULL is the auto-detection mode: in a Japanese locale the
 *    answer is conv_anytodisp.
 *  - A destination of C_US_ASCII always yields conv_ustodisp.
 *  - Otherwise the source charset selects a group and the destination
 *    picks within it.  Judging by the converter names, the groups are
 *    ISO-2022-JP, Shift_JIS, EUC-JP and UTF-8 sources; most case labels
 *    are not visible in this listing.
 * conv_get_charset_from_str() maps a NULL name to C_AUTO, so C_AUTO here
 * stands for "no destination given".
 */
799 static CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
800 const gchar *dest_charset_str)
802 CodeConvFunc code_conv = conv_noconv;
804 CharSet dest_charset;
806 if (!src_charset_str)
807 src_charset = conv_get_locale_charset();
809 src_charset = conv_get_charset_from_str(src_charset_str);
811 /* auto detection mode */
812 if (!src_charset_str && !dest_charset_str) {
813 if (conv_is_ja_locale())
814 return conv_anytodisp;
819 dest_charset = conv_get_charset_from_str(dest_charset_str);
821 if (dest_charset == C_US_ASCII)
822 return conv_ustodisp;
824 switch (src_charset) {
842 case C_ISO_2022_JP_2:
843 case C_ISO_2022_JP_3:
844 if (dest_charset == C_AUTO)
845 code_conv = conv_jistodisp;
846 else if (dest_charset == C_EUC_JP)
847 code_conv = conv_jistoeuc;
848 else if (dest_charset == C_UTF_8)
849 code_conv = conv_jistoutf8;
852 if (dest_charset == C_AUTO)
853 code_conv = conv_sjistodisp;
854 else if (dest_charset == C_EUC_JP)
855 code_conv = conv_sjistoeuc;
856 else if (dest_charset == C_UTF_8)
857 code_conv = conv_sjistoutf8;
860 if (dest_charset == C_AUTO)
861 code_conv = conv_euctodisp;
862 else if (dest_charset == C_ISO_2022_JP ||
863 dest_charset == C_ISO_2022_JP_2 ||
864 dest_charset == C_ISO_2022_JP_3)
865 code_conv = conv_euctojis;
866 else if (dest_charset == C_UTF_8)
867 code_conv = conv_euctoutf8;
870 if (dest_charset == C_EUC_JP)
871 code_conv = conv_utf8toeuc;
872 else if (dest_charset == C_ISO_2022_JP ||
873 dest_charset == C_ISO_2022_JP_2 ||
874 dest_charset == C_ISO_2022_JP_3)
875 code_conv = conv_utf8tojis;
/*
 * Convert inbuf from src_code to dest_code with iconv and return the
 * result as a string.
 *
 * Shortcuts that return a plain g_strdup() copy of inbuf:
 *  - both charset names NULL and inbuf already valid UTF-8;
 *  - source and destination names equal (case-insensitive);
 *  - source is US-ASCII;
 *  - destination is US-ASCII.
 * Defaults visible here: the source falls back to
 * conv_get_outgoing_charset_str() and the destination to CS_INTERNAL; the
 * conditions guarding those assignments are presumably NULL checks (not
 * visible in this listing).
 * Otherwise a descriptor is opened with iconv_open(dest_code, src_code)
 * and the work is delegated to conv_iconv_strdup_with_cd().
 */
884 static gchar *conv_iconv_strdup(const gchar *inbuf,
885 const gchar *src_code, const gchar *dest_code)
890 if (!src_code && !dest_code &&
891 g_utf8_validate(inbuf, -1, NULL))
892 return g_strdup(inbuf);
895 src_code = conv_get_outgoing_charset_str();
897 dest_code = CS_INTERNAL;
899 /* don't convert if src and dest codeset are identical */
900 if (!strcasecmp(src_code, dest_code))
901 return g_strdup(inbuf);
903 /* don't convert if src codeset is US-ASCII */
904 if (!strcasecmp(src_code, CS_US_ASCII))
905 return g_strdup(inbuf);
907 /* don't convert if dest codeset is US-ASCII */
908 if (!strcasecmp(dest_code, CS_US_ASCII))
909 return g_strdup(inbuf);
911 cd = iconv_open(dest_code, src_code);
912 if (cd == (iconv_t)-1)
915 outbuf = conv_iconv_strdup_with_cd(inbuf, cd);
/*
 * Run the whole of inbuf through the already-open iconv descriptor cd and
 * return the converted text in a g_malloc()ed buffer.
 *
 * The output buffer starts at (strlen(inbuf) + 1) * 2 bytes.  The first
 * loop repeats iconv() while it reports failure and reacts to errno:
 *  - EILSEQ: SUBST_CHAR is written to the output for the offending input;
 *  - EINVAL: handled on a line not visible here;
 *  - E2BIG:  presumably grows the buffer via EXPAND_BUF() and retries;
 *  - anything else: a warning is logged.
 * Finally the buffer is shrunk to the bytes actually written plus one.
 * NOTE(review): skipping of the bad input byte, the strict-mode handling
 * and the NUL termination are on lines not visible in this listing.
 */
922 gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd)
924 const gchar *inbuf_p;
935 in_size = strlen(inbuf);
937 out_size = (in_size + 1) * 2;
938 outbuf = g_malloc(out_size);
/*
 * Function-local helper macro: reallocates outbuf to out_size and rebases
 * the write pointer and remaining-space counter on the new block.  It
 * works directly on this function's locals.
 */
942 #define EXPAND_BUF() \
944 len = outbuf_p - outbuf; \
946 outbuf = g_realloc(outbuf, out_size); \
947 outbuf_p = outbuf + len; \
948 out_left = out_size - len; \
951 while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
952 &outbuf_p, &out_left)) == (size_t)-1) {
953 if (EILSEQ == errno) {
958 //g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
964 *outbuf_p++ = SUBST_CHAR;
966 } else if (EINVAL == errno) {
968 } else if (E2BIG == errno) {
971 g_warning("conv_iconv_strdup(): %s",
/*
 * Second pass: iconv() is called with a NULL input, which asks it to emit
 * whatever is needed to return the output to its initial shift state.
 */
977 while ((n_conv = iconv(cd, NULL, NULL, &outbuf_p, &out_left)) ==
979 if (E2BIG == errno) {
982 g_warning("conv_iconv_strdup(): %s",
990 len = outbuf_p - outbuf;
991 outbuf = g_realloc(outbuf, len + 1);
/*
 * Table pairing each CharSet value with a charset name; it is referred to
 * as charsets[] with members .charset and .name by the two hash-table
 * builders below.
 *
 * Some CharSet values appear on several rows (C_US_ASCII, C_EUC_JP,
 * C_SHIFT_JIS): every row is registered for name -> CharSet lookups.
 * NOTE(review): conv_get_charset_to_str_table() looks a charset up before
 * inserting it, so presumably only the first row of a charset supplies
 * its CharSet -> name mapping -- confirm.
 */
997 static const struct {
1001 {C_US_ASCII, CS_US_ASCII},
1002 {C_US_ASCII, CS_ANSI_X3_4_1968},
1003 {C_UTF_8, CS_UTF_8},
1004 {C_UTF_7, CS_UTF_7},
1005 {C_ISO_8859_1, CS_ISO_8859_1},
1006 {C_ISO_8859_2, CS_ISO_8859_2},
1007 {C_ISO_8859_3, CS_ISO_8859_3},
1008 {C_ISO_8859_4, CS_ISO_8859_4},
1009 {C_ISO_8859_5, CS_ISO_8859_5},
1010 {C_ISO_8859_6, CS_ISO_8859_6},
1011 {C_ISO_8859_7, CS_ISO_8859_7},
1012 {C_ISO_8859_8, CS_ISO_8859_8},
1013 {C_ISO_8859_9, CS_ISO_8859_9},
1014 {C_ISO_8859_10, CS_ISO_8859_10},
1015 {C_ISO_8859_11, CS_ISO_8859_11},
1016 {C_ISO_8859_13, CS_ISO_8859_13},
1017 {C_ISO_8859_14, CS_ISO_8859_14},
1018 {C_ISO_8859_15, CS_ISO_8859_15},
1019 {C_BALTIC, CS_BALTIC},
1020 {C_CP1250, CS_CP1250},
1021 {C_CP1251, CS_CP1251},
1022 {C_CP1252, CS_CP1252},
1023 {C_CP1253, CS_CP1253},
1024 {C_CP1254, CS_CP1254},
1025 {C_CP1255, CS_CP1255},
1026 {C_CP1256, CS_CP1256},
1027 {C_CP1257, CS_CP1257},
1028 {C_CP1258, CS_CP1258},
1029 {C_WINDOWS_1250, CS_WINDOWS_1250},
1030 {C_WINDOWS_1251, CS_WINDOWS_1251},
1031 {C_WINDOWS_1252, CS_WINDOWS_1252},
1032 {C_WINDOWS_1253, CS_WINDOWS_1253},
1033 {C_WINDOWS_1254, CS_WINDOWS_1254},
1034 {C_WINDOWS_1255, CS_WINDOWS_1255},
1035 {C_WINDOWS_1256, CS_WINDOWS_1256},
1036 {C_WINDOWS_1257, CS_WINDOWS_1257},
1037 {C_WINDOWS_1258, CS_WINDOWS_1258},
1038 {C_KOI8_R, CS_KOI8_R},
1039 {C_MACCYR, CS_MACCYR},
1040 {C_KOI8_T, CS_KOI8_T},
1041 {C_KOI8_U, CS_KOI8_U},
1042 {C_ISO_2022_JP, CS_ISO_2022_JP},
1043 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
1044 {C_ISO_2022_JP_3, CS_ISO_2022_JP_3},
1045 {C_EUC_JP, CS_EUC_JP},
1046 {C_EUC_JP, CS_EUCJP},
1047 {C_EUC_JP_MS, CS_EUC_JP_MS},
1048 {C_SHIFT_JIS, CS_SHIFT_JIS},
1049 {C_SHIFT_JIS, CS_SHIFT__JIS},
1050 {C_SHIFT_JIS, CS_SJIS},
1051 {C_ISO_2022_KR, CS_ISO_2022_KR},
1052 {C_EUC_KR, CS_EUC_KR},
1053 {C_ISO_2022_CN, CS_ISO_2022_CN},
1054 {C_EUC_CN, CS_EUC_CN},
1055 {C_GB18030, CS_GB18030},
1056 {C_GB2312, CS_GB2312},
1058 {C_EUC_TW, CS_EUC_TW},
1060 {C_BIG5_HKSCS, CS_BIG5_HKSCS},
1061 {C_TIS_620, CS_TIS_620},
1062 {C_WINDOWS_874, CS_WINDOWS_874},
1063 {C_GEORGIAN_PS, CS_GEORGIAN_PS},
1064 {C_TCVN5712_1, CS_TCVN5712_1},
/*
 * Locale-name table.  Each row gives a locale name, the charset used for
 * that locale (read as .charset by conv_get_locale_charset()) and the
 * charset used for outgoing text (.out_charset, read by
 * conv_get_outgoing_charset()).
 *
 * The lookups scan the rows in order and compare the current locale
 * against a row's name as a case-insensitive prefix, so rows carrying an
 * explicit codeset suffix ("ja_JP.eucJP") are listed before the bare
 * language_COUNTRY row ("ja_JP").
 * NOTE(review): "ja_JP" and "ru_RU" each appear on two adjacent rows with
 * different charsets, and the listing skips lines around them; presumably
 * preprocessor conditionals select one row per platform -- confirm.
 */
1067 static const struct {
1068 gchar *const locale;
1070 CharSet out_charset;
1071 } locale_table[] = {
1072 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
1073 {"ja_JP.EUC-JP" , C_EUC_JP , C_ISO_2022_JP},
1074 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
1075 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
1076 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
1077 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
1079 {"ja_JP" , C_SHIFT_JIS , C_ISO_2022_JP},
1081 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
1083 {"ko_KR.EUC-KR" , C_EUC_KR , C_EUC_KR},
1084 {"ko_KR" , C_EUC_KR , C_EUC_KR},
1085 {"zh_CN.GB18030" , C_GB18030 , C_GB18030},
1086 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
1087 {"zh_CN.GBK" , C_GBK , C_GBK},
1088 {"zh_CN" , C_GB18030 , C_GB18030},
1089 {"zh_HK" , C_BIG5_HKSCS , C_BIG5_HKSCS},
1090 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
1091 {"zh_TW.EUC-TW" , C_EUC_TW , C_BIG5},
1092 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
1093 {"zh_TW" , C_BIG5 , C_BIG5},
1095 {"ru_RU.KOI8-R" , C_KOI8_R , C_KOI8_R},
1096 {"ru_RU.KOI8R" , C_KOI8_R , C_KOI8_R},
1097 {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
1099 {"ru_RU" , C_WINDOWS_1251, C_KOI8_R},
1101 {"ru_RU" , C_ISO_8859_5 , C_KOI8_R},
1103 {"tg_TJ" , C_KOI8_T , C_KOI8_T},
1104 {"ru_UA" , C_KOI8_U , C_KOI8_U},
1105 {"uk_UA.CP1251" , C_WINDOWS_1251, C_KOI8_U},
1106 {"uk_UA" , C_KOI8_U , C_KOI8_U},
1108 {"be_BY" , C_WINDOWS_1251, C_WINDOWS_1251},
1109 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
1111 {"yi_US" , C_WINDOWS_1255, C_WINDOWS_1255},
1113 {"af_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1114 {"br_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1115 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1116 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1117 {"de_AT" , C_ISO_8859_1 , C_ISO_8859_1},
1118 {"de_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1119 {"de_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1120 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
1121 {"de_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1122 {"en_AU" , C_ISO_8859_1 , C_ISO_8859_1},
1123 {"en_BW" , C_ISO_8859_1 , C_ISO_8859_1},
1124 {"en_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1125 {"en_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1126 {"en_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1127 {"en_HK" , C_ISO_8859_1 , C_ISO_8859_1},
1128 {"en_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1129 {"en_NZ" , C_ISO_8859_1 , C_ISO_8859_1},
1130 {"en_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1131 {"en_SG" , C_ISO_8859_1 , C_ISO_8859_1},
1132 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
1133 {"en_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1134 {"en_ZW" , C_ISO_8859_1 , C_ISO_8859_1},
1135 {"es_AR" , C_ISO_8859_1 , C_ISO_8859_1},
1136 {"es_BO" , C_ISO_8859_1 , C_ISO_8859_1},
1137 {"es_CL" , C_ISO_8859_1 , C_ISO_8859_1},
1138 {"es_CO" , C_ISO_8859_1 , C_ISO_8859_1},
1139 {"es_CR" , C_ISO_8859_1 , C_ISO_8859_1},
1140 {"es_DO" , C_ISO_8859_1 , C_ISO_8859_1},
1141 {"es_EC" , C_ISO_8859_1 , C_ISO_8859_1},
1142 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1143 {"es_GT" , C_ISO_8859_1 , C_ISO_8859_1},
1144 {"es_HN" , C_ISO_8859_1 , C_ISO_8859_1},
1145 {"es_MX" , C_ISO_8859_1 , C_ISO_8859_1},
1146 {"es_NI" , C_ISO_8859_1 , C_ISO_8859_1},
1147 {"es_PA" , C_ISO_8859_1 , C_ISO_8859_1},
1148 {"es_PE" , C_ISO_8859_1 , C_ISO_8859_1},
1149 {"es_PR" , C_ISO_8859_1 , C_ISO_8859_1},
1150 {"es_PY" , C_ISO_8859_1 , C_ISO_8859_1},
1151 {"es_SV" , C_ISO_8859_1 , C_ISO_8859_1},
1152 {"es_US" , C_ISO_8859_1 , C_ISO_8859_1},
1153 {"es_UY" , C_ISO_8859_1 , C_ISO_8859_1},
1154 {"es_VE" , C_ISO_8859_1 , C_ISO_8859_1},
1155 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
1156 {"eu_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1157 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1158 {"fo_FO" , C_ISO_8859_1 , C_ISO_8859_1},
1159 {"fr_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1160 {"fr_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1161 {"fr_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1162 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1163 {"fr_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1164 {"ga_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1165 {"gl_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1166 {"gv_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1167 {"id_ID" , C_ISO_8859_1 , C_ISO_8859_1},
1168 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
1169 {"it_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1170 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
1171 {"kl_GL" , C_ISO_8859_1 , C_ISO_8859_1},
1172 {"kw_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1173 {"ms_MY" , C_ISO_8859_1 , C_ISO_8859_1},
1174 {"nl_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1175 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
1176 {"nb_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1177 {"nn_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1178 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1179 {"oc_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1180 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
1181 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
1182 {"sq_AL" , C_ISO_8859_1 , C_ISO_8859_1},
1183 {"sv_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1184 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
1185 {"tl_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1186 {"uz_UZ" , C_ISO_8859_1 , C_ISO_8859_1},
1187 {"wa_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1189 {"bs_BA" , C_ISO_8859_2 , C_ISO_8859_2},
1190 {"cs_CZ" , C_ISO_8859_2 , C_ISO_8859_2},
1191 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
1192 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
1193 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
1194 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
1195 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
1196 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
1198 {"sr_YU@cyrillic" , C_ISO_8859_5 , C_ISO_8859_5},
1199 {"sr_YU" , C_ISO_8859_2 , C_ISO_8859_2},
1201 {"mt_MT" , C_ISO_8859_3 , C_ISO_8859_3},
1203 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
1204 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1205 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1206 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
1208 {"mk_MK" , C_ISO_8859_5 , C_ISO_8859_5},
1210 {"ar_AE" , C_ISO_8859_6 , C_ISO_8859_6},
1211 {"ar_BH" , C_ISO_8859_6 , C_ISO_8859_6},
1212 {"ar_DZ" , C_ISO_8859_6 , C_ISO_8859_6},
1213 {"ar_EG" , C_ISO_8859_6 , C_ISO_8859_6},
1214 {"ar_IQ" , C_ISO_8859_6 , C_ISO_8859_6},
1215 {"ar_JO" , C_ISO_8859_6 , C_ISO_8859_6},
1216 {"ar_KW" , C_ISO_8859_6 , C_ISO_8859_6},
1217 {"ar_LB" , C_ISO_8859_6 , C_ISO_8859_6},
1218 {"ar_LY" , C_ISO_8859_6 , C_ISO_8859_6},
1219 {"ar_MA" , C_ISO_8859_6 , C_ISO_8859_6},
1220 {"ar_OM" , C_ISO_8859_6 , C_ISO_8859_6},
1221 {"ar_QA" , C_ISO_8859_6 , C_ISO_8859_6},
1222 {"ar_SA" , C_ISO_8859_6 , C_ISO_8859_6},
1223 {"ar_SD" , C_ISO_8859_6 , C_ISO_8859_6},
1224 {"ar_SY" , C_ISO_8859_6 , C_ISO_8859_6},
1225 {"ar_TN" , C_ISO_8859_6 , C_ISO_8859_6},
1226 {"ar_YE" , C_ISO_8859_6 , C_ISO_8859_6},
1228 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
1229 {"he_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1230 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1231 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
1233 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
1234 {"mi_NZ" , C_ISO_8859_13 , C_ISO_8859_13},
1236 {"cy_GB" , C_ISO_8859_14 , C_ISO_8859_14},
1238 {"ar_IN" , C_UTF_8 , C_UTF_8},
1239 {"en_IN" , C_UTF_8 , C_UTF_8},
1240 {"se_NO" , C_UTF_8 , C_UTF_8},
1241 {"ta_IN" , C_UTF_8 , C_UTF_8},
1242 {"te_IN" , C_UTF_8 , C_UTF_8},
1243 {"ur_PK" , C_UTF_8 , C_UTF_8},
1245 {"th_TH" , C_TIS_620 , C_TIS_620},
1246 /* {"th_TH" , C_WINDOWS_874}, */
1247 /* {"th_TH" , C_ISO_8859_11}, */
1249 {"ka_GE" , C_GEORGIAN_PS , C_GEORGIAN_PS},
1250 {"vi_VN.TCVN" , C_TCVN5712_1 , C_TCVN5712_1},
1252 {"C" , C_US_ASCII , C_US_ASCII},
1253 {"POSIX" , C_US_ASCII , C_US_ASCII},
1254 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
/*
 * Return the CharSet -> name hash table, kept in a function-local static.
 * Keys are CharSet values packed into pointers with GUINT_TO_POINTER and
 * compared with g_direct_equal.  The table is filled by walking
 * charsets[] in order; each charset is looked up before being inserted.
 * NOTE(review): the "already built" test, the comparison applied to the
 * lookup result and the inserted value are on lines not visible in this
 * listing.
 */
1257 static GHashTable *conv_get_charset_to_str_table(void)
1259 static GHashTable *table;
1265 table = g_hash_table_new(NULL, g_direct_equal);
1267 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1268 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1271 (table, GUINT_TO_POINTER(charsets[i].charset),
/*
 * Return the name -> CharSet hash table, kept in a function-local static.
 * Keys are the name strings of charsets[], hashed and compared with
 * str_case_hash / str_case_equal; values are CharSet numbers packed into
 * pointers.  Every row of charsets[] is inserted, so all alias names
 * resolve.
 * NOTE(review): the "already built" test is on a line not visible here.
 */
1279 static GHashTable *conv_get_charset_from_str_table(void)
1281 static GHashTable *table;
1287 table = g_hash_table_new(str_case_hash, str_case_equal);
1289 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1290 g_hash_table_insert(table, charsets[i].name,
1291 GUINT_TO_POINTER(charsets[i].charset));
/*
 * Return the name registered for charset in the CharSet -> name table.
 * The result is whatever g_hash_table_lookup() yields, i.e. NULL when the
 * charset has no entry.
 */
1297 const gchar *conv_get_charset_str(CharSet charset)
1301 table = conv_get_charset_to_str_table();
1302 return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
/*
 * Translate a charset name into its CharSet value using the
 * name -> CharSet table.  A NULL name yields C_AUTO.  A name that is not
 * in the table yields GPOINTER_TO_UINT(NULL), i.e. 0.
 */
1305 CharSet conv_get_charset_from_str(const gchar *charset)
1309 if (!charset) return C_AUTO;
1311 table = conv_get_charset_from_str_table();
1312 return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
/*
 * Work out the charset implied by the current locale.  The answer is
 * cached in a function-local static that starts at -1 and is tested on
 * entry.
 *
 * Order of the visible checks on the locale string:
 *  1. it contains "UTF-8" or "utf8" (any case)      -> C_UTF_8
 *  2. it ends in "@euro" (any case)                  -> C_ISO_8859_15
 *  3. a locale_table name is a case-insensitive prefix of it
 *                                                    -> that row's charset
 *  4. it is exactly two characters long and equals the first two
 *     characters of a row whose name has an '_' but no '.' after it
 *                                                    -> that row's charset
 *  5. nothing matched                                -> C_AUTO
 * C_US_ASCII is assigned on an early path whose condition is not visible
 * in this listing (presumably "no locale available").
 */
1315 static CharSet conv_get_locale_charset(void)
1317 static CharSet cur_charset = -1;
1318 const gchar *cur_locale;
1322 if (cur_charset != -1)
1325 cur_locale = conv_get_current_locale();
1327 cur_charset = C_US_ASCII;
1331 if (strcasestr(cur_locale, "UTF-8") ||
1332 strcasestr(cur_locale, "utf8")) {
1333 cur_charset = C_UTF_8;
/* p[5] == '\0' requires "@euro" to be the very end of the locale string */
1337 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1338 cur_charset = C_ISO_8859_15;
1342 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1345 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1346 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1347 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1348 strlen(locale_table[i].locale))) {
1349 cur_charset = locale_table[i].charset;
1351 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1352 !strchr(p + 1, '.')) {
1353 if (strlen(cur_locale) == 2 &&
1354 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1355 cur_charset = locale_table[i].charset;
1361 cur_charset = C_AUTO;
/*
 * Variant of conv_get_locale_charset() used by conv_localetodisp() for
 * its second attempt.  It first consults prefs_common.broken_are_utf8 and
 * answers C_UTF_8 when that preference is set; the remaining visible
 * checks match conv_get_locale_charset(): "UTF-8"/"utf8" in the locale
 * string, a trailing "@euro", the locale_table prefix match, the
 * two-letter match and finally C_AUTO.
 * NOTE(review): unlike conv_get_locale_charset(), no test of the cached
 * static value is visible here.  Also, despite the name, the visible
 * lines still assign C_UTF_8 for a UTF-8 locale -- the lines not shown in
 * this listing may change that; confirm against the full file.
 */
1365 static CharSet conv_get_locale_charset_no_utf8(void)
1367 static CharSet cur_charset = -1;
1368 const gchar *cur_locale;
1372 if (prefs_common.broken_are_utf8) {
1373 cur_charset = C_UTF_8;
1377 cur_locale = conv_get_current_locale();
1379 cur_charset = C_US_ASCII;
1383 if (strcasestr(cur_locale, "UTF-8") ||
1384 strcasestr(cur_locale, "utf8")) {
1385 cur_charset = C_UTF_8;
/* p[5] == '\0' requires "@euro" to be the very end of the locale string */
1389 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1390 cur_charset = C_ISO_8859_15;
1394 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1397 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1398 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1399 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1400 strlen(locale_table[i].locale))) {
1401 cur_charset = locale_table[i].charset;
1403 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1404 !strchr(p + 1, '.')) {
1405 if (strlen(cur_locale) == 2 &&
1406 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1407 cur_charset = locale_table[i].charset;
1413 cur_charset = C_AUTO;
/*
 * Name of the locale charset.  The name is looked up from
 * conv_get_locale_charset() and kept in a function-local static; when no
 * name is available CS_INTERNAL is returned instead.
 * NOTE(review): the condition guarding the lookup is on a line not
 * visible in this listing (presumably "not yet looked up").
 */
1417 const gchar *conv_get_locale_charset_str(void)
1419 static const gchar *codeset = NULL;
1422 codeset = conv_get_charset_str(conv_get_locale_charset());
1424 return codeset ? codeset : CS_INTERNAL;
/*
 * Name of the charset reported by conv_get_locale_charset_no_utf8().  The
 * name is kept in a function-local static; when no name is available
 * CS_INTERNAL is returned instead.
 * NOTE(review): the condition guarding the lookup is on a line not
 * visible in this listing (presumably "not yet looked up").
 */
1427 const gchar *conv_get_locale_charset_str_no_utf8(void)
1429 static const gchar *codeset = NULL;
1432 codeset = conv_get_charset_str(conv_get_locale_charset_no_utf8());
1434 return codeset ? codeset : CS_INTERNAL;
/*
 * Work out the charset for outgoing text from the current locale.  The
 * answer is cached in a function-local static that starts at -1 and is
 * tested on entry.
 *
 * The checks mirror conv_get_locale_charset() -- "UTF-8"/"utf8" in the
 * locale string gives C_UTF_8, a trailing "@euro" gives C_ISO_8859_15,
 * then the locale_table prefix match and the two-letter match -- except
 * that the row's out_charset member is used.  C_AUTO is assigned on an
 * early path whose condition is not visible in this listing (presumably
 * "no locale available").
 */
1437 static CharSet conv_get_outgoing_charset(void)
1439 static CharSet out_charset = -1;
1440 const gchar *cur_locale;
1444 if (out_charset != -1)
1447 cur_locale = conv_get_current_locale();
1449 out_charset = C_AUTO;
1453 if (strcasestr(cur_locale, "UTF-8") ||
1454 strcasestr(cur_locale, "utf8")) {
1455 out_charset = C_UTF_8;
/* p[5] == '\0' requires "@euro" to be the very end of the locale string */
1459 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1460 out_charset = C_ISO_8859_15;
1464 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1467 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1468 strlen(locale_table[i].locale))) {
1469 out_charset = locale_table[i].out_charset;
1471 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1472 !strchr(p + 1, '.')) {
1473 if (strlen(cur_locale) == 2 &&
1474 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1475 out_charset = locale_table[i].out_charset;
/*
 * Name of the outgoing charset: the name registered for
 * conv_get_outgoing_charset(), or CS_UTF_8 when that charset has no
 * registered name.
 */
1484 const gchar *conv_get_outgoing_charset_str(void)
1486 CharSet out_charset;
1489 out_charset = conv_get_outgoing_charset();
1490 str = conv_get_charset_str(out_charset);
1492 return str ? str : CS_UTF_8;
/*
 * Return the current locale name.
 * Two alternatives are visible, closed by the G_OS_WIN32 #endif: one asks
 * g_win32_getlocale(); the other tries the environment variables LC_ALL,
 * LC_CTYPE and LANG in that order and finally setlocale(LC_CTYPE, NULL).
 * The chosen name (or "(none)") is written to the debug log.
 * NOTE(review): the #ifdef / #else lines that select between the two
 * alternatives are not visible in this listing.
 */
1495 const gchar *conv_get_current_locale(void)
1497 const gchar *cur_locale;
1500 cur_locale = g_win32_getlocale();
1502 cur_locale = g_getenv("LC_ALL");
1503 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1504 if (!cur_locale) cur_locale = g_getenv("LANG");
1505 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1506 #endif /* G_OS_WIN32 */
1508 debug_print("current locale: %s\n",
1509 cur_locale ? cur_locale : "(none)");
/*
 * Tell whether the current locale name starts with "ja", compared
 * case-insensitively over the first two characters.
 *
 * The answer is kept in the function-local static is_ja_locale, where
 * -1 means "not determined yet"; once it has another value the locale
 * is not examined again. Returns TRUE when is_ja_locale is non-zero.
 */
1514 static gboolean conv_is_ja_locale(void)
1516 static gint is_ja_locale = -1;
1517 const gchar *cur_locale;
/* reuse the stored answer when there is one */
1519 if (is_ja_locale != -1)
1520 return is_ja_locale != 0;
1523 cur_locale = conv_get_current_locale();
1525 if (g_ascii_strncasecmp(cur_locale, "ja", 2) == 0)
1529 return is_ja_locale != 0;
/*
 * Run unmime_header() on a header string, converting non-ASCII input
 * first.
 *
 * str              - the header text
 * default_encoding - when non-NULL, str is converted from this charset
 *                    to CS_INTERNAL before unmime_header() is applied
 * addr_field       - passed through unchanged to unmime_header()
 *
 * Returns the result of unmime_header().
 * NOTE(review): presumably a newly allocated string the caller must
 * free - confirm against unmime_header().
 */
1532 gchar *conv_unmime_header(const gchar *str, const gchar *default_encoding,
1533 gboolean addr_field)
/* scratch buffer for the locale-based conversion at the end */
1535 gchar buf[BUFFSIZE];
/* pure ASCII needs no charset conversion */
1537 if (is_ascii_str(str))
1538 return unmime_header(str, addr_field);
1540 if (default_encoding) {
1543 utf8_buf = conv_codeset_strdup
1544 (str, default_encoding, CS_INTERNAL);
1548 decoded_str = unmime_header(utf8_buf, addr_field);
/* no usable default encoding: convert into buf, using
   conv_anytodisp() in a Japanese locale and conv_localetodisp()
   as the other converter */
1554 if (conv_is_ja_locale())
1555 conv_anytodisp(buf, sizeof(buf), str);
1557 conv_localetodisp(buf, sizeof(buf), str);
1559 return unmime_header(buf, addr_field);
/*
 * Limits, delimiters and helpers for the header encoder below.
 *
 * MAX_LINELEN           - line budget the encoder works against
 * MAX_HARD_LINELEN      - larger limit used while copying an ASCII word
 * MIMESEP_BEGIN and
 * MIMESEP_END           - opening and closing delimiter of an encoded
 *                         word
 * B64LEN(len)           - length of the base64 form of len bytes: four
 *                         characters for every started group of three
 */
1562 #define MAX_LINELEN 76
1563 #define MAX_HARD_LINELEN 996
1564 #define MIMESEP_BEGIN "=?"
1565 #define MIMESEP_END "?="
/*
 * LBREAK_IF_REQUIRED(cond, is_plain_text) - line-break helper.
 *
 * Not self-contained: it uses dest, len, destp, srcp and left of the
 * function it is expanded in.
 *
 * It first tests whether fewer than MAX_LINELEN + 2 bytes remain in
 * dest. Then, when cond holds and the source is not exhausted, it
 * distinguishes two situations:
 *  - something has been written and less than MAX_LINELEN - 1 is left;
 *    the byte written last and, for plain text, the next source byte
 *    are checked for whitespace;
 *  - nothing has been written yet and less than 7 is left.
 * In both, left is reset to MAX_LINELEN - 1.
 */
1567 #define LBREAK_IF_REQUIRED(cond, is_plain_text) \
1569 if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) { \
1574 if ((cond) && *srcp) { \
1575 if (destp > (guchar *)dest && left < MAX_LINELEN - 1) { \
1576 if (isspace(*(destp - 1))) \
1578 else if (is_plain_text && isspace(*srcp)) \
1583 left = MAX_LINELEN - 1; \
1585 } else if (destp == (guchar *)dest && left < 7) { \
1586 if (is_plain_text && isspace(*srcp)) \
1591 left = MAX_LINELEN - 1; \
1597 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/*
 * Encode a UTF-8 header value into dest, turning the non-ASCII parts
 * into MIME encoded words and leaving ASCII words as they are.
 *
 * dest          - output buffer; must not be NULL
 * len           - size of dest; LBREAK_IF_REQUIRED checks the room left
 *                 against it
 * src           - input text; must be valid UTF-8
 * header_len    - amount of the first line already used up; the
 *                 starting budget is MAX_LINELEN - header_len
 * addr_field    - when TRUE, '(', ')' and '"' are kept out of the
 *                 encoded words
 * out_encoding_ - charset for the encoded words
 *                 NOTE(review): NULL presumably selects
 *                 conv_get_outgoing_charset_str() - confirm
 *
 * An encoded word is written as
 *     MIMESEP_BEGIN charset ("?B?" or "?Q?") data MIMESEP_END
 * and US-ASCII as charset is replaced by ISO-8859-1.
 */
1599 void conv_encode_header_full(gchar *dest, gint len, const gchar *src,
1600 gint header_len, gboolean addr_field,
1601 const gchar *out_encoding_)
1603 const gchar *cur_encoding;
1604 const gchar *out_encoding;
/* byte-wise cursors over the input and the output */
1608 const guchar *srcp = src;
1609 guchar *destp = dest;
1610 gboolean use_base64;
/* refuse invalid UTF-8 input and a missing output buffer */
1612 cm_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
1613 cm_return_if_fail(destp != NULL);
/* choose the encoded-word marker, "?B?" or "?Q?", around the test for a
   multibyte locale */
1615 if (MB_CUR_MAX > 1) {
1617 mimesep_enc = "?B?";
1620 mimesep_enc = "?Q?";
/* the input is in the internal encoding */
1623 cur_encoding = CS_INTERNAL;
/* target charset: the caller's choice or the outgoing charset */
1626 out_encoding = out_encoding_;
1628 out_encoding = conv_get_outgoing_charset_str();
/* US-ASCII is swapped for ISO-8859-1 as the target charset */
1630 if (!strcmp(out_encoding, CS_US_ASCII))
1631 out_encoding = CS_ISO_8859_1;
/* fixed overhead of one encoded word: both delimiters, the charset
   name and the marker */
1633 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1634 strlen(mimesep_enc) + strlen(MIMESEP_END);
/* what is still available on the current line */
1636 left = MAX_LINELEN - header_len;
1639 LBREAK_IF_REQUIRED(left <= 0, TRUE);
/* whitespace in the input is handled on its own */
1641 while (isspace(*srcp)) {
1644 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1647 /* output as it is if the next word is ASCII string */
1648 if (!is_next_nonascii(srcp)) {
1651 word_len = get_next_word_len(srcp);
/* break before the word when it does not fit as a whole */
1652 LBREAK_IF_REQUIRED(left < word_len, TRUE);
1653 while (word_len > 0) {
/* inside a word a break is only requested once the budget is overrun
   by MAX_HARD_LINELEN - MAX_LINELEN */
1654 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1663 /* don't include parentheses and quotes in encoded strings */
1664 if (addr_field && (*srcp == '(' || *srcp == ')' || *srcp == '"')) {
1665 LBREAK_IF_REQUIRED(left < 2, FALSE);
/*
 * Non-ASCII part. The candidate part_str is extended from srcp by one
 * UTF-8 character at a time (g_utf8_skip gives the byte length of the
 * character at p). Each candidate is converted to out_encoding and the
 * size of its encoded form - B64LEN() or qp_get_q_encoding_len() - is
 * added to mimestr_len and compared with left. A candidate of length
 * zero that does not fit requests an unconditional line break.
 *
 * The part finally chosen is converted once more and encoded with
 * g_base64_encode() or qp_q_encode().
 *
 * When conversion fails, a warning is logged, conv_unreadable_8bit()
 * is applied to the unconverted text and a copy of it is used instead.
 */
1676 const guchar *p = srcp;
1678 gint out_enc_str_len;
1679 gint mime_block_len;
1680 gboolean cont = FALSE;
1682 while (*p != '\0') {
1683 if (isspace(*p) && !is_next_nonascii(p + 1))
1685 /* don't include parentheses in encoded
1687 if (addr_field && (*p == '(' || *p == ')' || *p == '"'))
1690 mb_len = g_utf8_skip[*p];
1692 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1693 out_str = conv_codeset_strdup
1694 (part_str, cur_encoding, out_encoding);
1700 g_warning("conv_encode_header_full(): code conversion failed");
1701 conv_unreadable_8bit(part_str);
1702 out_str = g_strdup(part_str);
1705 out_str_len = strlen(out_str);
1708 out_enc_str_len = B64LEN(out_str_len);
1711 qp_get_q_encoding_len(out_str);
1715 if (mimestr_len + out_enc_str_len <= left) {
1718 } else if (cur_len == 0) {
1720 LBREAK_IF_REQUIRED(1, FALSE);
1729 Xstrndup_a(part_str, srcp, cur_len, );
1730 out_str = conv_codeset_strdup
1731 (part_str, cur_encoding, out_encoding);
1733 g_warning("conv_encode_header_full(): code conversion failed");
1734 conv_unreadable_8bit(part_str);
1735 out_str = g_strdup(part_str);
1737 out_str_len = strlen(out_str);
1740 out_enc_str_len = B64LEN(out_str_len);
1743 qp_get_q_encoding_len(out_str);
1746 enc_str = g_base64_encode(out_str, out_str_len);
1748 Xalloca(enc_str, out_enc_str_len + 1, );
1749 qp_q_encode(enc_str, out_str);
1754 /* output MIME-encoded string block */
1755 mime_block_len = mimestr_len + strlen(enc_str);
/* the size passed to g_snprintf() counts the terminating NUL, hence
   the + 1 */
1756 g_snprintf(destp, mime_block_len + 1,
1757 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1758 out_encoding, mimesep_enc, enc_str);
/* step past the block just written and charge it to the line budget */
1763 destp += mime_block_len;
1766 left -= mime_block_len;
/* request a break when cont is set */
1769 LBREAK_IF_REQUIRED(cont, FALSE);
/*
 * Shorthand for conv_encode_header_full() with NULL as out_encoding_;
 * every other argument is handed over unchanged.
 *
 * NOTE(review): NULL presumably makes the encoder use the outgoing
 * charset of the locale - confirm in conv_encode_header_full().
 */
1779 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1780 gint header_len, gboolean addr_field)
1782 conv_encode_header_full(dest,len,src,header_len,addr_field,NULL);
/* the line-break helper depends on the encoder's local variables, so it
   is removed again here */
1785 #undef LBREAK_IF_REQUIRED
/*
 * Convert a UTF-8 file name with g_filename_from_utf8().
 *
 * utf8_file - NUL-terminated file name (the length passed on is -1)
 *
 * A conversion error is written to the debug log and the GError is
 * released. A copy of the unconverted name made with g_strdup() serves
 * as the fallback value.
 *
 * NOTE(review): presumably returns fs_file, a newly allocated string
 * the caller releases with g_free() - confirm.
 */
1788 gchar *conv_filename_from_utf8(const gchar *utf8_file)
/* must start out NULL so that GLib can store an error in it */
1791 GError *error = NULL;
1793 fs_file = g_filename_from_utf8(utf8_file, -1, NULL, NULL, &error);
1795 debug_print("failed to convert encoding of file name: %s\n",
1797 g_error_free(error);
/* fallback: use the name as it was given */
1800 fs_file = g_strdup(utf8_file);
1805 gchar *conv_filename_to_utf8(const gchar *fs_file)
1807 gchar *utf8_file = NULL;
1808 GError *error = NULL;
1810 utf8_file = g_filename_to_utf8(fs_file, -1, NULL, NULL, &error);
1812 g_warning("failed to convert encoding of file name: %s",
1814 g_error_free(error);
1817 if (!utf8_file || !g_utf8_validate(utf8_file, -1, NULL)) {
1819 utf8_file = g_strdup(fs_file);
1820 conv_unreadable_8bit(utf8_file);