2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2007 Hiroyuki Yamamoto and the Claws Mail team
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
27 #include <glib/gi18n.h>
40 #include "quoted-printable.h"
42 #include "prefs_common.h"
44 /* For unknown reasons the iconv.m4 macro undefs that macro if no
45 const is needed. This would break the code below, so we define it. */
/* Replacement byte (0x5f, '_') stored in place of bytes that cannot be
 * shown or converted.  Defined without a trailing semicolon so that the
 * macro expands to a plain constant and can be used inside expressions
 * (comparisons, function arguments) as well as in assignments. */
#define SUBST_CHAR 0x5f
/* Byte-class tests used by the Japanese converters and the encoding
 * guesser in this file.  Every macro masks its argument with 0xff first,
 * so only the low 8 bits of the argument take part in the comparison. */
/* True when the byte is in 0xa1..0xfe. */
61 #define iseuckanji(c) \
62 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* True when the byte is exactly 0x8e. */
63 #define iseuchwkana1(c) \
64 (((c) & 0xff) == 0x8e)
/* True when the byte is in 0xa1..0xdf. */
65 #define iseuchwkana2(c) \
66 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
68 (((c) & 0xff) == 0x8f) /* NOTE(review): the #define line for this body is not visible here; presumably iseucaux(c), which is used later in this file - confirm */
/* True when the byte is in 0x81..0x9f or 0xe0..0xfc. */
69 #define issjiskanji1(c) \
70 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
71 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* True when the byte is in 0x40..0x7e or 0x80..0xfc. */
72 #define issjiskanji2(c) \
73 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
74 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* True when the byte is in 0xa1..0xdf. */
75 #define issjishwkana(c) \
76 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
79 if (state != JIS_KANJI) { \
87 if (state != JIS_ASCII) { \
95 if (state != JIS_HWKANA) { \
103 if (state != JIS_AUXKANJI) { \
108 state = JIS_AUXKANJI; \
/* Forward declarations of the file-local helpers.  All converter
 * routines share the (outbuf, outlen, inbuf) parameter list. */
111 static CodeConvFunc conv_get_code_conv_func (const gchar *src_charset_str,
112 const gchar *dest_charset_str);
114 static gchar *conv_iconv_strdup_with_cd (const gchar *inbuf, /* rest of this prototype is not visible in this view */
/* Locale and charset queries. */
117 static CharSet conv_get_locale_charset (void);
118 static CharSet conv_get_outgoing_charset (void);
119 static CharSet conv_guess_ja_encoding(const gchar *str);
120 static gboolean conv_is_ja_locale (void);
/* Converters between the Japanese encodings. */
122 static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
123 static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf);
124 static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
/* Converters from a Japanese encoding to UTF-8. */
126 static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
127 static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
128 static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
129 static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
/* Converters from UTF-8 to a Japanese encoding. */
131 static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
132 static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf);
/* In-place clean-up of bytes that cannot be displayed. */
134 static void conv_unreadable_8bit(gchar *str);
/* "todisp" variants: the definitions in this file forward to the
 * corresponding *toutf8 converter. */
136 static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
137 static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
138 static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
140 static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
141 static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
142 static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf);
/* File-scope flag, FALSE by default.  The code that reads it is not
 * visible in this view. */
144 static gboolean strict_mode = FALSE;
/* Public setter taking the new mode.  conv_localetodisp() brackets its
 * conv_iconv_strdup() calls with codeconv_set_strict(TRUE) / (FALSE).
 * NOTE(review): the body is not visible; presumably it stores `mode`
 * into strict_mode - confirm. */
146 void codeconv_set_strict(gboolean mode)
/* Convert the NUL-terminated string inbuf into outbuf.  The input is
 * walked byte by byte while a JISState (initially JIS_ASCII) records the
 * character set selected by the escape / shift sequences seen so far;
 * payload bytes are copied with the high bit set (| 0x80).
 * NOTE(review): only part of the body is visible here, so how outlen
 * bounds the output and how the result is terminated cannot be
 * confirmed from this view. */
151 static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
153 const guchar *in = inbuf;
154 guchar *out = outbuf;
155 JISState state = JIS_ASCII;
157 while (*in != '\0') {
161 if (*(in + 1) == '@' || *(in + 1) == 'B') {
164 } else if (*(in + 1) == '(' &&
166 state = JIS_AUXKANJI;
169 /* unknown escape sequence */
172 } else if (*in == '(') {
173 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
176 } else if (*(in + 1) == 'I') {
180 /* unknown escape sequence */
184 /* unknown escape sequence */
187 } else if (*in == 0x0e) { /* ASCII SO (shift out) control character */
190 } else if (*in == 0x0f) { /* ASCII SI (shift in) control character */
199 *out++ = *in++ | 0x80;
200 if (*in == '\0') break; /* input ended after the first byte of a pair */
201 *out++ = *in++ | 0x80;
205 *out++ = *in++ | 0x80;
209 *out++ = *in++ | 0x80;
210 if (*in == '\0') break; /* input ended after the first byte of a pair */
211 *out++ = *in++ | 0x80;
/* Sound-mark codes compared against the sound_sym argument of
 * conv_jis_hantozen(). */
220 #define JIS_HWDAKUTEN 0x5e
221 #define JIS_HWHANDAKUTEN 0x5f
/* Look up a two-byte code for the one-byte code jis_code, optionally
 * combined with a following sound mark (sound_sym), and store it
 * big-endian at outbuf[0] and outbuf[1].
 *   - jis_code outside 0x21..0x5f is rejected up front (the action taken
 *     is not visible in this view).
 *   - sound_sym == JIS_HWDAKUTEN and jis_code in 0x36..0x4e: the code is
 *     taken from dakuten_tbl.
 *   - sound_sym == JIS_HWHANDAKUTEN and jis_code in 0x4a..0x4e: the code
 *     is taken from handakuten_tbl.
 *   - otherwise the code is taken from h2z_tbl.
 * NOTE(review): the return statements are not visible; callers store the
 * result in a variable named `len`, so it presumably reports a length or
 * count - confirm. */
223 static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
/* Base mapping, indexed by jis_code - 0x20. */
225 static guint16 h2z_tbl[] = {
227 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
228 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
230 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
231 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
233 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
234 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
236 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
237 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
/* Mapping used with JIS_HWDAKUTEN, indexed by jis_code - 0x30; 0x0000
 * marks slots with no entry. */
240 static guint16 dakuten_tbl[] = {
242 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
243 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
245 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
246 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
/* Mapping used with JIS_HWHANDAKUTEN, indexed by jis_code - 0x4a. */
249 static guint16 handakuten_tbl[] = {
251 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
259 if (jis_code < 0x21 || jis_code > 0x5f)
262 if (sound_sym == JIS_HWDAKUTEN &&
263 jis_code >= 0x36 && jis_code <= 0x4e) {
264 out_code = dakuten_tbl[jis_code - 0x30];
266 *outbuf = out_code >> 8; /* high byte first */
267 *(outbuf + 1) = out_code & 0xff;
272 if (sound_sym == JIS_HWHANDAKUTEN &&
273 jis_code >= 0x4a && jis_code <= 0x4e) {
274 out_code = handakuten_tbl[jis_code - 0x4a];
275 *outbuf = out_code >> 8;
276 *(outbuf + 1) = out_code & 0xff;
/* No sound mark applied: use the base table. */
280 out_code = h2z_tbl[jis_code - 0x20];
281 *outbuf = out_code >> 8;
282 *(outbuf + 1) = out_code & 0xff;
/* Convert the NUL-terminated string inbuf into outbuf, the reverse
 * direction of conv_jistoeuc(): bytes that pass the iseuc*() tests are
 * copied with the high bit cleared (& 0x7f) while a JISState (initially
 * JIS_ASCII) tracks the currently selected character set.
 * NOTE(review): only part of the body is visible here; the escape
 * sequences emitted on state changes and the use of outlen cannot be
 * confirmed from this view. */
286 static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
288 const guchar *in = inbuf;
289 guchar *out = outbuf;
290 JISState state = JIS_ASCII;
292 while (*in != '\0') {
296 } else if (iseuckanji(*in)) {
297 if (iseuckanji(*(in + 1))) {
299 *out++ = *in++ & 0x7f; /* two-byte character: strip the high bit of both bytes */
300 *out++ = *in++ & 0x7f;
305 if (*in != '\0' && !IS_ASCII(*in)) {
310 } else if (iseuchwkana1(*in)) {
311 if (iseuchwkana2(*(in + 1))) {
312 if (prefs_common.allow_jisx0201_kana) { /* user preference decides how this character is emitted */
315 *out++ = *in++ & 0x7f;
/* Look ahead: if the next character is another 0x8e-prefixed one, its
 * second byte is handed to conv_jis_hantozen() as the sound mark. */
320 if (iseuchwkana1(*(in + 2)) &&
321 iseuchwkana2(*(in + 3)))
322 len = conv_jis_hantozen
324 *(in + 1), *(in + 3));
326 len = conv_jis_hantozen
341 if (*in != '\0' && !IS_ASCII(*in)) {
346 } else if (iseucaux(*in)) {
348 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
350 *out++ = *in++ & 0x7f;
351 *out++ = *in++ & 0x7f;
354 if (*in != '\0' && !IS_ASCII(*in)) {
357 if (*in != '\0' && !IS_ASCII(*in)) {
/* Convert the NUL-terminated string inbuf into outbuf.  A byte pair that
 * passes issjiskanji1() / issjiskanji2() is re-based arithmetically and
 * written with the high bit set on both bytes; single bytes that pass
 * issjishwkana() are handled in a separate branch.
 * NOTE(review): only part of the body is visible here; the condition
 * that chooses between the two out1 formulas and the use of outlen
 * cannot be confirmed from this view. */
374 static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
376 const guchar *in = inbuf;
377 guchar *out = outbuf;
379 while (*in != '\0') {
382 } else if (issjiskanji1(*in)) {
383 if (issjiskanji2(*(in + 1))) {
385 guchar out2 = *(in + 1);
388 row = out1 < 0xa0 ? 0x70 : 0xb0; /* offset depends on which lead-byte range out1 is in */
390 out1 = (out1 - row) * 2 - 1;
391 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
393 out1 = (out1 - row) * 2;
397 *out++ = out1 | 0x80;
398 *out++ = out2 | 0x80;
403 if (*in != '\0' && !IS_ASCII(*in)) {
408 } else if (issjishwkana(*in)) {
/* Two-step conversion: inbuf is first converted with conv_jistoeuc()
 * into a temporary buffer of outlen bytes, which is then converted with
 * conv_euctoutf8() into outbuf. */
420 static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
424 Xalloca(eucstr, outlen, return); /* project macro; presumably allocates eucstr and returns on failure - confirm */
426 conv_jistoeuc(eucstr, outlen, inbuf);
427 conv_euctoutf8(outbuf, outlen, eucstr);
/* Convert inbuf from CS_SHIFT_JIS to CS_UTF_8 through
 * conv_iconv_strdup() and copy the result into outbuf (at most outlen
 * bytes, via strncpy2).
 * NOTE(review): the branch conditions are not visible; presumably the
 * second strncpy2() copies the input unchanged when the conversion
 * returned NULL - confirm. */
430 static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
434 tmpstr = conv_iconv_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);
436 strncpy2(outbuf, tmpstr, outlen);
439 strncpy2(outbuf, inbuf, outlen);
/* Convert inbuf to UTF-8 with iconv and copy the result into outbuf.
 * The conversion descriptor is kept in a function-local static and
 * opened on demand: CS_EUC_JP_MS is tried first, then CS_EUC_JP.  The
 * visible fallbacks copy inbuf to outbuf unchanged.
 * NOTE(review): the lines that read or write iconv_ok are not visible;
 * presumably it records a failed open so later calls skip the retry -
 * confirm. */
442 static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
444 static iconv_t cd = (iconv_t)-1; /* (iconv_t)-1 means no usable descriptor yet */
445 static gboolean iconv_ok = TRUE;
448 if (cd == (iconv_t)-1) {
450 strncpy2(outbuf, inbuf, outlen);
453 cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
454 if (cd == (iconv_t)-1) {
455 cd = iconv_open(CS_UTF_8, CS_EUC_JP); /* second choice of source charset name */
456 if (cd == (iconv_t)-1) {
457 g_warning("conv_euctoutf8(): %s\n",
460 strncpy2(outbuf, inbuf, outlen);
466 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
468 strncpy2(outbuf, tmpstr, outlen);
471 strncpy2(outbuf, inbuf, outlen);
/* Guess the encoding of inbuf with conv_guess_ja_encoding() and dispatch
 * to the matching *toutf8 converter; one branch copies the input
 * unchanged.
 * NOTE(review): the case labels are not visible in this view. */
474 static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
476 switch (conv_guess_ja_encoding(inbuf)) {
478 conv_jistoutf8(outbuf, outlen, inbuf);
481 conv_sjistoutf8(outbuf, outlen, inbuf);
484 conv_euctoutf8(outbuf, outlen, inbuf);
487 strncpy2(outbuf, inbuf, outlen);
/* Convert inbuf from UTF-8 with iconv and copy the result into outbuf;
 * the mirror image of conv_euctoutf8().  The conversion descriptor is
 * kept in a function-local static and opened on demand: CS_EUC_JP_MS is
 * tried first as the target, then CS_EUC_JP.  The visible fallbacks copy
 * inbuf to outbuf unchanged.
 * NOTE(review): the lines that read or write iconv_ok are not visible;
 * presumably it records a failed open - confirm. */
492 static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
494 static iconv_t cd = (iconv_t)-1; /* (iconv_t)-1 means no usable descriptor yet */
495 static gboolean iconv_ok = TRUE;
498 if (cd == (iconv_t)-1) {
500 strncpy2(outbuf, inbuf, outlen);
503 cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
504 if (cd == (iconv_t)-1) {
505 cd = iconv_open(CS_EUC_JP, CS_UTF_8); /* second choice of target charset name */
506 if (cd == (iconv_t)-1) {
507 g_warning("conv_utf8toeuc(): %s\n",
510 strncpy2(outbuf, inbuf, outlen);
516 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
518 strncpy2(outbuf, tmpstr, outlen);
521 strncpy2(outbuf, inbuf, outlen);
/* Two-step conversion: inbuf is first converted with conv_utf8toeuc()
 * into a temporary buffer of outlen bytes, which is then converted with
 * conv_euctojis() into outbuf. */
524 static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf)
528 Xalloca(eucstr, outlen, return); /* project macro; presumably allocates eucstr and returns on failure - confirm */
530 conv_utf8toeuc(eucstr, outlen, inbuf);
531 conv_euctojis(outbuf, outlen, eucstr);
/* Clean up str in place: a CR that is immediately followed by LF is
 * removed, and every byte for which IS_ASCII() is false is overwritten
 * with SUBST_CHAR.
 * NOTE(review): the loop header is not visible in this view. */
534 static void conv_unreadable_8bit(gchar *str)
536 register guchar *p = str;
539 /* convert CR+LF -> LF */
540 if (*p == '\r' && *(p + 1) == '\n')
541 memmove(p, p + 1, strlen(p)); /* strlen(p) bytes starting at p + 1 include the terminating NUL */
542 else if (!IS_ASCII(*p)) *p = SUBST_CHAR;
/* Heuristically classify the bytes of str and return a CharSet.  The
 * guess starts as C_US_ASCII.  An ESC followed by '$' or '(' returns
 * C_ISO_2022_JP immediately while the guess is still C_US_ASCII.  Byte
 * pairs are tested with the iseuc*() and issjis*() macros, and the
 * visible branches move the guess to C_SHIFT_JIS.
 * NOTE(review): the loop header, pointer advances, the branches that
 * pick other charsets and the final return are not visible here. */
547 static CharSet conv_guess_ja_encoding(const gchar *str)
549 const guchar *p = str;
550 CharSet guessed = C_US_ASCII;
553 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
554 if (guessed == C_US_ASCII)
555 return C_ISO_2022_JP;
557 } else if (IS_ASCII(*p)) {
559 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
560 if (*p >= 0xfd && *p <= 0xfe) /* lead byte above the issjiskanji1() range (which ends at 0xfc) */
562 else if (guessed == C_SHIFT_JIS) {
563 if ((issjiskanji1(*p) &&
564 issjiskanji2(*(p + 1))) ||
566 guessed = C_SHIFT_JIS;
572 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
573 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1))) /* pair also passes the 0x8e-prefixed test, so only record a tentative guess */
574 guessed = C_SHIFT_JIS;
578 } else if (issjishwkana(*p)) {
579 guessed = C_SHIFT_JIS;
/* Display variant of the JIS converter: forwards to conv_jistoutf8()
 * with the same arguments. */
589 static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
591 conv_jistoutf8(outbuf, outlen, inbuf);
/* Display variant of the Shift_JIS converter: forwards to
 * conv_sjistoutf8() with the same arguments. */
594 static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
596 conv_sjistoutf8(outbuf, outlen, inbuf);
/* Display variant of the EUC converter: forwards to conv_euctoutf8()
 * with the same arguments. */
599 static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
601 conv_euctoutf8(outbuf, outlen, inbuf);
/* Copy inbuf into outbuf for display.  Input that passes
 * g_utf8_validate() is copied as is (bounded by outlen); the other path
 * goes through conv_ustodisp(), which replaces non-ASCII bytes. */
604 void conv_utf8todisp(gchar *outbuf, gint outlen, const gchar *inbuf)
606 if (g_utf8_validate(inbuf, -1, NULL) == TRUE)
607 strncpy2(outbuf, inbuf, outlen);
609 conv_ustodisp(outbuf, outlen, inbuf);
/* Convert inbuf to UTF-8 with encoding auto-detection
 * (conv_anytoutf8()); if the result is still not valid UTF-8, scrub it
 * in place with conv_unreadable_8bit(). */
612 static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
614 conv_anytoutf8(outbuf, outlen, inbuf);
615 if (g_utf8_validate(outbuf, -1, NULL) != TRUE)
616 conv_unreadable_8bit(outbuf);
/* Copy inbuf into outbuf (bounded by outlen) and then replace every
 * non-ASCII byte of the copy via conv_unreadable_8bit(). */
619 static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
621 strncpy2(outbuf, inbuf, outlen);
622 conv_unreadable_8bit(outbuf);
/* Convert inbuf from the locale charset for display and store the
 * result in outbuf (bounded by outlen).
 *   1. Convert from conv_get_locale_charset_str() with strict mode
 *      switched on for the duration of the call.
 *   2. If that yields a string that is not valid UTF-8, retry from
 *      conv_get_locale_charset_str_no_utf8().
 *   3. The final visible fallback is conv_utf8todisp() on the raw input.
 * NOTE(review): the destination-charset argument of both
 * conv_iconv_strdup() calls and the code that releases tmpstr are not
 * visible in this view. */
625 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
629 codeconv_set_strict(TRUE);
630 tmpstr = conv_iconv_strdup(inbuf, conv_get_locale_charset_str(),
632 codeconv_set_strict(FALSE);
633 if (tmpstr && g_utf8_validate(tmpstr, -1, NULL)) {
634 strncpy2(outbuf, tmpstr, outlen);
637 } else if (tmpstr && !g_utf8_validate(tmpstr, -1, NULL)) {
/* Second attempt, again with strict mode on only around the call. */
639 codeconv_set_strict(TRUE);
640 tmpstr = conv_iconv_strdup(inbuf,
641 conv_get_locale_charset_str_no_utf8(),
643 codeconv_set_strict(FALSE);
645 if (tmpstr && g_utf8_validate(tmpstr, -1, NULL)) {
646 strncpy2(outbuf, tmpstr, outlen);
651 conv_utf8todisp(outbuf, outlen, inbuf);
/* Identity converter: copies inbuf into outbuf, bounded by outlen.  Its
 * address also serves as the "no dedicated converter" marker that
 * conv_convert() and conv_codeset_strdup() compare against. */
655 static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
657 strncpy2(outbuf, inbuf, outlen);
/* Inspect a charset name that starts with "X-" or "x-" and compare it
 * case-insensitively with CS_X_GBK.  A NULL encoding skips the check.
 * NOTE(review): the return type and every return statement are outside
 * this view; callers assign the result back to their source charset
 * name, so presumably a replacement name is returned for CS_X_GBK and
 * the input otherwise - confirm. */
661 conv_get_fallback_for_private_encoding(const gchar *encoding)
663 if (encoding && (encoding[0] == 'X' || encoding[0] == 'x') &&
664 encoding[1] == '-') {
665 if (!g_ascii_strcasecmp(encoding, CS_X_GBK))
/* Allocate a zero-filled CodeConverter for src_charset.  The name is
 * first passed through conv_get_fallback_for_private_encoding(); the
 * converter then stores the conversion function chosen for that source
 * (destination NULL), a private copy of the name, and the CharSet value
 * parsed from it.
 * NOTE(review): the return statement is not visible; presumably the new
 * object is returned and later released with
 * conv_code_converter_destroy() - confirm. */
672 CodeConverter *conv_code_converter_new(const gchar *src_charset)
676 src_charset = conv_get_fallback_for_private_encoding(src_charset);
678 conv = g_new0(CodeConverter, 1);
679 conv->code_conv_func = conv_get_code_conv_func(src_charset, NULL);
680 conv->charset_str = g_strdup(src_charset);
681 conv->charset = conv_get_charset_from_str(src_charset);
/* Release a CodeConverter: frees the charset name copied by
 * conv_code_converter_new().
 * NOTE(review): the rest of the body is not visible; presumably conv
 * itself is freed as well - confirm. */
686 void conv_code_converter_destroy(CodeConverter *conv)
688 g_free(conv->charset_str);
/* Convert inbuf into outbuf (bounded by outlen) using conv.  When the
 * converter holds a dedicated conversion function (anything other than
 * conv_noconv) that function is called; the other path converts from
 * conv->charset_str with conv_iconv_strdup() (destination NULL) and
 * copies the result.
 * NOTE(review): the remaining parameters, the handling of a NULL result
 * and the returned gint values are not visible in this view. */
692 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
695 if (conv->code_conv_func != conv_noconv)
696 conv->code_conv_func(outbuf, outlen, inbuf);
700 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
704 strncpy2(outbuf, str, outlen);
/* Return a newly allocated copy of inbuf converted from src_code to
 * dest_code.
 *   - When strcmp2() reports the two names as equal, the input is
 *     duplicated unchanged.
 *   - When a dedicated converter exists for the pair, it is run into a
 *     scratch buffer that is then shrunk to the size of the result.
 *   - Otherwise the conversion is delegated to conv_iconv_strdup().
 * Every visible return value comes from a GLib allocator or from
 * conv_iconv_strdup(); NULL is returned when the scratch buffer could
 * not be obtained. */
712 gchar *conv_codeset_strdup(const gchar *inbuf,
713 const gchar *src_code, const gchar *dest_code)
717 CodeConvFunc conv_func;
719 if (!strcmp2(src_code, dest_code))
720 return g_strdup(inbuf);
722 src_code = conv_get_fallback_for_private_encoding(src_code);
723 conv_func = conv_get_code_conv_func(src_code, dest_code);
724 if (conv_func != conv_noconv) {
725 len = (strlen(inbuf) + 1) * 3; /* scratch size: three bytes per input byte, terminator included */
727 if (!buf) return NULL;
729 conv_func(buf, len, inbuf);
730 return g_realloc(buf, strlen(buf) + 1); /* trim the scratch buffer to the actual length */
733 return conv_iconv_strdup(inbuf, src_code, dest_code);
/* Choose the dedicated converter for a (source, destination) charset
 * pair.  The result starts as conv_noconv and is replaced only when one
 * of the visible combinations matches.
 *   - A NULL source name means "use the locale charset".
 *   - With both names NULL (auto detection) a Japanese locale gets
 *     conv_anytodisp.
 *   - A NULL destination name parses to C_AUTO, which selects the
 *     *todisp variants.
 *   - A C_US_ASCII destination always gets conv_ustodisp.
 * NOTE(review): most case labels of the switch and the final return are
 * not visible in this view. */
736 static CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
737 const gchar *dest_charset_str)
739 CodeConvFunc code_conv = conv_noconv;
741 CharSet dest_charset;
743 if (!src_charset_str)
744 src_charset = conv_get_locale_charset();
746 src_charset = conv_get_charset_from_str(src_charset_str);
748 /* auto detection mode */
749 if (!src_charset_str && !dest_charset_str) {
750 if (conv_is_ja_locale())
751 return conv_anytodisp;
756 dest_charset = conv_get_charset_from_str(dest_charset_str);
758 if (dest_charset == C_US_ASCII)
759 return conv_ustodisp;
761 switch (src_charset) {
779 case C_ISO_2022_JP_2:
780 case C_ISO_2022_JP_3:
781 if (dest_charset == C_AUTO)
782 code_conv = conv_jistodisp;
783 else if (dest_charset == C_EUC_JP)
784 code_conv = conv_jistoeuc;
785 else if (dest_charset == C_UTF_8)
786 code_conv = conv_jistoutf8;
/* NOTE(review): case label not visible; the conv_sjis* targets suggest
 * a Shift_JIS source - confirm. */
789 if (dest_charset == C_AUTO)
790 code_conv = conv_sjistodisp;
791 else if (dest_charset == C_EUC_JP)
792 code_conv = conv_sjistoeuc;
793 else if (dest_charset == C_UTF_8)
794 code_conv = conv_sjistoutf8;
/* NOTE(review): case label not visible; the conv_euc* targets suggest an
 * EUC-JP source - confirm. */
797 if (dest_charset == C_AUTO)
798 code_conv = conv_euctodisp;
799 else if (dest_charset == C_ISO_2022_JP ||
800 dest_charset == C_ISO_2022_JP_2 ||
801 dest_charset == C_ISO_2022_JP_3)
802 code_conv = conv_euctojis;
803 else if (dest_charset == C_UTF_8)
804 code_conv = conv_euctoutf8;
/* NOTE(review): case label not visible; the conv_utf8* targets suggest a
 * UTF-8 source - confirm. */
807 if (dest_charset == C_EUC_JP)
808 code_conv = conv_utf8toeuc;
809 else if (dest_charset == C_ISO_2022_JP ||
810 dest_charset == C_ISO_2022_JP_2 ||
811 dest_charset == C_ISO_2022_JP_3)
812 code_conv = conv_utf8tojis;
/* Return a newly allocated copy of inbuf converted from src_code to
 * dest_code with iconv.  The input is simply duplicated when
 *   - both names are NULL and the input is already valid UTF-8,
 *   - the two names are equal ignoring case, or
 *   - either name is CS_US_ASCII.
 * The visible defaults are conv_get_outgoing_charset_str() for the
 * source and CS_INTERNAL for the destination.
 * NOTE(review): the conditions guarding those defaults, the result of a
 * failed iconv_open() and the iconv_close() call are not visible here. */
821 gchar *conv_iconv_strdup(const gchar *inbuf,
822 const gchar *src_code, const gchar *dest_code)
827 if (!src_code && !dest_code &&
828 g_utf8_validate(inbuf, -1, NULL))
829 return g_strdup(inbuf);
832 src_code = conv_get_outgoing_charset_str();
834 dest_code = CS_INTERNAL;
836 /* don't convert if src and dest codeset are identical */
837 if (!strcasecmp(src_code, dest_code))
838 return g_strdup(inbuf);
840 /* don't convert if src codeset is US-ASCII */
841 if (!strcasecmp(src_code, CS_US_ASCII))
842 return g_strdup(inbuf);
844 /* don't convert if dest codeset is US-ASCII */
845 if (!strcasecmp(dest_code, CS_US_ASCII))
846 return g_strdup(inbuf);
848 cd = iconv_open(dest_code, src_code); /* iconv_open() takes the target charset first */
849 if (cd == (iconv_t)-1)
852 outbuf = conv_iconv_strdup_with_cd(inbuf, cd);
/* Convert inbuf with the already opened descriptor cd and return the
 * result in a buffer obtained from g_malloc() / g_realloc().  The output
 * buffer starts at twice the input size (plus room for the terminator)
 * and is enlarged through EXPAND_BUF() when needed.  The first loop
 * converts the input and reacts to the errno set by iconv(); the second
 * loop calls iconv() with a NULL input to flush any pending output.
 * Finally the buffer is trimmed to the converted length plus one byte.
 * NOTE(review): the handling of strict_mode, the statements inside most
 * error branches and the NUL termination are not visible in this view. */
859 gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd)
861 const gchar *inbuf_p;
872 in_size = strlen(inbuf);
874 out_size = (in_size + 1) * 2;
875 outbuf = g_malloc(out_size);
/* Grow the output buffer while preserving the current write offset (the
 * line that enlarges out_size is not visible). */
879 #define EXPAND_BUF() \
881 len = outbuf_p - outbuf; \
883 outbuf = g_realloc(outbuf, out_size); \
884 outbuf_p = outbuf + len; \
885 out_left = out_size - len; \
888 while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
889 &outbuf_p, &out_left)) == (size_t)-1) {
890 if (EILSEQ == errno) { /* invalid multibyte sequence in the input */
895 //g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
901 *outbuf_p++ = SUBST_CHAR; /* emit the substitute character for the bad input */
903 } else if (EINVAL == errno) { /* incomplete multibyte sequence at the end of the input */
905 } else if (E2BIG == errno) { /* output buffer is full */
908 g_warning("conv_iconv_strdup(): %s\n",
914 while ((n_conv = iconv(cd, NULL, NULL, &outbuf_p, &out_left)) ==
916 if (E2BIG == errno) {
919 g_warning("conv_iconv_strdup(): %s\n",
927 len = outbuf_p - outbuf;
928 outbuf = g_realloc(outbuf, len + 1);
/* Table pairing CharSet values with charset name strings.  Several
 * names may share one value (C_US_ASCII, C_EUC_JP and C_SHIFT_JIS each
 * appear more than once).  The table feeds two hash tables built later
 * in this file: one from CharSet value to name, and one from name to
 * CharSet value keyed with str_case_hash / str_case_equal.
 * NOTE(review): the member declarations, a few entries and the closing
 * line naming the array are not visible here; the array is referred to
 * as `charsets` by the code that iterates over it. */
934 static const struct {
938 {C_US_ASCII, CS_US_ASCII},
939 {C_US_ASCII, CS_ANSI_X3_4_1968},
942 {C_ISO_8859_1, CS_ISO_8859_1},
943 {C_ISO_8859_2, CS_ISO_8859_2},
944 {C_ISO_8859_3, CS_ISO_8859_3},
945 {C_ISO_8859_4, CS_ISO_8859_4},
946 {C_ISO_8859_5, CS_ISO_8859_5},
947 {C_ISO_8859_6, CS_ISO_8859_6},
948 {C_ISO_8859_7, CS_ISO_8859_7},
949 {C_ISO_8859_8, CS_ISO_8859_8},
950 {C_ISO_8859_9, CS_ISO_8859_9},
951 {C_ISO_8859_10, CS_ISO_8859_10},
952 {C_ISO_8859_11, CS_ISO_8859_11},
953 {C_ISO_8859_13, CS_ISO_8859_13},
954 {C_ISO_8859_14, CS_ISO_8859_14},
955 {C_ISO_8859_15, CS_ISO_8859_15},
956 {C_BALTIC, CS_BALTIC},
957 {C_CP1250, CS_CP1250},
958 {C_CP1251, CS_CP1251},
959 {C_CP1252, CS_CP1252},
960 {C_CP1253, CS_CP1253},
961 {C_CP1254, CS_CP1254},
962 {C_CP1255, CS_CP1255},
963 {C_CP1256, CS_CP1256},
964 {C_CP1257, CS_CP1257},
965 {C_CP1258, CS_CP1258},
966 {C_WINDOWS_1250, CS_WINDOWS_1250},
967 {C_WINDOWS_1251, CS_WINDOWS_1251},
968 {C_WINDOWS_1252, CS_WINDOWS_1252},
969 {C_WINDOWS_1253, CS_WINDOWS_1253},
970 {C_WINDOWS_1254, CS_WINDOWS_1254},
971 {C_WINDOWS_1255, CS_WINDOWS_1255},
972 {C_WINDOWS_1256, CS_WINDOWS_1256},
973 {C_WINDOWS_1257, CS_WINDOWS_1257},
974 {C_WINDOWS_1258, CS_WINDOWS_1258},
975 {C_KOI8_R, CS_KOI8_R},
976 {C_KOI8_T, CS_KOI8_T},
977 {C_KOI8_U, CS_KOI8_U},
978 {C_ISO_2022_JP, CS_ISO_2022_JP},
979 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
980 {C_ISO_2022_JP_3, CS_ISO_2022_JP_3},
981 {C_EUC_JP, CS_EUC_JP},
982 {C_EUC_JP, CS_EUCJP},
983 {C_EUC_JP_MS, CS_EUC_JP_MS},
984 {C_SHIFT_JIS, CS_SHIFT_JIS},
985 {C_SHIFT_JIS, CS_SHIFT__JIS},
986 {C_SHIFT_JIS, CS_SJIS},
987 {C_ISO_2022_KR, CS_ISO_2022_KR},
988 {C_EUC_KR, CS_EUC_KR},
989 {C_ISO_2022_CN, CS_ISO_2022_CN},
990 {C_EUC_CN, CS_EUC_CN},
991 {C_GB2312, CS_GB2312},
993 {C_EUC_TW, CS_EUC_TW},
995 {C_BIG5_HKSCS, CS_BIG5_HKSCS},
996 {C_TIS_620, CS_TIS_620},
997 {C_WINDOWS_874, CS_WINDOWS_874},
998 {C_GEORGIAN_PS, CS_GEORGIAN_PS},
999 {C_TCVN5712_1, CS_TCVN5712_1},
/* Table mapping locale names to charsets.  Each entry holds the locale
 * name, the charset assumed for that locale, and the charset used for
 * outgoing text (out_charset).  The lookup functions in this file
 * compare the current locale with each entry, in table order, for the
 * length of the entry's name and ignoring case, so longer and more
 * specific names (such as "ja_JP.SJIS") have to appear before the bare
 * "ll_CC" form of the same locale.  Entries without a '.' after the
 * underscore also match a bare two-letter language code.
 * NOTE(review): the `charset` member that the lookup code reads is
 * declared on a line not visible in this view. */
1002 static const struct {
1003 gchar *const locale;
1005 CharSet out_charset;
1006 } locale_table[] = {
1007 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
1008 {"ja_JP.EUC-JP" , C_EUC_JP , C_ISO_2022_JP},
1009 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
1010 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
1011 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
1012 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
/* NOTE(review): two bare "ja_JP" entries follow; presumably only one is
 * compiled in, selected by a platform conditional on lines that are not
 * visible here - confirm. */
1014 {"ja_JP" , C_SHIFT_JIS , C_ISO_2022_JP},
1016 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
1018 {"ko_KR.EUC-KR" , C_EUC_KR , C_EUC_KR},
1019 {"ko_KR" , C_EUC_KR , C_EUC_KR},
1020 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
1021 {"zh_CN.GBK" , C_GBK , C_GBK},
1022 {"zh_CN" , C_GB2312 , C_GB2312},
1023 {"zh_HK" , C_BIG5_HKSCS , C_BIG5_HKSCS},
1024 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
1025 {"zh_TW.EUC-TW" , C_EUC_TW , C_BIG5},
1026 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
1027 {"zh_TW" , C_BIG5 , C_BIG5},
1029 {"ru_RU.KOI8-R" , C_KOI8_R , C_KOI8_R},
1030 {"ru_RU.KOI8R" , C_KOI8_R , C_KOI8_R},
1031 {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
1032 {"ru_RU" , C_ISO_8859_5 , C_KOI8_R},
1033 {"tg_TJ" , C_KOI8_T , C_KOI8_T},
1034 {"ru_UA" , C_KOI8_U , C_KOI8_U},
1035 {"uk_UA.CP1251" , C_WINDOWS_1251, C_KOI8_U},
1036 {"uk_UA" , C_KOI8_U , C_KOI8_U},
1038 {"be_BY" , C_WINDOWS_1251, C_WINDOWS_1251},
1039 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
1041 {"yi_US" , C_WINDOWS_1255, C_WINDOWS_1255},
1043 {"af_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1044 {"br_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1045 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1046 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1047 {"de_AT" , C_ISO_8859_1 , C_ISO_8859_1},
1048 {"de_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1049 {"de_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1050 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
1051 {"de_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1052 {"en_AU" , C_ISO_8859_1 , C_ISO_8859_1},
1053 {"en_BW" , C_ISO_8859_1 , C_ISO_8859_1},
1054 {"en_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1055 {"en_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1056 {"en_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1057 {"en_HK" , C_ISO_8859_1 , C_ISO_8859_1},
1058 {"en_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1059 {"en_NZ" , C_ISO_8859_1 , C_ISO_8859_1},
1060 {"en_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1061 {"en_SG" , C_ISO_8859_1 , C_ISO_8859_1},
1062 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
1063 {"en_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1064 {"en_ZW" , C_ISO_8859_1 , C_ISO_8859_1},
1065 {"es_AR" , C_ISO_8859_1 , C_ISO_8859_1},
1066 {"es_BO" , C_ISO_8859_1 , C_ISO_8859_1},
1067 {"es_CL" , C_ISO_8859_1 , C_ISO_8859_1},
1068 {"es_CO" , C_ISO_8859_1 , C_ISO_8859_1},
1069 {"es_CR" , C_ISO_8859_1 , C_ISO_8859_1},
1070 {"es_DO" , C_ISO_8859_1 , C_ISO_8859_1},
1071 {"es_EC" , C_ISO_8859_1 , C_ISO_8859_1},
1072 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1073 {"es_GT" , C_ISO_8859_1 , C_ISO_8859_1},
1074 {"es_HN" , C_ISO_8859_1 , C_ISO_8859_1},
1075 {"es_MX" , C_ISO_8859_1 , C_ISO_8859_1},
1076 {"es_NI" , C_ISO_8859_1 , C_ISO_8859_1},
1077 {"es_PA" , C_ISO_8859_1 , C_ISO_8859_1},
1078 {"es_PE" , C_ISO_8859_1 , C_ISO_8859_1},
1079 {"es_PR" , C_ISO_8859_1 , C_ISO_8859_1},
1080 {"es_PY" , C_ISO_8859_1 , C_ISO_8859_1},
1081 {"es_SV" , C_ISO_8859_1 , C_ISO_8859_1},
1082 {"es_US" , C_ISO_8859_1 , C_ISO_8859_1},
1083 {"es_UY" , C_ISO_8859_1 , C_ISO_8859_1},
1084 {"es_VE" , C_ISO_8859_1 , C_ISO_8859_1},
1085 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
1086 {"eu_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1087 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1088 {"fo_FO" , C_ISO_8859_1 , C_ISO_8859_1},
1089 {"fr_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1090 {"fr_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1091 {"fr_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1092 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1093 {"fr_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1094 {"ga_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1095 {"gl_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1096 {"gv_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1097 {"id_ID" , C_ISO_8859_1 , C_ISO_8859_1},
1098 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
1099 {"it_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1100 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
1101 {"kl_GL" , C_ISO_8859_1 , C_ISO_8859_1},
1102 {"kw_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1103 {"ms_MY" , C_ISO_8859_1 , C_ISO_8859_1},
1104 {"nl_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1105 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
1106 {"nn_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1107 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1108 {"oc_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1109 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
1110 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
1111 {"sq_AL" , C_ISO_8859_1 , C_ISO_8859_1},
1112 {"sv_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1113 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
1114 {"tl_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1115 {"uz_UZ" , C_ISO_8859_1 , C_ISO_8859_1},
1116 {"wa_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1118 {"bs_BA" , C_ISO_8859_2 , C_ISO_8859_2},
1119 {"cs_CZ" , C_ISO_8859_2 , C_ISO_8859_2},
1120 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
1121 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
1122 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
1123 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
1124 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
1125 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
1127 {"sr_YU@cyrillic" , C_ISO_8859_5 , C_ISO_8859_5},
1128 {"sr_YU" , C_ISO_8859_2 , C_ISO_8859_2},
1130 {"mt_MT" , C_ISO_8859_3 , C_ISO_8859_3},
1132 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
1133 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1134 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1135 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
1137 {"mk_MK" , C_ISO_8859_5 , C_ISO_8859_5},
1139 {"ar_AE" , C_ISO_8859_6 , C_ISO_8859_6},
1140 {"ar_BH" , C_ISO_8859_6 , C_ISO_8859_6},
1141 {"ar_DZ" , C_ISO_8859_6 , C_ISO_8859_6},
1142 {"ar_EG" , C_ISO_8859_6 , C_ISO_8859_6},
1143 {"ar_IQ" , C_ISO_8859_6 , C_ISO_8859_6},
1144 {"ar_JO" , C_ISO_8859_6 , C_ISO_8859_6},
1145 {"ar_KW" , C_ISO_8859_6 , C_ISO_8859_6},
1146 {"ar_LB" , C_ISO_8859_6 , C_ISO_8859_6},
1147 {"ar_LY" , C_ISO_8859_6 , C_ISO_8859_6},
1148 {"ar_MA" , C_ISO_8859_6 , C_ISO_8859_6},
1149 {"ar_OM" , C_ISO_8859_6 , C_ISO_8859_6},
1150 {"ar_QA" , C_ISO_8859_6 , C_ISO_8859_6},
1151 {"ar_SA" , C_ISO_8859_6 , C_ISO_8859_6},
1152 {"ar_SD" , C_ISO_8859_6 , C_ISO_8859_6},
1153 {"ar_SY" , C_ISO_8859_6 , C_ISO_8859_6},
1154 {"ar_TN" , C_ISO_8859_6 , C_ISO_8859_6},
1155 {"ar_YE" , C_ISO_8859_6 , C_ISO_8859_6},
1157 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
1158 {"he_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1159 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1160 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
1162 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
1163 {"mi_NZ" , C_ISO_8859_13 , C_ISO_8859_13},
1165 {"cy_GB" , C_ISO_8859_14 , C_ISO_8859_14},
1167 {"ar_IN" , C_UTF_8 , C_UTF_8},
1168 {"en_IN" , C_UTF_8 , C_UTF_8},
1169 {"se_NO" , C_UTF_8 , C_UTF_8},
1170 {"ta_IN" , C_UTF_8 , C_UTF_8},
1171 {"te_IN" , C_UTF_8 , C_UTF_8},
1172 {"ur_PK" , C_UTF_8 , C_UTF_8},
1174 {"th_TH" , C_TIS_620 , C_TIS_620},
1175 /* {"th_TH" , C_WINDOWS_874}, */
1176 /* {"th_TH" , C_ISO_8859_11}, */
1178 {"ka_GE" , C_GEORGIAN_PS , C_GEORGIAN_PS},
1179 {"vi_VN.TCVN" , C_TCVN5712_1 , C_TCVN5712_1},
1181 {"C" , C_US_ASCII , C_US_ASCII},
1182 {"POSIX" , C_US_ASCII , C_US_ASCII},
1183 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
/* Return the hash table that maps a CharSet value (stored as a pointer
 * key via GUINT_TO_POINTER) to a name from the charsets table.  The
 * table lives in a function-local static.
 * NOTE(review): the lines completing the lookup test and the insert call
 * are not visible; presumably an entry is inserted only when its CharSet
 * value is not in the table yet, so the first name listed for a value
 * wins - confirm. */
1186 static GHashTable *conv_get_charset_to_str_table(void)
1188 static GHashTable *table;
1194 table = g_hash_table_new(NULL, g_direct_equal); /* a NULL hash function makes GLib use g_direct_hash() */
1196 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1197 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1200 (table, GUINT_TO_POINTER(charsets[i].charset),
/* Return the hash table that maps a charset name to its CharSet value
 * (stored as a pointer via GUINT_TO_POINTER).  Keys are hashed and
 * compared with str_case_hash / str_case_equal, and every entry of the
 * charsets table is inserted.  The table lives in a function-local
 * static.
 * NOTE(review): the guard that skips rebuilding an existing table and
 * the return statement are not visible in this view. */
1208 static GHashTable *conv_get_charset_from_str_table(void)
1210 static GHashTable *table;
1216 table = g_hash_table_new(str_case_hash, str_case_equal);
1218 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1219 g_hash_table_insert(table, charsets[i].name,
1220 GUINT_TO_POINTER(charsets[i].charset));
/* Return the name registered for charset in the CharSet-to-name table,
 * or NULL when the lookup finds no entry. */
1226 const gchar *conv_get_charset_str(CharSet charset)
1230 table = conv_get_charset_to_str_table();
1231 return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
/* Translate a charset name into its CharSet value.  A NULL name yields
 * C_AUTO.  A name that is not in the table makes the lookup return NULL,
 * which GPOINTER_TO_UINT turns into 0. */
1234 CharSet conv_get_charset_from_str(const gchar *charset)
1238 if (!charset) return C_AUTO;
1240 table = conv_get_charset_from_str_table();
1241 return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
/* Determine the CharSet implied by the current locale and cache it in a
 * function-local static (-1 means "not computed yet").  Checks, in
 * order:
 *   1. a ".UTF-8" or ".utf8" substring (case-insensitive) -> C_UTF_8;
 *   2. a trailing "@euro" modifier -> C_ISO_8859_15;
 *   3. the first locale_table entry whose name is a case-insensitive
 *      prefix of the locale, or whose language code equals a bare
 *      two-letter locale.
 * NOTE(review): the early returns, loop exits and the conditions
 * guarding the C_US_ASCII and C_AUTO assignments are not visible;
 * presumably C_US_ASCII is used when no locale is available and C_AUTO
 * when nothing matched - confirm. */
1244 static CharSet conv_get_locale_charset(void)
1246 static CharSet cur_charset = -1;
1247 const gchar *cur_locale;
1251 if (cur_charset != -1)
1254 cur_locale = conv_get_current_locale();
1256 cur_charset = C_US_ASCII;
1260 if (strcasestr(cur_locale, ".UTF-8") ||
1261 strcasestr(cur_locale, ".utf8")) {
1262 cur_charset = C_UTF_8;
1266 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') { /* p[5] is the byte right after "@euro": the modifier must end the string */
1267 cur_charset = C_ISO_8859_15;
1271 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1274 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1275 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1276 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1277 strlen(locale_table[i].locale))) {
1278 cur_charset = locale_table[i].charset;
1280 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1281 !strchr(p + 1, '.')) {
1282 if (strlen(cur_locale) == 2 &&
1283 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1284 cur_charset = locale_table[i].charset;
1290 cur_charset = C_AUTO;
/* Variant of conv_get_locale_charset() that looks past a UTF-8 suffix
 * on the locale name.  When prefs_common.broken_are_utf8 is set it
 * simply defers to conv_get_locale_charset().  Otherwise a copy of the
 * locale is cut at ".UTF-8", and the "@euro" test and the locale_table
 * scan are then performed as in conv_get_locale_charset().  The result
 * is cached in a function-local static (-1 means "not computed yet").
 * NOTE(review): what happens to `tmp` after truncation, the early
 * returns and the conditions guarding the C_US_ASCII and C_AUTO
 * assignments are not visible in this view. */
1294 static CharSet conv_get_locale_charset_no_utf8(void)
1296 static CharSet cur_charset = -1;
1297 const gchar *cur_locale;
1302 if (prefs_common.broken_are_utf8)
1303 return conv_get_locale_charset();
1305 if (cur_charset != -1)
1308 cur_locale = conv_get_current_locale();
1310 cur_charset = C_US_ASCII;
/* NOTE(review): the test below looks for "UTF-8" but the truncation
 * searches for ".UTF-8".  If the locale contained "UTF-8" without a
 * preceding dot, strcasestr() would return NULL and the store would
 * dereference it.  The ".utf8" spelling accepted by
 * conv_get_locale_charset() is not handled here either. */
1314 if (strcasestr(cur_locale, "UTF-8")) {
1315 tmp = g_strdup(cur_locale);
1316 *(strcasestr(tmp, ".UTF-8")) = '\0';
1320 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') { /* "@euro" must end the string */
1321 cur_charset = C_ISO_8859_15;
1325 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1328 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1329 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1330 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1331 strlen(locale_table[i].locale))) {
1332 cur_charset = locale_table[i].charset;
1334 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1335 !strchr(p + 1, '.')) {
1336 if (strlen(cur_locale) == 2 &&
1337 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1338 cur_charset = locale_table[i].charset;
1344 cur_charset = C_AUTO;
/* Return the name of the locale charset, kept in a function-local
 * static, or CS_INTERNAL when no name is registered for that charset.
 * NOTE(review): the guard around the assignment is not visible;
 * presumably the lookup runs only while codeset is still NULL -
 * confirm. */
1348 const gchar *conv_get_locale_charset_str(void)
1350 static const gchar *codeset = NULL;
1353 codeset = conv_get_charset_str(conv_get_locale_charset());
1355 return codeset ? codeset : CS_INTERNAL;
/* Same as conv_get_locale_charset_str(), but based on
 * conv_get_locale_charset_no_utf8().  Falls back to CS_INTERNAL when no
 * name is registered for the charset.
 * NOTE(review): the guard around the assignment is not visible;
 * presumably the lookup runs only while codeset is still NULL -
 * confirm. */
1358 const gchar *conv_get_locale_charset_str_no_utf8(void)
1360 static const gchar *codeset = NULL;
1363 codeset = conv_get_charset_str(conv_get_locale_charset_no_utf8());
1365 return codeset ? codeset : CS_INTERNAL;
/* Determine the default CharSet for outgoing text from the current
 * locale and cache it in a function-local static (-1 means "not computed
 * yet").  The checks mirror conv_get_locale_charset(), but the
 * out_charset column of locale_table is used:
 *   1. a "UTF-8" substring (case-insensitive) -> C_UTF_8;
 *   2. a trailing "@euro" modifier -> C_ISO_8859_15;
 *   3. the first matching locale_table entry.
 * NOTE(review): unlike conv_get_locale_charset(), the ".utf8" spelling
 * is not tested here.  The early returns and the condition guarding the
 * C_AUTO assignment are not visible in this view. */
1368 static CharSet conv_get_outgoing_charset(void)
1370 static CharSet out_charset = -1;
1371 const gchar *cur_locale;
1375 if (out_charset != -1)
1378 cur_locale = conv_get_current_locale();
1380 out_charset = C_AUTO;
1384 if (strcasestr(cur_locale, "UTF-8")) {
1385 out_charset = C_UTF_8;
1389 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') { /* "@euro" must end the string */
1390 out_charset = C_ISO_8859_15;
1394 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1397 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1398 strlen(locale_table[i].locale))) {
1399 out_charset = locale_table[i].out_charset;
1401 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1402 !strchr(p + 1, '.')) {
1403 if (strlen(cur_locale) == 2 &&
1404 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1405 out_charset = locale_table[i].out_charset;
/* Return the name of the default outgoing charset, or CS_UTF_8 when no
 * name is registered for it. */
1414 const gchar *conv_get_outgoing_charset_str(void)
1416 CharSet out_charset;
1419 out_charset = conv_get_outgoing_charset();
1420 str = conv_get_charset_str(out_charset);
1422 return str ? str : CS_UTF_8;
/* Return the name of the current locale.  One branch asks
 * g_win32_getlocale(); the other consults the environment variables
 * LC_ALL, LC_CTYPE and LANG in that order and finally queries
 * setlocale(LC_CTYPE, NULL).  The chosen value is also written to the
 * debug log.
 * NOTE(review): the opening preprocessor conditional and the return
 * statement are not visible; the #endif comment indicates the split is
 * on G_OS_WIN32. */
1425 const gchar *conv_get_current_locale(void)
1427 const gchar *cur_locale;
1430 cur_locale = g_win32_getlocale();
1432 cur_locale = g_getenv("LC_ALL");
1433 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1434 if (!cur_locale) cur_locale = g_getenv("LANG");
1435 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL); /* a NULL second argument only queries the locale */
1436 #endif /* G_OS_WIN32 */
1438 debug_print("current locale: %s\n",
1439 cur_locale ? cur_locale : "(none)");
/* Report whether the current locale name starts with "ja"
 * (case-insensitive).  The answer is cached in a function-local static
 * where -1 means "not computed yet".
 * NOTE(review): the assignments to is_ja_locale and any NULL check on
 * cur_locale are not visible in this view. */
1444 static gboolean conv_is_ja_locale(void)
1446 static gint is_ja_locale = -1;
1447 const gchar *cur_locale;
1449 if (is_ja_locale != -1)
1450 return is_ja_locale != 0;
1453 cur_locale = conv_get_current_locale();
1455 if (g_ascii_strncasecmp(cur_locale, "ja", 2) == 0)
1459 return is_ja_locale != 0;
/* Decode a MIME-encoded header string and return the result of
 * unmime_header().
 *   - Pure ASCII input is handed to unmime_header() directly.
 *   - With a default_encoding, the input is first converted from that
 *     encoding to CS_INTERNAL and then decoded.
 *   - Otherwise the input is converted for display into a stack buffer
 *     of BUFFSIZE bytes (conv_anytodisp() in a Japanese locale,
 *     conv_localetodisp() elsewhere) and then decoded.
 * NOTE(review): the handling of a failed conversion and the release of
 * utf8_buf are not visible in this view. */
1462 gchar *conv_unmime_header(const gchar *str, const gchar *default_encoding)
1464 gchar buf[BUFFSIZE];
1466 if (is_ascii_str(str))
1467 return unmime_header(str);
1469 if (default_encoding) {
1472 utf8_buf = conv_codeset_strdup
1473 (str, default_encoding, CS_INTERNAL);
1477 decoded_str = unmime_header(utf8_buf);
1483 if (conv_is_ja_locale())
1484 conv_anytodisp(buf, sizeof(buf), str);
1486 conv_localetodisp(buf, sizeof(buf), str);
1488 return unmime_header(buf);
/* Constants for header encoding: the normal and the hard line-length
 * limits, and the strings that open ("=?") and close ("?=") a MIME
 * encoded-word. */
1491 #define MAX_LINELEN 76
1492 #define MAX_HARD_LINELEN 996
1493 #define MIMESEP_BEGIN "=?"
1494 #define MIMESEP_END "?="
/* Line-break helper for the header encoder.  The expansion uses the
 * caller's local variables len, dest, destp, srcp and left directly, so
 * it can only be used where those names are in scope.  When `cond`
 * holds and input remains, the visible lines inspect the character
 * before destp (and, for plain text, the next source character) for
 * whitespace and reset `left` to MAX_LINELEN - 1.
 * NOTE(review): only part of the expansion is visible in this view. */
1496 #define LBREAK_IF_REQUIRED(cond, is_plain_text) \
1498 if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) { \
1503 if ((cond) && *srcp) { \
1504 if (destp > (guchar *)dest && left < MAX_LINELEN - 1) { \
1505 if (isspace(*(destp - 1))) \
1507 else if (is_plain_text && isspace(*srcp)) \
1512 left = MAX_LINELEN - 1; \
/*
 * Encode header text for transmission, writing the result into dest.
 *
 * Plain ASCII words are copied through; text containing non-ASCII
 * characters is converted to the outgoing charset and emitted as MIME
 * encoded-words (=?charset?B?...?= or =?charset?Q?...?=).  Line folding
 * is delegated to LBREAK_IF_REQUIRED.
 *
 * dest:          output buffer
 * len:           size of dest; LBREAK_IF_REQUIRED compares the remaining
 *                room against MAX_LINELEN + 2
 * src:           input text; the call is rejected by g_return_if_fail()
 *                unless g_utf8_validate() accepts it
 * header_len:    subtracted from MAX_LINELEN to get the room left on the
 *                first line (presumably the length of the header name
 *                already written by the caller -- confirm)
 * addr_field:    when TRUE, the characters ( ) and double quote are kept
 *                out of the encoded-words
 * out_encoding_: target charset name; conv_encode_header() passes NULL,
 *                and conv_get_outgoing_charset_str() is the alternative
 *                source of the charset
 */
1518 void conv_encode_header_full(gchar *dest, gint len, const gchar *src,
1519 gint header_len, gboolean addr_field,
1520 const gchar *out_encoding_)
1522 const gchar *cur_encoding;
1523 const gchar *out_encoding;
1527 const guchar *srcp = src;
1528 guchar *destp = dest;
1529 gboolean use_base64;
1531 g_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
/* Encoding marker: ?B? when the C locale is multibyte (MB_CUR_MAX > 1),
 * ?Q? otherwise; use_base64 presumably records the same choice. */
1533 if (MB_CUR_MAX > 1) {
1535 mimesep_enc = "?B?";
1538 mimesep_enc = "?Q?";
/* The input is always in the internal charset. */
1541 cur_encoding = CS_INTERNAL;
1544 out_encoding = out_encoding_;
1546 out_encoding = conv_get_outgoing_charset_str();
/* US-ASCII is never declared on an encoded-word; ISO-8859-1 is used
 * in its place. */
1548 if (!strcmp(out_encoding, CS_US_ASCII))
1549 out_encoding = CS_ISO_8859_1;
/* Fixed overhead of one encoded-word: opening delimiter + charset name
 * + encoding marker + closing delimiter. */
1551 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1552 strlen(mimesep_enc) + strlen(MIMESEP_END);
/* Columns still available on the current output line. */
1554 left = MAX_LINELEN - header_len;
1557 LBREAK_IF_REQUIRED(left <= 0, TRUE);
/* Leading whitespace is handled before looking at the next word. */
1559 while (isspace(*srcp)) {
1562 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1565 /* output as it is if the next word is ASCII string */
1566 if (!is_next_nonascii(srcp)) {
1569 word_len = get_next_word_len(srcp);
1570 LBREAK_IF_REQUIRED(left < word_len, TRUE);
1571 while (word_len > 0) {
/* An ASCII word may overrun MAX_LINELEN; a break is only requested once
 * left has dropped to MAX_LINELEN - MAX_HARD_LINELEN. */
1572 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1581 /* don't include parentheses and quotes in encoded strings */
1582 if (addr_field && (*srcp == '(' || *srcp == ')' || *srcp == '"')) {
1583 LBREAK_IF_REQUIRED(left < 2, FALSE);
/* Measuring pass: starting at srcp, the candidate prefix grows by one
 * UTF-8 character (g_utf8_skip) at a time.  Each candidate is converted
 * to out_encoding and its encoded length, plus mimestr_len, is compared
 * with the room left on the line.  If the conversion fails, a warning is
 * logged and the text is passed through conv_unreadable_8bit() instead. */
1594 const guchar *p = srcp;
1596 gint out_enc_str_len;
1597 gint mime_block_len;
1598 gboolean cont = FALSE;
1600 while (*p != '\0') {
1601 if (isspace(*p) && !is_next_nonascii(p + 1))
1603 /* don't include parentheses in encoded
1605 if (addr_field && (*p == '(' || *p == ')' || *p == '"'))
1608 mb_len = g_utf8_skip[*p];
1610 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1611 out_str = conv_codeset_strdup
1612 (part_str, cur_encoding, out_encoding);
1618 g_warning("conv_encode_header(): code conversion failed\n");
1619 conv_unreadable_8bit(part_str);
1620 out_str = g_strdup(part_str);
1623 out_str_len = strlen(out_str);
1626 out_enc_str_len = B64LEN(out_str_len);
1629 qp_get_q_encoding_len(out_str);
1633 if (mimestr_len + out_enc_str_len <= left) {
1636 } else if (cur_len == 0) {
1637 LBREAK_IF_REQUIRED(1, FALSE);
1646 Xstrndup_a(part_str, srcp, cur_len, );
1647 out_str = conv_codeset_strdup
1648 (part_str, cur_encoding, out_encoding);
1650 g_warning("conv_encode_header(): code conversion failed\n");
1651 conv_unreadable_8bit(part_str);
1652 out_str = g_strdup(part_str);
1654 out_str_len = strlen(out_str);
1657 out_enc_str_len = B64LEN(out_str_len);
1660 qp_get_q_encoding_len(out_str);
1662 Xalloca(enc_str, out_enc_str_len + 1, );
1664 base64_encode(enc_str, out_str, out_str_len);
1666 qp_q_encode(enc_str, out_str);
1670 /* output MIME-encoded string block */
/* Length of the whole encoded-word: fixed overhead plus encoded payload. */
1671 mime_block_len = mimestr_len + strlen(enc_str);
/* NOTE(review): the size given to g_snprintf is derived from the text
 * being written (plus one for the terminating NUL), not from the space
 * remaining in dest; the buffer check is left to LBREAK_IF_REQUIRED. */
1672 g_snprintf(destp, mime_block_len + 1,
1673 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1674 out_encoding, mimesep_enc, enc_str);
/* Account for the encoded-word in both the write position and the
 * columns left on the current line. */
1675 destp += mime_block_len;
1678 left -= mime_block_len;
/* cont presumably marks that the measuring pass stopped for lack of room
 * on the line, in which case a fold is requested before continuing. */
1681 LBREAK_IF_REQUIRED(cont, FALSE);
1691 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1692 gint header_len, gboolean addr_field)
1694 conv_encode_header_full(dest,len,src,header_len,addr_field,NULL);
1697 #undef LBREAK_IF_REQUIRED
1698 gchar *conv_filename_from_utf8(const gchar *utf8_file)
1701 GError *error = NULL;
1703 fs_file = g_filename_from_utf8(utf8_file, -1, NULL, NULL, &error);
1705 g_warning("failed to convert encoding of file name: %s\n",
1707 g_error_free(error);
1710 fs_file = g_strdup(utf8_file);
1715 gchar *conv_filename_to_utf8(const gchar *fs_file)
1717 gchar *utf8_file = NULL;
1718 GError *error = NULL;
1720 utf8_file = g_filename_to_utf8(fs_file, -1, NULL, NULL, &error);
1722 g_warning("failed to convert encoding of file name: %s\n",
1724 g_error_free(error);
1727 if (!utf8_file || !g_utf8_validate(utf8_file, -1, NULL)) {
1729 utf8_file = g_strdup(fs_file);
1730 conv_unreadable_8bit(utf8_file);