2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2002 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
29 #if (HAVE_WCTYPE_H && HAVE_WCHAR_H)
46 #include "quoted-printable.h"
48 #include "prefs_common.h"
/* Character written in place of bytes that cannot be displayed. */
58 #define SUBST_CHAR '_'
/*
 * Byte-class predicates.  Each macro masks its argument to 8 bits and tests
 * it against fixed ranges, so it is safe to pass a signed or unsigned char.
 * The names indicate the role of the byte in EUC-JP or Shift_JIS text.
 */
61 #define iseuckanji(c) \
62 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* 0x8e introduces a two-byte half-width kana sequence (see iseuchwkana2). */
63 #define iseuchwkana1(c) \
64 (((c) & 0xff) == 0x8e)
65 #define iseuchwkana2(c) \
66 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/*
 * NOTE(review): the #define line that owns the next expression is missing
 * from this listing.  Presumably it is iseucaux(c), which is used further
 * down in the file -- confirm against the full source.
 */
68 (((c) & 0xff) == 0x8f)
/* Lead bytes 0xa9..0xaf are treated as unprintable by the display filter. */
69 #define isunprintableeuckanji(c) \
70 (((c) & 0xff) >= 0xa9 && ((c) & 0xff) <= 0xaf)
/* Shift_JIS lead byte (issjiskanji1) and trail byte (issjiskanji2) ranges. */
71 #define issjiskanji1(c) \
72 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
73 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
74 #define issjiskanji2(c) \
75 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
76 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* Single-byte half-width kana in Shift_JIS. */
77 #define issjishwkana(c) \
78 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
81 if (state != JIS_KANJI) { \
89 if (state != JIS_ASCII) { \
97 if (state != JIS_HWKANA) { \
101 state = JIS_HWKANA; \
105 if (state != JIS_AUXKANJI) { \
110 state = JIS_AUXKANJI; \
/*
 * Convert the JIS (ISO-2022-JP) string inbuf and write the result to outbuf.
 *
 * The visible code keeps a JISState that starts as JIS_ASCII, inspects the
 * bytes that follow an escape introducer, reacts to the control bytes 0x0e
 * and 0x0f, and copies shifted-in bytes with the high bit set (EUC form).
 *
 * NOTE(review): several lines of this function are absent from this listing.
 * outlen is not referenced on any visible line, so whether writes to outbuf
 * are bounded cannot be confirmed from here.
 */
113 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
115 const guchar *in = inbuf;
116 guchar *out = outbuf;
117 JISState state = JIS_ASCII;
119 while (*in != '\0') {
/* two-byte designators: the byte after the introducer selects the set */
123 if (*(in + 1) == '@' || *(in + 1) == 'B') {
126 } else if (*(in + 1) == '(' &&
128 state = JIS_AUXKANJI;
131 /* unknown escape sequence */
134 } else if (*in == '(') {
135 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
138 } else if (*(in + 1) == 'I') {
142 /* unknown escape sequence */
146 /* unknown escape sequence */
/* 0x0e and 0x0f are the ASCII shift-out and shift-in control codes */
149 } else if (*in == 0x0e) {
152 } else if (*in == 0x0f) {
/*
 * Bytes are copied with bit 7 set.  For two-byte characters the loop is
 * left early if the input ends after the first byte of a pair.
 */
161 *out++ = *in++ | 0x80;
162 if (*in == '\0') break;
163 *out++ = *in++ | 0x80;
167 *out++ = *in++ | 0x80;
171 *out++ = *in++ | 0x80;
172 if (*in == '\0') break;
173 *out++ = *in++ | 0x80;
/*
 * Convert the EUC-JP string inbuf to JIS (ISO-2022-JP) form in outbuf.
 *
 * Visible branches handle two-byte kanji, half-width kana introduced by
 * 0x8e, and auxiliary kanji tested with iseucaux(); in each case the bytes
 * are written with the high bit cleared.  When a sequence is malformed the
 * code checks whether the current byte is a non-ASCII byte (the handling
 * itself is on lines not shown here).
 *
 * NOTE(review): outlen is not referenced on any visible line; bounds
 * handling cannot be confirmed from this listing.
 */
182 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
184 const guchar *in = inbuf;
185 guchar *out = outbuf;
186 JISState state = JIS_ASCII;
188 while (*in != '\0') {
192 } else if (iseuckanji(*in)) {
/* a kanji needs a valid second byte as well */
193 if (iseuckanji(*(in + 1))) {
195 *out++ = *in++ & 0x7f;
196 *out++ = *in++ & 0x7f;
201 if (*in != '\0' && !isascii(*in)) {
206 } else if (iseuchwkana1(*in)) {
208 if (iseuchwkana2(*in)) {
210 *out++ = *in++ & 0x7f;
213 if (*in != '\0' && !isascii(*in)) {
218 } else if (iseucaux(*in)) {
/* auxiliary kanji: two further kanji-range bytes must follow */
220 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
222 *out++ = *in++ & 0x7f;
223 *out++ = *in++ & 0x7f;
226 if (*in != '\0' && !isascii(*in)) {
229 if (*in != '\0' && !isascii(*in)) {
/*
 * Convert the Shift_JIS string inbuf to EUC-JP in outbuf.
 *
 * A lead byte accepted by issjiskanji1() followed by a trail byte accepted
 * by issjiskanji2() is remapped arithmetically (out1/out2 below) and both
 * result bytes are written with the high bit set.  Single-byte half-width
 * kana are recognised by issjishwkana(); their output is on lines not shown.
 *
 * NOTE(review): outlen is not referenced on any visible line.
 */
246 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
248 const guchar *in = inbuf;
249 guchar *out = outbuf;
251 while (*in != '\0') {
254 } else if (issjiskanji1(*in)) {
255 if (issjiskanji2(*(in + 1))) {
257 guchar out2 = *(in + 1);
/* the lead byte offset depends on which of the two lead ranges it is in */
260 row = out1 < 0xa0 ? 0x70 : 0xb0;
262 out1 = (out1 - row) * 2 - 1;
263 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
265 out1 = (out1 - row) * 2;
269 *out++ = out1 | 0x80;
270 *out++ = out2 | 0x80;
275 if (*in != '\0' && !isascii(*in)) {
280 } else if (issjishwkana(*in)) {
/*
 * Convert inbuf to EUC-JP, choosing the converter from the result of
 * conv_guess_encoding(): conv_jistoeuc(), conv_sjistoeuc(), or a plain
 * bounded copy with strncpy2().  The case labels are on lines not shown in
 * this listing.
 */
292 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
294 switch (conv_guess_encoding(inbuf)) {
296 conv_jistoeuc(outbuf, outlen, inbuf);
299 conv_sjistoeuc(outbuf, outlen, inbuf);
302 strncpy2(outbuf, inbuf, outlen);
/*
 * Convert inbuf to JIS form, choosing by conv_guess_encoding(): either
 * conv_euctojis() or a plain bounded copy with strncpy2().  The case labels
 * are on lines not shown in this listing.
 */
307 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
309 switch (conv_guess_encoding(inbuf)) {
311 conv_euctojis(outbuf, outlen, inbuf);
314 strncpy2(outbuf, inbuf, outlen);
/*
 * Clean up the EUC-JP string str in place for display.
 *
 * CR+LF is collapsed to LF by shifting the rest of the string down one
 * byte.  Multi-byte sequences are classified as printable kanji, half-width
 * kana or auxiliary kanji; according to the inline comments anything else
 * is substituted (the substituting statements are on lines not shown).
 */
319 void conv_unreadable_eucjp(gchar *str)
321 register guchar *p = str;
325 /* convert CR+LF -> LF */
326 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes starting at p + 1 include the terminating NUL */
327 memmove(p, p + 1, strlen(p));
328 /* printable 7 bit code */
330 } else if (iseuckanji(*p)) {
331 if (iseuckanji(*(p + 1)) && !isunprintableeuckanji(*p))
332 /* printable euc-jp code */
335 /* substitute unprintable code */
344 } else if (iseuchwkana1(*p)) {
345 if (iseuchwkana2(*(p + 1)))
346 /* euc-jp hankaku kana */
350 } else if (iseucaux(*p)) {
351 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
352 /* auxiliary kanji */
357 /* substitute unprintable 1 byte code */
/*
 * Clean up str in place for a 7-bit display: CR+LF becomes LF and every
 * byte for which isascii() is false is overwritten with SUBST_CHAR.
 *
 * NOTE(review): each CR+LF pair triggers a memmove of the whole remaining
 * string, so input with many CR+LF pairs costs quadratic time.
 */
362 void conv_unreadable_8bit(gchar *str)
364 register guchar *p = str;
367 /* convert CR+LF -> LF */
368 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes starting at p + 1 include the terminating NUL */
369 memmove(p, p + 1, strlen(p));
370 else if (!isascii(*p)) *p = SUBST_CHAR;
/*
 * Clean up the 8-bit string str in place: CR+LF becomes LF, and bytes in
 * the range 0x80..0x9f are singled out.  The statement applied to those
 * bytes is on a line not shown; presumably it stores SUBST_CHAR as the
 * sibling conv_unreadable_8bit() does -- confirm against the full source.
 */
375 void conv_unreadable_latin(gchar *str)
377 register guchar *p = str;
380 /* convert CR+LF -> LF */
381 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes starting at p + 1 include the terminating NUL */
382 memmove(p, p + 1, strlen(p));
383 else if ((*p & 0xff) >= 0x80 && (*p & 0xff) <= 0x9f)
/*
 * Rewrite certain two-byte characters in str as single ASCII bytes, in
 * place.
 *
 * For a character whose first byte is 0xa1, the second byte (0xa0..0xef)
 * indexes char_tbl; entries equal to NCV mean there is no conversion.  The
 * table holds 80 entries, matching that index range.  A separate branch
 * handles second bytes 0xb0..0xfa; its first-byte test and the value it
 * stores are on lines not shown.  After a conversion the rest of the string
 * is shifted down one byte with memmove.
 */
391 void conv_mb_alnum(gchar *str)
393 static guchar char_tbl[] = {
395 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
396 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
398 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
399 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
401 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
402 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
404 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
405 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
407 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
408 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
411 register guchar *p = str;
418 register guchar ch = *(p + 1);
420 if (ch >= 0xb0 && ch <= 0xfa) {
425 memmove(p, p + 1, len);
431 } else if (*p == 0xa1) {
432 register guchar ch = *(p + 1);
/* the range check keeps the table index within 0..79 */
434 if (ch >= 0xa0 && ch <= 0xef &&
435 NCV != char_tbl[ch - 0xa0]) {
436 *p = char_tbl[ch - 0xa0];
439 memmove(p, p + 1, len);
445 } else if (iseuckanji(*p)) {
/*
 * Guess the character set of str by scanning its bytes.
 *
 * The guess starts as C_US_ASCII.  An ESC followed by $ or ( returns
 * C_ISO_2022_JP immediately if nothing else has been guessed yet.  Byte
 * pairs are tested against the EUC-JP and Shift_JIS ranges, and a
 * single-byte Shift_JIS half-width kana sets the guess to C_SHIFT_JIS.
 * Other assignments and the final return are on lines not shown here.
 */
455 CharSet conv_guess_encoding(const gchar *str)
457 const guchar *p = str;
458 CharSet guessed = C_US_ASCII;
461 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
462 if (guessed == C_US_ASCII)
463 return C_ISO_2022_JP;
465 } else if (isascii(*p)) {
467 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
468 if (*p >= 0xfd && *p <= 0xfe)
469_PLACEHOLDER
/* JIS input to display form: conv_jistoeuc() then conv_unreadable_eucjp(). */
497 void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
499 conv_jistoeuc(outbuf, outlen, inbuf);
500 conv_unreadable_eucjp(outbuf);
/* Shift_JIS input to display form: conv_sjistoeuc() then conv_unreadable_eucjp(). */
503 void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
505 conv_sjistoeuc(outbuf, outlen, inbuf);
506 conv_unreadable_eucjp(outbuf);
/* EUC-JP input to display form: bounded copy, then conv_unreadable_eucjp(). */
509 void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
511 strncpy2(outbuf, inbuf, outlen);
512 conv_unreadable_eucjp(outbuf);
/* Input of guessed encoding to display form: conv_anytoeuc() then conv_unreadable_eucjp(). */
515 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
517 conv_anytoeuc(outbuf, outlen, inbuf);
518 conv_unreadable_eucjp(outbuf);
/* US-ASCII input to display form: bounded copy, then conv_unreadable_8bit(). */
521 void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
523 strncpy2(outbuf, inbuf, outlen);
524 conv_unreadable_8bit(outbuf);
/* Latin input to display form: bounded copy, then conv_unreadable_latin(). */
527 void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
529 strncpy2(outbuf, inbuf, outlen);
530 conv_unreadable_latin(outbuf);
/* Identity converter: copies inbuf to outbuf with strncpy2(), nothing else. */
533 void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
535 strncpy2(outbuf, inbuf, outlen);
/*
 * Allocate a zero-filled CodeConverter for the named charset.
 *
 * The converter stores the function chosen by conv_get_code_conv_func(),
 * a private copy of the charset name, and the CharSet value returned by
 * conv_get_charset_from_str().  The return statement is on a line not shown.
 *
 * NOTE(review): lines are missing around the code_conv_func assignment;
 * it may be guarded by a preprocessor conditional -- confirm.
 */
538 CodeConverter *conv_code_converter_new(const gchar *charset)
542 conv = g_new0(CodeConverter, 1);
544 conv->code_conv_func = conv_get_code_conv_func(charset);
546 conv->charset_str = g_strdup(charset);
547 conv->charset = conv_get_charset_from_str(charset);
/*
 * Release a CodeConverter.  The visible line frees the charset name copy;
 * freeing conv itself is presumably on the following line, which is not
 * shown in this listing.
 *
 * NOTE(review): conv is dereferenced without a NULL check.
 */
552 void conv_code_converter_destroy(CodeConverter *conv)
554 g_free(conv->charset_str);
/*
 * Convert inbuf with the given converter and place the result in outbuf.
 *
 * Two build variants are visible.  The first converts through
 * conv_codeset_strdup() using the stored charset name and copies the
 * result with strncpy2(); the variant after the #else (no libjconv) calls
 * the stored code_conv_func directly.  The rest of the signature, the
 * return values and any cleanup are on lines not shown.
 */
558 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
564 str = conv_codeset_strdup(inbuf, conv->charset_str, NULL);
568 strncpy2(outbuf, str, outlen);
571 #else /* !HAVE_LIBJCONV */
572 conv->code_conv_func(outbuf, outlen, inbuf);
/*
 * Return a newly allocated copy of inbuf converted from src_codeset to
 * dest_codeset.
 *
 * Visible behaviour:
 *  - If conv_get_code_conv_func(src_codeset) yields something other than
 *    conv_noconv, a buffer is sized (twice the input length plus one for
 *    the JIS, Shift_JIS and guessing display converters, input length plus
 *    one otherwise), filled by that function and shrunk with g_realloc.
 *    The condition guarding this path is on lines not shown.
 *  - With libjconv: identical source and destination names, or a current
 *    codeset of US-ASCII, yield a plain g_strdup(); otherwise
 *    jconv_alloc_conv() performs the conversion and a warning is logged on
 *    failure.
 *  - Without libjconv: only EUC-JP or Shift_JIS to ISO-2022-JP is converted
 *    (buffer of three times the input length plus three); anything else is
 *    duplicated unchanged.
 *
 * A NULL return is possible (see the allocation check below).
 */
578 gchar *conv_codeset_strdup(const gchar *inbuf,
579 const gchar *src_codeset, const gchar *dest_codeset)
585 const gchar *const *codesets;
587 #else /* !HAVE_LIBJCONV */
588 CharSet src_charset = C_AUTO, dest_charset = C_AUTO;
594 func = conv_get_code_conv_func(src_codeset);
595 if (func != conv_noconv) {
596 if (func == conv_jistodisp ||
597 func == conv_sjistodisp ||
598 func == conv_anytodisp)
599 len = strlen(inbuf) * 2 + 1;
601 len = strlen(inbuf) + 1;
603 if (!buf) return NULL;
604 func(buf, len, inbuf);
/* trim the allocation to the converted length */
605 buf = g_realloc(buf, strlen(buf) + 1);
610 /* don't convert if src and dest codeset are identical */
611 if (src_codeset && dest_codeset &&
612 !strcasecmp(src_codeset, dest_codeset))
613 return g_strdup(inbuf);
617 codesets = &src_codeset;
620 codesets = jconv_info_get_pref_codesets(&n_codesets);
622 dest_codeset = conv_get_current_charset_str();
623 /* don't convert if current codeset is US-ASCII */
624 if (!strcasecmp(dest_codeset, CS_US_ASCII))
625 return g_strdup(inbuf);
628 if (jconv_alloc_conv(inbuf, strlen(inbuf), &buf, &len,
629 codesets, n_codesets,
630 &actual_codeset, dest_codeset)
635 g_warning("code conversion from %s to %s failed\n",
636 codesets && codesets[0] ? codesets[0] : "(unknown)",
641 #else /* !HAVE_LIBJCONV */
/*
 * NOTE(review): src_codeset is passed to strcasecmp below; a NULL guard, if
 * any, would be on the missing line just above -- confirm.
 */
643 if (!strcasecmp(src_codeset, CS_EUC_JP) ||
644 !strcasecmp(src_codeset, CS_EUCJP))
645 src_charset = C_EUC_JP;
646 else if (!strcasecmp(src_codeset, CS_SHIFT_JIS) ||
647 !strcasecmp(src_codeset, "SHIFT-JIS") ||
648 !strcasecmp(src_codeset, "SJIS"))
649 src_charset = C_SHIFT_JIS;
650 if (dest_codeset && !strcasecmp(dest_codeset, CS_ISO_2022_JP))
651 dest_charset = C_ISO_2022_JP;
654 if ((src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS) &&
655 dest_charset == C_ISO_2022_JP) {
656 len = (strlen(inbuf) + 1) * 3;
659 if (src_charset == C_EUC_JP)
660 conv_euctojis(buf, len, inbuf);
662 conv_anytojis(buf, len, inbuf);
663 buf = g_realloc(buf, strlen(buf) + 1);
666 buf = g_strdup(inbuf);
669 #endif /* !HAVE_LIBJCONV */
/*
 * Select the display conversion function for the named charset.
 *
 * One early path returns conv_anytodisp when the current charset is EUC-JP
 * or Shift_JIS; the condition that leads into it is on lines not shown.
 * Otherwise the name is compared case-insensitively against the known
 * ISO-2022-JP, US-ASCII, ISO-8859, Shift_JIS and EUC-JP spellings, with
 * conv_noconv as the fallback.
 *
 * NOTE(review): the ISO-8859-1 test compares only the first 10 characters,
 * so names such as ISO-8859-10 through ISO-8859-19 also match it.
 * NOTE(review): lines are missing around the second ISO-8859 test; it may
 * sit inside a preprocessor conditional -- confirm.
 */
672 CodeConvFunc conv_get_code_conv_func(const gchar *charset)
674 CodeConvFunc code_conv;
678 cur_charset = conv_get_current_charset();
679 if (cur_charset == C_EUC_JP || cur_charset == C_SHIFT_JIS)
680 return conv_anytodisp;
685 if (!strcasecmp(charset, CS_ISO_2022_JP) ||
686 !strcasecmp(charset, CS_ISO_2022_JP_2))
687 code_conv = conv_jistodisp;
688 else if (!strcasecmp(charset, CS_US_ASCII))
689 code_conv = conv_ustodisp;
690 else if (!strncasecmp(charset, CS_ISO_8859_1, 10))
691 code_conv = conv_latintodisp;
693 else if (!strncasecmp(charset, "ISO-8859-", 9))
694 code_conv = conv_latintodisp;
696 else if (!strcasecmp(charset, CS_SHIFT_JIS) ||
697 !strcasecmp(charset, "SHIFT-JIS") ||
698 !strcasecmp(charset, "SJIS") ||
699 !strcasecmp(charset, "X-SJIS"))
700 code_conv = conv_sjistodisp;
701 else if (!strcasecmp(charset, CS_EUC_JP) ||
702 !strcasecmp(charset, CS_EUCJP))
703 code_conv = conv_euctodisp;
705 code_conv = conv_noconv;
/*
 * Table pairing each CharSet value with a charset name string.  The lookup
 * functions in this file scan it linearly and return the first match, so
 * when a CharSet appears more than once the earlier name is the one
 * reported.  The member declarations and the array name are on lines not
 * shown; the lookups refer to the array as charsets with members charset
 * and name.
 */
710 static const struct {
714 {C_US_ASCII, CS_US_ASCII},
715 {C_US_ASCII, CS_ANSI_X3_4_1968},
717 {C_ISO_8859_1, CS_ISO_8859_1},
718 {C_ISO_8859_2, CS_ISO_8859_2},
719 {C_ISO_8859_4, CS_ISO_8859_4},
720 {C_ISO_8859_5, CS_ISO_8859_5},
721 {C_ISO_8859_7, CS_ISO_8859_7},
722 {C_ISO_8859_8, CS_ISO_8859_8},
723 {C_ISO_8859_9, CS_ISO_8859_9},
724 {C_ISO_8859_11, CS_ISO_8859_11},
725 {C_ISO_8859_13, CS_ISO_8859_13},
726 {C_ISO_8859_15, CS_ISO_8859_15},
727 {C_BALTIC, CS_BALTIC},
728 {C_CP1251, CS_CP1251},
729 {C_WINDOWS_1251, CS_WINDOWS_1251},
730 {C_KOI8_R, CS_KOI8_R},
731 {C_KOI8_U, CS_KOI8_U},
732 {C_ISO_2022_JP, CS_ISO_2022_JP},
733 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
734 {C_EUC_JP, CS_EUC_JP},
735 {C_EUC_JP, CS_EUCJP},
736 {C_SHIFT_JIS, CS_SHIFT_JIS},
737 {C_ISO_2022_KR, CS_ISO_2022_KR},
738 {C_EUC_KR, CS_EUC_KR},
739 {C_ISO_2022_CN, CS_ISO_2022_CN},
740 {C_EUC_CN, CS_EUC_CN},
741 {C_GB2312, CS_GB2312},
742 {C_EUC_TW, CS_EUC_TW},
744 {C_TIS_620, CS_TIS_620},
745 {C_WINDOWS_874, CS_WINDOWS_874},
/*
 * Table mapping a locale name to the charset used for display and the
 * charset used for outgoing text (second and third columns; the lookups
 * call them charset and out_charset).  Lookups match by prefix and stop at
 * the first hit, so more specific names such as ja_JP.eucJP must come
 * before the bare language_territory form.  The member declarations and
 * the array name are on lines not shown.
 */
749 static const struct {
754 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
755 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
756 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
757 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
758 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
759 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
760 {"ko_KR" , C_EUC_KR , C_EUC_KR},
761 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
762 {"zh_CN" , C_GB2312 , C_GB2312},
763 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
764 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
765 {"zh_TW" , C_BIG5 , C_BIG5},
767 {"ru_RU.KOI8-R" , C_KOI8_R , C_ISO_8859_5},
768 {"ru_RU.CP1251" , C_WINDOWS_1251, C_ISO_8859_5},
770 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
772 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
773 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
774 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
775 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
776 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
777 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
778 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
779 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
780 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
781 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
782 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
783 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
784 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
785 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
786 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
788 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
789 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
790 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
791 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
792 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
793 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
794 {"ru_RU" , C_ISO_8859_5 , C_ISO_8859_5},
795 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
796 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
797 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
799 {"th_TH" , C_TIS_620 , C_TIS_620},
800 /* {"th_TH" , C_WINDOWS_874}, */
801 /* {"th_TH" , C_ISO_8859_11}, */
803 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
804 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
805 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
806 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
807 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
809 {"C" , C_US_ASCII , C_US_ASCII},
810 {"POSIX" , C_US_ASCII , C_US_ASCII},
811 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
813 #endif /* !HAVE_LIBJCONV */
/*
 * Return the name of the first charsets[] entry whose CharSet value equals
 * charset.  The value returned when nothing matches is on a line not shown;
 * callers in this file test the result against NULL.
 */
815 const gchar *conv_get_charset_str(CharSet charset)
819 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
820 if (charsets[i].charset == charset)
821 return charsets[i].name;
/*
 * Map a charset name to its CharSet value by a case-insensitive scan of
 * charsets[].  A NULL name yields C_AUTO.  The value returned for an
 * unknown name is on a line not shown.
 */
827 CharSet conv_get_charset_from_str(const gchar *charset)
831 if (!charset) return C_AUTO;
833 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
834 if (!strcasecmp(charsets[i].name, charset))
835 return charsets[i].charset;
/*
 * Determine the charset of the current environment.
 *
 * The result is kept in a function-local static that starts at -1 and is
 * tested before any other work, so the detection below normally runs once.
 * Two variants are visible: one compares the codeset reported by
 * jconv_info_get_current_codeset() with charsets[]; the other derives the
 * charset from the locale name (US-ASCII when no locale is available,
 * UTF-8 when the name contains that string, otherwise a locale_table
 * lookup).  C_AUTO is the visible fallback.
 */
841 CharSet conv_get_current_charset(void)
843 static CharSet cur_charset = -1;
847 const gchar *cur_codeset;
849 const gchar *cur_locale;
852 if (cur_charset != -1)
856 cur_codeset = jconv_info_get_current_codeset();
857 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
858 if (!strcasecmp(cur_codeset, charsets[i].name)) {
859 cur_charset = charsets[i].charset;
864 cur_locale = conv_get_current_locale();
866 cur_charset = C_US_ASCII;
870 if (strcasestr(cur_locale, "UTF-8")) {
871 cur_charset = C_UTF_8;
875 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
878 /* "ja_JP.EUC" matches with "ja_JP.eucJP" and "ja_JP.EUC" */
879 /* "ja_JP" matches with "ja_JP.xxxx" and "ja" */
880 if (!strncasecmp(cur_locale, locale_table[i].locale,
881 strlen(locale_table[i].locale))) {
882 cur_charset = locale_table[i].charset;
/*
 * Table entries with no codeset suffix after the underscore also match a
 * bare two-letter locale name.
 */
884 } else if ((p = strchr(locale_table[i].locale, '_')) &&
885 !strchr(p + 1, '.')) {
886 if (strlen(cur_locale) == 2 &&
887 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
888 cur_charset = locale_table[i].charset;
895 cur_charset = C_AUTO;
/*
 * Return the name of the current charset, or "US-ASCII" when
 * conv_get_charset_str() has no name for it.  The looked-up name is kept in
 * a function-local static; the condition guarding the lookup is on a line
 * not shown.
 */
899 const gchar *conv_get_current_charset_str(void)
901 static const gchar *codeset = NULL;
904 codeset = conv_get_charset_str(conv_get_current_charset());
906 return codeset ? codeset : "US-ASCII";
/*
 * Determine the charset to use for outgoing text.
 *
 * The result is kept in a function-local static that starts at -1 and is
 * tested first.  Two variants are visible.  One walks the preferred
 * codesets from jconv_info_get_pref_codesets(), matching them against
 * charsets[] from index 3 onward (skipping the leading entries, per the
 * inline comment), then looks for UTF-8, then falls back to C_AUTO.  The
 * other uses the out_charset column of locale_table, matched the same way
 * as in conv_get_current_charset(), and per the inline comment only
 * supports conversion for Japanese locales.
 */
909 CharSet conv_get_outgoing_charset(void)
911 static CharSet out_charset = -1;
915 gint j, n_pref_codesets;
916 const gchar *const *pref_codesets;
918 const gchar *cur_locale;
921 if (out_charset != -1)
925 /* skip US-ASCII and UTF-8 */
926 pref_codesets = jconv_info_get_pref_codesets(&n_pref_codesets);
927 for (i = 0; i < n_pref_codesets; i++) {
928 for (j = 3; j < sizeof(charsets) / sizeof(charsets[0]); j++) {
929 if (!strcasecmp(pref_codesets[i], charsets[j].name)) {
930 out_charset = charsets[j].charset;
936 for (i = 0; i < n_pref_codesets; i++) {
937 if (!strcasecmp(pref_codesets[i], "UTF-8")) {
938 out_charset = C_UTF_8;
943 out_charset = C_AUTO;
945 cur_locale = conv_get_current_locale();
947 out_charset = C_AUTO;
951 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
954 if (!strncasecmp(cur_locale, locale_table[i].locale,
955 strlen(locale_table[i].locale))) {
956 out_charset = locale_table[i].out_charset;
958 } else if ((p = strchr(locale_table[i].locale, '_')) &&
959 !strchr(p + 1, '.')) {
960 if (strlen(cur_locale) == 2 &&
961 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
962 out_charset = locale_table[i].out_charset;
968 /* encoding conversion without libjconv is only supported
969 on Japanese locale for now */
970 if (out_charset == C_ISO_2022_JP)
973 out_charset = conv_get_current_charset();
979 const gchar *conv_get_outgoing_charset_str(void)
984 if (prefs_common.outgoing_charset) {
985 if (!isalpha(prefs_common.outgoing_charset[0])) {
986 g_free(prefs_common.outgoing_charset);
987 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
988 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
989 return prefs_common.outgoing_charset;
992 out_charset = conv_get_outgoing_charset();
993 str = conv_get_charset_str(out_charset);
995 return str ? str : "US-ASCII";
998 const gchar *conv_get_current_locale(void)
1002 cur_locale = g_getenv("LC_ALL");
1003 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1004 if (!cur_locale) cur_locale = g_getenv("LANG");
1005 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1007 debug_print("current locale: %s\n",
1008 cur_locale ? cur_locale : "(none)");
/*
 * Decode MIME encoded words in str, overwriting str with the result.
 *
 * When the current charset is EUC-JP the text is first passed through
 * conv_anytodisp() into a temporary buffer of twice the input length plus
 * one, and unmime_header() then writes back into str.  Otherwise
 * unmime_header() decodes into a temporary buffer of the input length plus
 * one, which is copied back with strncpy2().
 *
 * NOTE(review): Xalloca is a project macro; judging by its name the
 * temporary buffer lives on the stack, so very long headers could be a
 * concern -- confirm against its definition.
 */
1013 void conv_unmime_header_overwrite(gchar *str)
1017 CharSet cur_charset;
1019 cur_charset = conv_get_current_charset();
1021 if (cur_charset == C_EUC_JP) {
1022 buflen = strlen(str) * 2 + 1;
1023 Xalloca(buf, buflen, return);
1024 conv_anytodisp(buf, buflen, str);
1025 unmime_header(str, buf);
1027 buflen = strlen(str) + 1;
1028 Xalloca(buf, buflen, return);
1029 unmime_header(buf, str);
1030 strncpy2(str, buf, buflen);
/*
 * Decode MIME encoded words in str into outbuf.
 *
 * When the current charset is EUC-JP the text is first passed through
 * conv_anytodisp() into a temporary buffer of twice the input length plus
 * one; otherwise str is handed to unmime_header() directly.
 *
 * NOTE(review): outlen and charset are not referenced on any visible line,
 * so whether the write to outbuf is bounded cannot be confirmed from here.
 */
1034 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1035 const gchar *charset)
1037 CharSet cur_charset;
1039 cur_charset = conv_get_current_charset();
1041 if (cur_charset == C_EUC_JP) {
1045 buflen = strlen(str) * 2 + 1;
1046 Xalloca(buf, buflen, return);
1047 conv_anytodisp(buf, buflen, str);
1048 unmime_header(outbuf, buf);
1050 unmime_header(outbuf, str);
/* Line length limit and encoded-word delimiters used by conv_encode_header(). */
1053 #define MAX_LINELEN 76
1054 #define MIMESEP_BEGIN "=?"
1055 #define MIMESEP_END "?="
/*
 * Length of the base64 encoding of len bytes: four output characters for
 * every three input bytes, rounded up to a whole group.
 */
1057 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/*
 * Helper for conv_encode_header(); it uses that function's locals (len,
 * dest, destp, srcp, left) directly.  The visible lines check the space
 * remaining in dest and reset left to MAX_LINELEN - 1.  Several lines of
 * the macro body are missing from this listing.
 */
1059 #define LBREAK_IF_REQUIRED(cond) \
1061 if (len - (destp - dest) < MAX_LINELEN + 2) { \
1067 if (destp > dest && isspace(*(destp - 1))) \
1069 else if (isspace(*srcp)) \
1073 left = MAX_LINELEN - 1; \
/*
 * Encode the header text src into dest (capacity len), producing MIME
 * encoded words for the parts that are not plain ASCII.
 *
 * Visible behaviour:
 *  - The encoding marker is "?B?" when MB_CUR_MAX is greater than 1 and
 *    "?Q?" otherwise.
 *  - The outgoing charset name is used in the encoded word, with
 *    "ISO-8859-1" substituted for "US-ASCII".
 *  - Words for which is_next_nonascii() is false are copied as they are.
 *  - Other text is grown one multibyte character at a time, converted with
 *    conv_codeset_strdup() and measured, until the encoded word would no
 *    longer fit in the space left on the line; the block is then encoded
 *    with base64_encode() or qp_q_encode() and written with g_snprintf().
 *  - LBREAK_IF_REQUIRED is invoked between pieces to manage line length.
 *
 * The rest of the parameter list and a number of statements are on lines
 * not shown in this listing.
 */
1077 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1080 const gchar *cur_encoding;
1081 const gchar *out_encoding;
1085 const gchar *srcp = src;
1086 gchar *destp = dest;
1087 gboolean use_base64;
1089 if (MB_CUR_MAX > 1) {
1091 mimesep_enc = "?B?";
1094 mimesep_enc = "?Q?";
1097 cur_encoding = conv_get_current_charset_str();
1098 out_encoding = conv_get_outgoing_charset_str();
1099 if (!strcmp(out_encoding, "US-ASCII"))
1100 out_encoding = "ISO-8859-1";
/* fixed overhead of one encoded word, excluding the encoded text itself */
1102 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1103 strlen(mimesep_enc) + strlen(MIMESEP_END);
1105 left = MAX_LINELEN - header_len;
1108 LBREAK_IF_REQUIRED(left <= 0);
1110 while (isspace(*srcp)) {
1113 LBREAK_IF_REQUIRED(left <= 0);
1116 /* output as it is if the next word is ASCII string */
1117 if (!is_next_nonascii(srcp)) {
1120 word_len = get_next_word_len(srcp);
1121 LBREAK_IF_REQUIRED(left < word_len);
1122 while(*srcp && !isspace(*srcp)) {
1125 LBREAK_IF_REQUIRED(left <= 0);
1137 const gchar *p = srcp;
1139 gint out_enc_str_len;
1140 gint mime_block_len;
1141 gboolean cont = FALSE;
1143 while (*p != '\0') {
1144 if (isspace(*p) && !is_next_nonascii(p + 1))
1147 mb_len = mblen(p, MB_CUR_MAX);
1149 g_warning("invalid multibyte character encountered\n");
1153 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
/*
 * NOTE(review): conv_codeset_strdup() has a NULL return path, and its
 * result is passed to strlen() on the very next line without a check.
 */
1154 out_str = conv_codeset_strdup
1155 (part_str, cur_encoding, out_encoding);
1156 out_str_len = strlen(out_str);
1159 out_enc_str_len = B64LEN(out_str_len);
1162 qp_get_q_encoding_len(out_str);
1166 if (mimestr_len + out_enc_str_len <= left) {
1169 } else if (cur_len == 0) {
1170 LBREAK_IF_REQUIRED(1);
1179 Xstrndup_a(part_str, srcp, cur_len, );
/* NOTE(review): same unchecked conv_codeset_strdup() result as above. */
1180 out_str = conv_codeset_strdup
1181 (part_str, cur_encoding, out_encoding);
1182 out_str_len = strlen(out_str);
1185 out_enc_str_len = B64LEN(out_str_len);
1188 qp_get_q_encoding_len(out_str);
1190 Xalloca(enc_str, out_enc_str_len + 1, );
1192 base64_encode(enc_str, out_str, out_str_len);
1194 qp_q_encode(enc_str, out_str);
1198 /* output MIME-encoded string block */
1199 mime_block_len = mimestr_len + strlen(enc_str);
1200 g_snprintf(destp, mime_block_len + 1,
1201 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1202 out_encoding, mimesep_enc, enc_str);
1203 destp += mime_block_len;
1206 left -= mime_block_len;
1209 LBREAK_IF_REQUIRED(cont);
1219 #undef LBREAK_IF_REQUIRED