2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2002 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
29 #if (HAVE_WCTYPE_H && HAVE_WCHAR_H)
46 #include "quoted-printable.h"
48 #include "prefs_common.h"
/* Substitute character; conv_unreadable_8bit() in this file stores it over
 * bytes that fail isascii(). */
58 #define SUBST_CHAR '_'
/* Each predicate below masks c to its low 8 bits and tests a fixed range. */
/* Low byte in 0xa1..0xfe. */
61 #define iseuckanji(c) \
62 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* Low byte equals 0x8e. */
63 #define iseuchwkana1(c) \
64 (((c) & 0xff) == 0x8e)
/* Low byte in 0xa1..0xdf. */
65 #define iseuchwkana2(c) \
66 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* NOTE(review): the #define line for this body (embedded number 67) is not
 * visible here; presumably it is iseucaux(c), which later code in this file
 * calls -- confirm. The body tests for a low byte equal to 0x8f. */
68 (((c) & 0xff) == 0x8f)
/* Low byte in 0xa9..0xaf. */
69 #define isunprintableeuckanji(c) \
70 (((c) & 0xff) >= 0xa9 && ((c) & 0xff) <= 0xaf)
/* Low byte in 0x81..0x9f or 0xe0..0xfc. */
71 #define issjiskanji1(c) \
72 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
73 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* Low byte in 0x40..0x7e or 0x80..0xfc. */
74 #define issjiskanji2(c) \
75 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
76 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* Low byte in 0xa1..0xdf. */
77 #define issjishwkana(c) \
78 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* NOTE(review): these are fragments of backslash-continued macro bodies whose
 * #define lines are not visible here (embedded numbers jump 81, 89, 97, ...).
 * Each visible line either compares a variable named `state` against a JIS_*
 * value or assigns one to it; what is emitted on a state change cannot be
 * determined from this view. */
81 if (state != JIS_KANJI) { \
89 if (state != JIS_ASCII) { \
97 if (state != JIS_HWKANA) { \
101 state = JIS_HWKANA; \
105 if (state != JIS_AUXKANJI) { \
110 state = JIS_AUXKANJI; \
/*
 * conv_jistoeuc: scan inbuf up to its terminating NUL and write converted
 * bytes to outbuf while tracking a JISState.
 *
 * Visible behaviour: escape-sequence bytes are inspected ('$' followed by
 * '@', 'B' or '('; '(' followed by 'B', 'J' or 'I'), as are the single bytes
 * 0x0e and 0x0f; data bytes are copied with bit 0x80 set.
 *
 * NOTE(review): the embedded line numbers skip (113 -> 115, 119 -> 123, ...),
 * so parts of this function are not visible. No visible line consults
 * outlen -- confirm how outbuf is bounded.
 */
113 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
115 const guchar *in = inbuf;
116 guchar *out = outbuf;
/* Scanning starts in the ASCII state. */
117 JISState state = JIS_ASCII;
119 while (*in != '\0') {
123 if (*(in + 1) == '@' || *(in + 1) == 'B') {
126 } else if (*(in + 1) == '(' &&
128 state = JIS_AUXKANJI;
131 /* unknown escape sequence */
134 } else if (*in == '(') {
135 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
138 } else if (*(in + 1) == 'I') {
142 /* unknown escape sequence */
146 /* unknown escape sequence */
149 } else if (*in == 0x0e) {
152 } else if (*in == 0x0f) {
/* Two-byte copy with the high bit set; the loop is left early if the input
 * ends after the first byte of the pair. */
161 *out++ = *in++ | 0x80;
162 if (*in == '\0') break;
163 *out++ = *in++ | 0x80;
/* Single-byte copy with the high bit set. */
167 *out++ = *in++ | 0x80;
/* Second two-byte copy, same early-exit guard as above. */
171 *out++ = *in++ | 0x80;
172 if (*in == '\0') break;
173 *out++ = *in++ | 0x80;
/*
 * conv_euctojis: scan inbuf up to its terminating NUL and write bytes to
 * outbuf, starting from the JIS_ASCII state.
 *
 * Visible behaviour: bytes matching iseuckanji / iseuchwkana1 / iseucaux
 * select a branch; accepted data bytes are written with the high bit cleared
 * (& 0x7f). Several branches test whether the next byte is a non-NUL,
 * non-ASCII byte.
 *
 * NOTE(review): many lines are not visible (embedded numbers skip), including
 * whatever is written for rejected bytes. No visible line consults outlen.
 */
182 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
184 const guchar *in = inbuf;
185 guchar *out = outbuf;
186 JISState state = JIS_ASCII;
188 while (*in != '\0') {
192 } else if (iseuckanji(*in)) {
/* Both bytes of the pair must be in the iseuckanji range. */
193 if (iseuckanji(*(in + 1))) {
195 *out++ = *in++ & 0x7f;
196 *out++ = *in++ & 0x7f;
201 if (*in != '\0' && !isascii(*in)) {
206 } else if (iseuchwkana1(*in)) {
208 if (iseuchwkana2(*in)) {
210 *out++ = *in++ & 0x7f;
213 if (*in != '\0' && !isascii(*in)) {
218 } else if (iseucaux(*in)) {
220 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
222 *out++ = *in++ & 0x7f;
223 *out++ = *in++ & 0x7f;
226 if (*in != '\0' && !isascii(*in)) {
229 if (*in != '\0' && !isascii(*in)) {
/*
 * conv_sjistoeuc: scan inbuf up to its terminating NUL and write converted
 * bytes to outbuf.
 *
 * Visible behaviour: for a byte pair accepted by issjiskanji1/issjiskanji2,
 * two values out1/out2 are recomputed arithmetically and both are stored with
 * bit 0x80 set. A separate branch handles bytes matching issjishwkana.
 *
 * NOTE(review): the declarations of out1 and row, the condition choosing
 * between the two out1 formulas, and the fallback branches are on lines not
 * visible here. No visible line consults outlen.
 */
246 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
248 const guchar *in = inbuf;
249 guchar *out = outbuf;
251 while (*in != '\0') {
254 } else if (issjiskanji1(*in)) {
255 if (issjiskanji2(*(in + 1))) {
257 guchar out2 = *(in + 1);
/* Offset subtracted from the first byte: 0x70 below 0xa0, otherwise 0xb0. */
260 row = out1 < 0xa0 ? 0x70 : 0xb0;
262 out1 = (out1 - row) * 2 - 1;
263 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
265 out1 = (out1 - row) * 2;
269 *out++ = out1 | 0x80;
270 *out++ = out2 | 0x80;
275 if (*in != '\0' && !isascii(*in)) {
280 } else if (issjishwkana(*in)) {
/*
 * conv_anytoeuc: dispatch on conv_guess_encoding(inbuf). The visible arms
 * call conv_jistoeuc, conv_sjistoeuc, or copy the input with strncpy2.
 * NOTE(review): the case labels are on lines not visible here.
 */
292 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
294 switch (conv_guess_encoding(inbuf)) {
296 conv_jistoeuc(outbuf, outlen, inbuf);
299 conv_sjistoeuc(outbuf, outlen, inbuf);
302 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_anytojis: dispatch on conv_guess_encoding(inbuf). The visible arms
 * call conv_euctojis or copy the input with strncpy2.
 * NOTE(review): the case labels are on lines not visible here.
 */
307 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
309 switch (conv_guess_encoding(inbuf)) {
311 conv_euctojis(outbuf, outlen, inbuf);
314 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_unreadable_eucjp: edit str in place through the cursor p.
 *
 * Visible behaviour: a "\r\n" pair is collapsed by shifting the rest of the
 * string (terminator included) one byte to the left; multi-byte sequences are
 * classified with the iseuc* macros, and the existing comments mark which
 * branches keep the bytes and which substitute them.
 *
 * NOTE(review): the loop header and the statements executed in each branch
 * are on lines not visible here.
 */
319 void conv_unreadable_eucjp(gchar *str)
321 register guchar *p = str;
325 /* convert CR+LF -> LF */
326 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes starting at p + 1 cover the remaining text and its NUL. */
327 memmove(p, p + 1, strlen(p));
328 /* printable 7 bit code */
330 } else if (iseuckanji(*p)) {
331 if (iseuckanji(*(p + 1)) && !isunprintableeuckanji(*p))
332 /* printable euc-jp code */
335 /* substitute unprintable code */
344 } else if (iseuchwkana1(*p)) {
345 if (iseuchwkana2(*(p + 1)))
346 /* euc-jp hankaku kana */
350 } else if (iseucaux(*p)) {
351 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
352 /* auxiliary kanji */
357 /* substitute unprintable 1 byte code */
/*
 * conv_unreadable_8bit: edit str in place. A "\r\n" pair is collapsed by
 * shifting the rest of the string left by one byte; any other byte that
 * fails isascii() is overwritten with SUBST_CHAR.
 * NOTE(review): the loop header and cursor advance are not visible here.
 */
362 void conv_unreadable_8bit(gchar *str)
364 register guchar *p = str;
367 /* convert CR+LF -> LF */
368 if (*p == '\r' && *(p + 1) == '\n')
369 memmove(p, p + 1, strlen(p));
370 else if (!isascii(*p)) *p = SUBST_CHAR;
/*
 * conv_unreadable_latin: edit str in place. A "\r\n" pair is collapsed by
 * shifting the rest of the string left by one byte; bytes in 0x80..0x9f are
 * singled out by the second test.
 * NOTE(review): the statement executed for 0x80..0x9f bytes and the loop
 * header are on lines not visible here.
 */
375 void conv_unreadable_latin(gchar *str)
377 register guchar *p = str;
380 /* convert CR+LF -> LF */
381 if (*p == '\r' && *(p + 1) == '\n')
382 memmove(p, p + 1, strlen(p));
383 else if ((*p & 0xff) >= 0x80 && (*p & 0xff) <= 0x9f)
/*
 * conv_mb_alnum: edit str in place, replacing selected two-byte sequences
 * with a single byte and closing the gap with memmove.
 *
 * Visible behaviour: when the first byte is 0xa1 and the second is in
 * 0xa0..0xef, char_tbl[second - 0xa0] supplies the replacement unless the
 * entry is NCV. A separate branch tests a second byte in 0xb0..0xfa.
 *
 * NOTE(review): the condition guarding the 0xb0..0xfa branch, the value
 * stored there, and the computation of len are on lines not visible here.
 */
391 void conv_mb_alnum(gchar *str)
/* Replacement table indexed by (second byte - 0xa0); NCV entries are skipped
 * by the lookup below. */
393 static guchar char_tbl[] = {
395 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
396 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
398 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
399 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
401 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
402 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
404 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
405 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
407 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
408 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
411 register guchar *p = str;
418 register guchar ch = *(p + 1);
420 if (ch >= 0xb0 && ch <= 0xfa) {
425 memmove(p, p + 1, len);
431 } else if (*p == 0xa1) {
432 register guchar ch = *(p + 1);
/* Range check keeps the table index within 0..0x4f. */
434 if (ch >= 0xa0 && ch <= 0xef &&
435 NCV != char_tbl[ch - 0xa0]) {
436 *p = char_tbl[ch - 0xa0];
439 memmove(p, p + 1, len);
445 } else if (iseuckanji(*p)) {
/*
 * conv_guess_encoding: inspect the bytes of str and return a CharSet guess.
 *
 * Visible behaviour: the guess starts as C_US_ASCII. An ESC byte followed by
 * '$' or '(' returns C_ISO_2022_JP immediately if nothing else has been
 * guessed yet. Byte pairs are then classified with the iseuc and issjis
 * macros, and several branches set the guess to C_SHIFT_JIS.
 *
 * NOTE(review): the loop header, the remaining assignments and the final
 * return are on lines not visible here.
 */
455 CharSet conv_guess_encoding(const gchar *str)
457 const guchar *p = str;
458 CharSet guessed = C_US_ASCII;
461 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
462 if (guessed == C_US_ASCII)
463 return C_ISO_2022_JP;
465 } else if (isascii(*p)) {
467 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
468 if (*p >= 0xfd && *p <= 0xfe)
470 else if (guessed == C_SHIFT_JIS) {
471 if ((issjiskanji1(*p) &&
472 issjiskanji2(*(p + 1))) ||
474 guessed = C_SHIFT_JIS;
480 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
481 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
482 guessed = C_SHIFT_JIS;
486 } else if (issjishwkana(*p)) {
487 guessed = C_SHIFT_JIS;
/* conv_jistodisp: run conv_jistoeuc from inbuf into outbuf, then pass outbuf
 * through conv_unreadable_eucjp in place. */
497 void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
499 conv_jistoeuc(outbuf, outlen, inbuf);
500 conv_unreadable_eucjp(outbuf);
/* conv_sjistodisp: run conv_sjistoeuc from inbuf into outbuf, then pass
 * outbuf through conv_unreadable_eucjp in place. */
503 void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
505 conv_sjistoeuc(outbuf, outlen, inbuf);
506 conv_unreadable_eucjp(outbuf);
/* conv_euctodisp: copy inbuf into outbuf with strncpy2 (limit outlen), then
 * pass outbuf through conv_unreadable_eucjp in place. */
509 void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
511 strncpy2(outbuf, inbuf, outlen);
512 conv_unreadable_eucjp(outbuf);
/* conv_anytodisp: run conv_anytoeuc (which guesses the input encoding) from
 * inbuf into outbuf, then pass outbuf through conv_unreadable_eucjp. */
515 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
517 conv_anytoeuc(outbuf, outlen, inbuf);
518 conv_unreadable_eucjp(outbuf);
/* conv_ustodisp: copy inbuf into outbuf with strncpy2 (limit outlen), then
 * pass outbuf through conv_unreadable_8bit in place. */
521 void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
523 strncpy2(outbuf, inbuf, outlen);
524 conv_unreadable_8bit(outbuf);
/* conv_latintodisp: copy inbuf into outbuf with strncpy2 (limit outlen), then
 * pass outbuf through conv_unreadable_latin in place. */
527 void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
529 strncpy2(outbuf, inbuf, outlen);
530 conv_unreadable_latin(outbuf);
/* conv_noconv: copy inbuf into outbuf with strncpy2 (limit outlen) and apply
 * no further processing. */
533 void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
535 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_code_converter_new: allocate a zero-filled CodeConverter and fill it
 * from charset: the conversion function from conv_get_code_conv_func, a
 * g_strdup'ed copy of the name, and the CharSet from
 * conv_get_charset_from_str.
 * NOTE(review): the declaration of conv and the return statement are on
 * lines not visible here.
 */
538 CodeConverter *conv_code_converter_new(const gchar *charset)
542 conv = g_new0(CodeConverter, 1);
544 conv->code_conv_func = conv_get_code_conv_func(charset);
/* The converter keeps its own copy of the name; conv_code_converter_destroy
 * frees it. */
546 conv->charset_str = g_strdup(charset);
547 conv->charset = conv_get_charset_from_str(charset);
/* conv_code_converter_destroy: free the converter's charset_str copy.
 * NOTE(review): the release of conv itself, if any, is on a line not visible
 * here. */
552 void conv_code_converter_destroy(CodeConverter *conv)
554 g_free(conv->charset_str);
/*
 * conv_convert: convert inbuf into outbuf using the given converter.
 *
 * Two preprocessor variants are visible. In the first, conv_codeset_strdup
 * converts from conv->charset_str and the result is copied into outbuf with
 * strncpy2. In the !HAVE_LIBJCONV variant, conv->code_conv_func is called
 * directly.
 *
 * NOTE(review): the rest of the parameter list, the handling of a NULL
 * result from conv_codeset_strdup, and the return values are on lines not
 * visible here.
 */
558 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
564 str = conv_codeset_strdup(inbuf, conv->charset_str, NULL);
568 strncpy2(outbuf, str, outlen);
571 #else /* !HAVE_LIBJCONV */
572 conv->code_conv_func(outbuf, outlen, inbuf);
/*
 * conv_codeset_strdup: return a converted copy of inbuf.
 *
 * Visible behaviour:
 *  - One path takes the function from conv_get_code_conv_func(src_codeset).
 *    Unless it is conv_noconv, a buffer of strlen(inbuf) * 2 + 1 bytes (for
 *    the jis/sjis/any "todisp" functions) or strlen(inbuf) + 1 bytes is
 *    filled by that function, then shrunk with g_realloc.
 *  - Case-insensitively identical source and destination codesets return a
 *    plain g_strdup of the input.
 *  - The libjconv variant calls jconv_alloc_conv and warns on failure.
 *  - The !HAVE_LIBJCONV variant converts to ISO-2022-JP only when the source
 *    is recognised as EUC-JP or Shift_JIS; otherwise it duplicates the input.
 *
 * NOTE(review): the allocation of buf, the conditions selecting each path,
 * and the return statements are on lines not visible here.
 */
578 gchar *conv_codeset_strdup(const gchar *inbuf,
579 const gchar *src_codeset, const gchar *dest_codeset)
585 const gchar *const *codesets;
587 #else /* !HAVE_LIBJCONV */
588 CharSet src_charset = C_AUTO, dest_charset = C_AUTO;
594 func = conv_get_code_conv_func(src_codeset);
595 if (func != conv_noconv) {
/* These three converters get a buffer of twice the input length plus one. */
596 if (func == conv_jistodisp ||
597 func == conv_sjistodisp ||
598 func == conv_anytodisp)
599 len = strlen(inbuf) * 2 + 1;
601 len = strlen(inbuf) + 1;
603 if (!buf) return NULL;
604 func(buf, len, inbuf);
/* Shrink the buffer to the converted string's length plus terminator. */
605 buf = g_realloc(buf, strlen(buf) + 1);
610 /* don't convert if src and dest codeset are identical */
611 if (src_codeset && dest_codeset &&
612 !strcasecmp(src_codeset, dest_codeset))
613 return g_strdup(inbuf);
617 codesets = &src_codeset;
620 codesets = jconv_info_get_pref_codesets(&n_codesets);
622 dest_codeset = conv_get_current_charset_str();
623 /* don't convert if current codeset is US-ASCII */
624 if (!strcasecmp(dest_codeset, CS_US_ASCII))
625 return g_strdup(inbuf);
628 if (jconv_alloc_conv(inbuf, strlen(inbuf), &buf, &len,
629 codesets, n_codesets,
630 &actual_codeset, dest_codeset)
635 g_warning("code conversion from %s to %s failed\n",
636 codesets && codesets[0] ? codesets[0] : "(unknown)",
641 #else /* !HAVE_LIBJCONV */
/* Map the source codeset name (case-insensitive) onto a CharSet value. */
643 if (!strcasecmp(src_codeset, CS_EUC_JP) ||
644 !strcasecmp(src_codeset, CS_EUCJP))
645 src_charset = C_EUC_JP;
646 else if (!strcasecmp(src_codeset, CS_SHIFT_JIS) ||
647 !strcasecmp(src_codeset, "SHIFT-JIS") ||
648 !strcasecmp(src_codeset, "SJIS"))
649 src_charset = C_SHIFT_JIS;
650 if (dest_codeset && !strcasecmp(dest_codeset, CS_ISO_2022_JP))
651 dest_charset = C_ISO_2022_JP;
654 if ((src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS) &&
655 dest_charset == C_ISO_2022_JP) {
/* Output buffer is sized at three times (input length + 1). */
656 len = (strlen(inbuf) + 1) * 3;
659 if (src_charset == C_EUC_JP)
660 conv_euctojis(buf, len, inbuf);
662 conv_anytojis(buf, len, inbuf);
663 buf = g_realloc(buf, strlen(buf) + 1);
666 buf = g_strdup(inbuf);
669 #endif /* !HAVE_LIBJCONV */
/*
 * conv_get_code_conv_func: choose the display-conversion function for a
 * charset name, compared case-insensitively.
 *
 * Visible mapping:
 *   ISO-2022-JP / ISO-2022-JP-2 constants      -> conv_jistodisp
 *   US-ASCII constant                          -> conv_ustodisp
 *   CS_ISO_8859_1 (first 10 chars compared)    -> conv_latintodisp
 *   any name starting with "ISO-8859-"         -> conv_latintodisp
 *   Shift_JIS constant, "SHIFT-JIS", "SJIS",
 *   "X-SJIS"                                   -> conv_sjistodisp
 *   EUC-JP constants                           -> conv_euctodisp
 *   anything else                              -> conv_noconv
 * An earlier branch returns conv_anytodisp when the current charset is
 * C_EUC_JP or C_SHIFT_JIS.
 *
 * NOTE(review): the condition guarding that early branch (presumably a NULL
 * charset -- confirm) and any preprocessor guards around the two ISO-8859
 * tests are on lines not visible here.
 */
672 CodeConvFunc conv_get_code_conv_func(const gchar *charset)
674 CodeConvFunc code_conv;
678 cur_charset = conv_get_current_charset();
679 if (cur_charset == C_EUC_JP || cur_charset == C_SHIFT_JIS)
680 return conv_anytodisp;
685 if (!strcasecmp(charset, CS_ISO_2022_JP) ||
686 !strcasecmp(charset, CS_ISO_2022_JP_2))
687 code_conv = conv_jistodisp;
688 else if (!strcasecmp(charset, CS_US_ASCII))
689 code_conv = conv_ustodisp;
690 else if (!strncasecmp(charset, CS_ISO_8859_1, 10))
691 code_conv = conv_latintodisp;
693 else if (!strncasecmp(charset, "ISO-8859-", 9))
694 code_conv = conv_latintodisp;
696 else if (!strcasecmp(charset, CS_SHIFT_JIS) ||
697 !strcasecmp(charset, "SHIFT-JIS") ||
698 !strcasecmp(charset, "SJIS") ||
699 !strcasecmp(charset, "X-SJIS"))
700 code_conv = conv_sjistodisp;
701 else if (!strcasecmp(charset, CS_EUC_JP) ||
702 !strcasecmp(charset, CS_EUCJP))
703 code_conv = conv_euctodisp;
705 code_conv = conv_noconv;
/*
 * Table pairing CharSet values with charset name strings. Lookup code in
 * this file refers to it as `charsets` and reads the fields .charset and
 * .name. A CharSet may appear more than once (C_US_ASCII, C_EUC_JP);
 * conv_get_charset_str returns the first matching row's name.
 * NOTE(review): the struct field declarations, the closing line, and a few
 * rows (embedded numbers 716, 743, ...) are not visible here.
 */
710 static const struct {
714 {C_US_ASCII, CS_US_ASCII},
715 {C_US_ASCII, CS_ANSI_X3_4_1968},
717 {C_ISO_8859_1, CS_ISO_8859_1},
718 {C_ISO_8859_2, CS_ISO_8859_2},
719 {C_ISO_8859_4, CS_ISO_8859_4},
720 {C_ISO_8859_5, CS_ISO_8859_5},
721 {C_ISO_8859_7, CS_ISO_8859_7},
722 {C_ISO_8859_8, CS_ISO_8859_8},
723 {C_ISO_8859_9, CS_ISO_8859_9},
724 {C_ISO_8859_11, CS_ISO_8859_11},
725 {C_ISO_8859_13, CS_ISO_8859_13},
726 {C_ISO_8859_15, CS_ISO_8859_15},
727 {C_BALTIC, CS_BALTIC},
728 {C_CP1251, CS_CP1251},
729 {C_WINDOWS_1251, CS_WINDOWS_1251},
730 {C_KOI8_R, CS_KOI8_R},
731 {C_KOI8_U, CS_KOI8_U},
732 {C_ISO_2022_JP, CS_ISO_2022_JP},
733 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
734 {C_EUC_JP, CS_EUC_JP},
735 {C_EUC_JP, CS_EUCJP},
736 {C_SHIFT_JIS, CS_SHIFT_JIS},
737 {C_ISO_2022_KR, CS_ISO_2022_KR},
738 {C_EUC_KR, CS_EUC_KR},
739 {C_ISO_2022_CN, CS_ISO_2022_CN},
740 {C_EUC_CN, CS_EUC_CN},
741 {C_GB2312, CS_GB2312},
742 {C_EUC_TW, CS_EUC_TW},
744 {C_TIS_620, CS_TIS_620},
745 {C_WINDOWS_874, CS_WINDOWS_874},
/*
 * Table of locale name, CharSet, and outgoing CharSet. Lookup code in this
 * file refers to it as `locale_table` and reads the fields .locale, .charset
 * and .out_charset. That code matches by prefix and stops at the first hit,
 * so row order matters: the more specific "ja_JP.eucJP" must precede "ja_JP".
 * NOTE(review): the struct field declarations, the closing line, and some
 * rows (embedded numbers 766, 769, 771, ...) are not visible here.
 */
749 static const struct {
754 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
755 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
756 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
757 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
758 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
759 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
760 {"ko_KR" , C_EUC_KR , C_EUC_KR},
761 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
762 {"zh_CN" , C_GB2312 , C_GB2312},
763 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
764 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
765 {"zh_TW" , C_BIG5 , C_BIG5},
767 {"ru_RU.KOI8-R" , C_KOI8_R , C_ISO_8859_5},
768 {"ru_RU.CP1251" , C_WINDOWS_1251, C_ISO_8859_5},
770 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
772 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
773 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
774 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
775 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
776 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
777 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
778 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
779 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
780 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
781 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
782 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
783 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
784 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
785 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
786 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
788 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
789 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
790 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
791 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
792 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
793 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
794 {"ru_RU" , C_ISO_8859_5 , C_ISO_8859_5},
795 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
796 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
797 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
799 {"th_TH" , C_TIS_620 , C_TIS_620},
800 /* {"th_TH" , C_WINDOWS_874}, */
801 /* {"th_TH" , C_ISO_8859_11}, */
803 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
804 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
805 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
806 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
807 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
809 {"C" , C_US_ASCII , C_US_ASCII},
810 {"POSIX" , C_US_ASCII , C_US_ASCII},
811 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
813 #endif /* !HAVE_LIBJCONV */
/* conv_get_charset_str: linear search of the charsets table; returns the
 * name of the first row whose CharSet equals the argument.
 * NOTE(review): the value returned when nothing matches is on a line not
 * visible here. */
815 const gchar *conv_get_charset_str(CharSet charset)
819 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
820 if (charsets[i].charset == charset)
821 return charsets[i].name;
/* conv_get_charset_from_str: return C_AUTO for a NULL name; otherwise search
 * the charsets table case-insensitively and return the first matching row's
 * CharSet.
 * NOTE(review): the value returned when nothing matches is on a line not
 * visible here. */
827 CharSet conv_get_charset_from_str(const gchar *charset)
831 if (!charset) return C_AUTO;
833 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
834 if (!strcasecmp(charsets[i].name, charset))
835 return charsets[i].charset;
/*
 * conv_get_current_charset: determine the CharSet of the current
 * environment, caching the answer in a function-local static (-1 means "not
 * computed yet").
 *
 * Visible behaviour: one variant matches the codeset name reported by
 * jconv_info_get_current_codeset() against the charsets table. The other
 * takes the locale from conv_get_current_locale(), treats any locale string
 * containing "UTF-8" as C_UTF_8, and otherwise scans locale_table. C_US_ASCII
 * and C_AUTO are also assigned as fallbacks.
 *
 * NOTE(review): the preprocessor guards separating the variants, the
 * conditions for the two fallback assignments, and the return statements are
 * on lines not visible here.
 */
841 CharSet conv_get_current_charset(void)
843 static CharSet cur_charset = -1;
847 const gchar *cur_codeset;
849 const gchar *cur_locale;
852 if (cur_charset != -1)
856 cur_codeset = jconv_info_get_current_codeset();
857 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
858 if (!strcasecmp(cur_codeset, charsets[i].name)) {
859 cur_charset = charsets[i].charset;
864 cur_locale = conv_get_current_locale();
866 cur_charset = C_US_ASCII;
870 if (strcasestr(cur_locale, "UTF-8")) {
871 cur_charset = C_UTF_8;
875 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
878 /* "ja_JP.EUC" matches with "ja_JP.eucJP" and "ja_JP.EUC" */
879 /* "ja_JP" matches with "ja_JP.xxxx" and "ja" */
/* Prefix match: the table entry must be a case-insensitive prefix of the
 * current locale string. */
880 if (!strncasecmp(cur_locale, locale_table[i].locale,
881 strlen(locale_table[i].locale))) {
882 cur_charset = locale_table[i].charset;
/* Entries of the form "xx_YY" with no '.' after the underscore also match a
 * bare two-letter locale. */
884 } else if ((p = strchr(locale_table[i].locale, '_')) &&
885 !strchr(p + 1, '.')) {
886 if (strlen(cur_locale) == 2 &&
887 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
888 cur_charset = locale_table[i].charset;
895 cur_charset = C_AUTO;
/* conv_get_current_charset_str: name of the current charset, taken from
 * conv_get_charset_str(conv_get_current_charset()) and kept in a
 * function-local static. Returns "US-ASCII" when no name is available.
 * NOTE(review): the condition guarding the lookup is on a line not visible
 * here. */
899 const gchar *conv_get_current_charset_str(void)
901 static const gchar *codeset = NULL;
904 codeset = conv_get_charset_str(conv_get_current_charset());
906 return codeset ? codeset : "US-ASCII";
/*
 * conv_get_outgoing_charset: determine the CharSet used for outgoing text,
 * caching the answer in a function-local static (-1 means "not computed
 * yet").
 *
 * Visible behaviour: one variant walks the preferred codesets from
 * jconv_info_get_pref_codesets(), matching them against the charsets table
 * from index 3 onward, and separately checks for "UTF-8". The other scans
 * locale_table and takes the .out_charset field, using the same matching
 * rules as conv_get_current_charset. C_AUTO and conv_get_current_charset()
 * are also assigned as fallbacks.
 *
 * NOTE(review): the preprocessor guards separating the variants, the
 * conditions for the fallback assignments, and the return statements are on
 * lines not visible here.
 */
909 CharSet conv_get_outgoing_charset(void)
911 static CharSet out_charset = -1;
915 gint j, n_pref_codesets;
916 const gchar *const *pref_codesets;
918 const gchar *cur_locale;
921 if (out_charset != -1)
925 /* skip US-ASCII and UTF-8 */
926 pref_codesets = jconv_info_get_pref_codesets(&n_pref_codesets);
927 for (i = 0; i < n_pref_codesets; i++) {
928 for (j = 3; j < sizeof(charsets) / sizeof(charsets[0]); j++) {
929 if (!strcasecmp(pref_codesets[i], charsets[j].name)) {
930 out_charset = charsets[j].charset;
936 for (i = 0; i < n_pref_codesets; i++) {
937 if (!strcasecmp(pref_codesets[i], "UTF-8")) {
938 out_charset = C_UTF_8;
943 out_charset = C_AUTO;
945 cur_locale = conv_get_current_locale();
947 out_charset = C_AUTO;
951 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
954 if (!strncasecmp(cur_locale, locale_table[i].locale,
955 strlen(locale_table[i].locale))) {
956 out_charset = locale_table[i].out_charset;
958 } else if ((p = strchr(locale_table[i].locale, '_')) &&
959 !strchr(p + 1, '.')) {
960 if (strlen(cur_locale) == 2 &&
961 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
962 out_charset = locale_table[i].out_charset;
968 /* encoding conversion without libjconv is only supported
969 on Japanese locale for now */
970 if (out_charset == C_ISO_2022_JP)
973 out_charset = conv_get_current_charset();
/*
 * conv_get_outgoing_charset_str: name of the charset for outgoing text.
 *
 * If prefs_common.outgoing_charset is set and does not start with a letter,
 * it is freed and replaced by a copy of CS_AUTO. If it is set to anything
 * other than CS_AUTO, that string is returned as is. Otherwise the name is
 * derived from conv_get_outgoing_charset(), falling back to "US-ASCII".
 */
979 const gchar *conv_get_outgoing_charset_str(void)
984 if (prefs_common.outgoing_charset) {
/* A value not starting with a letter is reset to CS_AUTO. */
985 if (!isalpha(prefs_common.outgoing_charset[0])) {
986 g_free(prefs_common.outgoing_charset);
987 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
988 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
989 return prefs_common.outgoing_charset;
992 out_charset = conv_get_outgoing_charset();
993 str = conv_get_charset_str(out_charset);
995 return str ? str : "US-ASCII";
/* conv_get_current_locale: return the locale name from the first source that
 * yields a non-NULL value, in this order: LC_ALL, LC_CTYPE, LANG (environment
 * variables), then setlocale(LC_CTYPE, NULL). The result is logged with
 * debug_print.
 * NOTE(review): the declaration of cur_locale and the return statement are
 * on lines not visible here. */
998 const gchar *conv_get_current_locale(void)
1002 cur_locale = g_getenv("LC_ALL");
1003 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1004 if (!cur_locale) cur_locale = g_getenv("LANG");
1005 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1007 debug_print("current locale: %s\n",
1008 cur_locale ? cur_locale : "(none)");
/*
 * conv_unmime_header_overwrite: decode a MIME header in place, writing the
 * result back into str.
 *
 * When the current charset is C_EUC_JP, str is first converted by
 * conv_anytodisp into a temporary buffer of strlen(str) * 2 + 1 bytes, and
 * unmime_header then writes from that buffer into str. Otherwise
 * unmime_header writes into a temporary of strlen(str) + 1 bytes, which is
 * copied back with strncpy2.
 *
 * NOTE(review): nothing visible bounds the output of unmime_header(str, buf)
 * to the size of str; confirm the decoded text cannot exceed it.
 */
1013 void conv_unmime_header_overwrite(gchar *str)
1017 CharSet cur_charset;
1019 cur_charset = conv_get_current_charset();
1021 if (cur_charset == C_EUC_JP) {
1022 buflen = strlen(str) * 2 + 1;
/* Xalloca is a project macro; its third argument is `return` here. */
1023 Xalloca(buf, buflen, return);
1024 conv_anytodisp(buf, buflen, str);
1025 unmime_header(str, buf);
1027 buflen = strlen(str) + 1;
1028 Xalloca(buf, buflen, return);
1029 unmime_header(buf, str);
1030 strncpy2(str, buf, buflen);
/*
 * conv_unmime_header: decode the MIME header str into outbuf.
 *
 * When the current charset is C_EUC_JP, str is first converted by
 * conv_anytodisp into a temporary buffer of strlen(str) * 2 + 1 bytes and
 * unmime_header reads from that buffer; otherwise unmime_header reads str
 * directly.
 *
 * NOTE(review): no visible line uses outlen or charset; confirm whether the
 * missing lines do.
 */
1034 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1035 const gchar *charset)
1037 CharSet cur_charset;
1039 cur_charset = conv_get_current_charset();
1041 if (cur_charset == C_EUC_JP) {
1045 buflen = strlen(str) * 2 + 1;
1046 Xalloca(buf, buflen, return);
1047 conv_anytodisp(buf, buflen, str);
1048 unmime_header(outbuf, buf);
1050 unmime_header(outbuf, str);
/* Line-length limit used by the header encoder below, and the "=?" / "?="
 * strings that open and close an encoded block. */
1053 #define MAX_LINELEN 76
1054 #define MIMESEP_BEGIN "=?"
1055 #define MIMESEP_END "?="
/* Four output characters per group of three input bytes, rounded up. */
1057 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/* NOTE(review): only fragments of this multi-line macro are visible. They
 * test the space remaining in dest against MAX_LINELEN + 2, look for
 * whitespace just before destp, and reset `left` to MAX_LINELEN - 1. The
 * macro reads len, dest, destp and left from the expansion site. */
1059 #define LBREAK_IF_REQUIRED(cond) \
1061 if (len - (destp - dest) < MAX_LINELEN + 2) { \
1067 if (destp > dest && isspace(*(destp - 1))) \
1071 left = MAX_LINELEN - 1; \
/*
 * conv_encode_header: write src into dest (capacity len) as a header value,
 * MIME-encoding the parts that are not plain ASCII.
 *
 * Visible behaviour:
 *  - The separator "?B?" or "?Q?" is chosen around a test of MB_CUR_MAX > 1.
 *  - The text is converted from the current charset to the outgoing charset;
 *    an outgoing "US-ASCII" is replaced by "ISO-8859-1".
 *  - Whitespace and ASCII words are handled in their own loops, with
 *    LBREAK_IF_REQUIRED invoked as the remaining width `left` runs out.
 *  - For other text, the longest prefix whose encoded form fits in `left` is
 *    found one multibyte character at a time, encoded with base64_encode or
 *    qp_q_encode, and written as "=?" charset separator data "?=".
 *
 * NOTE(review): the rest of the parameter list (including header_len), the
 * local declarations, and many statements are on lines not visible here.
 * The results of conv_codeset_strdup are passed to strlen with no visible
 * NULL check, and no visible line frees them -- confirm against the full
 * source.
 */
1075 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1078 const gchar *cur_encoding;
1079 const gchar *out_encoding;
1083 const gchar *srcp = src;
1084 gchar *destp = dest;
1085 gboolean use_base64;
1087 if (MB_CUR_MAX > 1) {
1089 mimesep_enc = "?B?";
1092 mimesep_enc = "?Q?";
1095 cur_encoding = conv_get_current_charset_str();
1096 out_encoding = conv_get_outgoing_charset_str();
1097 if (!strcmp(out_encoding, "US-ASCII"))
1098 out_encoding = "ISO-8859-1";
/* Fixed overhead of one encoded block: both delimiters, the charset name and
 * the encoding separator. */
1100 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1101 strlen(mimesep_enc) + strlen(MIMESEP_END);
/* Width left on the first line after the header name. */
1103 left = MAX_LINELEN - header_len;
1106 LBREAK_IF_REQUIRED(left <= 0);
1108 while (isspace(*srcp)) {
1111 LBREAK_IF_REQUIRED(left <= 0);
1114 /* output as it is if the next word is ASCII string */
1115 if (!is_next_nonascii(srcp)) {
1118 word_len = get_next_word_len(srcp);
1119 LBREAK_IF_REQUIRED(left < word_len);
1120 while(*srcp && !isspace(*srcp)) {
1123 LBREAK_IF_REQUIRED(left <= 0);
1135 const gchar *p = srcp;
1137 gint out_enc_str_len;
1138 gint mime_block_len;
1139 gboolean cont = FALSE;
1141 while (*p != '\0') {
1142 if (isspace(*p) && !is_next_nonascii(p + 1))
1145 mb_len = mblen(p, MB_CUR_MAX);
1147 g_warning("invalid multibyte character encountered\n");
/* Trial conversion of the candidate prefix, extended by one more character,
 * to measure its encoded length. */
1151 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1152 out_str = conv_codeset_strdup
1153 (part_str, cur_encoding, out_encoding);
1154 out_str_len = strlen(out_str);
1157 out_enc_str_len = B64LEN(out_str_len);
1160 qp_get_q_encoding_len(out_str);
1164 if (mimestr_len + out_enc_str_len <= left) {
1167 } else if (cur_len == 0) {
1168 LBREAK_IF_REQUIRED(1);
/* Convert and encode the prefix that was found to fit. */
1177 Xstrndup_a(part_str, srcp, cur_len, );
1178 out_str = conv_codeset_strdup
1179 (part_str, cur_encoding, out_encoding);
1180 out_str_len = strlen(out_str);
1183 out_enc_str_len = B64LEN(out_str_len);
1186 qp_get_q_encoding_len(out_str);
1188 Xalloca(enc_str, out_enc_str_len + 1, );
1190 base64_encode(enc_str, out_str, out_str_len);
1192 qp_q_encode(enc_str, out_str);
1196 /* output MIME-encoded string block */
1197 mime_block_len = mimestr_len + strlen(enc_str);
1198 g_snprintf(destp, mime_block_len + 1,
1199 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1200 out_encoding, mimesep_enc, enc_str);
1201 destp += mime_block_len;
1204 left -= mime_block_len;
1207 LBREAK_IF_REQUIRED(cont);
1217 #undef LBREAK_IF_REQUIRED