2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2002 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
29 #if (HAVE_WCTYPE_H && HAVE_WCHAR_H)
46 #include "prefs_common.h"
/* Replacement character: conv_unreadable_8bit() stores it over every
   non-ASCII byte. */
56 #define SUBST_CHAR '_'
/*
 * Byte-range predicates.  Each one masks its argument with 0xff, so only
 * the low eight bits are compared.  The range tests expand the argument
 * more than once, so pass expressions without side effects.
 */
/* true for 0xa1..0xfe */
59 #define iseuckanji(c) \
60 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* true for exactly 0x8e */
61 #define iseuchwkana1(c) \
62 (((c) & 0xff) == 0x8e)
/* true for 0xa1..0xdf */
63 #define iseuchwkana2(c) \
64 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* true for exactly 0x8f.  NOTE(review): the #define line that owns this
   body is not present in this listing; iseucaux() is used further down and
   is presumably the macro defined here -- confirm. */
66 (((c) & 0xff) == 0x8f)
/* true for 0xa9..0xaf */
67 #define isunprintableeuckanji(c) \
68 (((c) & 0xff) >= 0xa9 && ((c) & 0xff) <= 0xaf)
/* true for 0x81..0x9f or 0xe0..0xfc */
69 #define issjiskanji1(c) \
70 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
71 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* true for 0x40..0x7e or 0x80..0xfc */
72 #define issjiskanji2(c) \
73 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
74 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* true for 0xa1..0xdf (same range as iseuchwkana2) */
75 #define issjishwkana(c) \
76 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/*
 * Pieces of backslash-continued macro bodies.  Each visible test compares a
 * variable named `state` (supplied by the expansion site) with one of
 * JIS_KANJI, JIS_ASCII, JIS_HWKANA or JIS_AUXKANJI, and the last visible
 * line stores JIS_AUXKANJI into it.
 * NOTE(review): the #define lines and the remainder of each body are not
 * present in this listing.
 */
79 if (state != JIS_KANJI) { \
87 if (state != JIS_ASCII) { \
95 if (state != JIS_HWKANA) { \
103 if (state != JIS_AUXKANJI) { \
108 state = JIS_AUXKANJI; \
/*
 * conv_jistoeuc: walk inbuf up to its terminating NUL while tracking a
 * JISState (initially JIS_ASCII) and write bytes through outbuf.
 *   outbuf - destination buffer
 *   outlen - presumably the capacity of outbuf; no use of it appears in the
 *            lines shown here (TODO confirm how the bound is enforced)
 *   inbuf  - NUL-terminated input
 * NOTE(review): this listing omits many lines of the function (braces and
 * several branches); the comments below describe only what is visible.
 */
111 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
/* unsigned views of both buffers so byte comparisons are not sign-dependent */
113 const guchar *in = inbuf;
114 guchar *out = outbuf;
115 JISState state = JIS_ASCII;
117 while (*in != '\0') {
/* designator characters are examined one byte ahead of `in` */
121 if (*(in + 1) == '@' || *(in + 1) == 'B') {
124 } else if (*(in + 1) == '(' &&
126 state = JIS_AUXKANJI;
129 /* unknown escape sequence */
132 } else if (*in == '(') {
133 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
136 } else if (*(in + 1) == 'I') {
140 /* unknown escape sequence */
144 /* unknown escape sequence */
/* 0x0e / 0x0f are the ASCII shift-out / shift-in control codes */
147 } else if (*in == 0x0e) {
150 } else if (*in == 0x0f) {
/* copy with bit 7 set; the two-byte forms stop early if the input ends
   after the first byte */
159 *out++ = *in++ | 0x80;
160 if (*in == '\0') break;
161 *out++ = *in++ | 0x80;
165 *out++ = *in++ | 0x80;
169 *out++ = *in++ | 0x80;
170 if (*in == '\0') break;
171 *out++ = *in++ | 0x80;
/*
 * conv_euctojis: walk inbuf up to its terminating NUL, classify each byte
 * with the iseuc*() macros and write bytes with bit 7 cleared through
 * outbuf, tracking a JISState that starts at JIS_ASCII.
 *   outbuf - destination buffer
 *   outlen - presumably the capacity of outbuf; not referenced in the
 *            visible lines (TODO confirm)
 *   inbuf  - NUL-terminated input
 * NOTE(review): lines are missing from this listing (braces, the state
 * switching calls, and the bodies of the fallback branches).
 */
180 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
182 const guchar *in = inbuf;
183 guchar *out = outbuf;
184 JISState state = JIS_ASCII;
186 while (*in != '\0') {
190 } else if (iseuckanji(*in)) {
/* lead byte and trail byte both in 0xa1..0xfe: emit both, bit 7 cleared */
191 if (iseuckanji(*(in + 1))) {
193 *out++ = *in++ & 0x7f;
194 *out++ = *in++ & 0x7f;
/* fallback when the pair is incomplete; the action taken for a following
   non-ASCII byte is not shown in this listing */
199 if (*in != '\0' && !isascii(*in)) {
/* 0x8e prefix followed by a byte in 0xa1..0xdf */
204 } else if (iseuchwkana1(*in)) {
206 if (iseuchwkana2(*in)) {
208 *out++ = *in++ & 0x7f;
211 if (*in != '\0' && !isascii(*in)) {
/* iseucaux() prefix followed by a two-byte pair in 0xa1..0xfe */
216 } else if (iseucaux(*in)) {
218 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
220 *out++ = *in++ & 0x7f;
221 *out++ = *in++ & 0x7f;
224 if (*in != '\0' && !isascii(*in)) {
227 if (*in != '\0' && !isascii(*in)) {
/*
 * conv_sjistoeuc: walk inbuf up to its terminating NUL; for a byte pair
 * matching issjiskanji1()/issjiskanji2() compute two output bytes
 * arithmetically and store them with bit 7 set.
 *   outbuf - destination buffer
 *   outlen - presumably the capacity of outbuf; not referenced in the
 *            visible lines (TODO confirm)
 *   inbuf  - NUL-terminated input
 * NOTE(review): lines are missing from this listing, including the
 * declaration of out1/row and the test that selects between the two out1
 * formulas.
 */
244 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
246 const guchar *in = inbuf;
247 guchar *out = outbuf;
249 while (*in != '\0') {
252 } else if (issjiskanji1(*in)) {
253 if (issjiskanji2(*(in + 1))) {
255 guchar out2 = *(in + 1);
/* base subtracted from the lead byte: 0x70 for lead bytes below 0xa0,
   0xb0 otherwise */
258 row = out1 < 0xa0 ? 0x70 : 0xb0;
/* first form: odd result for out1, out2 lowered by 0x20 or 0x1f depending
   on whether it is above 0x7f */
260 out1 = (out1 - row) * 2 - 1;
261 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
/* second form: even result for out1 */
263 out1 = (out1 - row) * 2;
267 *out++ = out1 | 0x80;
268 *out++ = out2 | 0x80;
273 if (*in != '\0' && !isascii(*in)) {
/* single byte in 0xa1..0xdf; handling not shown in this listing */
278 } else if (issjishwkana(*in)) {
/*
 * conv_anytoeuc: choose a conversion from the result of
 * conv_guess_encoding(inbuf).  The visible arms call conv_jistoeuc(),
 * conv_sjistoeuc() or copy the input unchanged with strncpy2().
 * All three receive outbuf/outlen/inbuf as given.
 * NOTE(review): the case labels and breaks are not present in this listing.
 */
290 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
292 switch (conv_guess_encoding(inbuf)) {
294 conv_jistoeuc(outbuf, outlen, inbuf);
297 conv_sjistoeuc(outbuf, outlen, inbuf);
300 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_anytojis: choose a conversion from the result of
 * conv_guess_encoding(inbuf).  The visible arms call conv_euctojis() or
 * copy the input unchanged with strncpy2().
 * NOTE(review): the case labels and breaks are not present in this listing.
 */
305 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
307 switch (conv_guess_encoding(inbuf)) {
309 conv_euctojis(outbuf, outlen, inbuf);
312 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_unreadable_eucjp: edit str in place.  CR+LF pairs lose their CR,
 * and bytes are classified with the iseuc*() macros so that sequences the
 * existing comments call unprintable can be substituted.
 *   str - NUL-terminated buffer, modified in place
 * NOTE(review): the loop header, pointer advances and the substitution
 * statements themselves are not present in this listing.
 */
317 void conv_unreadable_eucjp(gchar *str)
319 register guchar *p = str;
323 /* convert CR+LF -> LF */
/* strlen(p) counts from the CR, so the move also carries the NUL */
324 if (*p == '\r' && *(p + 1) == '\n')
325 memmove(p, p + 1, strlen(p));
326 /* printable 7 bit code */
328 } else if (iseuckanji(*p)) {
/* lead bytes 0xa9..0xaf are excluded from the printable case */
329 if (iseuckanji(*(p + 1)) && !isunprintableeuckanji(*p))
330 /* printable euc-jp code */
333 /* substitute unprintable code */
342 } else if (iseuchwkana1(*p)) {
343 if (iseuchwkana2(*(p + 1)))
344 /* euc-jp hankaku kana */
348 } else if (iseucaux(*p)) {
/* three-byte form: prefix plus two bytes in 0xa1..0xfe */
349 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
350 /* auxiliary kanji */
355 /* substitute unprintable 1 byte code */
/*
 * conv_unreadable_8bit: edit str in place.  A CR that is followed by LF is
 * removed; any other byte for which isascii() is false is overwritten with
 * SUBST_CHAR.
 *   str - NUL-terminated buffer, modified in place
 * NOTE(review): the loop header and pointer advance are not present in
 * this listing.
 */
360 void conv_unreadable_8bit(gchar *str)
362 register guchar *p = str;
365 /* convert CR+LF -> LF */
/* strlen(p) counts from the CR, so the move also carries the NUL */
366 if (*p == '\r' && *(p + 1) == '\n')
367 memmove(p, p + 1, strlen(p));
368 else if (!isascii(*p)) *p = SUBST_CHAR;
/*
 * conv_unreadable_latin: edit str in place.  A CR that is followed by LF
 * is removed; bytes in 0x80..0x9f are singled out by the second test.
 *   str - NUL-terminated buffer, modified in place
 * NOTE(review): the loop header, the statement executed for 0x80..0x9f
 * bytes and the pointer advance are not present in this listing.
 */
373 void conv_unreadable_latin(gchar *str)
375 register guchar *p = str;
378 /* convert CR+LF -> LF */
/* strlen(p) counts from the CR, so the move also carries the NUL */
379 if (*p == '\r' && *(p + 1) == '\n')
380 memmove(p, p + 1, strlen(p));
381 else if ((*p & 0xff) >= 0x80 && (*p & 0xff) <= 0x9f)
/*
 * conv_mb_alnum: edit str in place, collapsing certain two-byte sequences
 * to one byte by overwriting the first byte and shifting the tail left
 * with memmove().
 *   str - NUL-terminated buffer, modified in place
 * NOTE(review): the loop header, the lead-byte test of the first branch,
 * the computation of len and the pointer advances are not present in this
 * listing.
 */
389 void conv_mb_alnum(gchar *str)
/* Replacement table for sequences whose first byte is 0xa1, indexed by
   (second byte - 0xa0).  NCV marks entries with no replacement.  The ten
   visible rows hold 80 entries, matching the 0xa0..0xef range checked
   before the table is read. */
391 static guchar char_tbl[] = {
393 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
394 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
396 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
397 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
399 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
400 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
402 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
403 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
405 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
406 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
409 register guchar *p = str;
416 register guchar ch = *(p + 1);
/* second byte in 0xb0..0xfa; the value stored for it is not shown */
418 if (ch >= 0xb0 && ch <= 0xfa) {
423 memmove(p, p + 1, len);
429 } else if (*p == 0xa1) {
430 register guchar ch = *(p + 1);
/* range check first, so the table index below stays within 0..79 */
432 if (ch >= 0xa0 && ch <= 0xef &&
433 NCV != char_tbl[ch - 0xa0]) {
434 *p = char_tbl[ch - 0xa0];
437 memmove(p, p + 1, len);
443 } else if (iseuckanji(*p)) {
/*
 * conv_guess_encoding: scan str and return a CharSet guess.  The running
 * guess starts as C_US_ASCII.
 *   str - NUL-terminated input
 * Returns C_ISO_2022_JP as soon as an ESC '$' / ESC '(' pair is met while
 * the guess is still C_US_ASCII; other visible branches set the guess to
 * C_SHIFT_JIS.
 * NOTE(review): the loop header, pointer advances, the EUC result and the
 * final return are not present in this listing.
 */
453 CharSet conv_guess_encoding(const gchar *str)
455 const guchar *p = str;
456 CharSet guessed = C_US_ASCII;
459 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
460 if (guessed == C_US_ASCII)
461 return C_ISO_2022_JP;
463 } else if (isascii(*p)) {
/* two consecutive bytes in 0xa1..0xfe */
465 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
/* lead byte 0xfd or 0xfe; the action taken is not shown */
466 if (*p >= 0xfd && *p <= 0xfe)
/* an earlier C_SHIFT_JIS guess is kept only while the bytes still fit a
   Shift_JIS pattern */
468 else if (guessed == C_SHIFT_JIS) {
469 if ((issjiskanji1(*p) &&
470 issjiskanji2(*(p + 1))) ||
472 guessed = C_SHIFT_JIS;
478 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
/* the pair also fits the 0x8e + 0xa1..0xdf pattern, so C_SHIFT_JIS is only
   recorded in `guessed` here */
479 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
480 guessed = C_SHIFT_JIS;
484 } else if (issjishwkana(*p)) {
485 guessed = C_SHIFT_JIS;
/*
 * conv_jistodisp: run conv_jistoeuc() from inbuf into outbuf (with outlen
 * passed through), then run conv_unreadable_eucjp() on outbuf in place.
 */
495 void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
497 conv_jistoeuc(outbuf, outlen, inbuf);
498 conv_unreadable_eucjp(outbuf);
/*
 * conv_sjistodisp: run conv_sjistoeuc() from inbuf into outbuf (with
 * outlen passed through), then run conv_unreadable_eucjp() on outbuf in
 * place.
 */
501 void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
503 conv_sjistoeuc(outbuf, outlen, inbuf);
504 conv_unreadable_eucjp(outbuf);
/*
 * conv_euctodisp: copy inbuf to outbuf with strncpy2() (outlen passed as
 * its size argument), then run conv_unreadable_eucjp() on outbuf in place.
 */
507 void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
509 strncpy2(outbuf, inbuf, outlen);
510 conv_unreadable_eucjp(outbuf);
/*
 * conv_anytodisp: run conv_anytoeuc(), which picks a conversion from
 * conv_guess_encoding(), then run conv_unreadable_eucjp() on outbuf in
 * place.
 */
513 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
515 conv_anytoeuc(outbuf, outlen, inbuf);
516 conv_unreadable_eucjp(outbuf);
/*
 * conv_ustodisp: copy inbuf to outbuf with strncpy2() (outlen passed as
 * its size argument), then run conv_unreadable_8bit() on outbuf in place.
 */
519 void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
521 strncpy2(outbuf, inbuf, outlen);
522 conv_unreadable_8bit(outbuf);
/*
 * conv_latintodisp: copy inbuf to outbuf with strncpy2() (outlen passed as
 * its size argument), then run conv_unreadable_latin() on outbuf in place.
 */
525 void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
527 strncpy2(outbuf, inbuf, outlen);
528 conv_unreadable_latin(outbuf);
/*
 * conv_noconv: copy inbuf to outbuf with strncpy2() and do nothing else.
 * Its address is also used as a marker: conv_codeset_strdup() compares the
 * selected converter against conv_noconv.
 */
531 void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
533 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_code_converter_new: allocate a zero-filled CodeConverter with
 * g_new0() and fill it from `charset`: the conversion function from
 * conv_get_code_conv_func(), a g_strdup() copy of the name, and the enum
 * value from conv_get_charset_from_str().
 * NOTE(review): the return statement and any preprocessor guards around
 * the code_conv_func assignment are not present in this listing.
 */
536 CodeConverter *conv_code_converter_new(const gchar *charset)
540 conv = g_new0(CodeConverter, 1);
542 conv->code_conv_func = conv_get_code_conv_func(charset);
/* private copy; released in conv_code_converter_destroy() */
544 conv->charset_str = g_strdup(charset);
545 conv->charset = conv_get_charset_from_str(charset);
/*
 * conv_code_converter_destroy: release the charset name copy owned by
 * conv.
 * NOTE(review): the release of `conv` itself is not present in this
 * listing -- confirm against the full source.
 */
550 void conv_code_converter_destroy(CodeConverter *conv)
552 g_free(conv->charset_str);
/*
 * conv_convert: convert inbuf into outbuf using the settings held in conv.
 * One build variant goes through conv_codeset_strdup() with
 * conv->charset_str as the source codeset and copies the result into
 * outbuf with strncpy2(); the other (marked !HAVE_LIBJCONV) calls
 * conv->code_conv_func directly.
 * NOTE(review): the rest of the parameter list, the NULL check on str,
 * the release of str and the return values are not present in this
 * listing.
 */
556 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
562 str = conv_codeset_strdup(inbuf, conv->charset_str, NULL);
566 strncpy2(outbuf, str, outlen);
569 #else /* !HAVE_LIBJCONV */
570 conv->code_conv_func(outbuf, outlen, inbuf);
/*
 * conv_codeset_strdup: return a converted copy of inbuf.
 *   inbuf        - NUL-terminated input
 *   src_codeset  - name of the input codeset
 *   dest_codeset - name of the wanted codeset
 * The visible return paths hand back memory from g_strdup()/g_realloc(),
 * or NULL; the caller presumably owns the result and releases it with
 * g_free() -- confirm.
 * The body has two build variants separated by HAVE_LIBJCONV.
 * NOTE(review): many lines are missing from this listing (braces, buffer
 * allocations, the condition guarding the display-conversion section,
 * several returns).
 */
576 gchar *conv_codeset_strdup(const gchar *inbuf,
577 const gchar *src_codeset, const gchar *dest_codeset)
583 const gchar *const *codesets;
585 #else /* !HAVE_LIBJCONV */
586 CharSet src_charset = C_AUTO, dest_charset = C_AUTO;
/* display conversion through one of the conv_*todisp() helpers */
592 func = conv_get_code_conv_func(src_codeset);
593 if (func != conv_noconv) {
/* these three converters get a buffer of twice the input length plus one;
   the others get input length plus one */
594 if (func == conv_jistodisp ||
595 func == conv_sjistodisp ||
596 func == conv_anytodisp)
597 len = strlen(inbuf) * 2 + 1;
599 len = strlen(inbuf) + 1;
601 if (!buf) return NULL;
602 func(buf, len, inbuf);
/* trim the allocation to the converted length */
603 buf = g_realloc(buf, strlen(buf) + 1);
608 /* don't convert if src and dest codeset are identical */
609 if (src_codeset && dest_codeset &&
610 !strcasecmp(src_codeset, dest_codeset))
611 return g_strdup(inbuf);
/* candidate source codesets: either the single caller-supplied name or
   the list from jconv_info_get_pref_codesets(); the test choosing between
   them is not shown */
615 codesets = &src_codeset;
618 codesets = jconv_info_get_pref_codesets(&n_codesets);
620 dest_codeset = conv_get_current_charset_str();
621 /* don't convert if current codeset is US-ASCII */
622 if (!strcasecmp(dest_codeset, CS_US_ASCII))
623 return g_strdup(inbuf);
626 if (jconv_alloc_conv(inbuf, strlen(inbuf), &buf, &len,
627 codesets, n_codesets,
628 &actual_codeset, dest_codeset)
633 g_warning("code conversion from %s to %s failed\n",
634 codesets && codesets[0] ? codesets[0] : "(unknown)",
639 #else /* !HAVE_LIBJCONV */
/* map the codeset names to CharSet values; anything unrecognised stays
   C_AUTO */
641 if (!strcasecmp(src_codeset, CS_EUC_JP) ||
642 !strcasecmp(src_codeset, CS_EUCJP))
643 src_charset = C_EUC_JP;
644 else if (!strcasecmp(src_codeset, CS_SHIFT_JIS) ||
645 !strcasecmp(src_codeset, "SHIFT-JIS") ||
646 !strcasecmp(src_codeset, "SJIS"))
647 src_charset = C_SHIFT_JIS;
648 if (dest_codeset && !strcasecmp(dest_codeset, CS_ISO_2022_JP))
649 dest_charset = C_ISO_2022_JP;
/* only EUC-JP or Shift_JIS input going to ISO-2022-JP is converted here;
   every other combination is returned as a plain copy */
652 if ((src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS) &&
653 dest_charset == C_ISO_2022_JP) {
/* output buffer sized at three bytes per input byte (NUL included) */
654 len = (strlen(inbuf) + 1) * 3;
657 if (src_charset == C_EUC_JP)
658 conv_euctojis(buf, len, inbuf);
660 conv_anytojis(buf, len, inbuf);
661 buf = g_realloc(buf, strlen(buf) + 1);
664 buf = g_strdup(inbuf);
667 #endif /* !HAVE_LIBJCONV */
/*
 * conv_get_code_conv_func: pick the display-conversion function for a
 * charset name.
 *   charset - charset name, compared case-insensitively
 * Names matched in the visible chain: ISO-2022-JP(-2) -> conv_jistodisp,
 * US-ASCII -> conv_ustodisp, ISO-8859-x -> conv_latintodisp, the
 * Shift_JIS spellings -> conv_sjistodisp, the EUC-JP spellings ->
 * conv_euctodisp; anything else -> conv_noconv.
 * NOTE(review): the condition guarding the early conv_anytodisp return
 * (presumably a missing charset), any preprocessor guards between the
 * two ISO-8859 tests, and the final return are not present in this
 * listing.
 */
670 CodeConvFunc conv_get_code_conv_func(const gchar *charset)
672 CodeConvFunc code_conv;
676 cur_charset = conv_get_current_charset();
677 if (cur_charset == C_EUC_JP || cur_charset == C_SHIFT_JIS)
678 return conv_anytodisp;
683 if (!strcasecmp(charset, CS_ISO_2022_JP) ||
684 !strcasecmp(charset, CS_ISO_2022_JP_2))
685 code_conv = conv_jistodisp;
686 else if (!strcasecmp(charset, CS_US_ASCII))
687 code_conv = conv_ustodisp;
/* only the first 10 characters are compared */
688 else if (!strncasecmp(charset, CS_ISO_8859_1, 10))
689 code_conv = conv_latintodisp;
/* prefix match on the 9 characters "ISO-8859-" */
691 else if (!strncasecmp(charset, "ISO-8859-", 9))
692 code_conv = conv_latintodisp;
694 else if (!strcasecmp(charset, CS_SHIFT_JIS) ||
695 !strcasecmp(charset, "SHIFT-JIS") ||
696 !strcasecmp(charset, "SJIS") ||
697 !strcasecmp(charset, "X-SJIS"))
698 code_conv = conv_sjistodisp;
699 else if (!strcasecmp(charset, CS_EUC_JP) ||
700 !strcasecmp(charset, CS_EUCJP))
701 code_conv = conv_euctodisp;
703 code_conv = conv_noconv;
/*
 * Table pairing CharSet enum values with charset names; referenced below
 * as charsets[i].charset and charsets[i].name.
 * conv_get_charset_str() returns the name of the first entry whose enum
 * value matches, so where a value is listed twice the earlier name wins.
 * conv_get_charset_from_str() matches names case-insensitively.
 * conv_get_outgoing_charset() starts one of its scans at index 3, so the
 * order of the leading entries matters.
 * NOTE(review): the member declarations, the array name line and some
 * entries are not present in this listing.
 */
708 static const struct {
712 {C_US_ASCII, CS_US_ASCII},
713 {C_US_ASCII, CS_ANSI_X3_4_1968},
715 {C_ISO_8859_1, CS_ISO_8859_1},
716 {C_ISO_8859_2, CS_ISO_8859_2},
717 {C_ISO_8859_4, CS_ISO_8859_4},
718 {C_ISO_8859_5, CS_ISO_8859_5},
719 {C_ISO_8859_7, CS_ISO_8859_7},
720 {C_ISO_8859_8, CS_ISO_8859_8},
721 {C_ISO_8859_9, CS_ISO_8859_9},
722 {C_ISO_8859_11, CS_ISO_8859_11},
723 {C_ISO_8859_13, CS_ISO_8859_13},
724 {C_ISO_8859_15, CS_ISO_8859_15},
725 {C_BALTIC, CS_BALTIC},
726 {C_CP1251, CS_CP1251},
727 {C_WINDOWS_1251, CS_WINDOWS_1251},
728 {C_KOI8_R, CS_KOI8_R},
729 {C_KOI8_U, CS_KOI8_U},
730 {C_ISO_2022_JP, CS_ISO_2022_JP},
731 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
732 {C_EUC_JP, CS_EUC_JP},
733 {C_EUC_JP, CS_EUCJP},
734 {C_SHIFT_JIS, CS_SHIFT_JIS},
735 {C_ISO_2022_KR, CS_ISO_2022_KR},
736 {C_EUC_KR, CS_EUC_KR},
737 {C_ISO_2022_CN, CS_ISO_2022_CN},
738 {C_EUC_CN, CS_EUC_CN},
739 {C_GB2312, CS_GB2312},
740 {C_EUC_TW, CS_EUC_TW},
742 {C_TIS_620, CS_TIS_620},
743 {C_WINDOWS_874, CS_WINDOWS_874},
/*
 * Locale lookup table.  Each row holds a locale name followed by two
 * CharSet values; the code below reads them as .locale, .charset (used by
 * conv_get_current_charset()) and .out_charset (used by
 * conv_get_outgoing_charset()).
 * Rows are matched by comparing the current locale against the row's name
 * over the length of that name, so a longer name such as "ja_JP.eucJP"
 * has to come before its shorter prefix "ja_JP" to be selected
 * (assuming the scan stops at the first hit -- the break is not visible
 * in this listing).
 * NOTE(review): the member declarations, the array name line and some
 * rows are not present in this listing.
 */
747 static const struct {
752 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
753 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
754 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
755 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
756 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
757 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
758 {"ko_KR" , C_EUC_KR , C_EUC_KR},
759 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
760 {"zh_CN" , C_GB2312 , C_GB2312},
761 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
762 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
763 {"zh_TW" , C_BIG5 , C_BIG5},
765 {"ru_RU.KOI8-R" , C_KOI8_R , C_ISO_8859_5},
766 {"ru_RU.CP1251" , C_WINDOWS_1251, C_ISO_8859_5},
768 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
770 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
771 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
772 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
773 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
774 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
775 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
776 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
777 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
778 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
779 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
780 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
781 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
782 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
783 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
784 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
786 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
787 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
788 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
789 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
790 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
791 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
792 {"ru_RU" , C_ISO_8859_5 , C_ISO_8859_5},
793 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
794 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
795 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
797 {"th_TH" , C_TIS_620 , C_TIS_620},
798 /* {"th_TH" , C_WINDOWS_874}, */
799 /* {"th_TH" , C_ISO_8859_11}, */
801 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
802 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
803 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
804 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
805 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
807 {"C" , C_US_ASCII , C_US_ASCII},
808 {"POSIX" , C_US_ASCII , C_US_ASCII},
809 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
811 #endif /* !HAVE_LIBJCONV */
/*
 * conv_get_charset_str: look `charset` up in the charsets table and return
 * the name of the first entry whose enum value matches.  The returned
 * pointer refers to the table entry, not to a copy.
 * NOTE(review): the result for a value with no entry is not present in
 * this listing; callers below test the result for NULL.
 */
813 const gchar *conv_get_charset_str(CharSet charset)
817 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
818 if (charsets[i].charset == charset)
819 return charsets[i].name;
/*
 * conv_get_charset_from_str: map a charset name to its CharSet value.
 *   charset - name to look up; NULL yields C_AUTO
 * The name is compared case-insensitively against the charsets table and
 * the first match is returned.
 * NOTE(review): the result for an unknown name is not present in this
 * listing.
 */
825 CharSet conv_get_charset_from_str(const gchar *charset)
829 if (!charset) return C_AUTO;
831 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
832 if (!strcasecmp(charsets[i].name, charset))
833 return charsets[i].charset;
/*
 * conv_get_current_charset: determine the CharSet of the running
 * environment.  The result is kept in a function-local static that starts
 * at -1, and the -1 test below lets later calls reuse it.
 * One build variant compares jconv_info_get_current_codeset() against the
 * charsets table; the other derives the value from
 * conv_get_current_locale() and locale_table.
 * NOTE(review): preprocessor guards, returns, breaks and the NULL-locale
 * test are not present in this listing.
 */
839 CharSet conv_get_current_charset(void)
841 static CharSet cur_charset = -1;
845 const gchar *cur_codeset;
847 const gchar *cur_locale;
850 if (cur_charset != -1)
854 cur_codeset = jconv_info_get_current_codeset();
855 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
856 if (!strcasecmp(cur_codeset, charsets[i].name)) {
857 cur_charset = charsets[i].charset;
862 cur_locale = conv_get_current_locale();
864 cur_charset = C_US_ASCII;
/* a locale string containing "UTF-8" (any case) selects C_UTF_8 before
   the table is consulted */
868 if (strcasestr(cur_locale, "UTF-8")) {
869 cur_charset = C_UTF_8;
873 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
876 /* "ja_JP.EUC" matches with "ja_JP.eucJP" and "ja_JP.EUC" */
877 /* "ja_JP" matches with "ja_JP.xxxx" and "ja" */
/* prefix comparison over the length of the table entry */
878 if (!strncasecmp(cur_locale, locale_table[i].locale,
879 strlen(locale_table[i].locale))) {
880 cur_charset = locale_table[i].charset;
/* table entries without a ".codeset" suffix also match a bare
   two-letter locale */
882 } else if ((p = strchr(locale_table[i].locale, '_')) &&
883 !strchr(p + 1, '.')) {
884 if (strlen(cur_locale) == 2 &&
885 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
886 cur_charset = locale_table[i].charset;
/* fallback assignment */
893 cur_charset = C_AUTO;
/*
 * conv_get_current_charset_str: return the name of the current charset,
 * obtained from conv_get_charset_str(conv_get_current_charset()) and held
 * in a function-local static.  "US-ASCII" is returned when no name is
 * available.
 * NOTE(review): the test guarding the assignment is not present in this
 * listing.
 */
897 const gchar *conv_get_current_charset_str(void)
899 static const gchar *codeset = NULL;
902 codeset = conv_get_charset_str(conv_get_current_charset());
904 return codeset ? codeset : "US-ASCII";
/*
 * conv_get_outgoing_charset: determine the CharSet used for outgoing
 * text.  The result is kept in a function-local static that starts at -1,
 * and the -1 test below lets later calls reuse it.
 * One build variant walks jconv_info_get_pref_codesets(); the other looks
 * the current locale up in locale_table and reads its out_charset column.
 * NOTE(review): preprocessor guards, returns, breaks and several
 * conditions are not present in this listing.
 */
907 CharSet conv_get_outgoing_charset(void)
909 static CharSet out_charset = -1;
913 gint j, n_pref_codesets;
914 const gchar *const *pref_codesets;
916 const gchar *cur_locale;
919 if (out_charset != -1)
923 /* skip US-ASCII and UTF-8 */
924 pref_codesets = jconv_info_get_pref_codesets(&n_pref_codesets);
925 for (i = 0; i < n_pref_codesets; i++) {
/* j starts at 3: the leading charsets[] entries are skipped, so this
   depends on the order of that table */
926 for (j = 3; j < sizeof(charsets) / sizeof(charsets[0]); j++) {
927 if (!strcasecmp(pref_codesets[i], charsets[j].name)) {
928 out_charset = charsets[j].charset;
/* second pass over the preferred codesets looking for "UTF-8" */
934 for (i = 0; i < n_pref_codesets; i++) {
935 if (!strcasecmp(pref_codesets[i], "UTF-8")) {
936 out_charset = C_UTF_8;
941 out_charset = C_AUTO;
943 cur_locale = conv_get_current_locale();
945 out_charset = C_AUTO;
949 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
/* same matching rules as conv_get_current_charset(): prefix match on the
   table entry, or a bare two-letter locale */
952 if (!strncasecmp(cur_locale, locale_table[i].locale,
953 strlen(locale_table[i].locale))) {
954 out_charset = locale_table[i].out_charset;
956 } else if ((p = strchr(locale_table[i].locale, '_')) &&
957 !strchr(p + 1, '.')) {
958 if (strlen(cur_locale) == 2 &&
959 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
960 out_charset = locale_table[i].out_charset;
966 /* encoding conversion without libjconv is only supported
967 on Japanese locale for now */
968 if (out_charset == C_ISO_2022_JP)
971 out_charset = conv_get_current_charset();
/*
 * conv_get_outgoing_charset_str: return the charset name to use for
 * outgoing text.
 * A user setting in prefs_common.outgoing_charset wins when it is present
 * and differs from CS_AUTO.  A setting whose first character is not a
 * letter is freed and replaced by a copy of CS_AUTO.  Otherwise the name
 * is derived from conv_get_outgoing_charset(), with "US-ASCII" as the
 * fallback.
 */
977 const gchar *conv_get_outgoing_charset_str(void)
982 if (prefs_common.outgoing_charset) {
983 if (!isalpha(prefs_common.outgoing_charset[0])) {
984 g_free(prefs_common.outgoing_charset);
985 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
986 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
987 return prefs_common.outgoing_charset;
990 out_charset = conv_get_outgoing_charset();
991 str = conv_get_charset_str(out_charset);
993 return str ? str : "US-ASCII";
/*
 * conv_get_current_locale: find the current locale name.  The environment
 * variables LC_ALL, LC_CTYPE and LANG are tried in that order, and
 * setlocale(LC_CTYPE, NULL) is queried only if none of them is set.  The
 * value found is printed through debug_print().
 * NOTE(review): the return statement is not present in this listing.
 */
996 const gchar *conv_get_current_locale(void)
1000 cur_locale = g_getenv("LC_ALL");
1001 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1002 if (!cur_locale) cur_locale = g_getenv("LANG");
1003 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1005 debug_print("current locale: %s\n",
1006 cur_locale ? cur_locale : "(none)");
/*
 * conv_unmime_header_overwrite: decode a header held in str and store the
 * result back into str.
 *   str - NUL-terminated buffer, overwritten with the result
 * When the current charset is C_EUC_JP, str is first passed through
 * conv_anytodisp() into a temporary buffer and unmime_header() writes
 * back into str.  Otherwise unmime_header() writes into the temporary
 * buffer, which is then copied over str.
 * NOTE(review): Xalloca is a project macro; its third argument (`return`)
 * is presumably the statement run on allocation failure -- confirm.
 */
1011 void conv_unmime_header_overwrite(gchar *str)
1015 CharSet cur_charset;
1017 cur_charset = conv_get_current_charset();
1019 if (cur_charset == C_EUC_JP) {
/* temporary sized at twice the input length plus the NUL */
1020 buflen = strlen(str) * 2 + 1;
1021 Xalloca(buf, buflen, return);
1022 conv_anytodisp(buf, buflen, str);
1023 unmime_header(str, buf);
1025 buflen = strlen(str) + 1;
1026 Xalloca(buf, buflen, return);
1027 unmime_header(buf, str);
1028 strncpy2(str, buf, buflen);
/*
 * conv_unmime_header: decode the header in str into outbuf.
 *   outbuf  - destination buffer
 *   outlen  - not referenced in the visible lines (TODO confirm)
 *   str     - NUL-terminated input
 *   charset - not referenced in the visible lines (TODO confirm)
 * When the current charset is C_EUC_JP, str is first passed through
 * conv_anytodisp() into a temporary buffer of twice its length plus one.
 * Otherwise unmime_header() reads str directly.
 * NOTE(review): Xalloca is a project macro; its third argument (`return`)
 * is presumably the statement run on allocation failure -- confirm.
 */
1032 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1033 const gchar *charset)
1035 CharSet cur_charset;
1037 cur_charset = conv_get_current_charset();
1039 if (cur_charset == C_EUC_JP) {
1043 buflen = strlen(str) * 2 + 1;
1044 Xalloca(buf, buflen, return);
1045 conv_anytodisp(buf, buflen, str);
1046 unmime_header(outbuf, buf);
1048 unmime_header(outbuf, str);
/* Length limits used by conv_encode_header().  NOTE(review): the
   comparisons that use MAX_ENCLEN are not visible in this listing. */
1051 #define MAX_ENCLEN 75
1052 #define MAX_LINELEN 76
/* Number of base64 characters for `len` input bytes: four per complete
   group of three, plus four more for a partial group.  `len` is expanded
   twice. */
1054 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/*
 * conv_encode_header (libjconv build): write an encoded form of src into
 * dest.  Runs containing non-ASCII text are converted to the outgoing
 * charset with jconv_alloc_conv(), base64-encoded and wrapped as
 * "=?" charset "?B?" ... "?=".
 *   dest - output buffer
 *   len  - size of dest in bytes
 *   src  - NUL-terminated header text in the current locale encoding
 * The remaining parameter line is not shown; header_len is read below as
 * the starting line length.
 * NOTE(review): a large number of lines are missing from this listing
 * (braces, the line-folding actions, buffer releases), so only the
 * visible statements are annotated.
 */
1057 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1063 size_t line_len, mimehdr_len, mimehdr_begin_len;
1064 gchar *mimehdr_init = "=?";
1065 gchar *mimehdr_end = "?=";
1066 gchar *mimehdr_enctype = "?B?";
1067 const gchar *mimehdr_charset;
1069 /* g_print("src = %s\n", src); */
1070 mimehdr_charset = conv_get_outgoing_charset_str();
1072 /* convert to wide-character string */
1073 wsrcp = wsrc = strdup_mbstowcs(src);
/* on conversion failure the input is copied through unencoded */
1075 g_warning("Can't convert string to wide characters.\n");
1076 strncpy2(dest, src, len);
/* mimehdr_len: fixed overhead of one encoded word;
   mimehdr_begin_len: the same without the closing "?=" */
1080 mimehdr_len = strlen(mimehdr_init) + strlen(mimehdr_end) +
1081 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1082 mimehdr_begin_len = strlen(mimehdr_init) +
1083 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1084 line_len = header_len;
1089 wchar_t *wp, *wtmp, *wtmpp;
1091 gboolean str_is_non_ascii;
1093 /* irresponsible buffer overrun check */
/* stops once fewer than (MAX_LINELEN + 1) * 2 bytes remain in dest */
1094 if ((len - (destp - dest)) < (MAX_LINELEN + 1) * 2) break;
1096 /* encode string including space
1097 if non-ASCII string follows */
1098 if (is_next_nonascii(wsrcp)) {
1100 while ((wp = find_wspace(wp)) != NULL)
1101 if (!is_next_nonascii(wp)) break;
1102 str_is_non_ascii = TRUE;
1104 wp = find_wspace(wsrcp);
1105 str_is_non_ascii = FALSE;
/* wtmp holds the current run: up to the chosen whitespace, or the rest of
   the string when none was found */
1109 wtmp = wcsndup(wsrcp, wp - wsrcp);
1111 while (iswspace(wsrcp[nspc])) nspc++;
1113 wtmp = wcsdup(wsrcp);
1114 wsrcp += wcslen(wsrcp);
1121 gchar *tmp; /* internal codeset */
1122 gchar *raw; /* converted, but not base64 encoded */
1123 register gchar *tmpp;
/* worst case MB_CUR_MAX bytes per wide character, plus the NUL */
1126 tmpp = tmp = g_malloc(wcslen(wtmpp) * MB_CUR_MAX + 1);
1131 while (*wtmpp != (wchar_t)0) {
1134 gchar *raw_new = NULL;
1135 int raw_new_len = 0;
1136 const gchar *src_codeset;
1138 mbl = wctomb(tmpp, *wtmpp);
1140 g_warning("invalid wide character\n");
1145 src_codeset = conv_get_current_charset_str();
1146 /* printf ("tmp = %s, tlen = %d, mbl\n",
1148 if (jconv_alloc_conv(tmp, tlen + mbl,
1149 &raw_new, &raw_new_len,
1151 &dummy, mimehdr_charset)
1153 g_warning("can't convert\n");
1158 if (str_is_non_ascii) {
1159 gint dlen = mimehdr_len +
1161 if ((line_len + dlen +
1162 (*(wtmpp + 1) ? 0 : nspc) +
1163 (line_len > 1 ? 1 : 0))
1176 } else if ((line_len + tlen + mbl +
1177 (*(wtmpp + 1) ? 0 : nspc) +
1178 (line_len > 1 ? 1 : 0))
1181 if (1 + tlen + mbl +
1182 (*(wtmpp + 1) ? 0 : nspc)
1200 raw_len = raw_new_len;
1204 /* g_print("tmp = %s, tlen = %d, mb_seqlen = %d\n",
1205 tmp, tlen, mb_seqlen); */
1207 if (tlen == 0 || raw_len == 0) {
1213 if (line_len > 1 && destp > dest) {
/* non-ASCII run: emit the "=?charset?B?" prefix, the base64 of raw, then
   the closing "?=" */
1219 if (str_is_non_ascii) {
1220 g_snprintf(destp, len - strlen(dest), "%s%s%s",
1221 mimehdr_init, mimehdr_charset,
1223 destp += mimehdr_begin_len;
1224 line_len += mimehdr_begin_len;
1226 base64_encode(destp, raw, raw_len);
1227 line_len += strlen(destp);
1228 destp += strlen(destp);
1230 strcpy(destp, mimehdr_end);
1231 destp += strlen(mimehdr_end);
1232 line_len += strlen(mimehdr_end);
1235 line_len += strlen(destp);
1236 destp += strlen(destp);
1241 /* g_print("line_len = %d\n\n", line_len); */
1242 } while (*wtmpp != (wchar_t)0);
/* whitespace after the run is converted back to multibyte form directly
   into dest */
1244 while (iswspace(*wsrcp)) {
1247 mbl = wctomb(destp, *wsrcp++);
1260 /* g_print("dest = %s\n", dest); */
1262 #else /* !HAVE_LIBJCONV */
/* Byte count added per escape sequence when estimating converted length;
   conv_encode_header() adds JIS_SEQLEN * 2 for what its own comment calls
   "KI + KO". */
1264 #define JIS_SEQLEN 3
/*
 * conv_encode_header (build without libjconv): write an encoded form of
 * src into dest.  Runs containing non-ASCII text are base64-encoded and
 * wrapped as "=?" charset "?B?" ... "?="; a conv_euctojis() call on the
 * run is visible below.
 *   dest - output buffer
 *   len  - size of dest in bytes
 *   src  - NUL-terminated header text in the current locale encoding
 * The remaining parameter line is not shown; header_len is read below as
 * the starting line length.
 * NOTE(review): a large number of lines are missing from this listing
 * (braces, the statement run for "ISO-2022-JP", the line-folding actions,
 * buffer releases), so only the visible statements are annotated.
 */
1266 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1272 size_t line_len, mimehdr_len, mimehdr_begin_len;
1273 gchar *mimehdr_init = "=?";
1274 gchar *mimehdr_end = "?=";
1275 gchar *mimehdr_enctype = "?B?";
1276 const gchar *mimehdr_charset;
1277 gboolean do_conv = FALSE;
1279 /* g_print("src = %s\n", src); */
1280 mimehdr_charset = conv_get_outgoing_charset_str();
/* an outgoing charset of "US-ASCII" is labelled "ISO-8859-1" in the
   encoded word */
1281 if (strcmp(mimehdr_charset, "ISO-2022-JP") == 0)
1283 else if (strcmp(mimehdr_charset, "US-ASCII") == 0)
1284 mimehdr_charset = "ISO-8859-1";
1286 /* convert to wide-character string */
1287 wsrcp = wsrc = strdup_mbstowcs(src);
/* on conversion failure the input is copied through unencoded */
1289 g_warning("Can't convert string to wide characters.\n");
1290 strncpy2(dest, src, len);
/* mimehdr_len: fixed overhead of one encoded word;
   mimehdr_begin_len: the same without the closing "?=" */
1294 mimehdr_len = strlen(mimehdr_init) + strlen(mimehdr_end) +
1295 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1296 mimehdr_begin_len = strlen(mimehdr_init) +
1297 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1298 line_len = header_len;
1303 wchar_t *wp, *wtmp, *wtmpp;
1305 gboolean str_is_non_ascii;
1307 /* irresponsible buffer overrun check */
/* stops once fewer than (MAX_LINELEN + 1) * 2 bytes remain in dest */
1308 if ((len - (destp - dest)) < (MAX_LINELEN + 1) * 2) break;
1310 /* encode string including space
1311 if non-ASCII string follows */
1312 if (is_next_nonascii(wsrcp)) {
1314 while ((wp = find_wspace(wp)) != NULL)
1315 if (!is_next_nonascii(wp)) break;
1316 str_is_non_ascii = TRUE;
1318 wp = find_wspace(wsrcp);
1319 str_is_non_ascii = FALSE;
/* wtmp holds the current run: up to the chosen whitespace, or the rest of
   the string when none was found */
1323 wtmp = wcsndup(wsrcp, wp - wsrcp);
1325 while (iswspace(wsrcp[nspc])) nspc++;
1327 wtmp = wcsdup(wsrcp);
1328 wsrcp += wcslen(wsrcp);
1334 gint prev_mbl = 1, tlen = 0, mb_seqlen = 0;
1336 register gchar *tmpp;
/* worst case MB_CUR_MAX bytes per wide character, plus the NUL */
1338 tmpp = tmp = g_malloc(wcslen(wtmpp) * MB_CUR_MAX + 1);
1341 while (*wtmpp != (wchar_t)0) {
1344 mbl = wctomb(tmpp, *wtmpp);
1346 g_warning("invalid wide character\n");
1351 /* length of KI + KO */
/* counted when a one-byte character is followed by a two-byte one */
1352 if (do_conv && prev_mbl == 1 && mbl == 2)
1353 mb_seqlen += JIS_SEQLEN * 2;
1355 if (str_is_non_ascii) {
/* projected size of the encoded word if this character is added */
1356 gint dlen = mimehdr_len +
1357 B64LEN(tlen + mb_seqlen + mbl);
1359 if ((line_len + dlen +
1360 (*(wtmpp + 1) ? 0 : nspc) +
1361 (line_len > 1 ? 1 : 0))
1374 } else if ((line_len + tlen + mbl +
1375 (*(wtmpp + 1) ? 0 : nspc) +
1376 (line_len > 1 ? 1 : 0))
1378 if (1 + tlen + mbl +
1379 (*(wtmpp + 1) ? 0 : nspc)
1398 /* g_print("tmp = %s, tlen = %d, mb_seqlen = %d\n",
1399 tmp, tlen, mb_seqlen); */
1406 if (line_len > 1 && destp > dest) {
/* non-ASCII run: convert into raw, emit the "=?charset?B?" prefix, the
   base64 of raw, then the closing "?=" */
1412 if (str_is_non_ascii) {
/* raw is sized from the running estimate tlen + mb_seqlen, plus the NUL */
1415 raw = g_new(gchar, tlen + mb_seqlen + 1);
1417 conv_euctojis(raw, tlen + mb_seqlen + 1,
1421 g_snprintf(destp, len - strlen(dest), "%s%s%s",
1422 mimehdr_init, mimehdr_charset,
1424 destp += mimehdr_begin_len;
1425 line_len += mimehdr_begin_len;
1427 base64_encode(destp, raw, strlen(raw));
1428 line_len += strlen(destp);
1429 destp += strlen(destp);
1431 strcpy(destp, mimehdr_end);
1432 destp += strlen(mimehdr_end);
1433 line_len += strlen(mimehdr_end);
1438 line_len += strlen(destp);
1439 destp += strlen(destp);
1443 /* g_print("line_len = %d\n\n", line_len); */
1444 } while (*wtmpp != (wchar_t)0);
/* whitespace after the run is converted back to multibyte form directly
   into dest */
1446 while (iswspace(*wsrcp)) {
1449 mbl = wctomb(destp, *wsrcp++);
1462 /* g_print("dest = %s\n", dest); */
1464 #endif /* HAVE_LIBJCONV */