2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2002 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
29 #if (HAVE_WCTYPE_H && HAVE_WCHAR_H)
45 #include "common/base64.h"
47 #include "prefs_common.h"
/* Replacement character; conv_unreadable_8bit() writes it over non-ASCII bytes. */
57 #define SUBST_CHAR '_'
/* True when the low 8 bits of c are in the range 0xa1..0xfe. */
60 #define iseuckanji(c) \
61 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* True when the low 8 bits of c equal 0x8e. */
62 #define iseuchwkana1(c) \
63 (((c) & 0xff) == 0x8e)
/* True when the low 8 bits of c are in the range 0xa1..0xdf. */
64 #define iseuchwkana2(c) \
65 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* NOTE(review): the #define line for this expansion is not visible here;
 * presumably it is the body of iseucaux(), which conv_euctojis() and
 * conv_unreadable_eucjp() call -- confirm against the full source.
 * The expression is true when the low 8 bits of c equal 0x8f. */
67 (((c) & 0xff) == 0x8f)
/* True when the low 8 bits of c are in the range 0xa9..0xaf. */
68 #define isunprintableeuckanji(c) \
69 (((c) & 0xff) >= 0xa9 && ((c) & 0xff) <= 0xaf)
/* True when the low 8 bits of c are in 0x81..0x9f or 0xe0..0xfc. */
70 #define issjiskanji1(c) \
71 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
72 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* True when the low 8 bits of c are in 0x40..0x7e or 0x80..0xfc. */
73 #define issjiskanji2(c) \
74 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
75 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* True when the low 8 bits of c are in the range 0xa1..0xdf. */
76 #define issjishwkana(c) \
77 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
80 if (state != JIS_KANJI) { \
88 if (state != JIS_ASCII) { \
96 if (state != JIS_HWKANA) { \
100 state = JIS_HWKANA; \
104 if (state != JIS_AUXKANJI) { \
109 state = JIS_AUXKANJI; \
/*
 * conv_jistoeuc: scan the NUL-terminated string inbuf byte by byte and
 * write converted bytes through outbuf, tracking a JISState that starts
 * as JIS_ASCII.
 *
 * The visible branches classify escape sequences by the byte that follows
 * the current one, handle the 0x0e and 0x0f control bytes, and copy input
 * bytes to the output with bit 7 set.
 *
 * NOTE(review): many lines of this function (including any use of outlen
 * and the final termination of outbuf) are not visible in this view;
 * confirm bounds handling against the full source.
 */
112 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
114 const guchar *in = inbuf;
115 guchar *out = outbuf;
116 JISState state = JIS_ASCII;
118 while (*in != '\0') {
/* dispatch on the byte after the current one */
122 if (*(in + 1) == '@' || *(in + 1) == 'B') {
125 } else if (*(in + 1) == '(' &&
127 state = JIS_AUXKANJI;
130 /* unknown escape sequence */
133 } else if (*in == '(') {
134 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
137 } else if (*(in + 1) == 'I') {
141 /* unknown escape sequence */
145 /* unknown escape sequence */
148 } else if (*in == 0x0e) {
151 } else if (*in == 0x0f) {
/* two-byte copy with the high bit set; stop if the second byte is the terminator */
160 *out++ = *in++ | 0x80;
161 if (*in == '\0') break;
162 *out++ = *in++ | 0x80;
/* single-byte copy with the high bit set */
166 *out++ = *in++ | 0x80;
/* another two-byte copy, guarded the same way against a truncated pair */
170 *out++ = *in++ | 0x80;
171 if (*in == '\0') break;
172 *out++ = *in++ | 0x80;
/*
 * conv_euctojis: scan the NUL-terminated string inbuf and write converted
 * bytes through outbuf, tracking a JISState that starts as JIS_ASCII.
 *
 * The visible branches test the current byte with iseuckanji(),
 * iseuchwkana1() and iseucaux(), and copy matching bytes to the output
 * with bit 7 cleared (& 0x7f).
 *
 * NOTE(review): the lines that emit escape sequences, handle invalid
 * input, and use outlen are not visible in this view.
 */
181 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
183 const guchar *in = inbuf;
184 guchar *out = outbuf;
185 JISState state = JIS_ASCII;
187 while (*in != '\0') {
191 } else if (iseuckanji(*in)) {
/* both bytes of the pair must pass iseuckanji() before they are copied */
192 if (iseuckanji(*(in + 1))) {
194 *out++ = *in++ & 0x7f;
195 *out++ = *in++ & 0x7f;
/* otherwise: a following non-NUL, non-ASCII byte gets separate handling (body not visible) */
200 if (*in != '\0' && !isascii(*in)) {
205 } else if (iseuchwkana1(*in)) {
207 if (iseuchwkana2(*in)) {
209 *out++ = *in++ & 0x7f;
212 if (*in != '\0' && !isascii(*in)) {
217 } else if (iseucaux(*in)) {
/* after the iseucaux() byte, two bytes passing iseuckanji() are required */
219 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
221 *out++ = *in++ & 0x7f;
222 *out++ = *in++ & 0x7f;
225 if (*in != '\0' && !isascii(*in)) {
228 if (*in != '\0' && !isascii(*in)) {
/*
 * conv_sjistoeuc: scan the NUL-terminated string inbuf and write converted
 * bytes through outbuf.
 *
 * When the current byte passes issjiskanji1() and the next passes
 * issjiskanji2(), the pair is remapped arithmetically into out1/out2 and
 * both are written with bit 7 set.  Bytes passing issjishwkana() have
 * their own branch.
 *
 * NOTE(review): the declaration of out1/row, the condition selecting
 * between the two out1 formulas, and any use of outlen are not visible here.
 */
245 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
247 const guchar *in = inbuf;
248 guchar *out = outbuf;
250 while (*in != '\0') {
253 } else if (issjiskanji1(*in)) {
254 if (issjiskanji2(*(in + 1))) {
256 guchar out2 = *(in + 1);
/* lead bytes below 0xa0 use offset 0x70, the others 0xb0 */
259 row = out1 < 0xa0 ? 0x70 : 0xb0;
261 out1 = (out1 - row) * 2 - 1;
/* trail bytes above 0x7f shift down by 0x20, the others by 0x1f */
262 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
264 out1 = (out1 - row) * 2;
/* emit the remapped pair with the high bit set */
268 *out++ = out1 | 0x80;
269 *out++ = out2 | 0x80;
274 if (*in != '\0' && !isascii(*in)) {
279 } else if (issjishwkana(*in)) {
/*
 * conv_anytoeuc: pick a converter from the result of
 * conv_guess_encoding(inbuf).  The visible arms call conv_jistoeuc(),
 * conv_sjistoeuc(), or copy the input unchanged with strncpy2().
 * (The case labels themselves are not visible in this view.)
 */
291 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
293 switch (conv_guess_encoding(inbuf)) {
295 conv_jistoeuc(outbuf, outlen, inbuf);
298 conv_sjistoeuc(outbuf, outlen, inbuf);
301 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_anytojis: pick a converter from the result of
 * conv_guess_encoding(inbuf).  The visible arms call conv_euctojis() or
 * copy the input unchanged with strncpy2().
 * (The case labels themselves are not visible in this view.)
 */
306 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
308 switch (conv_guess_encoding(inbuf)) {
310 conv_euctojis(outbuf, outlen, inbuf);
313 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_unreadable_eucjp: edit str in place.  A CR immediately followed by
 * LF is collapsed to LF, and byte sequences are classified with the
 * iseuckanji / iseuchwkana / iseucaux macros; per the existing comments,
 * sequences judged unprintable are substituted.
 *
 * NOTE(review): the loop header, pointer advances and the substitution
 * statements are not visible in this view.
 */
318 void conv_unreadable_eucjp(gchar *str)
320 register guchar *p = str;
324 /* convert CR+LF -> LF */
325 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) counts from the CR, so copying that many bytes from p + 1 also moves the terminator */
326 memmove(p, p + 1, strlen(p));
327 /* printable 7 bit code */
329 } else if (iseuckanji(*p)) {
330 if (iseuckanji(*(p + 1)) && !isunprintableeuckanji(*p))
331 /* printable euc-jp code */
334 /* substitute unprintable code */
343 } else if (iseuchwkana1(*p)) {
344 if (iseuchwkana2(*(p + 1)))
345 /* euc-jp hankaku kana */
349 } else if (iseucaux(*p)) {
350 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
351 /* auxiliary kanji */
356 /* substitute unprintable 1 byte code */
/*
 * conv_unreadable_8bit: edit str in place.  A CR immediately followed by
 * LF is collapsed to LF; any other byte for which isascii() is false is
 * overwritten with SUBST_CHAR.
 * (The loop header and pointer advance are not visible in this view.)
 */
361 void conv_unreadable_8bit(gchar *str)
363 register guchar *p = str;
366 /* convert CR+LF -> LF */
367 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes starting at p + 1 includes the terminating NUL */
368 memmove(p, p + 1, strlen(p));
369 else if (!isascii(*p)) *p = SUBST_CHAR;
/*
 * conv_unreadable_latin: edit str in place.  A CR immediately followed by
 * LF is collapsed to LF; bytes in the range 0x80..0x9f are singled out
 * for separate handling.
 * NOTE(review): the statement executed for 0x80..0x9f bytes is not
 * visible here; presumably it substitutes SUBST_CHAR -- confirm.
 */
374 void conv_unreadable_latin(gchar *str)
376 register guchar *p = str;
379 /* convert CR+LF -> LF */
380 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes starting at p + 1 includes the terminating NUL */
381 memmove(p, p + 1, strlen(p));
382 else if ((*p & 0xff) >= 0x80 && (*p & 0xff) <= 0x9f)
/*
 * conv_mb_alnum: edit str in place, shortening some two-byte sequences to
 * a single byte.
 *
 * Two cases are visible.  In the first, a second byte in 0xb0..0xfa leads
 * to the tail of the string being shifted left by one byte.  In the
 * second, the first byte is 0xa1 and the second byte (0xa0..0xef) indexes
 * char_tbl; if the entry is not NCV it replaces the first byte and the
 * tail is shifted left.  Other bytes passing iseuckanji() have their own
 * branch.
 *
 * NOTE(review): the first-byte test of the first case, the computation of
 * len, and the pointer advances are not visible in this view.
 */
390 void conv_mb_alnum(gchar *str)
/* replacement table indexed by (second byte - 0xa0); NCV marks entries that are not converted */
392 static guchar char_tbl[] = {
394 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
395 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
397 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
398 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
400 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
401 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
403 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
404 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
406 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
407 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
410 register guchar *p = str;
417 register guchar ch = *(p + 1);
419 if (ch >= 0xb0 && ch <= 0xfa) {
/* drop one byte of the pair by sliding the rest of the string left */
424 memmove(p, p + 1, len);
430 } else if (*p == 0xa1) {
431 register guchar ch = *(p + 1);
/* the range check keeps the table index within 0..0x4f */
433 if (ch >= 0xa0 && ch <= 0xef &&
434 NCV != char_tbl[ch - 0xa0]) {
435 *p = char_tbl[ch - 0xa0];
438 memmove(p, p + 1, len);
444 } else if (iseuckanji(*p)) {
/*
 * conv_guess_encoding: inspect the bytes of str and return a CharSet
 * guess, starting from C_US_ASCII.
 *
 * Visible rules: an ESC followed by '$' or '(' returns C_ISO_2022_JP
 * immediately if nothing else has been guessed yet; several branches set
 * the guess to C_SHIFT_JIS based on the issjis* / iseuc* byte-class macros.
 *
 * NOTE(review): the loop header, the branches that select other
 * encodings, and the final return are not visible in this view.
 */
454 CharSet conv_guess_encoding(const gchar *str)
456 const guchar *p = str;
457 CharSet guessed = C_US_ASCII;
460 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
/* only trust the escape sequence while no 8-bit evidence has been seen */
461 if (guessed == C_US_ASCII)
462 return C_ISO_2022_JP;
464 } else if (isascii(*p)) {
466 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
/* first bytes 0xfd and 0xfe get their own handling (statement not visible) */
467 if (*p >= 0xfd && *p <= 0xfe)
469 else if (guessed == C_SHIFT_JIS) {
470 if ((issjiskanji1(*p) &&
471 issjiskanji2(*(p + 1))) ||
473 guessed = C_SHIFT_JIS;
479 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
480 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
481 guessed = C_SHIFT_JIS;
485 } else if (issjishwkana(*p)) {
486 guessed = C_SHIFT_JIS;
/* conv_jistodisp: run conv_jistoeuc() into outbuf, then conv_unreadable_eucjp() on the result. */
496 void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
498 conv_jistoeuc(outbuf, outlen, inbuf);
499 conv_unreadable_eucjp(outbuf);
/* conv_sjistodisp: run conv_sjistoeuc() into outbuf, then conv_unreadable_eucjp() on the result. */
502 void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
504 conv_sjistoeuc(outbuf, outlen, inbuf);
505 conv_unreadable_eucjp(outbuf);
/* conv_euctodisp: copy inbuf to outbuf with strncpy2(), then run conv_unreadable_eucjp() on the copy. */
508 void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
510 strncpy2(outbuf, inbuf, outlen);
511 conv_unreadable_eucjp(outbuf);
/* conv_anytodisp: run conv_anytoeuc() (which guesses the input encoding) into outbuf, then conv_unreadable_eucjp(). */
514 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
516 conv_anytoeuc(outbuf, outlen, inbuf);
517 conv_unreadable_eucjp(outbuf);
/* conv_ustodisp: copy inbuf to outbuf with strncpy2(), then run conv_unreadable_8bit() on the copy. */
520 void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
522 strncpy2(outbuf, inbuf, outlen);
523 conv_unreadable_8bit(outbuf);
/* conv_latintodisp: copy inbuf to outbuf with strncpy2(), then run conv_unreadable_latin() on the copy. */
526 void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
528 strncpy2(outbuf, inbuf, outlen);
529 conv_unreadable_latin(outbuf);
/* conv_noconv: pass-through converter; copies inbuf to outbuf with strncpy2() and changes nothing. */
532 void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
534 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_code_converter_new: allocate a zero-filled CodeConverter and fill
 * it in from charset: the conversion function from
 * conv_get_code_conv_func(), a g_strdup() copy of the name, and the
 * CharSet from conv_get_charset_from_str().
 * NOTE(review): the return statement is not visible; presumably the new
 * converter is returned and released with conv_code_converter_destroy().
 */
537 CodeConverter *conv_code_converter_new(const gchar *charset)
541 conv = g_new0(CodeConverter, 1);
543 conv->code_conv_func = conv_get_code_conv_func(charset);
545 conv->charset_str = g_strdup(charset);
546 conv->charset = conv_get_charset_from_str(charset);
/*
 * conv_code_converter_destroy: free the charset name string owned by conv.
 * NOTE(review): freeing of conv itself is not visible in this view.
 */
551 void conv_code_converter_destroy(CodeConverter *conv)
553 g_free(conv->charset_str);
/*
 * conv_convert: convert inbuf into outbuf using the settings stored in conv.
 * One build variant converts via conv_codeset_strdup() with
 * conv->charset_str and copies the result into outbuf; the
 * !HAVE_LIBJCONV variant calls conv->code_conv_func directly.
 * NOTE(review): the remaining parameters, error handling and return
 * values are not visible in this view.
 */
557 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
563 str = conv_codeset_strdup(inbuf, conv->charset_str, NULL);
567 strncpy2(outbuf, str, outlen);
570 #else /* !HAVE_LIBJCONV */
571 conv->code_conv_func(outbuf, outlen, inbuf);
/*
 * conv_codeset_strdup: return a newly allocated copy of inbuf converted
 * from src_codeset to dest_codeset.
 *
 * Visible behaviour:
 *  - A display converter is looked up with conv_get_code_conv_func(); if
 *    it is not conv_noconv, the input is converted into a freshly
 *    allocated buffer that is then shrunk to the result length.
 *  - libjconv build: identical source and destination names (compared
 *    case-insensitively) return a plain g_strdup(); so does a current
 *    codeset of US-ASCII.  Otherwise jconv_alloc_conv() performs the
 *    conversion and a warning is logged on failure.
 *  - non-libjconv build: only EUC-JP / Shift_JIS to ISO-2022-JP is
 *    converted; other combinations return g_strdup(inbuf).
 *
 * NOTE(review): the conditions guarding several of these paths, and the
 * return statements, are not visible in this view.
 */
577 gchar *conv_codeset_strdup(const gchar *inbuf,
578 const gchar *src_codeset, const gchar *dest_codeset)
584 const gchar *const *codesets;
586 #else /* !HAVE_LIBJCONV */
587 CharSet src_charset = C_AUTO, dest_charset = C_AUTO;
593 func = conv_get_code_conv_func(src_codeset);
594 if (func != conv_noconv) {
/* these three converters get a buffer of twice the input length plus the terminator */
595 if (func == conv_jistodisp ||
596 func == conv_sjistodisp ||
597 func == conv_anytodisp)
598 len = strlen(inbuf) * 2 + 1;
600 len = strlen(inbuf) + 1;
602 if (!buf) return NULL;
603 func(buf, len, inbuf);
/* trim the allocation down to the converted string's actual size */
604 buf = g_realloc(buf, strlen(buf) + 1);
609 /* don't convert if src and dest codeset are identical */
610 if (src_codeset && dest_codeset &&
611 !strcasecmp(src_codeset, dest_codeset))
612 return g_strdup(inbuf);
616 codesets = &src_codeset;
619 codesets = jconv_info_get_pref_codesets(&n_codesets);
621 dest_codeset = conv_get_current_charset_str();
622 /* don't convert if current codeset is US-ASCII */
623 if (!strcasecmp(dest_codeset, CS_US_ASCII))
624 return g_strdup(inbuf);
627 if (jconv_alloc_conv(inbuf, strlen(inbuf), &buf, &len,
628 codesets, n_codesets,
629 &actual_codeset, dest_codeset)
634 g_warning("code conversion from %s to %s failed\n",
635 codesets && codesets[0] ? codesets[0] : "(unknown)",
640 #else /* !HAVE_LIBJCONV */
/* map the source codeset name onto a CharSet; unrecognised names leave it as C_AUTO */
642 if (!strcasecmp(src_codeset, CS_EUC_JP) ||
643 !strcasecmp(src_codeset, CS_EUCJP))
644 src_charset = C_EUC_JP;
645 else if (!strcasecmp(src_codeset, CS_SHIFT_JIS) ||
646 !strcasecmp(src_codeset, "SHIFT-JIS") ||
647 !strcasecmp(src_codeset, "SJIS"))
648 src_charset = C_SHIFT_JIS;
649 if (dest_codeset && !strcasecmp(dest_codeset, CS_ISO_2022_JP))
650 dest_charset = C_ISO_2022_JP;
653 if ((src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS) &&
654 dest_charset == C_ISO_2022_JP) {
/* output buffer is sized at three times (input length + 1) */
655 len = (strlen(inbuf) + 1) * 3;
658 if (src_charset == C_EUC_JP)
659 conv_euctojis(buf, len, inbuf);
661 conv_anytojis(buf, len, inbuf);
662 buf = g_realloc(buf, strlen(buf) + 1);
665 buf = g_strdup(inbuf);
668 #endif /* !HAVE_LIBJCONV */
/*
 * conv_get_code_conv_func: choose the display-conversion function for a
 * charset name.
 *
 * Visible mapping (names compared case-insensitively):
 *   ISO-2022-JP / ISO-2022-JP-2 constants   -> conv_jistodisp
 *   CS_US_ASCII                             -> conv_ustodisp
 *   CS_ISO_8859_1 (first 10 chars compared) -> conv_latintodisp
 *   any name starting with "ISO-8859-"      -> conv_latintodisp
 *   Shift_JIS spellings                     -> conv_sjistodisp
 *   EUC-JP spellings                        -> conv_euctodisp
 *   anything else                           -> conv_noconv
 * An earlier path returns conv_anytodisp when the current charset is
 * C_EUC_JP or C_SHIFT_JIS.
 *
 * NOTE(review): the condition guarding that early path, any preprocessor
 * conditionals around the ISO-8859 arms, and the final return are not
 * visible in this view.
 */
671 CodeConvFunc conv_get_code_conv_func(const gchar *charset)
673 CodeConvFunc code_conv;
677 cur_charset = conv_get_current_charset();
678 if (cur_charset == C_EUC_JP || cur_charset == C_SHIFT_JIS)
679 return conv_anytodisp;
684 if (!strcasecmp(charset, CS_ISO_2022_JP) ||
685 !strcasecmp(charset, CS_ISO_2022_JP_2))
686 code_conv = conv_jistodisp;
687 else if (!strcasecmp(charset, CS_US_ASCII))
688 code_conv = conv_ustodisp;
689 else if (!strncasecmp(charset, CS_ISO_8859_1, 10))
690 code_conv = conv_latintodisp;
692 else if (!strncasecmp(charset, "ISO-8859-", 9))
693 code_conv = conv_latintodisp;
695 else if (!strcasecmp(charset, CS_SHIFT_JIS) ||
696 !strcasecmp(charset, "SHIFT-JIS") ||
697 !strcasecmp(charset, "SJIS") ||
698 !strcasecmp(charset, "X-SJIS"))
699 code_conv = conv_sjistodisp;
700 else if (!strcasecmp(charset, CS_EUC_JP) ||
701 !strcasecmp(charset, CS_EUCJP))
702 code_conv = conv_euctodisp;
704 code_conv = conv_noconv;
/*
 * Table pairing CharSet enum values with charset name constants.
 * NOTE(review): the member declarations and the table's identifier are
 * not visible here; presumably this is the `charsets` array searched by
 * conv_get_charset_str() and conv_get_charset_from_str() -- confirm.
 * Some CharSet values appear more than once with alternative names
 * (C_US_ASCII, C_EUC_JP).
 */
709 static const struct {
713 {C_US_ASCII, CS_US_ASCII},
714 {C_US_ASCII, CS_ANSI_X3_4_1968},
716 {C_ISO_8859_1, CS_ISO_8859_1},
717 {C_ISO_8859_2, CS_ISO_8859_2},
718 {C_ISO_8859_4, CS_ISO_8859_4},
719 {C_ISO_8859_5, CS_ISO_8859_5},
720 {C_ISO_8859_7, CS_ISO_8859_7},
721 {C_ISO_8859_8, CS_ISO_8859_8},
722 {C_ISO_8859_9, CS_ISO_8859_9},
723 {C_ISO_8859_11, CS_ISO_8859_11},
724 {C_ISO_8859_13, CS_ISO_8859_13},
725 {C_ISO_8859_15, CS_ISO_8859_15},
726 {C_BALTIC, CS_BALTIC},
727 {C_CP1251, CS_CP1251},
728 {C_WINDOWS_1251, CS_WINDOWS_1251},
729 {C_KOI8_R, CS_KOI8_R},
730 {C_KOI8_U, CS_KOI8_U},
731 {C_ISO_2022_JP, CS_ISO_2022_JP},
732 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
733 {C_EUC_JP, CS_EUC_JP},
734 {C_EUC_JP, CS_EUCJP},
735 {C_SHIFT_JIS, CS_SHIFT_JIS},
736 {C_ISO_2022_KR, CS_ISO_2022_KR},
737 {C_EUC_KR, CS_EUC_KR},
738 {C_ISO_2022_CN, CS_ISO_2022_CN},
739 {C_EUC_CN, CS_EUC_CN},
740 {C_GB2312, CS_GB2312},
741 {C_EUC_TW, CS_EUC_TW},
743 {C_TIS_620, CS_TIS_620},
744 {C_WINDOWS_874, CS_WINDOWS_874},
/*
 * Table with three columns per row: a locale name string, a CharSet, and
 * a second CharSet.
 * NOTE(review): the member declarations and the table's identifier are
 * not visible here; presumably this is `locale_table`, whose .charset is
 * read by conv_get_current_charset() and whose .out_charset is read by
 * conv_get_outgoing_charset() -- confirm.
 * Those lookups use a prefix comparison of the locale name, so rows with
 * an explicit codeset suffix sit before the shorter row for the same
 * language (e.g. "ja_JP.eucJP" before "ja_JP").
 */
748 static const struct {
753 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
754 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
755 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
756 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
757 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
758 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
759 {"ko_KR" , C_EUC_KR , C_EUC_KR},
760 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
761 {"zh_CN" , C_GB2312 , C_GB2312},
762 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
763 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
764 {"zh_TW" , C_BIG5 , C_BIG5},
766 {"ru_RU.KOI8-R" , C_KOI8_R , C_ISO_8859_5},
767 {"ru_RU.CP1251" , C_WINDOWS_1251, C_ISO_8859_5},
769 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
771 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
772 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
773 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
774 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
775 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
776 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
777 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
778 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
779 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
780 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
781 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
782 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
783 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
784 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
785 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
787 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
788 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
789 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
790 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
791 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
792 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
793 {"ru_RU" , C_ISO_8859_5 , C_ISO_8859_5},
794 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
795 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
796 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
798 {"th_TH" , C_TIS_620 , C_TIS_620},
799 /* {"th_TH" , C_WINDOWS_874}, */
800 /* {"th_TH" , C_ISO_8859_11}, */
802 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
803 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
804 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
805 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
806 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
808 {"C" , C_US_ASCII , C_US_ASCII},
809 {"POSIX" , C_US_ASCII , C_US_ASCII},
810 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
812 #endif /* !HAVE_LIBJCONV */
/*
 * conv_get_charset_str: linear search of the charsets table; returns the
 * name of the first entry whose CharSet equals charset.
 * NOTE(review): the not-found return is not visible; callers in this file
 * test the result for NULL, so presumably NULL is returned -- confirm.
 */
814 const gchar *conv_get_charset_str(CharSet charset)
818 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
819 if (charsets[i].charset == charset)
820 return charsets[i].name;
/*
 * conv_get_charset_from_str: map a charset name to its CharSet value.
 * A NULL name yields C_AUTO.  Otherwise the charsets table is searched
 * case-insensitively and the first matching entry's CharSet is returned.
 * (The not-found return is not visible in this view.)
 */
826 CharSet conv_get_charset_from_str(const gchar *charset)
830 if (!charset) return C_AUTO;
832 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
833 if (!strcasecmp(charsets[i].name, charset))
834 return charsets[i].charset;
/*
 * conv_get_current_charset: determine the CharSet of the running
 * environment and cache it in a function-local static (-1 means "not yet
 * computed").
 *
 * Visible strategies: one variant matches the codeset reported by
 * jconv_info_get_current_codeset() against the charsets table.  The other
 * uses conv_get_current_locale(): a locale containing "UTF-8" gives
 * C_UTF_8, otherwise locale_table is searched.  C_US_ASCII and C_AUTO are
 * assigned on paths whose conditions are not visible.
 *
 * NOTE(review): the preprocessor conditionals separating the two
 * variants, and the return statements, are not visible in this view.
 */
840 CharSet conv_get_current_charset(void)
842 static CharSet cur_charset = -1;
846 const gchar *cur_codeset;
848 const gchar *cur_locale;
/* a value other than -1 means the result was already computed */
851 if (cur_charset != -1)
855 cur_codeset = jconv_info_get_current_codeset();
856 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
857 if (!strcasecmp(cur_codeset, charsets[i].name)) {
858 cur_charset = charsets[i].charset;
863 cur_locale = conv_get_current_locale();
865 cur_charset = C_US_ASCII;
869 if (strcasestr(cur_locale, "UTF-8")) {
870 cur_charset = C_UTF_8;
874 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
877 /* "ja_JP.EUC" matches with "ja_JP.eucJP" and "ja_JP.EUC" */
878 /* "ja_JP" matches with "ja_JP.xxxx" and "ja" */
/* prefix match: the table entry must be a leading substring of the current locale */
879 if (!strncasecmp(cur_locale, locale_table[i].locale,
880 strlen(locale_table[i].locale))) {
881 cur_charset = locale_table[i].charset;
/* table entries with '_' but no '.' after it also match a bare two-letter locale */
883 } else if ((p = strchr(locale_table[i].locale, '_')) &&
884 !strchr(p + 1, '.')) {
885 if (strlen(cur_locale) == 2 &&
886 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
887 cur_charset = locale_table[i].charset;
894 cur_charset = C_AUTO;
/*
 * conv_get_current_charset_str: return the name of the current charset,
 * looked up via conv_get_charset_str(conv_get_current_charset()) and kept
 * in a function-local static.  Falls back to "US-ASCII" when the lookup
 * produced NULL.
 * (The condition guarding the lookup is not visible in this view.)
 */
898 const gchar *conv_get_current_charset_str(void)
900 static const gchar *codeset = NULL;
903 codeset = conv_get_charset_str(conv_get_current_charset());
905 return codeset ? codeset : "US-ASCII";
/*
 * conv_get_outgoing_charset: determine the CharSet for outgoing text and
 * cache it in a function-local static (-1 means "not yet computed").
 *
 * Visible strategies: one variant walks the preferred codesets from
 * jconv_info_get_pref_codesets(), matching them against the charsets
 * table from index 3 onward, then looks for "UTF-8" among them.  The
 * other takes the out_charset column of the locale_table row matching
 * the current locale.  C_AUTO is assigned on paths whose conditions are
 * not visible.
 *
 * NOTE(review): the preprocessor conditionals, loop exits and return
 * statements are not visible in this view.
 */
908 CharSet conv_get_outgoing_charset(void)
910 static CharSet out_charset = -1;
914 gint j, n_pref_codesets;
915 const gchar *const *pref_codesets;
917 const gchar *cur_locale;
/* a value other than -1 means the result was already computed */
920 if (out_charset != -1)
924 /* skip US-ASCII and UTF-8 */
925 pref_codesets = jconv_info_get_pref_codesets(&n_pref_codesets);
926 for (i = 0; i < n_pref_codesets; i++) {
/* NOTE(review): starting at j = 3 assumes the first three charsets rows are the skipped entries; in the visible table only the first two rows are US-ASCII -- confirm against the full table */
927 for (j = 3; j < sizeof(charsets) / sizeof(charsets[0]); j++) {
928 if (!strcasecmp(pref_codesets[i], charsets[j].name)) {
929 out_charset = charsets[j].charset;
935 for (i = 0; i < n_pref_codesets; i++) {
936 if (!strcasecmp(pref_codesets[i], "UTF-8")) {
937 out_charset = C_UTF_8;
942 out_charset = C_AUTO;
944 cur_locale = conv_get_current_locale();
946 out_charset = C_AUTO;
950 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
/* same matching rules as conv_get_current_charset(), but reading the out_charset column */
953 if (!strncasecmp(cur_locale, locale_table[i].locale,
954 strlen(locale_table[i].locale))) {
955 out_charset = locale_table[i].out_charset;
957 } else if ((p = strchr(locale_table[i].locale, '_')) &&
958 !strchr(p + 1, '.')) {
959 if (strlen(cur_locale) == 2 &&
960 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
961 out_charset = locale_table[i].out_charset;
967 /* encoding conversion without libjconv is only supported
968 on Japanese locale for now */
969 if (out_charset == C_ISO_2022_JP)
972 out_charset = conv_get_current_charset();
/*
 * conv_get_outgoing_charset_str: return the charset name to use for
 * outgoing text.
 *
 * If prefs_common.outgoing_charset is set and differs from CS_AUTO it is
 * returned as is.  A preference whose first character is not alphabetic
 * is freed and reset to a copy of CS_AUTO.  Otherwise the name is derived
 * from conv_get_outgoing_charset(), with "US-ASCII" as the fallback when
 * no name is found.
 */
978 const gchar *conv_get_outgoing_charset_str(void)
983 if (prefs_common.outgoing_charset) {
/* repair an invalid stored preference before consulting it */
984 if (!isalpha(prefs_common.outgoing_charset[0])) {
985 g_free(prefs_common.outgoing_charset);
986 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
987 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
988 return prefs_common.outgoing_charset;
991 out_charset = conv_get_outgoing_charset();
992 str = conv_get_charset_str(out_charset);
994 return str ? str : "US-ASCII";
/*
 * conv_get_current_locale: find the current locale name.  Tries the
 * environment variables LC_ALL, LC_CTYPE and LANG in that order, then
 * falls back to setlocale(LC_CTYPE, NULL).  The result is logged with
 * debug_print().
 * (The declaration of cur_locale and the return are not visible here.)
 */
997 const gchar *conv_get_current_locale(void)
1001 cur_locale = g_getenv("LC_ALL");
1002 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1003 if (!cur_locale) cur_locale = g_getenv("LANG");
1004 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1006 debug_print("current locale: %s\n",
1007 cur_locale ? cur_locale : "(none)");
/*
 * conv_unmime_header_overwrite: decode a MIME-encoded header in place,
 * writing the result back into str.
 *
 * When the current charset is C_EUC_JP the string is first passed through
 * conv_anytodisp() into a temporary buffer of twice its length plus one,
 * and unmime_header() then writes from that buffer into str.  Otherwise
 * unmime_header() writes into a temporary buffer of the same size as str,
 * which is copied back with strncpy2().
 *
 * NOTE(review): Xalloca is a project macro not shown here; its third
 * argument looks like the statement run on allocation failure -- confirm.
 */
1012 void conv_unmime_header_overwrite(gchar *str)
1016 CharSet cur_charset;
1018 cur_charset = conv_get_current_charset();
1020 if (cur_charset == C_EUC_JP) {
1021 buflen = strlen(str) * 2 + 1;
1022 Xalloca(buf, buflen, return);
1023 conv_anytodisp(buf, buflen, str);
1024 unmime_header(str, buf);
1026 buflen = strlen(str) + 1;
1027 Xalloca(buf, buflen, return);
1028 unmime_header(buf, str);
1029 strncpy2(str, buf, buflen);
/*
 * conv_unmime_header: decode a MIME-encoded header str into outbuf.
 *
 * When the current charset is C_EUC_JP the string is first passed through
 * conv_anytodisp() into a temporary buffer of twice its length plus one,
 * and unmime_header() decodes from that buffer.  Otherwise
 * unmime_header() decodes str directly.
 *
 * NOTE(review): no use of outlen or charset appears in the visible lines;
 * confirm against the full source.
 */
1033 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1034 const gchar *charset)
1036 CharSet cur_charset;
1038 cur_charset = conv_get_current_charset();
1040 if (cur_charset == C_EUC_JP) {
1044 buflen = strlen(str) * 2 + 1;
1045 Xalloca(buf, buflen, return);
1046 conv_anytodisp(buf, buflen, str);
1047 unmime_header(outbuf, buf);
1049 unmime_header(outbuf, str);
/* Length limits used by conv_encode_header(); MAX_LINELEN also sizes its output-space check. */
1052 #define MAX_ENCLEN 75
1053 #define MAX_LINELEN 76
/* 4 characters for every full group of 3 from len, plus 4 more if a partial group remains. */
1055 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/*
 * conv_encode_header (libjconv variant): build a header value in dest
 * from src, wrapping non-ASCII runs as "=?" charset "?B?" base64 "?=".
 *
 * Visible outline:
 *  - The charset name comes from conv_get_outgoing_charset_str().
 *  - src is converted to a wide-character string; on failure src is
 *    copied to dest unchanged after a warning.
 *  - The outer loop splits the text at whitespace; a run is treated as
 *    non-ASCII when is_next_nonascii() says so, and then extends across
 *    whitespace for as long as non-ASCII text follows.
 *  - The inner loop converts one wide character at a time, re-running
 *    jconv_alloc_conv() on the accumulated bytes and checking the
 *    projected line length before accepting the character.
 *  - Accepted text is written as a base64 encoded-word (non-ASCII) or
 *    copied as is.
 *
 * NOTE(review): the remaining parameters, the line-folding statements,
 * the length comparisons' right-hand sides and all cleanup are not
 * visible in this view.
 */
1058 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1064 size_t line_len, mimehdr_len, mimehdr_begin_len;
1065 gchar *mimehdr_init = "=?";
1066 gchar *mimehdr_end = "?=";
1067 gchar *mimehdr_enctype = "?B?";
1068 const gchar *mimehdr_charset;
1070 /* g_print("src = %s\n", src); */
1071 mimehdr_charset = conv_get_outgoing_charset_str();
1073 /* convert to wide-character string */
1074 wsrcp = wsrc = strdup_mbstowcs(src);
1076 g_warning("Can't convert string to wide characters.\n");
1077 strncpy2(dest, src, len);
/* mimehdr_len: total wrapper length; mimehdr_begin_len: the part written before the payload */
1081 mimehdr_len = strlen(mimehdr_init) + strlen(mimehdr_end) +
1082 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1083 mimehdr_begin_len = strlen(mimehdr_init) +
1084 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1085 line_len = header_len;
1090 wchar_t *wp, *wtmp, *wtmpp;
1092 gboolean str_is_non_ascii;
1094 /* irresponsible buffer overrun check */
1095 if ((len - (destp - dest)) < (MAX_LINELEN + 1) * 2) break;
1097 /* encode string including space
1098 if non-ASCII string follows */
1099 if (is_next_nonascii(wsrcp)) {
1101 while ((wp = find_wspace(wp)) != NULL)
1102 if (!is_next_nonascii(wp)) break;
1103 str_is_non_ascii = TRUE;
1105 wp = find_wspace(wsrcp);
1106 str_is_non_ascii = FALSE;
1110 wtmp = wcsndup(wsrcp, wp - wsrcp);
/* count the whitespace characters that follow the extracted run */
1112 while (iswspace(wsrcp[nspc])) nspc++;
1114 wtmp = wcsdup(wsrcp);
1115 wsrcp += wcslen(wsrcp);
1122 gchar *tmp; /* internal codeset */
1123 gchar *raw; /* converted, but not base64 encoded */
1124 register gchar *tmpp;
/* worst case: every wide character expands to MB_CUR_MAX bytes, plus the terminator */
1127 tmpp = tmp = g_malloc(wcslen(wtmpp) * MB_CUR_MAX + 1);
1132 while (*wtmpp != (wchar_t)0) {
1135 gchar *raw_new = NULL;
1136 int raw_new_len = 0;
1137 const gchar *src_codeset;
1139 mbl = wctomb(tmpp, *wtmpp);
1141 g_warning("invalid wide character\n");
1146 src_codeset = conv_get_current_charset_str();
1147 /* printf ("tmp = %s, tlen = %d, mbl\n",
1149 if (jconv_alloc_conv(tmp, tlen + mbl,
1150 &raw_new, &raw_new_len,
1152 &dummy, mimehdr_charset)
1154 g_warning("can't convert\n");
1159 if (str_is_non_ascii) {
1160 gint dlen = mimehdr_len +
1162 if ((line_len + dlen +
1163 (*(wtmpp + 1) ? 0 : nspc) +
1164 (line_len > 1 ? 1 : 0))
1177 } else if ((line_len + tlen + mbl +
1178 (*(wtmpp + 1) ? 0 : nspc) +
1179 (line_len > 1 ? 1 : 0))
1182 if (1 + tlen + mbl +
1183 (*(wtmpp + 1) ? 0 : nspc)
1201 raw_len = raw_new_len;
1205 /* g_print("tmp = %s, tlen = %d, mb_seqlen = %d\n",
1206 tmp, tlen, mb_seqlen); */
1208 if (tlen == 0 || raw_len == 0) {
1214 if (line_len > 1 && destp > dest) {
1220 if (str_is_non_ascii) {
/* opening part of the encoded-word: "=?" charset "?B?" */
1221 g_snprintf(destp, len - strlen(dest), "%s%s%s",
1222 mimehdr_init, mimehdr_charset,
1224 destp += mimehdr_begin_len;
1225 line_len += mimehdr_begin_len;
/* payload: base64 of the converted bytes */
1227 base64_encode(destp, raw, raw_len);
1228 line_len += strlen(destp);
1229 destp += strlen(destp);
/* closing "?=" */
1231 strcpy(destp, mimehdr_end);
1232 destp += strlen(mimehdr_end);
1233 line_len += strlen(mimehdr_end);
1236 line_len += strlen(destp);
1237 destp += strlen(destp);
1242 /* g_print("line_len = %d\n\n", line_len); */
1243 } while (*wtmpp != (wchar_t)0);
/* copy the whitespace that separated this run from the next */
1245 while (iswspace(*wsrcp)) {
1248 mbl = wctomb(destp, *wsrcp++);
1261 /* g_print("dest = %s\n", dest); */
1263 #else /* !HAVE_LIBJCONV */
/* Byte length of one escape sequence; conv_encode_header() adds twice this value for a KI + KO pair. */
1265 #define JIS_SEQLEN 3
/*
 * conv_encode_header (variant built without libjconv): build a header
 * value in dest from src, wrapping non-ASCII runs as
 * "=?" charset "?B?" base64 "?=".
 *
 * Visible outline:
 *  - The charset name comes from conv_get_outgoing_charset_str(); a
 *    result of "US-ASCII" is replaced by "ISO-8859-1", and "ISO-2022-JP"
 *    takes a separate branch (statement not visible; presumably it
 *    enables do_conv -- confirm).
 *  - src is converted to a wide-character string; on failure src is
 *    copied to dest unchanged after a warning.
 *  - The outer loop splits the text at whitespace; a run is treated as
 *    non-ASCII when is_next_nonascii() says so.
 *  - The inner loop converts one wide character at a time and estimates
 *    the encoded length with B64LEN(), reserving room for escape
 *    sequences when do_conv is set.
 *  - Non-ASCII runs are passed through conv_euctojis() into a raw buffer
 *    and base64-encoded; other text is copied as is.
 *
 * NOTE(review): the remaining parameters, the line-folding statements,
 * the length comparisons' right-hand sides and all cleanup are not
 * visible in this view.
 */
1267 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1273 size_t line_len, mimehdr_len, mimehdr_begin_len;
1274 gchar *mimehdr_init = "=?";
1275 gchar *mimehdr_end = "?=";
1276 gchar *mimehdr_enctype = "?B?";
1277 const gchar *mimehdr_charset;
1278 gboolean do_conv = FALSE;
1280 /* g_print("src = %s\n", src); */
1281 mimehdr_charset = conv_get_outgoing_charset_str();
1282 if (strcmp(mimehdr_charset, "ISO-2022-JP") == 0)
1284 else if (strcmp(mimehdr_charset, "US-ASCII") == 0)
1285 mimehdr_charset = "ISO-8859-1";
1287 /* convert to wide-character string */
1288 wsrcp = wsrc = strdup_mbstowcs(src);
1290 g_warning("Can't convert string to wide characters.\n");
1291 strncpy2(dest, src, len);
/* mimehdr_len: total wrapper length; mimehdr_begin_len: the part written before the payload */
1295 mimehdr_len = strlen(mimehdr_init) + strlen(mimehdr_end) +
1296 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1297 mimehdr_begin_len = strlen(mimehdr_init) +
1298 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1299 line_len = header_len;
1304 wchar_t *wp, *wtmp, *wtmpp;
1306 gboolean str_is_non_ascii;
1308 /* irresponsible buffer overrun check */
1309 if ((len - (destp - dest)) < (MAX_LINELEN + 1) * 2) break;
1311 /* encode string including space
1312 if non-ASCII string follows */
1313 if (is_next_nonascii(wsrcp)) {
1315 while ((wp = find_wspace(wp)) != NULL)
1316 if (!is_next_nonascii(wp)) break;
1317 str_is_non_ascii = TRUE;
1319 wp = find_wspace(wsrcp);
1320 str_is_non_ascii = FALSE;
1324 wtmp = wcsndup(wsrcp, wp - wsrcp);
/* count the whitespace characters that follow the extracted run */
1326 while (iswspace(wsrcp[nspc])) nspc++;
1328 wtmp = wcsdup(wsrcp);
1329 wsrcp += wcslen(wsrcp);
1335 gint prev_mbl = 1, tlen = 0, mb_seqlen = 0;
1337 register gchar *tmpp;
/* worst case: every wide character expands to MB_CUR_MAX bytes, plus the terminator */
1339 tmpp = tmp = g_malloc(wcslen(wtmpp) * MB_CUR_MAX + 1);
1342 while (*wtmpp != (wchar_t)0) {
1345 mbl = wctomb(tmpp, *wtmpp);
1347 g_warning("invalid wide character\n");
1352 /* length of KI + KO */
/* a switch from a 1-byte to a 2-byte character reserves room for two escape sequences */
1353 if (do_conv && prev_mbl == 1 && mbl == 2)
1354 mb_seqlen += JIS_SEQLEN * 2;
1356 if (str_is_non_ascii) {
1357 gint dlen = mimehdr_len +
1358 B64LEN(tlen + mb_seqlen + mbl);
1360 if ((line_len + dlen +
1361 (*(wtmpp + 1) ? 0 : nspc) +
1362 (line_len > 1 ? 1 : 0))
1375 } else if ((line_len + tlen + mbl +
1376 (*(wtmpp + 1) ? 0 : nspc) +
1377 (line_len > 1 ? 1 : 0))
1379 if (1 + tlen + mbl +
1380 (*(wtmpp + 1) ? 0 : nspc)
1399 /* g_print("tmp = %s, tlen = %d, mb_seqlen = %d\n",
1400 tmp, tlen, mb_seqlen); */
1407 if (line_len > 1 && destp > dest) {
1413 if (str_is_non_ascii) {
/* raw buffer: accumulated bytes plus reserved escape-sequence space plus the terminator */
1416 raw = g_new(gchar, tlen + mb_seqlen + 1);
1418 conv_euctojis(raw, tlen + mb_seqlen + 1,
/* opening part of the encoded-word: "=?" charset "?B?" */
1422 g_snprintf(destp, len - strlen(dest), "%s%s%s",
1423 mimehdr_init, mimehdr_charset,
1425 destp += mimehdr_begin_len;
1426 line_len += mimehdr_begin_len;
/* payload: base64 of the converted bytes */
1428 base64_encode(destp, raw, strlen(raw));
1429 line_len += strlen(destp);
1430 destp += strlen(destp);
/* closing "?=" */
1432 strcpy(destp, mimehdr_end);
1433 destp += strlen(mimehdr_end);
1434 line_len += strlen(mimehdr_end);
1439 line_len += strlen(destp);
1440 destp += strlen(destp);
1444 /* g_print("line_len = %d\n\n", line_len); */
1445 } while (*wtmpp != (wchar_t)0);
/* copy the whitespace that separated this run from the next */
1447 while (iswspace(*wsrcp)) {
1450 mbl = wctomb(destp, *wsrcp++);
1463 /* g_print("dest = %s\n", dest); */
1465 #endif /* HAVE_LIBJCONV */