2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2002 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
29 #if (HAVE_WCTYPE_H && HAVE_WCHAR_H)
47 #include "prefs_common.h"
/* Character written in place of bytes that cannot be displayed
   (see the SUBST_CHAR store in conv_unreadable_8bit()). */
57 #define SUBST_CHAR '_'
/* Byte classifiers: every macro masks its argument to the low 8 bits
   and tests it against the range(s) spelled out in its expansion. */
/* true for 0xa1..0xfe */
60 #define iseuckanji(c) \
61 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* true for exactly 0x8e */
62 #define iseuchwkana1(c) \
63 (((c) & 0xff) == 0x8e)
/* true for 0xa1..0xdf */
64 #define iseuchwkana2(c) \
65 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* NOTE(review): the #define line that owns the next expansion (true for
   exactly 0x8f) is not visible in this view; presumably iseucaux(c),
   which the functions below call -- confirm. */
67 (((c) & 0xff) == 0x8f)
/* true for 0xa9..0xaf; conv_unreadable_eucjp() applies it to the first
   byte of a pair */
68 #define isunprintableeuckanji(c) \
69 (((c) & 0xff) >= 0xa9 && ((c) & 0xff) <= 0xaf)
/* true for 0x81..0x9f or 0xe0..0xfc */
70 #define issjiskanji1(c) \
71 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
72 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* true for 0x40..0x7e or 0x80..0xfc */
73 #define issjiskanji2(c) \
74 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
75 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* true for 0xa1..0xdf */
76 #define issjishwkana(c) \
77 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* Fragments of the state-switching helper macros; their #define lines
   and most of their bodies are not visible in this view.  Each fragment
   tests the local variable state against one JIS_* value, and the
   JIS_HWKANA and JIS_AUXKANJI fragments assign that value to state.
   No comment is placed between the lines because they are joined by
   backslash continuations. */
80 if (state != JIS_KANJI) { \
88 if (state != JIS_ASCII) { \
96 if (state != JIS_HWKANA) { \
100 state = JIS_HWKANA; \
104 if (state != JIS_AUXKANJI) { \
109 state = JIS_AUXKANJI; \
/*
 * conv_jistoeuc: scan the NUL-terminated inbuf one byte at a time and
 * write the converted bytes through outbuf.
 *
 * The scanner keeps a JISState (initially JIS_ASCII).  The visible
 * branches inspect the bytes that follow an escape introducer and the
 * control bytes 0x0e and 0x0f; payload bytes are copied with their high
 * bit set (| 0x80).
 *
 * NOTE(review): none of the visible lines reads outlen -- confirm that
 * every caller sizes outbuf for the worst case.
 */
112 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
114 const guchar *in = inbuf;
115 guchar *out = outbuf;
116 JISState state = JIS_ASCII;
118 while (*in != '\0') {
/* second byte of the sequence selects the character set */
122 if (*(in + 1) == '@' || *(in + 1) == 'B') {
125 } else if (*(in + 1) == '(' &&
127 state = JIS_AUXKANJI;
130 /* unknown escape sequence */
133 } else if (*in == '(') {
134 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
137 } else if (*(in + 1) == 'I') {
141 /* unknown escape sequence */
145 /* unknown escape sequence */
/* 0x0e and 0x0f are handled as single control bytes; what each branch
   does is not visible in this view */
148 } else if (*in == 0x0e) {
151 } else if (*in == 0x0f) {
/* two-byte copy: stop if the input ends after the first byte */
160 *out++ = *in++ | 0x80;
161 if (*in == '\0') break;
162 *out++ = *in++ | 0x80;
/* single-byte copy */
166 *out++ = *in++ | 0x80;
/* two-byte copy, same early exit as above */
170 *out++ = *in++ | 0x80;
171 if (*in == '\0') break;
172 *out++ = *in++ | 0x80;
/*
 * conv_euctojis: scan the NUL-terminated inbuf and write 7-bit output
 * through outbuf.  Bytes classified by iseuckanji(), iseuchwkana1() /
 * iseuchwkana2() and iseucaux() are copied with the high bit cleared
 * (& 0x7f); a JISState (initially JIS_ASCII) is tracked alongside.
 *
 * NOTE(review): the escape-sequence emission and the handling of
 * malformed input sit on lines that are not visible here, and no
 * visible line reads outlen -- confirm buffer sizing with the callers.
 */
181 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
183 const guchar *in = inbuf;
184 guchar *out = outbuf;
185 JISState state = JIS_ASCII;
187 while (*in != '\0') {
191 } else if (iseuckanji(*in)) {
/* a valid pair needs a second byte in the same range */
192 if (iseuckanji(*(in + 1))) {
194 *out++ = *in++ & 0x7f;
195 *out++ = *in++ & 0x7f;
/* fallback path: looks at the following byte only if it exists and is
   not ASCII */
200 if (*in != '\0' && !isascii(*in)) {
205 } else if (iseuchwkana1(*in)) {
207 if (iseuchwkana2(*in)) {
209 *out++ = *in++ & 0x7f;
212 if (*in != '\0' && !isascii(*in)) {
217 } else if (iseucaux(*in)) {
219 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
221 *out++ = *in++ & 0x7f;
222 *out++ = *in++ & 0x7f;
225 if (*in != '\0' && !isascii(*in)) {
228 if (*in != '\0' && !isascii(*in)) {
/*
 * conv_sjistoeuc: scan the NUL-terminated inbuf and write converted
 * bytes through outbuf.  A lead byte accepted by issjiskanji1() that is
 * followed by a byte accepted by issjiskanji2() is remapped
 * arithmetically into two bytes, both stored with the high bit set.
 * Bytes accepted by issjishwkana() get their own branch.
 *
 * NOTE(review): no visible line reads outlen -- confirm buffer sizing
 * with the callers.
 */
241 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
243 const guchar *in = inbuf;
244 guchar *out = outbuf;
246 while (*in != '\0') {
249 } else if (issjiskanji1(*in)) {
250 if (issjiskanji2(*(in + 1))) {
252 guchar out2 = *(in + 1);
/* lead bytes below 0xa0 use offset 0x70, the others 0xb0 */
255 row = out1 < 0xa0 ? 0x70 : 0xb0;
/* two alternative mappings follow; the condition that selects between
   them is not visible in this view */
257 out1 = (out1 - row) * 2 - 1;
258 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
260 out1 = (out1 - row) * 2;
264 *out++ = out1 | 0x80;
265 *out++ = out2 | 0x80;
270 if (*in != '\0' && !isascii(*in)) {
275 } else if (issjishwkana(*in)) {
/*
 * conv_anytoeuc: dispatch on conv_guess_encoding(inbuf) and run
 * conv_jistoeuc(), conv_sjistoeuc() or a plain strncpy2() copy into
 * outbuf.  The case labels are not visible in this view.
 */
287 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
289 switch (conv_guess_encoding(inbuf)) {
291 conv_jistoeuc(outbuf, outlen, inbuf);
294 conv_sjistoeuc(outbuf, outlen, inbuf);
297 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_anytojis: dispatch on conv_guess_encoding(inbuf) and either run
 * conv_euctojis() or copy inbuf unchanged with strncpy2().  The case
 * labels are not visible in this view.
 */
302 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
304 switch (conv_guess_encoding(inbuf)) {
306 conv_euctojis(outbuf, outlen, inbuf);
309 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_unreadable_eucjp: rewrite str in place.  CR+LF pairs are
 * collapsed to LF, and byte sequences are classified with the
 * iseuc...() macros; the existing branch comments state which classes
 * are kept as printable and which are substituted.
 */
314 void conv_unreadable_eucjp(gchar *str)
316 register guchar *p = str;
/* strlen(p) bytes starting at p + 1 cover the rest of the string plus
   its terminating NUL, so the memmove drops the CR and keeps the string
   terminated */
320 /* convert CR+LF -> LF */
321 if (*p == '\r' && *(p + 1) == '\n')
322 memmove(p, p + 1, strlen(p));
323 /* printable 7 bit code */
325 } else if (iseuckanji(*p)) {
326 if (iseuckanji(*(p + 1)) && !isunprintableeuckanji(*p))
327 /* printable euc-jp code */
330 /* substitute unprintable code */
339 } else if (iseuchwkana1(*p)) {
340 if (iseuchwkana2(*(p + 1)))
341 /* euc-jp hankaku kana */
345 } else if (iseucaux(*p)) {
/* three-byte form: the lead byte plus two bytes in the iseuckanji range */
346 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
347 /* auxiliary kanji */
352 /* substitute unprintable 1 byte code */
/*
 * conv_unreadable_8bit: rewrite str in place.  CR+LF pairs are
 * collapsed to LF and every byte for which isascii() is false is
 * overwritten with SUBST_CHAR.
 */
357 void conv_unreadable_8bit(gchar *str)
359 register guchar *p = str;
/* the memmove length strlen(p) includes the terminating NUL of the
   shifted tail */
362 /* convert CR+LF -> LF */
363 if (*p == '\r' && *(p + 1) == '\n')
364 memmove(p, p + 1, strlen(p));
365 else if (!isascii(*p)) *p = SUBST_CHAR;
/*
 * conv_unreadable_latin: rewrite str in place.  CR+LF pairs are
 * collapsed to LF; bytes in 0x80..0x9f are singled out.
 * NOTE(review): the statement run for 0x80..0x9f is not visible here;
 * presumably it stores SUBST_CHAR as conv_unreadable_8bit() does --
 * confirm.
 */
370 void conv_unreadable_latin(gchar *str)
372 register guchar *p = str;
375 /* convert CR+LF -> LF */
376 if (*p == '\r' && *(p + 1) == '\n')
377 memmove(p, p + 1, strlen(p));
378 else if ((*p & 0xff) >= 0x80 && (*p & 0xff) <= 0x9f)
/*
 * conv_mb_alnum: rewrite str in place, replacing selected two-byte
 * characters with a single byte and closing the gap with memmove().
 *
 * char_tbl has 80 entries and is indexed by (second byte - 0xa0) for
 * pairs whose first byte is 0xa1; NCV marks pairs that have no
 * single-byte replacement and are left alone.
 */
386 void conv_mb_alnum(gchar *str)
388 static guchar char_tbl[] = {
390 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
391 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
393 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
394 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
396 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
397 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
399 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
400 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
402 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
403 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
406 register guchar *p = str;
/* first branch (its lead-byte test is not visible in this view):
   second bytes 0xb0..0xfa are eligible */
413 register guchar ch = *(p + 1);
415 if (ch >= 0xb0 && ch <= 0xfa) {
420 memmove(p, p + 1, len);
/* punctuation branch: lead byte 0xa1, replacement looked up in char_tbl */
426 } else if (*p == 0xa1) {
427 register guchar ch = *(p + 1);
/* the range check keeps the index inside the 80-entry table */
429 if (ch >= 0xa0 && ch <= 0xef &&
430 NCV != char_tbl[ch - 0xa0]) {
431 *p = char_tbl[ch - 0xa0];
434 memmove(p, p + 1, len);
440 } else if (iseuckanji(*p)) {
/*
 * conv_guess_encoding: heuristically classify the bytes of str and
 * return a CharSet.  The guess starts as C_US_ASCII.  An ESC followed
 * by $ or ( while the guess is still C_US_ASCII returns C_ISO_2022_JP
 * immediately; byte pairs that only fit the Shift_JIS classifiers move
 * the guess to C_SHIFT_JIS.  The remaining outcomes sit on lines that
 * are not visible in this view.
 */
450 CharSet conv_guess_encoding(const gchar *str)
452 const guchar *p = str;
453 CharSet guessed = C_US_ASCII;
456 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
457 if (guessed == C_US_ASCII)
458 return C_ISO_2022_JP;
460 } else if (isascii(*p)) {
/* pair fits the EUC two-byte range; it may also fit Shift_JIS */
462 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
463 if (*p >= 0xfd && *p <= 0xfe)
465 else if (guessed == C_SHIFT_JIS) {
466 if ((issjiskanji1(*p) &&
467 issjiskanji2(*(p + 1))) ||
469 guessed = C_SHIFT_JIS;
/* pair fits Shift_JIS but not the EUC two-byte range */
475 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
476 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
477 guessed = C_SHIFT_JIS;
481 } else if (issjishwkana(*p)) {
482 guessed = C_SHIFT_JIS;
/* conv_jistodisp: run conv_jistoeuc() into outbuf, then clean the
   result in place with conv_unreadable_eucjp(). */
492 void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
494 conv_jistoeuc(outbuf, outlen, inbuf);
495 conv_unreadable_eucjp(outbuf);
/* conv_sjistodisp: run conv_sjistoeuc() into outbuf, then clean the
   result in place with conv_unreadable_eucjp(). */
498 void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
500 conv_sjistoeuc(outbuf, outlen, inbuf);
501 conv_unreadable_eucjp(outbuf);
/* conv_euctodisp: copy inbuf into outbuf with strncpy2() (no
   conversion), then clean the copy with conv_unreadable_eucjp(). */
504 void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
506 strncpy2(outbuf, inbuf, outlen);
507 conv_unreadable_eucjp(outbuf);
/* conv_anytodisp: run conv_anytoeuc() (which guesses the source
   encoding) into outbuf, then clean with conv_unreadable_eucjp(). */
510 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
512 conv_anytoeuc(outbuf, outlen, inbuf);
513 conv_unreadable_eucjp(outbuf);
/* conv_ustodisp: copy inbuf into outbuf with strncpy2(), then replace
   non-ASCII bytes via conv_unreadable_8bit(). */
516 void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
518 strncpy2(outbuf, inbuf, outlen);
519 conv_unreadable_8bit(outbuf);
/* conv_latintodisp: copy inbuf into outbuf with strncpy2(), then clean
   the copy with conv_unreadable_latin(). */
522 void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
524 strncpy2(outbuf, inbuf, outlen);
525 conv_unreadable_latin(outbuf);
/* conv_noconv: identity converter; copies inbuf into outbuf with
   strncpy2().  conv_codeset_strdup() compares function pointers against
   it to detect the nothing-to-do case. */
528 void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
530 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_code_converter_new: allocate a zero-filled CodeConverter and
 * fill it from charset: the conversion function chosen by
 * conv_get_code_conv_func(), a g_strdup() copy of the name, and the
 * CharSet value from conv_get_charset_from_str().
 * The copied name is released by conv_code_converter_destroy().
 */
533 CodeConverter *conv_code_converter_new(const gchar *charset)
537 conv = g_new0(CodeConverter, 1);
539 conv->code_conv_func = conv_get_code_conv_func(charset);
541 conv->charset_str = g_strdup(charset);
542 conv->charset = conv_get_charset_from_str(charset);
/* conv_code_converter_destroy: release the charset name owned by conv.
   NOTE(review): the release of conv itself is not visible in this
   view -- confirm it follows. */
547 void conv_code_converter_destroy(CodeConverter *conv)
549 g_free(conv->charset_str);
/*
 * conv_convert: convert inbuf into outbuf using the settings stored in
 * conv.  Two build variants are visible: one calls
 * conv_codeset_strdup() with conv->charset_str and copies the result
 * into outbuf with strncpy2(); the !HAVE_LIBJCONV variant calls
 * conv->code_conv_func directly.
 * The rest of the parameter list and the return values are not visible
 * in this view.
 */
553 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
559 str = conv_codeset_strdup(inbuf, conv->charset_str, NULL);
563 strncpy2(outbuf, str, outlen);
566 #else /* !HAVE_LIBJCONV */
567 conv->code_conv_func(outbuf, outlen, inbuf);
/*
 * conv_codeset_strdup: produce a newly allocated string holding inbuf
 * converted from src_codeset to dest_codeset.
 *
 * Visible stages:
 *  1. If conv_get_code_conv_func(src_codeset) is not conv_noconv, a
 *     work buffer is sized, filled by that function and shrunk to fit.
 *  2. libjconv build: identical codesets, or a current codeset of
 *     US-ASCII, return g_strdup(inbuf); otherwise jconv_alloc_conv()
 *     performs the conversion.
 *  3. !HAVE_LIBJCONV build: only EUC-JP / Shift_JIS to ISO-2022-JP is
 *     converted; everything else is duplicated unchanged.
 *
 * NOTE(review): the conditions that select between these stages sit on
 * lines that are not visible in this view.
 */
573 gchar *conv_codeset_strdup(const gchar *inbuf,
574 const gchar *src_codeset, const gchar *dest_codeset)
580 const gchar *const *codesets;
582 #else /* !HAVE_LIBJCONV */
583 CharSet src_charset = C_AUTO, dest_charset = C_AUTO;
589 func = conv_get_code_conv_func(src_codeset);
590 if (func != conv_noconv) {
/* these three converters get twice the input length plus one byte;
   the other converters get the input length plus one byte */
591 if (func == conv_jistodisp ||
592 func == conv_sjistodisp ||
593 func == conv_anytodisp)
594 len = strlen(inbuf) * 2 + 1;
596 len = strlen(inbuf) + 1;
598 if (!buf) return NULL;
599 func(buf, len, inbuf);
/* trim the allocation to the converted length plus its NUL */
600 buf = g_realloc(buf, strlen(buf) + 1);
605 /* don't convert if src and dest codeset are identical */
606 if (src_codeset && dest_codeset &&
607 !strcasecmp(src_codeset, dest_codeset))
608 return g_strdup(inbuf);
/* candidate source codesets: either the single caller-supplied name or
   the preferred list reported by libjconv */
612 codesets = &src_codeset;
615 codesets = jconv_info_get_pref_codesets(&n_codesets);
617 dest_codeset = conv_get_current_charset_str();
618 /* don't convert if current codeset is US-ASCII */
619 if (!strcasecmp(dest_codeset, CS_US_ASCII))
620 return g_strdup(inbuf);
623 if (jconv_alloc_conv(inbuf, strlen(inbuf), &buf, &len,
624 codesets, n_codesets,
625 &actual_codeset, dest_codeset)
629 g_warning("code conversion from %s to %s failed\n",
630 codesets && codesets[0] ? codesets[0] : "(unknown)",
634 #else /* !HAVE_LIBJCONV */
/* NOTE(review): src_codeset is passed to strcasecmp() here; the NULL
   guard, if any, is on a line that is not visible -- confirm. */
636 if (!strcasecmp(src_codeset, CS_EUC_JP) ||
637 !strcasecmp(src_codeset, CS_EUCJP))
638 src_charset = C_EUC_JP;
639 else if (!strcasecmp(src_codeset, CS_SHIFT_JIS) ||
640 !strcasecmp(src_codeset, "SHIFT-JIS") ||
641 !strcasecmp(src_codeset, "SJIS"))
642 src_charset = C_SHIFT_JIS;
643 if (dest_codeset && !strcasecmp(dest_codeset, CS_ISO_2022_JP))
644 dest_charset = C_ISO_2022_JP;
647 if ((src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS) &&
648 dest_charset == C_ISO_2022_JP) {
/* work buffer: three output bytes per input byte, terminator included */
649 len = (strlen(inbuf) + 1) * 3;
652 if (src_charset == C_EUC_JP)
653 conv_euctojis(buf, len, inbuf);
655 conv_anytojis(buf, len, inbuf);
656 buf = g_realloc(buf, strlen(buf) + 1);
/* unsupported pair: hand back an unconverted copy */
659 buf = g_strdup(inbuf);
662 #endif /* !HAVE_LIBJCONV */
/*
 * conv_get_code_conv_func: map a charset name to the conv_*todisp
 * function that prepares text in that charset for display.  Names are
 * compared case-insensitively; names that match nothing get
 * conv_noconv.
 *
 * The early return of conv_anytodisp applies when the current charset
 * is EUC-JP or Shift_JIS; the condition guarding that path (presumably
 * a missing charset argument) is not visible in this view.
 */
665 CodeConvFunc conv_get_code_conv_func(const gchar *charset)
667 CodeConvFunc code_conv;
671 cur_charset = conv_get_current_charset();
672 if (cur_charset == C_EUC_JP || cur_charset == C_SHIFT_JIS)
673 return conv_anytodisp;
678 if (!strcasecmp(charset, CS_ISO_2022_JP) ||
679 !strcasecmp(charset, CS_ISO_2022_JP_2))
680 code_conv = conv_jistodisp;
681 else if (!strcasecmp(charset, CS_US_ASCII))
682 code_conv = conv_ustodisp;
/* NOTE(review): only the first 10 characters are compared, so any name
   that merely starts with those characters also matches -- confirm that
   this is intended. */
683 else if (!strncasecmp(charset, CS_ISO_8859_1, 10))
684 code_conv = conv_latintodisp;
/* prefix match: every ISO-8859-n name lands here */
686 else if (!strncasecmp(charset, "ISO-8859-", 9))
687 code_conv = conv_latintodisp;
689 else if (!strcasecmp(charset, CS_SHIFT_JIS) ||
690 !strcasecmp(charset, "SHIFT-JIS") ||
691 !strcasecmp(charset, "SJIS") ||
692 !strcasecmp(charset, "X-SJIS"))
693 code_conv = conv_sjistodisp;
694 else if (!strcasecmp(charset, CS_EUC_JP) ||
695 !strcasecmp(charset, CS_EUCJP))
696 code_conv = conv_euctodisp;
698 code_conv = conv_noconv;
/*
 * charsets[]: pairs of CharSet value and charset name (accessed below
 * as .charset and .name).  conv_get_charset_str() returns the name of
 * the first row whose value matches, so where one value appears twice
 * the earlier row supplies the name.
 *
 * Row order matters: conv_get_outgoing_charset() starts its scan at
 * index 3 to step over the leading rows, so do not insert or reorder
 * rows at the top without updating that index.
 */
703 static const struct {
707 {C_US_ASCII, CS_US_ASCII},
708 {C_US_ASCII, CS_ANSI_X3_4_1968},
710 {C_ISO_8859_1, CS_ISO_8859_1},
711 {C_ISO_8859_2, CS_ISO_8859_2},
712 {C_ISO_8859_4, CS_ISO_8859_4},
713 {C_ISO_8859_5, CS_ISO_8859_5},
714 {C_ISO_8859_7, CS_ISO_8859_7},
715 {C_ISO_8859_8, CS_ISO_8859_8},
716 {C_ISO_8859_9, CS_ISO_8859_9},
717 {C_ISO_8859_11, CS_ISO_8859_11},
718 {C_ISO_8859_13, CS_ISO_8859_13},
719 {C_ISO_8859_15, CS_ISO_8859_15},
720 {C_BALTIC, CS_BALTIC},
721 {C_CP1251, CS_CP1251},
722 {C_WINDOWS_1251, CS_WINDOWS_1251},
723 {C_KOI8_R, CS_KOI8_R},
724 {C_KOI8_U, CS_KOI8_U},
725 {C_ISO_2022_JP, CS_ISO_2022_JP},
726 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
727 {C_EUC_JP, CS_EUC_JP},
728 {C_EUC_JP, CS_EUCJP},
729 {C_SHIFT_JIS, CS_SHIFT_JIS},
730 {C_ISO_2022_KR, CS_ISO_2022_KR},
731 {C_EUC_KR, CS_EUC_KR},
732 {C_ISO_2022_CN, CS_ISO_2022_CN},
733 {C_EUC_CN, CS_EUC_CN},
734 {C_GB2312, CS_GB2312},
735 {C_EUC_TW, CS_EUC_TW},
737 {C_TIS_620, CS_TIS_620},
738 {C_WINDOWS_874, CS_WINDOWS_874},
/*
 * locale_table[]: pairs of locale name and CharSet value (accessed
 * below as .locale and .charset).  conv_get_current_charset() compares
 * each name as a case-insensitive prefix of the current locale string,
 * which is why names that carry an encoding suffix are listed ahead of
 * the bare language_COUNTRY form they share a prefix with.
 * NOTE(review): this ordering only helps if the lookup stops at the
 * first match; the statement that ends the loop is not visible here --
 * confirm.
 */
742 static const struct {
746 {"ja_JP.eucJP" , C_EUC_JP},
747 {"ja_JP.ujis" , C_EUC_JP},
748 {"ja_JP.EUC" , C_EUC_JP},
749 {"ja_JP.SJIS" , C_SHIFT_JIS},
750 {"ja_JP.JIS" , C_ISO_2022_JP},
751 {"ja_JP" , C_EUC_JP},
752 {"ko_KR" , C_EUC_KR},
753 {"zh_CN.GB2312" , C_GB2312},
754 {"zh_CN" , C_GB2312},
755 {"zh_TW.eucTW" , C_EUC_TW},
756 {"zh_TW.Big5" , C_BIG5},
759 {"ru_RU.KOI8-R" , C_KOI8_R},
760 {"ru_RU.CP1251" , C_WINDOWS_1251},
762 {"bg_BG" , C_WINDOWS_1251},
764 {"en_US" , C_ISO_8859_1},
765 {"ca_ES" , C_ISO_8859_1},
766 {"da_DK" , C_ISO_8859_1},
767 {"de_DE" , C_ISO_8859_1},
768 {"nl_NL" , C_ISO_8859_1},
769 {"et_EE" , C_ISO_8859_1},
770 {"fi_FI" , C_ISO_8859_1},
771 {"fr_FR" , C_ISO_8859_1},
772 {"is_IS" , C_ISO_8859_1},
773 {"it_IT" , C_ISO_8859_1},
774 {"no_NO" , C_ISO_8859_1},
775 {"pt_PT" , C_ISO_8859_1},
776 {"pt_BR" , C_ISO_8859_1},
777 {"es_ES" , C_ISO_8859_1},
778 {"sv_SE" , C_ISO_8859_1},
780 {"hr_HR" , C_ISO_8859_2},
781 {"hu_HU" , C_ISO_8859_2},
782 {"pl_PL" , C_ISO_8859_2},
783 {"ro_RO" , C_ISO_8859_2},
784 {"sk_SK" , C_ISO_8859_2},
785 {"sl_SI" , C_ISO_8859_2},
786 {"ru_RU" , C_ISO_8859_5},
787 {"el_GR" , C_ISO_8859_7},
788 {"iw_IL" , C_ISO_8859_8},
789 {"tr_TR" , C_ISO_8859_9},
791 {"th_TH" , C_TIS_620},
792 /* {"th_TH" , C_WINDOWS_874}, */
793 /* {"th_TH" , C_ISO_8859_11}, */
795 {"lt_LT.iso88594" , C_ISO_8859_4},
796 {"lt_LT.ISO8859-4" , C_ISO_8859_4},
797 {"lt_LT.ISO_8859-4" , C_ISO_8859_4},
798 {"lt_LT" , C_ISO_8859_13},
799 {"lv_LV" , C_ISO_8859_13},
802 {"POSIX" , C_US_ASCII},
803 {"ANSI_X3.4-1968" , C_US_ASCII},
805 #endif /* !HAVE_LIBJCONV */
/*
 * conv_get_charset_str: return the name stored in the first charsets[]
 * row whose value equals charset.  The returned pointer refers to the
 * table entry, not to a copy.  The result for a value that is not in
 * the table is not visible in this view; callers below test the result
 * against NULL.
 */
807 const gchar *conv_get_charset_str(CharSet charset)
811 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
812 if (charsets[i].charset == charset)
813 return charsets[i].name;
/*
 * conv_get_charset_from_str: return the CharSet value of the first
 * charsets[] row whose name equals charset, ignoring case.  A NULL
 * argument yields C_AUTO.  The result for an unknown name is not
 * visible in this view.
 */
819 CharSet conv_get_charset_from_str(const gchar *charset)
823 if (!charset) return C_AUTO;
825 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
826 if (!strcasecmp(charsets[i].name, charset))
827 return charsets[i].charset;
/*
 * conv_get_current_charset: determine the CharSet of the running
 * environment.  The answer is cached in a function-local static (-1
 * means not yet computed).
 *
 * One visible variant asks libjconv for the current codeset and looks
 * it up in charsets[].  The other takes the locale name from LC_ALL,
 * LC_CTYPE, LANG or setlocale(), in that order, and matches it against
 * locale_table[].
 */
833 CharSet conv_get_current_charset(void)
835 static CharSet cur_charset = -1;
839 const gchar *cur_codeset;
841 const gchar *cur_locale;
844 if (cur_charset != -1)
848 cur_codeset = jconv_info_get_current_codeset();
849 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
850 if (!strcasecmp(cur_codeset, charsets[i].name)) {
851 cur_charset = charsets[i].charset;
/* the first source that yields a non-NULL value wins */
856 cur_locale = g_getenv("LC_ALL");
857 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
858 if (!cur_locale) cur_locale = g_getenv("LANG");
859 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
861 debug_print("current locale: %s\n",
862 cur_locale ? cur_locale : "(none)");
865 cur_charset = C_US_ASCII;
/* a locale name that mentions UTF-8 anywhere is treated as UTF-8 */
869 if (strcasestr(cur_locale, "UTF-8")) {
870 cur_charset = C_UTF_8;
874 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
877 /* "ja_JP.EUC" matches with "ja_JP.eucJP" and "ja_JP.EUC" */
878 /* "ja_JP" matches with "ja_JP.xxxx" and "ja" */
879 if (!strncasecmp(cur_locale, locale_table[i].locale,
880 strlen(locale_table[i].locale))) {
881 cur_charset = locale_table[i].charset;
/* table entries without an encoding suffix also match a bare
   two-letter locale */
883 } else if ((p = strchr(locale_table[i].locale, '_')) &&
884 !strchr(p + 1, '.')) {
885 if (strlen(cur_locale) == 2 &&
886 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
887 cur_charset = locale_table[i].charset;
894 cur_charset = C_AUTO;
/*
 * conv_get_current_charset_str: name of the current charset, taken from
 * conv_get_charset_str(conv_get_current_charset()) and kept in a
 * function-local static.  Falls back to the literal US-ASCII when no
 * name is available.
 */
898 const gchar *conv_get_current_charset_str(void)
900 static const gchar *codeset = NULL;
903 codeset = conv_get_charset_str(conv_get_current_charset());
905 return codeset ? codeset : "US-ASCII";
/*
 * conv_get_outgoing_charset: choose the CharSet used for outgoing text.
 * The answer is cached in a function-local static (-1 means not yet
 * computed).
 *
 * One visible variant walks the preferred codesets reported by libjconv
 * and takes the first one found in charsets[], then falls back to
 * UTF-8 if that is among the preferences, and finally to C_AUTO.  The
 * other variant derives the answer from conv_get_current_charset().
 */
908 CharSet conv_get_outgoing_charset(void)
910 static CharSet out_charset = -1;
913 gint i, j, n_pref_codesets;
914 const gchar *const *pref_codesets;
919 if (out_charset != -1)
/* The inner loop below starts at j = 3, so the first three rows of
   charsets[] are never matched here; this depends on the row order of
   that table. */
923 /* skip US-ASCII and UTF-8 */
924 pref_codesets = jconv_info_get_pref_codesets(&n_pref_codesets);
925 for (i = 0; i < n_pref_codesets; i++) {
926 for (j = 3; j < sizeof(charsets) / sizeof(charsets[0]); j++) {
927 if (!strcasecmp(pref_codesets[i], charsets[j].name)) {
928 out_charset = charsets[j].charset;
/* second pass: accept UTF-8 if it is one of the preferred codesets */
934 for (i = 0; i < n_pref_codesets; i++) {
935 if (!strcasecmp(pref_codesets[i], "UTF-8")) {
936 out_charset = C_UTF_8;
941 out_charset = C_AUTO;
/* locale-based variant: some current charsets map to ISO-2022-JP (the
   case labels are not visible in this view), the rest are used
   unchanged */
943 cur_charset = conv_get_current_charset();
944 switch (cur_charset) {
947 out_charset = C_ISO_2022_JP;
950 out_charset = cur_charset;
/*
 * conv_get_outgoing_charset_str: name of the charset for outgoing text.
 * A user preference in prefs_common.outgoing_charset wins unless it is
 * CS_AUTO; otherwise the name of conv_get_outgoing_charset() is used,
 * with the literal US-ASCII as the last resort.
 */
957 const gchar *conv_get_outgoing_charset_str(void)
962 if (prefs_common.outgoing_charset) {
/* a preference that does not start with a letter is treated as invalid
   and reset to CS_AUTO */
963 if (!isalpha(prefs_common.outgoing_charset[0])) {
964 g_free(prefs_common.outgoing_charset);
965 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
966 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
967 return prefs_common.outgoing_charset;
970 out_charset = conv_get_outgoing_charset();
971 str = conv_get_charset_str(out_charset);
973 return str ? str : "US-ASCII";
/*
 * conv_get_current_locale: look up the current locale name from LC_ALL,
 * then LANG, then setlocale(LC_CTYPE, NULL), and log it with
 * debug_print().
 * NOTE(review): conv_get_current_charset() also consults the LC_CTYPE
 * environment variable between LC_ALL and LANG; this function does
 * not -- confirm whether the difference is intended.
 */
976 const gchar *conv_get_current_locale(void)
980 cur_locale = g_getenv("LC_ALL");
981 if (!cur_locale) cur_locale = g_getenv("LANG");
982 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
984 debug_print("current locale: %s\n",
985 cur_locale ? cur_locale : "(none)");
/*
 * conv_unmime_header_overwrite: decode the MIME header text in str and
 * store the result back into str.
 *
 * When the current charset is EUC-JP the text is first passed through
 * conv_anytodisp() into a temporary buffer and unmime_header() writes
 * straight into str.  Otherwise unmime_header() writes into the
 * temporary buffer, which is then copied back.  Xalloca() is given
 * return as its failure action, so str is left untouched if the
 * allocation fails.
 */
990 void conv_unmime_header_overwrite(gchar *str)
996 cur_charset = conv_get_current_charset();
998 if (cur_charset == C_EUC_JP) {
/* room for the display conversion to double the length */
999 buflen = strlen(str) * 2 + 1;
1000 Xalloca(buf, buflen, return);
1001 conv_anytodisp(buf, buflen, str);
1002 unmime_header(str, buf);
1004 buflen = strlen(str) + 1;
1005 Xalloca(buf, buflen, return);
1006 unmime_header(buf, str);
/* the copy back is bounded by the original length of str */
1007 strncpy2(str, buf, buflen);
/*
 * conv_unmime_header: decode the MIME header text in str into outbuf.
 * When the current charset is EUC-JP the text is first passed through
 * conv_anytodisp() into a temporary buffer; otherwise unmime_header()
 * reads str directly.
 * NOTE(review): the visible calls pass neither outlen nor charset on --
 * confirm that outbuf cannot be overrun.
 */
1011 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1012 const gchar *charset)
1014 CharSet cur_charset;
1016 cur_charset = conv_get_current_charset();
1018 if (cur_charset == C_EUC_JP) {
1022 buflen = strlen(str) * 2 + 1;
1023 Xalloca(buf, buflen, return);
1024 conv_anytodisp(buf, buflen, str);
1025 unmime_header(outbuf, buf);
1027 unmime_header(outbuf, str);
/* Length limits used by conv_encode_header() when it breaks lines. */
1030 #define MAX_ENCLEN 75
1031 #define MAX_LINELEN 76
/* Base64 output length for len input bytes: four characters for every
   group of three bytes, with a partial group rounded up to a full four. */
1033 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/*
 * conv_encode_header (libjconv variant): encode src into dest, a buffer
 * of len bytes, for use as a mail header value.
 *
 * The source is converted to a wide-character string and consumed in
 * whitespace-delimited runs.  A run flagged by is_next_nonascii() is
 * converted to the outgoing charset with jconv_alloc_conv() and written
 * as an encoded word: the =? opener, the charset name, the ?B? marker,
 * the base64 text and the ?= closer.  line_len tracks the length of the
 * current output line and starts at header_len.
 *
 * NOTE(review): the comment opened on the line that prints tmp and tlen
 * further down has no visible terminator in this view, so no
 * annotations are placed between it and the next existing comment.
 */
1036 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1042 size_t line_len, mimehdr_len, mimehdr_begin_len;
1043 gchar *mimehdr_init = "=?";
1044 gchar *mimehdr_end = "?=";
1045 gchar *mimehdr_enctype = "?B?";
1046 const gchar *mimehdr_charset;
1048 /* g_print("src = %s\n", src); */
1049 mimehdr_charset = conv_get_outgoing_charset_str();
1051 /* convert to wide-character string */
1052 wsrcp = wsrc = strdup_mbstowcs(src);
/* mimehdr_len: all fixed parts of one encoded word;
   mimehdr_begin_len: the parts that precede the base64 text */
1054 mimehdr_len = strlen(mimehdr_init) + strlen(mimehdr_end) +
1055 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1056 mimehdr_begin_len = strlen(mimehdr_init) +
1057 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1058 line_len = header_len;
1062 g_return_if_fail(wsrc != NULL);
1065 wchar_t *wp, *wtmp, *wtmpp;
1067 gboolean str_is_non_ascii;
/* stop early unless room for two more full lines remains in dest */
1069 /* irresponsible buffer overrun check */
1070 if ((len - (destp - dest)) < (MAX_LINELEN + 1) * 2) break;
1072 /* encode string including space
1073 if non-ASCII string follows */
1074 if (is_next_nonascii(wsrcp)) {
1076 while ((wp = find_wspace(wp)) != NULL)
1077 if (!is_next_nonascii(wp)) break;
1078 str_is_non_ascii = TRUE;
1080 wp = find_wspace(wsrcp);
1081 str_is_non_ascii = FALSE;
/* wtmp: private copy of the run; nspc counts the whitespace after it */
1085 wtmp = wcsndup(wsrcp, wp - wsrcp);
1087 while (iswspace(wsrcp[nspc])) nspc++;
1089 wtmp = wcsdup(wsrcp);
1090 wsrcp += wcslen(wsrcp);
1097 gchar *tmp; /* internal codeset */
1098 gchar *raw; /* converted, but not base64 encoded */
1099 register gchar *tmpp;
/* worst case: MB_CUR_MAX bytes per wide character, plus the NUL */
1102 tmpp = tmp = g_malloc(wcslen(wtmpp) * MB_CUR_MAX + 1);
1107 while (*wtmpp != (wchar_t)0) {
1110 gchar *raw_new = NULL;
1111 int raw_new_len = 0;
1112 const gchar *src_codeset;
1114 mbl = wctomb(tmpp, *wtmpp);
1116 g_warning("invalid wide character\n");
1121 src_codeset = conv_get_current_charset_str();
1122 /* printf ("tmp = %s, tlen = %d, mbl\n",
1124 if (jconv_alloc_conv(tmp, tlen + mbl,
1125 &raw_new, &raw_new_len,
1127 &dummy, mimehdr_charset)
1129 g_warning("can't convert\n");
1134 if (str_is_non_ascii) {
1135 gint dlen = mimehdr_len +
1137 if ((line_len + dlen +
1138 (*(wtmpp + 1) ? 0 : nspc) +
1139 (line_len > 1 ? 1 : 0))
1152 } else if ((line_len + tlen + mbl +
1153 (*(wtmpp + 1) ? 0 : nspc) +
1154 (line_len > 1 ? 1 : 0))
1157 if (1 + tlen + mbl +
1158 (*(wtmpp + 1) ? 0 : nspc)
1176 raw_len = raw_new_len;
1180 /* g_print("tmp = %s, tlen = %d, mb_seqlen = %d\n",
1181 tmp, tlen, mb_seqlen); */
1183 if (tlen == 0 || raw_len == 0) {
1189 if (line_len > 1 && destp > dest) {
1195 if (str_is_non_ascii) {
/* emit the opener, charset name and encoding marker, then advance
   past them */
1196 g_snprintf(destp, len - strlen(dest), "%s%s%s",
1197 mimehdr_init, mimehdr_charset,
1199 destp += mimehdr_begin_len;
1200 line_len += mimehdr_begin_len;
1202 base64_encode(destp, raw, raw_len);
1203 line_len += strlen(destp);
1204 destp += strlen(destp);
1206 strcpy(destp, mimehdr_end);
1207 destp += strlen(mimehdr_end);
1208 line_len += strlen(mimehdr_end);
1211 line_len += strlen(destp);
1212 destp += strlen(destp);
1217 /* g_print("line_len = %d\n\n", line_len); */
1218 } while (*wtmpp != (wchar_t)0);
/* copy the whitespace that followed the run */
1220 while (iswspace(*wsrcp)) {
1223 mbl = wctomb(destp, *wsrcp++);
1236 /* g_print("dest = %s\n", dest); */
1238 #else /* !HAVE_LIBJCONV */
/* Length in bytes of one escape sequence; counted twice (shift in and
   shift out) when the encoder budgets space for a multibyte run. */
1240 #define JIS_SEQLEN 3

/*
 * conv_encode_header (variant without libjconv): encode src into dest,
 * a buffer of len bytes, for use as a mail header value.
 *
 * Only an outgoing charset of ISO-2022-JP is encoded; for anything else
 * src is copied to dest unchanged.  Otherwise the source is converted
 * to a wide-character string and consumed in whitespace-delimited runs.
 * A run flagged by is_next_nonascii() is converted with conv_euctojis()
 * and written as an encoded word: the =? opener, the charset name, the
 * ?B? marker, the base64 text and the ?= closer.  line_len tracks the
 * length of the current output line and starts at header_len.
 */
1242 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1248 size_t line_len, mimehdr_len, mimehdr_begin_len;
1249 gchar *mimehdr_init = "=?";
1250 gchar *mimehdr_end = "?=";
1251 gchar *mimehdr_enctype = "?B?";
1252 const gchar *mimehdr_charset;
1254 /* g_print("src = %s\n", src); */
1255 mimehdr_charset = conv_get_outgoing_charset_str();
1256 if (strcmp(mimehdr_charset, "ISO-2022-JP") != 0) {
1257 /* currently only supports Japanese */
1258 strncpy2(dest, src, len);
1262 /* convert to wide-character string */
1263 wsrcp = wsrc = strdup_mbstowcs(src);
/* mimehdr_len: all fixed parts of one encoded word;
   mimehdr_begin_len: the parts that precede the base64 text */
1265 mimehdr_len = strlen(mimehdr_init) + strlen(mimehdr_end) +
1266 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1267 mimehdr_begin_len = strlen(mimehdr_init) +
1268 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1269 line_len = header_len;
1273 g_return_if_fail(wsrc != NULL);
1276 wchar_t *wp, *wtmp, *wtmpp;
1278 gboolean str_is_non_ascii;
/* stop early unless room for two more full lines remains in dest */
1280 /* irresponsible buffer overrun check */
1281 if ((len - (destp - dest)) < (MAX_LINELEN + 1) * 2) break;
1283 /* encode string including space
1284 if non-ASCII string follows */
1285 if (is_next_nonascii(wsrcp)) {
1287 while ((wp = find_wspace(wp)) != NULL)
1288 if (!is_next_nonascii(wp)) break;
1289 str_is_non_ascii = TRUE;
1291 wp = find_wspace(wsrcp);
1292 str_is_non_ascii = FALSE;
/* wtmp: private copy of the run; nspc counts the whitespace after it */
1296 wtmp = wcsndup(wsrcp, wp - wsrcp);
1298 while (iswspace(wsrcp[nspc])) nspc++;
1300 wtmp = wcsdup(wsrcp);
1301 wsrcp += wcslen(wsrcp);
/* tlen: bytes gathered so far; mb_seqlen: escape-sequence bytes the
   ISO-2022-JP form will add */
1307 gint prev_mbl = 1, tlen = 0, mb_seqlen = 0;
1309 register gchar *tmpp;
/* worst case: MB_CUR_MAX bytes per wide character, plus the NUL */
1311 tmpp = tmp = g_malloc(wcslen(wtmpp) * MB_CUR_MAX + 1);
1314 while (*wtmpp != (wchar_t)0) {
1317 mbl = wctomb(tmpp, *wtmpp);
1319 g_warning("invalid wide character\n");
/* a change from a one-byte to a two-byte character costs one escape
   sequence in and one out */
1324 /* length of KI + KO */
1325 if (prev_mbl == 1 && mbl == 2)
1326 mb_seqlen += JIS_SEQLEN * 2;
1328 if (str_is_non_ascii) {
/* projected length of the encoded word if this character is added */
1329 gint dlen = mimehdr_len +
1330 B64LEN(tlen + mb_seqlen + mbl);
1332 if ((line_len + dlen +
1333 (*(wtmpp + 1) ? 0 : nspc) +
1334 (line_len > 1 ? 1 : 0))
1347 } else if ((line_len + tlen + mbl +
1348 (*(wtmpp + 1) ? 0 : nspc) +
1349 (line_len > 1 ? 1 : 0))
1351 if (1 + tlen + mbl +
1352 (*(wtmpp + 1) ? 0 : nspc)
1371 /* g_print("tmp = %s, tlen = %d, mb_seqlen = %d\n",
1372 tmp, tlen, mb_seqlen); */
1379 if (line_len > 1 && destp > dest) {
1385 if (str_is_non_ascii) {
/* the conversion buffer is sized from the running byte and
   escape-sequence counts */
1388 tmp_jis = g_new(gchar, tlen + mb_seqlen + 1);
1389 conv_euctojis(tmp_jis,
1390 tlen + mb_seqlen + 1, tmp);
1391 g_snprintf(destp, len - strlen(dest), "%s%s%s",
1392 mimehdr_init, mimehdr_charset,
1394 destp += mimehdr_begin_len;
1395 line_len += mimehdr_begin_len;
1397 base64_encode(destp, tmp_jis, strlen(tmp_jis));
1398 line_len += strlen(destp);
1399 destp += strlen(destp);
1401 strcpy(destp, mimehdr_end);
1402 destp += strlen(mimehdr_end);
1403 line_len += strlen(mimehdr_end);
1408 line_len += strlen(destp);
1409 destp += strlen(destp);
1413 /* g_print("line_len = %d\n\n", line_len); */
1414 } while (*wtmpp != (wchar_t)0);
/* copy the whitespace that followed the run */
1416 while (iswspace(*wsrcp)) {
1419 mbl = wctomb(destp, *wsrcp++);
1432 /* g_print("dest = %s\n", dest); */
1434 #endif /* HAVE_LIBJCONV */