2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2002 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
29 #if (HAVE_WCTYPE_H && HAVE_WCHAR_H)
47 #include "prefs_common.h"
/*
 * Byte-classification helpers for the Japanese encodings handled in this
 * file.  Every test masks its argument with 0xff before comparing, so the
 * result is the same whether the argument is a signed or an unsigned char.
 *
 * The lines at the end of this group, from "if (state != JIS_KANJI)" onward,
 * are backslash-continued macro bodies that compare and update a variable
 * named `state`.
 * NOTE(review): their #define lines are not visible here -- confirm the
 * macro names before relying on them.
 */
/* Replacement character; conv_unreadable_8bit() stores it over non-ASCII bytes. */
57 #define SUBST_CHAR '_'
/* True when the low byte of c is in 0xa1..0xfe. */
60 #define iseuckanji(c) \
61 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* True when the low byte of c is 0x8e. */
62 #define iseuchwkana1(c) \
63 (((c) & 0xff) == 0x8e)
/* True when the low byte of c is in 0xa1..0xdf. */
64 #define iseuchwkana2(c) \
65 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
67 (((c) & 0xff) == 0x8f)
/* True when the low byte of c is in 0xa9..0xaf. */
68 #define isunprintableeuckanji(c) \
69 (((c) & 0xff) >= 0xa9 && ((c) & 0xff) <= 0xaf)
/* True when the low byte of c is in 0x81..0x9f or 0xe0..0xfc. */
70 #define issjiskanji1(c) \
71 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
72 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* True when the low byte of c is in 0x40..0x7e or 0x80..0xfc. */
73 #define issjiskanji2(c) \
74 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
75 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* True when the low byte of c is in 0xa1..0xdf. */
76 #define issjishwkana(c) \
77 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
80 if (state != JIS_KANJI) { \
88 if (state != JIS_ASCII) { \
96 if (state != JIS_HWKANA) { \
100 state = JIS_HWKANA; \
104 if (state != JIS_AUXKANJI) { \
109 state = JIS_AUXKANJI; \
/*
 * conv_jistoeuc: translate the NUL-terminated string inbuf into outbuf one
 * byte at a time, tracking a JISState that starts as JIS_ASCII.
 *
 * Bytes following an escape introducer ('@', 'B', '(', 'J', 'I') and the
 * control bytes 0x0e / 0x0f are inspected to decide the current state;
 * combinations that are not recognised fall into the "unknown escape
 * sequence" branches.  Character bytes are copied with bit 7 set (| 0x80).
 *
 * NOTE(review): outlen is not referenced in any line visible here -- confirm
 * how writes to outbuf are bounded.
 */
112 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
114 const guchar *in = inbuf;
115 guchar *out = outbuf;
116 JISState state = JIS_ASCII;
/* walk the input until its terminating NUL */
118 while (*in != '\0') {
122 if (*(in + 1) == '@' || *(in + 1) == 'B') {
125 } else if (*(in + 1) == '(' &&
127 state = JIS_AUXKANJI;
130 /* unknown escape sequence */
133 } else if (*in == '(') {
134 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
137 } else if (*(in + 1) == 'I') {
141 /* unknown escape sequence */
145 /* unknown escape sequence */
148 } else if (*in == 0x0e) {
151 } else if (*in == 0x0f) {
/* two-byte character: set the high bit on each byte, and stop if the
 * input ends after the first byte */
160 *out++ = *in++ | 0x80;
161 if (*in == '\0') break;
162 *out++ = *in++ | 0x80;
/* single byte copied with the high bit set */
166 *out++ = *in++ | 0x80;
/* another two-byte copy with the same truncated-input guard */
170 *out++ = *in++ | 0x80;
171 if (*in == '\0') break;
172 *out++ = *in++ | 0x80;
/*
 * conv_euctojis: scan the NUL-terminated string inbuf, classify each byte
 * with iseuckanji() / iseuchwkana1() / iseucaux(), and write the character
 * bytes to outbuf with bit 7 cleared (& 0x7f).  The JISState starts as
 * JIS_ASCII.
 *
 * The repeated `*in != '\0' && !isascii(*in)` tests guard the handling of a
 * stray non-ASCII byte that did not form a valid sequence.
 * NOTE(review): the statements executed under those tests, and any use of
 * outlen, are not visible here.
 */
181 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
183 const guchar *in = inbuf;
184 guchar *out = outbuf;
185 JISState state = JIS_ASCII;
187 while (*in != '\0') {
191 } else if (iseuckanji(*in)) {
/* a two-byte character needs a valid second byte as well */
192 if (iseuckanji(*(in + 1))) {
194 *out++ = *in++ & 0x7f;
195 *out++ = *in++ & 0x7f;
200 if (*in != '\0' && !isascii(*in)) {
205 } else if (iseuchwkana1(*in)) {
207 if (iseuchwkana2(*in)) {
209 *out++ = *in++ & 0x7f;
212 if (*in != '\0' && !isascii(*in)) {
217 } else if (iseucaux(*in)) {
219 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
221 *out++ = *in++ & 0x7f;
222 *out++ = *in++ & 0x7f;
225 if (*in != '\0' && !isascii(*in)) {
228 if (*in != '\0' && !isascii(*in)) {
/*
 * conv_sjistoeuc: scan the NUL-terminated string inbuf and write the
 * converted bytes to outbuf.
 *
 * For a lead byte matching issjiskanji1() followed by a byte matching
 * issjiskanji2(), the pair (out1, out2) is remapped arithmetically: a row
 * offset of 0x70 or 0xb0 is chosen from the lead byte, the lead byte is
 * doubled after subtracting it, and the trail byte is shifted down.  Both
 * results are stored with bit 7 set.  Bytes matching issjishwkana() take a
 * separate branch.
 * NOTE(review): the condition selecting between the two out1 formulas and
 * any use of outlen are not visible here.
 */
241 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
243 const guchar *in = inbuf;
244 guchar *out = outbuf;
246 while (*in != '\0') {
249 } else if (issjiskanji1(*in)) {
250 if (issjiskanji2(*(in + 1))) {
252 guchar out2 = *(in + 1);
/* lead bytes below 0xa0 and those above use different row offsets */
255 row = out1 < 0xa0 ? 0x70 : 0xb0;
257 out1 = (out1 - row) * 2 - 1;
258 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
260 out1 = (out1 - row) * 2;
264 *out++ = out1 | 0x80;
265 *out++ = out2 | 0x80;
270 if (*in != '\0' && !isascii(*in)) {
275 } else if (issjishwkana(*in)) {
/*
 * conv_anytoeuc: convert inbuf into outbuf without being told the source
 * encoding.  The result of conv_guess_encoding(inbuf) selects one of
 * conv_jistoeuc(), conv_sjistoeuc() or a plain strncpy2() copy.
 * NOTE(review): the case labels are not visible here; presumably the JIS,
 * Shift_JIS and default cases respectively -- confirm.
 */
287 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
289 switch (conv_guess_encoding(inbuf)) {
291 conv_jistoeuc(outbuf, outlen, inbuf);
294 conv_sjistoeuc(outbuf, outlen, inbuf);
297 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_anytojis: convert inbuf into outbuf based on the encoding returned by
 * conv_guess_encoding(inbuf): either through conv_euctojis() or by copying
 * the input unchanged with strncpy2().
 * NOTE(review): the case labels are not visible here.
 */
302 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
304 switch (conv_guess_encoding(inbuf)) {
306 conv_euctojis(outbuf, outlen, inbuf);
309 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_unreadable_eucjp: sanitise str in place.
 *
 * CR+LF pairs are collapsed to LF by shifting the rest of the string down
 * one byte.  Byte sequences are then classified as printable 7-bit,
 * two-byte (iseuckanji, excluding lead bytes matching
 * isunprintableeuckanji), half-width kana (0x8e prefix) or auxiliary kanji
 * (iseucaux prefix plus two more bytes); anything else is treated as
 * unprintable and substituted.
 */
314 void conv_unreadable_eucjp(gchar *str)
316 register guchar *p = str;
320 /* convert CR+LF -> LF */
321 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes starting at p + 1 include the terminating NUL */
322 memmove(p, p + 1, strlen(p));
323 /* printable 7 bit code */
325 } else if (iseuckanji(*p)) {
326 if (iseuckanji(*(p + 1)) && !isunprintableeuckanji(*p))
327 /* printable euc-jp code */
330 /* substitute unprintable code */
339 } else if (iseuchwkana1(*p)) {
340 if (iseuchwkana2(*(p + 1)))
341 /* euc-jp hankaku kana */
345 } else if (iseucaux(*p)) {
346 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
347 /* auxiliary kanji */
352 /* substitute unprintable 1 byte code */
/*
 * conv_unreadable_8bit: sanitise str in place for 7-bit display.
 * CR+LF pairs are collapsed to LF; every other byte for which isascii()
 * is false is overwritten with SUBST_CHAR.
 */
357 void conv_unreadable_8bit(gchar *str)
359 register guchar *p = str;
362 /* convert CR+LF -> LF */
363 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes starting at p + 1 include the terminating NUL */
364 memmove(p, p + 1, strlen(p));
365 else if (!isascii(*p)) *p = SUBST_CHAR;
/*
 * conv_unreadable_latin: sanitise str in place.
 * CR+LF pairs are collapsed to LF; bytes in the range 0x80..0x9f are
 * singled out for special handling.
 * NOTE(review): the statement executed for 0x80..0x9f is not visible here;
 * presumably substitution with SUBST_CHAR as in conv_unreadable_8bit() --
 * confirm.
 */
370 void conv_unreadable_latin(gchar *str)
372 register guchar *p = str;
375 /* convert CR+LF -> LF */
376 if (*p == '\r' && *(p + 1) == '\n')
377 memmove(p, p + 1, strlen(p));
378 else if ((*p & 0xff) >= 0x80 && (*p & 0xff) <= 0x9f)
/*
 * conv_mb_alnum: rewrite str in place, replacing selected two-byte
 * characters by a single byte and closing the gap with memmove().
 *
 * char_tbl is indexed by (second byte - 0xa0) for characters whose lead
 * byte is 0xa1 and yields an ASCII punctuation character.  Entries equal to
 * NCV are skipped (presumably "no conversion"; the NCV definition is not
 * visible here).  The lookup is only attempted for second bytes in
 * 0xa0..0xef, which matches the size of the table as shown.
 */
386 void conv_mb_alnum(gchar *str)
388 static guchar char_tbl[] = {
390 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
391 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
393 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
394 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
396 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
397 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
399 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
400 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
402 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
403 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
406 register guchar *p = str;
413 register guchar ch = *(p + 1);
/* NOTE(review): the lead-byte test guarding this branch is not visible */
415 if (ch >= 0xb0 && ch <= 0xfa) {
/* drop the second byte of the pair by shifting the tail down */
420 memmove(p, p + 1, len);
426 } else if (*p == 0xa1) {
427 register guchar ch = *(p + 1);
/* bounds check on ch comes first so the table index stays in range */
429 if (ch >= 0xa0 && ch <= 0xef &&
430 NCV != char_tbl[ch - 0xa0]) {
431 *p = char_tbl[ch - 0xa0];
434 memmove(p, p + 1, len);
440 } else if (iseuckanji(*p)) {
/*
 * conv_guess_encoding: heuristically classify the bytes of str.
 *
 * The guess starts as C_US_ASCII.  An ESC followed by '$' or '(' returns
 * C_ISO_2022_JP immediately, as long as nothing non-ASCII has changed the
 * guess before.  Byte pairs are otherwise tested against the EUC-JP and
 * Shift_JIS range macros; several of the ranges overlap, which is why a
 * pair that already looks like EUC is re-tested against the Shift_JIS
 * macros.  A lone byte matching issjishwkana() sets the guess to
 * C_SHIFT_JIS.
 *
 * NOTE(review): the return statement and the branches that set the guess to
 * an EUC value are not visible here.
 */
450 CharSet conv_guess_encoding(const gchar *str)
452 const guchar *p = str;
453 CharSet guessed = C_US_ASCII;
456 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
457 if (guessed == C_US_ASCII)
458 return C_ISO_2022_JP;
460 } else if (isascii(*p)) {
462 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
463 if (*p >= 0xfd && *p <= 0xfe)
465 else if (guessed == C_SHIFT_JIS) {
466 if ((issjiskanji1(*p) &&
467 issjiskanji2(*(p + 1))) ||
469 guessed = C_SHIFT_JIS;
475 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
476 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
477 guessed = C_SHIFT_JIS;
481 } else if (issjishwkana(*p)) {
482 guessed = C_SHIFT_JIS;
/*
 * conv_jistodisp: prepare JIS input for display -- convert inbuf into outbuf
 * with conv_jistoeuc(), then sanitise outbuf in place with
 * conv_unreadable_eucjp().
 */
492 void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
494 conv_jistoeuc(outbuf, outlen, inbuf);
495 conv_unreadable_eucjp(outbuf);
/*
 * conv_sjistodisp: prepare Shift_JIS input for display -- convert inbuf into
 * outbuf with conv_sjistoeuc(), then sanitise outbuf in place with
 * conv_unreadable_eucjp().
 */
498 void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
500 conv_sjistoeuc(outbuf, outlen, inbuf);
501 conv_unreadable_eucjp(outbuf);
/*
 * conv_euctodisp: copy inbuf into outbuf with strncpy2() (no conversion),
 * then sanitise outbuf in place with conv_unreadable_eucjp().
 */
504 void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
506 strncpy2(outbuf, inbuf, outlen);
507 conv_unreadable_eucjp(outbuf);
/*
 * conv_anytodisp: prepare input of unknown encoding for display -- convert
 * inbuf into outbuf with conv_anytoeuc() (which guesses the encoding), then
 * sanitise outbuf in place with conv_unreadable_eucjp().
 */
510 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
512 conv_anytoeuc(outbuf, outlen, inbuf);
513 conv_unreadable_eucjp(outbuf);
/*
 * conv_ustodisp: copy inbuf into outbuf with strncpy2(), then replace
 * non-ASCII bytes in outbuf via conv_unreadable_8bit().
 */
516 void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
518 strncpy2(outbuf, inbuf, outlen);
519 conv_unreadable_8bit(outbuf);
/*
 * conv_latintodisp: copy inbuf into outbuf with strncpy2(), then sanitise
 * outbuf in place via conv_unreadable_latin().
 */
522 void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
524 strncpy2(outbuf, inbuf, outlen);
525 conv_unreadable_latin(outbuf);
/*
 * conv_noconv: identity converter -- copies inbuf into outbuf with
 * strncpy2() and does nothing else.  conv_codeset_strdup() compares the
 * selected converter against this function to detect "no conversion".
 */
528 void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
530 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_code_converter_new: allocate a zero-initialised CodeConverter
 * (g_new0) and fill it in for the given charset name: the conversion
 * function from conv_get_code_conv_func(), a private copy of the name
 * (g_strdup), and the CharSet value from conv_get_charset_from_str().
 * The copied name is released by conv_code_converter_destroy().
 */
533 CodeConverter *conv_code_converter_new(const gchar *charset)
537 conv = g_new0(CodeConverter, 1);
539 conv->code_conv_func = conv_get_code_conv_func(charset);
541 conv->charset_str = g_strdup(charset);
542 conv->charset = conv_get_charset_from_str(charset);
/*
 * conv_code_converter_destroy: release the charset name string owned by
 * conv.
 * NOTE(review): freeing of the CodeConverter structure itself is not visible
 * in this extract -- confirm.
 */
547 void conv_code_converter_destroy(CodeConverter *conv)
549 g_free(conv->charset_str);
/*
 * conv_convert: convert inbuf into outbuf (capacity outlen) using conv.
 *
 * One build path converts through conv_codeset_strdup() using
 * conv->charset_str and copies the resulting string into outbuf with
 * strncpy2(); the !HAVE_LIBJCONV path calls conv->code_conv_func directly.
 * NOTE(review): the return values and the handling of a NULL result from
 * conv_codeset_strdup() are not visible here.
 */
553 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
559 str = conv_codeset_strdup(inbuf, conv->charset_str, NULL);
563 strncpy2(outbuf, str, outlen);
566 #else /* !HAVE_LIBJCONV */
567 conv->code_conv_func(outbuf, outlen, inbuf);
/*
 * conv_codeset_strdup: return a newly allocated copy of inbuf converted
 * from src_codeset to dest_codeset.
 *
 * Visible behaviour:
 *  - A built-in converter is looked up with conv_get_code_conv_func(); if it
 *    is not conv_noconv, the input is converted into a temporary buffer
 *    which is then shrunk to fit with g_realloc().  The JIS / Shift_JIS /
 *    "any" display converters get a buffer of 2 * strlen + 1 bytes, all
 *    others strlen + 1.
 *  - With libjconv: identical source and destination names (compared
 *    case-insensitively), or a current codeset of US-ASCII, short-circuit to
 *    g_strdup(inbuf); otherwise jconv_alloc_conv() does the work and a
 *    warning is logged on failure.
 *  - Without libjconv: only EUC-JP or Shift_JIS input going to ISO-2022-JP
 *    is converted (buffer of (strlen + 1) * 3 bytes); everything else is
 *    duplicated unchanged.
 *
 * Returns NULL if the temporary buffer could not be obtained.
 * NOTE(review): the conditions selecting the first path and the final
 * return statements are not visible here.
 */
573 gchar *conv_codeset_strdup(const gchar *inbuf,
574 const gchar *src_codeset, const gchar *dest_codeset)
580 const gchar *const *codesets;
582 #else /* !HAVE_LIBJCONV */
583 CharSet src_charset = C_AUTO, dest_charset = C_AUTO;
589 func = conv_get_code_conv_func(src_codeset);
590 if (func != conv_noconv) {
591 if (func == conv_jistodisp ||
592 func == conv_sjistodisp ||
593 func == conv_anytodisp)
594 len = strlen(inbuf) * 2 + 1;
596 len = strlen(inbuf) + 1;
598 if (!buf) return NULL;
599 func(buf, len, inbuf);
/* trim the worst-case allocation down to the actual string length */
600 buf = g_realloc(buf, strlen(buf) + 1);
605 /* don't convert if src and dest codeset are identical */
606 if (src_codeset && dest_codeset &&
607 !strcasecmp(src_codeset, dest_codeset))
608 return g_strdup(inbuf);
612 codesets = &src_codeset;
615 codesets = jconv_info_get_pref_codesets(&n_codesets);
617 dest_codeset = conv_get_current_charset_str();
618 /* don't convert if current codeset is US-ASCII */
619 if (!strcasecmp(dest_codeset, CS_US_ASCII))
620 return g_strdup(inbuf);
623 if (jconv_alloc_conv(inbuf, strlen(inbuf), &buf, &len,
624 codesets, n_codesets,
625 &actual_codeset, dest_codeset)
629 g_warning("code conversion from %s to %s failed\n",
630 codesets && codesets[0] ? codesets[0] : "(unknown)",
634 #else /* !HAVE_LIBJCONV */
636 if (!strcasecmp(src_codeset, CS_EUC_JP) ||
637 !strcasecmp(src_codeset, CS_EUCJP))
638 src_charset = C_EUC_JP;
639 else if (!strcasecmp(src_codeset, CS_SHIFT_JIS) ||
640 !strcasecmp(src_codeset, "SHIFT-JIS") ||
641 !strcasecmp(src_codeset, "SJIS"))
642 src_charset = C_SHIFT_JIS;
643 if (dest_codeset && !strcasecmp(dest_codeset, CS_ISO_2022_JP))
644 dest_charset = C_ISO_2022_JP;
647 if ((src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS) &&
648 dest_charset == C_ISO_2022_JP) {
649 len = (strlen(inbuf) + 1) * 3;
652 if (src_charset == C_EUC_JP)
653 conv_euctojis(buf, len, inbuf);
655 conv_anytojis(buf, len, inbuf);
656 buf = g_realloc(buf, strlen(buf) + 1);
659 buf = g_strdup(inbuf);
662 #endif /* !HAVE_LIBJCONV */
/*
 * conv_get_code_conv_func: choose the display-conversion function for a
 * charset name.
 *
 * When the current charset is EUC-JP or Shift_JIS, conv_anytodisp is
 * returned early (NOTE(review): the condition guarding that early path is
 * not visible here; presumably a NULL charset -- confirm).  Otherwise the
 * name is compared case-insensitively:
 *   ISO-2022-JP / ISO-2022-JP-2        -> conv_jistodisp
 *   US-ASCII                           -> conv_ustodisp
 *   ISO-8859-1 (first 10 chars) and
 *   any "ISO-8859-" prefix             -> conv_latintodisp
 *   Shift_JIS and its aliases          -> conv_sjistodisp
 *   EUC-JP / EUCJP                     -> conv_euctodisp
 *   anything else                      -> conv_noconv
 */
665 CodeConvFunc conv_get_code_conv_func(const gchar *charset)
667 CodeConvFunc code_conv;
671 cur_charset = conv_get_current_charset();
672 if (cur_charset == C_EUC_JP || cur_charset == C_SHIFT_JIS)
673 return conv_anytodisp;
678 if (!strcasecmp(charset, CS_ISO_2022_JP) ||
679 !strcasecmp(charset, CS_ISO_2022_JP_2))
680 code_conv = conv_jistodisp;
681 else if (!strcasecmp(charset, CS_US_ASCII))
682 code_conv = conv_ustodisp;
683 else if (!strncasecmp(charset, CS_ISO_8859_1, 10))
684 code_conv = conv_latintodisp;
686 else if (!strncasecmp(charset, "ISO-8859-", 9))
687 code_conv = conv_latintodisp;
689 else if (!strcasecmp(charset, CS_SHIFT_JIS) ||
690 !strcasecmp(charset, "SHIFT-JIS") ||
691 !strcasecmp(charset, "SJIS") ||
692 !strcasecmp(charset, "X-SJIS"))
693 code_conv = conv_sjistodisp;
694 else if (!strcasecmp(charset, CS_EUC_JP) ||
695 !strcasecmp(charset, CS_EUCJP))
696 code_conv = conv_euctodisp;
698 code_conv = conv_noconv;
/*
 * Table pairing each CharSet value with a charset name string.
 *
 * Order matters: conv_get_charset_str() returns the name of the FIRST entry
 * whose CharSet matches, so where one CharSet appears twice (C_US_ASCII,
 * C_EUC_JP) the first name listed is the one reported.
 * conv_get_outgoing_charset() also starts scanning at a fixed index to skip
 * the leading entries, so do not insert new entries at the top without
 * updating that loop.
 */
703 static const struct {
707 {C_US_ASCII, CS_US_ASCII},
708 {C_US_ASCII, CS_ANSI_X3_4_1968},
710 {C_ISO_8859_1, CS_ISO_8859_1},
711 {C_ISO_8859_2, CS_ISO_8859_2},
712 {C_ISO_8859_4, CS_ISO_8859_4},
713 {C_ISO_8859_5, CS_ISO_8859_5},
714 {C_ISO_8859_7, CS_ISO_8859_7},
715 {C_ISO_8859_8, CS_ISO_8859_8},
716 {C_ISO_8859_9, CS_ISO_8859_9},
717 {C_ISO_8859_11, CS_ISO_8859_11},
718 {C_ISO_8859_13, CS_ISO_8859_13},
719 {C_ISO_8859_15, CS_ISO_8859_15},
720 {C_BALTIC, CS_BALTIC},
721 {C_CP1251, CS_CP1251},
722 {C_WINDOWS_1251, CS_WINDOWS_1251},
723 {C_KOI8_R, CS_KOI8_R},
724 {C_KOI8_U, CS_KOI8_U},
725 {C_ISO_2022_JP, CS_ISO_2022_JP},
726 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
727 {C_EUC_JP, CS_EUC_JP},
728 {C_EUC_JP, CS_EUCJP},
729 {C_SHIFT_JIS, CS_SHIFT_JIS},
730 {C_ISO_2022_KR, CS_ISO_2022_KR},
731 {C_EUC_KR, CS_EUC_KR},
732 {C_ISO_2022_CN, CS_ISO_2022_CN},
733 {C_EUC_CN, CS_EUC_CN},
734 {C_GB2312, CS_GB2312},
735 {C_EUC_TW, CS_EUC_TW},
737 {C_TIS_620, CS_TIS_620},
738 {C_WINDOWS_874, CS_WINDOWS_874},
/*
 * Table mapping locale names to the CharSet assumed for them; compiled only
 * in the !HAVE_LIBJCONV configuration.
 *
 * conv_get_current_charset() walks the entries in order and compares each
 * locale string as a case-insensitive PREFIX of the current locale, so an
 * entry with an explicit codeset suffix (e.g. "ja_JP.eucJP") must be listed
 * before the bare language_COUNTRY entry that would also match it.  Entries
 * without a '.' suffix additionally match a bare two-letter locale name.
 */
742 static const struct {
746 {"ja_JP.eucJP" , C_EUC_JP},
747 {"ja_JP.ujis" , C_EUC_JP},
748 {"ja_JP.EUC" , C_EUC_JP},
749 {"ja_JP.SJIS" , C_SHIFT_JIS},
750 {"ja_JP.JIS" , C_ISO_2022_JP},
751 {"ja_JP" , C_EUC_JP},
752 {"ko_KR" , C_EUC_KR},
753 {"zh_CN.GB2312" , C_GB2312},
754 {"zh_CN" , C_GB2312},
755 {"zh_TW.eucTW" , C_EUC_TW},
756 {"zh_TW.Big5" , C_BIG5},
759 {"ru_RU.KOI8-R" , C_KOI8_R},
760 {"ru_RU.CP1251" , C_WINDOWS_1251},
762 {"en_US" , C_ISO_8859_1},
763 {"ca_ES" , C_ISO_8859_1},
764 {"da_DK" , C_ISO_8859_1},
765 {"de_DE" , C_ISO_8859_1},
766 {"nl_NL" , C_ISO_8859_1},
767 {"et_EE" , C_ISO_8859_1},
768 {"fi_FI" , C_ISO_8859_1},
769 {"fr_FR" , C_ISO_8859_1},
770 {"is_IS" , C_ISO_8859_1},
771 {"it_IT" , C_ISO_8859_1},
772 {"no_NO" , C_ISO_8859_1},
773 {"pt_PT" , C_ISO_8859_1},
774 {"pt_BR" , C_ISO_8859_1},
775 {"es_ES" , C_ISO_8859_1},
776 {"sv_SE" , C_ISO_8859_1},
778 {"hr_HR" , C_ISO_8859_2},
779 {"hu_HU" , C_ISO_8859_2},
780 {"pl_PL" , C_ISO_8859_2},
781 {"ro_RO" , C_ISO_8859_2},
782 {"sk_SK" , C_ISO_8859_2},
783 {"sl_SI" , C_ISO_8859_2},
784 {"ru_RU" , C_ISO_8859_5},
785 {"el_GR" , C_ISO_8859_7},
786 {"iw_IL" , C_ISO_8859_8},
787 {"tr_TR" , C_ISO_8859_9},
789 {"th_TH" , C_TIS_620},
790 /* {"th_TH" , C_WINDOWS_874}, */
791 /* {"th_TH" , C_ISO_8859_11}, */
793 {"lt_LT.iso88594" , C_ISO_8859_4},
794 {"lt_LT.ISO8859-4" , C_ISO_8859_4},
795 {"lt_LT.ISO_8859-4" , C_ISO_8859_4},
796 {"lt_LT" , C_ISO_8859_13},
797 {"lv_LV" , C_ISO_8859_13},
800 {"POSIX" , C_US_ASCII},
801 {"ANSI_X3.4-1968" , C_US_ASCII},
803 #endif /* !HAVE_LIBJCONV */
/*
 * conv_get_charset_str: look up the name string for a CharSet value by a
 * linear scan of the charsets table; the first matching entry wins.
 * NOTE(review): the value returned when nothing matches is not visible
 * here; callers in this file test the result for NULL.
 */
805 const gchar *conv_get_charset_str(CharSet charset)
809 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
810 if (charsets[i].charset == charset)
811 return charsets[i].name;
/*
 * conv_get_charset_from_str: map a charset name to its CharSet value using
 * a case-insensitive linear scan of the charsets table.
 * A NULL name yields C_AUTO.
 * NOTE(review): the value returned for an unknown name is not visible here.
 */
817 CharSet conv_get_charset_from_str(const gchar *charset)
821 if (!charset) return C_AUTO;
823 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
824 if (!strcasecmp(charsets[i].name, charset))
825 return charsets[i].charset;
/*
 * conv_get_current_charset: determine the CharSet of the running
 * environment.  The answer is kept in a function-local static (initialised
 * to -1 as "not yet computed") and that static is tested before any lookup
 * is done.
 *
 * One build path asks jconv_info_get_current_codeset() and matches the
 * result against the charsets table.  The other path takes the locale name
 * from LC_ALL, then LC_CTYPE, then LANG, then setlocale(LC_CTYPE, NULL), and:
 *   - treats any locale containing "UTF-8" as C_UTF_8;
 *   - otherwise walks locale_table, matching each entry as a
 *     case-insensitive prefix of the locale, or -- for entries without a
 *     codeset suffix -- by their first two letters when the locale name is
 *     exactly two characters long.
 * C_US_ASCII and C_AUTO are the fallback assignments.
 * NOTE(review): the conditions guarding those fallbacks are not visible here.
 */
831 CharSet conv_get_current_charset(void)
833 static CharSet cur_charset = -1;
837 const gchar *cur_codeset;
839 const gchar *cur_locale;
842 if (cur_charset != -1)
846 cur_codeset = jconv_info_get_current_codeset();
847 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
848 if (!strcasecmp(cur_codeset, charsets[i].name)) {
849 cur_charset = charsets[i].charset;
/* environment variables take precedence over the C library's view */
854 cur_locale = g_getenv("LC_ALL");
855 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
856 if (!cur_locale) cur_locale = g_getenv("LANG");
857 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
859 debug_print("current locale: %s\n",
860 cur_locale ? cur_locale : "(none)");
863 cur_charset = C_US_ASCII;
867 if (strcasestr(cur_locale, "UTF-8")) {
868 cur_charset = C_UTF_8;
872 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
875 /* "ja_JP.EUC" matches with "ja_JP.eucJP" and "ja_JP.EUC" */
876 /* "ja_JP" matches with "ja_JP.xxxx" and "ja" */
877 if (!strncasecmp(cur_locale, locale_table[i].locale,
878 strlen(locale_table[i].locale))) {
879 cur_charset = locale_table[i].charset;
881 } else if ((p = strchr(locale_table[i].locale, '_')) &&
882 !strchr(p + 1, '.')) {
883 if (strlen(cur_locale) == 2 &&
884 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
885 cur_charset = locale_table[i].charset;
892 cur_charset = C_AUTO;
/*
 * conv_get_current_charset_str: name of the current charset, obtained from
 * conv_get_charset_str(conv_get_current_charset()) and kept in a
 * function-local static.  Falls back to "US-ASCII" when no name is
 * available, so the result is never NULL.
 */
896 const gchar *conv_get_current_charset_str(void)
898 static const gchar *codeset = NULL;
901 codeset = conv_get_charset_str(conv_get_current_charset());
903 return codeset ? codeset : "US-ASCII";
/*
 * conv_get_outgoing_charset: choose the CharSet for outgoing messages.  The
 * result is kept in a function-local static (initialised to -1 as "not yet
 * computed") that is tested before any lookup is done.
 *
 * One build path walks the preferred codesets reported by
 * jconv_info_get_pref_codesets() and matches each against the charsets
 * table, starting at index 3 so the leading entries are skipped (see the
 * "skip US-ASCII and UTF-8" comment).  If none matches, a second pass looks
 * for "UTF-8" explicitly, with C_AUTO as the last resort.
 *
 * The other path switches on conv_get_current_charset(): some values map to
 * C_ISO_2022_JP, and the remaining branch passes the current charset
 * through unchanged.
 * NOTE(review): the case labels are not visible here.
 */
906 CharSet conv_get_outgoing_charset(void)
908 static CharSet out_charset = -1;
911 gint i, j, n_pref_codesets;
912 const gchar *const *pref_codesets;
917 if (out_charset != -1)
921 /* skip US-ASCII and UTF-8 */
922 pref_codesets = jconv_info_get_pref_codesets(&n_pref_codesets);
923 for (i = 0; i < n_pref_codesets; i++) {
/* j starts at 3: depends on the order of the first charsets[] entries */
924 for (j = 3; j < sizeof(charsets) / sizeof(charsets[0]); j++) {
925 if (!strcasecmp(pref_codesets[i], charsets[j].name)) {
926 out_charset = charsets[j].charset;
932 for (i = 0; i < n_pref_codesets; i++) {
933 if (!strcasecmp(pref_codesets[i], "UTF-8")) {
934 out_charset = C_UTF_8;
939 out_charset = C_AUTO;
941 cur_charset = conv_get_current_charset();
942 switch (cur_charset) {
945 out_charset = C_ISO_2022_JP;
948 out_charset = cur_charset;
/*
 * conv_get_outgoing_charset_str: name of the charset to use for outgoing
 * messages.
 *
 * A user preference (prefs_common.outgoing_charset) takes priority when it
 * is set to something other than CS_AUTO.  A preference whose first
 * character is not alphabetic is treated as corrupt: it is freed and reset
 * to CS_AUTO.  Otherwise the name is derived from
 * conv_get_outgoing_charset(), falling back to "US-ASCII", so the result is
 * never NULL.
 */
955 const gchar *conv_get_outgoing_charset_str(void)
960 if (prefs_common.outgoing_charset) {
961 if (!isalpha(prefs_common.outgoing_charset[0])) {
962 g_free(prefs_common.outgoing_charset);
963 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
964 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
965 return prefs_common.outgoing_charset;
968 out_charset = conv_get_outgoing_charset();
969 str = conv_get_charset_str(out_charset);
971 return str ? str : "US-ASCII";
974 const gchar *conv_get_current_locale(void)
978 cur_locale = g_getenv("LC_ALL");
979 if (!cur_locale) cur_locale = g_getenv("LANG");
980 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
982 debug_print("current locale: %s\n",
983 cur_locale ? cur_locale : "(none)");
/*
 * conv_unmime_header_overwrite: decode the MIME-encoded header in str and
 * store the result back into str.
 *
 * When the current charset is EUC-JP the raw header is first passed through
 * conv_anytodisp() into a stack buffer of 2 * strlen + 1 bytes and then
 * decoded from that buffer into str.  Otherwise the header is decoded into
 * a stack buffer of strlen + 1 bytes and copied back with strncpy2().
 * Xalloca() returns from the function if the allocation fails.
 */
988 void conv_unmime_header_overwrite(gchar *str)
994 cur_charset = conv_get_current_charset();
996 if (cur_charset == C_EUC_JP) {
997 buflen = strlen(str) * 2 + 1;
998 Xalloca(buf, buflen, return);
999 conv_anytodisp(buf, buflen, str);
1000 unmime_header(str, buf);
1002 buflen = strlen(str) + 1;
1003 Xalloca(buf, buflen, return);
1004 unmime_header(buf, str);
1005 strncpy2(str, buf, buflen);
/*
 * conv_unmime_header: decode the MIME-encoded header str into outbuf.
 *
 * When the current charset is EUC-JP the raw header is first passed through
 * conv_anytodisp() into a stack buffer of 2 * strlen + 1 bytes, and that
 * buffer is what gets decoded; otherwise str is decoded directly.
 * NOTE(review): outlen and charset are not referenced in any line visible
 * here -- confirm whether they are used.
 */
1009 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1010 const gchar *charset)
1012 CharSet cur_charset;
1014 cur_charset = conv_get_current_charset();
1016 if (cur_charset == C_EUC_JP) {
1020 buflen = strlen(str) * 2 + 1;
1021 Xalloca(buf, buflen, return);
1022 conv_anytodisp(buf, buflen, str);
1023 unmime_header(outbuf, buf);
1025 unmime_header(outbuf, str);
/* Length limits used by conv_encode_header() when folding header lines. */
1028 #define MAX_ENCLEN 75
1029 #define MAX_LINELEN 76
/* Size of the base64 encoding of len bytes: 4 output characters for every
 * 3 input bytes, with a final partial group rounded up to a full 4. */
1031 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/*
 * conv_encode_header (libjconv build): write src into dest (capacity len)
 * as a MIME-encoded header.
 *
 * src is converted to a wide-character string (strdup_mbstowcs) and
 * processed word by word.  Words are found with find_wspace(), and a word
 * followed by non-ASCII text is grouped with it (is_next_nonascii).  Each
 * chunk is converted from the current charset to the outgoing charset with
 * jconv_alloc_conv(), base64-encoded, and emitted as
 * "=?" charset "?B?" data "?=".  line_len tracks the length of the current
 * output line, starting from header_len, so chunks can be sized against the
 * line limits.
 *
 * Processing stops early when less than (MAX_LINELEN + 1) * 2 bytes remain
 * in dest.
 */
1034 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1040 size_t line_len, mimehdr_len, mimehdr_begin_len;
1041 gchar *mimehdr_init = "=?";
1042 gchar *mimehdr_end = "?=";
1043 gchar *mimehdr_enctype = "?B?";
1044 const gchar *mimehdr_charset;
1046 /* g_print("src = %s\n", src); */
1047 mimehdr_charset = conv_get_outgoing_charset_str();
1049 /* convert to wide-character string */
1050 wsrcp = wsrc = strdup_mbstowcs(src);
/* mimehdr_len: full "=?cs?B?" + "?=" overhead; mimehdr_begin_len: the
 * opening "=?cs?B?" part only */
1052 mimehdr_len = strlen(mimehdr_init) + strlen(mimehdr_end) +
1053 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1054 mimehdr_begin_len = strlen(mimehdr_init) +
1055 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1056 line_len = header_len;
1060 g_return_if_fail(wsrc != NULL);
1063 wchar_t *wp, *wtmp, *wtmpp;
1066 /* irresponsible buffer overrun check */
1067 if ((len - (destp - dest)) < (MAX_LINELEN + 1) * 2) break;
1069 /* encode string including space
1070 if non-ASCII string follows */
1071 if (is_next_nonascii(wsrcp)) {
1073 while ((wp = find_wspace(wp)) != NULL)
1074 if (!is_next_nonascii(wp)) break;
1076 wp = find_wspace(wsrcp);
1079 wtmp = wcsndup(wsrcp, wp - wsrcp);
/* count the whitespace that follows the extracted word */
1081 while (iswspace(wsrcp[nspc])) nspc++;
1083 wtmp = wcsdup(wsrcp);
1084 wsrcp += wcslen(wsrcp);
1090 gint tlen = 0, str_ascii = 1;
1091 gchar *tmp; /* internal codeset */
1092 gchar *raw; /* converted, but not base64 encoded */
1093 register gchar *tmpp;
/* worst case: every wide character expands to MB_CUR_MAX bytes */
1096 tmpp = tmp = g_malloc(wcslen(wtmpp) * MB_CUR_MAX + 1);
1101 while (*wtmpp != (wchar_t)0) {
1104 gchar *raw_new = NULL;
1105 int raw_new_len = 0;
1106 const gchar *src_codeset;
1108 if (*wtmpp < 32 || *wtmpp >= 127)
1110 mbl = wctomb(tmpp, *wtmpp);
1112 g_warning("invalid wide character\n");
1117 src_codeset = conv_get_current_charset_str();
1118 /* printf ("tmp = %s, tlen = %d, mbl\n",
1120 if (jconv_alloc_conv(tmp, tlen + mbl,
1121 &raw_new, &raw_new_len,
1123 &dummy, mimehdr_charset)
1125 g_warning("can't convert\n");
1131 gint dlen = mimehdr_len +
1133 if ((line_len + dlen +
1134 (*(wtmpp + 1) ? 0 : nspc) +
1135 (line_len > 1 ? 1 : 0))
1149 } else if ((line_len + tlen + mbl +
1150 (*(wtmpp + 1) ? 0 : nspc) +
1151 (line_len > 1 ? 1 : 0))
1154 if (1 + tlen + mbl +
1155 (*(wtmpp + 1) ? 0 : nspc)
1173 raw_len = raw_new_len;
1177 /* g_print("tmp = %s, tlen = %d, mb_seqlen = %d\n",
1178 tmp, tlen, mb_seqlen); */
1180 if (tlen == 0 || raw_len == 0) {
1186 if (line_len > 1 && destp > dest) {
/* emit the "=?charset?B?" prefix, the base64 payload, then "?=" */
1193 g_snprintf(destp, len - strlen(dest), "%s%s%s",
1194 mimehdr_init, mimehdr_charset,
1196 destp += mimehdr_begin_len;
1197 line_len += mimehdr_begin_len;
1199 base64_encode(destp, raw, raw_len);
1200 line_len += strlen(destp);
1201 destp += strlen(destp);
1203 strcpy(destp, mimehdr_end);
1204 destp += strlen(mimehdr_end);
1205 line_len += strlen(mimehdr_end);
1208 line_len += strlen(destp);
1209 destp += strlen(destp);
1214 /* g_print("line_len = %d\n\n", line_len); */
1215 } while (*wtmpp != (wchar_t)0);
1217 while (iswspace(*wsrcp)) {
1220 mbl = wctomb(destp, *wsrcp++);
1233 /* g_print("dest = %s\n", dest); */
1235 #else /* !HAVE_LIBJCONV */
/* Added to mb_seqlen twice (JIS_SEQLEN * 2) when a multi-byte run begins;
 * see the "length of KI + KO" comment below. */
1237 #define JIS_SEQLEN 3
/*
 * conv_encode_header (build without libjconv): write src into dest
 * (capacity len) as a MIME-encoded header.
 *
 * Only ISO-2022-JP output is supported: for any other outgoing charset the
 * non-ISO-2022-JP branch copies src into dest with strncpy2().  Otherwise
 * src is converted to a wide-character string and processed word by word;
 * a word followed by non-ASCII text is grouped with it.  Non-ASCII chunks
 * are converted with conv_euctojis(), base64-encoded, and emitted as
 * "=?" charset "?B?" data "?=".  line_len tracks the current output line
 * length, starting from header_len; mb_seqlen accounts for the escape
 * sequences the JIS conversion will add.
 *
 * Processing stops early when less than (MAX_LINELEN + 1) * 2 bytes remain
 * in dest.
 */
1239 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1245 size_t line_len, mimehdr_len, mimehdr_begin_len;
1246 gchar *mimehdr_init = "=?";
1247 gchar *mimehdr_end = "?=";
1248 gchar *mimehdr_enctype = "?B?";
1249 const gchar *mimehdr_charset;
1251 /* g_print("src = %s\n", src); */
1252 mimehdr_charset = conv_get_outgoing_charset_str();
1253 if (strcmp(mimehdr_charset, "ISO-2022-JP") != 0) {
1254 /* currently only supports Japanese */
1255 strncpy2(dest, src, len);
1259 /* convert to wide-character string */
1260 wsrcp = wsrc = strdup_mbstowcs(src);
/* mimehdr_len: full "=?cs?B?" + "?=" overhead; mimehdr_begin_len: the
 * opening "=?cs?B?" part only */
1262 mimehdr_len = strlen(mimehdr_init) + strlen(mimehdr_end) +
1263 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1264 mimehdr_begin_len = strlen(mimehdr_init) +
1265 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1266 line_len = header_len;
1270 g_return_if_fail(wsrc != NULL);
1273 wchar_t *wp, *wtmp, *wtmpp;
1275 gboolean str_is_non_ascii;
1277 /* irresponsible buffer overrun check */
1278 if ((len - (destp - dest)) < (MAX_LINELEN + 1) * 2) break;
1280 /* encode string including space
1281 if non-ASCII string follows */
1282 if (is_next_nonascii(wsrcp)) {
1284 while ((wp = find_wspace(wp)) != NULL)
1285 if (!is_next_nonascii(wp)) break;
1286 str_is_non_ascii = TRUE;
1288 wp = find_wspace(wsrcp);
1289 str_is_non_ascii = FALSE;
1293 wtmp = wcsndup(wsrcp, wp - wsrcp);
/* count the whitespace that follows the extracted word */
1295 while (iswspace(wsrcp[nspc])) nspc++;
1297 wtmp = wcsdup(wsrcp);
1298 wsrcp += wcslen(wsrcp);
1304 gint prev_mbl = 1, tlen = 0, mb_seqlen = 0;
1306 register gchar *tmpp;
/* worst case: every wide character expands to MB_CUR_MAX bytes */
1308 tmpp = tmp = g_malloc(wcslen(wtmpp) * MB_CUR_MAX + 1);
1311 while (*wtmpp != (wchar_t)0) {
1314 mbl = wctomb(tmpp, *wtmpp);
1316 g_warning("invalid wide character\n");
1321 /* length of KI + KO */
1322 if (prev_mbl == 1 && mbl == 2)
1323 mb_seqlen += JIS_SEQLEN * 2;
1325 if (str_is_non_ascii) {
1326 gint dlen = mimehdr_len +
1327 B64LEN(tlen + mb_seqlen + mbl);
1329 if ((line_len + dlen +
1330 (*(wtmpp + 1) ? 0 : nspc) +
1331 (line_len > 1 ? 1 : 0))
1344 } else if ((line_len + tlen + mbl +
1345 (*(wtmpp + 1) ? 0 : nspc) +
1346 (line_len > 1 ? 1 : 0))
1348 if (1 + tlen + mbl +
1349 (*(wtmpp + 1) ? 0 : nspc)
1368 /* g_print("tmp = %s, tlen = %d, mb_seqlen = %d\n",
1369 tmp, tlen, mb_seqlen); */
1376 if (line_len > 1 && destp > dest) {
1382 if (str_is_non_ascii) {
/* room for the text plus the escape sequences counted in mb_seqlen */
1385 tmp_jis = g_new(gchar, tlen + mb_seqlen + 1);
1386 conv_euctojis(tmp_jis,
1387 tlen + mb_seqlen + 1, tmp);
1388 g_snprintf(destp, len - strlen(dest), "%s%s%s",
1389 mimehdr_init, mimehdr_charset,
1391 destp += mimehdr_begin_len;
1392 line_len += mimehdr_begin_len;
1394 base64_encode(destp, tmp_jis, strlen(tmp_jis));
1395 line_len += strlen(destp);
1396 destp += strlen(destp);
1398 strcpy(destp, mimehdr_end);
1399 destp += strlen(mimehdr_end);
1400 line_len += strlen(mimehdr_end);
1405 line_len += strlen(destp);
1406 destp += strlen(destp);
1410 /* g_print("line_len = %d\n\n", line_len); */
1411 } while (*wtmpp != (wchar_t)0);
1413 while (iswspace(*wsrcp)) {
1416 mbl = wctomb(destp, *wsrcp++);
1429 /* g_print("dest = %s\n", dest); */
1431 #endif /* HAVE_LIBJCONV */