2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2002 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
29 #if (HAVE_WCTYPE_H && HAVE_WCHAR_H)
47 #include "prefs_common.h"
/* Replacement character for bytes that cannot be shown (see conv_unreadable_8bit). */
57 #define SUBST_CHAR '_'
/*
 * Byte-class predicates.  Each one masks its argument to the low 8 bits
 * before doing the range test.
 */
/* byte lies in 0xa1..0xfe */
60 #define iseuckanji(c) \
61 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* byte is 0x8e; callers below pair it with iseuchwkana2() on the next byte */
62 #define iseuchwkana1(c) \
63 (((c) & 0xff) == 0x8e)
/* byte lies in 0xa1..0xdf */
64 #define iseuchwkana2(c) \
65 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* NOTE(review): body of a macro whose #define line is not visible in this
   view; presumably iseucaux(c), which is used further down -- confirm. */
67 (((c) & 0xff) == 0x8f)
/* lead byte lies in 0xa9..0xaf; conv_unreadable_eucjp treats such pairs as unprintable */
68 #define isunprintableeuckanji(c) \
69 (((c) & 0xff) >= 0xa9 && ((c) & 0xff) <= 0xaf)
/* byte lies in 0x81..0x9f or 0xe0..0xfc */
70 #define issjiskanji1(c) \
71 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
72 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* byte lies in 0x40..0x7e or 0x80..0xfc */
73 #define issjiskanji2(c) \
74 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
75 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* byte lies in 0xa1..0xdf */
76 #define issjishwkana(c) \
77 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* Fragments of the state-switching helper macros: each visible line either
   tests or assigns the local `state` against JIS_KANJI, JIS_ASCII,
   JIS_HWKANA or JIS_AUXKANJI.  Their #define lines and whatever they emit
   are not visible in this view. */
80 if (state != JIS_KANJI) { \
88 if (state != JIS_ASCII) { \
96 if (state != JIS_HWKANA) { \
100 state = JIS_HWKANA; \
104 if (state != JIS_AUXKANJI) { \
109 state = JIS_AUXKANJI; \
/*
 * Convert JIS (ISO-2022-JP style) text in inbuf to EUC in outbuf, going by
 * the function name.
 *
 * The loop walks inbuf up to its terminating NUL while tracking a JISState
 * that starts as JIS_ASCII.  The visible branches examine the bytes of an
 * escape sequence ('@' / 'B', '(' ..., 'B' / 'J', 'I') and the 0x0e / 0x0f
 * control bytes; payload bytes are copied with the high bit set.
 *
 * NOTE(review): outlen is not referenced in any line visible here --
 * confirm that callers size outbuf generously.
 */
112 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
114 const guchar *in = inbuf;
115 guchar *out = outbuf;
116 JISState state = JIS_ASCII;
118 while (*in != '\0') {
122 if (*(in + 1) == '@' || *(in + 1) == 'B') {
125 } else if (*(in + 1) == '(' &&
127 state = JIS_AUXKANJI;
130 /* unknown escape sequence */
133 } else if (*in == '(') {
134 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
137 } else if (*(in + 1) == 'I') {
141 /* unknown escape sequence */
145 /* unknown escape sequence */
148 } else if (*in == 0x0e) {
151 } else if (*in == 0x0f) {
/* two-byte character: set the high bit on both bytes, but stop if the
   input ends after the first one */
160 *out++ = *in++ | 0x80;
161 if (*in == '\0') break;
162 *out++ = *in++ | 0x80;
166 *out++ = *in++ | 0x80;
170 *out++ = *in++ | 0x80;
171 if (*in == '\0') break;
172 *out++ = *in++ | 0x80;
/*
 * Convert EUC text in inbuf to JIS in outbuf, going by the function name.
 *
 * Each lead byte is classified with iseuckanji / iseuchwkana1 / iseucaux.
 * Well-formed sequences are copied with the high bit cleared (& 0x7f).
 * The `*in != '\0' && !isascii(*in)` tests guard the handling of a
 * non-ASCII byte that did not form a valid sequence; what is written in
 * those cases is not visible in this view.
 *
 * NOTE(review): outlen is not referenced in any line visible here.
 */
181 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
183 const guchar *in = inbuf;
184 guchar *out = outbuf;
185 JISState state = JIS_ASCII;
187 while (*in != '\0') {
191 } else if (iseuckanji(*in)) {
192 if (iseuckanji(*(in + 1))) {
194 *out++ = *in++ & 0x7f;
195 *out++ = *in++ & 0x7f;
200 if (*in != '\0' && !isascii(*in)) {
205 } else if (iseuchwkana1(*in)) {
207 if (iseuchwkana2(*in)) {
209 *out++ = *in++ & 0x7f;
212 if (*in != '\0' && !isascii(*in)) {
217 } else if (iseucaux(*in)) {
219 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
221 *out++ = *in++ & 0x7f;
222 *out++ = *in++ & 0x7f;
225 if (*in != '\0' && !isascii(*in)) {
228 if (*in != '\0' && !isascii(*in)) {
/*
 * Convert Shift_JIS text in inbuf to EUC in outbuf, going by the function
 * name.
 *
 * For a lead byte accepted by issjiskanji1() followed by a byte accepted by
 * issjiskanji2(), the pair is remapped arithmetically (row offset 0x70 for
 * lead bytes below 0xa0, otherwise 0xb0) and both result bytes are written
 * with the high bit set.  Bytes accepted by issjishwkana() take a separate
 * branch whose body is not visible in this view.
 *
 * NOTE(review): outlen is not referenced in any line visible here.
 */
245 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
247 const guchar *in = inbuf;
248 guchar *out = outbuf;
250 while (*in != '\0') {
253 } else if (issjiskanji1(*in)) {
254 if (issjiskanji2(*(in + 1))) {
256 guchar out2 = *(in + 1);
259 row = out1 < 0xa0 ? 0x70 : 0xb0;
261 out1 = (out1 - row) * 2 - 1;
262 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
264 out1 = (out1 - row) * 2;
268 *out++ = out1 | 0x80;
269 *out++ = out2 | 0x80;
274 if (*in != '\0' && !isascii(*in)) {
279 } else if (issjishwkana(*in)) {
/*
 * Convert inbuf to EUC, choosing the converter from the result of
 * conv_guess_encoding(inbuf): conv_jistoeuc, conv_sjistoeuc, or a plain
 * bounded copy with strncpy2.  The case labels selecting each branch are
 * not visible in this view.
 */
291 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
293 switch (conv_guess_encoding(inbuf)) {
295 conv_jistoeuc(outbuf, outlen, inbuf);
298 conv_sjistoeuc(outbuf, outlen, inbuf);
301 strncpy2(outbuf, inbuf, outlen);
/*
 * Convert inbuf to JIS, choosing by conv_guess_encoding(inbuf): either
 * conv_euctojis or a plain bounded copy with strncpy2.  The case labels
 * selecting each branch are not visible in this view.
 */
306 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
308 switch (conv_guess_encoding(inbuf)) {
310 conv_euctojis(outbuf, outlen, inbuf);
313 strncpy2(outbuf, inbuf, outlen);
/*
 * Clean up an EUC-JP string in place for display: CR+LF is collapsed to LF,
 * printable sequences are kept, and unprintable codes are substituted (per
 * the comments inside the body).  The statements that advance p and perform
 * the substitution are not visible in this view.
 */
318 void conv_unreadable_eucjp(gchar *str)
320 register guchar *p = str;
/* strlen(p) bytes starting at p + 1 are the rest of the string including
   its NUL, so the memmove drops the CR and keeps the terminator */
324 /* convert CR+LF -> LF */
325 if (*p == '\r' && *(p + 1) == '\n')
326 memmove(p, p + 1, strlen(p));
327 /* printable 7 bit code */
329 } else if (iseuckanji(*p)) {
330 if (iseuckanji(*(p + 1)) && !isunprintableeuckanji(*p))
331 /* printable euc-jp code */
334 /* substitute unprintable code */
343 } else if (iseuchwkana1(*p)) {
344 if (iseuchwkana2(*(p + 1)))
345 /* euc-jp hankaku kana */
349 } else if (iseucaux(*p)) {
350 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
351 /* auxiliary kanji */
356 /* substitute unprintable 1 byte code */
/*
 * Clean up a string in place for 7-bit display: CR+LF is collapsed to LF
 * and every other byte that fails isascii() is overwritten with SUBST_CHAR.
 * The loop header that advances p is not visible in this view.
 */
361 void conv_unreadable_8bit(gchar *str)
363 register guchar *p = str;
366 /* convert CR+LF -> LF */
367 if (*p == '\r' && *(p + 1) == '\n')
368 memmove(p, p + 1, strlen(p));
369 else if (!isascii(*p)) *p = SUBST_CHAR;
/*
 * Clean up a Latin-charset string in place: CR+LF is collapsed to LF, and
 * bytes in 0x80..0x9f are singled out for special handling.
 * NOTE(review): the statement executed for 0x80..0x9f is not visible here;
 * presumably it writes SUBST_CHAR as conv_unreadable_8bit does -- confirm.
 */
374 void conv_unreadable_latin(gchar *str)
376 register guchar *p = str;
379 /* convert CR+LF -> LF */
380 if (*p == '\r' && *(p + 1) == '\n')
381 memmove(p, p + 1, strlen(p));
382 else if ((*p & 0xff) >= 0x80 && (*p & 0xff) <= 0x9f)
/*
 * Replace certain two-byte characters in str with single-byte ones, in
 * place, shrinking the string with memmove as it goes.
 *
 * Two cases are visible: one where the second byte lies in 0xb0..0xfa (the
 * lead-byte test for that case is not visible), and one where the lead byte
 * is 0xa1 and the second byte, in 0xa0..0xef, maps through char_tbl to
 * something other than NCV.  Other pairs accepted by iseuckanji() take a
 * further branch whose body is not visible.
 */
390 void conv_mb_alnum(gchar *str)
/* indexed by (second byte - 0xa0); the 80 visible entries cover
   0xa0..0xef, and NCV marks pairs that are left untouched */
392 static guchar char_tbl[] = {
394 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
395 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
397 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
398 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
400 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
401 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
403 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
404 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
406 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
407 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
410 register guchar *p = str;
417 register guchar ch = *(p + 1);
419 if (ch >= 0xb0 && ch <= 0xfa) {
424 memmove(p, p + 1, len);
430 } else if (*p == 0xa1) {
431 register guchar ch = *(p + 1);
433 if (ch >= 0xa0 && ch <= 0xef &&
434 NCV != char_tbl[ch - 0xa0]) {
435 *p = char_tbl[ch - 0xa0];
438 memmove(p, p + 1, len);
444 } else if (iseuckanji(*p)) {
/*
 * Guess the character set of str by scanning its bytes.
 *
 * The guess starts as C_US_ASCII.  An ESC followed by '$' or '(' returns
 * C_ISO_2022_JP immediately if nothing else has been guessed yet.
 * Otherwise byte pairs are tested against the EUC and Shift_JIS range
 * macros and `guessed` is updated; several outcomes, including the final
 * return, are not visible in this view.
 */
454 CharSet conv_guess_encoding(const gchar *str)
456 const guchar *p = str;
457 CharSet guessed = C_US_ASCII;
460 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
461 if (guessed == C_US_ASCII)
462 return C_ISO_2022_JP;
464 } else if (isascii(*p)) {
466 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
/* 0xfd and 0xfe lie outside both issjiskanji1() lead-byte ranges */
467 if (*p >= 0xfd && *p <= 0xfe)
469 else if (guessed == C_SHIFT_JIS) {
470 if ((issjiskanji1(*p) &&
471 issjiskanji2(*(p + 1))) ||
473 guessed = C_SHIFT_JIS;
479 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
480 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
481 guessed = C_SHIFT_JIS;
485 } else if (issjishwkana(*p)) {
486 guessed = C_SHIFT_JIS;
/* Prepare JIS text for display: convert it to EUC with conv_jistoeuc, then
   clean the result in place with conv_unreadable_eucjp. */
496 void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
498 conv_jistoeuc(outbuf, outlen, inbuf);
499 conv_unreadable_eucjp(outbuf);
/* Prepare Shift_JIS text for display: convert it to EUC with
   conv_sjistoeuc, then clean the result in place with
   conv_unreadable_eucjp. */
502 void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
504 conv_sjistoeuc(outbuf, outlen, inbuf);
505 conv_unreadable_eucjp(outbuf);
/* Prepare EUC text for display: copy it with strncpy2 (bounded by outlen),
   then clean the copy in place with conv_unreadable_eucjp. */
508 void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
510 strncpy2(outbuf, inbuf, outlen);
511 conv_unreadable_eucjp(outbuf);
/* Prepare text of unknown encoding for display: convert it with
   conv_anytoeuc (which guesses the source encoding), then clean the result
   in place with conv_unreadable_eucjp. */
514 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
516 conv_anytoeuc(outbuf, outlen, inbuf);
517 conv_unreadable_eucjp(outbuf);
/* Prepare US-ASCII text for display: copy it with strncpy2 (bounded by
   outlen), then clean the copy in place with conv_unreadable_8bit. */
520 void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
522 strncpy2(outbuf, inbuf, outlen);
523 conv_unreadable_8bit(outbuf);
/* Prepare Latin-charset text for display: copy it with strncpy2 (bounded
   by outlen), then clean the copy in place with conv_unreadable_latin. */
526 void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
528 strncpy2(outbuf, inbuf, outlen);
529 conv_unreadable_latin(outbuf);
/* Pass-through converter: copies inbuf to outbuf with strncpy2, bounded by
   outlen, with no other processing.  Other code compares converter
   pointers against this function to detect "no conversion needed". */
532 void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
534 strncpy2(outbuf, inbuf, outlen);
/*
 * Allocate a zero-initialised CodeConverter for the given charset name and
 * fill in its conversion function, a private copy of the name, and the
 * matching CharSet value.
 * NOTE(review): the return statement is not visible here; presumably the
 * new object is returned and later released with
 * conv_code_converter_destroy -- confirm.
 */
537 CodeConverter *conv_code_converter_new(const gchar *charset)
541 conv = g_new0(CodeConverter, 1);
543 conv->code_conv_func = conv_get_code_conv_func(charset);
545 conv->charset_str = g_strdup(charset);
546 conv->charset = conv_get_charset_from_str(charset);
/* Release a CodeConverter.  The visible line frees the duplicated charset
   name; NOTE(review): freeing conv itself is not visible in this view. */
551 void conv_code_converter_destroy(CodeConverter *conv)
553 g_free(conv->charset_str);
/*
 * Convert inbuf into outbuf (at most outlen bytes) using the given
 * converter.  Two build variants are visible: with libjconv the text goes
 * through conv_codeset_strdup and is then copied with strncpy2; otherwise
 * the converter's own code_conv_func writes straight into outbuf.
 * The return values and the handling of a NULL result from
 * conv_codeset_strdup are not visible in this view.
 */
557 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
563 str = conv_codeset_strdup(inbuf, conv->charset_str, NULL);
567 strncpy2(outbuf, str, outlen);
570 #else /* !HAVE_LIBJCONV */
571 conv->code_conv_func(outbuf, outlen, inbuf);
/*
 * Return a newly allocated copy of inbuf converted from src_codeset to
 * dest_codeset.
 *
 * Visible stages:
 *  - a display converter is looked up from src_codeset; if it is not
 *    conv_noconv, a buffer is allocated (twice the input length plus one
 *    for the JIS / SJIS / "any" converters, otherwise the input length plus
 *    one), filled, and shrunk with g_realloc.  The condition guarding this
 *    stage is not visible here.
 *  - libjconv build: identical source and destination names short-circuit
 *    to g_strdup; otherwise jconv_alloc_conv performs the conversion and a
 *    warning is logged on failure.
 *  - non-libjconv build: only EUC-JP / Shift_JIS to ISO-2022-JP is
 *    converted (buffer of three times the input size); anything else is
 *    duplicated unchanged.
 */
577 gchar *conv_codeset_strdup(const gchar *inbuf,
578 const gchar *src_codeset, const gchar *dest_codeset)
584 const gchar *const *codesets;
586 #else /* !HAVE_LIBJCONV */
587 CharSet src_charset = C_AUTO, dest_charset = C_AUTO;
593 func = conv_get_code_conv_func(src_codeset);
594 if (func != conv_noconv) {
595 if (func == conv_jistodisp ||
596 func == conv_sjistodisp ||
597 func == conv_anytodisp)
598 len = strlen(inbuf) * 2 + 1;
600 len = strlen(inbuf) + 1;
602 if (!buf) return NULL;
603 func(buf, len, inbuf);
/* shrink the allocation to the converted string's actual size */
604 buf = g_realloc(buf, strlen(buf) + 1);
609 /* don't convert if src and dest codeset are identical */
610 if (src_codeset && dest_codeset &&
611 !strcasecmp(src_codeset, dest_codeset))
612 return g_strdup(inbuf);
616 codesets = &src_codeset;
619 codesets = jconv_info_get_pref_codesets(&n_codesets);
621 dest_codeset = conv_get_current_charset_str();
622 /* don't convert if current codeset is US-ASCII */
623 if (!strcasecmp(dest_codeset, CS_US_ASCII))
624 return g_strdup(inbuf);
627 if (jconv_alloc_conv(inbuf, strlen(inbuf), &buf, &len,
628 codesets, n_codesets,
629 &actual_codeset, dest_codeset)
634 g_warning("code conversion from %s to %s failed\n",
635 codesets && codesets[0] ? codesets[0] : "(unknown)",
640 #else /* !HAVE_LIBJCONV */
/* NOTE(review): src_codeset is passed to strcasecmp below; any NULL guard
   around these tests is not visible in this view -- confirm. */
642 if (!strcasecmp(src_codeset, CS_EUC_JP) ||
643 !strcasecmp(src_codeset, CS_EUCJP))
644 src_charset = C_EUC_JP;
645 else if (!strcasecmp(src_codeset, CS_SHIFT_JIS) ||
646 !strcasecmp(src_codeset, "SHIFT-JIS") ||
647 !strcasecmp(src_codeset, "SJIS"))
648 src_charset = C_SHIFT_JIS;
649 if (dest_codeset && !strcasecmp(dest_codeset, CS_ISO_2022_JP))
650 dest_charset = C_ISO_2022_JP;
653 if ((src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS) &&
654 dest_charset == C_ISO_2022_JP) {
655 len = (strlen(inbuf) + 1) * 3;
658 if (src_charset == C_EUC_JP)
659 conv_euctojis(buf, len, inbuf);
661 conv_anytojis(buf, len, inbuf);
662 buf = g_realloc(buf, strlen(buf) + 1);
665 buf = g_strdup(inbuf);
668 #endif /* !HAVE_LIBJCONV */
/*
 * Pick the display-conversion function for a charset name.
 *
 * One visible path (its guard is not visible; presumably a missing charset
 * name) returns conv_anytodisp when the current charset is EUC-JP or
 * Shift_JIS.  Otherwise the name is matched case-insensitively:
 * ISO-2022-JP variants map to conv_jistodisp, US-ASCII to conv_ustodisp,
 * ISO-8859-* to conv_latintodisp, the Shift_JIS spellings to
 * conv_sjistodisp, EUC-JP to conv_euctodisp, and anything else to
 * conv_noconv.
 */
671 CodeConvFunc conv_get_code_conv_func(const gchar *charset)
673 CodeConvFunc code_conv;
677 cur_charset = conv_get_current_charset();
678 if (cur_charset == C_EUC_JP || cur_charset == C_SHIFT_JIS)
679 return conv_anytodisp;
684 if (!strcasecmp(charset, CS_ISO_2022_JP) ||
685 !strcasecmp(charset, CS_ISO_2022_JP_2))
686 code_conv = conv_jistodisp;
687 else if (!strcasecmp(charset, CS_US_ASCII))
688 code_conv = conv_ustodisp;
/* only the first 10 characters are compared here */
689 else if (!strncasecmp(charset, CS_ISO_8859_1, 10))
690 code_conv = conv_latintodisp;
692 else if (!strncasecmp(charset, "ISO-8859-", 9))
693 code_conv = conv_latintodisp;
695 else if (!strcasecmp(charset, CS_SHIFT_JIS) ||
696 !strcasecmp(charset, "SHIFT-JIS") ||
697 !strcasecmp(charset, "SJIS") ||
698 !strcasecmp(charset, "X-SJIS"))
699 code_conv = conv_sjistodisp;
700 else if (!strcasecmp(charset, CS_EUC_JP) ||
701 !strcasecmp(charset, CS_EUCJP))
702 code_conv = conv_euctodisp;
704 code_conv = conv_noconv;
/*
 * Table pairing CharSet values with charset names.  A value may appear
 * more than once (e.g. C_US_ASCII, C_EUC_JP); conv_get_charset_str returns
 * the name from the first entry with a matching value, so the first
 * spelling listed is the one reported.
 * Entry order matters: conv_get_outgoing_charset starts scanning at index
 * 3 so as to skip the leading US-ASCII / UTF-8 entries.
 */
709 static const struct {
713 {C_US_ASCII, CS_US_ASCII},
714 {C_US_ASCII, CS_ANSI_X3_4_1968},
716 {C_ISO_8859_1, CS_ISO_8859_1},
717 {C_ISO_8859_2, CS_ISO_8859_2},
718 {C_ISO_8859_4, CS_ISO_8859_4},
719 {C_ISO_8859_5, CS_ISO_8859_5},
720 {C_ISO_8859_7, CS_ISO_8859_7},
721 {C_ISO_8859_8, CS_ISO_8859_8},
722 {C_ISO_8859_9, CS_ISO_8859_9},
723 {C_ISO_8859_11, CS_ISO_8859_11},
724 {C_ISO_8859_13, CS_ISO_8859_13},
725 {C_ISO_8859_15, CS_ISO_8859_15},
726 {C_BALTIC, CS_BALTIC},
727 {C_CP1251, CS_CP1251},
728 {C_WINDOWS_1251, CS_WINDOWS_1251},
729 {C_KOI8_R, CS_KOI8_R},
730 {C_KOI8_U, CS_KOI8_U},
731 {C_ISO_2022_JP, CS_ISO_2022_JP},
732 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
733 {C_EUC_JP, CS_EUC_JP},
734 {C_EUC_JP, CS_EUCJP},
735 {C_SHIFT_JIS, CS_SHIFT_JIS},
736 {C_ISO_2022_KR, CS_ISO_2022_KR},
737 {C_EUC_KR, CS_EUC_KR},
738 {C_ISO_2022_CN, CS_ISO_2022_CN},
739 {C_EUC_CN, CS_EUC_CN},
740 {C_GB2312, CS_GB2312},
741 {C_EUC_TW, CS_EUC_TW},
743 {C_TIS_620, CS_TIS_620},
744 {C_WINDOWS_874, CS_WINDOWS_874},
/*
 * Table mapping locale names to their default CharSet.
 * conv_get_current_charset compares the current locale against each entry
 * using the entry's own length as the prefix length, scanning in table
 * order, so fully qualified names such as "ja_JP.eucJP" are listed before
 * the bare "ja_JP" fallback.
 */
748 static const struct {
752 {"ja_JP.eucJP" , C_EUC_JP},
753 {"ja_JP.ujis" , C_EUC_JP},
754 {"ja_JP.EUC" , C_EUC_JP},
755 {"ja_JP.SJIS" , C_SHIFT_JIS},
756 {"ja_JP.JIS" , C_ISO_2022_JP},
757 {"ja_JP" , C_EUC_JP},
758 {"ko_KR" , C_EUC_KR},
759 {"zh_CN.GB2312" , C_GB2312},
760 {"zh_CN" , C_GB2312},
761 {"zh_TW.eucTW" , C_EUC_TW},
762 {"zh_TW.Big5" , C_BIG5},
765 {"ru_RU.KOI8-R" , C_KOI8_R},
766 {"ru_RU.CP1251" , C_WINDOWS_1251},
768 {"bg_BG" , C_WINDOWS_1251},
770 {"en_US" , C_ISO_8859_1},
771 {"ca_ES" , C_ISO_8859_1},
772 {"da_DK" , C_ISO_8859_1},
773 {"de_DE" , C_ISO_8859_1},
774 {"nl_NL" , C_ISO_8859_1},
775 {"et_EE" , C_ISO_8859_1},
776 {"fi_FI" , C_ISO_8859_1},
777 {"fr_FR" , C_ISO_8859_1},
778 {"is_IS" , C_ISO_8859_1},
779 {"it_IT" , C_ISO_8859_1},
780 {"no_NO" , C_ISO_8859_1},
781 {"pt_PT" , C_ISO_8859_1},
782 {"pt_BR" , C_ISO_8859_1},
783 {"es_ES" , C_ISO_8859_1},
784 {"sv_SE" , C_ISO_8859_1},
786 {"hr_HR" , C_ISO_8859_2},
787 {"hu_HU" , C_ISO_8859_2},
788 {"pl_PL" , C_ISO_8859_2},
789 {"ro_RO" , C_ISO_8859_2},
790 {"sk_SK" , C_ISO_8859_2},
791 {"sl_SI" , C_ISO_8859_2},
792 {"ru_RU" , C_ISO_8859_5},
793 {"el_GR" , C_ISO_8859_7},
794 {"iw_IL" , C_ISO_8859_8},
795 {"tr_TR" , C_ISO_8859_9},
/* alternative charsets for th_TH are kept below, commented out */
797 {"th_TH" , C_TIS_620},
798 /* {"th_TH" , C_WINDOWS_874}, */
799 /* {"th_TH" , C_ISO_8859_11}, */
801 {"lt_LT.iso88594" , C_ISO_8859_4},
802 {"lt_LT.ISO8859-4" , C_ISO_8859_4},
803 {"lt_LT.ISO_8859-4" , C_ISO_8859_4},
804 {"lt_LT" , C_ISO_8859_13},
805 {"lv_LV" , C_ISO_8859_13},
808 {"POSIX" , C_US_ASCII},
809 {"ANSI_X3.4-1968" , C_US_ASCII},
811 #endif /* !HAVE_LIBJCONV */
/* Return the name of the first charsets[] entry whose value equals
   charset.  The result when nothing matches is not visible in this view;
   callers below treat a NULL result as "US-ASCII". */
813 const gchar *conv_get_charset_str(CharSet charset)
817 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
818 if (charsets[i].charset == charset)
819 return charsets[i].name;
/* Map a charset name to its CharSet value by case-insensitive lookup in
   charsets[].  A NULL name yields C_AUTO; the result for an unknown name is
   not visible in this view. */
825 CharSet conv_get_charset_from_str(const gchar *charset)
829 if (!charset) return C_AUTO;
831 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
832 if (!strcasecmp(charsets[i].name, charset))
833 return charsets[i].charset;
/*
 * Determine the charset of the current environment, caching the answer in
 * a function-local static (-1 means "not determined yet").
 *
 * Two strategies are visible (presumably selected by HAVE_LIBJCONV):
 *  - look up the codeset reported by jconv_info_get_current_codeset() in
 *    charsets[];
 *  - take the locale name from LC_ALL, LC_CTYPE, LANG (in that order) or
 *    setlocale(), treat any name containing "UTF-8" as C_UTF_8, and
 *    otherwise match it against locale_table.
 * C_US_ASCII and C_AUTO are used as fallbacks; the conditions selecting
 * them are not visible in this view.
 */
839 CharSet conv_get_current_charset(void)
841 static CharSet cur_charset = -1;
845 const gchar *cur_codeset;
847 const gchar *cur_locale;
850 if (cur_charset != -1)
854 cur_codeset = jconv_info_get_current_codeset();
855 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
856 if (!strcasecmp(cur_codeset, charsets[i].name)) {
857 cur_charset = charsets[i].charset;
862 cur_locale = g_getenv("LC_ALL");
863 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
864 if (!cur_locale) cur_locale = g_getenv("LANG");
865 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
867 debug_print("current locale: %s\n",
868 cur_locale ? cur_locale : "(none)");
871 cur_charset = C_US_ASCII;
875 if (strcasestr(cur_locale, "UTF-8")) {
876 cur_charset = C_UTF_8;
880 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
883 /* "ja_JP.EUC" matches with "ja_JP.eucJP" and "ja_JP.EUC" */
884 /* "ja_JP" matches with "ja_JP.xxxx" and "ja" */
885 if (!strncasecmp(cur_locale, locale_table[i].locale,
886 strlen(locale_table[i].locale))) {
887 cur_charset = locale_table[i].charset;
/* a bare two-letter locale is matched against the language part of
   table entries that carry no ".codeset" suffix */
889 } else if ((p = strchr(locale_table[i].locale, '_')) &&
890 !strchr(p + 1, '.')) {
891 if (strlen(cur_locale) == 2 &&
892 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
893 cur_charset = locale_table[i].charset;
900 cur_charset = C_AUTO;
/* Return the name of the current charset, kept in a function-local static
   between calls.  Falls back to "US-ASCII" when no name is available. */
904 const gchar *conv_get_current_charset_str(void)
906 static const gchar *codeset = NULL;
909 codeset = conv_get_charset_str(conv_get_current_charset());
911 return codeset ? codeset : "US-ASCII";
/*
 * Determine the charset to use for outgoing text, caching the answer in a
 * function-local static (-1 means "not determined yet").
 *
 * Two strategies are visible (presumably selected by HAVE_LIBJCONV):
 *  - walk libjconv's preferred codesets and take the first one found in
 *    charsets[] from index 3 onward, then fall back to UTF-8 if it is
 *    listed, then to C_AUTO;
 *  - switch on the current charset, substituting C_ISO_2022_JP for some
 *    values (the case labels are not visible) and otherwise using the
 *    current charset unchanged.
 */
914 CharSet conv_get_outgoing_charset(void)
916 static CharSet out_charset = -1;
919 gint i, j, n_pref_codesets;
920 const gchar *const *pref_codesets;
925 if (out_charset != -1)
929 /* skip US-ASCII and UTF-8 */
930 pref_codesets = jconv_info_get_pref_codesets(&n_pref_codesets);
931 for (i = 0; i < n_pref_codesets; i++) {
/* j starts at 3: this relies on the order of the leading charsets[] entries */
932 for (j = 3; j < sizeof(charsets) / sizeof(charsets[0]); j++) {
933 if (!strcasecmp(pref_codesets[i], charsets[j].name)) {
934 out_charset = charsets[j].charset;
940 for (i = 0; i < n_pref_codesets; i++) {
941 if (!strcasecmp(pref_codesets[i], "UTF-8")) {
942 out_charset = C_UTF_8;
947 out_charset = C_AUTO;
949 cur_charset = conv_get_current_charset();
950 switch (cur_charset) {
953 out_charset = C_ISO_2022_JP;
956 out_charset = cur_charset;
/*
 * Return the name of the charset to use for outgoing text.
 *
 * A user preference (prefs_common.outgoing_charset) wins when it is set to
 * something other than CS_AUTO.  A preference whose first character is not
 * alphabetic is treated as invalid and reset to CS_AUTO.  Otherwise the
 * name comes from conv_get_outgoing_charset(), with "US-ASCII" as the
 * fallback when no name is available.
 */
963 const gchar *conv_get_outgoing_charset_str(void)
968 if (prefs_common.outgoing_charset) {
969 if (!isalpha(prefs_common.outgoing_charset[0])) {
970 g_free(prefs_common.outgoing_charset);
971 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
972 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
973 return prefs_common.outgoing_charset;
976 out_charset = conv_get_outgoing_charset();
977 str = conv_get_charset_str(out_charset);
979 return str ? str : "US-ASCII";
/*
 * Return the current locale name, taken from LC_ALL, then LANG, then
 * setlocale(LC_CTYPE, NULL), and log it via debug_print.
 * NOTE(review): unlike conv_get_current_charset, the LC_CTYPE environment
 * variable is not consulted here -- confirm whether that is intentional.
 */
982 const gchar *conv_get_current_locale(void)
986 cur_locale = g_getenv("LC_ALL");
987 if (!cur_locale) cur_locale = g_getenv("LANG");
988 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
990 debug_print("current locale: %s\n",
991 cur_locale ? cur_locale : "(none)");
/*
 * Decode a MIME-encoded header in place, overwriting str.
 *
 * When the current charset is EUC-JP the text is first run through
 * conv_anytodisp into a temporary buffer of twice the input length plus
 * one, and unmime_header then writes the decoded result back into str.
 * Otherwise unmime_header decodes into a temporary buffer of the input
 * length plus one, which is copied back with strncpy2.
 * The temporaries come from Xalloca, which returns from the function on
 * failure.
 */
996 void conv_unmime_header_overwrite(gchar *str)
1000 CharSet cur_charset;
1002 cur_charset = conv_get_current_charset();
1004 if (cur_charset == C_EUC_JP) {
1005 buflen = strlen(str) * 2 + 1;
1006 Xalloca(buf, buflen, return);
1007 conv_anytodisp(buf, buflen, str);
1008 unmime_header(str, buf);
1010 buflen = strlen(str) + 1;
1011 Xalloca(buf, buflen, return);
1012 unmime_header(buf, str);
1013 strncpy2(str, buf, buflen);
/*
 * Decode the MIME-encoded header str into outbuf.
 *
 * When the current charset is EUC-JP the text is first run through
 * conv_anytodisp into a temporary Xalloca buffer (twice the input length
 * plus one) and the result is decoded with unmime_header; another visible
 * path decodes str directly.
 * NOTE(review): outlen and charset are not referenced in any line visible
 * here -- confirm how the output size is bounded.
 */
1017 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1018 const gchar *charset)
1020 CharSet cur_charset;
1022 cur_charset = conv_get_current_charset();
1024 if (cur_charset == C_EUC_JP) {
1028 buflen = strlen(str) * 2 + 1;
1029 Xalloca(buf, buflen, return);
1030 conv_anytodisp(buf, buflen, str);
1031 unmime_header(outbuf, buf);
1033 unmime_header(outbuf, str);
/* Length limits used by conv_encode_header when folding header lines. */
1036 #define MAX_ENCLEN 75
1037 #define MAX_LINELEN 76
/* Base64 output length for len input bytes: four characters for every
   three bytes, with any partial group rounded up to a full four. */
1039 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/*
 * MIME-encode header text src into dest (capacity len), libjconv build.
 *
 * src is converted to a wide-character string and consumed in
 * whitespace-delimited pieces.  A piece that is followed by non-ASCII text
 * is extended across the whitespace so the spaces are encoded with it.
 * Non-ASCII pieces are converted to the outgoing charset with
 * jconv_alloc_conv and written as "=?" charset "?B?" base64 "?="; ASCII
 * pieces are handled separately.  line_len tracks the current output line
 * length, starting from header_len (declared on a signature line not
 * visible here), and is compared against limits that are not visible in
 * this view.
 *
 * The function gives up early (leaving the remainder unencoded) once fewer
 * than (MAX_LINELEN + 1) * 2 bytes remain in dest.
 */
1042 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1048 size_t line_len, mimehdr_len, mimehdr_begin_len;
1049 gchar *mimehdr_init = "=?";
1050 gchar *mimehdr_end = "?=";
1051 gchar *mimehdr_enctype = "?B?";
1052 const gchar *mimehdr_charset;
1054 /* g_print("src = %s\n", src); */
1055 mimehdr_charset = conv_get_outgoing_charset_str();
1057 /* convert to wide-character string */
1058 wsrcp = wsrc = strdup_mbstowcs(src);
/* mimehdr_len: full encoded-word overhead; mimehdr_begin_len: the same
   without the trailing "?=" */
1060 mimehdr_len = strlen(mimehdr_init) + strlen(mimehdr_end) +
1061 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1062 mimehdr_begin_len = strlen(mimehdr_init) +
1063 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1064 line_len = header_len;
1068 g_return_if_fail(wsrc != NULL);
1071 wchar_t *wp, *wtmp, *wtmpp;
1073 gboolean str_is_non_ascii;
1075 /* irresponsible buffer overrun check */
1076 if ((len - (destp - dest)) < (MAX_LINELEN + 1) * 2) break;
1078 /* encode string including space
1079 if non-ASCII string follows */
1080 if (is_next_nonascii(wsrcp)) {
1082 while ((wp = find_wspace(wp)) != NULL)
1083 if (!is_next_nonascii(wp)) break;
1084 str_is_non_ascii = TRUE;
1086 wp = find_wspace(wsrcp);
1087 str_is_non_ascii = FALSE;
1091 wtmp = wcsndup(wsrcp, wp - wsrcp);
1093 while (iswspace(wsrcp[nspc])) nspc++;
1095 wtmp = wcsdup(wsrcp);
1096 wsrcp += wcslen(wsrcp);
1103 gchar *tmp; /* internal codeset */
1104 gchar *raw; /* converted, but not base64 encoded */
1105 register gchar *tmpp;
/* sized for the worst case of MB_CUR_MAX bytes per wide character, plus
   the terminator */
1108 tmpp = tmp = g_malloc(wcslen(wtmpp) * MB_CUR_MAX + 1);
1113 while (*wtmpp != (wchar_t)0) {
1116 gchar *raw_new = NULL;
1117 int raw_new_len = 0;
1118 const gchar *src_codeset;
1120 mbl = wctomb(tmpp, *wtmpp);
1122 g_warning("invalid wide character\n");
1127 src_codeset = conv_get_current_charset_str();
1128 /* printf ("tmp = %s, tlen = %d, mbl\n",
1130 if (jconv_alloc_conv(tmp, tlen + mbl,
1131 &raw_new, &raw_new_len,
1133 &dummy, mimehdr_charset)
1135 g_warning("can't convert\n");
1140 if (str_is_non_ascii) {
1141 gint dlen = mimehdr_len +
1143 if ((line_len + dlen +
1144 (*(wtmpp + 1) ? 0 : nspc) +
1145 (line_len > 1 ? 1 : 0))
1158 } else if ((line_len + tlen + mbl +
1159 (*(wtmpp + 1) ? 0 : nspc) +
1160 (line_len > 1 ? 1 : 0))
1163 if (1 + tlen + mbl +
1164 (*(wtmpp + 1) ? 0 : nspc)
1182 raw_len = raw_new_len;
1186 /* g_print("tmp = %s, tlen = %d, mb_seqlen = %d\n",
1187 tmp, tlen, mb_seqlen); */
1189 if (tlen == 0 || raw_len == 0) {
1195 if (line_len > 1 && destp > dest) {
1201 if (str_is_non_ascii) {
1202 g_snprintf(destp, len - strlen(dest), "%s%s%s",
1203 mimehdr_init, mimehdr_charset,
1205 destp += mimehdr_begin_len;
1206 line_len += mimehdr_begin_len;
1208 base64_encode(destp, raw, raw_len);
1209 line_len += strlen(destp);
1210 destp += strlen(destp);
1212 strcpy(destp, mimehdr_end);
1213 destp += strlen(mimehdr_end);
1214 line_len += strlen(mimehdr_end);
1217 line_len += strlen(destp);
1218 destp += strlen(destp);
1223 /* g_print("line_len = %d\n\n", line_len); */
1224 } while (*wtmpp != (wchar_t)0);
1226 while (iswspace(*wsrcp)) {
1229 mbl = wctomb(destp, *wsrcp++);
1242 /* g_print("dest = %s\n", dest); */
1244 #else /* !HAVE_LIBJCONV */
/* Byte length of one JIS shift sequence; conv_encode_header adds two of
   these (kanji-in plus kanji-out) when estimating the encoded size. */
1246 #define JIS_SEQLEN 3
/*
 * MIME-encode header text src into dest (capacity len), build without
 * libjconv.
 *
 * Only ISO-2022-JP is supported as the outgoing charset; for anything else
 * src is copied to dest unchanged with strncpy2.  Otherwise src is
 * converted to a wide-character string and consumed in
 * whitespace-delimited pieces.  A piece that is followed by non-ASCII text
 * is extended across the whitespace so the spaces are encoded with it.
 * Non-ASCII pieces are converted with conv_euctojis and written as
 * "=?" charset "?B?" base64 "?="; ASCII pieces are handled separately.
 * line_len tracks the current output line length, starting from
 * header_len (declared on a signature line not visible here), and is
 * compared against limits that are not visible in this view.
 *
 * The function gives up early (leaving the remainder unencoded) once fewer
 * than (MAX_LINELEN + 1) * 2 bytes remain in dest.
 */
1248 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1254 size_t line_len, mimehdr_len, mimehdr_begin_len;
1255 gchar *mimehdr_init = "=?";
1256 gchar *mimehdr_end = "?=";
1257 gchar *mimehdr_enctype = "?B?";
1258 const gchar *mimehdr_charset;
1260 /* g_print("src = %s\n", src); */
1261 mimehdr_charset = conv_get_outgoing_charset_str();
1262 if (strcmp(mimehdr_charset, "ISO-2022-JP") != 0) {
1263 /* currently only supports Japanese */
1264 strncpy2(dest, src, len);
1268 /* convert to wide-character string */
1269 wsrcp = wsrc = strdup_mbstowcs(src);
/* mimehdr_len: full encoded-word overhead; mimehdr_begin_len: the same
   without the trailing "?=" */
1271 mimehdr_len = strlen(mimehdr_init) + strlen(mimehdr_end) +
1272 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1273 mimehdr_begin_len = strlen(mimehdr_init) +
1274 strlen(mimehdr_charset) + strlen(mimehdr_enctype);
1275 line_len = header_len;
1279 g_return_if_fail(wsrc != NULL);
1282 wchar_t *wp, *wtmp, *wtmpp;
1284 gboolean str_is_non_ascii;
1286 /* irresponsible buffer overrun check */
1287 if ((len - (destp - dest)) < (MAX_LINELEN + 1) * 2) break;
1289 /* encode string including space
1290 if non-ASCII string follows */
1291 if (is_next_nonascii(wsrcp)) {
1293 while ((wp = find_wspace(wp)) != NULL)
1294 if (!is_next_nonascii(wp)) break;
1295 str_is_non_ascii = TRUE;
1297 wp = find_wspace(wsrcp);
1298 str_is_non_ascii = FALSE;
1302 wtmp = wcsndup(wsrcp, wp - wsrcp);
1304 while (iswspace(wsrcp[nspc])) nspc++;
1306 wtmp = wcsdup(wsrcp);
1307 wsrcp += wcslen(wsrcp);
1313 gint prev_mbl = 1, tlen = 0, mb_seqlen = 0;
1315 register gchar *tmpp;
/* sized for the worst case of MB_CUR_MAX bytes per wide character, plus
   the terminator */
1317 tmpp = tmp = g_malloc(wcslen(wtmpp) * MB_CUR_MAX + 1);
1320 while (*wtmpp != (wchar_t)0) {
1323 mbl = wctomb(tmpp, *wtmpp);
1325 g_warning("invalid wide character\n");
1330 /* length of KI + KO */
1331 if (prev_mbl == 1 && mbl == 2)
1332 mb_seqlen += JIS_SEQLEN * 2;
1334 if (str_is_non_ascii) {
1335 gint dlen = mimehdr_len +
1336 B64LEN(tlen + mb_seqlen + mbl);
1338 if ((line_len + dlen +
1339 (*(wtmpp + 1) ? 0 : nspc) +
1340 (line_len > 1 ? 1 : 0))
1353 } else if ((line_len + tlen + mbl +
1354 (*(wtmpp + 1) ? 0 : nspc) +
1355 (line_len > 1 ? 1 : 0))
1357 if (1 + tlen + mbl +
1358 (*(wtmpp + 1) ? 0 : nspc)
1377 /* g_print("tmp = %s, tlen = %d, mb_seqlen = %d\n",
1378 tmp, tlen, mb_seqlen); */
1385 if (line_len > 1 && destp > dest) {
1391 if (str_is_non_ascii) {
/* mb_seqlen is the extra room reserved for the shift sequences counted
   in the loop above */
1394 tmp_jis = g_new(gchar, tlen + mb_seqlen + 1);
1395 conv_euctojis(tmp_jis,
1396 tlen + mb_seqlen + 1, tmp);
1397 g_snprintf(destp, len - strlen(dest), "%s%s%s",
1398 mimehdr_init, mimehdr_charset,
1400 destp += mimehdr_begin_len;
1401 line_len += mimehdr_begin_len;
1403 base64_encode(destp, tmp_jis, strlen(tmp_jis));
1404 line_len += strlen(destp);
1405 destp += strlen(destp);
1407 strcpy(destp, mimehdr_end);
1408 destp += strlen(mimehdr_end);
1409 line_len += strlen(mimehdr_end);
1414 line_len += strlen(destp);
1415 destp += strlen(destp);
1419 /* g_print("line_len = %d\n\n", line_len); */
1420 } while (*wtmpp != (wchar_t)0);
1422 while (iswspace(*wsrcp)) {
1425 mbl = wctomb(destp, *wsrcp++);
1438 /* g_print("dest = %s\n", dest); */
1440 #endif /* HAVE_LIBJCONV */