2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2003 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
42 #include "quoted-printable.h"
44 #include "prefs_common.h"
/* Character written in place of bytes that cannot be displayed or converted. */
54 #define SUBST_CHAR '_'
/* Byte-class predicates. Each one masks its argument to the low 8 bits
   before comparing, so a sign-extended char gives the same answer as an
   unsigned one. */
/* True when c is in 0xa1..0xfe. */
57 #define iseuckanji(c) \
58 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* True when c is 0x8e (tested in this file as the lead byte of EUC-JP
   half-width kana). */
59 #define iseuchwkana1(c) \
60 (((c) & 0xff) == 0x8e)
/* True when c is in 0xa1..0xdf (second byte after an iseuchwkana1 byte). */
61 #define iseuchwkana2(c) \
62 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* True when c is 0x8f. NOTE(review): the #define line naming this
   expression is not visible in this excerpt; presumably iseucaux(), which
   is used elsewhere in this file -- confirm. */
64 (((c) & 0xff) == 0x8f)
/* True when c is in 0x81..0x9f or 0xe0..0xfc (first byte of a Shift_JIS
   two-byte character as tested by this file). */
65 #define issjiskanji1(c) \
66 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
67 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* True when c is in 0x40..0x7e or 0x80..0xfc (second byte of a Shift_JIS
   two-byte character as tested by this file). */
68 #define issjiskanji2(c) \
69 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
70 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* True when c is in 0xa1..0xdf (single-byte Shift_JIS half-width kana). */
71 #define issjishwkana(c) \
72 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* Fragments of multi-line macros that act only when `state` differs from
   a given JISState value. NOTE(review): the #define lines and most of the
   macro bodies are not visible in this excerpt. */
75 if (state != JIS_KANJI) { \
83 if (state != JIS_ASCII) { \
91 if (state != JIS_HWKANA) { \
99 if (state != JIS_AUXKANJI) { \
104 state = JIS_AUXKANJI; \
/*
 * Convert the JIS (ISO-2022-JP) text in inbuf to EUC-JP, writing the
 * result to outbuf.
 *
 * outbuf: destination buffer.
 * outlen: size of outbuf. NOTE(review): how it bounds the writes is not
 *         visible in this excerpt.
 * inbuf:  NUL-terminated input; it is read as unsigned bytes.
 */
107 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
109 const guchar *in = inbuf;
110 guchar *out = outbuf;
/* Decoding starts in ASCII mode. */
111 JISState state = JIS_ASCII;
113 while (*in != '\0') {
/* Escape-sequence parsing. NOTE(review): the tests that lead into the
   checks below (presumably ESC and '$') are not visible in this excerpt. */
117 if (*(in + 1) == '@' || *(in + 1) == 'B') {
120 } else if (*(in + 1) == '(' &&
122 state = JIS_AUXKANJI;
125 /* unknown escape sequence */
128 } else if (*in == '(') {
129 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
132 } else if (*(in + 1) == 'I') {
136 /* unknown escape sequence */
140 /* unknown escape sequence */
/* 0x0e and 0x0f are handled as separate cases; what each does is not
   visible in this excerpt. */
143 } else if (*in == 0x0e) {
146 } else if (*in == 0x0f) {
/* Output: each copied byte gets its high bit set. For two-byte forms the
   loop stops if the input ends after the first byte. */
155 *out++ = *in++ | 0x80;
156 if (*in == '\0') break;
157 *out++ = *in++ | 0x80;
161 *out++ = *in++ | 0x80;
165 *out++ = *in++ | 0x80;
166 if (*in == '\0') break;
167 *out++ = *in++ | 0x80;
/*
 * Convert the EUC-JP text in inbuf to JIS (ISO-2022-JP), writing the
 * result to outbuf.
 *
 * outbuf: destination buffer.
 * outlen: size of outbuf. NOTE(review): how it bounds the writes is not
 *         visible in this excerpt.
 * inbuf:  NUL-terminated input; it is read as unsigned bytes.
 */
176 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
178 const guchar *in = inbuf;
179 guchar *out = outbuf;
/* Output starts in ASCII mode. */
180 JISState state = JIS_ASCII;
182 while (*in != '\0') {
186 } else if (iseuckanji(*in)) {
/* A pair of kanji-range bytes is emitted with the high bit cleared. */
187 if (iseuckanji(*(in + 1))) {
189 *out++ = *in++ & 0x7f;
190 *out++ = *in++ & 0x7f;
/* Lone lead byte followed by a non-ASCII byte. NOTE(review): the action
   taken here is not visible in this excerpt. */
195 if (*in != '\0' && !isascii(*in)) {
200 } else if (iseuchwkana1(*in)) {
/* Half-width kana: the kana byte is emitted with the high bit cleared. */
202 if (iseuchwkana2(*in)) {
204 *out++ = *in++ & 0x7f;
207 if (*in != '\0' && !isascii(*in)) {
212 } else if (iseucaux(*in)) {
/* Auxiliary kanji: the two bytes after the prefix are emitted with the
   high bit cleared. */
214 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
216 *out++ = *in++ & 0x7f;
217 *out++ = *in++ & 0x7f;
220 if (*in != '\0' && !isascii(*in)) {
223 if (*in != '\0' && !isascii(*in)) {
/*
 * Convert the Shift_JIS text in inbuf to EUC-JP, writing the result to
 * outbuf.
 *
 * outbuf: destination buffer.
 * outlen: size of outbuf. NOTE(review): how it bounds the writes is not
 *         visible in this excerpt.
 * inbuf:  NUL-terminated input; it is read as unsigned bytes.
 */
240 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
242 const guchar *in = inbuf;
243 guchar *out = outbuf;
245 while (*in != '\0') {
248 } else if (issjiskanji1(*in)) {
249 if (issjiskanji2(*(in + 1))) {
251 guchar out2 = *(in + 1);
/* The offset subtracted from the first byte depends on which of the two
   lead-byte ranges it falls in. */
254 row = out1 < 0xa0 ? 0x70 : 0xb0;
/* Two alternative mappings of the byte pair follow. NOTE(review): the
   condition choosing between them is not visible in this excerpt. */
256 out1 = (out1 - row) * 2 - 1;
257 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
259 out1 = (out1 - row) * 2;
/* Both result bytes are emitted with the high bit set. */
263 *out++ = out1 | 0x80;
264 *out++ = out2 | 0x80;
269 if (*in != '\0' && !isascii(*in)) {
274 } else if (issjishwkana(*in)) {
/*
 * Convert Japanese text of unknown encoding to EUC-JP.
 * The encoding is guessed with conv_guess_ja_encoding() and the input is
 * then passed to conv_jistoeuc(), conv_sjistoeuc() or copied unchanged
 * with strncpy2(). NOTE(review): the case labels selecting each branch are
 * not visible in this excerpt.
 */
286 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
288 switch (conv_guess_ja_encoding(inbuf)) {
290 conv_jistoeuc(outbuf, outlen, inbuf);
293 conv_sjistoeuc(outbuf, outlen, inbuf);
296 strncpy2(outbuf, inbuf, outlen);
/*
 * Convert Japanese text of unknown encoding to JIS.
 * The encoding is guessed with conv_guess_ja_encoding(); the visible
 * branches either call conv_euctojis() or copy the input unchanged with
 * strncpy2(). NOTE(review): the case labels are not visible in this
 * excerpt.
 */
301 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
303 switch (conv_guess_ja_encoding(inbuf)) {
305 conv_euctojis(outbuf, outlen, inbuf);
308 strncpy2(outbuf, inbuf, outlen);
/*
 * Lookup table used by isprintableeuckanji() for first bytes 0xa2..0xa8.
 * It is indexed as [c1 - 0xa2][c2 - 0xa0]; each row has 96 entries, one
 * per second-byte value 0xa0..0xff. The entry is returned as a gboolean,
 * so 1 means the code is accepted and 0 means it is rejected.
 */
313 static gchar valid_eucjp_tbl[][96] = {
314 /* 0xa2a0 - 0xa2ff */
315 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
316 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
317 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
318 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
319 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
320 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0 },
322 /* 0xa3a0 - 0xa3ff */
323 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
324 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
325 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
326 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
327 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
328 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 },
330 /* 0xa4a0 - 0xa4ff */
331 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
332 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
333 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
334 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
335 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
336 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
338 /* 0xa5a0 - 0xa5ff */
339 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
340 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
341 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
342 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
343 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
344 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
346 /* 0xa6a0 - 0xa6ff */
347 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
348 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
349 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
350 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
351 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
352 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
354 /* 0xa7a0 - 0xa7ff */
355 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
356 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
357 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
358 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
359 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
360 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
362 /* 0xa8a0 - 0xa8ff */
363 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
364 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
365 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
366 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
367 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
368 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
/*
 * Decide whether the two-byte EUC-JP code (c1, c2) is printable.
 * First bytes 0xa2..0xa8 are answered from valid_eucjp_tbl; the other
 * ranges are handled by the explicit tests below.
 * NOTE(review): the return statements for every test except the table
 * lookup are not visible in this excerpt, so the result for those ranges
 * cannot be stated from here.
 */
371 static gboolean isprintableeuckanji(guchar c1, guchar c2)
/* First byte outside 0xa1..0xf4. */
373 if (c1 <= 0xa0 || c1 >= 0xf5)
/* Second byte outside 0xa1..0xfe. */
375 if (c2 <= 0xa0 || c2 == 0xff)
378 if (c1 >= 0xa9 && c1 <= 0xaf)
381 if (c1 >= 0xa2 && c1 <= 0xa8)
382 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
/* NOTE(review): the first-byte test guarding this range check is not
   visible in this excerpt. */
385 if (c2 >= 0xd4 && c2 <= 0xff)
387 } else if (c1 == 0xf4) {
388 if (c2 >= 0xa7 && c2 <= 0xff)
/*
 * Clean up an EUC-JP string in place: CR+LF pairs are reduced to LF and
 * codes that are not printable are substituted.
 * NOTE(review): the loop header, pointer advances and the substitution
 * statements are not visible in this excerpt.
 */
395 void conv_unreadable_eucjp(gchar *str)
397 register guchar *p = str;
401 /* convert CR+LF -> LF */
402 if (*p == '\r' && *(p + 1) == '\n')
/* Shift the rest of the string, including its terminating NUL, one byte
   to the left so the '\r' is overwritten. */
403 memmove(p, p + 1, strlen(p));
404 /* printable 7 bit code */
406 } else if (iseuckanji(*p)) {
407 if (isprintableeuckanji(*p, *(p + 1))) {
408 /* printable euc-jp code */
411 /* substitute unprintable code */
420 } else if (iseuchwkana1(*p)) {
421 if (iseuchwkana2(*(p + 1)))
422 /* euc-jp hankaku kana */
426 } else if (iseucaux(*p)) {
427 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
428 /* auxiliary kanji */
433 /* substitute unprintable 1 byte code */
/*
 * Clean up a string in place for 7-bit display: CR+LF pairs are reduced to
 * LF and every non-ASCII byte is replaced with SUBST_CHAR.
 * NOTE(review): the loop header and pointer advance are not visible in
 * this excerpt.
 */
438 void conv_unreadable_8bit(gchar *str)
440 register guchar *p = str;
443 /* convert CR+LF -> LF */
444 if (*p == '\r' && *(p + 1) == '\n')
/* Move the tail, including the terminating NUL, left by one byte. */
445 memmove(p, p + 1, strlen(p));
446 else if (!isascii(*p)) *p = SUBST_CHAR;
/*
 * Clean up a Latin (8-bit) string in place: CR+LF pairs are reduced to LF
 * and bytes in 0x7f..0x9f are singled out.
 * NOTE(review): the statement applied to the 0x7f..0x9f bytes is not
 * visible in this excerpt; presumably they are replaced with SUBST_CHAR --
 * confirm.
 */
451 void conv_unreadable_latin(gchar *str)
453 register guchar *p = str;
456 /* convert CR+LF -> LF */
457 if (*p == '\r' && *(p + 1) == '\n')
/* Move the tail, including the terminating NUL, left by one byte. */
458 memmove(p, p + 1, strlen(p));
459 else if ((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
/*
 * Clean up a string in place using the routine that fits the current
 * locale's charset (conv_get_current_charset()).
 * The visible branches call conv_unreadable_latin() or
 * conv_unreadable_eucjp(). NOTE(review): the case labels, and any other
 * branches, are not visible in this excerpt.
 */
465 void conv_unreadable_locale(gchar *str)
467 switch (conv_get_current_charset()) {
483 conv_unreadable_latin(str);
486 conv_unreadable_eucjp(str);
/*
 * Rewrite certain two-byte characters of an EUC-JP string, in place, as
 * single ASCII characters.
 * NOTE(review): the loop header, the definition of NCV and the test on the
 * lead byte of the first branch are not visible in this excerpt.
 */
495 void conv_mb_alnum(gchar *str)
/* Replacement table for second bytes 0xa0..0xef after a 0xa1 lead byte
   (the visible rows hold 80 entries); NCV marks codes with no
   replacement. */
497 static guchar char_tbl[] = {
499 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
500 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
502 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
503 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
505 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
506 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
508 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
509 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
511 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
512 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
515 register guchar *p = str;
522 register guchar ch = *(p + 1);
/* Second byte in 0xb0..0xfa. NOTE(review): the conversion applied here is
   not visible in this excerpt. */
524 if (ch >= 0xb0 && ch <= 0xfa) {
/* Close the gap left by the removed byte. */
529 memmove(p, p + 1, len);
535 } else if (*p == 0xa1) {
536 register guchar ch = *(p + 1);
/* Replace the pair only when the table has an ASCII equivalent. */
538 if (ch >= 0xa0 && ch <= 0xef &&
539 NCV != char_tbl[ch - 0xa0]) {
540 *p = char_tbl[ch - 0xa0];
543 memmove(p, p + 1, len);
549 } else if (iseuckanji(*p)) {
/*
 * Guess which Japanese encoding a NUL-terminated string uses.
 * The guess starts as C_US_ASCII and is refined while scanning the bytes;
 * an ISO-2022-JP escape sequence seen while the guess is still C_US_ASCII
 * ends the scan with C_ISO_2022_JP.
 * NOTE(review): the loop header, the pointer advances, several assignments
 * and the final return are not visible in this excerpt.
 */
559 CharSet conv_guess_ja_encoding(const gchar *str)
561 const guchar *p = str;
562 CharSet guessed = C_US_ASCII;
/* ESC followed by '$' or '(' starts an ISO-2022-JP escape sequence. */
565 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
566 if (guessed == C_US_ASCII)
567 return C_ISO_2022_JP;
569 } else if (isascii(*p)) {
/* Two bytes that both fall in the EUC-JP kanji range. */
571 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
572 if (*p >= 0xfd && *p <= 0xfe)
/* The pair can also be valid Shift_JIS; when an earlier byte already
   suggested Shift_JIS, re-check it against the Shift_JIS ranges. */
574 else if (guessed == C_SHIFT_JIS) {
575 if ((issjiskanji1(*p) &&
576 issjiskanji2(*(p + 1))) ||
578 guessed = C_SHIFT_JIS;
584 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
585 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
586 guessed = C_SHIFT_JIS;
590 } else if (issjishwkana(*p)) {
591 guessed = C_SHIFT_JIS;
/* Convert JIS text to EUC-JP for display: convert with conv_jistoeuc(),
   then clean the result in place with conv_unreadable_eucjp(). */
601 void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
603 conv_jistoeuc(outbuf, outlen, inbuf);
604 conv_unreadable_eucjp(outbuf);
/* Convert Shift_JIS text to EUC-JP for display: convert with
   conv_sjistoeuc(), then clean the result in place with
   conv_unreadable_eucjp(). */
607 void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
609 conv_sjistoeuc(outbuf, outlen, inbuf);
610 conv_unreadable_eucjp(outbuf);
/* Prepare EUC-JP text for display: copy it to outbuf with strncpy2(), then
   clean the copy in place with conv_unreadable_eucjp(). */
613 void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
615 strncpy2(outbuf, inbuf, outlen);
616 conv_unreadable_eucjp(outbuf);
/* Prepare Japanese text of unknown encoding for display: convert it to
   EUC-JP with conv_anytoeuc(), then clean the result in place with
   conv_unreadable_eucjp(). */
619 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
621 conv_anytoeuc(outbuf, outlen, inbuf);
622 conv_unreadable_eucjp(outbuf);
/* Prepare text for 7-bit display: copy it to outbuf with strncpy2(), then
   clean the copy in place with conv_unreadable_8bit(). */
625 void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
627 strncpy2(outbuf, inbuf, outlen);
628 conv_unreadable_8bit(outbuf);
/* Prepare Latin text for display: copy it to outbuf with strncpy2(), then
   clean the copy in place with conv_unreadable_latin(). */
631 void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
633 strncpy2(outbuf, inbuf, outlen);
634 conv_unreadable_latin(outbuf);
/* Prepare text in the current locale's charset for display: copy it to
   outbuf with strncpy2(), then clean the copy in place with
   conv_unreadable_locale(). */
637 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
639 strncpy2(outbuf, inbuf, outlen);
640 conv_unreadable_locale(outbuf);
/* Identity converter: copies inbuf to outbuf with strncpy2(). Other
   functions in this file compare a converter against this one to detect
   "no built-in conversion available". */
643 void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
645 strncpy2(outbuf, inbuf, outlen);
/*
 * Allocate a CodeConverter for the given source charset name.
 * The structure is zero-filled by g_new0(), then given the conversion
 * function chosen by conv_get_code_conv_func(charset, NULL), a private
 * copy of the charset name and the matching CharSet value.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
648 CodeConverter *conv_code_converter_new(const gchar *charset)
652 conv = g_new0(CodeConverter, 1);
653 conv->code_conv_func = conv_get_code_conv_func(charset, NULL);
654 conv->charset_str = g_strdup(charset);
655 conv->charset = conv_get_charset_from_str(charset);
/* Release a CodeConverter. The charset name copy held by the converter is
   freed here. NOTE(review): the rest of the body (presumably freeing conv
   itself) is not visible in this excerpt. */
660 void conv_code_converter_destroy(CodeConverter *conv)
662 g_free(conv->charset_str);
/*
 * Convert inbuf into outbuf using the given CodeConverter.
 * With iconv available, a converter function other than conv_noconv is
 * called directly; the conv_iconv_strdup() call that follows converts from
 * conv->charset_str and its result is copied into outbuf.
 * Without iconv the converter function is always called.
 * NOTE(review): the rest of the signature, the #if line, the else branch,
 * the error handling and the return values are not visible in this
 * excerpt.
 */
666 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
670 if (conv->code_conv_func != conv_noconv)
671 conv->code_conv_func(outbuf, outlen, inbuf);
675 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
679 strncpy2(outbuf, str, outlen);
683 #else /* !HAVE_ICONV */
684 conv->code_conv_func(outbuf, outlen, inbuf);
/*
 * Convert inbuf from src_code to dest_code and return the result as a
 * newly allocated string.
 * A built-in converter from conv_get_code_conv_func() is used when one
 * exists; otherwise the work is passed to conv_iconv_strdup(), or, when
 * iconv is not compiled in, the input is duplicated unchanged.
 * Returns NULL if the work buffer is NULL.
 */
690 gchar *conv_codeset_strdup(const gchar *inbuf,
691 const gchar *src_code, const gchar *dest_code)
695 CodeConvFunc conv_func;
697 conv_func = conv_get_code_conv_func(src_code, dest_code);
698 if (conv_func != conv_noconv) {
/* Work buffer size: three bytes per input byte, terminator included.
   NOTE(review): the allocation call itself is not visible in this
   excerpt. */
699 len = (strlen(inbuf) + 1) * 3;
701 if (!buf) return NULL;
703 conv_func(buf, len, inbuf);
/* Shrink the buffer to the size actually used. */
704 return g_realloc(buf, strlen(buf) + 1);
708 return conv_iconv_strdup(inbuf, src_code, dest_code);
710 return g_strdup(inbuf);
711 #endif /* HAVE_ICONV */
/*
 * Choose the built-in conversion function for a source/destination charset
 * pair.
 *
 * src_charset_str:  source charset name; NULL means the current locale's
 *                   charset.
 * dest_charset_str: destination charset name; NULL is mapped to C_AUTO by
 *                   conv_get_charset_from_str().
 *
 * The result starts as conv_noconv and is replaced when a built-in
 * converter matches.
 * NOTE(review): most case labels, some returns and the final return are
 * not visible in this excerpt.
 */
714 CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
715 const gchar *dest_charset_str)
717 CodeConvFunc code_conv = conv_noconv;
719 CharSet dest_charset;
721 if (!src_charset_str)
722 src_charset = conv_get_current_charset();
724 src_charset = conv_get_charset_from_str(src_charset_str);
726 /* auto detection mode */
727 if (!src_charset_str && !dest_charset_str) {
728 if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
729 return conv_anytodisp;
734 dest_charset = conv_get_charset_from_str(dest_charset_str);
/* A US-ASCII destination always gets the 7-bit display converter. */
736 if (dest_charset == C_US_ASCII)
737 return conv_ustodisp;
/* UTF-8 destination, named explicitly or implied by the locale.
   NOTE(review): the action taken is not visible in this excerpt. */
738 else if (dest_charset == C_UTF_8 ||
739 (dest_charset == C_AUTO &&
740 conv_get_current_charset() == C_UTF_8))
/* In each visible case a C_AUTO destination selects the "...todisp"
   variant of the converter. */
743 switch (src_charset) {
745 case C_ISO_2022_JP_2:
746 if (dest_charset == C_AUTO)
747 code_conv = conv_jistodisp;
748 else if (dest_charset == C_EUC_JP)
749 code_conv = conv_jistoeuc;
752 if (dest_charset == C_AUTO)
753 code_conv = conv_ustodisp;
769 if (dest_charset == C_AUTO)
770 code_conv = conv_latintodisp;
773 if (dest_charset == C_AUTO)
774 code_conv = conv_sjistodisp;
775 else if (dest_charset == C_EUC_JP)
776 code_conv = conv_sjistoeuc;
779 if (dest_charset == C_AUTO)
780 code_conv = conv_euctodisp;
781 else if (dest_charset == C_ISO_2022_JP ||
782 dest_charset == C_ISO_2022_JP_2)
783 code_conv = conv_euctojis;
/*
 * Convert inbuf from src_code to dest_code with iconv() and return the
 * result as a newly allocated string.
 * The input is duplicated unchanged when the destination is US-ASCII or
 * when the two charset names compare equal, ignoring case.
 * NOTE(review): the conditions under which the default charsets are
 * substituted (presumably NULL arguments), the handling of an iconv_open()
 * failure, parts of the error loop and the final return are not visible in
 * this excerpt.
 */
793 gchar *conv_iconv_strdup(const gchar *inbuf,
794 const gchar *src_code, const gchar *dest_code)
797 const gchar *inbuf_p;
807 src_code = conv_get_outgoing_charset_str();
809 dest_code = conv_get_current_charset_str();
811 /* don't convert if current codeset is US-ASCII */
812 if (!strcasecmp(dest_code, CS_US_ASCII))
813 return g_strdup(inbuf);
815 /* don't convert if src and dest codeset are identical */
816 if (!strcasecmp(src_code, dest_code))
817 return g_strdup(inbuf);
819 cd = iconv_open(dest_code, src_code);
820 if (cd == (iconv_t)-1)
/* The input size includes the terminating NUL; the output buffer starts
   at twice that size. */
824 in_size = strlen(inbuf) + 1;
826 out_size = in_size * 2;
827 outbuf = g_malloc(out_size);
/* Keep calling iconv() for as long as it reports an error, handling each
   errno value in turn. NOTE(review): iconv() returns (size_t)-1 on error,
   so the "< 0" test only works if n_conv has a signed type; its
   declaration is not visible in this excerpt -- confirm. */
831 while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
832 &outbuf_p, &out_left)) < 0) {
833 if (EILSEQ == errno) {
/* Invalid input sequence: emit the substitute character. */
836 *outbuf_p++ = SUBST_CHAR;
838 } else if (EINVAL == errno) {
841 } else if (E2BIG == errno) {
/* Output buffer full: grow it. */
843 outbuf = g_realloc(outbuf, out_size);
849 g_warning("conv_iconv_strdup(): %s\n",
/* A call with a NULL input pointer makes iconv() write any pending
   shift sequence. */
856 iconv(cd, NULL, NULL, &outbuf_p, &out_left);
/* Shrink the buffer to the size actually used. */
857 outbuf = g_realloc(outbuf, strlen(outbuf) + 1);
863 #endif /* HAVE_ICONV */
/*
 * Table pairing each CharSet value with a charset name string.
 * The same CharSet may appear with several names (for example the three
 * C_SHIFT_JIS rows). The two hash-table builders in this file read the
 * entries through the fields `charset` and `name` of an array called
 * `charsets`.
 * NOTE(review): the field declarations and the array name line are not
 * visible in this excerpt, and a few rows appear to be missing.
 */
865 static const struct {
869 {C_US_ASCII, CS_US_ASCII},
870 {C_US_ASCII, CS_ANSI_X3_4_1968},
872 {C_ISO_8859_1, CS_ISO_8859_1},
873 {C_ISO_8859_2, CS_ISO_8859_2},
874 {C_ISO_8859_3, CS_ISO_8859_3},
875 {C_ISO_8859_4, CS_ISO_8859_4},
876 {C_ISO_8859_5, CS_ISO_8859_5},
877 {C_ISO_8859_6, CS_ISO_8859_6},
878 {C_ISO_8859_7, CS_ISO_8859_7},
879 {C_ISO_8859_8, CS_ISO_8859_8},
880 {C_ISO_8859_9, CS_ISO_8859_9},
881 {C_ISO_8859_10, CS_ISO_8859_10},
882 {C_ISO_8859_11, CS_ISO_8859_11},
883 {C_ISO_8859_13, CS_ISO_8859_13},
884 {C_ISO_8859_14, CS_ISO_8859_14},
885 {C_ISO_8859_15, CS_ISO_8859_15},
886 {C_BALTIC, CS_BALTIC},
887 {C_CP1250, CS_CP1250},
888 {C_CP1251, CS_CP1251},
889 {C_CP1252, CS_CP1252},
890 {C_CP1253, CS_CP1253},
891 {C_CP1254, CS_CP1254},
892 {C_CP1255, CS_CP1255},
893 {C_CP1256, CS_CP1256},
894 {C_CP1257, CS_CP1257},
895 {C_CP1258, CS_CP1258},
896 {C_WINDOWS_1250, CS_WINDOWS_1250},
897 {C_WINDOWS_1251, CS_WINDOWS_1251},
898 {C_WINDOWS_1252, CS_WINDOWS_1252},
899 {C_WINDOWS_1253, CS_WINDOWS_1253},
900 {C_WINDOWS_1254, CS_WINDOWS_1254},
901 {C_WINDOWS_1255, CS_WINDOWS_1255},
902 {C_WINDOWS_1256, CS_WINDOWS_1256},
903 {C_WINDOWS_1257, CS_WINDOWS_1257},
904 {C_WINDOWS_1258, CS_WINDOWS_1258},
905 {C_KOI8_R, CS_KOI8_R},
906 {C_KOI8_T, CS_KOI8_T},
907 {C_KOI8_U, CS_KOI8_U},
908 {C_ISO_2022_JP, CS_ISO_2022_JP},
909 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
910 {C_EUC_JP, CS_EUC_JP},
911 {C_EUC_JP, CS_EUCJP},
912 {C_SHIFT_JIS, CS_SHIFT_JIS},
913 {C_SHIFT_JIS, CS_SHIFT__JIS},
914 {C_SHIFT_JIS, CS_SJIS},
915 {C_ISO_2022_KR, CS_ISO_2022_KR},
916 {C_EUC_KR, CS_EUC_KR},
917 {C_ISO_2022_CN, CS_ISO_2022_CN},
918 {C_EUC_CN, CS_EUC_CN},
919 {C_GB2312, CS_GB2312},
921 {C_EUC_TW, CS_EUC_TW},
923 {C_BIG5_HKSCS, CS_BIG5_HKSCS},
924 {C_TIS_620, CS_TIS_620},
925 {C_WINDOWS_874, CS_WINDOWS_874},
926 {C_GEORGIAN_PS, CS_GEORGIAN_PS},
927 {C_TCVN5712_1, CS_TCVN5712_1},
/*
 * Table mapping a locale name to two charsets. Each row is
 * { locale, charset, out_charset }: conv_get_current_charset() reads
 * `charset` and conv_get_outgoing_charset() reads `out_charset`.
 * Both lookups compare the locale against each row by case-insensitive
 * prefix, which is why rows with an explicit codeset suffix (for example
 * "ja_JP.eucJP") are listed before the bare "ja_JP" row.
 * NOTE(review): the field declarations and the array name line are not
 * visible in this excerpt, and a few rows appear to be missing.
 */
930 static const struct {
935 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
936 {"ja_JP.EUC-JP" , C_EUC_JP , C_ISO_2022_JP},
937 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
938 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
939 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
940 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
941 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
942 {"ko_KR.EUC-KR" , C_EUC_KR , C_EUC_KR},
943 {"ko_KR" , C_EUC_KR , C_EUC_KR},
944 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
945 {"zh_CN.GBK" , C_GBK , C_GB2312},
946 {"zh_CN" , C_GB2312 , C_GB2312},
947 {"zh_HK" , C_BIG5_HKSCS , C_BIG5_HKSCS},
948 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
949 {"zh_TW.EUC-TW" , C_EUC_TW , C_BIG5},
950 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
951 {"zh_TW" , C_BIG5 , C_BIG5},
953 {"ru_RU.KOI8-R" , C_KOI8_R , C_KOI8_R},
954 {"ru_RU.KOI8R" , C_KOI8_R , C_KOI8_R},
955 {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
956 {"ru_RU" , C_ISO_8859_5 , C_KOI8_R},
957 {"tg_TJ" , C_KOI8_T , C_KOI8_T},
958 {"ru_UA" , C_KOI8_U , C_KOI8_U},
959 {"uk_UA" , C_KOI8_U , C_KOI8_U},
961 {"be_BY" , C_WINDOWS_1251, C_WINDOWS_1251},
962 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
964 {"yi_US" , C_WINDOWS_1255, C_WINDOWS_1255},
966 {"af_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
967 {"br_FR" , C_ISO_8859_1 , C_ISO_8859_1},
968 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
969 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
970 {"de_AT" , C_ISO_8859_1 , C_ISO_8859_1},
971 {"de_BE" , C_ISO_8859_1 , C_ISO_8859_1},
972 {"de_CH" , C_ISO_8859_1 , C_ISO_8859_1},
973 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
974 {"de_LU" , C_ISO_8859_1 , C_ISO_8859_1},
975 {"en_AU" , C_ISO_8859_1 , C_ISO_8859_1},
976 {"en_BW" , C_ISO_8859_1 , C_ISO_8859_1},
977 {"en_CA" , C_ISO_8859_1 , C_ISO_8859_1},
978 {"en_DK" , C_ISO_8859_1 , C_ISO_8859_1},
979 {"en_GB" , C_ISO_8859_1 , C_ISO_8859_1},
980 {"en_HK" , C_ISO_8859_1 , C_ISO_8859_1},
981 {"en_IE" , C_ISO_8859_1 , C_ISO_8859_1},
982 {"en_NZ" , C_ISO_8859_1 , C_ISO_8859_1},
983 {"en_PH" , C_ISO_8859_1 , C_ISO_8859_1},
984 {"en_SG" , C_ISO_8859_1 , C_ISO_8859_1},
985 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
986 {"en_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
987 {"en_ZW" , C_ISO_8859_1 , C_ISO_8859_1},
988 {"es_AR" , C_ISO_8859_1 , C_ISO_8859_1},
989 {"es_BO" , C_ISO_8859_1 , C_ISO_8859_1},
990 {"es_CL" , C_ISO_8859_1 , C_ISO_8859_1},
991 {"es_CO" , C_ISO_8859_1 , C_ISO_8859_1},
992 {"es_CR" , C_ISO_8859_1 , C_ISO_8859_1},
993 {"es_DO" , C_ISO_8859_1 , C_ISO_8859_1},
994 {"es_EC" , C_ISO_8859_1 , C_ISO_8859_1},
995 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
996 {"es_GT" , C_ISO_8859_1 , C_ISO_8859_1},
997 {"es_HN" , C_ISO_8859_1 , C_ISO_8859_1},
998 {"es_MX" , C_ISO_8859_1 , C_ISO_8859_1},
999 {"es_NI" , C_ISO_8859_1 , C_ISO_8859_1},
1000 {"es_PA" , C_ISO_8859_1 , C_ISO_8859_1},
1001 {"es_PE" , C_ISO_8859_1 , C_ISO_8859_1},
1002 {"es_PR" , C_ISO_8859_1 , C_ISO_8859_1},
1003 {"es_PY" , C_ISO_8859_1 , C_ISO_8859_1},
1004 {"es_SV" , C_ISO_8859_1 , C_ISO_8859_1},
1005 {"es_US" , C_ISO_8859_1 , C_ISO_8859_1},
1006 {"es_UY" , C_ISO_8859_1 , C_ISO_8859_1},
1007 {"es_VE" , C_ISO_8859_1 , C_ISO_8859_1},
1008 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
1009 {"eu_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1010 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1011 {"fo_FO" , C_ISO_8859_1 , C_ISO_8859_1},
1012 {"fr_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1013 {"fr_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1014 {"fr_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1015 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1016 {"fr_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1017 {"ga_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1018 {"gl_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1019 {"gv_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1020 {"id_ID" , C_ISO_8859_1 , C_ISO_8859_1},
1021 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
1022 {"it_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1023 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
1024 {"kl_GL" , C_ISO_8859_1 , C_ISO_8859_1},
1025 {"kw_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1026 {"ms_MY" , C_ISO_8859_1 , C_ISO_8859_1},
1027 {"nl_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1028 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
1029 {"nn_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1030 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1031 {"oc_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1032 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
1033 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
1034 {"sq_AL" , C_ISO_8859_1 , C_ISO_8859_1},
1035 {"sv_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1036 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
1037 {"tl_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1038 {"uz_UZ" , C_ISO_8859_1 , C_ISO_8859_1},
1039 {"wa_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1041 {"bs_BA" , C_ISO_8859_2 , C_ISO_8859_2},
1042 {"cs_CZ" , C_ISO_8859_2 , C_ISO_8859_2},
1043 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
1044 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
1045 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
1046 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
1047 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
1048 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
1050 {"sr_YU@cyrillic" , C_ISO_8859_5 , C_ISO_8859_5},
1051 {"sr_YU" , C_ISO_8859_2 , C_ISO_8859_2},
1053 {"mt_MT" , C_ISO_8859_3 , C_ISO_8859_3},
1055 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
1056 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1057 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1058 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
1060 {"mk_MK" , C_ISO_8859_5 , C_ISO_8859_5},
1062 {"ar_AE" , C_ISO_8859_6 , C_ISO_8859_6},
1063 {"ar_BH" , C_ISO_8859_6 , C_ISO_8859_6},
1064 {"ar_DZ" , C_ISO_8859_6 , C_ISO_8859_6},
1065 {"ar_EG" , C_ISO_8859_6 , C_ISO_8859_6},
1066 {"ar_IQ" , C_ISO_8859_6 , C_ISO_8859_6},
1067 {"ar_JO" , C_ISO_8859_6 , C_ISO_8859_6},
1068 {"ar_KW" , C_ISO_8859_6 , C_ISO_8859_6},
1069 {"ar_LB" , C_ISO_8859_6 , C_ISO_8859_6},
1070 {"ar_LY" , C_ISO_8859_6 , C_ISO_8859_6},
1071 {"ar_MA" , C_ISO_8859_6 , C_ISO_8859_6},
1072 {"ar_OM" , C_ISO_8859_6 , C_ISO_8859_6},
1073 {"ar_QA" , C_ISO_8859_6 , C_ISO_8859_6},
1074 {"ar_SA" , C_ISO_8859_6 , C_ISO_8859_6},
1075 {"ar_SD" , C_ISO_8859_6 , C_ISO_8859_6},
1076 {"ar_SY" , C_ISO_8859_6 , C_ISO_8859_6},
1077 {"ar_TN" , C_ISO_8859_6 , C_ISO_8859_6},
1078 {"ar_YE" , C_ISO_8859_6 , C_ISO_8859_6},
1080 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
1081 {"he_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1082 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1083 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
1085 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
1086 {"mi_NZ" , C_ISO_8859_13 , C_ISO_8859_13},
1088 {"cy_GB" , C_ISO_8859_14 , C_ISO_8859_14},
1090 {"ar_IN" , C_UTF_8 , C_UTF_8},
1091 {"en_IN" , C_UTF_8 , C_UTF_8},
1092 {"se_NO" , C_UTF_8 , C_UTF_8},
1093 {"ta_IN" , C_UTF_8 , C_UTF_8},
1094 {"te_IN" , C_UTF_8 , C_UTF_8},
1095 {"ur_PK" , C_UTF_8 , C_UTF_8},
1097 {"th_TH" , C_TIS_620 , C_TIS_620},
1098 /* {"th_TH" , C_WINDOWS_874}, */
1099 /* {"th_TH" , C_ISO_8859_11}, */
1101 {"ka_GE" , C_GEORGIAN_PS , C_GEORGIAN_PS},
1102 {"vi_VN.TCVN" , C_TCVN5712_1 , C_TCVN5712_1},
1104 {"C" , C_US_ASCII , C_US_ASCII},
1105 {"POSIX" , C_US_ASCII , C_US_ASCII},
1106 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
/*
 * Return the hash table that maps a CharSet value to a charset name.
 * The table is held in a function-local static and is filled from the
 * `charsets` array, keyed by the CharSet value stored directly in the key
 * pointer.
 * NOTE(review): the "already built" check, the rest of the lookup
 * condition, the insert call head and the return are not visible in this
 * excerpt. Presumably a CharSet is inserted only when it is not yet in the
 * table, so the first name listed for it wins -- confirm.
 */
1109 static GHashTable *conv_get_charset_to_str_table(void)
1111 static GHashTable *table;
1117 table = g_hash_table_new(NULL, g_direct_equal);
1119 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1120 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1123 (table, GUINT_TO_POINTER(charsets[i].charset),
/* Hash-table equality callback for string keys: returns non-zero when the
   two strings are equal according to strcasecmp(), i.e. ignoring case. */
1131 static gint str_case_equal(gconstpointer v, gconstpointer v2)
1133 return strcasecmp((const gchar *)v, (const gchar *)v2) == 0;
/*
 * Hash-table hash callback for string keys that ignores case.
 * Each character after the first is lower-cased and folded in as
 * h = h * 31 + c (written as (h << 5) - h + c).
 * NOTE(review): the declaration of h, the handling of the first character
 * and the return are not visible in this excerpt.
 */
1136 static guint str_case_hash(gconstpointer key)
1138 const gchar *p = key;
1143 for (p += 1; *p != '\0'; p++)
1144 h = (h << 5) - h + tolower(*p);
/*
 * Return the hash table that maps a charset name to a CharSet value.
 * The table is held in a function-local static, uses the case-insensitive
 * hash and equality callbacks above, and gets one entry per row of the
 * `charsets` array.
 * NOTE(review): the "already built" check and the return are not visible
 * in this excerpt.
 */
1150 static GHashTable *conv_get_charset_from_str_table(void)
1152 static GHashTable *table;
1158 table = g_hash_table_new(str_case_hash, str_case_equal);
1160 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1161 g_hash_table_insert(table, charsets[i].name,
1162 GUINT_TO_POINTER(charsets[i].charset));
/* Return the name string registered for a CharSet value, looked up in the
   table from conv_get_charset_to_str_table(). The result is whatever
   g_hash_table_lookup() returns, so it is NULL when the value has no
   entry. */
1168 const gchar *conv_get_charset_str(CharSet charset)
1172 table = conv_get_charset_to_str_table();
1173 return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
/*
 * Return the CharSet value for a charset name (matched ignoring case).
 * A NULL name yields C_AUTO. A name with no table entry yields the CharSet
 * whose numeric value is 0, because the NULL lookup result is converted
 * with GPOINTER_TO_UINT(); which enumerator that is cannot be seen from
 * this excerpt.
 */
1176 CharSet conv_get_charset_from_str(const gchar *charset)
1180 if (!charset) return C_AUTO;
1182 table = conv_get_charset_from_str_table();
1183 return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
/*
 * Return the CharSet of the current locale.
 * The result is cached in a function-local static (-1 means "not computed
 * yet"). It is derived from the locale string returned by
 * conv_get_current_locale().
 * NOTE(review): the returns, the breaks inside the table loop and the
 * condition guarding the C_US_ASCII assignment (presumably a NULL locale)
 * are not visible in this excerpt.
 */
1186 CharSet conv_get_current_charset(void)
1188 static CharSet cur_charset = -1;
1189 const gchar *cur_locale;
1193 if (cur_charset != -1)
1196 cur_locale = conv_get_current_locale();
1198 cur_charset = C_US_ASCII;
/* A locale that mentions UTF-8 anywhere selects C_UTF_8. */
1202 if (strcasestr(cur_locale, "UTF-8")) {
1203 cur_charset = C_UTF_8;
/* A locale that ends with "@euro" selects ISO-8859-15. */
1207 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1208 cur_charset = C_ISO_8859_15;
1212 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1215 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1216 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1217 if (!strncasecmp(cur_locale, locale_table[i].locale,
1218 strlen(locale_table[i].locale))) {
1219 cur_charset = locale_table[i].charset;
/* A table entry without a codeset suffix also matches a bare
   two-letter language code. */
1221 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1222 !strchr(p + 1, '.')) {
1223 if (strlen(cur_locale) == 2 &&
1224 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1225 cur_charset = locale_table[i].charset;
/* Assigned after the table loop. */
1231 cur_charset = C_AUTO;
/* Return the name of the current locale's charset, cached in a
   function-local static. CS_US_ASCII is returned when no name is known.
   NOTE(review): the condition guarding the lookup is not visible in this
   excerpt. */
1235 const gchar *conv_get_current_charset_str(void)
1237 static const gchar *codeset = NULL;
1240 codeset = conv_get_charset_str(conv_get_current_charset());
1242 return codeset ? codeset : CS_US_ASCII;
/*
 * Return the CharSet to use for outgoing text in the current locale.
 * The result is cached in a function-local static (-1 means "not computed
 * yet") and is taken from the out_charset column of locale_table, using
 * the same locale matching as conv_get_current_charset().
 * NOTE(review): the returns, the breaks inside the table loop, the
 * preprocessor conditionals around the final block and the condition
 * guarding the C_AUTO assignment are not visible in this excerpt.
 */
1245 CharSet conv_get_outgoing_charset(void)
1247 static CharSet out_charset = -1;
1248 const gchar *cur_locale;
1252 if (out_charset != -1)
1255 cur_locale = conv_get_current_locale();
1257 out_charset = C_AUTO;
/* A locale that ends with "@euro" selects ISO-8859-15. */
1261 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1262 out_charset = C_ISO_8859_15;
1266 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
/* Case-insensitive prefix match against the table's locale name. */
1269 if (!strncasecmp(cur_locale, locale_table[i].locale,
1270 strlen(locale_table[i].locale))) {
1271 out_charset = locale_table[i].out_charset;
/* A table entry without a codeset suffix also matches a bare
   two-letter language code. */
1273 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1274 !strchr(p + 1, '.')) {
1275 if (strlen(cur_locale) == 2 &&
1276 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1277 out_charset = locale_table[i].out_charset;
1284 /* encoding conversion without iconv() is only supported
1285 on Japanese locale for now */
1286 if (out_charset == C_ISO_2022_JP)
1289 return conv_get_current_charset();
/*
 * Return the name of the charset to use for outgoing text.
 * A charset set in prefs_common.outgoing_charset wins unless it is
 * CS_AUTO. Otherwise the name comes from conv_get_outgoing_charset(), with
 * CS_US_ASCII as the fallback when that CharSet has no name.
 */
1295 const gchar *conv_get_outgoing_charset_str(void)
1297 CharSet out_charset;
1300 if (prefs_common.outgoing_charset) {
/* A preference value that does not start with a letter is treated as
   invalid and reset to CS_AUTO. */
1301 if (!isalpha(prefs_common.outgoing_charset[0])) {
1302 g_free(prefs_common.outgoing_charset);
1303 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1304 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1305 return prefs_common.outgoing_charset;
1308 out_charset = conv_get_outgoing_charset();
1309 str = conv_get_charset_str(out_charset);
1311 return str ? str : CS_US_ASCII;
/* Report whether the given CharSet is a multibyte encoding.
   NOTE(review): only one case label is visible in this excerpt; the other
   labels and the return values are not, so the exact set of encodings
   cannot be stated from here. */
1314 gboolean conv_is_multibyte_encoding(CharSet encoding)
1322 case C_ISO_2022_JP_2:
/*
 * Return the current locale name.
 * The environment variables LC_ALL, LC_CTYPE and LANG are tried in that
 * order; if none is set, setlocale(LC_CTYPE, NULL) is asked. The chosen
 * value is written to the debug log.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1335 const gchar *conv_get_current_locale(void)
1339 cur_locale = g_getenv("LC_ALL");
1340 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1341 if (!cur_locale) cur_locale = g_getenv("LANG");
1342 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1344 debug_print("current locale: %s\n",
1345 cur_locale ? cur_locale : "(none)");
/*
 * Decode a MIME-encoded header string and store the result back in str.
 * NOTE(review): Xalloca is a project macro not visible here; its third
 * argument (`return`) is presumably what runs if the allocation fails --
 * confirm.
 */
1350 void conv_unmime_header_overwrite(gchar *str)
1354 CharSet cur_charset;
1356 cur_charset = conv_get_current_charset();
1358 if (cur_charset == C_EUC_JP) {
/* EUC-JP locale: convert the raw header to display form in a temporary
   buffer of twice the input length, then decode from that buffer back
   into str. */
1359 buflen = strlen(str) * 2 + 1;
1360 Xalloca(buf, buflen, return);
1361 conv_anytodisp(buf, buflen, str);
1362 unmime_header(str, buf);
/* Other locales: decode into a temporary buffer the size of the input,
   then copy the result back over str. */
1364 buflen = strlen(str) + 1;
1365 Xalloca(buf, buflen, return);
1366 unmime_header(buf, str);
1367 strncpy2(str, buf, buflen);
/*
 * Decode the MIME-encoded header string str into outbuf.
 * In an EUC-JP locale the input is first converted to display form with
 * conv_anytodisp() in a temporary buffer of twice its length; otherwise it
 * is decoded directly.
 * NOTE(review): no use of outlen or charset is visible in this excerpt.
 */
1371 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1372 const gchar *charset)
1374 CharSet cur_charset;
1376 cur_charset = conv_get_current_charset();
1378 if (cur_charset == C_EUC_JP) {
1382 buflen = strlen(str) * 2 + 1;
1383 Xalloca(buf, buflen, return);
1384 conv_anytodisp(buf, buflen, str);
1385 unmime_header(outbuf, buf);
1387 unmime_header(outbuf, str);
/* Line length that conv_encode_header() aims to keep header lines
   within. */
1390 #define MAX_LINELEN 76
/* Larger limit, applied while copying a long ASCII word. */
1391 #define MAX_HARD_LINELEN 996
/* Opening and closing delimiters of a MIME encoded-word. */
1392 #define MIMESEP_BEGIN "=?"
1393 #define MIMESEP_END "?="
/* Length of the base64 encoding of len bytes: four output characters for
   every started group of three input bytes. */
1395 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/* Line-break helper for conv_encode_header(). It uses that function's
   locals (dest, destp, srcp, len, left). The visible parts test the room
   left in the destination buffer and, when `cond` holds and input remains,
   reset `left` to MAX_LINELEN - 1. NOTE(review): most of the macro body is
   not visible in this excerpt. */
1397 #define LBREAK_IF_REQUIRED(cond, is_plain_text) \
1399 if (len - (destp - dest) < MAX_LINELEN + 2) { \
1404 if ((cond) && *srcp) { \
1405 if (destp > dest && left < MAX_LINELEN - 1) { \
1406 if (isspace(*(destp - 1))) \
1408 else if (is_plain_text && isspace(*srcp)) \
1413 left = MAX_LINELEN - 1; \
/*
 * Encode the header text in src into dest as MIME encoded-words, breaking
 * lines as needed.
 *
 * dest: destination buffer.
 * len:  size of dest.
 * src:  NUL-terminated header text in the current locale's charset.
 *
 * ASCII words are copied as they are; runs that contain non-ASCII text are
 * converted to the outgoing charset and written as
 * "=?charset?B?...?=" or "=?charset?Q?...?=" blocks.
 * NOTE(review): the rest of the parameter list, many local declarations
 * and a number of statements are not visible in this excerpt.
 */
1419 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1422 const gchar *cur_encoding;
1423 const gchar *out_encoding;
1427 const gchar *srcp = src;
1428 gchar *destp = dest;
1429 gboolean use_base64;
/* Multibyte locales use the "B" encoding marker, others the "Q" marker. */
1431 if (MB_CUR_MAX > 1) {
1433 mimesep_enc = "?B?";
1436 mimesep_enc = "?Q?";
/* US-ASCII is replaced with ISO-8859-1 for both the source and the
   outgoing charset. */
1439 cur_encoding = conv_get_current_charset_str();
1440 if (!strcmp(cur_encoding, CS_US_ASCII))
1441 cur_encoding = CS_ISO_8859_1;
1442 out_encoding = conv_get_outgoing_charset_str();
1443 if (!strcmp(out_encoding, CS_US_ASCII))
1444 out_encoding = CS_ISO_8859_1;
/* Fixed overhead of one encoded-word: the delimiters, the charset name and
   the encoding marker. */
1446 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1447 strlen(mimesep_enc) + strlen(MIMESEP_END);
/* Room left on the first line after the header name. */
1449 left = MAX_LINELEN - header_len;
1452 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1454 while (isspace(*srcp)) {
1457 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1460 /* output as it is if the next word is ASCII string */
1461 if (!is_next_nonascii(srcp)) {
1464 word_len = get_next_word_len(srcp);
1465 LBREAK_IF_REQUIRED(left < word_len, TRUE);
1466 while (word_len > 0) {
1467 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
/* Non-ASCII run: scan forward from srcp to find how much of it fits in the
   room left on the line once it is encoded. */
1482 const gchar *p = srcp;
1484 gint out_enc_str_len;
1485 gint mime_block_len;
1486 gboolean cont = FALSE;
1488 while (*p != '\0') {
1489 if (isspace(*p) && !is_next_nonascii(p + 1))
1492 if (MB_CUR_MAX > 1) {
1493 mb_len = mblen(p, MB_CUR_MAX);
1495 g_warning("conv_encode_header(): invalid multibyte character encountered\n");
/* Trial conversion of the candidate text including the next character. */
1501 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1502 out_str = conv_codeset_strdup
1503 (part_str, cur_encoding, out_encoding);
/* Fallback when conversion fails: substitute non-ASCII bytes and use the
   text as it is. */
1505 g_warning("conv_encode_header(): code conversion failed\n");
1506 conv_unreadable_8bit(part_str);
1507 out_str = g_strdup(part_str);
1509 out_str_len = strlen(out_str);
1512 out_enc_str_len = B64LEN(out_str_len);
1515 qp_get_q_encoding_len(out_str);
/* Does the encoded-word still fit in the room left on the line? */
1519 if (mimestr_len + out_enc_str_len <= left) {
1522 } else if (cur_len == 0) {
1523 LBREAK_IF_REQUIRED(1, FALSE);
/* Final conversion of the text that was accepted by the scan above. */
1532 Xstrndup_a(part_str, srcp, cur_len, );
1533 out_str = conv_codeset_strdup
1534 (part_str, cur_encoding, out_encoding);
1536 g_warning("conv_encode_header(): code conversion failed\n");
1537 conv_unreadable_8bit(part_str);
1538 out_str = g_strdup(part_str);
1540 out_str_len = strlen(out_str);
1543 out_enc_str_len = B64LEN(out_str_len);
1546 qp_get_q_encoding_len(out_str);
1548 Xalloca(enc_str, out_enc_str_len + 1, );
1550 base64_encode(enc_str, out_str, out_str_len);
1552 qp_q_encode(enc_str, out_str);
1556 /* output MIME-encoded string block */
1557 mime_block_len = mimestr_len + strlen(enc_str);
1558 g_snprintf(destp, mime_block_len + 1,
1559 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1560 out_encoding, mimesep_enc, enc_str);
1561 destp += mime_block_len;
1564 left -= mime_block_len;
1567 LBREAK_IF_REQUIRED(cont, FALSE);
/* The line-break helper macro is only meant for this function. */
1577 #undef LBREAK_IF_REQUIRED