2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2003 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
42 #include "quoted-printable.h"
44 #include "prefs_common.h"
/* Character written in place of bytes that cannot be shown or converted
   (used by the conv_unreadable_* helpers and by conv_iconv_strdup()). */
54 #define SUBST_CHAR '_'
/* Byte-class tests used by the EUC-JP code paths.  Each macro masks its
   argument to the low 8 bits, so it gives the same answer for signed and
   unsigned char values.  The argument is evaluated more than once in the
   range tests, so it must not have side effects. */

/* True when the byte is in 0xa1..0xfe. */
57 #define iseuckanji(c) \
58 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* True when the byte is exactly 0x8e. */
59 #define iseuchwkana1(c) \
60 (((c) & 0xff) == 0x8e)
/* True when the byte is in 0xa1..0xdf. */
61 #define iseuchwkana2(c) \
62 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
64 (((c) & 0xff) == 0x8f)
/* Byte-class tests used by the Shift_JIS code paths.  As with the EUC
   macros, the argument is masked to 8 bits and evaluated several times. */

/* True when the byte is in 0x81..0x9f or 0xe0..0xfc. */
65 #define issjiskanji1(c) \
66 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
67 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* True when the byte is in 0x40..0x7e or 0x80..0xfc (0x7f is excluded). */
68 #define issjiskanji2(c) \
69 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
70 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* True when the byte is in 0xa1..0xdf. */
71 #define issjishwkana(c) \
72 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
75 if (state != JIS_KANJI) { \
83 if (state != JIS_ASCII) { \
91 if (state != JIS_HWKANA) { \
99 if (state != JIS_AUXKANJI) { \
104 state = JIS_AUXKANJI; \
/* conv_jistoeuc:
   Walks the NUL-terminated inbuf one byte at a time and writes through
   outbuf.  A JISState variable starts at JIS_ASCII; the bytes that follow
   an escape introducer ('@'/'B', '(' ..., 'B'/'J', 'I') and the control
   bytes 0x0e / 0x0f are tested to drive it.  In the visible output
   statements each input byte is copied with its high bit set.
   NOTE(review): no visible line references outlen -- confirm how the
   output buffer is bounded. */
107 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
109 const guchar *in = inbuf;
110 guchar *out = outbuf;
111 JISState state = JIS_ASCII;
113 while (*in != '\0') {
117 if (*(in + 1) == '@' || *(in + 1) == 'B') {
120 } else if (*(in + 1) == '(' &&
122 state = JIS_AUXKANJI;
125 /* unknown escape sequence */
128 } else if (*in == '(') {
129 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
132 } else if (*(in + 1) == 'I') {
136 /* unknown escape sequence */
140 /* unknown escape sequence */
143 } else if (*in == 0x0e) {
146 } else if (*in == 0x0f) {
/* two-byte copy: stop early if the input ends after the first byte */
155 *out++ = *in++ | 0x80;
156 if (*in == '\0') break;
157 *out++ = *in++ | 0x80;
161 *out++ = *in++ | 0x80;
/* second two-byte copy with the same truncated-input guard */
165 *out++ = *in++ | 0x80;
166 if (*in == '\0') break;
167 *out++ = *in++ | 0x80;
/* conv_euctojis:
   Walks the NUL-terminated inbuf and writes through outbuf, starting in
   state JIS_ASCII.  Three multi-byte shapes are recognised with the
   iseuc* macros: a pair of iseuckanji() bytes, an iseuchwkana1() byte
   followed by an iseuchwkana2() byte, and an iseucaux() byte followed by
   two iseuckanji() bytes.  The payload bytes of each are emitted with the
   high bit cleared (& 0x7f).  The repeated
   "*in != '\0' && !isascii(*in)" tests sit on the paths where the
   expected following byte was not found.
   NOTE(review): no visible line references outlen -- confirm how the
   output buffer is bounded. */
176 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
178 const guchar *in = inbuf;
179 guchar *out = outbuf;
180 JISState state = JIS_ASCII;
182 while (*in != '\0') {
186 } else if (iseuckanji(*in)) {
187 if (iseuckanji(*(in + 1))) {
189 *out++ = *in++ & 0x7f;
190 *out++ = *in++ & 0x7f;
195 if (*in != '\0' && !isascii(*in)) {
200 } else if (iseuchwkana1(*in)) {
202 if (iseuchwkana2(*in)) {
204 *out++ = *in++ & 0x7f;
207 if (*in != '\0' && !isascii(*in)) {
212 } else if (iseucaux(*in)) {
214 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
216 *out++ = *in++ & 0x7f;
217 *out++ = *in++ & 0x7f;
220 if (*in != '\0' && !isascii(*in)) {
223 if (*in != '\0' && !isascii(*in)) {
/* conv_sjistoeuc:
   Walks the NUL-terminated inbuf and writes through outbuf.  When an
   issjiskanji1() lead byte is followed by an issjiskanji2() trail byte,
   the pair is re-based arithmetically (row offset 0x70 for lead bytes
   below 0xa0, otherwise 0xb0) and both result bytes are written with the
   high bit set.  A single issjishwkana() byte is handled by a separate
   branch.
   NOTE(review): no visible line references outlen -- confirm how the
   output buffer is bounded. */
240 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
242 const guchar *in = inbuf;
243 guchar *out = outbuf;
245 while (*in != '\0') {
248 } else if (issjiskanji1(*in)) {
249 if (issjiskanji2(*(in + 1))) {
251 guchar out2 = *(in + 1);
/* lead bytes fall in two ranges; pick the base for the one in use */
254 row = out1 < 0xa0 ? 0x70 : 0xb0;
256 out1 = (out1 - row) * 2 - 1;
/* trail bytes above 0x7f are shifted by one more than those below */
257 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
259 out1 = (out1 - row) * 2;
263 *out++ = out1 | 0x80;
264 *out++ = out2 | 0x80;
269 if (*in != '\0' && !isascii(*in)) {
274 } else if (issjishwkana(*in)) {
/* conv_anytoeuc:
   Dispatches on the result of conv_guess_ja_encoding(inbuf): the input is
   passed to conv_jistoeuc(), to conv_sjistoeuc(), or copied unchanged
   into outbuf with strncpy2().  The case labels selecting each branch are
   not shown in this listing. */
286 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
288 switch (conv_guess_ja_encoding(inbuf)) {
290 conv_jistoeuc(outbuf, outlen, inbuf);
293 conv_sjistoeuc(outbuf, outlen, inbuf);
296 strncpy2(outbuf, inbuf, outlen);
/* conv_anytojis:
   Dispatches on the result of conv_guess_ja_encoding(inbuf): the input is
   either converted with conv_euctojis() or copied unchanged into outbuf
   with strncpy2().  The case labels are not shown in this listing. */
301 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
303 switch (conv_guess_ja_encoding(inbuf)) {
305 conv_euctojis(outbuf, outlen, inbuf);
308 strncpy2(outbuf, inbuf, outlen);
/* valid_eucjp_tbl:
   Lookup table read by isprintableeuckanji() as
   valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0].  There is one 96-entry row for
   each first byte 0xa2..0xa8, and each column is a second byte
   0xa0..0xff.  The entry is returned as a gboolean, so 1 means the
   two-byte code is accepted and 0 means it is rejected. */
313 static gchar valid_eucjp_tbl[][96] = {
314 /* 0xa2a0 - 0xa2ff */
315 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
316 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
317 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
318 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
319 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
320 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0 },
322 /* 0xa3a0 - 0xa3ff */
323 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
324 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
325 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
326 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
327 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
328 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 },
330 /* 0xa4a0 - 0xa4ff */
331 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
332 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
333 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
334 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
335 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
336 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
338 /* 0xa5a0 - 0xa5ff */
339 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
340 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
341 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
342 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
343 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
344 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
346 /* 0xa6a0 - 0xa6ff */
347 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
348 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
349 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
350 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
351 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
352 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
354 /* 0xa7a0 - 0xa7ff */
355 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
356 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
357 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
358 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
359 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
360 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
362 /* 0xa8a0 - 0xa8ff */
363 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
364 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
365 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
366 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
367 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
368 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
/* isprintableeuckanji:
   Classifies the two-byte code (c1, c2) with a series of range tests on
   each byte.  First bytes 0xa2..0xa8 are answered from valid_eucjp_tbl;
   other first-byte ranges (<= 0xa0, >= 0xf5, 0xa9..0xaf, 0xf4) and
   second-byte ranges are tested individually.  The value returned by the
   range tests is not shown in this listing. */
371 static gboolean isprintableeuckanji(guchar c1, guchar c2)
373 if (c1 <= 0xa0 || c1 >= 0xf5)
375 if (c2 <= 0xa0 || c2 == 0xff)
378 if (c1 >= 0xa9 && c1 <= 0xaf)
/* rows of the table start at first byte 0xa2, columns at second byte 0xa0 */
381 if (c1 >= 0xa2 && c1 <= 0xa8)
382 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
385 if (c2 >= 0xd4 && c2 <= 0xff)
387 } else if (c1 == 0xf4) {
388 if (c2 >= 0xa7 && c2 <= 0xff)
/* conv_unreadable_eucjp:
   Edits str in place.  A CR immediately followed by LF is removed by
   shifting the rest of the string left.  Remaining bytes are classified
   with the iseuc* macros and isprintableeuckanji(); per the inline
   comments, printable codes are kept and unprintable ones substituted. */
395 void conv_unreadable_eucjp(gchar *str)
397 register guchar *p = str;
401 /* convert CR+LF -> LF */
402 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes from p + 1 = the remaining characters plus the NUL */
403 memmove(p, p + 1, strlen(p));
404 /* printable 7 bit code */
406 } else if (iseuckanji(*p)) {
407 if (isprintableeuckanji(*p, *(p + 1))) {
408 /* printable euc-jp code */
411 /* substitute unprintable code */
420 } else if (iseuchwkana1(*p)) {
421 if (iseuchwkana2(*(p + 1)))
422 /* euc-jp hankaku kana */
426 } else if (iseucaux(*p)) {
427 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
428 /* auxiliary kanji */
433 /* substitute unprintable 1 byte code */
/* conv_unreadable_8bit:
   Edits str in place.  A CR immediately followed by LF is removed by
   shifting the rest of the string (terminator included) one byte left;
   any other byte for which isascii() is false is overwritten with
   SUBST_CHAR. */
438 void conv_unreadable_8bit(gchar *str)
440 register guchar *p = str;
443 /* convert CR+LF -> LF */
444 if (*p == '\r' && *(p + 1) == '\n')
445 memmove(p, p + 1, strlen(p));
446 else if (!isascii(*p)) *p = SUBST_CHAR;
/* conv_unreadable_latin:
   Edits str in place.  A CR immediately followed by LF is removed by
   shifting the rest of the string (terminator included) one byte left;
   otherwise bytes in 0x7f..0x9f are singled out.  The action taken for
   that range is not shown in this listing; presumably substitution with
   SUBST_CHAR as in the sibling functions -- TODO confirm. */
451 void conv_unreadable_latin(gchar *str)
453 register guchar *p = str;
456 /* convert CR+LF -> LF */
457 if (*p == '\r' && *(p + 1) == '\n')
458 memmove(p, p + 1, strlen(p));
459 else if ((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
/* conv_mb_alnum:
   Edits str in place, collapsing certain two-byte codes to one byte.
   For a code whose first byte is 0xa1, the second byte (0xa0..0xef)
   indexes char_tbl; if the entry is not NCV the first byte is replaced
   by that ASCII character and the tail is shifted left over the second
   byte.  An earlier branch does a similar shift for second bytes in
   0xb0..0xfa (its first-byte test is not shown in this listing). */
467 void conv_mb_alnum(gchar *str)
/* 80 entries, one for each second byte 0xa0..0xef; NCV = no replacement */
469 static guchar char_tbl[] = {
471 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
472 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
474 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
475 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
477 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
478 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
480 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
481 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
483 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
484 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
487 register guchar *p = str;
494 register guchar ch = *(p + 1);
496 if (ch >= 0xb0 && ch <= 0xfa) {
501 memmove(p, p + 1, len);
507 } else if (*p == 0xa1) {
508 register guchar ch = *(p + 1);
/* the range check keeps ch - 0xa0 inside char_tbl */
510 if (ch >= 0xa0 && ch <= 0xef &&
511 NCV != char_tbl[ch - 0xa0]) {
512 *p = char_tbl[ch - 0xa0];
515 memmove(p, p + 1, len);
521 } else if (iseuckanji(*p)) {
/* conv_guess_ja_encoding:
   Scans str and returns a CharSet guess, starting from C_US_ASCII.
   - ESC followed by '$' or '(' while the guess is still C_US_ASCII
     returns C_ISO_2022_JP immediately.
   - Byte pairs that satisfy iseuckanji() on both bytes, or the
     issjiskanji1()/issjiskanji2() lead/trail tests, are examined to
     decide between the multi-byte encodings.
   - A lone issjishwkana() byte sets the guess to C_SHIFT_JIS.
   Several branches and the final return are not shown in this listing. */
531 CharSet conv_guess_ja_encoding(const gchar *str)
533 const guchar *p = str;
534 CharSet guessed = C_US_ASCII;
537 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
538 if (guessed == C_US_ASCII)
539 return C_ISO_2022_JP;
541 } else if (isascii(*p)) {
543 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
544 if (*p >= 0xfd && *p <= 0xfe)
546 else if (guessed == C_SHIFT_JIS) {
547 if ((issjiskanji1(*p) &&
548 issjiskanji2(*(p + 1))) ||
550 guessed = C_SHIFT_JIS;
556 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
557 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
558 guessed = C_SHIFT_JIS;
562 } else if (issjishwkana(*p)) {
563 guessed = C_SHIFT_JIS;
/* conv_jistodisp: run conv_jistoeuc() from inbuf into outbuf, then pass
   outbuf through conv_unreadable_eucjp() in place. */
573 void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
575 conv_jistoeuc(outbuf, outlen, inbuf);
576 conv_unreadable_eucjp(outbuf);
/* conv_sjistodisp: run conv_sjistoeuc() from inbuf into outbuf, then pass
   outbuf through conv_unreadable_eucjp() in place. */
579 void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
581 conv_sjistoeuc(outbuf, outlen, inbuf);
582 conv_unreadable_eucjp(outbuf);
/* conv_euctodisp: copy inbuf into outbuf with strncpy2() (bounded by
   outlen), then pass outbuf through conv_unreadable_eucjp() in place. */
585 void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
587 strncpy2(outbuf, inbuf, outlen);
588 conv_unreadable_eucjp(outbuf);
/* conv_anytodisp: run conv_anytoeuc() (which guesses the input encoding)
   from inbuf into outbuf, then pass outbuf through
   conv_unreadable_eucjp() in place. */
591 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
593 conv_anytoeuc(outbuf, outlen, inbuf);
594 conv_unreadable_eucjp(outbuf);
/* conv_ustodisp: copy inbuf into outbuf with strncpy2() (bounded by
   outlen), then pass outbuf through conv_unreadable_8bit() in place. */
597 void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
599 strncpy2(outbuf, inbuf, outlen);
600 conv_unreadable_8bit(outbuf);
/* conv_latintodisp: copy inbuf into outbuf with strncpy2() (bounded by
   outlen), then pass outbuf through conv_unreadable_latin() in place. */
603 void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
605 strncpy2(outbuf, inbuf, outlen);
606 conv_unreadable_latin(outbuf);
/* conv_noconv: identity converter -- copies inbuf into outbuf with
   strncpy2() (bounded by outlen).  Other functions compare a converter
   pointer against this function to detect "no built-in conversion". */
609 void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
611 strncpy2(outbuf, inbuf, outlen);
/* conv_localetodisp:
   Copies inbuf into outbuf with strncpy2(), then switches on
   conv_get_current_charset() to pick a clean-up pass:
   conv_unreadable_latin() or conv_unreadable_eucjp() in the visible
   branches.  The case labels are not shown in this listing. */
614 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
616 strncpy2(outbuf, inbuf, outlen);
618 switch (conv_get_current_charset()) {
634 conv_unreadable_latin(outbuf);
637 conv_unreadable_eucjp(outbuf);
/* conv_code_converter_new:
   Allocates a zero-filled CodeConverter with g_new0() and fills it from
   the charset name: the converter function from
   conv_get_code_conv_func(charset, NULL), a private g_strdup() copy of
   the name, and the CharSet value from conv_get_charset_from_str().
   conv_code_converter_destroy() frees the copied name with g_free(). */
644 CodeConverter *conv_code_converter_new(const gchar *charset)
648 conv = g_new0(CodeConverter, 1);
649 conv->code_conv_func = conv_get_code_conv_func(charset, NULL);
650 conv->charset_str = g_strdup(charset);
651 conv->charset = conv_get_charset_from_str(charset);
/* conv_code_converter_destroy:
   Frees the charset name string owned by conv with g_free().  conv is
   dereferenced unconditionally, so it must not be NULL.
   NOTE(review): the release of conv itself is not shown in this listing
   -- confirm. */
656 void conv_code_converter_destroy(CodeConverter *conv)
658 g_free(conv->charset_str);
/* conv_convert:
   Converts inbuf into outbuf using the converter stored in conv.
   If conv->code_conv_func is not conv_noconv it is called directly.
   Otherwise, in the HAVE_ICONV build, the text goes through
   conv_iconv_strdup() with conv->charset_str as the source charset and
   the result is copied into outbuf with strncpy2().  In the non-iconv
   build the stored converter is always called.
   The return statements are not shown in this listing. */
662 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
666 if (conv->code_conv_func != conv_noconv)
667 conv->code_conv_func(outbuf, outlen, inbuf);
671 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
675 strncpy2(outbuf, str, outlen);
679 #else /* !HAVE_ICONV */
680 conv->code_conv_func(outbuf, outlen, inbuf);
/* conv_codeset_strdup:
   Returns a converted copy of inbuf.  If conv_get_code_conv_func() yields
   a built-in converter (anything other than conv_noconv), the text is
   converted into a work buffer of (strlen(inbuf) + 1) * 3 bytes, which is
   then shrunk to fit with g_realloc().  Otherwise the HAVE_ICONV build
   delegates to conv_iconv_strdup() and the non-iconv build returns a
   plain g_strdup() copy.  Returns NULL if the work buffer could not be
   obtained.  The result comes from GLib allocation functions; presumably
   the caller releases it with g_free() -- confirm against callers. */
686 gchar *conv_codeset_strdup(const gchar *inbuf,
687 const gchar *src_code, const gchar *dest_code)
691 CodeConvFunc conv_func;
693 conv_func = conv_get_code_conv_func(src_code, dest_code);
694 if (conv_func != conv_noconv) {
/* worst-case growth allowance: three output bytes per input byte */
695 len = (strlen(inbuf) + 1) * 3;
697 if (!buf) return NULL;
699 conv_func(buf, len, inbuf);
700 return g_realloc(buf, strlen(buf) + 1);
704 return conv_iconv_strdup(inbuf, src_code, dest_code);
706 return g_strdup(inbuf);
707 #endif /* HAVE_ICONV */
/* conv_get_code_conv_func:
   Chooses a built-in converter for the (source, destination) charset
   pair; the default is conv_noconv.
   - A NULL src_charset_str means "use conv_get_current_charset()".
   - With both names NULL (auto detection), an EUC-JP or Shift_JIS
     current charset selects conv_anytodisp.
   - A US-ASCII destination selects conv_ustodisp.
   - A UTF-8 destination (explicit, or C_AUTO while the current charset
     is UTF-8) is special-cased; the action is not shown in this listing.
   - Otherwise the switch on the source charset picks a *todisp function
     when the destination is C_AUTO, or a direct converter
     (conv_jistoeuc, conv_sjistoeuc, conv_euctojis) for the matching
     explicit destination. */
710 CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
711 const gchar *dest_charset_str)
713 CodeConvFunc code_conv = conv_noconv;
715 CharSet dest_charset;
717 if (!src_charset_str)
718 src_charset = conv_get_current_charset();
720 src_charset = conv_get_charset_from_str(src_charset_str);
722 /* auto detection mode */
723 if (!src_charset_str && !dest_charset_str) {
724 if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
725 return conv_anytodisp;
/* conv_get_charset_from_str() maps a NULL name to C_AUTO */
730 dest_charset = conv_get_charset_from_str(dest_charset_str);
732 if (dest_charset == C_US_ASCII)
733 return conv_ustodisp;
734 else if (dest_charset == C_UTF_8 ||
735 (dest_charset == C_AUTO &&
736 conv_get_current_charset() == C_UTF_8))
739 switch (src_charset) {
741 case C_ISO_2022_JP_2:
742 if (dest_charset == C_AUTO)
743 code_conv = conv_jistodisp;
744 else if (dest_charset == C_EUC_JP)
745 code_conv = conv_jistoeuc;
748 if (dest_charset == C_AUTO)
749 code_conv = conv_ustodisp;
765 if (dest_charset == C_AUTO)
766 code_conv = conv_latintodisp;
769 if (dest_charset == C_AUTO)
770 code_conv = conv_sjistodisp;
771 else if (dest_charset == C_EUC_JP)
772 code_conv = conv_sjistoeuc;
775 if (dest_charset == C_AUTO)
776 code_conv = conv_euctodisp;
777 else if (dest_charset == C_ISO_2022_JP ||
778 dest_charset == C_ISO_2022_JP_2)
779 code_conv = conv_euctojis;
/* conv_iconv_strdup:
   Converts inbuf from src_code to dest_code with iconv() and returns the
   result in a buffer obtained from g_malloc()/g_realloc().
   - Fallback names come from conv_get_outgoing_charset_str() for the
     source and conv_get_current_charset_str() for the destination (the
     guarding conditions are not shown in this listing).
   - If the destination is US-ASCII, or the two names are equal ignoring
     case, a plain g_strdup() copy is returned without calling iconv.
   - The output buffer starts at twice the input size (terminator
     included).  While iconv() reports an error: EILSEQ writes SUBST_CHAR
     to the output, E2BIG enlarges the buffer with g_realloc(), EINVAL has
     its own branch, and anything else is logged with g_warning().
   - A final iconv(cd, NULL, NULL, ...) call flushes any pending shift
     sequence, then the buffer is trimmed to strlen() + 1.
   NOTE(review): iconv() returns size_t; the "< 0" loop test only works if
   n_conv is a signed type.  Its declaration is not shown -- confirm. */
789 gchar *conv_iconv_strdup(const gchar *inbuf,
790 const gchar *src_code, const gchar *dest_code)
793 const gchar *inbuf_p;
803 src_code = conv_get_outgoing_charset_str();
805 dest_code = conv_get_current_charset_str();
807 /* don't convert if current codeset is US-ASCII */
808 if (!strcasecmp(dest_code, CS_US_ASCII))
809 return g_strdup(inbuf);
811 /* don't convert if src and dest codeset are identical */
812 if (!strcasecmp(src_code, dest_code))
813 return g_strdup(inbuf);
815 cd = iconv_open(dest_code, src_code);
816 if (cd == (iconv_t)-1)
/* + 1 so the terminating NUL is converted along with the text */
820 in_size = strlen(inbuf) + 1;
822 out_size = in_size * 2;
823 outbuf = g_malloc(out_size);
827 while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
828 &outbuf_p, &out_left)) < 0) {
829 if (EILSEQ == errno) {
832 *outbuf_p++ = SUBST_CHAR;
834 } else if (EINVAL == errno) {
837 } else if (E2BIG == errno) {
839 outbuf = g_realloc(outbuf, out_size);
845 g_warning("conv_iconv_strdup(): %s\n",
852 iconv(cd, NULL, NULL, &outbuf_p, &out_left);
853 outbuf = g_realloc(outbuf, strlen(outbuf) + 1);
859 #endif /* HAVE_ICONV */
861 static const struct {
865 {C_US_ASCII, CS_US_ASCII},
866 {C_US_ASCII, CS_ANSI_X3_4_1968},
868 {C_ISO_8859_1, CS_ISO_8859_1},
869 {C_ISO_8859_2, CS_ISO_8859_2},
870 {C_ISO_8859_3, CS_ISO_8859_3},
871 {C_ISO_8859_4, CS_ISO_8859_4},
872 {C_ISO_8859_5, CS_ISO_8859_5},
873 {C_ISO_8859_6, CS_ISO_8859_6},
874 {C_ISO_8859_7, CS_ISO_8859_7},
875 {C_ISO_8859_8, CS_ISO_8859_8},
876 {C_ISO_8859_9, CS_ISO_8859_9},
877 {C_ISO_8859_10, CS_ISO_8859_10},
878 {C_ISO_8859_11, CS_ISO_8859_11},
879 {C_ISO_8859_13, CS_ISO_8859_13},
880 {C_ISO_8859_14, CS_ISO_8859_14},
881 {C_ISO_8859_15, CS_ISO_8859_15},
882 {C_BALTIC, CS_BALTIC},
883 {C_CP1250, CS_CP1250},
884 {C_CP1251, CS_CP1251},
885 {C_CP1252, CS_CP1252},
886 {C_CP1253, CS_CP1253},
887 {C_CP1254, CS_CP1254},
888 {C_CP1255, CS_CP1255},
889 {C_CP1256, CS_CP1256},
890 {C_CP1257, CS_CP1257},
891 {C_CP1258, CS_CP1258},
892 {C_WINDOWS_1250, CS_WINDOWS_1250},
893 {C_WINDOWS_1251, CS_WINDOWS_1251},
894 {C_WINDOWS_1252, CS_WINDOWS_1252},
895 {C_WINDOWS_1253, CS_WINDOWS_1253},
896 {C_WINDOWS_1254, CS_WINDOWS_1254},
897 {C_WINDOWS_1255, CS_WINDOWS_1255},
898 {C_WINDOWS_1256, CS_WINDOWS_1256},
899 {C_WINDOWS_1257, CS_WINDOWS_1257},
900 {C_WINDOWS_1258, CS_WINDOWS_1258},
901 {C_KOI8_R, CS_KOI8_R},
902 {C_KOI8_T, CS_KOI8_T},
903 {C_KOI8_U, CS_KOI8_U},
904 {C_ISO_2022_JP, CS_ISO_2022_JP},
905 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
906 {C_EUC_JP, CS_EUC_JP},
907 {C_EUC_JP, CS_EUCJP},
908 {C_SHIFT_JIS, CS_SHIFT_JIS},
909 {C_SHIFT_JIS, CS_SHIFT__JIS},
910 {C_SHIFT_JIS, CS_SJIS},
911 {C_ISO_2022_KR, CS_ISO_2022_KR},
912 {C_EUC_KR, CS_EUC_KR},
913 {C_ISO_2022_CN, CS_ISO_2022_CN},
914 {C_EUC_CN, CS_EUC_CN},
915 {C_GB2312, CS_GB2312},
917 {C_EUC_TW, CS_EUC_TW},
919 {C_BIG5_HKSCS, CS_BIG5_HKSCS},
920 {C_TIS_620, CS_TIS_620},
921 {C_WINDOWS_874, CS_WINDOWS_874},
922 {C_GEORGIAN_PS, CS_GEORGIAN_PS},
923 {C_TCVN5712_1, CS_TCVN5712_1},
926 static const struct {
931 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
932 {"ja_JP.EUC-JP" , C_EUC_JP , C_ISO_2022_JP},
933 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
934 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
935 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
936 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
937 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
938 {"ko_KR.EUC-KR" , C_EUC_KR , C_EUC_KR},
939 {"ko_KR" , C_EUC_KR , C_EUC_KR},
940 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
941 {"zh_CN.GBK" , C_GBK , C_GB2312},
942 {"zh_CN" , C_GB2312 , C_GB2312},
943 {"zh_HK" , C_BIG5_HKSCS , C_BIG5_HKSCS},
944 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
945 {"zh_TW.EUC-TW" , C_EUC_TW , C_BIG5},
946 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
947 {"zh_TW" , C_BIG5 , C_BIG5},
949 {"ru_RU.KOI8-R" , C_KOI8_R , C_KOI8_R},
950 {"ru_RU.KOI8R" , C_KOI8_R , C_KOI8_R},
951 {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
952 {"ru_RU" , C_ISO_8859_5 , C_KOI8_R},
953 {"tg_TJ" , C_KOI8_T , C_KOI8_T},
954 {"ru_UA" , C_KOI8_U , C_KOI8_U},
955 {"uk_UA" , C_KOI8_U , C_KOI8_U},
957 {"be_BY" , C_WINDOWS_1251, C_WINDOWS_1251},
958 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
960 {"yi_US" , C_WINDOWS_1255, C_WINDOWS_1255},
962 {"af_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
963 {"br_FR" , C_ISO_8859_1 , C_ISO_8859_1},
964 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
965 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
966 {"de_AT" , C_ISO_8859_1 , C_ISO_8859_1},
967 {"de_BE" , C_ISO_8859_1 , C_ISO_8859_1},
968 {"de_CH" , C_ISO_8859_1 , C_ISO_8859_1},
969 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
970 {"de_LU" , C_ISO_8859_1 , C_ISO_8859_1},
971 {"en_AU" , C_ISO_8859_1 , C_ISO_8859_1},
972 {"en_BW" , C_ISO_8859_1 , C_ISO_8859_1},
973 {"en_CA" , C_ISO_8859_1 , C_ISO_8859_1},
974 {"en_DK" , C_ISO_8859_1 , C_ISO_8859_1},
975 {"en_GB" , C_ISO_8859_1 , C_ISO_8859_1},
976 {"en_HK" , C_ISO_8859_1 , C_ISO_8859_1},
977 {"en_IE" , C_ISO_8859_1 , C_ISO_8859_1},
978 {"en_NZ" , C_ISO_8859_1 , C_ISO_8859_1},
979 {"en_PH" , C_ISO_8859_1 , C_ISO_8859_1},
980 {"en_SG" , C_ISO_8859_1 , C_ISO_8859_1},
981 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
982 {"en_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
983 {"en_ZW" , C_ISO_8859_1 , C_ISO_8859_1},
984 {"es_AR" , C_ISO_8859_1 , C_ISO_8859_1},
985 {"es_BO" , C_ISO_8859_1 , C_ISO_8859_1},
986 {"es_CL" , C_ISO_8859_1 , C_ISO_8859_1},
987 {"es_CO" , C_ISO_8859_1 , C_ISO_8859_1},
988 {"es_CR" , C_ISO_8859_1 , C_ISO_8859_1},
989 {"es_DO" , C_ISO_8859_1 , C_ISO_8859_1},
990 {"es_EC" , C_ISO_8859_1 , C_ISO_8859_1},
991 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
992 {"es_GT" , C_ISO_8859_1 , C_ISO_8859_1},
993 {"es_HN" , C_ISO_8859_1 , C_ISO_8859_1},
994 {"es_MX" , C_ISO_8859_1 , C_ISO_8859_1},
995 {"es_NI" , C_ISO_8859_1 , C_ISO_8859_1},
996 {"es_PA" , C_ISO_8859_1 , C_ISO_8859_1},
997 {"es_PE" , C_ISO_8859_1 , C_ISO_8859_1},
998 {"es_PR" , C_ISO_8859_1 , C_ISO_8859_1},
999 {"es_PY" , C_ISO_8859_1 , C_ISO_8859_1},
1000 {"es_SV" , C_ISO_8859_1 , C_ISO_8859_1},
1001 {"es_US" , C_ISO_8859_1 , C_ISO_8859_1},
1002 {"es_UY" , C_ISO_8859_1 , C_ISO_8859_1},
1003 {"es_VE" , C_ISO_8859_1 , C_ISO_8859_1},
1004 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
1005 {"eu_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1006 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1007 {"fo_FO" , C_ISO_8859_1 , C_ISO_8859_1},
1008 {"fr_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1009 {"fr_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1010 {"fr_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1011 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1012 {"fr_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1013 {"ga_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1014 {"gl_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1015 {"gv_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1016 {"id_ID" , C_ISO_8859_1 , C_ISO_8859_1},
1017 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
1018 {"it_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1019 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
1020 {"kl_GL" , C_ISO_8859_1 , C_ISO_8859_1},
1021 {"kw_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1022 {"ms_MY" , C_ISO_8859_1 , C_ISO_8859_1},
1023 {"nl_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1024 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
1025 {"nn_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1026 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1027 {"oc_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1028 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
1029 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
1030 {"sq_AL" , C_ISO_8859_1 , C_ISO_8859_1},
1031 {"sv_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1032 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
1033 {"tl_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1034 {"uz_UZ" , C_ISO_8859_1 , C_ISO_8859_1},
1035 {"wa_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1037 {"bs_BA" , C_ISO_8859_2 , C_ISO_8859_2},
1038 {"cs_CZ" , C_ISO_8859_2 , C_ISO_8859_2},
1039 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
1040 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
1041 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
1042 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
1043 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
1044 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
1046 {"sr_YU@cyrillic" , C_ISO_8859_5 , C_ISO_8859_5},
1047 {"sr_YU" , C_ISO_8859_2 , C_ISO_8859_2},
1049 {"mt_MT" , C_ISO_8859_3 , C_ISO_8859_3},
1051 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
1052 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1053 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1054 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
1056 {"mk_MK" , C_ISO_8859_5 , C_ISO_8859_5},
1058 {"ar_AE" , C_ISO_8859_6 , C_ISO_8859_6},
1059 {"ar_BH" , C_ISO_8859_6 , C_ISO_8859_6},
1060 {"ar_DZ" , C_ISO_8859_6 , C_ISO_8859_6},
1061 {"ar_EG" , C_ISO_8859_6 , C_ISO_8859_6},
1062 {"ar_IQ" , C_ISO_8859_6 , C_ISO_8859_6},
1063 {"ar_JO" , C_ISO_8859_6 , C_ISO_8859_6},
1064 {"ar_KW" , C_ISO_8859_6 , C_ISO_8859_6},
1065 {"ar_LB" , C_ISO_8859_6 , C_ISO_8859_6},
1066 {"ar_LY" , C_ISO_8859_6 , C_ISO_8859_6},
1067 {"ar_MA" , C_ISO_8859_6 , C_ISO_8859_6},
1068 {"ar_OM" , C_ISO_8859_6 , C_ISO_8859_6},
1069 {"ar_QA" , C_ISO_8859_6 , C_ISO_8859_6},
1070 {"ar_SA" , C_ISO_8859_6 , C_ISO_8859_6},
1071 {"ar_SD" , C_ISO_8859_6 , C_ISO_8859_6},
1072 {"ar_SY" , C_ISO_8859_6 , C_ISO_8859_6},
1073 {"ar_TN" , C_ISO_8859_6 , C_ISO_8859_6},
1074 {"ar_YE" , C_ISO_8859_6 , C_ISO_8859_6},
1076 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
1077 {"he_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1078 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1079 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
1081 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
1082 {"mi_NZ" , C_ISO_8859_13 , C_ISO_8859_13},
1084 {"cy_GB" , C_ISO_8859_14 , C_ISO_8859_14},
1086 {"ar_IN" , C_UTF_8 , C_UTF_8},
1087 {"en_IN" , C_UTF_8 , C_UTF_8},
1088 {"se_NO" , C_UTF_8 , C_UTF_8},
1089 {"ta_IN" , C_UTF_8 , C_UTF_8},
1090 {"te_IN" , C_UTF_8 , C_UTF_8},
1091 {"ur_PK" , C_UTF_8 , C_UTF_8},
1093 {"th_TH" , C_TIS_620 , C_TIS_620},
1094 /* {"th_TH" , C_WINDOWS_874}, */
1095 /* {"th_TH" , C_ISO_8859_11}, */
1097 {"ka_GE" , C_GEORGIAN_PS , C_GEORGIAN_PS},
1098 {"vi_VN.TCVN" , C_TCVN5712_1 , C_TCVN5712_1},
1100 {"C" , C_US_ASCII , C_US_ASCII},
1101 {"POSIX" , C_US_ASCII , C_US_ASCII},
1102 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
/* conv_get_charset_to_str_table:
   Returns the CharSet -> name hash table, held in a function-local
   static.  The table is created with direct (pointer-value) hashing and
   comparison and keyed by GUINT_TO_POINTER(charset).  While walking
   charsets[], each entry is first looked up; presumably this keeps only
   the first name listed for a CharSet that appears several times --
   the branch taken is not shown in this listing, confirm. */
1105 static GHashTable *conv_get_charset_to_str_table(void)
1107 static GHashTable *table;
1113 table = g_hash_table_new(NULL, g_direct_equal);
1115 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1116 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1119 (table, GUINT_TO_POINTER(charsets[i].charset),
/* str_case_equal: key-equality callback for the name -> CharSet hash
   table.  Returns non-zero when the two strings are equal under
   strcasecmp(), i.e. ignoring case. */
1127 static gint str_case_equal(gconstpointer v, gconstpointer v2)
1129 return strcasecmp((const gchar *)v, (const gchar *)v2) == 0;
/* str_case_hash: hash callback paired with str_case_equal().  Each
   character after the first is folded in as h = h * 31 + tolower(c)
   (written as a shift and subtract), so strings that differ only in case
   hash alike.  The seeding of h from the first character is not shown in
   this listing.
   NOTE(review): *p is a gchar; if gchar is signed, a byte >= 0x80 reaches
   tolower() as a negative value -- confirm keys are ASCII-only. */
1132 static guint str_case_hash(gconstpointer key)
1134 const gchar *p = key;
1139 for (p += 1; *p != '\0'; p++)
1140 h = (h << 5) - h + tolower(*p);
/* conv_get_charset_from_str_table:
   Returns the name -> CharSet hash table, held in a function-local
   static.  The table uses the case-insensitive str_case_hash /
   str_case_equal callbacks.  Every entry of charsets[] is inserted with
   its name as key and GUINT_TO_POINTER(charset) as value. */
1146 static GHashTable *conv_get_charset_from_str_table(void)
1148 static GHashTable *table;
1154 table = g_hash_table_new(str_case_hash, str_case_equal);
1156 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1157 g_hash_table_insert(table, charsets[i].name,
1158 GUINT_TO_POINTER(charsets[i].charset));
/* conv_get_charset_str: look up the name registered for charset in the
   CharSet -> name table.  Returns the g_hash_table_lookup() result, which
   is NULL when the CharSet has no entry. */
1164 const gchar *conv_get_charset_str(CharSet charset)
1168 table = conv_get_charset_to_str_table();
1169 return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
/* conv_get_charset_from_str: map a charset name to its CharSet value via
   the case-insensitive name table.  A NULL name yields C_AUTO.  A name
   with no entry yields the integer form of a NULL lookup result, i.e. 0. */
1172 CharSet conv_get_charset_from_str(const gchar *charset)
1176 if (!charset) return C_AUTO;
1178 table = conv_get_charset_from_str_table();
1179 return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
/* conv_get_current_charset:
   Determines the CharSet implied by the current locale and caches it in a
   function-local static (-1 means "not computed yet").
   Order of checks on the name from conv_get_current_locale():
   1. a locale containing "UTF-8" (any case) gives C_UTF_8;
   2. a locale ending in "@euro" gives C_ISO_8859_15;
   3. the first locale_table entry that is a case-insensitive prefix of
      the locale, or whose two-letter language code equals a two-letter
      locale (only for table entries without a ".codeset" part);
   4. otherwise C_AUTO.
   C_US_ASCII is assigned on a path whose condition is not shown in this
   listing; presumably when no locale name is available -- confirm. */
1182 CharSet conv_get_current_charset(void)
1184 static CharSet cur_charset = -1;
1185 const gchar *cur_locale;
1189 if (cur_charset != -1)
1192 cur_locale = conv_get_current_locale();
1194 cur_charset = C_US_ASCII;
1198 if (strcasestr(cur_locale, "UTF-8")) {
1199 cur_charset = C_UTF_8;
/* p[5] == '\0' requires "@euro" (5 characters) to be the very end */
1203 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1204 cur_charset = C_ISO_8859_15;
1208 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1211 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1212 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1213 if (!strncasecmp(cur_locale, locale_table[i].locale,
1214 strlen(locale_table[i].locale))) {
1215 cur_charset = locale_table[i].charset;
1217 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1218 !strchr(p + 1, '.')) {
1219 if (strlen(cur_locale) == 2 &&
1220 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1221 cur_charset = locale_table[i].charset;
1227 cur_charset = C_AUTO;
/* conv_get_current_charset_str: name of the current locale's charset,
   cached in a function-local static.  Falls back to CS_US_ASCII when
   conv_get_charset_str() has no name for the current CharSet. */
1231 const gchar *conv_get_current_charset_str(void)
1233 static const gchar *codeset = NULL;
1236 codeset = conv_get_charset_str(conv_get_current_charset());
1238 return codeset ? codeset : CS_US_ASCII;
/* conv_get_outgoing_charset:
   Determines the CharSet to use for outgoing text from the current
   locale and caches it in a function-local static (-1 means "not
   computed yet").  A locale ending in "@euro" gives C_ISO_8859_15;
   otherwise locale_table is searched with the same matching rules as
   conv_get_current_charset(), but the out_charset column is taken.
   C_AUTO is assigned on a path whose condition is not shown in this
   listing.  The trailing section, per its own comment, covers builds
   without iconv(): C_ISO_2022_JP is tested explicitly and the visible
   return falls back to conv_get_current_charset(). */
1241 CharSet conv_get_outgoing_charset(void)
1243 static CharSet out_charset = -1;
1244 const gchar *cur_locale;
1248 if (out_charset != -1)
1251 cur_locale = conv_get_current_locale();
1253 out_charset = C_AUTO;
1257 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1258 out_charset = C_ISO_8859_15;
1262 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1265 if (!strncasecmp(cur_locale, locale_table[i].locale,
1266 strlen(locale_table[i].locale))) {
1267 out_charset = locale_table[i].out_charset;
1269 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1270 !strchr(p + 1, '.')) {
1271 if (strlen(cur_locale) == 2 &&
1272 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1273 out_charset = locale_table[i].out_charset;
1280 /* encoding conversion without iconv() is only supported
1281 on Japanese locale for now */
1282 if (out_charset == C_ISO_2022_JP)
1285 return conv_get_current_charset();
/* conv_get_outgoing_charset_str:
   Returns the charset name to use for outgoing text.
   - If prefs_common.outgoing_charset is set but does not start with a
     letter, it is freed and reset to a copy of CS_AUTO.
   - If it is set to anything other than CS_AUTO, that string is returned
     as-is (the pointer is owned by prefs_common).
   - Otherwise the name for conv_get_outgoing_charset() is returned, or
     CS_US_ASCII when that CharSet has no registered name. */
1291 const gchar *conv_get_outgoing_charset_str(void)
1293 CharSet out_charset;
1296 if (prefs_common.outgoing_charset) {
1297 if (!isalpha(prefs_common.outgoing_charset[0])) {
1298 g_free(prefs_common.outgoing_charset);
1299 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1300 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1301 return prefs_common.outgoing_charset;
1304 out_charset = conv_get_outgoing_charset();
1305 str = conv_get_charset_str(out_charset);
1307 return str ? str : CS_US_ASCII;
/* conv_is_multibyte_encoding: reports whether encoding is a multi-byte
   charset (per the function name).  C_ISO_2022_JP_2 is one of the values
   given its own case label; the other labels and the return values are
   not shown in this listing. */
1310 gboolean conv_is_multibyte_encoding(CharSet encoding)
1318 case C_ISO_2022_JP_2:
/* conv_get_current_locale:
   Finds the current locale name.  The environment variables LC_ALL,
   LC_CTYPE and LANG are tried in that order via g_getenv(); if none is
   set, the name is queried with setlocale(LC_CTYPE, NULL).  The result
   (or "(none)") is written to the debug log.  The return statement is
   not shown in this listing. */
1331 const gchar *conv_get_current_locale(void)
1335 cur_locale = g_getenv("LC_ALL");
1336 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1337 if (!cur_locale) cur_locale = g_getenv("LANG");
1338 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1340 debug_print("current locale: %s\n",
1341 cur_locale ? cur_locale : "(none)");
/* conv_unmime_header_overwrite:
   Decodes the header text in str and writes the result back into str.
   A temporary buffer is taken with Xalloca(), which returns from this
   function if the allocation fails.
   - When the current charset is C_EUC_JP, str is first passed through
     conv_anytodisp() into a buffer of strlen(str) * 2 + 1 bytes, and
     unmime_header() then decodes from that buffer into str.
   - Otherwise unmime_header() decodes str into a buffer of
     strlen(str) + 1 bytes, which is copied back with strncpy2().
   NOTE(review): both paths rely on the decoded text fitting in the
   original str storage -- confirm with unmime_header()'s contract. */
1346 void conv_unmime_header_overwrite(gchar *str)
1350 CharSet cur_charset;
1352 cur_charset = conv_get_current_charset();
1354 if (cur_charset == C_EUC_JP) {
1355 buflen = strlen(str) * 2 + 1;
1356 Xalloca(buf, buflen, return);
1357 conv_anytodisp(buf, buflen, str);
1358 unmime_header(str, buf);
1360 buflen = strlen(str) + 1;
1361 Xalloca(buf, buflen, return);
1362 unmime_header(buf, str);
1363 strncpy2(str, buf, buflen);
/* conv_unmime_header:
   Decodes the header text in str into outbuf with unmime_header().
   When the current charset is C_EUC_JP, str is first passed through
   conv_anytodisp() into an Xalloca() buffer of strlen(str) * 2 + 1 bytes
   (the function returns if that allocation fails) and the decode reads
   from that buffer.  Otherwise str is decoded directly.
   NOTE(review): outlen and charset are not referenced by any visible
   line -- confirm whether they are used. */
1367 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1368 const gchar *charset)
1370 CharSet cur_charset;
1372 cur_charset = conv_get_current_charset();
1374 if (cur_charset == C_EUC_JP) {
1378 buflen = strlen(str) * 2 + 1;
1379 Xalloca(buf, buflen, return);
1380 conv_anytodisp(buf, buflen, str);
1381 unmime_header(outbuf, buf);
1383 unmime_header(outbuf, str);
/* Constants used by conv_encode_header(). */
/* Target width of one output header line; the per-line budget starts
   from this value. */
1386 #define MAX_LINELEN 76
/* Larger limit applied while copying a plain ASCII word: a break is only
   forced once the line exceeds this width. */
1387 #define MAX_HARD_LINELEN 996
/* Opening and closing delimiters placed around each encoded block. */
1388 #define MIMESEP_BEGIN "=?"
1389 #define MIMESEP_END "?="
/* Base64 output length for len input bytes: four characters for every
   group of three bytes, with a partial final group rounded up. */
1391 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/* LBREAK_IF_REQUIRED(cond, is_plain_text):
   Helper macro for conv_encode_header(); it uses that function's locals
   (dest, destp, srcp, len, left) directly.  It first checks whether fewer
   than MAX_LINELEN + 2 bytes remain in the destination buffer.  Then, if
   cond holds and input remains, it inspects whitespace just before destp
   (and at srcp when is_plain_text is set) and resets left to
   MAX_LINELEN - 1.  The statements that emit the break are not shown in
   this listing. */
1393 #define LBREAK_IF_REQUIRED(cond, is_plain_text) \
1395 if (len - (destp - dest) < MAX_LINELEN + 2) { \
1400 if ((cond) && *srcp) { \
1401 if (destp > dest && left < MAX_LINELEN - 1) { \
1402 if (isspace(*(destp - 1))) \
1404 else if (is_plain_text && isspace(*srcp)) \
1409 left = MAX_LINELEN - 1; \
/* conv_encode_header:
   Writes an encoded form of the header text src into dest (capacity len),
   inserting breaks through LBREAK_IF_REQUIRED so that lines stay within
   MAX_LINELEN.
   - The encoding marker is "?B?" when MB_CUR_MAX > 1 and "?Q?" otherwise.
   - The source and outgoing charset names come from
     conv_get_current_charset_str() and conv_get_outgoing_charset_str();
     US-ASCII is replaced by ISO-8859-1 for both.
   - Words for which is_next_nonascii() is false are copied through as
     plain text.
   - For other text, the loop grows a candidate part one character at a
     time (using mblen() in multi-byte locales), converts it with
     conv_codeset_strdup(), and measures the encoded length until the
     block no longer fits in the remaining line budget.  The accepted
     part is then converted, encoded with base64_encode() or
     qp_q_encode(), and written as
     MIMESEP_BEGIN charset marker data MIMESEP_END.
   - If charset conversion fails, a warning is logged and the part is
     passed through conv_unreadable_8bit() instead.
   Many lines of this function are not shown in this listing. */
1415 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1418 const gchar *cur_encoding;
1419 const gchar *out_encoding;
1423 const gchar *srcp = src;
1424 gchar *destp = dest;
1425 gboolean use_base64;
1427 if (MB_CUR_MAX > 1) {
1429 mimesep_enc = "?B?";
1432 mimesep_enc = "?Q?";
1435 cur_encoding = conv_get_current_charset_str();
1436 if (!strcmp(cur_encoding, CS_US_ASCII))
1437 cur_encoding = CS_ISO_8859_1;
1438 out_encoding = conv_get_outgoing_charset_str();
1439 if (!strcmp(out_encoding, CS_US_ASCII))
1440 out_encoding = CS_ISO_8859_1;
/* fixed overhead of one encoded block: both delimiters, the charset
   name and the encoding marker */
1442 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1443 strlen(mimesep_enc) + strlen(MIMESEP_END);
/* the header name already occupies header_len columns of the first line */
1445 left = MAX_LINELEN - header_len;
1448 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1450 while (isspace(*srcp)) {
1453 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1456 /* output as it is if the next word is ASCII string */
1457 if (!is_next_nonascii(srcp)) {
1460 word_len = get_next_word_len(srcp);
1461 LBREAK_IF_REQUIRED(left < word_len, TRUE);
1462 while (word_len > 0) {
1463 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1478 const gchar *p = srcp;
1480 gint out_enc_str_len;
1481 gint mime_block_len;
1482 gboolean cont = FALSE;
1484 while (*p != '\0') {
1485 if (isspace(*p) && !is_next_nonascii(p + 1))
1488 if (MB_CUR_MAX > 1) {
1489 mb_len = mblen(p, MB_CUR_MAX);
1491 g_warning("conv_encode_header(): invalid multibyte character encountered\n");
/* trial conversion of the part extended by one more character */
1497 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1498 out_str = conv_codeset_strdup
1499 (part_str, cur_encoding, out_encoding);
1501 g_warning("conv_encode_header(): code conversion failed\n");
1502 conv_unreadable_8bit(part_str);
1503 out_str = g_strdup(part_str);
1505 out_str_len = strlen(out_str);
1508 out_enc_str_len = B64LEN(out_str_len);
1511 qp_get_q_encoding_len(out_str);
1515 if (mimestr_len + out_enc_str_len <= left) {
1518 } else if (cur_len == 0) {
1519 LBREAK_IF_REQUIRED(1, FALSE);
/* real conversion of the accepted part */
1528 Xstrndup_a(part_str, srcp, cur_len, );
1529 out_str = conv_codeset_strdup
1530 (part_str, cur_encoding, out_encoding);
1532 g_warning("conv_encode_header(): code conversion failed\n");
1533 conv_unreadable_8bit(part_str);
1534 out_str = g_strdup(part_str);
1536 out_str_len = strlen(out_str);
1539 out_enc_str_len = B64LEN(out_str_len);
1542 qp_get_q_encoding_len(out_str);
1544 Xalloca(enc_str, out_enc_str_len + 1, );
1546 base64_encode(enc_str, out_str, out_str_len);
1548 qp_q_encode(enc_str, out_str);
1552 /* output MIME-encoded string block */
1553 mime_block_len = mimestr_len + strlen(enc_str);
1554 g_snprintf(destp, mime_block_len + 1,
1555 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1556 out_encoding, mimesep_enc, enc_str);
1557 destp += mime_block_len;
1560 left -= mime_block_len;
1563 LBREAK_IF_REQUIRED(cont, FALSE);
1573 #undef LBREAK_IF_REQUIRED