2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2003 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
42 #include "quoted-printable.h"
44 #include "prefs_common.h"
/* Placeholder written over bytes that cannot be displayed or converted. */
54 #define SUBST_CHAR '_'
/*
 * Byte-range predicates for Japanese multibyte text.  Every macro masks its
 * argument with 0xff before comparing it against the range.
 */
/* 0xa1..0xfe: range tested on both bytes of a two-byte EUC-JP character. */
57 #define iseuckanji(c) \
58 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* 0x8e: lead byte that is checked before iseuchwkana2() on the next byte. */
59 #define iseuchwkana1(c) \
60 (((c) & 0xff) == 0x8e)
/* 0xa1..0xdf: second byte of an EUC-JP half-width (hankaku) kana pair. */
61 #define iseuchwkana2(c) \
62 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* NOTE(review): the #define line owning the next body is not part of this
   listing; presumably it is iseucaux(), which the converters in this file
   call before testing two iseuckanji() bytes -- confirm. */
64 (((c) & 0xff) == 0x8f)
/* 0x81..0x9f or 0xe0..0xfc: tested on the first byte of a Shift_JIS pair. */
65 #define issjiskanji1(c) \
66 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
67 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* 0x40..0x7e or 0x80..0xfc: tested on the second byte of a Shift_JIS pair. */
68 #define issjiskanji2(c) \
69 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
70 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* 0xa1..0xdf: single-byte Shift_JIS half-width kana. */
71 #define issjishwkana(c) \
72 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/*
 * Fragments of four multi-line macros whose #define lines are not part of
 * this listing.  Each visible line compares the caller's local `state`
 * against one JISState value (JIS_KANJI, JIS_ASCII, JIS_HWKANA,
 * JIS_AUXKANJI); the last visible line stores JIS_AUXKANJI into `state`.
 * NOTE(review): presumably each macro emits the matching escape sequence only
 * when the state actually changes -- confirm against the complete file.
 */
75 if (state != JIS_KANJI) { \
83 if (state != JIS_ASCII) { \
91 if (state != JIS_HWKANA) { \
99 if (state != JIS_AUXKANJI) { \
104 state = JIS_AUXKANJI; \
/*
 * conv_jistoeuc: convert escape-sequence driven JIS text into EUC-JP.
 *
 * outbuf - destination buffer, written through the local pointer `out`
 * outlen - length handed in together with outbuf
 * inbuf  - NUL-terminated source text, read through the local pointer `in`
 *
 * The scanner starts in JIS_ASCII and runs until the terminating NUL of
 * inbuf.  Escape sequences select the state; payload bytes are copied with
 * their high bit set.
 *
 * NOTE(review): no use of outlen is visible in this listing -- confirm that
 * writes to outbuf are bounded.
 */
107 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
109 const guchar *in = inbuf;
110 guchar *out = outbuf;
111 JISState state = JIS_ASCII;
113 while (*in != '\0') {
/* the byte after the current one selects the character set: '@' or 'B'
   here, or a '(' prefixed sequence that switches to JIS_AUXKANJI */
117 if (*(in + 1) == '@' || *(in + 1) == 'B') {
120 } else if (*(in + 1) == '(' &&
122 state = JIS_AUXKANJI;
125 /* unknown escape sequence */
/* sequences introduced by '(' : final byte 'B' or 'J', or 'I' */
128 } else if (*in == '(') {
129 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
132 } else if (*(in + 1) == 'I') {
136 /* unknown escape sequence */
140 /* unknown escape sequence */
/* 0x0e (ASCII SO) and 0x0f (ASCII SI) are recognised as well */
143 } else if (*in == 0x0e) {
146 } else if (*in == 0x0f) {
/* payload copy: every copied byte gets bit 7 set; a two-byte copy stops
   early when the input ends right after the first byte */
155 *out++ = *in++ | 0x80;
156 if (*in == '\0') break;
157 *out++ = *in++ | 0x80;
161 *out++ = *in++ | 0x80;
165 *out++ = *in++ | 0x80;
166 if (*in == '\0') break;
167 *out++ = *in++ | 0x80;
/*
 * conv_euctojis: convert EUC-JP text into 7-bit JIS.
 *
 * outbuf - destination buffer, written through the local pointer `out`
 * outlen - length handed in together with outbuf
 * inbuf  - NUL-terminated source text
 *
 * The converter tracks a JISState that starts as JIS_ASCII.  Well-formed
 * multibyte sequences are written with bit 7 cleared (& 0x7f).  Each
 * malformed-sequence branch visible here tests whether the current byte is a
 * non-NUL, non-ASCII byte.
 *
 * NOTE(review): no use of outlen is visible in this listing -- confirm that
 * writes to outbuf are bounded.
 */
176 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
178 const guchar *in = inbuf;
179 guchar *out = outbuf;
180 JISState state = JIS_ASCII;
182 while (*in != '\0') {
/* two-byte character: both bytes must be in the iseuckanji() range */
186 } else if (iseuckanji(*in)) {
187 if (iseuckanji(*(in + 1))) {
189 *out++ = *in++ & 0x7f;
190 *out++ = *in++ & 0x7f;
195 if (*in != '\0' && !isascii(*in)) {
/* half-width kana: 0x8e lead byte, then one byte in 0xa1..0xdf */
200 } else if (iseuchwkana1(*in)) {
202 if (iseuchwkana2(*in)) {
204 *out++ = *in++ & 0x7f;
207 if (*in != '\0' && !isascii(*in)) {
/* auxiliary set: iseucaux() lead byte, then a two-byte character */
212 } else if (iseucaux(*in)) {
214 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
216 *out++ = *in++ & 0x7f;
217 *out++ = *in++ & 0x7f;
220 if (*in != '\0' && !isascii(*in)) {
223 if (*in != '\0' && !isascii(*in)) {
/*
 * conv_sjistoeuc: convert Shift_JIS text into EUC-JP.
 *
 * outbuf - destination buffer, written through the local pointer `out`
 * outlen - length handed in together with outbuf
 * inbuf  - NUL-terminated source text
 *
 * A byte pair accepted by issjiskanji1()/issjiskanji2() is remapped
 * arithmetically and both result bytes are stored with bit 7 set.
 *
 * NOTE(review): no use of outlen is visible in this listing -- confirm that
 * writes to outbuf are bounded.
 */
240 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
242 const guchar *in = inbuf;
243 guchar *out = outbuf;
245 while (*in != '\0') {
248 } else if (issjiskanji1(*in)) {
249 if (issjiskanji2(*(in + 1))) {
251 guchar out2 = *(in + 1);
/* the row offset depends on which lead-byte range out1 falls in */
254 row = out1 < 0xa0 ? 0x70 : 0xb0;
/* two alternative mappings follow; the condition that chooses between
   them is not part of this listing */
256 out1 = (out1 - row) * 2 - 1;
257 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
259 out1 = (out1 - row) * 2;
263 *out++ = out1 | 0x80;
264 *out++ = out2 | 0x80;
269 if (*in != '\0' && !isascii(*in)) {
/* single-byte half-width kana */
274 } else if (issjishwkana(*in)) {
/*
 * conv_anytoeuc: convert Japanese text of unknown encoding to EUC-JP.
 *
 * Switches on conv_guess_ja_encoding(inbuf) and calls conv_jistoeuc(),
 * conv_sjistoeuc(), or copies the input with strncpy2().  The case labels
 * are not part of this listing.
 */
286 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
288 switch (conv_guess_ja_encoding(inbuf)) {
290 conv_jistoeuc(outbuf, outlen, inbuf);
293 conv_sjistoeuc(outbuf, outlen, inbuf);
296 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_anytojis: convert Japanese text of unknown encoding to JIS.
 *
 * Switches on conv_guess_ja_encoding(inbuf) and calls conv_euctojis() or
 * copies the input with strncpy2().  The case labels are not part of this
 * listing.
 */
301 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
303 switch (conv_guess_ja_encoding(inbuf)) {
305 conv_euctojis(outbuf, outlen, inbuf);
308 strncpy2(outbuf, inbuf, outlen);
/*
 * valid_eucjp_tbl: table of 0/1 flags with one 96-entry row per lead byte
 * 0xa2..0xa8.  isprintableeuckanji() indexes it as
 * valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0] and returns the entry as a gboolean.
 */
313 static gchar valid_eucjp_tbl[][96] = {
314 /* 0xa2a0 - 0xa2ff */
315 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
316 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
317 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
318 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
319 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
320 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0 },
322 /* 0xa3a0 - 0xa3ff */
323 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
324 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
325 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
326 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
327 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
328 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 },
330 /* 0xa4a0 - 0xa4ff */
331 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
332 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
333 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
334 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
335 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
336 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
338 /* 0xa5a0 - 0xa5ff */
339 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
340 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
341 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
342 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
343 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
344 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
346 /* 0xa6a0 - 0xa6ff */
347 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
348 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
349 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
350 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
351 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
352 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
354 /* 0xa7a0 - 0xa7ff */
355 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
356 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
357 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
358 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
359 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
360 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
362 /* 0xa8a0 - 0xa8ff */
363 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
364 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
365 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
366 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
367 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
368 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
/*
 * isprintableeuckanji: tell whether the byte pair (c1, c2) is a displayable
 * two-byte EUC-JP character.
 *
 * Visible tests, in order: c1 outside 0xa1..0xf4; c2 at or below 0xa0 or
 * equal to 0xff; c1 in 0xa9..0xaf; c1 in 0xa2..0xa8 (answered from
 * valid_eucjp_tbl); a c2 range of 0xd4 and above; and, for c1 == 0xf4, a c2
 * range of 0xa7 and above.
 *
 * NOTE(review): apart from the table lookup, the return statements and the
 * condition enclosing the 0xd4 test are not part of this listing; presumably
 * the range tests reject the pair -- confirm.
 * NOTE(review): c2 is a guchar, so the "c2 <= 0xff" halves are always true.
 */
371 static gboolean isprintableeuckanji(guchar c1, guchar c2)
373 if (c1 <= 0xa0 || c1 >= 0xf5)
375 if (c2 <= 0xa0 || c2 == 0xff)
378 if (c1 >= 0xa9 && c1 <= 0xaf)
381 if (c1 >= 0xa2 && c1 <= 0xa8)
382 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
385 if (c2 >= 0xd4 && c2 <= 0xff)
387 } else if (c1 == 0xf4) {
388 if (c2 >= 0xa7 && c2 <= 0xff)
/*
 * conv_unreadable_eucjp: clean up an EUC-JP string in place for display.
 *
 * str - NUL-terminated buffer, modified through the local pointer `p`
 *
 * CR+LF pairs are collapsed to LF.  Two-byte characters are checked with
 * isprintableeuckanji(); half-width kana and auxiliary kanji sequences are
 * recognised by their lead bytes; the existing comments mark the branches
 * that substitute unprintable codes.
 */
395 void conv_unreadable_eucjp(gchar *str)
397 register guchar *p = str;
/* memmove of strlen(p) bytes from p + 1 shifts the rest of the string,
   including its terminating NUL, one position to the left */
401 /* convert CR+LF -> LF */
402 if (*p == '\r' && *(p + 1) == '\n')
403 memmove(p, p + 1, strlen(p));
404 /* printable 7 bit code */
406 } else if (iseuckanji(*p)) {
407 if (isprintableeuckanji(*p, *(p + 1))) {
408 /* printable euc-jp code */
411 /* substitute unprintable code */
420 } else if (iseuchwkana1(*p)) {
421 if (iseuchwkana2(*(p + 1)))
422 /* euc-jp hankaku kana */
426 } else if (iseucaux(*p)) {
427 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
428 /* auxiliary kanji */
433 /* substitute unprintable 1 byte code */
/*
 * conv_unreadable_8bit: make a string 7-bit clean in place.
 *
 * str - NUL-terminated buffer, modified through the local pointer `p`
 *
 * CR+LF pairs are collapsed to LF; any other byte for which isascii() is
 * false is overwritten with SUBST_CHAR.
 */
438 void conv_unreadable_8bit(gchar *str)
440 register guchar *p = str;
/* strlen(p) bytes from p + 1 covers the tail plus the terminating NUL */
443 /* convert CR+LF -> LF */
444 if (*p == '\r' && *(p + 1) == '\n')
445 memmove(p, p + 1, strlen(p));
446 else if (!isascii(*p)) *p = SUBST_CHAR;
/*
 * conv_unreadable_latin: clean up a Latin (8-bit) string in place.
 *
 * str - NUL-terminated buffer, modified through the local pointer `p`
 *
 * CR+LF pairs are collapsed to LF.  Bytes in the range 0x7f..0x9f are singled
 * out by the last visible test.
 * NOTE(review): the statement executed for that range is not part of this
 * listing; presumably the byte is replaced with SUBST_CHAR -- confirm.
 */
451 void conv_unreadable_latin(gchar *str)
453 register guchar *p = str;
456 /* convert CR+LF -> LF */
457 if (*p == '\r' && *(p + 1) == '\n')
458 memmove(p, p + 1, strlen(p));
459 else if ((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
/*
 * conv_mb_alnum: rewrite selected two-byte characters of str in place.
 *
 * str - NUL-terminated buffer, modified through the local pointer `p`
 *
 * Two cases are visible.  For a pair whose second byte lies in 0xb0..0xfa the
 * string tail is shifted with memmove().  For a pair led by 0xa1 whose second
 * byte lies in 0xa0..0xef, char_tbl supplies a single-byte replacement unless
 * the entry is NCV; the replacement is stored at *p and the tail is shifted
 * with memmove().  Other bytes in the iseuckanji() range get a branch of
 * their own.
 * NOTE(review): the test that guards the 0xb0..0xfa case and the pointer and
 * length updates around each memmove() are not part of this listing.
 */
467 void conv_mb_alnum(gchar *str)
/* replacement table indexed by (second byte - 0xa0); NCV marks entries that
   have no replacement */
469 static guchar char_tbl[] = {
471 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
472 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
474 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
475 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
477 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
478 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
480 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
481 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
483 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
484 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
487 register guchar *p = str;
494 register guchar ch = *(p + 1);
496 if (ch >= 0xb0 && ch <= 0xfa) {
501 memmove(p, p + 1, len);
507 } else if (*p == 0xa1) {
508 register guchar ch = *(p + 1);
/* ch - 0xa0 stays inside the 80-entry table because ch <= 0xef */
510 if (ch >= 0xa0 && ch <= 0xef &&
511 NCV != char_tbl[ch - 0xa0]) {
512 *p = char_tbl[ch - 0xa0];
515 memmove(p, p + 1, len);
521 } else if (iseuckanji(*p)) {
/*
 * conv_guess_ja_encoding: guess which Japanese encoding a string uses.
 *
 * str - NUL-terminated text to inspect
 *
 * Returns a CharSet.  The running guess starts as C_US_ASCII.  An ESC
 * followed by '$' or '(' yields C_ISO_2022_JP immediately, but only while the
 * guess is still C_US_ASCII.  Byte pairs valid in both EUC-JP and Shift_JIS
 * only update the guess; a lone Shift_JIS half-width kana byte sets the guess
 * to C_SHIFT_JIS.
 * NOTE(review): the remaining return statements are not part of this listing.
 */
531 CharSet conv_guess_ja_encoding(const gchar *str)
533 const guchar *p = str;
534 CharSet guessed = C_US_ASCII;
537 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
538 if (guessed == C_US_ASCII)
539 return C_ISO_2022_JP;
541 } else if (isascii(*p)) {
/* both bytes fall in the EUC-JP two-byte range */
543 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
544 if (*p >= 0xfd && *p <= 0xfe)
546 else if (guessed == C_SHIFT_JIS) {
547 if ((issjiskanji1(*p) &&
548 issjiskanji2(*(p + 1))) ||
550 guessed = C_SHIFT_JIS;
/* a Shift_JIS pair that could also be an EUC-JP half-width kana pair only
   records the guess */
556 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
557 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
558 guessed = C_SHIFT_JIS;
562 } else if (issjishwkana(*p)) {
563 guessed = C_SHIFT_JIS;
/*
 * conv_jistodisp: convert JIS text for display.
 * Runs conv_jistoeuc() into outbuf, then conv_unreadable_eucjp() on outbuf.
 */
573 void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
575 conv_jistoeuc(outbuf, outlen, inbuf);
576 conv_unreadable_eucjp(outbuf);
/*
 * conv_sjistodisp: convert Shift_JIS text for display.
 * Runs conv_sjistoeuc() into outbuf, then conv_unreadable_eucjp() on outbuf.
 */
579 void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
581 conv_sjistoeuc(outbuf, outlen, inbuf);
582 conv_unreadable_eucjp(outbuf);
/*
 * conv_euctodisp: prepare EUC-JP text for display.
 * Copies inbuf into outbuf with strncpy2(), then runs
 * conv_unreadable_eucjp() on outbuf.
 */
585 void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
587 strncpy2(outbuf, inbuf, outlen);
588 conv_unreadable_eucjp(outbuf);
/*
 * conv_anytodisp: convert Japanese text of unknown encoding for display.
 * Runs conv_anytoeuc() into outbuf, then conv_unreadable_eucjp() on outbuf.
 */
591 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
593 conv_anytoeuc(outbuf, outlen, inbuf);
594 conv_unreadable_eucjp(outbuf);
/*
 * conv_ustodisp: prepare US-ASCII text for display.
 * Copies inbuf into outbuf with strncpy2(), then runs conv_unreadable_8bit()
 * on outbuf.
 */
597 void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
599 strncpy2(outbuf, inbuf, outlen);
600 conv_unreadable_8bit(outbuf);
/*
 * conv_latintodisp: prepare Latin (8-bit) text for display.
 * Copies inbuf into outbuf with strncpy2(), then runs
 * conv_unreadable_latin() on outbuf.
 */
603 void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
605 strncpy2(outbuf, inbuf, outlen);
606 conv_unreadable_latin(outbuf);
/*
 * conv_noconv: identity converter; copies inbuf into outbuf with strncpy2().
 * Other functions in this file compare a converter against conv_noconv to
 * detect "no built-in conversion".
 */
609 void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
611 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_localetodisp: prepare locale-encoded text for display.
 * Copies inbuf into outbuf with strncpy2(), then switches on
 * conv_get_current_charset() and runs conv_unreadable_latin() or
 * conv_unreadable_eucjp() on outbuf.  The case labels are not part of this
 * listing.
 */
614 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
616 strncpy2(outbuf, inbuf, outlen);
618 switch (conv_get_current_charset()) {
630 conv_unreadable_latin(outbuf);
633 conv_unreadable_eucjp(outbuf);
/*
 * conv_code_converter_new: allocate a CodeConverter for a source charset.
 *
 * charset - charset name; stored as a g_strdup() copy in charset_str
 *
 * The structure is zero-initialised by g_new0().  code_conv_func is looked up
 * with conv_get_code_conv_func(charset, NULL), and charset holds the parsed
 * CharSet value.
 * NOTE(review): the return statement is not part of this listing.
 */
640 CodeConverter *conv_code_converter_new(const gchar *charset)
644 conv = g_new0(CodeConverter, 1);
645 conv->code_conv_func = conv_get_code_conv_func(charset, NULL);
646 conv->charset_str = g_strdup(charset);
647 conv->charset = conv_get_charset_from_str(charset);
/*
 * conv_code_converter_destroy: release a CodeConverter.
 * Frees the charset_str copy owned by conv.
 * NOTE(review): the release of conv itself is not part of this listing.
 */
652 void conv_code_converter_destroy(CodeConverter *conv)
654 g_free(conv->charset_str);
/*
 * conv_convert: convert inbuf into outbuf using a CodeConverter.
 *
 * When conv->code_conv_func is not conv_noconv it is called directly.  The
 * iconv path converts with conv_iconv_strdup(inbuf, conv->charset_str, NULL)
 * and copies the result into outbuf with strncpy2().  In the !HAVE_ICONV
 * build code_conv_func is called directly.
 * NOTE(review): the remaining parameter, the return values and the handling
 * of a NULL result from conv_iconv_strdup() are not part of this listing.
 */
658 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
662 if (conv->code_conv_func != conv_noconv)
663 conv->code_conv_func(outbuf, outlen, inbuf);
667 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
671 strncpy2(outbuf, str, outlen);
675 #else /* !HAVE_ICONV */
676 conv->code_conv_func(outbuf, outlen, inbuf);
/*
 * conv_codeset_strdup: return a converted copy of inbuf.
 *
 * inbuf     - NUL-terminated source text
 * src_code  - source charset name, passed to conv_get_code_conv_func()
 * dest_code - destination charset name, passed likewise
 *
 * If a built-in converter exists (anything but conv_noconv), it runs into a
 * work buffer of (strlen(inbuf) + 1) * 3 bytes and the buffer is shrunk to
 * fit with g_realloc().  Otherwise the result comes from conv_iconv_strdup(),
 * or from g_strdup(inbuf) in the build without iconv.
 * Returns NULL when the work buffer is NULL.
 */
682 gchar *conv_codeset_strdup(const gchar *inbuf,
683 const gchar *src_code, const gchar *dest_code)
687 CodeConvFunc conv_func;
689 conv_func = conv_get_code_conv_func(src_code, dest_code);
690 if (conv_func != conv_noconv) {
691 len = (strlen(inbuf) + 1) * 3;
693 if (!buf) return NULL;
695 conv_func(buf, len, inbuf);
696 return g_realloc(buf, strlen(buf) + 1);
700 return conv_iconv_strdup(inbuf, src_code, dest_code);
702 return g_strdup(inbuf);
703 #endif /* HAVE_ICONV */
/*
 * conv_get_code_conv_func: choose the built-in converter for a charset pair.
 *
 * src_charset_str  - source charset name; NULL selects the current locale
 *                    charset
 * dest_charset_str - destination charset name; NULL is parsed as C_AUTO by
 *                    conv_get_charset_from_str()
 *
 * Returns a CodeConvFunc; the default is conv_noconv.  With both arguments
 * NULL and an EUC-JP or Shift_JIS locale the auto-detecting conv_anytodisp is
 * returned.  A US-ASCII destination always yields conv_ustodisp.  Otherwise
 * the choice depends on the source charset, with C_AUTO as destination
 * selecting the "...todisp" variants.
 * NOTE(review): most case labels of the switch and the result for a UTF-8
 * destination are not part of this listing.
 */
706 CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
707 const gchar *dest_charset_str)
709 CodeConvFunc code_conv = conv_noconv;
711 CharSet dest_charset;
713 if (!src_charset_str)
714 src_charset = conv_get_current_charset();
716 src_charset = conv_get_charset_from_str(src_charset_str);
718 /* auto detection mode */
719 if (!src_charset_str && !dest_charset_str) {
720 if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
721 return conv_anytodisp;
726 dest_charset = conv_get_charset_from_str(dest_charset_str);
728 if (dest_charset == C_US_ASCII)
729 return conv_ustodisp;
730 else if (dest_charset == C_UTF_8 ||
731 (dest_charset == C_AUTO &&
732 conv_get_current_charset() == C_UTF_8))
/* pick a converter by source charset */
735 switch (src_charset) {
737 case C_ISO_2022_JP_2:
738 if (dest_charset == C_AUTO)
739 code_conv = conv_jistodisp;
740 else if (dest_charset == C_EUC_JP)
741 code_conv = conv_jistoeuc;
744 if (dest_charset == C_AUTO)
745 code_conv = conv_ustodisp;
757 if (dest_charset == C_AUTO)
758 code_conv = conv_latintodisp;
761 if (dest_charset == C_AUTO)
762 code_conv = conv_sjistodisp;
763 else if (dest_charset == C_EUC_JP)
764 code_conv = conv_sjistoeuc;
767 if (dest_charset == C_AUTO)
768 code_conv = conv_euctodisp;
769 else if (dest_charset == C_ISO_2022_JP ||
770 dest_charset == C_ISO_2022_JP_2)
771 code_conv = conv_euctojis;
/*
 * conv_iconv_strdup: convert inbuf with iconv() and return the result.
 *
 * inbuf     - NUL-terminated source text
 * src_code  - source charset name; the outgoing charset name can be
 *             substituted for it
 * dest_code - destination charset name; the current locale charset name can
 *             be substituted for it
 *
 * A plain g_strdup() copy is returned when the destination is US-ASCII or
 * equals the source.  Otherwise the text, including its terminating NUL
 * (in_size is strlen + 1), is converted into a g_malloc() buffer that starts
 * at twice the input size and is enlarged with g_realloc() on E2BIG.  On
 * EILSEQ a SUBST_CHAR is written to the output.  Finally the buffer is shrunk
 * to strlen(outbuf) + 1.
 *
 * NOTE(review): the conditions guarding the two default assignments, the
 * iconv_open() failure path and iconv_close() are not part of this listing.
 * NOTE(review): iconv() returns size_t and reports errors as (size_t)-1, so
 * the "< 0" loop test depends on n_conv being a signed type; its declaration
 * is not visible here -- confirm.
 */
781 gchar *conv_iconv_strdup(const gchar *inbuf,
782 const gchar *src_code, const gchar *dest_code)
785 const gchar *inbuf_p;
795 src_code = conv_get_outgoing_charset_str();
797 dest_code = conv_get_current_charset_str();
799 /* don't convert if current codeset is US-ASCII */
800 if (!strcasecmp(dest_code, CS_US_ASCII))
801 return g_strdup(inbuf);
803 /* don't convert if src and dest codeset are identical */
804 if (!strcasecmp(src_code, dest_code))
805 return g_strdup(inbuf);
807 cd = iconv_open(dest_code, src_code);
808 if (cd == (iconv_t)-1)
812 in_size = strlen(inbuf) + 1;
814 out_size = in_size * 2;
815 outbuf = g_malloc(out_size);
/* retry loop: every failed iconv() call is classified by errno */
819 while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
820 &outbuf_p, &out_left)) < 0) {
821 if (EILSEQ == errno) {
824 *outbuf_p++ = SUBST_CHAR;
826 } else if (EINVAL == errno) {
829 } else if (E2BIG == errno) {
831 outbuf = g_realloc(outbuf, out_size);
837 g_warning("conv_iconv_strdup(): %s\n",
/* a NULL input pointer asks iconv() to emit any pending reset sequence */
844 iconv(cd, NULL, NULL, &outbuf_p, &out_left);
845 outbuf = g_realloc(outbuf, strlen(outbuf) + 1);
851 #endif /* HAVE_ICONV */
/*
 * charsets: table pairing each CharSet value with a charset name string.
 * Several CharSet values appear with more than one name (C_US_ASCII,
 * C_EUC_JP, C_SHIFT_JIS).  The lookup-table builders in this file read the
 * entries as charsets[i].charset and charsets[i].name.
 * NOTE(review): the member declarations and the array name line are not part
 * of this listing.
 */
853 static const struct {
857 {C_US_ASCII, CS_US_ASCII},
858 {C_US_ASCII, CS_ANSI_X3_4_1968},
860 {C_ISO_8859_1, CS_ISO_8859_1},
861 {C_ISO_8859_2, CS_ISO_8859_2},
862 {C_ISO_8859_4, CS_ISO_8859_4},
863 {C_ISO_8859_5, CS_ISO_8859_5},
864 {C_ISO_8859_7, CS_ISO_8859_7},
865 {C_ISO_8859_8, CS_ISO_8859_8},
866 {C_ISO_8859_9, CS_ISO_8859_9},
867 {C_ISO_8859_11, CS_ISO_8859_11},
868 {C_ISO_8859_13, CS_ISO_8859_13},
869 {C_ISO_8859_15, CS_ISO_8859_15},
870 {C_BALTIC, CS_BALTIC},
871 {C_CP1251, CS_CP1251},
872 {C_WINDOWS_1251, CS_WINDOWS_1251},
873 {C_KOI8_R, CS_KOI8_R},
874 {C_KOI8_U, CS_KOI8_U},
875 {C_ISO_2022_JP, CS_ISO_2022_JP},
876 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
877 {C_EUC_JP, CS_EUC_JP},
878 {C_EUC_JP, CS_EUCJP},
879 {C_SHIFT_JIS, CS_SHIFT_JIS},
880 {C_SHIFT_JIS, CS_SHIFT__JIS},
881 {C_SHIFT_JIS, CS_SJIS},
882 {C_ISO_2022_KR, CS_ISO_2022_KR},
883 {C_EUC_KR, CS_EUC_KR},
884 {C_ISO_2022_CN, CS_ISO_2022_CN},
885 {C_EUC_CN, CS_EUC_CN},
886 {C_GB2312, CS_GB2312},
887 {C_EUC_TW, CS_EUC_TW},
889 {C_TIS_620, CS_TIS_620},
890 {C_WINDOWS_874, CS_WINDOWS_874},
/*
 * locale_table: maps a locale name to two CharSet values.  The lookups in
 * this file read the entries as .locale, .charset (returned by
 * conv_get_current_charset()) and .out_charset (used by
 * conv_get_outgoing_charset()).
 * Order matters: those lookups do a case-insensitive prefix comparison of the
 * current locale against each entry in turn, so longer names such as
 * "ja_JP.SJIS" are listed before their bare "ja_JP" form.
 * NOTE(review): the member declarations and the array name line are not part
 * of this listing.
 */
893 static const struct {
898 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
899 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
900 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
901 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
902 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
903 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
904 {"ko_KR" , C_EUC_KR , C_EUC_KR},
905 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
906 {"zh_CN" , C_GB2312 , C_GB2312},
907 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
908 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
909 {"zh_TW" , C_BIG5 , C_BIG5},
911 {"ru_RU.KOI8-R" , C_KOI8_R , C_KOI8_R},
912 {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
913 {"ru_RU" , C_ISO_8859_5 , C_KOI8_R},
914 {"ru_UA" , C_KOI8_U , C_KOI8_U},
915 {"uk_UA" , C_KOI8_U , C_KOI8_U},
916 {"be_BY" , C_WINDOWS_1251, C_WINDOWS_1251},
917 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
919 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
920 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
921 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
922 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
923 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
924 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
925 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
926 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
927 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
928 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
929 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
930 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
931 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
932 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
933 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
935 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
936 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
937 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
938 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
939 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
940 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
941 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
942 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
943 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
945 {"th_TH" , C_TIS_620 , C_TIS_620},
946 /* {"th_TH" , C_WINDOWS_874}, */
947 /* {"th_TH" , C_ISO_8859_11}, */
949 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
950 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
951 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
952 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
953 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
955 {"C" , C_US_ASCII , C_US_ASCII},
956 {"POSIX" , C_US_ASCII , C_US_ASCII},
957 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
/*
 * conv_get_charset_to_str_table: return the CharSet -> name hash table.
 *
 * The table lives in a function-local static.  Keys are CharSet values packed
 * with GUINT_TO_POINTER and compared with g_direct_equal; the hash function
 * argument is NULL.  Each charsets[] entry is looked up before the insert
 * call.
 * NOTE(review): the comparison completing that lookup test is not part of
 * this listing; presumably only the first name listed for a CharSet is kept
 * -- confirm.
 */
960 static GHashTable *conv_get_charset_to_str_table(void)
962 static GHashTable *table;
968 table = g_hash_table_new(NULL, g_direct_equal);
970 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
971 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
974 (table, GUINT_TO_POINTER(charsets[i].charset),
/*
 * str_case_equal: key-equality callback for the name -> CharSet hash table.
 * Returns non-zero when the two strings are equal ignoring case (strcasecmp).
 */
982 static gint str_case_equal(gconstpointer v, gconstpointer v2)
984 return strcasecmp((const gchar *)v, (const gchar *)v2) == 0;
/*
 * str_case_hash: hash callback for the name -> CharSet hash table.
 * Every character from the second one on is folded in as
 * h = h * 31 + tolower(c), written as (h << 5) - h.  Lower-casing keeps the
 * hash consistent with the case-insensitive str_case_equal().
 * NOTE(review): the initialisation of h from the first character is not part
 * of this listing.
 */
987 static guint str_case_hash(gconstpointer key)
989 const gchar *p = key;
994 for (p += 1; *p != '\0'; p++)
995 h = (h << 5) - h + tolower(*p);
/*
 * conv_get_charset_from_str_table: return the name -> CharSet hash table.
 *
 * The table lives in a function-local static and is keyed case-insensitively
 * through str_case_hash()/str_case_equal().  Every charsets[] entry is
 * inserted, so all alias names map to their CharSet value (stored with
 * GUINT_TO_POINTER).
 */
1001 static GHashTable *conv_get_charset_from_str_table(void)
1003 static GHashTable *table;
1009 table = g_hash_table_new(str_case_hash, str_case_equal);
1011 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1012 g_hash_table_insert(table, charsets[i].name,
1013 GUINT_TO_POINTER(charsets[i].charset));
/*
 * conv_get_charset_str: return the name string registered for a CharSet.
 * The result is the raw g_hash_table_lookup() value, which is NULL when the
 * CharSet has no entry in the table.
 */
1019 const gchar *conv_get_charset_str(CharSet charset)
1023 table = conv_get_charset_to_str_table();
1024 return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
/*
 * conv_get_charset_from_str: parse a charset name into a CharSet value.
 *
 * charset - charset name, matched case-insensitively; NULL yields C_AUTO
 *
 * For a name that is not in the table g_hash_table_lookup() returns NULL, so
 * the result is the CharSet whose numeric value is 0.
 */
1027 CharSet conv_get_charset_from_str(const gchar *charset)
1031 if (!charset) return C_AUTO;
1033 table = conv_get_charset_from_str_table();
1034 return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
/*
 * conv_get_current_charset: determine the CharSet of the current locale.
 *
 * The answer is cached in a function-local static (-1 means "not computed
 * yet").  Resolution order as visible here:
 *   1. a C_US_ASCII assignment (NOTE(review): its guard is not part of this
 *      listing; presumably a missing locale name -- confirm)
 *   2. locale name containing "UTF-8" (any case)   -> C_UTF_8
 *   3. locale name ending in "@euro" (any case)    -> C_ISO_8859_15
 *   4. first locale_table entry that matches       -> its .charset
 *   5. nothing matched                             -> C_AUTO
 */
1037 CharSet conv_get_current_charset(void)
1039 static CharSet cur_charset = -1;
1040 const gchar *cur_locale;
1044 if (cur_charset != -1)
1047 cur_locale = conv_get_current_locale();
1049 cur_charset = C_US_ASCII;
1053 if (strcasestr(cur_locale, "UTF-8")) {
1054 cur_charset = C_UTF_8;
/* p[5] == '\0' requires "@euro" to be the very end of the locale name */
1058 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1059 cur_charset = C_ISO_8859_15;
1063 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1066 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1067 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1068 if (!strncasecmp(cur_locale, locale_table[i].locale,
1069 strlen(locale_table[i].locale))) {
1070 cur_charset = locale_table[i].charset;
1072 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1073 !strchr(p + 1, '.')) {
1074 if (strlen(cur_locale) == 2 &&
1075 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1076 cur_charset = locale_table[i].charset;
1082 cur_charset = C_AUTO;
/*
 * conv_get_current_charset_str: return the name of the current locale
 * charset.  The looked-up name is kept in a function-local static; when no
 * name is available CS_US_ASCII is returned instead of NULL.
 */
1086 const gchar *conv_get_current_charset_str(void)
1088 static const gchar *codeset = NULL;
1091 codeset = conv_get_charset_str(conv_get_current_charset());
1093 return codeset ? codeset : CS_US_ASCII;
/*
 * conv_get_outgoing_charset: determine the CharSet used for outgoing mail in
 * the current locale.
 *
 * The answer is cached in a function-local static (-1 means "not computed
 * yet").  It uses the same locale matching as conv_get_current_charset() but
 * reads the .out_charset column of locale_table.  A locale name ending in
 * "@euro" gives C_ISO_8859_15.
 * In the final section C_ISO_2022_JP is singled out and
 * conv_get_current_charset() is used as the fallback.
 * NOTE(review): the preprocessor conditions around that final section and the
 * guard on the C_AUTO assignment are not part of this listing.
 */
1096 CharSet conv_get_outgoing_charset(void)
1098 static CharSet out_charset = -1;
1099 const gchar *cur_locale;
1103 if (out_charset != -1)
1106 cur_locale = conv_get_current_locale();
1108 out_charset = C_AUTO;
1112 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1113 out_charset = C_ISO_8859_15;
1117 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
/* prefix match first, then the bare two-letter language fallback */
1120 if (!strncasecmp(cur_locale, locale_table[i].locale,
1121 strlen(locale_table[i].locale))) {
1122 out_charset = locale_table[i].out_charset;
1124 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1125 !strchr(p + 1, '.')) {
1126 if (strlen(cur_locale) == 2 &&
1127 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1128 out_charset = locale_table[i].out_charset;
1135 /* encoding conversion without iconv() is only supported
1136 on Japanese locale for now */
1137 if (out_charset == C_ISO_2022_JP)
1140 return conv_get_current_charset();
/*
 * conv_get_outgoing_charset_str: return the charset name for outgoing mail.
 *
 * A user preference in prefs_common.outgoing_charset takes priority unless it
 * equals CS_AUTO.  A preference whose first character is not alphabetic is
 * treated as invalid: it is freed and replaced with a copy of CS_AUTO.
 * Otherwise the locale-derived charset from conv_get_outgoing_charset() is
 * used, with CS_US_ASCII as the fallback when it has no name.
 *
 * NOTE(review): isalpha() is handed a plain gchar; a negative value is
 * undefined behaviour, so a cast to guchar would be safer.
 */
1146 const gchar *conv_get_outgoing_charset_str(void)
1148 CharSet out_charset;
1151 if (prefs_common.outgoing_charset) {
1152 if (!isalpha(prefs_common.outgoing_charset[0])) {
1153 g_free(prefs_common.outgoing_charset);
1154 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1155 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1156 return prefs_common.outgoing_charset;
1159 out_charset = conv_get_outgoing_charset();
1160 str = conv_get_charset_str(out_charset);
1162 return str ? str : CS_US_ASCII;
/*
 * conv_get_current_locale: find the name of the current locale.
 *
 * Tries, in order, the environment variables LC_ALL, LC_CTYPE and LANG, and
 * finally setlocale(LC_CTYPE, NULL).  The chosen name is written to the debug
 * log ("(none)" when nothing was found).
 *
 * NOTE(review): only NULL moves on to the next source, so a variable set to
 * the empty string is taken as the locale name -- confirm this is intended.
 * NOTE(review): the return statement is not part of this listing.
 */
1165 const gchar *conv_get_current_locale(void)
1169 cur_locale = g_getenv("LC_ALL");
1170 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1171 if (!cur_locale) cur_locale = g_getenv("LANG");
1172 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1174 debug_print("current locale: %s\n",
1175 cur_locale ? cur_locale : "(none)");
/*
 * conv_unmime_header_overwrite: decode a MIME-encoded header in place.
 *
 * str - NUL-terminated header text; receives the decoded result
 *
 * In an EUC-JP locale the text is first converted with conv_anytodisp() into
 * a scratch buffer of strlen(str) * 2 + 1 bytes, and unmime_header() decodes
 * from that buffer back into str.  In other locales unmime_header() decodes
 * into a scratch buffer of strlen(str) + 1 bytes, which is copied back with
 * strncpy2().
 * NOTE(review): Xalloca() is given `return` as its third argument;
 * presumably that is the action taken on allocation failure -- confirm.
 */
1180 void conv_unmime_header_overwrite(gchar *str)
1184 CharSet cur_charset;
1186 cur_charset = conv_get_current_charset();
1188 if (cur_charset == C_EUC_JP) {
1189 buflen = strlen(str) * 2 + 1;
1190 Xalloca(buf, buflen, return);
1191 conv_anytodisp(buf, buflen, str);
1192 unmime_header(str, buf);
1194 buflen = strlen(str) + 1;
1195 Xalloca(buf, buflen, return);
1196 unmime_header(buf, str);
1197 strncpy2(str, buf, buflen);
/*
 * conv_unmime_header: decode a MIME-encoded header into outbuf.
 *
 * outbuf  - destination buffer for the decoded header
 * outlen  - length handed in together with outbuf
 * str     - NUL-terminated header text
 * charset - charset name supplied by the caller
 *
 * In an EUC-JP locale str is first converted with conv_anytodisp() into a
 * scratch buffer of strlen(str) * 2 + 1 bytes and decoded from there;
 * otherwise str is decoded directly.
 * NOTE(review): no use of outlen or charset is visible in this listing --
 * confirm.
 */
1201 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1202 const gchar *charset)
1204 CharSet cur_charset;
1206 cur_charset = conv_get_current_charset();
1208 if (cur_charset == C_EUC_JP) {
1212 buflen = strlen(str) * 2 + 1;
1213 Xalloca(buf, buflen, return);
1214 conv_anytodisp(buf, buflen, str);
1215 unmime_header(outbuf, buf);
1217 unmime_header(outbuf, str);
/*
 * Constants for header encoding: the column budget for one output line, a
 * much larger hard limit that is applied while copying plain ASCII words, and
 * the delimiters that open and close a MIME encoded word.
 */
1220 #define MAX_LINELEN 76
1221 #define MAX_HARD_LINELEN 996
1222 #define MIMESEP_BEGIN "=?"
1223 #define MIMESEP_END "?="
/* Base64 output length for len input bytes: 4 characters per 3-byte group,
   a partial final group still taking a full 4 */
1225 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/*
 * LBREAK_IF_REQUIRED(cond, is_plain_text): line-break helper.  It uses the
 * caller's locals len, dest, destp, srcp and left.  It first checks that at
 * least MAX_LINELEN + 2 bytes remain in dest.  When cond holds and input
 * remains, it inspects the whitespace around the break position, but only if
 * something has already been written on the current line.  `left` is then
 * reset to MAX_LINELEN - 1.
 * NOTE(review): several lines of the macro body are not part of this listing.
 */
1227 #define LBREAK_IF_REQUIRED(cond, is_plain_text) \
1229 if (len - (destp - dest) < MAX_LINELEN + 2) { \
1234 if ((cond) && *srcp) { \
1235 if (destp > dest && left < MAX_LINELEN - 1) { \
1236 if (isspace(*(destp - 1))) \
1238 else if (is_plain_text && isspace(*srcp)) \
1243 left = MAX_LINELEN - 1; \
/*
 * conv_encode_header: MIME-encode a header value into dest, folding lines.
 *
 * dest - output buffer, written through the local pointer `destp`
 * len  - length of dest; LBREAK_IF_REQUIRED compares the space remaining
 *        against MAX_LINELEN + 2
 * src  - NUL-terminated header text in the current locale charset
 * (the rest of the parameter list is not part of this listing; the body reads
 *  a header_len value that is subtracted from the first line's budget)
 *
 * Multibyte locales (MB_CUR_MAX > 1) use the "?B?" marker, others "?Q?".  A
 * US-ASCII current or outgoing charset is replaced by ISO-8859-1.  Words that
 * are pure ASCII are copied as they are.  Other text is converted with
 * conv_codeset_strdup(), encoded with base64_encode() or qp_q_encode(), and
 * written as "=?charset?X?data?=" blocks sized to fit what is left on the
 * line.
 */
1249 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1252 const gchar *cur_encoding;
1253 const gchar *out_encoding;
1257 const gchar *srcp = src;
1258 gchar *destp = dest;
1259 gboolean use_base64;
1261 if (MB_CUR_MAX > 1) {
1263 mimesep_enc = "?B?";
1266 mimesep_enc = "?Q?";
1269 cur_encoding = conv_get_current_charset_str();
1270 if (!strcmp(cur_encoding, CS_US_ASCII))
1271 cur_encoding = CS_ISO_8859_1;
1272 out_encoding = conv_get_outgoing_charset_str();
1273 if (!strcmp(out_encoding, CS_US_ASCII))
1274 out_encoding = CS_ISO_8859_1;
/* fixed overhead of one encoded word: "=?" + charset + "?B?"/"?Q?" + "?=" */
1276 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1277 strlen(mimesep_enc) + strlen(MIMESEP_END);
/* the first line's budget is reduced by header_len */
1279 left = MAX_LINELEN - header_len;
1282 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1284 while (isspace(*srcp)) {
1287 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1290 /* output as it is if the next word is ASCII string */
1291 if (!is_next_nonascii(srcp)) {
1294 word_len = get_next_word_len(srcp);
1295 LBREAK_IF_REQUIRED(left < word_len, TRUE);
1296 while (word_len > 0) {
/* while copying a plain word the break condition is relaxed by the gap
   between MAX_HARD_LINELEN and MAX_LINELEN */
1297 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1312 const gchar *p = srcp;
1314 gint out_enc_str_len;
1315 gint mime_block_len;
1316 gboolean cont = FALSE;
/* scan forward from srcp one character at a time; whitespace that is not
   followed by non-ASCII text is tested for specially */
1318 while (*p != '\0') {
1319 if (isspace(*p) && !is_next_nonascii(p + 1))
1322 if (MB_CUR_MAX > 1) {
1323 mb_len = mblen(p, MB_CUR_MAX);
1325 g_warning("conv_encode_header(): invalid multibyte character encountered\n");
/* trial conversion of the candidate chunk (cur_len + mb_len bytes) to
   measure its encoded length */
1331 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1332 out_str = conv_codeset_strdup
1333 (part_str, cur_encoding, out_encoding);
1335 g_warning("conv_encode_header(): code conversion failed\n");
1336 conv_unreadable_8bit(part_str);
1337 out_str = g_strdup(part_str);
1339 out_str_len = strlen(out_str);
1342 out_enc_str_len = B64LEN(out_str_len);
1345 qp_get_q_encoding_len(out_str);
/* the chunk fits if the encoded word including its overhead does not exceed
   the space left on the line */
1349 if (mimestr_len + out_enc_str_len <= left) {
1352 } else if (cur_len == 0) {
1353 LBREAK_IF_REQUIRED(1, FALSE);
/* convert and encode the accepted chunk of cur_len bytes */
1362 Xstrndup_a(part_str, srcp, cur_len, );
1363 out_str = conv_codeset_strdup
1364 (part_str, cur_encoding, out_encoding);
1366 g_warning("conv_encode_header(): code conversion failed\n");
1367 conv_unreadable_8bit(part_str);
1368 out_str = g_strdup(part_str);
1370 out_str_len = strlen(out_str);
1373 out_enc_str_len = B64LEN(out_str_len);
1376 qp_get_q_encoding_len(out_str);
1378 Xalloca(enc_str, out_enc_str_len + 1, );
1380 base64_encode(enc_str, out_str, out_str_len);
1382 qp_q_encode(enc_str, out_str);
1386 /* output MIME-encoded string block */
1387 mime_block_len = mimestr_len + strlen(enc_str);
1388 g_snprintf(destp, mime_block_len + 1,
1389 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1390 out_encoding, mimesep_enc, enc_str);
1391 destp += mime_block_len;
1394 left -= mime_block_len;
1397 LBREAK_IF_REQUIRED(cont, FALSE);
/* the line-break helper macro is only meant for this function */
1407 #undef LBREAK_IF_REQUIRED