2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2003 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
42 #include "quoted-printable.h"
44 #include "prefs_common.h"
/* Substitute character; conv_unreadable_8bit() stores it over non-ASCII
 * bytes. */
54 #define SUBST_CHAR '_'
/* True when the low 8 bits of c lie in 0xa1..0xfe.  Used below as the
 * per-byte test for a two-byte EUC-JP kanji. */
57 #define iseuckanji(c) \
58 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* True when the low 8 bits of c equal 0x8e.  Used below as the first-byte
 * test for EUC-JP hankaku (half-width) kana. */
59 #define iseuchwkana1(c) \
60 (((c) & 0xff) == 0x8e)
/* True when the low 8 bits of c lie in 0xa1..0xdf.  Used below as the
 * second-byte test that follows iseuchwkana1(). */
61 #define iseuchwkana2(c) \
62 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* NOTE(review): the #define line that owns the next expression is not
 * visible in this view.  The expression tests for the byte 0x8f and is
 * presumably the body of iseucaux(), which is used further down as the
 * "auxiliary kanji" prefix test -- confirm. */
64 (((c) & 0xff) == 0x8f)
/* True when the low 8 bits of c lie in 0x81..0x9f or 0xe0..0xfc.  Used
 * below as the first-byte test for a two-byte Shift_JIS character. */
65 #define issjiskanji1(c) \
66 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
67 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* True when the low 8 bits of c lie in 0x40..0x7e or 0x80..0xfc.  Used
 * below as the second-byte test that follows issjiskanji1(). */
68 #define issjiskanji2(c) \
69 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
70 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* True when the low 8 bits of c lie in 0xa1..0xdf.  Used below as the
 * single-byte Shift_JIS half-width kana test. */
71 #define issjishwkana(c) \
72 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
75 if (state != JIS_KANJI) { \
83 if (state != JIS_ASCII) { \
91 if (state != JIS_HWKANA) { \
99 if (state != JIS_AUXKANJI) { \
104 state = JIS_AUXKANJI; \
/*
 * conv_jistoeuc:
 * Walk the NUL-terminated inbuf and write converted bytes to outbuf,
 * tracking a JISState that starts as JIS_ASCII.  The visible branches
 * inspect escape-sequence bytes ('@'/'B', '(', 'B'/'J', 'I') and the
 * control bytes 0x0e/0x0f, and copy data bytes with bit 7 set.
 *
 * NOTE(review): several lines of this function are not visible in this
 * view, and no use of outlen appears in the visible part -- confirm how
 * writes to outbuf are bounded.
 */
107 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
109 const guchar *in = inbuf;
110 guchar *out = outbuf;
111 JISState state = JIS_ASCII;
113 while (*in != '\0') {
117 if (*(in + 1) == '@' || *(in + 1) == 'B') {
120 } else if (*(in + 1) == '(' &&
122 state = JIS_AUXKANJI;
125 /* unknown escape sequence */
128 } else if (*in == '(') {
129 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
132 } else if (*(in + 1) == 'I') {
136 /* unknown escape sequence */
140 /* unknown escape sequence */
143 } else if (*in == 0x0e) {
146 } else if (*in == 0x0f) {
/* copy a byte pair with bit 7 set; stop if the input ends after the
 * first byte of the pair */
155 *out++ = *in++ | 0x80;
156 if (*in == '\0') break;
157 *out++ = *in++ | 0x80;
/* single byte copied with bit 7 set */
161 *out++ = *in++ | 0x80;
/* second two-byte copy path, same early-exit rule as above */
165 *out++ = *in++ | 0x80;
166 if (*in == '\0') break;
167 *out++ = *in++ | 0x80;
/*
 * conv_euctojis:
 * Walk the NUL-terminated inbuf and write converted bytes to outbuf,
 * tracking a JISState that starts as JIS_ASCII.  Bytes accepted by the
 * iseuckanji(), iseuchwkana1()/iseuchwkana2() and iseucaux() tests are
 * copied with bit 7 cleared.
 *
 * NOTE(review): several lines of this function are not visible in this
 * view (state switches, the ASCII branch, what happens to rejected
 * bytes), and no use of outlen appears in the visible part -- confirm.
 */
176 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
178 const guchar *in = inbuf;
179 guchar *out = outbuf;
180 JISState state = JIS_ASCII;
182 while (*in != '\0') {
186 } else if (iseuckanji(*in)) {
/* both bytes of the pair must pass iseuckanji() before they are emitted */
187 if (iseuckanji(*(in + 1))) {
189 *out++ = *in++ & 0x7f;
190 *out++ = *in++ & 0x7f;
/* pair rejected: the following byte is examined only if it is neither
 * the terminator nor ASCII */
195 if (*in != '\0' && !isascii(*in)) {
200 } else if (iseuchwkana1(*in)) {
202 if (iseuchwkana2(*in)) {
204 *out++ = *in++ & 0x7f;
207 if (*in != '\0' && !isascii(*in)) {
212 } else if (iseucaux(*in)) {
214 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
216 *out++ = *in++ & 0x7f;
217 *out++ = *in++ & 0x7f;
220 if (*in != '\0' && !isascii(*in)) {
223 if (*in != '\0' && !isascii(*in)) {
/*
 * conv_sjistoeuc:
 * Walk the NUL-terminated inbuf and write converted bytes to outbuf.
 * For a byte pair accepted by issjiskanji1()/issjiskanji2() the two
 * bytes are recomputed arithmetically (out1, out2) and emitted with
 * bit 7 set.  A byte accepted by issjishwkana() has its own branch.
 *
 * NOTE(review): several lines of this function are not visible in this
 * view (including the condition that selects between the two out1
 * formulas), and no use of outlen appears in the visible part -- confirm.
 */
240 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
242 const guchar *in = inbuf;
243 guchar *out = outbuf;
245 while (*in != '\0') {
248 } else if (issjiskanji1(*in)) {
249 if (issjiskanji2(*(in + 1))) {
251 guchar out2 = *(in + 1);
/* base subtracted from the first byte: 0x70 below 0xa0, otherwise 0xb0 */
254 row = out1 < 0xa0 ? 0x70 : 0xb0;
256 out1 = (out1 - row) * 2 - 1;
/* second byte shifts down by 0x20 above 0x7f, by 0x1f otherwise */
257 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
259 out1 = (out1 - row) * 2;
263 *out++ = out1 | 0x80;
264 *out++ = out2 | 0x80;
269 if (*in != '\0' && !isascii(*in)) {
274 } else if (issjishwkana(*in)) {
/*
 * conv_anytoeuc:
 * Dispatch on conv_guess_ja_encoding(inbuf) and fill outbuf by calling
 * conv_jistoeuc(), conv_sjistoeuc(), or a plain strncpy2() copy.
 * NOTE(review): the case labels selecting each call are not visible in
 * this view.
 */
286 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
288 switch (conv_guess_ja_encoding(inbuf)) {
290 conv_jistoeuc(outbuf, outlen, inbuf);
293 conv_sjistoeuc(outbuf, outlen, inbuf);
296 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_anytojis:
 * Dispatch on conv_guess_ja_encoding(inbuf) and fill outbuf by calling
 * conv_euctojis() or a plain strncpy2() copy.
 * NOTE(review): the case labels selecting each call are not visible in
 * this view.
 */
301 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
303 switch (conv_guess_ja_encoding(inbuf)) {
305 conv_euctojis(outbuf, outlen, inbuf);
308 strncpy2(outbuf, inbuf, outlen);
/*
 * Flag table consulted by isprintableeuckanji() as
 * valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0]: one row of 96 entries for each
 * first byte 0xa2..0xa8, one column for each second byte 0xa0..0xff.
 * The entry is returned to the caller cast to gboolean.
 */
313 static gchar valid_eucjp_tbl[][96] = {
314 /* 0xa2a0 - 0xa2ff */
315 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
316 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
317 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
318 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
319 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
320 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0 },
322 /* 0xa3a0 - 0xa3ff */
323 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
324 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
325 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
326 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
327 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
328 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 },
330 /* 0xa4a0 - 0xa4ff */
331 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
332 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
333 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
334 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
335 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
336 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
338 /* 0xa5a0 - 0xa5ff */
339 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
340 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
341 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
342 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
343 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
344 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
346 /* 0xa6a0 - 0xa6ff */
347 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
348 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
349 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
350 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
351 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
352 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
354 /* 0xa7a0 - 0xa7ff */
355 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
356 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
357 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
358 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
359 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
360 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
362 /* 0xa8a0 - 0xa8ff */
363 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
364 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
365 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
366 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
367 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
368 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
/*
 * isprintableeuckanji:
 * Classify the byte pair (c1, c2).  Range checks on c1 and c2 are applied
 * first; for c1 in 0xa2..0xa8 the answer is read from valid_eucjp_tbl.
 * Later checks single out c2 >= 0xd4 in one row and c2 >= 0xa7 when
 * c1 == 0xf4.
 *
 * NOTE(review): the statements guarded by the range checks are not
 * visible in this view; presumably they return FALSE for out-of-range
 * pairs and the function otherwise returns TRUE -- confirm.
 */
371 static gboolean isprintableeuckanji(guchar c1, guchar c2)
373 if (c1 <= 0xa0 || c1 >= 0xf5)
375 if (c2 <= 0xa0 || c2 == 0xff)
378 if (c1 >= 0xa9 && c1 <= 0xaf)
381 if (c1 >= 0xa2 && c1 <= 0xa8)
/* c2 is 0xa1..0xfe here, so the column index is 1..94 */
382 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
385 if (c2 >= 0xd4 && c2 <= 0xff)
387 } else if (c1 == 0xf4) {
388 if (c2 >= 0xa7 && c2 <= 0xff)
/*
 * conv_unreadable_eucjp:
 * Rewrite str in place.  A CR immediately followed by LF is removed, and
 * the remaining branches classify each position as printable 7-bit,
 * EUC-JP kanji (checked with isprintableeuckanji()), hankaku kana,
 * auxiliary kanji, or an unprintable code to be substituted.
 *
 * NOTE(review): the loop header, pointer advances and the substitution
 * statements are not visible in this view; the branch comments are the
 * only evidence of what each branch does -- confirm.
 */
395 void conv_unreadable_eucjp(gchar *str)
397 register guchar *p = str;
401 /* convert CR+LF -> LF */
402 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes starting at p + 1 cover the rest of the string plus
 * its terminating NUL, so the CR is overwritten */
403 memmove(p, p + 1, strlen(p));
404 /* printable 7 bit code */
406 } else if (iseuckanji(*p)) {
407 if (isprintableeuckanji(*p, *(p + 1))) {
408 /* printable euc-jp code */
411 /* substitute unprintable code */
420 } else if (iseuchwkana1(*p)) {
421 if (iseuchwkana2(*(p + 1)))
422 /* euc-jp hankaku kana */
426 } else if (iseucaux(*p)) {
427 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
428 /* auxiliary kanji */
433 /* substitute unprintable 1 byte code */
/*
 * conv_unreadable_8bit:
 * Rewrite str in place: a CR immediately followed by LF is removed, and
 * any other non-ASCII byte is overwritten with SUBST_CHAR.
 * NOTE(review): the loop header and pointer advance are not visible in
 * this view.
 */
438 void conv_unreadable_8bit(gchar *str)
440 register guchar *p = str;
443 /* convert CR+LF -> LF */
444 if (*p == '\r' && *(p + 1) == '\n')
/* moves the rest of the string, terminating NUL included, over the CR */
445 memmove(p, p + 1, strlen(p));
446 else if (!isascii(*p)) *p = SUBST_CHAR;
/*
 * conv_unreadable_latin:
 * Rewrite str in place: a CR immediately followed by LF is removed, and
 * bytes in the range 0x7f..0x9f are singled out.
 * NOTE(review): the statement applied to the 0x7f..0x9f bytes is not
 * visible in this view; presumably they are replaced with SUBST_CHAR as
 * in conv_unreadable_8bit() -- confirm.
 */
451 void conv_unreadable_latin(gchar *str)
453 register guchar *p = str;
456 /* convert CR+LF -> LF */
457 if (*p == '\r' && *(p + 1) == '\n')
/* moves the rest of the string, terminating NUL included, over the CR */
458 memmove(p, p + 1, strlen(p));
459 else if ((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
/*
 * conv_mb_alnum:
 * Rewrite str in place, collapsing certain two-byte sequences into one
 * byte.  When the lead byte is 0xa1 and the second byte is 0xa0..0xef,
 * char_tbl is indexed with (second byte - 0xa0); unless the entry is NCV
 * the table value is stored at p and the tail is shifted left with
 * memmove().  Another branch handles second bytes 0xb0..0xfa, and
 * remaining bytes accepted by iseuckanji() get their own branch.
 *
 * NOTE(review): the lead-byte test of the first branch, its replacement
 * computation, the value of len and the pointer advances are not visible
 * in this view.  From the function name it presumably maps multibyte
 * alphanumerics and punctuation to ASCII -- confirm.
 */
467 void conv_mb_alnum(gchar *str)
/* replacement table for lead byte 0xa1; NCV marks entries that are left
 * unconverted */
469 static guchar char_tbl[] = {
471 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
472 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
474 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
475 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
477 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
478 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
480 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
481 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
483 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
484 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
487 register guchar *p = str;
494 register guchar ch = *(p + 1);
496 if (ch >= 0xb0 && ch <= 0xfa) {
501 memmove(p, p + 1, len);
507 } else if (*p == 0xa1) {
508 register guchar ch = *(p + 1);
510 if (ch >= 0xa0 && ch <= 0xef &&
511 NCV != char_tbl[ch - 0xa0]) {
512 *p = char_tbl[ch - 0xa0];
515 memmove(p, p + 1, len);
521 } else if (iseuckanji(*p)) {
/*
 * conv_guess_ja_encoding:
 * Scan str and produce a CharSet guess, starting from C_US_ASCII.
 * An ESC followed by '$' or '(' returns C_ISO_2022_JP immediately while
 * the guess is still C_US_ASCII.  Byte pairs matching the Shift_JIS
 * tests, or a single byte matching issjishwkana(), set the guess to
 * C_SHIFT_JIS.
 *
 * NOTE(review): the loop header, pointer advances, the remaining
 * assignments to guessed (for example for EUC-JP pairs) and the final
 * return are not visible in this view.
 */
531 CharSet conv_guess_ja_encoding(const gchar *str)
533 const guchar *p = str;
534 CharSet guessed = C_US_ASCII;
537 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
538 if (guessed == C_US_ASCII)
539 return C_ISO_2022_JP;
541 } else if (isascii(*p)) {
543 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
544 if (*p >= 0xfd && *p <= 0xfe)
546 else if (guessed == C_SHIFT_JIS) {
547 if ((issjiskanji1(*p) &&
548 issjiskanji2(*(p + 1))) ||
550 guessed = C_SHIFT_JIS;
556 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
557 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
558 guessed = C_SHIFT_JIS;
562 } else if (issjishwkana(*p)) {
563 guessed = C_SHIFT_JIS;
/* conv_jistodisp: convert inbuf into outbuf with conv_jistoeuc(), then
 * run conv_unreadable_eucjp() over the result. */
573 void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
575 conv_jistoeuc(outbuf, outlen, inbuf);
576 conv_unreadable_eucjp(outbuf);
/* conv_sjistodisp: convert inbuf into outbuf with conv_sjistoeuc(), then
 * run conv_unreadable_eucjp() over the result. */
579 void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
581 conv_sjistoeuc(outbuf, outlen, inbuf);
582 conv_unreadable_eucjp(outbuf);
/* conv_euctodisp: copy inbuf into outbuf with strncpy2(), then run
 * conv_unreadable_eucjp() over the copy. */
585 void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
587 strncpy2(outbuf, inbuf, outlen);
588 conv_unreadable_eucjp(outbuf);
/* conv_anytodisp: convert inbuf into outbuf with conv_anytoeuc() (which
 * guesses the source encoding), then run conv_unreadable_eucjp() over the
 * result. */
591 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
593 conv_anytoeuc(outbuf, outlen, inbuf);
594 conv_unreadable_eucjp(outbuf);
/* conv_ustodisp: copy inbuf into outbuf with strncpy2(), then run
 * conv_unreadable_8bit() over the copy. */
597 void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
599 strncpy2(outbuf, inbuf, outlen);
600 conv_unreadable_8bit(outbuf);
/* conv_latintodisp: copy inbuf into outbuf with strncpy2(), then run
 * conv_unreadable_latin() over the copy. */
603 void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
605 strncpy2(outbuf, inbuf, outlen);
606 conv_unreadable_latin(outbuf);
/* conv_noconv: copy inbuf into outbuf with strncpy2() and nothing else.
 * Its address is also compared against elsewhere in this file to detect
 * "no built-in converter". */
609 void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
611 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_localetodisp:
 * Copy inbuf into outbuf with strncpy2(), then clean the copy according
 * to conv_get_current_charset(): conv_unreadable_latin() or
 * conv_unreadable_eucjp().
 * NOTE(review): the case labels selecting each cleaner are not visible in
 * this view.
 */
614 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
616 strncpy2(outbuf, inbuf, outlen);
618 switch (conv_get_current_charset()) {
630 conv_unreadable_latin(outbuf);
633 conv_unreadable_eucjp(outbuf);
/*
 * conv_code_converter_new:
 * Allocate a zero-filled CodeConverter with g_new0() and fill it from
 * charset: the conversion function chosen by
 * conv_get_code_conv_func(charset, NULL), a g_strdup() copy of the name,
 * and the CharSet value from conv_get_charset_from_str().
 * NOTE(review): the return statement is not visible in this view;
 * presumably the new object is returned -- confirm.
 */
640 CodeConverter *conv_code_converter_new(const gchar *charset)
644 conv = g_new0(CodeConverter, 1);
645 conv->code_conv_func = conv_get_code_conv_func(charset, NULL);
646 conv->charset_str = g_strdup(charset);
647 conv->charset = conv_get_charset_from_str(charset);
/*
 * conv_code_converter_destroy:
 * Release the charset name owned by conv.
 * NOTE(review): only the g_free() of charset_str is visible in this view;
 * presumably conv itself is freed on a following line -- confirm.
 */
652 void conv_code_converter_destroy(CodeConverter *conv)
654 g_free(conv->charset_str);
/*
 * conv_convert:
 * Convert inbuf into outbuf using the converter conv.  When
 * conv->code_conv_func is something other than conv_noconv it is called
 * directly.  The iconv build also has a path that calls
 * conv_codeset_strdup() with conv->charset_str and copies the result
 * into outbuf with strncpy2(); the non-iconv build always calls the
 * function pointer.
 *
 * NOTE(review): the rest of the parameter list, the #if/else structure,
 * error handling for a NULL result of conv_codeset_strdup(), the release
 * of str and the return values are not visible in this view.
 */
658 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
662 if (conv->code_conv_func != conv_noconv)
663 conv->code_conv_func(outbuf, outlen, inbuf);
667 str = conv_codeset_strdup(inbuf, conv->charset_str, NULL);
671 strncpy2(outbuf, str, outlen);
675 #else /* !HAVE_ICONV */
676 conv->code_conv_func(outbuf, outlen, inbuf);
/*
 * conv_codeset_strdup:
 * Return a newly allocated copy of inbuf converted from src_code to
 * dest_code.
 *
 * If conv_get_code_conv_func() yields a built-in converter (anything but
 * conv_noconv), it is run into a scratch buffer of (strlen(inbuf) + 1) * 3
 * bytes, which is then shrunk with g_realloc() to the converted length.
 * Otherwise src_code/dest_code get defaults from
 * conv_get_outgoing_charset_str()/conv_get_current_charset_str() and the
 * string is either duplicated unchanged or passed to conv_iconv_strdup().
 * The trailing g_strdup() before #endif is the result when HAVE_ICONV is
 * not defined.
 *
 * Returns NULL if the scratch buffer could not be obtained.
 * NOTE(review): the allocation of buf and the conditions under which the
 * defaults are applied are not visible in this view.  The result comes
 * from g_realloc()/g_strdup()/conv_iconv_strdup(), so the caller is
 * presumably expected to g_free() it -- confirm.
 */
682 gchar *conv_codeset_strdup(const gchar *inbuf,
683 const gchar *src_code, const gchar *dest_code)
687 CodeConvFunc conv_func;
689 conv_func = conv_get_code_conv_func(src_code, dest_code);
690 if (conv_func != conv_noconv) {
691 len = (strlen(inbuf) + 1) * 3;
693 if (!buf) return NULL;
695 conv_func(buf, len, inbuf);
/* trim the scratch buffer down to the converted string plus NUL */
696 return g_realloc(buf, strlen(buf) + 1);
701 src_code = conv_get_outgoing_charset_str();
703 dest_code = conv_get_current_charset_str();
/* a US-ASCII destination is widened to ISO-8859-1 on this path */
704 if (!strcasecmp(dest_code, CS_US_ASCII))
705 dest_code = CS_ISO_8859_1;
708 /* don't convert if current codeset is US-ASCII */
709 if (!strcasecmp(dest_code, CS_US_ASCII))
710 return g_strdup(inbuf);
712 /* don't convert if src and dest codeset are identical */
713 if (!strcasecmp(src_code, dest_code))
714 return g_strdup(inbuf);
716 return conv_iconv_strdup(inbuf, src_code, dest_code);
718 return g_strdup(inbuf);
719 #endif /* HAVE_ICONV */
/*
 * conv_get_code_conv_func:
 * Choose a built-in conversion routine for the given source and
 * destination charset names.  The default result is conv_noconv.
 *
 * - A NULL src_charset_str means "use conv_get_current_charset()".
 * - When both names are NULL (auto detection) and the current charset is
 *   C_EUC_JP or C_SHIFT_JIS, conv_anytodisp is returned.
 * - A destination of C_US_ASCII returns conv_ustodisp.
 * - Otherwise the switch on src_charset picks a *todisp routine when the
 *   destination is C_AUTO, or a direct converter (conv_jistoeuc,
 *   conv_sjistoeuc, conv_euctojis) for specific destinations.
 *
 * NOTE(review): most case labels of the switch, the declaration of
 * src_charset and the final return are not visible in this view.
 */
722 CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
723 const gchar *dest_charset_str)
725 CodeConvFunc code_conv = conv_noconv;
727 CharSet dest_charset;
729 if (!src_charset_str)
730 src_charset = conv_get_current_charset();
732 src_charset = conv_get_charset_from_str(src_charset_str);
734 /* auto detection mode */
735 if (!src_charset_str && !dest_charset_str) {
736 if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
737 return conv_anytodisp;
/* a NULL dest_charset_str maps to C_AUTO in conv_get_charset_from_str() */
742 dest_charset = conv_get_charset_from_str(dest_charset_str);
744 if (dest_charset == C_US_ASCII)
745 return conv_ustodisp;
747 switch (src_charset) {
749 case C_ISO_2022_JP_2:
750 if (dest_charset == C_AUTO)
751 code_conv = conv_jistodisp;
752 else if (dest_charset == C_EUC_JP)
753 code_conv = conv_jistoeuc;
756 if (dest_charset == C_AUTO)
757 code_conv = conv_ustodisp;
771 if (dest_charset == C_AUTO)
772 code_conv = conv_latintodisp;
775 if (dest_charset == C_AUTO)
776 code_conv = conv_sjistodisp;
777 else if (dest_charset == C_EUC_JP)
778 code_conv = conv_sjistoeuc;
781 if (dest_charset == C_AUTO)
782 code_conv = conv_euctodisp;
783 else if (dest_charset == C_ISO_2022_JP ||
784 dest_charset == C_ISO_2022_JP_2)
785 code_conv = conv_euctojis;
/*
 * conv_iconv_strdup:
 * Convert inbuf from src_code to dest_code with iconv() and return the
 * result in a heap buffer.  The output buffer starts at twice the input
 * size (terminator included) and is enlarged with g_realloc() when
 * iconv() reports E2BIG; EILSEQ and EINVAL have their own branches and
 * other errors are reported with g_warning().  After the loop the
 * conversion state is flushed and the buffer is trimmed to the string
 * length.
 *
 * NOTE(review): the bodies of the errno branches, the failure path after
 * iconv_open(), iconv_close() and the return are not visible in this view.
 * NOTE(review): iconv() returns (size_t)-1 on failure, so the `< 0` loop
 * test only works if n_conv is declared with a signed type; its
 * declaration is not visible here -- confirm.
 */
795 gchar *conv_iconv_strdup(const gchar *inbuf,
796 const gchar *src_code, const gchar *dest_code)
799 const gchar *inbuf_p;
808 cd = iconv_open(dest_code, src_code);
809 if (cd == (iconv_t)-1)
/* the input size includes the terminating NUL, so it is converted too */
813 in_size = strlen(inbuf) + 1;
815 out_size = in_size * 2;
816 outbuf = g_malloc(out_size);
820 while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
821 &outbuf_p, &out_left)) < 0) {
822 if (EILSEQ == errno) {
826 } else if (EINVAL == errno) {
830 } else if (E2BIG == errno) {
832 outbuf = g_realloc(outbuf, out_size);
838 g_warning("conv_iconv_strdup(): %s\n",
/* a NULL input asks iconv() to emit any pending shift sequence and reset
 * the conversion state */
846 iconv(cd, NULL, NULL, &outbuf_p, &out_left);
847 outbuf = g_realloc(outbuf, strlen(outbuf) + 1);
854 #endif /* HAVE_ICONV */
/*
 * Table pairing a CharSet value with a charset name string.  A CharSet
 * may be listed more than once with different names (for example
 * C_US_ASCII, C_EUC_JP and C_SHIFT_JIS below).
 * NOTE(review): the member declarations and the line naming the array are
 * not visible in this view; presumably this is the charsets[] array that
 * the lookup-table builders below read through .charset and .name --
 * confirm.
 */
856 static const struct {
860 {C_US_ASCII, CS_US_ASCII},
861 {C_US_ASCII, CS_ANSI_X3_4_1968},
863 {C_ISO_8859_1, CS_ISO_8859_1},
864 {C_ISO_8859_2, CS_ISO_8859_2},
865 {C_ISO_8859_4, CS_ISO_8859_4},
866 {C_ISO_8859_5, CS_ISO_8859_5},
867 {C_ISO_8859_7, CS_ISO_8859_7},
868 {C_ISO_8859_8, CS_ISO_8859_8},
869 {C_ISO_8859_9, CS_ISO_8859_9},
870 {C_ISO_8859_11, CS_ISO_8859_11},
871 {C_ISO_8859_13, CS_ISO_8859_13},
872 {C_ISO_8859_15, CS_ISO_8859_15},
873 {C_BALTIC, CS_BALTIC},
874 {C_CP1251, CS_CP1251},
875 {C_WINDOWS_1251, CS_WINDOWS_1251},
876 {C_KOI8_R, CS_KOI8_R},
877 {C_KOI8_U, CS_KOI8_U},
878 {C_ISO_2022_JP, CS_ISO_2022_JP},
879 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
880 {C_EUC_JP, CS_EUC_JP},
881 {C_EUC_JP, CS_EUCJP},
882 {C_SHIFT_JIS, CS_SHIFT_JIS},
883 {C_SHIFT_JIS, CS_SHIFT__JIS},
884 {C_SHIFT_JIS, CS_SJIS},
885 {C_ISO_2022_KR, CS_ISO_2022_KR},
886 {C_EUC_KR, CS_EUC_KR},
887 {C_ISO_2022_CN, CS_ISO_2022_CN},
888 {C_EUC_CN, CS_EUC_CN},
889 {C_GB2312, CS_GB2312},
890 {C_EUC_TW, CS_EUC_TW},
892 {C_TIS_620, CS_TIS_620},
893 {C_WINDOWS_874, CS_WINDOWS_874},
/*
 * Table mapping a locale name to two CharSet values.
 * conv_get_current_charset() and conv_get_outgoing_charset() scan it from
 * the top and stop at the first entry whose locale string is a
 * case-insensitive prefix of the current locale, so more specific names
 * (with an encoding suffix) are listed before the bare "xx_YY" form.
 * NOTE(review): the member declarations and the line naming the array are
 * not visible in this view; presumably this is locale_table[], read
 * through .locale, .charset and .out_charset -- confirm.
 */
896 static const struct {
901 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
902 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
903 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
904 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
905 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
906 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
907 {"ko_KR" , C_EUC_KR , C_EUC_KR},
908 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
909 {"zh_CN" , C_GB2312 , C_GB2312},
910 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
911 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
912 {"zh_TW" , C_BIG5 , C_BIG5},
914 {"ru_RU.KOI8-R" , C_KOI8_R , C_KOI8_R},
915 {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
916 {"ru_RU" , C_ISO_8859_5 , C_KOI8_R},
917 {"ru_UA" , C_KOI8_U , C_KOI8_U},
918 {"uk_UA" , C_KOI8_U , C_KOI8_U},
919 {"be_BY" , C_WINDOWS_1251, C_WINDOWS_1251},
920 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
922 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
923 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
924 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
925 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
926 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
927 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
928 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
929 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
930 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
931 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
932 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
933 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
934 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
935 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
936 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
938 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
939 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
940 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
941 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
942 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
943 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
944 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
945 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
946 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
948 {"th_TH" , C_TIS_620 , C_TIS_620},
949 /* {"th_TH" , C_WINDOWS_874}, */
950 /* {"th_TH" , C_ISO_8859_11}, */
952 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
953 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
954 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
955 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
956 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
958 {"C" , C_US_ASCII , C_US_ASCII},
959 {"POSIX" , C_US_ASCII , C_US_ASCII},
960 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
/*
 * conv_get_charset_to_str_table:
 * Return the hash table that maps a CharSet value (stored as a pointer
 * key via GUINT_TO_POINTER, compared with g_direct_equal) to a name.
 * The table lives in a function-local static and is filled from
 * charsets[].  Each CharSet is looked up before the insert call.
 *
 * NOTE(review): the guard that reuses an existing table, the comparison
 * applied to the lookup result and the inserted value are not visible in
 * this view; presumably only the first name listed for a CharSet is
 * stored -- confirm.
 */
963 static GHashTable *conv_get_charset_to_str_table(void)
965 static GHashTable *table;
971 table = g_hash_table_new(NULL, g_direct_equal);
973 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
974 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
977 (table, GUINT_TO_POINTER(charsets[i].charset),
/* str_case_equal: key-equality callback for the name hash table; returns
 * nonzero when the two strings are equal under strcasecmp(). */
985 static gint str_case_equal(gconstpointer v, gconstpointer v2)
987 return strcasecmp((const gchar *)v, (const gchar *)v2) == 0;
/*
 * str_case_hash:
 * Case-insensitive string hash used together with str_case_equal().
 * Characters after the first are folded in lower-cased, as h * 31 + c.
 * NOTE(review): the initialisation of h (from the first character) and
 * the return are not visible in this view.
 */
990 static guint str_case_hash(gconstpointer key)
992 const gchar *p = key;
997 for (p += 1; *p != '\0'; p++)
/* (h << 5) - h is h * 31 */
998 h = (h << 5) - h + tolower(*p);
/*
 * conv_get_charset_from_str_table:
 * Return the hash table that maps a charset name (case-insensitive, via
 * str_case_hash/str_case_equal) to its CharSet value stored with
 * GUINT_TO_POINTER.  The table lives in a function-local static and
 * receives one entry per charsets[] row.
 * NOTE(review): the guard that reuses an existing table and the return
 * are not visible in this view.
 */
1004 static GHashTable *conv_get_charset_from_str_table(void)
1006 static GHashTable *table;
1012 table = g_hash_table_new(str_case_hash, str_case_equal);
1014 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1015 g_hash_table_insert(table, charsets[i].name,
1016 GUINT_TO_POINTER(charsets[i].charset));
/* conv_get_charset_str: look up the name registered for charset in the
 * CharSet-to-name table.  g_hash_table_lookup() yields NULL when the
 * value has no entry. */
1022 const gchar *conv_get_charset_str(CharSet charset)
1026 table = conv_get_charset_to_str_table();
1027 return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
/* conv_get_charset_from_str: map a charset name to its CharSet value.
 * A NULL name yields C_AUTO.  A name missing from the table yields the
 * CharSet whose numeric value is 0, because the NULL lookup result is
 * converted with GPOINTER_TO_UINT. */
1030 CharSet conv_get_charset_from_str(const gchar *charset)
1034 if (!charset) return C_AUTO;
1036 table = conv_get_charset_from_str_table();
1037 return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
/*
 * conv_get_current_charset:
 * Determine the CharSet of the current locale and cache it in a static
 * (-1 means "not computed yet").
 *
 * Order of the visible checks on conv_get_current_locale():
 *   1. the locale contains "UTF-8" (any case)  -> C_UTF_8
 *   2. the locale ends with "@euro" (any case) -> C_ISO_8859_15
 *   3. the first locale_table entry whose name is a case-insensitive
 *      prefix of the locale -> that entry's charset
 *   4. a two-letter locale matching the language part of an "xx_YY"
 *      entry that has no '.' suffix -> that entry's charset
 *   5. nothing matched -> C_AUTO
 *
 * NOTE(review): the condition guarding the C_US_ASCII assignment
 * (presumably a NULL locale) and the return statements are not visible
 * in this view.
 */
1040 CharSet conv_get_current_charset(void)
1042 static CharSet cur_charset = -1;
1043 const gchar *cur_locale;
1047 if (cur_charset != -1)
1050 cur_locale = conv_get_current_locale();
1052 cur_charset = C_US_ASCII;
1056 if (strcasestr(cur_locale, "UTF-8")) {
1057 cur_charset = C_UTF_8;
/* "@euro" only counts when nothing follows it */
1061 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1062 cur_charset = C_ISO_8859_15;
1066 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1069 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1070 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1071 if (!strncasecmp(cur_locale, locale_table[i].locale,
1072 strlen(locale_table[i].locale))) {
1073 cur_charset = locale_table[i].charset;
1075 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1076 !strchr(p + 1, '.')) {
1077 if (strlen(cur_locale) == 2 &&
1078 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1079 cur_charset = locale_table[i].charset;
1085 cur_charset = C_AUTO;
/* conv_get_current_charset_str: return the name of the current charset,
 * kept in a static pointer, falling back to CS_US_ASCII when no name is
 * available.
 * NOTE(review): the guard around the assignment (presumably "only when
 * codeset is still NULL") is not visible in this view. */
1089 const gchar *conv_get_current_charset_str(void)
1091 static const gchar *codeset = NULL;
1094 codeset = conv_get_charset_str(conv_get_current_charset());
1096 return codeset ? codeset : CS_US_ASCII;
/*
 * conv_get_outgoing_charset:
 * Determine the CharSet to use for outgoing text from the current locale
 * and cache it in a static (-1 means "not computed yet").
 *
 * A locale ending in "@euro" gives C_ISO_8859_15; otherwise locale_table
 * is searched with the same prefix / two-letter rules as
 * conv_get_current_charset(), taking the entry's out_charset.
 *
 * NOTE(review): the condition guarding the C_AUTO assignment (presumably
 * a NULL locale), the preprocessor conditionals around the last two
 * statements and the returns are not visible in this view.  Going by the
 * comment, the fallback to conv_get_current_charset() presumably applies
 * only to builds without iconv() -- confirm.
 */
1099 CharSet conv_get_outgoing_charset(void)
1101 static CharSet out_charset = -1;
1102 const gchar *cur_locale;
1106 if (out_charset != -1)
1109 cur_locale = conv_get_current_locale();
1111 out_charset = C_AUTO;
1115 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1116 out_charset = C_ISO_8859_15;
1120 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1123 if (!strncasecmp(cur_locale, locale_table[i].locale,
1124 strlen(locale_table[i].locale))) {
1125 out_charset = locale_table[i].out_charset;
1127 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1128 !strchr(p + 1, '.')) {
1129 if (strlen(cur_locale) == 2 &&
1130 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1131 out_charset = locale_table[i].out_charset;
1138 /* encoding conversion without iconv() is only supported
1139 on Japanese locale for now */
1140 if (out_charset == C_ISO_2022_JP)
1143 return conv_get_current_charset();
/*
 * conv_get_outgoing_charset_str:
 * Return the name of the charset for outgoing text.
 *
 * A user preference in prefs_common.outgoing_charset wins when it is set
 * to something other than CS_AUTO.  A preference whose first character is
 * not alphabetic is treated as invalid: it is freed and replaced by a
 * copy of CS_AUTO.  Otherwise the name registered for
 * conv_get_outgoing_charset() is returned, or CS_US_ASCII if there is
 * none.
 *
 * NOTE(review): isalpha() has undefined behaviour for negative char
 * values; if outgoing_charset is a plain (possibly signed) char string
 * the argument should be cast to unsigned char -- confirm the type.
 */
1149 const gchar *conv_get_outgoing_charset_str(void)
1151 CharSet out_charset;
1154 if (prefs_common.outgoing_charset) {
1155 if (!isalpha(prefs_common.outgoing_charset[0])) {
1156 g_free(prefs_common.outgoing_charset);
1157 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1158 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1159 return prefs_common.outgoing_charset;
1162 out_charset = conv_get_outgoing_charset();
1163 str = conv_get_charset_str(out_charset);
1165 return str ? str : CS_US_ASCII;
/*
 * conv_get_current_locale:
 * Find the current locale name.  The environment variables LC_ALL,
 * LC_CTYPE and LANG are tried in that order; if none is set, the value of
 * setlocale(LC_CTYPE, NULL) is used.  The result is logged with
 * debug_print().
 * NOTE(review): the return statement is not visible in this view;
 * presumably cur_locale (possibly NULL) is returned -- confirm.
 */
1168 const gchar *conv_get_current_locale(void)
1172 cur_locale = g_getenv("LC_ALL");
1173 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1174 if (!cur_locale) cur_locale = g_getenv("LANG");
1175 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1177 debug_print("current locale: %s\n",
1178 cur_locale ? cur_locale : "(none)");
/*
 * conv_unmime_header_overwrite:
 * Decode the MIME header text in str and store the result back in str.
 *
 * When the current charset is C_EUC_JP the text is first passed through
 * conv_anytodisp() into a stack buffer of strlen(str) * 2 + 1 bytes, and
 * unmime_header() then writes from that buffer into str.  Otherwise
 * unmime_header() decodes into a stack buffer of strlen(str) + 1 bytes,
 * which is copied back with strncpy2().
 *
 * Xalloca() is given `return` as its failure action, so the function
 * leaves str untouched if the buffer cannot be obtained.
 * NOTE(review): this relies on the decoded text never being longer than
 * the original contents of str; unmime_header() is not visible here --
 * confirm.
 */
1183 void conv_unmime_header_overwrite(gchar *str)
1187 CharSet cur_charset;
1189 cur_charset = conv_get_current_charset();
1191 if (cur_charset == C_EUC_JP) {
1192 buflen = strlen(str) * 2 + 1;
1193 Xalloca(buf, buflen, return);
1194 conv_anytodisp(buf, buflen, str);
1195 unmime_header(str, buf);
1197 buflen = strlen(str) + 1;
1198 Xalloca(buf, buflen, return);
1199 unmime_header(buf, str);
1200 strncpy2(str, buf, buflen);
/*
 * conv_unmime_header:
 * Decode the MIME header text in str into outbuf.
 *
 * When the current charset is C_EUC_JP the text is first passed through
 * conv_anytodisp() into a stack buffer of strlen(str) * 2 + 1 bytes and
 * unmime_header() decodes from that buffer; otherwise unmime_header()
 * decodes str directly.
 *
 * NOTE(review): neither outlen nor charset is used in the visible lines
 * -- confirm whether they are used on lines missing from this view.
 */
1204 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1205 const gchar *charset)
1207 CharSet cur_charset;
1209 cur_charset = conv_get_current_charset();
1211 if (cur_charset == C_EUC_JP) {
1215 buflen = strlen(str) * 2 + 1;
1216 Xalloca(buf, buflen, return);
1217 conv_anytodisp(buf, buflen, str);
1218 unmime_header(outbuf, buf);
1220 unmime_header(outbuf, str);
/* Column budget for one output line in conv_encode_header(). */
1223 #define MAX_LINELEN 76
/* Opening and closing delimiters of a MIME encoded-word. */
1224 #define MIMESEP_BEGIN "=?"
1225 #define MIMESEP_END "?="
/* Length of the base64 encoding of len bytes: four output characters for
 * every started group of three input bytes. */
1227 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/*
 * LBREAK_IF_REQUIRED(cond, plaintext): line-folding helper for
 * conv_encode_header().  It reads and writes that function's locals
 * (len, dest, destp, srcp, left).  It first checks whether fewer than
 * MAX_LINELEN + 2 bytes remain in dest; then, if cond holds and input
 * remains, it inspects the whitespace around the break position and
 * resets left to MAX_LINELEN - 1.
 * NOTE(review): several lines of the macro body (what is written to
 * destp, and what happens when dest is nearly full) are not visible in
 * this view.
 */
1229 #define LBREAK_IF_REQUIRED(cond, plaintext) \
1231 if (len - (destp - dest) < MAX_LINELEN + 2) { \
1236 if ((cond) && *srcp) { \
1237 if (destp > dest && left < MAX_LINELEN - 1) { \
1238 if (isspace(*(destp - 1))) \
1240 else if (plaintext && isspace(*srcp)) \
1245 left = MAX_LINELEN - 1; \
/*
 * conv_encode_header:
 * Write src into dest (capacity len) as a folded mail header value.
 *
 * Words that are pure ASCII are copied as they are.  Runs containing
 * non-ASCII text are converted from the current charset to the outgoing
 * charset with conv_codeset_strdup() and emitted as MIME encoded-words of
 * the form "=?" charset "?B?" data "?=" or "=?" charset "?Q?" data "?=".
 * The "?B?" (base64) marker is set under MB_CUR_MAX > 1; "?Q?" is the
 * other marker.  A charset name of US-ASCII is replaced by ISO-8859-1 on
 * both sides.
 *
 * `left` tracks the columns remaining on the current line, starting at
 * MAX_LINELEN - header_len; LBREAK_IF_REQUIRED() folds the line when a
 * word or encoded block would not fit.  For encoded text the loop grows
 * the candidate substring character by character and re-encodes it until
 * the encoded-word no longer fits in `left`.
 *
 * If conversion fails, a warning is logged and the text is passed through
 * conv_unreadable_8bit() instead.
 *
 * NOTE(review): the rest of the parameter list (header_len is used but
 * its declaration is not shown), many declarations, the else branches,
 * the g_free() of out_str and the final termination of dest are not
 * visible in this view.
 */
1251 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1254 const gchar *cur_encoding;
1255 const gchar *out_encoding;
1259 const gchar *srcp = src;
1260 gchar *destp = dest;
1261 gboolean use_base64;
1263 if (MB_CUR_MAX > 1) {
1265 mimesep_enc = "?B?";
1268 mimesep_enc = "?Q?";
1271 cur_encoding = conv_get_current_charset_str();
1272 if (!strcmp(cur_encoding, CS_US_ASCII))
1273 cur_encoding = CS_ISO_8859_1;
1274 out_encoding = conv_get_outgoing_charset_str();
1275 if (!strcmp(out_encoding, CS_US_ASCII))
1276 out_encoding = CS_ISO_8859_1;
/* fixed overhead of one encoded-word: "=?" + charset + "?B?"/"?Q?" + "?=" */
1278 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1279 strlen(mimesep_enc) + strlen(MIMESEP_END);
1281 left = MAX_LINELEN - header_len;
1284 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1286 while (isspace(*srcp)) {
1289 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1292 /* output as-is if the next word is an ASCII string */
1293 if (!is_next_nonascii(srcp)) {
1296 word_len = get_next_word_len(srcp);
1297 LBREAK_IF_REQUIRED(left < word_len, TRUE);
1298 while (word_len > 0) {
1299 LBREAK_IF_REQUIRED(left + 22 <= 0, TRUE);
1314 const gchar *p = srcp;
1316 gint out_enc_str_len;
1317 gint mime_block_len;
1318 gboolean cont = FALSE;
1320 while (*p != '\0') {
/* a space followed by an ASCII word ends the run to be encoded */
1321 if (isspace(*p) && !is_next_nonascii(p + 1))
1324 if (MB_CUR_MAX > 1) {
1325 mb_len = mblen(p, MB_CUR_MAX);
1327 g_warning("conv_encode_header(): invalid multibyte character encountered\n");
/* trial conversion of the run extended by one more character */
1333 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1334 out_str = conv_codeset_strdup
1335 (part_str, cur_encoding, out_encoding);
1337 g_warning("conv_encode_header(): code conversion failed\n");
1338 conv_unreadable_8bit(part_str);
1339 out_str = g_strdup(part_str);
1341 out_str_len = strlen(out_str);
1344 out_enc_str_len = B64LEN(out_str_len);
1347 qp_get_q_encoding_len(out_str);
1351 if (mimestr_len + out_enc_str_len <= left) {
1354 } else if (cur_len == 0) {
1355 LBREAK_IF_REQUIRED(1, FALSE);
/* final conversion of the run that was found to fit */
1364 Xstrndup_a(part_str, srcp, cur_len, );
1365 out_str = conv_codeset_strdup
1366 (part_str, cur_encoding, out_encoding);
1368 g_warning("conv_encode_header(): code conversion failed\n");
1369 conv_unreadable_8bit(part_str);
1370 out_str = g_strdup(part_str);
1372 out_str_len = strlen(out_str);
1375 out_enc_str_len = B64LEN(out_str_len);
1378 qp_get_q_encoding_len(out_str);
1380 Xalloca(enc_str, out_enc_str_len + 1, );
1382 base64_encode(enc_str, out_str, out_str_len);
1384 qp_q_encode(enc_str, out_str);
1388 /* output MIME-encoded string block */
1389 mime_block_len = mimestr_len + strlen(enc_str);
1390 g_snprintf(destp, mime_block_len + 1,
1391 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1392 out_encoding, mimesep_enc, enc_str);
1393 destp += mime_block_len;
1396 left -= mime_block_len;
1399 LBREAK_IF_REQUIRED(cont, FALSE);
1409 #undef LBREAK_IF_REQUIRED