2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2003 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
42 #include "quoted-printable.h"
44 #include "prefs_common.h"
/*
 * Byte-classification helpers for Japanese encodings.  Each macro masks its
 * argument with 0xff before comparing, so only the low 8 bits of c matter.
 *
 * NOTE(review): two groups of lines in this span have no visible #define
 * line: the lone body testing for 0x8f (presumably iseucaux(), which is used
 * further down in this file), and the trailing run of continued
 * "if (state != JIS_...)" lines (presumably state-switching macros used by
 * the JIS conversion code) -- confirm against the complete file.
 */
/* Replacement character written over bytes that cannot be displayed. */
54 #define SUBST_CHAR '_'
/* Low byte in 0xa1..0xfe. */
57 #define iseuckanji(c) \
58 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* Low byte equals 0x8e (lead byte tested before iseuchwkana2()). */
59 #define iseuchwkana1(c) \
60 (((c) & 0xff) == 0x8e)
/* Low byte in 0xa1..0xdf. */
61 #define iseuchwkana2(c) \
62 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
64 (((c) & 0xff) == 0x8f)
/* Low byte in 0x81..0x9f or 0xe0..0xfc. */
65 #define issjiskanji1(c) \
66 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
67 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* Low byte in 0x40..0x7e or 0x80..0xfc. */
68 #define issjiskanji2(c) \
69 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
70 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* Low byte in 0xa1..0xdf. */
71 #define issjishwkana(c) \
72 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
75 if (state != JIS_KANJI) { \
83 if (state != JIS_ASCII) { \
91 if (state != JIS_HWKANA) { \
99 if (state != JIS_AUXKANJI) { \
104 state = JIS_AUXKANJI; \
/*
 * Convert ISO-2022-JP (JIS) text in inbuf to EUC-JP, writing into outbuf.
 *
 * The input is scanned up to its terminating NUL while a JISState value
 * (initially JIS_ASCII) tracks the active character set.  Escape sequences
 * and the control bytes 0x0e / 0x0f are recognised; in the output statements
 * visible here, bytes are copied with their high bit set.
 *
 * NOTE(review): outlen is not referenced in the lines visible here -- confirm
 * how the output size is bounded.
 */
107 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
109 const guchar *in = inbuf;
110 guchar *out = outbuf;
111 JISState state = JIS_ASCII;
113 while (*in != '\0') {
/* byte following the current one selects the set: '@' or 'B' */
117 if (*(in + 1) == '@' || *(in + 1) == 'B') {
120 } else if (*(in + 1) == '(' &&
122 state = JIS_AUXKANJI;
125 /* unknown escape sequence */
128 } else if (*in == '(') {
/* 'B' or 'J' share one branch, 'I' has its own */
129 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
132 } else if (*(in + 1) == 'I') {
136 /* unknown escape sequence */
140 /* unknown escape sequence */
143 } else if (*in == 0x0e) {
146 } else if (*in == 0x0f) {
/* two-byte character: set the high bit on each byte, stopping early if
   the input ends after the first byte */
155 *out++ = *in++ | 0x80;
156 if (*in == '\0') break;
157 *out++ = *in++ | 0x80;
161 *out++ = *in++ | 0x80;
165 *out++ = *in++ | 0x80;
166 if (*in == '\0') break;
167 *out++ = *in++ | 0x80;
/*
 * Convert EUC-JP text in inbuf to ISO-2022-JP (JIS), writing into outbuf.
 *
 * Separate branches handle two-byte kanji (iseuckanji), half-width kana
 * introduced by the iseuchwkana1() lead byte, and the iseucaux() lead byte.
 * In the output statements visible here the high bit of each emitted byte is
 * cleared with "& 0x7f".
 *
 * NOTE(review): outlen is not referenced in the lines visible here -- confirm
 * how the output size is bounded.
 */
176 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
178 const guchar *in = inbuf;
179 guchar *out = outbuf;
180 JISState state = JIS_ASCII;
182 while (*in != '\0') {
186 } else if (iseuckanji(*in)) {
/* both bytes must be in the kanji range to be emitted as a pair */
187 if (iseuckanji(*(in + 1))) {
189 *out++ = *in++ & 0x7f;
190 *out++ = *in++ & 0x7f;
/* otherwise: check for a following non-NUL, non-ASCII byte
   (the action taken is not visible here) */
195 if (*in != '\0' && !isascii(*in)) {
200 } else if (iseuchwkana1(*in)) {
202 if (iseuchwkana2(*in)) {
204 *out++ = *in++ & 0x7f;
207 if (*in != '\0' && !isascii(*in)) {
212 } else if (iseucaux(*in)) {
214 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
216 *out++ = *in++ & 0x7f;
217 *out++ = *in++ & 0x7f;
220 if (*in != '\0' && !isascii(*in)) {
223 if (*in != '\0' && !isascii(*in)) {
/*
 * Convert Shift_JIS text in inbuf to EUC-JP, writing into outbuf.
 *
 * A lead byte accepted by issjiskanji1() followed by a byte accepted by
 * issjiskanji2() is remapped arithmetically and both result bytes are
 * written with the high bit set.  Bytes accepted by issjishwkana() take a
 * separate branch whose body is not visible here.
 *
 * NOTE(review): outlen is not referenced in the lines visible here -- confirm
 * how the output size is bounded.
 */
240 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
242 const guchar *in = inbuf;
243 guchar *out = outbuf;
245 while (*in != '\0') {
248 } else if (issjiskanji1(*in)) {
249 if (issjiskanji2(*(in + 1))) {
251 guchar out2 = *(in + 1);
/* offset depends on which of the two lead-byte ranges out1 falls in */
254 row = out1 < 0xa0 ? 0x70 : 0xb0;
/* two alternative mappings follow; the condition choosing between them
   is not visible here */
256 out1 = (out1 - row) * 2 - 1;
257 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
259 out1 = (out1 - row) * 2;
263 *out++ = out1 | 0x80;
264 *out++ = out2 | 0x80;
269 if (*in != '\0' && !isascii(*in)) {
274 } else if (issjishwkana(*in)) {
/*
 * Convert inbuf to EUC-JP after guessing its encoding.
 *
 * The result of conv_guess_ja_encoding() selects conv_jistoeuc(),
 * conv_sjistoeuc() or a plain strncpy2() copy; the case labels themselves
 * are not visible here.
 */
286 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
288 switch (conv_guess_ja_encoding(inbuf)) {
290 conv_jistoeuc(outbuf, outlen, inbuf);
293 conv_sjistoeuc(outbuf, outlen, inbuf);
296 strncpy2(outbuf, inbuf, outlen);
/*
 * Convert inbuf towards JIS after guessing its encoding.
 *
 * The result of conv_guess_ja_encoding() selects conv_euctojis() or a plain
 * strncpy2() copy; the case labels themselves are not visible here.
 */
301 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
303 switch (conv_guess_ja_encoding(inbuf)) {
305 conv_euctojis(outbuf, outlen, inbuf);
308 strncpy2(outbuf, inbuf, outlen);
/*
 * Lookup table consulted by isprintableeuckanji() as
 * valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0].  There is one 96-entry row per
 * lead byte 0xa2..0xa8, covering second bytes 0xa0..0xff; the entry is
 * returned as a gboolean.
 */
313 static gchar valid_eucjp_tbl[][96] = {
314 /* 0xa2a0 - 0xa2ff */
315 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
316 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
317 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
318 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
319 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
320 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0 },
322 /* 0xa3a0 - 0xa3ff */
323 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
324 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
325 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
326 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
327 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
328 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 },
330 /* 0xa4a0 - 0xa4ff */
331 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
332 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
333 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
334 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
335 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
336 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
338 /* 0xa5a0 - 0xa5ff */
339 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
340 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
341 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
342 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
343 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
344 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
346 /* 0xa6a0 - 0xa6ff */
347 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
348 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
349 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
350 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
351 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
352 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
354 /* 0xa7a0 - 0xa7ff */
355 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
356 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
357 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
358 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
359 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
360 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
362 /* 0xa8a0 - 0xa8ff */
363 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
364 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
365 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
366 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
367 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
368 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
/*
 * Classify the byte pair (c1, c2) for conv_unreadable_eucjp(), which treats
 * a true result as "printable euc-jp code".
 *
 * For c1 in 0xa2..0xa8 the answer comes from valid_eucjp_tbl.  The other
 * range checks below have their return statements outside the visible lines;
 * presumably they reject the pair -- confirm against the complete file.
 */
371 static gboolean isprintableeuckanji(guchar c1, guchar c2)
/* either byte at or below 0xa0, or equal to 0xff */
373 if (c1 <= 0xa0 || c1 == 0xff)
375 if (c2 <= 0xa0 || c2 == 0xff)
/* lead bytes 0xa9..0xaf */
378 if (c1 >= 0xa9 && c1 <= 0xaf)
/* lead bytes 0xa2..0xa8: per-character table lookup */
381 if (c1 >= 0xa2 && c1 <= 0xa8)
382 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
/* NOTE(review): the c1 condition guarding this c2 range is not visible */
385 if (c2 >= 0xd4 && c2 <= 0xff)
387 } else if (c1 == 0xf4) {
388 if (c2 >= 0xa7 && c2 <= 0xff)
/*
 * Sanitize an EUC-JP string in place.
 *
 * A CR immediately followed by LF is collapsed by shifting the rest of the
 * string (including its NUL) one byte to the left.  Two-byte codes are
 * checked with isprintableeuckanji(); half-width kana and the iseucaux()
 * sequences are recognised separately.  Per the original comments,
 * unprintable codes are substituted; the substitution statements themselves
 * are not visible here.
 */
395 void conv_unreadable_eucjp(gchar *str)
397 register guchar *p = str;
401 /* convert CR+LF -> LF */
402 if (*p == '\r' && *(p + 1) == '\n')
403 memmove(p, p + 1, strlen(p));
404 /* printable 7 bit code */
406 } else if (iseuckanji(*p)) {
407 if (isprintableeuckanji(*p, *(p + 1))) {
408 /* printable euc-jp code */
411 /* substitute unprintable code */
420 } else if (iseuchwkana1(*p)) {
421 if (iseuchwkana2(*(p + 1)))
422 /* euc-jp hankaku kana */
426 } else if (iseucaux(*p)) {
427 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
428 /* auxiliary kanji */
433 /* substitute unprintable 1 byte code */
/*
 * Sanitize a string in place for 7-bit display: a CR followed by LF is
 * collapsed to LF, and any other byte for which isascii() is false is
 * overwritten with SUBST_CHAR.
 */
438 void conv_unreadable_8bit(gchar *str)
440 register guchar *p = str;
443 /* convert CR+LF -> LF */
444 if (*p == '\r' && *(p + 1) == '\n')
445 memmove(p, p + 1, strlen(p));
446 else if (!isascii(*p)) *p = SUBST_CHAR;
/*
 * Sanitize a Latin-charset string in place: a CR followed by LF is collapsed
 * to LF, and bytes in the range 0x7f..0x9f are singled out.
 * NOTE(review): the statement applied to those bytes is not visible here;
 * presumably they are replaced with SUBST_CHAR -- confirm.
 */
451 void conv_unreadable_latin(gchar *str)
453 register guchar *p = str;
456 /* convert CR+LF -> LF */
457 if (*p == '\r' && *(p + 1) == '\n')
458 memmove(p, p + 1, strlen(p));
459 else if ((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
/*
 * Rewrite selected two-byte characters in str as single bytes, in place.
 *
 * For a lead byte of 0xa1, char_tbl (indexed by second byte - 0xa0) gives
 * the replacement; entries equal to NCV are left unconverted.  After a
 * replacement the remainder of the string is shifted left by one byte with
 * memmove().  An earlier branch handles second bytes in 0xb0..0xfa in the
 * same way.
 * NOTE(review): the lead-byte test for that earlier branch, the definition
 * of NCV and the computation of len are not visible here.
 */
467 void conv_mb_alnum(gchar *str)
469 static guchar char_tbl[] = {
471 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
472 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
474 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
475 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
477 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
478 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
480 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
481 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
483 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
484 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
487 register guchar *p = str;
494 register guchar ch = *(p + 1);
496 if (ch >= 0xb0 && ch <= 0xfa) {
501 memmove(p, p + 1, len);
507 } else if (*p == 0xa1) {
508 register guchar ch = *(p + 1);
/* the range check keeps the index inside char_tbl (0x50 entries) */
510 if (ch >= 0xa0 && ch <= 0xef &&
511 NCV != char_tbl[ch - 0xa0]) {
512 *p = char_tbl[ch - 0xa0];
515 memmove(p, p + 1, len);
521 } else if (iseuckanji(*p)) {
/*
 * Guess which Japanese encoding str uses.
 *
 * The guess starts as C_US_ASCII.  An ESC followed by '$' or '(' seen while
 * the guess is still C_US_ASCII returns C_ISO_2022_JP at once.  Byte pairs
 * that satisfy the EUC and/or Shift_JIS macros then refine the guess; the
 * visible assignments set it to C_SHIFT_JIS.
 * NOTE(review): other assignments (e.g. to an EUC-JP value) and the final
 * return are outside the visible lines.
 */
531 CharSet conv_guess_ja_encoding(const gchar *str)
533 const guchar *p = str;
534 CharSet guessed = C_US_ASCII;
537 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
538 if (guessed == C_US_ASCII)
539 return C_ISO_2022_JP;
541 } else if (isascii(*p)) {
/* pair is plausible EUC kanji; it may also be valid Shift_JIS */
543 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
544 if (*p >= 0xfd && *p <= 0xfe)
546 else if (guessed == C_SHIFT_JIS) {
547 if ((issjiskanji1(*p) &&
548 issjiskanji2(*(p + 1))) ||
550 guessed = C_SHIFT_JIS;
/* pair is plausible Shift_JIS kanji but not EUC kanji */
556 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
557 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
558 guessed = C_SHIFT_JIS;
562 } else if (issjishwkana(*p)) {
563 guessed = C_SHIFT_JIS;
/* Convert inbuf with conv_jistoeuc(), then sanitize outbuf in place with
   conv_unreadable_eucjp(). */
573 void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
575 conv_jistoeuc(outbuf, outlen, inbuf);
576 conv_unreadable_eucjp(outbuf);
/* Convert inbuf with conv_sjistoeuc(), then sanitize outbuf in place with
   conv_unreadable_eucjp(). */
579 void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
581 conv_sjistoeuc(outbuf, outlen, inbuf);
582 conv_unreadable_eucjp(outbuf);
/* Copy inbuf to outbuf with strncpy2(), then sanitize outbuf in place with
   conv_unreadable_eucjp(). */
585 void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
587 strncpy2(outbuf, inbuf, outlen);
588 conv_unreadable_eucjp(outbuf);
/* Convert inbuf with conv_anytoeuc() (encoding is guessed), then sanitize
   outbuf in place with conv_unreadable_eucjp(). */
591 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
593 conv_anytoeuc(outbuf, outlen, inbuf);
594 conv_unreadable_eucjp(outbuf);
/* Copy inbuf to outbuf with strncpy2(), then sanitize outbuf in place with
   conv_unreadable_8bit(). */
597 void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
599 strncpy2(outbuf, inbuf, outlen);
600 conv_unreadable_8bit(outbuf);
/* Copy inbuf to outbuf with strncpy2(), then sanitize outbuf in place with
   conv_unreadable_latin(). */
603 void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
605 strncpy2(outbuf, inbuf, outlen);
606 conv_unreadable_latin(outbuf);
/* Identity conversion: copy inbuf to outbuf with strncpy2().  Other code in
   this file compares function pointers against conv_noconv to detect that no
   built-in converter was selected. */
609 void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
611 strncpy2(outbuf, inbuf, outlen);
/*
 * Copy inbuf to outbuf, then sanitize it according to the current locale
 * charset: the switch on conv_get_current_charset() calls either
 * conv_unreadable_latin() or conv_unreadable_eucjp().  The case labels that
 * select each call are not visible here.
 */
614 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
616 strncpy2(outbuf, inbuf, outlen);
618 switch (conv_get_current_charset()) {
630 conv_unreadable_latin(outbuf);
633 conv_unreadable_eucjp(outbuf);
/*
 * Allocate a zero-initialised CodeConverter for the given source charset
 * name.  It stores the conversion function chosen by
 * conv_get_code_conv_func(charset, NULL), a g_strdup() copy of the name and
 * the parsed CharSet value.
 * NOTE(review): the return statement is not visible; presumably the new
 * object is returned for release with conv_code_converter_destroy().
 */
640 CodeConverter *conv_code_converter_new(const gchar *charset)
644 conv = g_new0(CodeConverter, 1);
645 conv->code_conv_func = conv_get_code_conv_func(charset, NULL);
646 conv->charset_str = g_strdup(charset);
647 conv->charset = conv_get_charset_from_str(charset);
/* Release a CodeConverter: frees the duplicated charset name.
   NOTE(review): freeing of conv itself is not visible in this excerpt. */
652 void conv_code_converter_destroy(CodeConverter *conv)
654 g_free(conv->charset_str);
/*
 * Convert inbuf into outbuf using the given converter.
 *
 * In the first preprocessor branch (closed by the "!HAVE_ICONV" #else
 * below), a built-in function other than conv_noconv is called directly;
 * the other path goes through conv_codeset_strdup() and copies the result
 * with strncpy2().  Without iconv the built-in function is always called.
 * NOTE(review): the return values and the handling of a NULL result from
 * conv_codeset_strdup() are not visible here.
 */
658 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
662 if (conv->code_conv_func != conv_noconv)
663 conv->code_conv_func(outbuf, outlen, inbuf);
667 str = conv_codeset_strdup(inbuf, conv->charset_str, NULL);
671 strncpy2(outbuf, str, outlen);
675 #else /* !HAVE_ICONV */
676 conv->code_conv_func(outbuf, outlen, inbuf);
/*
 * Return a newly allocated copy of inbuf converted from src_code to
 * dest_code.
 *
 * If conv_get_code_conv_func() yields a built-in converter, it is run into a
 * scratch buffer of (strlen(inbuf) + 1) * 3 bytes, which is then shrunk to
 * fit.  Otherwise the charset names are defaulted (outgoing charset for the
 * source, current locale charset for the destination), trivial cases are
 * answered with g_strdup(), and the rest is delegated to conv_iconv_strdup().
 * Builds without iconv fall back to g_strdup().
 * NOTE(review): the conditions under which the defaults are applied are not
 * visible in this excerpt.
 */
682 gchar *conv_codeset_strdup(const gchar *inbuf,
683 const gchar *src_code, const gchar *dest_code)
687 CodeConvFunc conv_func;
689 conv_func = conv_get_code_conv_func(src_code, dest_code);
690 if (conv_func != conv_noconv) {
691 len = (strlen(inbuf) + 1) * 3;
693 if (!buf) return NULL;
695 conv_func(buf, len, inbuf);
/* trim the scratch buffer to the converted length */
696 return g_realloc(buf, strlen(buf) + 1);
701 src_code = conv_get_outgoing_charset_str();
703 dest_code = conv_get_current_charset_str();
704 if (!strcasecmp(dest_code, CS_US_ASCII))
705 dest_code = CS_ISO_8859_1;
708 /* don't convert if current codeset is US-ASCII */
709 if (!strcasecmp(dest_code, CS_US_ASCII))
710 return g_strdup(inbuf);
712 /* don't convert if src and dest codeset are identical */
713 if (!strcasecmp(src_code, dest_code))
714 return g_strdup(inbuf);
716 return conv_iconv_strdup(inbuf, src_code, dest_code);
718 return g_strdup(inbuf);
719 #endif /* HAVE_ICONV */
/*
 * Choose the built-in conversion function for a source/destination pair.
 *
 * The default result is conv_noconv.  A NULL source name means the current
 * locale charset.  When both names are NULL ("auto detection mode") and the
 * locale charset is EUC-JP or Shift_JIS, conv_anytodisp is returned.  A
 * destination of US-ASCII always yields conv_ustodisp.  Otherwise the switch
 * on the source charset picks a "*todisp" function when the destination is
 * C_AUTO, or a direct converter for specific JIS/EUC/Shift_JIS pairs.
 * NOTE(review): most case labels are not visible here, so the source charset
 * tied to each branch cannot be confirmed from this excerpt.
 */
722 CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
723 const gchar *dest_charset_str)
725 CodeConvFunc code_conv = conv_noconv;
727 CharSet dest_charset;
729 if (!src_charset_str)
730 src_charset = conv_get_current_charset();
732 src_charset = conv_get_charset_from_str(src_charset_str);
734 /* auto detection mode */
735 if (!src_charset_str && !dest_charset_str) {
736 if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
737 return conv_anytodisp;
/* a NULL destination name maps to C_AUTO in conv_get_charset_from_str() */
742 dest_charset = conv_get_charset_from_str(dest_charset_str);
744 if (dest_charset == C_US_ASCII)
745 return conv_ustodisp;
747 switch (src_charset) {
749 case C_ISO_2022_JP_2:
750 if (dest_charset == C_AUTO)
751 code_conv = conv_jistodisp;
752 else if (dest_charset == C_EUC_JP)
753 code_conv = conv_jistoeuc;
756 if (dest_charset == C_AUTO)
757 code_conv = conv_ustodisp;
771 if (dest_charset == C_AUTO)
772 code_conv = conv_latintodisp;
775 if (dest_charset == C_AUTO)
776 code_conv = conv_sjistodisp;
777 else if (dest_charset == C_EUC_JP)
778 code_conv = conv_sjistoeuc;
781 if (dest_charset == C_AUTO)
782 code_conv = conv_euctodisp;
783 else if (dest_charset == C_ISO_2022_JP ||
784 dest_charset == C_ISO_2022_JP_2)
785 code_conv = conv_euctojis;
/*
 * Convert inbuf from src_code to dest_code with iconv(), returning a newly
 * allocated string.
 *
 * The output buffer starts at twice the input size (input size includes the
 * terminating NUL) and is enlarged with g_realloc() when iconv() reports
 * E2BIG.  EILSEQ and EINVAL have their own branches, whose bodies are not
 * visible here.  After the loop iconv() is called once more with a NULL
 * input and the buffer is shrunk to the string length.
 * NOTE(review): what is returned when iconv_open() fails is not visible.
 */
795 gchar *conv_iconv_strdup(const gchar *inbuf,
796 const gchar *src_code, const gchar *dest_code)
799 const gchar *inbuf_p;
/* iconv_open() takes the destination charset first */
808 cd = iconv_open(dest_code, src_code);
809 if (cd == (iconv_t)-1)
813 in_size = strlen(inbuf) + 1;
815 out_size = in_size * 2;
816 outbuf = g_malloc(out_size);
/* NOTE(review): iconv() returns size_t and signals failure with (size_t)-1.
   The declaration of n_conv is not visible; if it is an unsigned type the
   "< 0" test below can never be true -- confirm. */
820 while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
821 &outbuf_p, &out_left)) < 0) {
822 if (EILSEQ == errno) {
826 } else if (EINVAL == errno) {
830 } else if (E2BIG == errno) {
832 outbuf = g_realloc(outbuf, out_size);
838 g_warning("conv_iconv_strdup(): %s\n",
/* NULL input: lets iconv() emit any pending shift sequence */
846 iconv(cd, NULL, NULL, &outbuf_p, &out_left);
847 outbuf = g_realloc(outbuf, strlen(outbuf) + 1);
854 #endif /* HAVE_ICONV */
/*
 * Table pairing each CharSet value with a charset name string.  It is
 * searched linearly by conv_get_charset_str() (value to name, first match
 * returned) and conv_get_charset_from_str() (name to value, compared
 * case-insensitively).  Some values appear with several names; for those,
 * the first listed name is the one conv_get_charset_str() returns.
 */
856 static const struct {
860 {C_US_ASCII, CS_US_ASCII},
861 {C_US_ASCII, CS_ANSI_X3_4_1968},
863 {C_ISO_8859_1, CS_ISO_8859_1},
864 {C_ISO_8859_2, CS_ISO_8859_2},
865 {C_ISO_8859_4, CS_ISO_8859_4},
866 {C_ISO_8859_5, CS_ISO_8859_5},
867 {C_ISO_8859_7, CS_ISO_8859_7},
868 {C_ISO_8859_8, CS_ISO_8859_8},
869 {C_ISO_8859_9, CS_ISO_8859_9},
870 {C_ISO_8859_11, CS_ISO_8859_11},
871 {C_ISO_8859_13, CS_ISO_8859_13},
872 {C_ISO_8859_15, CS_ISO_8859_15},
873 {C_BALTIC, CS_BALTIC},
874 {C_CP1251, CS_CP1251},
875 {C_WINDOWS_1251, CS_WINDOWS_1251},
876 {C_KOI8_R, CS_KOI8_R},
877 {C_KOI8_U, CS_KOI8_U},
878 {C_ISO_2022_JP, CS_ISO_2022_JP},
879 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
880 {C_EUC_JP, CS_EUC_JP},
881 {C_EUC_JP, CS_EUCJP},
882 {C_SHIFT_JIS, CS_SHIFT_JIS},
883 {C_SHIFT_JIS, CS_SHIFT__JIS},
884 {C_SHIFT_JIS, CS_SJIS},
885 {C_ISO_2022_KR, CS_ISO_2022_KR},
886 {C_EUC_KR, CS_EUC_KR},
887 {C_ISO_2022_CN, CS_ISO_2022_CN},
888 {C_EUC_CN, CS_EUC_CN},
889 {C_GB2312, CS_GB2312},
890 {C_EUC_TW, CS_EUC_TW},
892 {C_TIS_620, CS_TIS_620},
893 {C_WINDOWS_874, CS_WINDOWS_874},
/*
 * Table mapping a locale name to two CharSet values: the locale's own
 * charset and the charset used for outgoing text (read as .charset and
 * .out_charset by conv_get_current_charset() and
 * conv_get_outgoing_charset()).
 *
 * Entries are compared in order against the current locale using a
 * case-insensitive prefix match on the entry's full name, so an entry such
 * as "ja_JP.SJIS" is tested before the shorter "ja_JP".
 * NOTE(review): whether the scan stops at the first match is not visible in
 * this excerpt; the ordering suggests it does -- confirm.
 */
896 static const struct {
901 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
902 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
903 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
904 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
905 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
906 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
907 {"ko_KR" , C_EUC_KR , C_EUC_KR},
908 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
909 {"zh_CN" , C_GB2312 , C_GB2312},
910 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
911 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
912 {"zh_TW" , C_BIG5 , C_BIG5},
914 {"ru_RU.KOI8-R" , C_KOI8_R , C_KOI8_R},
915 {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
916 {"ru_RU" , C_ISO_8859_5 , C_KOI8_R},
917 {"ru_UA" , C_KOI8_U , C_KOI8_U},
918 {"uk_UA" , C_KOI8_U , C_KOI8_U},
919 {"be_BY" , C_WINDOWS_1251, C_WINDOWS_1251},
920 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
922 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
923 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
924 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
925 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
926 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
927 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
928 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
929 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
930 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
931 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
932 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
933 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
934 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
935 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
936 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
938 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
939 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
940 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
941 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
942 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
943 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
944 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
945 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
946 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
948 {"th_TH" , C_TIS_620 , C_TIS_620},
949 /* {"th_TH" , C_WINDOWS_874}, */
950 /* {"th_TH" , C_ISO_8859_11}, */
952 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
953 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
954 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
955 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
956 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
958 {"C" , C_US_ASCII , C_US_ASCII},
959 {"POSIX" , C_US_ASCII , C_US_ASCII},
960 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
/* Return the name of the first charsets[] entry whose value equals charset.
   NOTE(review): the result when no entry matches is not visible here;
   callers in this file test the result for NULL. */
963 const gchar *conv_get_charset_str(CharSet charset)
967 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
968 if (charsets[i].charset == charset)
969 return charsets[i].name;
/* Map a charset name to its CharSet value using a case-insensitive search
   of charsets[].  A NULL name yields C_AUTO.
   NOTE(review): the result for an unknown name is not visible here. */
975 CharSet conv_get_charset_from_str(const gchar *charset)
979 if (!charset) return C_AUTO;
981 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
982 if (!strcasecmp(charsets[i].name, charset))
983 return charsets[i].charset;
/*
 * Determine the charset of the current locale and cache it in a static
 * variable (-1 means "not computed yet").
 *
 * Visible rules: a locale containing "UTF-8" gives C_UTF_8; a locale ending
 * in "@euro" gives C_ISO_8859_15; otherwise locale_table is scanned and the
 * matching entry's .charset is taken.  C_US_ASCII and C_AUTO are also
 * assigned on paths whose conditions are not visible here.
 */
989 CharSet conv_get_current_charset(void)
991 static CharSet cur_charset = -1;
992 const gchar *cur_locale;
996 if (cur_charset != -1)
999 cur_locale = conv_get_current_locale();
1001 cur_charset = C_US_ASCII;
1005 if (strcasestr(cur_locale, "UTF-8")) {
1006 cur_charset = C_UTF_8;
/* "@euro" must be the final five characters of the locale name */
1010 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1011 cur_charset = C_ISO_8859_15;
1015 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1018 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1019 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1020 if (!strncasecmp(cur_locale, locale_table[i].locale,
1021 strlen(locale_table[i].locale))) {
1022 cur_charset = locale_table[i].charset;
/* table entry without a ".codeset" part: also accept a bare two-letter
   locale that matches its language code */
1024 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1025 !strchr(p + 1, '.')) {
1026 if (strlen(cur_locale) == 2 &&
1027 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1028 cur_charset = locale_table[i].charset;
1034 cur_charset = C_AUTO;
/* Return the name of the current locale charset, kept in a static pointer;
   CS_US_ASCII is returned when no name is available.
   NOTE(review): the guard around the lookup is not visible here. */
1038 const gchar *conv_get_current_charset_str(void)
1040 static const gchar *codeset = NULL;
1043 codeset = conv_get_charset_str(conv_get_current_charset());
1045 return codeset ? codeset : CS_US_ASCII;
/*
 * Determine the charset to use for outgoing text and cache it in a static
 * variable (-1 means "not computed yet").
 *
 * The lookup mirrors conv_get_current_charset() but reads the .out_charset
 * column of locale_table; a locale ending in "@euro" gives C_ISO_8859_15.
 * The final lines special-case C_ISO_2022_JP and otherwise fall back to
 * conv_get_current_charset(), per the original comment about builds without
 * iconv().
 * NOTE(review): the preprocessor guards and return statements around that
 * tail are not visible here.
 */
1048 CharSet conv_get_outgoing_charset(void)
1050 static CharSet out_charset = -1;
1051 const gchar *cur_locale;
1055 if (out_charset != -1)
1058 cur_locale = conv_get_current_locale();
1060 out_charset = C_AUTO;
1064 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1065 out_charset = C_ISO_8859_15;
1069 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1072 if (!strncasecmp(cur_locale, locale_table[i].locale,
1073 strlen(locale_table[i].locale))) {
1074 out_charset = locale_table[i].out_charset;
1076 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1077 !strchr(p + 1, '.')) {
1078 if (strlen(cur_locale) == 2 &&
1079 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1080 out_charset = locale_table[i].out_charset;
1087 /* encoding conversion without iconv() is only supported
1088 on Japanese locale for now */
1089 if (out_charset == C_ISO_2022_JP)
1092 return conv_get_current_charset();
/*
 * Return the name of the outgoing charset.
 *
 * A user preference (prefs_common.outgoing_charset) wins when it is set to
 * something other than CS_AUTO.  A preference whose first character is not
 * alphabetic is treated as invalid: it is freed and replaced by CS_AUTO.
 * Otherwise the name is derived from conv_get_outgoing_charset(), with
 * CS_US_ASCII as the fallback when no name is found.
 */
1098 const gchar *conv_get_outgoing_charset_str(void)
1100 CharSet out_charset;
1103 if (prefs_common.outgoing_charset) {
1104 if (!isalpha(prefs_common.outgoing_charset[0])) {
1105 g_free(prefs_common.outgoing_charset);
1106 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1107 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1108 return prefs_common.outgoing_charset;
1111 out_charset = conv_get_outgoing_charset();
1112 str = conv_get_charset_str(out_charset);
1114 return str ? str : CS_US_ASCII;
/*
 * Find the current locale name.  The environment variables LC_ALL, LC_CTYPE
 * and LANG are consulted in that order; if none is set, the value reported
 * by setlocale(LC_CTYPE, NULL) is used.  The result is logged with
 * debug_print().
 * NOTE(review): the return statement is not visible; presumably cur_locale
 * is returned and may be NULL.
 */
1117 const gchar *conv_get_current_locale(void)
1121 cur_locale = g_getenv("LC_ALL");
1122 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1123 if (!cur_locale) cur_locale = g_getenv("LANG");
1124 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1126 debug_print("current locale: %s\n",
1127 cur_locale ? cur_locale : "(none)");
/*
 * Decode a MIME-encoded header in place, overwriting str.
 *
 * When the current charset is EUC-JP, str is first converted for display
 * into a stack buffer of strlen(str) * 2 + 1 bytes and unmime_header() then
 * writes its output back into str.  Otherwise unmime_header() writes into a
 * stack buffer of strlen(str) + 1 bytes, which is copied back with
 * strncpy2().
 * NOTE(review): the size unmime_header() may write into str is not
 * established by the lines visible here.
 */
1132 void conv_unmime_header_overwrite(gchar *str)
1136 CharSet cur_charset;
1138 cur_charset = conv_get_current_charset();
1140 if (cur_charset == C_EUC_JP) {
1141 buflen = strlen(str) * 2 + 1;
/* third Xalloca argument is the statement run on allocation failure */
1142 Xalloca(buf, buflen, return);
1143 conv_anytodisp(buf, buflen, str);
1144 unmime_header(str, buf);
1146 buflen = strlen(str) + 1;
1147 Xalloca(buf, buflen, return);
1148 unmime_header(buf, str);
1149 strncpy2(str, buf, buflen);
/*
 * Decode a MIME-encoded header from str into outbuf.
 *
 * When the current charset is EUC-JP, str is first converted for display
 * into a stack buffer of strlen(str) * 2 + 1 bytes and that buffer is
 * decoded; otherwise str is decoded directly.
 * NOTE(review): outlen and charset are not referenced in the lines visible
 * here.
 */
1153 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1154 const gchar *charset)
1156 CharSet cur_charset;
1158 cur_charset = conv_get_current_charset();
1160 if (cur_charset == C_EUC_JP) {
1164 buflen = strlen(str) * 2 + 1;
1165 Xalloca(buf, buflen, return);
1166 conv_anytodisp(buf, buflen, str);
1167 unmime_header(outbuf, buf);
1169 unmime_header(outbuf, str);
/* Line-length limit and encoded-word delimiters used by
   conv_encode_header(). */
1172 #define MAX_LINELEN 76
1173 #define MIMESEP_BEGIN "=?"
1174 #define MIMESEP_END "?="
/* Base64 output length for len input bytes: 4 characters per full group of
   3 bytes, plus 4 more when a partial group remains. */
1176 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/* Line-break helper for conv_encode_header(); it uses that function's locals
   len, dest, destp, srcp and left directly.  The visible lines check that at
   least MAX_LINELEN + 2 bytes remain in dest, and, when cond holds and input
   remains, inspect whitespace around the break point and reset left to
   MAX_LINELEN - 1.
   NOTE(review): the rest of the macro body is not visible here. */
1178 #define LBREAK_IF_REQUIRED(cond, plaintext) \
1180 if (len - (destp - dest) < MAX_LINELEN + 2) { \
1185 if ((cond) && *srcp) { \
1186 if (destp > dest && isspace(*(destp - 1))) \
1188 else if (plaintext && isspace(*srcp)) \
1193 left = MAX_LINELEN - 1; \
/*
 * Encode a header value from src into dest (capacity len), producing
 * "=?charset?X?...?=" encoded words for the non-ASCII parts.
 *
 * The encoded-word type is "?B?" when MB_CUR_MAX > 1 and "?Q?" otherwise.
 * The source and output charset names come from the current locale and the
 * outgoing-charset setting; US-ASCII is replaced by ISO-8859-1 for both.
 * ASCII words are emitted as they are.  For non-ASCII runs the function
 * grows a candidate substring, converts it with conv_codeset_strdup(),
 * measures the encoded length, and emits the block once it no longer fits
 * in the space left on the line (tracked in `left`); LBREAK_IF_REQUIRED
 * handles the line breaks.
 * NOTE(review): many control-flow lines are not visible in this excerpt, so
 * the exact loop structure should be confirmed against the complete file.
 */
1198 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1201 const gchar *cur_encoding;
1202 const gchar *out_encoding;
1206 const gchar *srcp = src;
1207 gchar *destp = dest;
1208 gboolean use_base64;
1210 if (MB_CUR_MAX > 1) {
1212 mimesep_enc = "?B?";
1215 mimesep_enc = "?Q?";
1218 cur_encoding = conv_get_current_charset_str();
1219 if (!strcmp(cur_encoding, CS_US_ASCII))
1220 cur_encoding = CS_ISO_8859_1;
1221 out_encoding = conv_get_outgoing_charset_str();
1222 if (!strcmp(out_encoding, CS_US_ASCII))
1223 out_encoding = CS_ISO_8859_1;
/* fixed overhead of one encoded word: "=?" + charset + "?B?"/"?Q?" + "?=" */
1225 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1226 strlen(mimesep_enc) + strlen(MIMESEP_END);
/* room left on the first line after the header name */
1228 left = MAX_LINELEN - header_len;
1231 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1233 while (isspace(*srcp)) {
1236 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1239 /* output as it is if the next word is ASCII string */
1240 if (!is_next_nonascii(srcp)) {
1243 word_len = get_next_word_len(srcp);
1244 LBREAK_IF_REQUIRED(left < word_len, TRUE);
1245 while (word_len > 0) {
1246 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1261 const gchar *p = srcp;
1263 gint out_enc_str_len;
1264 gint mime_block_len;
1265 gboolean cont = FALSE;
1267 while (*p != '\0') {
1268 if (isspace(*p) && !is_next_nonascii(p + 1))
1271 if (MB_CUR_MAX > 1) {
1272 mb_len = mblen(p, MB_CUR_MAX);
1274 g_warning("conv_encode_header(): invalid multibyte character encountered\n");
/* trial conversion of the candidate extended by one more character */
1280 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1281 out_str = conv_codeset_strdup
1282 (part_str, cur_encoding, out_encoding);
1284 g_warning("conv_encode_header(): code conversion failed\n");
/* NOTE(review): the guard around this statement is not visible.  If it runs
   when out_str is NULL, g_strdup(NULL) returns NULL and the strlen() call
   that follows would receive NULL -- confirm. */
1285 out_str = g_strdup(out_str);
1287 out_str_len = strlen(out_str);
1290 out_enc_str_len = B64LEN(out_str_len);
1293 qp_get_q_encoding_len(out_str);
/* does the encoded word, including its delimiters, still fit? */
1297 if (mimestr_len + out_enc_str_len <= left) {
1300 } else if (cur_len == 0) {
1301 LBREAK_IF_REQUIRED(1, FALSE);
/* final conversion of the accepted substring */
1310 Xstrndup_a(part_str, srcp, cur_len, );
1311 out_str = conv_codeset_strdup
1312 (part_str, cur_encoding, out_encoding);
1314 g_warning("conv_encode_header(): code conversion failed\n");
/* NOTE(review): same concern as the earlier g_strdup(out_str) -- confirm. */
1315 out_str = g_strdup(out_str);
1317 out_str_len = strlen(out_str);
1320 out_enc_str_len = B64LEN(out_str_len);
1323 qp_get_q_encoding_len(out_str);
1325 Xalloca(enc_str, out_enc_str_len + 1, );
1327 base64_encode(enc_str, out_str, out_str_len);
1329 qp_q_encode(enc_str, out_str);
1333 /* output MIME-encoded string block */
1334 mime_block_len = mimestr_len + strlen(enc_str);
1335 g_snprintf(destp, mime_block_len + 1,
1336 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1337 out_encoding, mimesep_enc, enc_str);
1338 destp += mime_block_len;
1341 left -= mime_block_len;
1344 LBREAK_IF_REQUIRED(cont, FALSE);
1354 #undef LBREAK_IF_REQUIRED