2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2004 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
38 #include "quoted-printable.h"
40 #include "prefs_common.h"
/* Replacement byte stored over unprintable or unconvertible characters
 * (0x5f is ASCII '_').  The definition deliberately carries no trailing
 * semicolon: with one, the macro could only appear as the last token of
 * a statement and would break inside any larger expression or
 * initializer.  Existing uses of the form `*p = SUBST_CHAR;` are
 * unaffected. */
#define SUBST_CHAR 0x5f
/*
 * Byte-range predicates used by the converters below.  Every macro
 * masks its argument with 0xff first, so the comparison is made on the
 * 8-bit value even when a signed char is passed in.
 *   iseuckanji    0xa1..0xfe
 *   iseuchwkana1  0x8e
 *   iseuchwkana2  0xa1..0xdf
 *   issjiskanji1  0x81..0x9f or 0xe0..0xfc
 *   issjiskanji2  0x40..0x7e or 0x80..0xfc
 *   issjishwkana  0xa1..0xdf
 */
53 #define iseuckanji(c) \
54 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
55 #define iseuchwkana1(c) \
56 (((c) & 0xff) == 0x8e)
57 #define iseuchwkana2(c) \
58 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
60 (((c) & 0xff) == 0x8f)
61 #define issjiskanji1(c) \
62 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
63 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
64 #define issjiskanji2(c) \
65 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
66 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
67 #define issjishwkana(c) \
68 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
71 if (state != JIS_KANJI) { \
79 if (state != JIS_ASCII) { \
87 if (state != JIS_HWKANA) { \
95 if (state != JIS_AUXKANJI) { \
100 state = JIS_AUXKANJI; \
/*
 * conv_jistoeuc: convert the NUL-terminated JIS (ISO-2022-JP) string
 * inbuf into EUC-JP, writing to outbuf.
 *
 * The input is walked byte by byte while a JISState (initially
 * JIS_ASCII) is tracked.  Escape-sequence introducers ('$' / '('
 * followed by a designator byte) and the control bytes 0x0e / 0x0f are
 * inspected to change state.  The output statements below copy input
 * bytes with bit 7 set (`| 0x80`) and stop early when the input ends in
 * the middle of a two-byte character.
 *
 * NOTE(review): outlen does not appear in any statement shown here -
 * confirm callers always provide a large enough outbuf.
 */
103 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
105 const guchar *in = inbuf;
106 guchar *out = outbuf;
107 JISState state = JIS_ASCII;
109 while (*in != '\0') {
113 if (*(in + 1) == '@' || *(in + 1) == 'B') {
116 } else if (*(in + 1) == '(' &&
118 state = JIS_AUXKANJI;
121 /* unknown escape sequence */
124 } else if (*in == '(') {
125 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
128 } else if (*(in + 1) == 'I') {
132 /* unknown escape sequence */
136 /* unknown escape sequence */
139 } else if (*in == 0x0e) {
142 } else if (*in == 0x0f) {
151 *out++ = *in++ | 0x80;
152 if (*in == '\0') break;
153 *out++ = *in++ | 0x80;
157 *out++ = *in++ | 0x80;
161 *out++ = *in++ | 0x80;
162 if (*in == '\0') break;
163 *out++ = *in++ | 0x80;
/* Codes of the two sound marks that may follow a half-width kana
 * (the names suggest dakuten and handakuten). */
172 #define JIS_HWDAKUTEN 0x5e
173 #define JIS_HWHANDAKUTEN 0x5f
/*
 * conv_jis_hantozen: translate one half-width kana code (jis_code),
 * optionally combined with a following sound mark (sound_sym), into a
 * 16-bit code stored high byte first in outbuf[0] and outbuf[1].
 *
 * Lookup order on the lines below:
 *   - codes outside 0x21..0x5f are filtered out first;
 *   - sound_sym == JIS_HWDAKUTEN and jis_code in 0x36..0x4e:
 *     dakuten_tbl, indexed by jis_code - 0x30;
 *   - sound_sym == JIS_HWHANDAKUTEN and jis_code in 0x4a..0x4e:
 *     handakuten_tbl, indexed by jis_code - 0x4a;
 *   - otherwise h2z_tbl, indexed by jis_code - 0x20.
 *
 * NOTE(review): the caller stores the return value in `len`; presumably
 * it is the number of half-width characters consumed - confirm.
 */
175 static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
177 static guint16 h2z_tbl[] = {
179 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
180 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
182 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
183 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
185 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
186 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
188 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
189 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
192 static guint16 dakuten_tbl[] = {
194 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
195 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
197 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
198 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
201 static guint16 handakuten_tbl[] = {
203 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
211 if (jis_code < 0x21 || jis_code > 0x5f)
214 if (sound_sym == JIS_HWDAKUTEN &&
215 jis_code >= 0x36 && jis_code <= 0x4e) {
216 out_code = dakuten_tbl[jis_code - 0x30];
218 *outbuf = out_code >> 8;
219 *(outbuf + 1) = out_code & 0xff;
224 if (sound_sym == JIS_HWHANDAKUTEN &&
225 jis_code >= 0x4a && jis_code <= 0x4e) {
226 out_code = handakuten_tbl[jis_code - 0x4a];
227 *outbuf = out_code >> 8;
228 *(outbuf + 1) = out_code & 0xff;
232 out_code = h2z_tbl[jis_code - 0x20];
233 *outbuf = out_code >> 8;
234 *(outbuf + 1) = out_code & 0xff;
/*
 * conv_euctojis: convert the NUL-terminated EUC-JP string inbuf into
 * JIS, writing to outbuf and tracking the output shift state (initially
 * JIS_ASCII).
 *
 * Input classes visible in the branches below:
 *   - two consecutive EUC kanji bytes: both written with bit 7 cleared;
 *   - half-width kana (0x8e lead byte + kana byte): written with bit 7
 *     cleared when prefs_common.allow_jisx0201_kana is set; the
 *     conv_jis_hantozen() calls (presumably the alternative branch)
 *     look ahead at a second half-width kana so a sound mark can be
 *     merged into the result;
 *   - auxiliary kanji (iseucaux lead byte) followed by a kanji pair.
 * Bytes that are neither NUL nor ASCII and fit none of the patterns are
 * tested separately with `!IS_ASCII`.
 *
 * NOTE(review): outlen does not appear in any statement shown here -
 * confirm callers always provide a large enough outbuf.
 */
238 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
240 const guchar *in = inbuf;
241 guchar *out = outbuf;
242 JISState state = JIS_ASCII;
244 while (*in != '\0') {
248 } else if (iseuckanji(*in)) {
249 if (iseuckanji(*(in + 1))) {
251 *out++ = *in++ & 0x7f;
252 *out++ = *in++ & 0x7f;
257 if (*in != '\0' && !IS_ASCII(*in)) {
262 } else if (iseuchwkana1(*in)) {
263 if (iseuchwkana2(*(in + 1))) {
264 if (prefs_common.allow_jisx0201_kana) {
267 *out++ = *in++ & 0x7f;
272 if (iseuchwkana1(*(in + 2)) &&
273 iseuchwkana2(*(in + 3)))
274 len = conv_jis_hantozen
276 *(in + 1), *(in + 3));
278 len = conv_jis_hantozen
293 if (*in != '\0' && !IS_ASCII(*in)) {
298 } else if (iseucaux(*in)) {
300 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
302 *out++ = *in++ & 0x7f;
303 *out++ = *in++ & 0x7f;
306 if (*in != '\0' && !IS_ASCII(*in)) {
309 if (*in != '\0' && !IS_ASCII(*in)) {
/*
 * conv_sjistoeuc: convert the NUL-terminated Shift_JIS string inbuf
 * into EUC-JP, writing to outbuf.
 *
 * For a valid lead/trail pair (issjiskanji1 / issjiskanji2) the lead
 * byte is rebased by 0x70 (lead < 0xa0) or 0xb0 and doubled, the trail
 * byte is adjusted, and both are written with bit 7 set.  Single-byte
 * half-width kana are recognised with issjishwkana().
 */
326 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
328 const guchar *in = inbuf;
329 guchar *out = outbuf;
331 while (*in != '\0') {
334 } else if (issjiskanji1(*in)) {
335 if (issjiskanji2(*(in + 1))) {
337 guchar out2 = *(in + 1);
340 row = out1 < 0xa0 ? 0x70 : 0xb0;
342 out1 = (out1 - row) * 2 - 1;
343 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
345 out1 = (out1 - row) * 2;
349 *out++ = out1 | 0x80;
350 *out++ = out2 | 0x80;
355 if (*in != '\0' && !IS_ASCII(*in)) {
360 } else if (issjishwkana(*in)) {
/*
 * conv_anytoeuc: produce EUC-JP in outbuf from input of unknown
 * Japanese encoding.  The encoding is guessed with
 * conv_guess_ja_encoding(); the call is then dispatched to
 * conv_jistoeuc() or conv_sjistoeuc(), or the input is copied with
 * strncpy2() using outlen as the limit.
 */
372 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
374 switch (conv_guess_ja_encoding(inbuf)) {
376 conv_jistoeuc(outbuf, outlen, inbuf);
379 conv_sjistoeuc(outbuf, outlen, inbuf);
382 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_anytoutf8: produce UTF-8 in outbuf from input of unknown
 * Japanese encoding.  The encoding is guessed with
 * conv_guess_ja_encoding(); the input is then converted from
 * ISO-2022-JP, Shift_JIS or EUC-JP with conv_codeset_strdup() and
 * copied into outbuf with strncpy2(), or copied unconverted.
 *
 * NOTE(review): conv_codeset_strdup() can return NULL, yet each result
 * is passed to strncpy2() on the very next line - confirm strncpy2()
 * tolerates a NULL source, or add a check.
 */
387 void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
389 gchar *tmpstr = NULL;
391 switch (conv_guess_ja_encoding(inbuf)) {
393 tmpstr = conv_codeset_strdup(inbuf, CS_ISO_2022_JP, CS_UTF_8);
394 strncpy2(outbuf, tmpstr, outlen);
398 tmpstr = conv_codeset_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);
399 strncpy2(outbuf, tmpstr, outlen);
403 tmpstr = conv_codeset_strdup(inbuf, CS_EUC_JP, CS_UTF_8);
404 strncpy2(outbuf, tmpstr, outlen);
408 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_anytojis: produce JIS in outbuf from input of unknown Japanese
 * encoding.  Depending on conv_guess_ja_encoding() the input is either
 * run through conv_euctojis() or copied with strncpy2() using outlen as
 * the limit.
 */
413 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
415 switch (conv_guess_ja_encoding(inbuf)) {
417 conv_euctojis(outbuf, outlen, inbuf);
420 strncpy2(outbuf, inbuf, outlen);
/*
 * valid_eucjp_tbl[row][col]: non-zero when the two-byte EUC-JP code
 * with lead byte 0xa2 + row and trail byte 0xa0 + col counts as
 * printable.  isprintableeuckanji() consults it for lead bytes
 * 0xa2..0xa8 (seven rows of 96 columns).
 */
425 static gchar valid_eucjp_tbl[][96] = {
426 /* 0xa2a0 - 0xa2ff */
427 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
428 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
429 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
430 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
431 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
432 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0 },
434 /* 0xa3a0 - 0xa3ff */
435 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
437 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
438 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
439 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
440 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 },
442 /* 0xa4a0 - 0xa4ff */
443 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
444 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
445 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
446 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
447 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
448 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
450 /* 0xa5a0 - 0xa5ff */
451 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
452 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
453 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
454 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
455 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
456 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
458 /* 0xa6a0 - 0xa6ff */
459 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
460 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
461 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
462 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
463 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
464 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
466 /* 0xa7a0 - 0xa7ff */
467 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
468 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
469 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
470 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
471 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
472 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
474 /* 0xa8a0 - 0xa8ff */
475 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
476 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
477 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
478 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
479 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
480 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
/*
 * isprintableeuckanji: decide whether the EUC-JP byte pair (c1, c2)
 * denotes a printable character.
 *
 * Checks, in order:
 *   - c1 outside 0xa1..0xf4 or c2 outside 0xa1..0xfe is filtered out
 *     first (presumably rejected);
 *   - lead bytes 0xa9..0xaf are treated as a group;
 *   - lead bytes 0xa2..0xa8 are answered from valid_eucjp_tbl; the
 *     earlier c2 check keeps the column index within 1..94;
 *   - extra trail-byte ranges are tested for one further lead byte
 *     (c2 in 0xd4..0xfe) and for 0xf4 (c2 in 0xa7..0xfe).
 */
483 static gboolean isprintableeuckanji(guchar c1, guchar c2)
485 if (c1 <= 0xa0 || c1 >= 0xf5)
487 if (c2 <= 0xa0 || c2 == 0xff)
490 if (c1 >= 0xa9 && c1 <= 0xaf)
493 if (c1 >= 0xa2 && c1 <= 0xa8)
494 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
497 if (c2 >= 0xd4 && c2 <= 0xfe)
499 } else if (c1 == 0xf4) {
500 if (c2 >= 0xa7 && c2 <= 0xfe)
/*
 * conv_unreadable_eucjp: clean the EUC-JP string str in place.
 *
 * A CR immediately followed by LF is removed by shifting the rest of
 * the string one byte to the left; strlen(p) bytes are moved, which
 * equals strlen(p + 1) + 1, so the terminating NUL moves as well.
 * The remaining branches classify each character as printable 7-bit,
 * printable EUC-JP kanji (isprintableeuckanji), half-width kana or
 * auxiliary kanji; per the inline comments, anything unprintable is
 * substituted.
 */
507 void conv_unreadable_eucjp(gchar *str)
509 register guchar *p = str;
513 /* convert CR+LF -> LF */
514 if (*p == '\r' && *(p + 1) == '\n')
515 memmove(p, p + 1, strlen(p));
516 /* printable 7 bit code */
518 } else if (iseuckanji(*p)) {
519 if (isprintableeuckanji(*p, *(p + 1))) {
520 /* printable euc-jp code */
523 /* substitute unprintable code */
532 } else if (iseuchwkana1(*p)) {
533 if (iseuchwkana2(*(p + 1)))
534 /* euc-jp hankaku kana */
538 } else if (iseucaux(*p)) {
539 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
540 /* auxiliary kanji */
545 /* substitute unprintable 1 byte code */
/*
 * conv_unreadable_8bit: clean str in place for a 7-bit display.
 * CR+LF is collapsed to LF (the tail, including the NUL, is shifted
 * left by one byte) and every byte for which IS_ASCII() fails is
 * overwritten with SUBST_CHAR.
 */
550 void conv_unreadable_8bit(gchar *str)
552 register guchar *p = str;
555 /* convert CR+LF -> LF */
556 if (*p == '\r' && *(p + 1) == '\n')
557 memmove(p, p + 1, strlen(p));
558 else if (!IS_ASCII(*p)) *p = SUBST_CHAR;
/*
 * conv_unreadable_latin: clean a Latin (single-byte) string in place.
 * CR+LF is collapsed to LF; bytes in the range 0x7f..0x9f are singled
 * out (presumably replaced by the substitution character).
 */
563 void conv_unreadable_latin(gchar *str)
565 register guchar *p = str;
568 /* convert CR+LF -> LF */
569 if (*p == '\r' && *(p + 1) == '\n')
570 memmove(p, p + 1, strlen(p));
571 else if ((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
/*
 * conv_unreadable_utf8: clean a UTF-8 string in place.
 * CR+LF is collapsed to LF; bytes in the range 0x7f..0x9f are tested as
 * the first part of a longer condition (presumably leading to
 * substitution).
 */
577 void conv_unreadable_utf8(gchar *str)
579 register guchar *p = str;
582 /* convert CR+LF -> LF */
584 if (*p == '\r' && *(p + 1) == '\n')
585 memmove(p, p + 1, strlen(p));
586 else if (((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
/*
 * conv_unreadable_locale: clean str in place using the routine that
 * matches the current locale charset (conv_get_current_charset()):
 * conv_unreadable_latin(), conv_unreadable_eucjp() or
 * conv_unreadable_utf8().
 */
593 void conv_unreadable_locale(gchar *str)
595 switch (conv_get_current_charset()) {
611 conv_unreadable_latin(str);
614 conv_unreadable_eucjp(str);
617 conv_unreadable_utf8(str);
/*
 * conv_mb_alnum: rewrite str in place, replacing certain two-byte
 * characters by a single byte and closing the gap with memmove().
 *
 * char_tbl maps the trail byte (minus 0xa0) of a character whose lead
 * byte is 0xa1 to its ASCII replacement; NCV marks entries that have no
 * replacement.  The table has 80 entries, matching the accepted trail
 * range 0xa0..0xef.  A second branch handles trail bytes 0xb0..0xfa of
 * another lead byte (presumably full-width digits and letters).
 */
626 void conv_mb_alnum(gchar *str)
628 static guchar char_tbl[] = {
630 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
631 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
633 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
634 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
636 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
637 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
639 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
640 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
642 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
643 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
646 register guchar *p = str;
653 register guchar ch = *(p + 1);
655 if (ch >= 0xb0 && ch <= 0xfa) {
660 memmove(p, p + 1, len);
666 } else if (*p == 0xa1) {
667 register guchar ch = *(p + 1);
669 if (ch >= 0xa0 && ch <= 0xef &&
670 NCV != char_tbl[ch - 0xa0]) {
671 *p = char_tbl[ch - 0xa0];
674 memmove(p, p + 1, len);
680 } else if (iseuckanji(*p)) {
/*
 * conv_guess_ja_encoding: heuristically classify str as one of the
 * Japanese encodings, starting from the assumption C_US_ASCII.
 *
 *   - ESC followed by '$' or '(' returns C_ISO_2022_JP at once, as long
 *     as nothing other than ASCII has been guessed so far;
 *   - a byte pair valid as EUC kanji is examined further, because the
 *     same bytes may also form a Shift_JIS sequence;
 *   - a pair valid only as Shift_JIS, or a Shift_JIS half-width kana
 *     byte, points to C_SHIFT_JIS.
 */
690 CharSet conv_guess_ja_encoding(const gchar *str)
692 const guchar *p = str;
693 CharSet guessed = C_US_ASCII;
696 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
697 if (guessed == C_US_ASCII)
698 return C_ISO_2022_JP;
700 } else if (IS_ASCII(*p)) {
702 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
703 if (*p >= 0xfd && *p <= 0xfe)
705 else if (guessed == C_SHIFT_JIS) {
706 if ((issjiskanji1(*p) &&
707 issjiskanji2(*(p + 1))) ||
709 guessed = C_SHIFT_JIS;
715 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
716 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
717 guessed = C_SHIFT_JIS;
721 } else if (issjishwkana(*p)) {
722 guessed = C_SHIFT_JIS;
/* conv_jistodisp: convert JIS input to EUC-JP with conv_jistoeuc(),
 * then clean the result in place with conv_unreadable_eucjp(). */
732 void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
734 conv_jistoeuc(outbuf, outlen, inbuf);
735 conv_unreadable_eucjp(outbuf);
/* conv_sjistodisp: convert Shift_JIS input to EUC-JP with
 * conv_sjistoeuc(), then clean the result in place with
 * conv_unreadable_eucjp(). */
738 void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
740 conv_sjistoeuc(outbuf, outlen, inbuf);
741 conv_unreadable_eucjp(outbuf);
/* conv_euctodisp: copy EUC-JP input into outbuf with strncpy2()
 * (limit outlen), then clean it in place with conv_unreadable_eucjp(). */
744 void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
746 strncpy2(outbuf, inbuf, outlen);
747 conv_unreadable_eucjp(outbuf);
/* conv_anytodisp: display conversion for input of unknown Japanese
 * encoding; delegates to conv_anytoutf8(). */
750 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
752 conv_anytoutf8(outbuf, outlen, inbuf);
/* conv_ustodisp: copy the input into outbuf with strncpy2() (limit
 * outlen), then replace non-ASCII bytes via conv_unreadable_8bit(). */
756 void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
758 strncpy2(outbuf, inbuf, outlen);
759 conv_unreadable_8bit(outbuf);
/* conv_latintodisp: copy the input into outbuf with strncpy2() (limit
 * outlen), then clean it in place with conv_unreadable_latin(). */
763 void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
765 strncpy2(outbuf, inbuf, outlen);
766 conv_unreadable_latin(outbuf);
/* conv_localetodisp: copy the input into outbuf with strncpy2() (limit
 * outlen), then clean it in place with conv_unreadable_locale(). */
770 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
772 strncpy2(outbuf, inbuf, outlen);
773 conv_unreadable_locale(outbuf);
/* conv_noconv: identity "conversion" - copies inbuf into outbuf with
 * strncpy2() (limit outlen).  Its address also serves as the marker for
 * "no built-in converter": conv_convert() and conv_codeset_strdup()
 * compare the selected function against it. */
776 void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
778 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_code_converter_new: allocate a zero-filled CodeConverter for
 * text in `charset`.  It records the conversion function that targets
 * UTF-8, a private copy of the charset name (g_strdup) and the matching
 * CharSet value.  Release it with conv_code_converter_destroy().
 */
781 CodeConverter *conv_code_converter_new(const gchar *charset)
785 conv = g_new0(CodeConverter, 1);
786 conv->code_conv_func = conv_get_code_conv_func(charset, CS_UTF_8);
787 conv->charset_str = g_strdup(charset);
788 conv->charset = conv_get_charset_from_str(charset);
/* conv_code_converter_destroy: release a converter; frees the charset
 * name copied by conv_code_converter_new() (and presumably the
 * structure itself). */
793 void conv_code_converter_destroy(CodeConverter *conv)
795 g_free(conv->charset_str);
/*
 * conv_convert: convert inbuf into outbuf (limit outlen) with the given
 * converter.  When a built-in conversion function was selected (anything
 * other than conv_noconv) it is called directly; the other path goes
 * through conv_iconv_strdup() towards UTF-8 and copies the result with
 * strncpy2().
 */
799 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
802 if (conv->code_conv_func != conv_noconv)
803 conv->code_conv_func(outbuf, outlen, inbuf);
807 str = conv_iconv_strdup(inbuf, conv->charset_str, CS_UTF_8);
811 strncpy2(outbuf, str, outlen);
/*
 * conv_codeset_strdup: return a newly allocated copy of inbuf converted
 * from src_code to dest_code.
 *
 * When a built-in converter exists for the pair, a work buffer of
 * (strlen(inbuf) + 1) * 3 bytes is filled by it and then shrunk to the
 * actual string length with g_realloc().  Otherwise the request is
 * forwarded to conv_iconv_strdup().
 * Returns NULL when no work buffer is available; callers must be
 * prepared for that.
 */
819 gchar *conv_codeset_strdup(const gchar *inbuf,
820 const gchar *src_code, const gchar *dest_code)
824 CodeConvFunc conv_func;
826 conv_func = conv_get_code_conv_func(src_code, dest_code);
827 if (conv_func != conv_noconv) {
828 len = (strlen(inbuf) + 1) * 3;
830 if (!buf) return NULL;
832 conv_func(buf, len, inbuf);
833 return g_realloc(buf, strlen(buf) + 1);
836 return conv_iconv_strdup(inbuf, src_code, dest_code);
/*
 * conv_get_code_conv_func: choose the built-in converter for a
 * source/destination charset pair.  conv_noconv is the initial value
 * and tells callers that no built-in converter applies.
 *
 *   - A NULL src_charset_str selects the current locale charset.
 *   - With both names NULL ("auto detection mode") an EUC-JP or
 *     Shift_JIS locale yields conv_anytodisp.
 *   - A US-ASCII destination yields conv_ustodisp.
 *   - A UTF-8 destination (explicit, or C_AUTO under a UTF-8 locale) is
 *     tested before the per-source switch.
 *   - The switch on the source charset picks the *todisp variants when
 *     the destination is C_AUTO (checking the current locale charset
 *     where shown) and the plain converters for an explicit
 *     destination.
 */
839 CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
840 const gchar *dest_charset_str)
842 CodeConvFunc code_conv = conv_noconv;
844 CharSet dest_charset;
846 if (!src_charset_str)
847 src_charset = conv_get_current_charset();
849 src_charset = conv_get_charset_from_str(src_charset_str);
851 /* auto detection mode */
852 if (!src_charset_str && !dest_charset_str) {
853 if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
854 return conv_anytodisp;
859 dest_charset = conv_get_charset_from_str(dest_charset_str);
861 if (dest_charset == C_US_ASCII)
862 return conv_ustodisp;
863 else if (dest_charset == C_UTF_8 ||
864 (dest_charset == C_AUTO &&
865 conv_get_current_charset() == C_UTF_8))
868 switch (src_charset) {
870 case C_ISO_2022_JP_2:
871 case C_ISO_2022_JP_3:
872 if (dest_charset == C_AUTO &&
873 conv_get_current_charset() == C_EUC_JP)
874 code_conv = conv_jistodisp;
875 else if (dest_charset == C_EUC_JP)
876 code_conv = conv_jistoeuc;
879 if (dest_charset == C_AUTO)
880 code_conv = conv_ustodisp;
896 if (dest_charset == C_AUTO &&
897 (conv_get_current_charset() == src_charset ||
899 code_conv = conv_latintodisp;
902 if (dest_charset == C_AUTO &&
903 conv_get_current_charset() == C_EUC_JP)
904 code_conv = conv_sjistodisp;
905 else if (dest_charset == C_EUC_JP)
906 code_conv = conv_sjistoeuc;
909 if (dest_charset == C_AUTO &&
910 conv_get_current_charset() == C_EUC_JP)
911 code_conv = conv_euctodisp;
912 else if (dest_charset == C_ISO_2022_JP ||
913 dest_charset == C_ISO_2022_JP_2 ||
914 dest_charset == C_ISO_2022_JP_3)
915 code_conv = conv_euctojis;
/*
 * conv_iconv_strdup: return a newly allocated copy of inbuf converted
 * between charsets with g_convert().
 *
 * The source charset defaults to the outgoing charset name and the
 * destination to the current charset name; isrc_code and idest_code
 * override them (presumably only when non-NULL).
 * A plain g_strdup() copy is returned when the destination is US-ASCII
 * or when both names compare equal ignoring case.
 * A warning naming both charsets is logged (presumably when g_convert()
 * fails).
 *
 * NOTE(review): g_strcasecmp() is deprecated in GLib 2 - consider
 * g_ascii_strcasecmp().
 */
924 gchar *conv_iconv_strdup(const gchar *inbuf,
925 const gchar *isrc_code, const gchar *idest_code)
927 /* presumably GLib 2's function handles the conversion details,
928 * whether iconv is sitting below, or something else */
930 gsize read_len, written_len;
931 gchar *src_code = (char *)conv_get_outgoing_charset_str();
932 gchar *dest_code = (char *)conv_get_current_charset_str();
935 src_code = (char *)isrc_code;
937 dest_code = (char *)idest_code;
939 /* don't convert if current codeset is US-ASCII */
940 if (!g_strcasecmp(dest_code, CS_US_ASCII))
941 return g_strdup(inbuf);
943 /* don't convert if src and dest codeset are identical */
944 if (!g_strcasecmp(src_code, dest_code))
945 return g_strdup(inbuf);
947 /* FIXME: unchecked inbuf? Can't see at this level. */
/* g_convert() expects the target codeset before the source codeset. */
948 outbuf = g_convert(inbuf, strlen(inbuf), dest_code, src_code,
949 &read_len, &written_len, NULL);
952 g_warning(_("Valid locale type set? (Currently: %s to %s)\n"),
953 src_code, dest_code);
/*
 * Table pairing each CharSet value with a charset name.  One CharSet
 * may be listed under several names (e.g. C_US_ASCII, C_EUC_JP,
 * C_SHIFT_JIS); all names are entered in the name -> CharSet hash
 * table, while the CharSet -> name table checks for an existing entry
 * before inserting (presumably so the first name listed wins).
 */
958 static const struct {
962 {C_US_ASCII, CS_US_ASCII},
963 {C_US_ASCII, CS_ANSI_X3_4_1968},
966 {C_ISO_8859_1, CS_ISO_8859_1},
967 {C_ISO_8859_2, CS_ISO_8859_2},
968 {C_ISO_8859_3, CS_ISO_8859_3},
969 {C_ISO_8859_4, CS_ISO_8859_4},
970 {C_ISO_8859_5, CS_ISO_8859_5},
971 {C_ISO_8859_6, CS_ISO_8859_6},
972 {C_ISO_8859_7, CS_ISO_8859_7},
973 {C_ISO_8859_8, CS_ISO_8859_8},
974 {C_ISO_8859_9, CS_ISO_8859_9},
975 {C_ISO_8859_10, CS_ISO_8859_10},
976 {C_ISO_8859_11, CS_ISO_8859_11},
977 {C_ISO_8859_13, CS_ISO_8859_13},
978 {C_ISO_8859_14, CS_ISO_8859_14},
979 {C_ISO_8859_15, CS_ISO_8859_15},
980 {C_BALTIC, CS_BALTIC},
981 {C_CP1250, CS_CP1250},
982 {C_CP1251, CS_CP1251},
983 {C_CP1252, CS_CP1252},
984 {C_CP1253, CS_CP1253},
985 {C_CP1254, CS_CP1254},
986 {C_CP1255, CS_CP1255},
987 {C_CP1256, CS_CP1256},
988 {C_CP1257, CS_CP1257},
989 {C_CP1258, CS_CP1258},
990 {C_WINDOWS_1250, CS_WINDOWS_1250},
991 {C_WINDOWS_1251, CS_WINDOWS_1251},
992 {C_WINDOWS_1252, CS_WINDOWS_1252},
993 {C_WINDOWS_1253, CS_WINDOWS_1253},
994 {C_WINDOWS_1254, CS_WINDOWS_1254},
995 {C_WINDOWS_1255, CS_WINDOWS_1255},
996 {C_WINDOWS_1256, CS_WINDOWS_1256},
997 {C_WINDOWS_1257, CS_WINDOWS_1257},
998 {C_WINDOWS_1258, CS_WINDOWS_1258},
999 {C_KOI8_R, CS_KOI8_R},
1000 {C_KOI8_T, CS_KOI8_T},
1001 {C_KOI8_U, CS_KOI8_U},
1002 {C_ISO_2022_JP, CS_ISO_2022_JP},
1003 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
1004 {C_ISO_2022_JP_3, CS_ISO_2022_JP_3},
1005 {C_EUC_JP, CS_EUC_JP},
1006 {C_EUC_JP, CS_EUCJP},
1007 {C_SHIFT_JIS, CS_SHIFT_JIS},
1008 {C_SHIFT_JIS, CS_SHIFT__JIS},
1009 {C_SHIFT_JIS, CS_SJIS},
1010 {C_ISO_2022_KR, CS_ISO_2022_KR},
1011 {C_EUC_KR, CS_EUC_KR},
1012 {C_ISO_2022_CN, CS_ISO_2022_CN},
1013 {C_EUC_CN, CS_EUC_CN},
1014 {C_GB2312, CS_GB2312},
1016 {C_EUC_TW, CS_EUC_TW},
1018 {C_BIG5_HKSCS, CS_BIG5_HKSCS},
1019 {C_TIS_620, CS_TIS_620},
1020 {C_WINDOWS_874, CS_WINDOWS_874},
1021 {C_GEORGIAN_PS, CS_GEORGIAN_PS},
1022 {C_TCVN5712_1, CS_TCVN5712_1},
/*
 * locale_table: locale name -> (locale charset, default outgoing
 * charset).  conv_get_current_charset() reads the `charset` member and
 * conv_get_outgoing_charset() reads `out_charset`.  Both compare an
 * entry's name as a case-insensitive prefix of the current locale, and
 * entries without a ".codeset" part also match a bare two-letter
 * locale.  Entries with an explicit codeset are listed before the bare
 * "ll_CC" entry of the same locale (presumably because the first match
 * wins - keep that order when adding entries).
 */
1025 static const struct {
1026 gchar *const locale;
1028 CharSet out_charset;
1029 } locale_table[] = {
1030 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
1031 {"ja_JP.EUC-JP" , C_EUC_JP , C_ISO_2022_JP},
1032 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
1033 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
1034 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
1035 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
1036 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
1037 {"ko_KR.EUC-KR" , C_EUC_KR , C_EUC_KR},
1038 {"ko_KR" , C_EUC_KR , C_EUC_KR},
1039 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
1040 {"zh_CN.GBK" , C_GBK , C_GB2312},
1041 {"zh_CN" , C_GB2312 , C_GB2312},
1042 {"zh_HK" , C_BIG5_HKSCS , C_BIG5_HKSCS},
1043 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
1044 {"zh_TW.EUC-TW" , C_EUC_TW , C_BIG5},
1045 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
1046 {"zh_TW" , C_BIG5 , C_BIG5},
1048 {"ru_RU.KOI8-R" , C_KOI8_R , C_KOI8_R},
1049 {"ru_RU.KOI8R" , C_KOI8_R , C_KOI8_R},
1050 {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
1051 {"ru_RU" , C_ISO_8859_5 , C_KOI8_R},
1052 {"tg_TJ" , C_KOI8_T , C_KOI8_T},
1053 {"ru_UA" , C_KOI8_U , C_KOI8_U},
1054 {"uk_UA.CP1251" , C_WINDOWS_1251, C_KOI8_U},
1055 {"uk_UA" , C_KOI8_U , C_KOI8_U},
1057 {"be_BY" , C_WINDOWS_1251, C_WINDOWS_1251},
1058 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
1060 {"yi_US" , C_WINDOWS_1255, C_WINDOWS_1255},
1062 {"af_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1063 {"br_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1064 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1065 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1066 {"de_AT" , C_ISO_8859_1 , C_ISO_8859_1},
1067 {"de_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1068 {"de_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1069 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
1070 {"de_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1071 {"en_AU" , C_ISO_8859_1 , C_ISO_8859_1},
1072 {"en_BW" , C_ISO_8859_1 , C_ISO_8859_1},
1073 {"en_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1074 {"en_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1075 {"en_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1076 {"en_HK" , C_ISO_8859_1 , C_ISO_8859_1},
1077 {"en_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1078 {"en_NZ" , C_ISO_8859_1 , C_ISO_8859_1},
1079 {"en_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1080 {"en_SG" , C_ISO_8859_1 , C_ISO_8859_1},
1081 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
1082 {"en_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1083 {"en_ZW" , C_ISO_8859_1 , C_ISO_8859_1},
1084 {"es_AR" , C_ISO_8859_1 , C_ISO_8859_1},
1085 {"es_BO" , C_ISO_8859_1 , C_ISO_8859_1},
1086 {"es_CL" , C_ISO_8859_1 , C_ISO_8859_1},
1087 {"es_CO" , C_ISO_8859_1 , C_ISO_8859_1},
1088 {"es_CR" , C_ISO_8859_1 , C_ISO_8859_1},
1089 {"es_DO" , C_ISO_8859_1 , C_ISO_8859_1},
1090 {"es_EC" , C_ISO_8859_1 , C_ISO_8859_1},
1091 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1092 {"es_GT" , C_ISO_8859_1 , C_ISO_8859_1},
1093 {"es_HN" , C_ISO_8859_1 , C_ISO_8859_1},
1094 {"es_MX" , C_ISO_8859_1 , C_ISO_8859_1},
1095 {"es_NI" , C_ISO_8859_1 , C_ISO_8859_1},
1096 {"es_PA" , C_ISO_8859_1 , C_ISO_8859_1},
1097 {"es_PE" , C_ISO_8859_1 , C_ISO_8859_1},
1098 {"es_PR" , C_ISO_8859_1 , C_ISO_8859_1},
1099 {"es_PY" , C_ISO_8859_1 , C_ISO_8859_1},
1100 {"es_SV" , C_ISO_8859_1 , C_ISO_8859_1},
1101 {"es_US" , C_ISO_8859_1 , C_ISO_8859_1},
1102 {"es_UY" , C_ISO_8859_1 , C_ISO_8859_1},
1103 {"es_VE" , C_ISO_8859_1 , C_ISO_8859_1},
1104 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
1105 {"eu_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1106 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1107 {"fo_FO" , C_ISO_8859_1 , C_ISO_8859_1},
1108 {"fr_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1109 {"fr_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1110 {"fr_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1111 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1112 {"fr_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1113 {"ga_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1114 {"gl_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1115 {"gv_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1116 {"id_ID" , C_ISO_8859_1 , C_ISO_8859_1},
1117 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
1118 {"it_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1119 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
1120 {"kl_GL" , C_ISO_8859_1 , C_ISO_8859_1},
1121 {"kw_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1122 {"ms_MY" , C_ISO_8859_1 , C_ISO_8859_1},
1123 {"nl_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1124 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
1125 {"nn_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1126 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1127 {"oc_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1128 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
1129 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
1130 {"sq_AL" , C_ISO_8859_1 , C_ISO_8859_1},
1131 {"sv_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1132 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
1133 {"tl_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1134 {"uz_UZ" , C_ISO_8859_1 , C_ISO_8859_1},
1135 {"wa_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1137 {"bs_BA" , C_ISO_8859_2 , C_ISO_8859_2},
1138 {"cs_CZ" , C_ISO_8859_2 , C_ISO_8859_2},
1139 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
1140 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
1141 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
1142 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
1143 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
1144 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
1146 {"sr_YU@cyrillic" , C_ISO_8859_5 , C_ISO_8859_5},
1147 {"sr_YU" , C_ISO_8859_2 , C_ISO_8859_2},
1149 {"mt_MT" , C_ISO_8859_3 , C_ISO_8859_3},
1151 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
1152 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1153 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1154 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
1156 {"mk_MK" , C_ISO_8859_5 , C_ISO_8859_5},
1158 {"ar_AE" , C_ISO_8859_6 , C_ISO_8859_6},
1159 {"ar_BH" , C_ISO_8859_6 , C_ISO_8859_6},
1160 {"ar_DZ" , C_ISO_8859_6 , C_ISO_8859_6},
1161 {"ar_EG" , C_ISO_8859_6 , C_ISO_8859_6},
1162 {"ar_IQ" , C_ISO_8859_6 , C_ISO_8859_6},
1163 {"ar_JO" , C_ISO_8859_6 , C_ISO_8859_6},
1164 {"ar_KW" , C_ISO_8859_6 , C_ISO_8859_6},
1165 {"ar_LB" , C_ISO_8859_6 , C_ISO_8859_6},
1166 {"ar_LY" , C_ISO_8859_6 , C_ISO_8859_6},
1167 {"ar_MA" , C_ISO_8859_6 , C_ISO_8859_6},
1168 {"ar_OM" , C_ISO_8859_6 , C_ISO_8859_6},
1169 {"ar_QA" , C_ISO_8859_6 , C_ISO_8859_6},
1170 {"ar_SA" , C_ISO_8859_6 , C_ISO_8859_6},
1171 {"ar_SD" , C_ISO_8859_6 , C_ISO_8859_6},
1172 {"ar_SY" , C_ISO_8859_6 , C_ISO_8859_6},
1173 {"ar_TN" , C_ISO_8859_6 , C_ISO_8859_6},
1174 {"ar_YE" , C_ISO_8859_6 , C_ISO_8859_6},
1176 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
1177 {"he_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1178 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1179 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
1181 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
1182 {"mi_NZ" , C_ISO_8859_13 , C_ISO_8859_13},
1184 {"cy_GB" , C_ISO_8859_14 , C_ISO_8859_14},
1186 {"ar_IN" , C_UTF_8 , C_UTF_8},
1187 {"en_IN" , C_UTF_8 , C_UTF_8},
1188 {"se_NO" , C_UTF_8 , C_UTF_8},
1189 {"ta_IN" , C_UTF_8 , C_UTF_8},
1190 {"te_IN" , C_UTF_8 , C_UTF_8},
1191 {"ur_PK" , C_UTF_8 , C_UTF_8},
1193 {"th_TH" , C_TIS_620 , C_TIS_620},
1194 /* {"th_TH" , C_WINDOWS_874}, */
1195 /* {"th_TH" , C_ISO_8859_11}, */
1197 {"ka_GE" , C_GEORGIAN_PS , C_GEORGIAN_PS},
1198 {"vi_VN.TCVN" , C_TCVN5712_1 , C_TCVN5712_1},
1200 {"C" , C_US_ASCII , C_US_ASCII},
1201 {"POSIX" , C_US_ASCII , C_US_ASCII},
1202 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
/*
 * conv_get_charset_to_str_table: return the hash table that maps a
 * CharSet value (stored as a pointer-sized key, compared with
 * g_direct_equal) to a charset name.  The table is kept in a
 * function-local static and filled from `charsets`; a lookup is made
 * before each insert, so a CharSet listed under several names presumably
 * keeps only one of them.
 */
1205 static GHashTable *conv_get_charset_to_str_table(void)
1207 static GHashTable *table;
1213 table = g_hash_table_new(NULL, g_direct_equal);
1215 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1216 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1219 (table, GUINT_TO_POINTER(charsets[i].charset),
/*
 * conv_get_charset_from_str_table: return the hash table that maps a
 * charset name to its CharSet value.  Keys are hashed and compared with
 * str_case_hash / str_case_equal (presumably case-insensitive).  The
 * table is kept in a function-local static and receives every entry of
 * `charsets`, aliases included.
 */
1227 static GHashTable *conv_get_charset_from_str_table(void)
1229 static GHashTable *table;
1235 table = g_hash_table_new(str_case_hash, str_case_equal);
1237 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1238 g_hash_table_insert(table, charsets[i].name,
1239 GUINT_TO_POINTER(charsets[i].charset));
/* conv_get_charset_str: return the name registered for `charset`, or
 * NULL when the CharSet -> name table has no entry for it. */
1245 const gchar *conv_get_charset_str(CharSet charset)
1249 table = conv_get_charset_to_str_table();
1250 return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
/* conv_get_charset_from_str: translate a charset name into its CharSet
 * value.  A NULL name yields C_AUTO; a name missing from the table
 * yields the CharSet whose numeric value is 0, because the failed
 * lookup returns NULL. */
1253 CharSet conv_get_charset_from_str(const gchar *charset)
1257 if (!charset) return C_AUTO;
1259 table = conv_get_charset_from_str_table();
1260 return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
/*
 * conv_get_current_charset: derive the CharSet of the current locale
 * and cache it in a function-local static (-1 means "not computed
 * yet").
 *
 * Checks, in order:
 *   - a locale name containing "UTF-8" gives C_UTF_8;
 *   - a locale name ending in "@euro" gives C_ISO_8859_15;
 *   - otherwise locale_table is scanned: an entry matches when it is a
 *     case-insensitive prefix of the locale name, or - for entries
 *     without a ".codeset" part - when the locale name is just two
 *     letters equal to the entry's language code.
 * C_US_ASCII and C_AUTO are assigned on the remaining paths (presumably
 * "no locale available" and "no table match").
 */
1263 CharSet conv_get_current_charset(void)
1265 static CharSet cur_charset = -1;
1266 const gchar *cur_locale;
1270 if (cur_charset != -1)
1273 cur_locale = conv_get_current_locale();
1275 cur_charset = C_US_ASCII;
1279 if (strcasestr(cur_locale, "UTF-8")) {
1280 cur_charset = C_UTF_8;
1284 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1285 cur_charset = C_ISO_8859_15;
1289 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1292 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1293 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1294 if (!g_strncasecmp(cur_locale, locale_table[i].locale,
1295 strlen(locale_table[i].locale))) {
1296 cur_charset = locale_table[i].charset;
1298 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1299 !strchr(p + 1, '.')) {
1300 if (strlen(cur_locale) == 2 &&
1301 !g_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1302 cur_charset = locale_table[i].charset;
1308 cur_charset = C_AUTO;
/* conv_get_current_charset_str: name of the current locale charset,
 * cached in a function-local static; CS_US_ASCII is returned when no
 * name is registered for it. */
1312 const gchar *conv_get_current_charset_str(void)
1314 static const gchar *codeset = NULL;
1317 codeset = conv_get_charset_str(conv_get_current_charset());
1319 return codeset ? codeset : CS_US_ASCII;
/*
 * conv_get_outgoing_charset: derive the default charset for outgoing
 * mail from the current locale and cache it in a function-local static
 * (-1 means "not computed yet").
 *
 * A locale name ending in "@euro" gives C_ISO_8859_15; otherwise
 * locale_table is scanned with the same matching rules as in
 * conv_get_current_charset(), taking the entry's out_charset member.
 * C_AUTO is assigned on another path (presumably when no locale is
 * available).
 */
1322 CharSet conv_get_outgoing_charset(void)
1324 static CharSet out_charset = -1;
1325 const gchar *cur_locale;
1329 if (out_charset != -1)
1332 cur_locale = conv_get_current_locale();
1334 out_charset = C_AUTO;
1338 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1339 out_charset = C_ISO_8859_15;
1343 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1346 if (!g_strncasecmp(cur_locale, locale_table[i].locale,
1347 strlen(locale_table[i].locale))) {
1348 out_charset = locale_table[i].out_charset;
1350 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1351 !strchr(p + 1, '.')) {
1352 if (strlen(cur_locale) == 2 &&
1353 !g_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1354 out_charset = locale_table[i].out_charset;
/*
 * conv_get_outgoing_charset_str: name of the charset to use for
 * outgoing mail.
 *
 * The user preference prefs_common.outgoing_charset wins when it is set
 * to anything other than CS_AUTO.  A preference whose first character
 * is not a letter is treated as invalid: it is freed and reset to
 * CS_AUTO.  In the automatic case the locale-derived charset is used,
 * with CS_US_ASCII as the fallback when no name is registered for it.
 */
1363 const gchar *conv_get_outgoing_charset_str(void)
1365 CharSet out_charset;
1368 if (prefs_common.outgoing_charset) {
1369 if (!isalpha((guchar)prefs_common.outgoing_charset[0])) {
1370 g_free(prefs_common.outgoing_charset);
1371 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1372 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1373 return prefs_common.outgoing_charset;
1376 out_charset = conv_get_outgoing_charset();
1377 str = conv_get_charset_str(out_charset);
1379 return str ? str : CS_US_ASCII;
/* conv_is_multibyte_encoding: classify `encoding` by CharSet value
 * (the name suggests it reports multi-byte encodings); the
 * ISO-2022-JP-2 and ISO-2022-JP-3 variants are among the listed
 * cases. */
1382 gboolean conv_is_multibyte_encoding(CharSet encoding)
1390 case C_ISO_2022_JP_2:
1391 case C_ISO_2022_JP_3:
/*
 * conv_get_current_locale: return the name of the current locale,
 * cached in a function-local static after the first call.
 *
 * The environment variables LC_ALL, LC_CTYPE and LANG are consulted in
 * that order (an empty value counts as unset); if none is usable the
 * value of setlocale(LC_CTYPE, NULL) is taken.  A non-empty result is
 * duplicated so the cached pointer does not depend on the environment.
 *
 * NOTE(review): the string is duplicated twice (into tmp, then into
 * cur_locale); the intermediate copy looks redundant - confirm tmp is
 * freed.
 */
1405 const gchar *conv_get_current_locale(void)
1407 static const gchar *cur_locale = NULL;
1409 if (cur_locale != NULL)
1412 cur_locale = g_getenv("LC_ALL");
1413 if (!cur_locale || !strlen(cur_locale))
1414 cur_locale = g_getenv("LC_CTYPE");
1415 if (!cur_locale || !strlen(cur_locale))
1416 cur_locale = g_getenv("LANG");
1417 if (!cur_locale || !strlen(cur_locale))
1418 cur_locale = setlocale(LC_CTYPE, NULL);
1420 if (cur_locale && strlen(cur_locale)) {
1421 gchar *tmp = g_strdup(cur_locale);
1422 cur_locale = g_strdup(tmp);
/*
 * conv_unmime_header_overwrite: decode a MIME-encoded header in place.
 *
 * str must not be NULL (checked with g_return_if_fail).
 * For locales whose name starts with "ja" the text is first passed
 * through conv_anytodisp() into a temporary buffer of
 * strlen(str) * 2 + 1 bytes, and unmime_header() then writes its result
 * back into str.  For other locales unmime_header() decodes into a
 * temporary buffer of strlen(str) + 1 bytes, which is copied back with
 * strncpy2().
 *
 * NOTE(review): the temporaries come from Xalloca (presumably the
 * stack) and scale with the header length; in the "ja" path the write
 * back into str has no explicit bound - confirm the decoded text cannot
 * exceed the original length.
 */
1429 void conv_unmime_header_overwrite(gchar *str)
1433 CharSet cur_charset;
1434 const gchar *locale;
1436 g_return_if_fail(str != NULL);
1438 cur_charset = conv_get_current_charset();
1441 /* Should we always ensure to convert? */
1442 locale = conv_get_current_locale();
1444 if (locale && !strncasecmp(locale, "ja", 2)) {
1445 buflen = strlen(str) * 2 + 1;
1446 Xalloca(buf, buflen, return);
1447 conv_anytodisp(buf, buflen, str);
1448 unmime_header(str, buf);
1450 buflen = strlen(str) + 1;
1451 Xalloca(buf, buflen, return);
1452 unmime_header(buf, str);
1453 strncpy2(str, buf, buflen);
/*
 * conv_unmime_header: decode the MIME-encoded header str into outbuf.
 *
 * outbuf is zero-filled over outlen bytes first, so the later
 * strncpy() of at most outlen - 1 bytes always leaves a terminating
 * NUL.
 * For locales whose name starts with "ja" the text is first passed
 * through conv_anytodisp() into a temporary buffer of
 * strlen(str) * 2 + 1 bytes and then decoded with unmime_header().
 * Otherwise it is decoded directly; if the result is not valid UTF-8 it
 * is converted from the current locale charset with
 * conv_codeset_strdup() and copied back.
 *
 * NOTE(review): the `outbuf &&` test comes after outbuf has already
 * been written to, so it cannot guard against a NULL outbuf.
 */
1457 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1458 const gchar *charset)
1460 const gchar *locale;
1462 memset(outbuf, 0, outlen);
1465 /* Should we always ensure to convert? */
1466 locale = conv_get_current_locale();
1468 if (locale && !strncasecmp(locale, "ja", 2)) {
1472 buflen = strlen(str) * 2 + 1;
1473 Xalloca(buf, buflen, return);
1474 conv_anytodisp(buf, buflen, str);
1475 unmime_header(outbuf, buf);
1478 unmime_header(outbuf, str);
1479 if (outbuf && !g_utf8_validate(outbuf, -1, NULL)) {
1480 tmp = conv_codeset_strdup(outbuf,
1481 conv_get_current_charset_str(),
1484 strncpy(outbuf, tmp, outlen-1);
/* Line-length limits used when folding an encoded header: the soft
 * limit that triggers a line break and the hard upper bound. */
1491 #define MAX_LINELEN 76
1492 #define MAX_HARD_LINELEN 996
/* Delimiters that open and close a MIME encoded-word. */
1493 #define MIMESEP_BEGIN "=?"
1494 #define MIMESEP_END "?="
/* Number of base64 characters produced for len input bytes: four per
 * complete group of three bytes plus four for a partial group. */
1496 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/* Helper for conv_encode_header(): uses that function's locals (dest,
 * destp, srcp, len, left) directly.  It first checks that enough room
 * remains in dest, then - when cond holds and input remains - handles
 * the line break and resets `left` to MAX_LINELEN - 1. */
1498 #define LBREAK_IF_REQUIRED(cond, is_plain_text) \
1500 if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) { \
1505 if ((cond) && *srcp) { \
1506 if (destp > (guchar *)dest && left < MAX_LINELEN - 1) { \
1507 if (isspace(*(destp - 1))) \
1509 else if (is_plain_text && isspace(*srcp)) \
1514 left = MAX_LINELEN - 1; \
/*
 * conv_encode_header: write src into dest (capacity len) as a mail
 * header value, turning non-ASCII runs into MIME encoded-words of the
 * form "=?charset?X?text?=" and folding long lines.
 *
 *   header_len  columns already used on the first line; the remaining
 *               width starts at MAX_LINELEN - header_len.
 *   addr_field  when TRUE, '(' and ')' are kept outside encoded-words.
 *
 * The encoding marker is "?B?" (base64) when MB_CUR_MAX > 1 and "?Q?"
 * otherwise.  src is treated as UTF-8.  The target charset is the
 * outgoing charset, with US-ASCII replaced by ISO-8859-1; a trial
 * conversion of the whole string is made first and CS_UTF_8 is used as
 * the alternative target (presumably when that trial fails).
 *
 * ASCII words are copied through unchanged.  For a non-ASCII run the
 * loop grows the candidate piece one character at a time, converting
 * and measuring its encoded length, until the encoded-word no longer
 * fits in the space left on the line.  If a piece cannot be converted,
 * its non-ASCII bytes are replaced via conv_unreadable_8bit().
 */
1520 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1521 gint header_len, gboolean addr_field)
1523 const gchar *cur_encoding;
1524 const gchar *out_encoding;
1528 const guchar *srcp = src;
1529 guchar *destp = dest;
1530 gboolean use_base64;
1533 if (MB_CUR_MAX > 1) {
1535 mimesep_enc = "?B?";
1538 mimesep_enc = "?Q?";
1541 cur_encoding = CS_UTF_8; /* gtk2 */
1543 out_encoding = conv_get_outgoing_charset_str();
1544 if (!strcmp(out_encoding, CS_US_ASCII))
1545 out_encoding = CS_ISO_8859_1;
1547 testbuf = conv_codeset_strdup(src, cur_encoding, out_encoding);
1549 if (testbuf != NULL)
1552 out_encoding = CS_UTF_8;
1554 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1555 strlen(mimesep_enc) + strlen(MIMESEP_END);
1557 left = MAX_LINELEN - header_len;
1560 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1562 while (isspace(*srcp)) {
1565 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1568 /* output as it is if the next word is ASCII string */
1569 if (!is_next_nonascii(srcp)) {
1572 word_len = get_next_word_len(srcp);
1573 LBREAK_IF_REQUIRED(left < word_len, TRUE);
1574 while (word_len > 0) {
1575 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1584 /* don't include parentheses in encoded strings */
1585 if (addr_field && (*srcp == '(' || *srcp == ')')) {
1586 LBREAK_IF_REQUIRED(left < 2, FALSE);
1597 const guchar *p = srcp;
1599 gint out_enc_str_len;
1600 gint mime_block_len;
1601 gboolean cont = FALSE;
1603 while (*p != '\0') {
1604 if (isspace(*p) && !is_next_nonascii(p + 1))
1606 /* don't include parentheses in encoded
1608 if (addr_field && (*p == '(' || *p == ')'))
1611 if (MB_CUR_MAX > 1) {
1612 mb_len = mblen(p, MB_CUR_MAX);
1614 g_warning("conv_encode_header(): invalid multibyte character encountered\n");
1620 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1621 out_str = conv_codeset_strdup
1622 (part_str, cur_encoding, out_encoding);
1624 g_warning("conv_encode_header(): code conversion failed\n");
1625 conv_unreadable_8bit(part_str);
1626 out_str = g_strdup(part_str);
1628 out_str_len = strlen(out_str);
1631 out_enc_str_len = B64LEN(out_str_len);
1634 qp_get_q_encoding_len(out_str);
1638 if (mimestr_len + out_enc_str_len <= left) {
1641 } else if (cur_len == 0) {
1642 LBREAK_IF_REQUIRED(1, FALSE);
1651 Xstrndup_a(part_str, srcp, cur_len, );
1652 out_str = conv_codeset_strdup
1653 (part_str, cur_encoding, out_encoding);
1655 g_warning("conv_encode_header(): code conversion failed\n");
1656 conv_unreadable_8bit(part_str);
1657 out_str = g_strdup(part_str);
1659 out_str_len = strlen(out_str);
1662 out_enc_str_len = B64LEN(out_str_len);
1665 qp_get_q_encoding_len(out_str);
1667 Xalloca(enc_str, out_enc_str_len + 1, );
1669 base64_encode(enc_str, out_str, out_str_len);
1671 qp_q_encode(enc_str, out_str);
1675 /* output MIME-encoded string block */
1676 mime_block_len = mimestr_len + strlen(enc_str);
1677 g_snprintf(destp, mime_block_len + 1,
1678 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1679 out_encoding, mimesep_enc, enc_str);
1680 destp += mime_block_len;
1683 left -= mime_block_len;
1686 LBREAK_IF_REQUIRED(cont, FALSE);
1696 #undef LBREAK_IF_REQUIRED