2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2004 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
38 #include "quoted-printable.h"
40 #include "prefs_common.h"
/* Character written in place of bytes that cannot be shown as-is. */
50 #define SUBST_CHAR '_'
/*
 * Byte-class predicates.  Each one masks its argument to the low 8 bits
 * before comparing.  The argument is expanded more than once, so do not
 * pass an expression with side effects.
 */
/* Byte in the range 0xa1..0xfe (EUC-JP two-byte character byte). */
53 #define iseuckanji(c) \
54 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* Byte equal to 0x8e (lead byte of an EUC-JP half-width kana). */
55 #define iseuchwkana1(c) \
56 (((c) & 0xff) == 0x8e)
/* Byte in the range 0xa1..0xdf (second byte of an EUC-JP half-width kana). */
57 #define iseuchwkana2(c) \
58 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* NOTE(review): tests for byte 0x8f; presumably the body of iseucaux(),
 * which other functions in this file call -- confirm. */
60 (((c) & 0xff) == 0x8f)
/* Shift_JIS two-byte lead byte: 0x81..0x9f or 0xe0..0xfc. */
61 #define issjiskanji1(c) \
62 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
63 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* Shift_JIS two-byte trail byte: 0x40..0x7e or 0x80..0xfc. */
64 #define issjiskanji2(c) \
65 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
66 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* Shift_JIS single-byte half-width kana: 0xa1..0xdf. */
67 #define issjishwkana(c) \
68 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
71 if (state != JIS_KANJI) { \
79 if (state != JIS_ASCII) { \
87 if (state != JIS_HWKANA) { \
95 if (state != JIS_AUXKANJI) { \
100 state = JIS_AUXKANJI; \
/*
 * conv_jistoeuc: convert the NUL-terminated JIS (ISO-2022-JP style) string
 * inbuf into EUC-JP bytes stored in outbuf.
 *
 * The current character set is tracked in a JISState that starts as
 * JIS_ASCII.  Designators whose next byte is '@' or 'B', or '(' (auxiliary
 * kanji), are recognised, as are '(' followed by 'B', 'J' or 'I', and the
 * single bytes 0x0e and 0x0f.  Anything else is treated as an unknown
 * escape sequence.  Bytes of non-ASCII characters are emitted with bit 7
 * set (| 0x80); if the input ends in the middle of a two-byte character
 * the loop stops after the first byte.
 *
 * NOTE(review): how outlen limits the number of bytes written is not
 * evident from these statements -- confirm before relying on it.
 */
103 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
105 const guchar *in = inbuf;
106 guchar *out = outbuf;
107 JISState state = JIS_ASCII;
109 while (*in != '\0') {
113 if (*(in + 1) == '@' || *(in + 1) == 'B') {
116 } else if (*(in + 1) == '(' &&
118 state = JIS_AUXKANJI;
121 /* unknown escape sequence */
124 } else if (*in == '(') {
125 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
128 } else if (*(in + 1) == 'I') {
132 /* unknown escape sequence */
136 /* unknown escape sequence */
139 } else if (*in == 0x0e) {
142 } else if (*in == 0x0f) {
151 *out++ = *in++ | 0x80;
152 if (*in == '\0') break;
153 *out++ = *in++ | 0x80;
157 *out++ = *in++ | 0x80;
161 *out++ = *in++ | 0x80;
162 if (*in == '\0') break;
163 *out++ = *in++ | 0x80;
/* Half-width kana codes of the voiced (dakuten) and semi-voiced
 * (handakuten) sound marks, as compared against sound_sym below. */
172 #define JIS_HWDAKUTEN 0x5e
173 #define JIS_HWHANDAKUTEN 0x5f
/*
 * conv_jis_hantozen: map a half-width kana code (jis_code, valid range
 * 0x21..0x5f) to a two-byte full-width code written big-endian into
 * outbuf[0] and outbuf[1].
 *
 * sound_sym is the code that follows the kana in the input.  When it is
 * JIS_HWDAKUTEN and jis_code is 0x36..0x4e, dakuten_tbl supplies a combined
 * voiced character.  When it is JIS_HWHANDAKUTEN and jis_code is
 * 0x4a..0x4e, handakuten_tbl is used.  Otherwise the plain mapping
 * h2z_tbl[jis_code - 0x20] applies.
 *
 * NOTE(review): the 0x0000 entries in dakuten_tbl presumably mark kana that
 * have no voiced form, and the gint result is stored by the caller in a
 * variable named len -- confirm both.
 */
175 static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
177 static guint16 h2z_tbl[] = {
179 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
180 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
182 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
183 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
185 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
186 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
188 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
189 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
192 static guint16 dakuten_tbl[] = {
194 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
195 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
197 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
198 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
201 static guint16 handakuten_tbl[] = {
203 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
211 if (jis_code < 0x21 || jis_code > 0x5f)
214 if (sound_sym == JIS_HWDAKUTEN &&
215 jis_code >= 0x36 && jis_code <= 0x4e) {
216 out_code = dakuten_tbl[jis_code - 0x30];
218 *outbuf = out_code >> 8;
219 *(outbuf + 1) = out_code & 0xff;
224 if (sound_sym == JIS_HWHANDAKUTEN &&
225 jis_code >= 0x4a && jis_code <= 0x4e) {
226 out_code = handakuten_tbl[jis_code - 0x4a];
227 *outbuf = out_code >> 8;
228 *(outbuf + 1) = out_code & 0xff;
232 out_code = h2z_tbl[jis_code - 0x20];
233 *outbuf = out_code >> 8;
234 *(outbuf + 1) = out_code & 0xff;
/*
 * conv_euctojis: convert the NUL-terminated EUC-JP string inbuf into JIS
 * (7-bit) bytes stored in outbuf, starting in state JIS_ASCII.
 *
 * Two-byte kanji (both bytes satisfy iseuckanji) are emitted with bit 7
 * cleared (& 0x7f).  Half-width kana (0x8e followed by a kana byte) are
 * either passed through as 7-bit kana when
 * prefs_common.allow_jisx0201_kana is set, or converted to full-width via
 * conv_jis_hantozen().  In the latter case the next character is also
 * inspected so that a following sound mark can be merged in.  Auxiliary
 * kanji are introduced by a byte matching iseucaux().  The repeated
 * "*in != '\0' && !isascii(*in)" tests deal with malformed sequences.
 *
 * NOTE(review): how outlen limits the number of bytes written is not
 * evident from these statements -- confirm.
 */
238 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
240 const guchar *in = inbuf;
241 guchar *out = outbuf;
242 JISState state = JIS_ASCII;
244 while (*in != '\0') {
248 } else if (iseuckanji(*in)) {
249 if (iseuckanji(*(in + 1))) {
251 *out++ = *in++ & 0x7f;
252 *out++ = *in++ & 0x7f;
257 if (*in != '\0' && !isascii(*in)) {
262 } else if (iseuchwkana1(*in)) {
263 if (iseuchwkana2(*(in + 1))) {
264 if (prefs_common.allow_jisx0201_kana) {
267 *out++ = *in++ & 0x7f;
272 if (iseuchwkana1(*(in + 2)) &&
273 iseuchwkana2(*(in + 3)))
274 len = conv_jis_hantozen
276 *(in + 1), *(in + 3));
278 len = conv_jis_hantozen
293 if (*in != '\0' && !isascii(*in)) {
298 } else if (iseucaux(*in)) {
300 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
302 *out++ = *in++ & 0x7f;
303 *out++ = *in++ & 0x7f;
306 if (*in != '\0' && !isascii(*in)) {
309 if (*in != '\0' && !isascii(*in)) {
/*
 * conv_sjistoeuc: convert the NUL-terminated Shift_JIS string inbuf into
 * EUC-JP bytes stored in outbuf.
 *
 * For a valid two-byte character the lead byte is rebased by 0x70 (lead
 * below 0xa0) or 0xb0 (otherwise) and doubled, the trail byte is adjusted
 * to match, and both bytes are written with bit 7 set.  Single-byte
 * half-width kana (issjishwkana) are handled in their own branch.
 *
 * NOTE(review): how outlen limits the number of bytes written is not
 * evident from these statements -- confirm.
 */
326 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
328 const guchar *in = inbuf;
329 guchar *out = outbuf;
331 while (*in != '\0') {
334 } else if (issjiskanji1(*in)) {
335 if (issjiskanji2(*(in + 1))) {
337 guchar out2 = *(in + 1);
340 row = out1 < 0xa0 ? 0x70 : 0xb0;
342 out1 = (out1 - row) * 2 - 1;
343 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
345 out1 = (out1 - row) * 2;
349 *out++ = out1 | 0x80;
350 *out++ = out2 | 0x80;
355 if (*in != '\0' && !isascii(*in)) {
360 } else if (issjishwkana(*in)) {
/*
 * conv_anytoeuc: guess the Japanese encoding of inbuf with
 * conv_guess_ja_encoding() and convert it to EUC-JP in outbuf, using
 * conv_jistoeuc() or conv_sjistoeuc() as appropriate.  Input in any other
 * encoding is copied unchanged with strncpy2().
 */
372 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
374 switch (conv_guess_ja_encoding(inbuf)) {
376 conv_jistoeuc(outbuf, outlen, inbuf);
379 conv_sjistoeuc(outbuf, outlen, inbuf);
382 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_anytoutf8: guess the Japanese encoding of inbuf and convert it to
 * UTF-8 in outbuf.  ISO-2022-JP, Shift_JIS and EUC-JP input each go through
 * conv_codeset_strdup() and the result is copied with strncpy2(); any other
 * guess copies inbuf unchanged.
 *
 * NOTE(review): conv_codeset_strdup() can return NULL; confirm that tmpstr
 * is checked before the copy and released afterwards.
 */
387 void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
389 gchar *tmpstr = NULL;
391 switch (conv_guess_ja_encoding(inbuf)) {
393 tmpstr = conv_codeset_strdup(inbuf, CS_ISO_2022_JP, CS_UTF_8);
394 strncpy2(outbuf, tmpstr, outlen);
398 tmpstr = conv_codeset_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);
399 strncpy2(outbuf, tmpstr, outlen);
403 tmpstr = conv_codeset_strdup(inbuf, CS_EUC_JP, CS_UTF_8);
404 strncpy2(outbuf, tmpstr, outlen);
408 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_anytojis: guess the Japanese encoding of inbuf and produce JIS in
 * outbuf.  conv_euctojis() handles the conversion case; other input is
 * copied unchanged with strncpy2().
 */
413 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
415 switch (conv_guess_ja_encoding(inbuf)) {
417 conv_euctojis(outbuf, outlen, inbuf);
420 strncpy2(outbuf, inbuf, outlen);
/*
 * Printability map for EUC-JP characters whose lead byte is 0xa2..0xa8.
 * isprintableeuckanji() indexes it as
 * valid_eucjp_tbl[lead - 0xa2][trail - 0xa0]; a non-zero entry means the
 * two-byte code is considered printable.  Each row covers trail bytes
 * 0xa0..0xff (96 entries).
 */
425 static gchar valid_eucjp_tbl[][96] = {
426 /* 0xa2a0 - 0xa2ff */
427 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
428 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
429 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
430 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
431 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
432 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0 },
434 /* 0xa3a0 - 0xa3ff */
435 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
436 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
437 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
438 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
439 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
440 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 },
442 /* 0xa4a0 - 0xa4ff */
443 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
444 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
445 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
446 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
447 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
448 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
450 /* 0xa5a0 - 0xa5ff */
451 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
452 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
453 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
454 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
455 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
456 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
458 /* 0xa6a0 - 0xa6ff */
459 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
460 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
461 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
462 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
463 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
464 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
466 /* 0xa7a0 - 0xa7ff */
467 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
468 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
469 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
470 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
471 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
472 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
474 /* 0xa8a0 - 0xa8ff */
475 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
476 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
477 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
478 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
479 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
480 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
/*
 * isprintableeuckanji: decide whether the EUC-JP two-byte code (c1, c2) is
 * printable.
 *
 * The checks run in order: lead byte outside 0xa1..0xf4, trail byte outside
 * 0xa1..0xfe, lead byte in 0xa9..0xaf, then a valid_eucjp_tbl lookup for
 * lead bytes 0xa2..0xa8.  The remaining tests special-case trail ranges
 * 0xd4..0xfe and, for lead byte 0xf4, 0xa7..0xfe.
 *
 * NOTE(review): the value returned by each range test is not evident from
 * these statements; presumably the out-of-range cases return FALSE.
 */
483 static gboolean isprintableeuckanji(guchar c1, guchar c2)
485 if (c1 <= 0xa0 || c1 >= 0xf5)
487 if (c2 <= 0xa0 || c2 == 0xff)
490 if (c1 >= 0xa9 && c1 <= 0xaf)
493 if (c1 >= 0xa2 && c1 <= 0xa8)
494 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
497 if (c2 >= 0xd4 && c2 <= 0xfe)
499 } else if (c1 == 0xf4) {
500 if (c2 >= 0xa7 && c2 <= 0xfe)
/*
 * conv_unreadable_eucjp: sanitise the EUC-JP string str in place.
 *
 * CR+LF pairs are collapsed to LF by shifting the tail of the string left
 * one byte.  Printable 7-bit bytes, printable two-byte codes (see
 * isprintableeuckanji), half-width kana pairs and three-byte auxiliary
 * kanji are recognised as valid; per the inline comments, unprintable codes
 * are substituted.
 */
507 void conv_unreadable_eucjp(gchar *str)
509 register guchar *p = str;
513 /* convert CR+LF -> LF */
514 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes starting at p + 1 include the terminating NUL */
515 memmove(p, p + 1, strlen(p));
516 /* printable 7 bit code */
518 } else if (iseuckanji(*p)) {
519 if (isprintableeuckanji(*p, *(p + 1))) {
520 /* printable euc-jp code */
523 /* substitute unprintable code */
532 } else if (iseuchwkana1(*p)) {
533 if (iseuchwkana2(*(p + 1)))
534 /* euc-jp hankaku kana */
538 } else if (iseucaux(*p)) {
539 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
540 /* auxiliary kanji */
545 /* substitute unprintable 1 byte code */
/*
 * conv_unreadable_8bit: sanitise str in place for 7-bit display.  CR+LF is
 * collapsed to LF and every non-ASCII byte is replaced by SUBST_CHAR.
 */
550 void conv_unreadable_8bit(gchar *str)
552 register guchar *p = str;
555 /* convert CR+LF -> LF */
556 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes starting at p + 1 include the terminating NUL */
557 memmove(p, p + 1, strlen(p));
558 else if (!isascii(*p)) *p = SUBST_CHAR;
/*
 * conv_unreadable_latin: sanitise a Latin (single-byte) string in place.
 * CR+LF is collapsed to LF, and bytes in the range 0x7f..0x9f are singled
 * out for special handling.
 *
 * NOTE(review): presumably those bytes are replaced with SUBST_CHAR, as in
 * conv_unreadable_8bit() -- confirm.
 */
563 void conv_unreadable_latin(gchar *str)
565 register guchar *p = str;
568 /* convert CR+LF -> LF */
569 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes starting at p + 1 include the terminating NUL */
570 memmove(p, p + 1, strlen(p));
571 else if ((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
/*
 * conv_unreadable_locale: sanitise str in place using the routine that
 * matches the current locale charset (conv_get_current_charset()), e.g.
 * conv_unreadable_latin() or conv_unreadable_eucjp().
 */
577 void conv_unreadable_locale(gchar *str)
579 switch (conv_get_current_charset()) {
595 conv_unreadable_latin(str);
598 conv_unreadable_eucjp(str);
/*
 * conv_mb_alnum: rewrite selected two-byte EUC-JP characters in str as
 * single ASCII characters, in place.
 *
 * For lead byte 0xa1, char_tbl is indexed by (trail - 0xa0) for trail bytes
 * 0xa0..0xef.  Entries equal to NCV mean "no conversion"; any other entry
 * is the ASCII replacement.  A separate branch handles trail bytes
 * 0xb0..0xfa.  After a replacement the rest of the string is shifted left
 * with memmove() to drop the now-unused second byte.  Other two-byte
 * characters (iseuckanji) are handled in the final branch.
 *
 * NOTE(review): the lead byte tested for the 0xb0..0xfa branch is not
 * evident from these statements -- confirm.
 */
607 void conv_mb_alnum(gchar *str)
609 static guchar char_tbl[] = {
611 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
612 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
614 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
615 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
617 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
618 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
620 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
621 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
623 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
624 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
627 register guchar *p = str;
634 register guchar ch = *(p + 1);
636 if (ch >= 0xb0 && ch <= 0xfa) {
641 memmove(p, p + 1, len);
647 } else if (*p == 0xa1) {
648 register guchar ch = *(p + 1);
650 if (ch >= 0xa0 && ch <= 0xef &&
651 NCV != char_tbl[ch - 0xa0]) {
652 *p = char_tbl[ch - 0xa0];
655 memmove(p, p + 1, len);
661 } else if (iseuckanji(*p)) {
/*
 * conv_guess_ja_encoding: heuristically classify the encoding of str.
 *
 * The guess starts as C_US_ASCII.  An ESC followed by '$' or '(' seen while
 * the guess is still ASCII returns C_ISO_2022_JP immediately.  Byte pairs
 * valid as EUC-JP and byte pairs valid as Shift_JIS are then weighed
 * against each other, and a lone half-width kana byte (issjishwkana) marks
 * the text as C_SHIFT_JIS.
 */
671 CharSet conv_guess_ja_encoding(const gchar *str)
673 const guchar *p = str;
674 CharSet guessed = C_US_ASCII;
677 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
678 if (guessed == C_US_ASCII)
679 return C_ISO_2022_JP;
681 } else if (isascii(*p)) {
683 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
684 if (*p >= 0xfd && *p <= 0xfe)
686 else if (guessed == C_SHIFT_JIS) {
687 if ((issjiskanji1(*p) &&
688 issjiskanji2(*(p + 1))) ||
690 guessed = C_SHIFT_JIS;
696 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
697 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
698 guessed = C_SHIFT_JIS;
702 } else if (issjishwkana(*p)) {
703 guessed = C_SHIFT_JIS;
/* conv_jistodisp: convert JIS input to EUC-JP with conv_jistoeuc(), then
 * sanitise the result in place with conv_unreadable_eucjp(). */
713 void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
715 conv_jistoeuc(outbuf, outlen, inbuf);
716 conv_unreadable_eucjp(outbuf);
/* conv_sjistodisp: convert Shift_JIS input to EUC-JP with conv_sjistoeuc(),
 * then sanitise the result in place with conv_unreadable_eucjp(). */
719 void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
721 conv_sjistoeuc(outbuf, outlen, inbuf);
722 conv_unreadable_eucjp(outbuf);
/* conv_euctodisp: copy EUC-JP input into outbuf with strncpy2(), then
 * sanitise the copy in place with conv_unreadable_eucjp(). */
725 void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
727 strncpy2(outbuf, inbuf, outlen);
728 conv_unreadable_eucjp(outbuf);
/* conv_anytodisp: produce display text from input of unknown Japanese
 * encoding by delegating to conv_anytoutf8(). */
731 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
733 conv_anytoutf8(outbuf, outlen, inbuf);
/* conv_ustodisp: copy inbuf into outbuf with strncpy2(), then replace
 * non-ASCII bytes in the copy via conv_unreadable_8bit(). */
737 void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
739 strncpy2(outbuf, inbuf, outlen);
740 conv_unreadable_8bit(outbuf);
/* conv_latintodisp: copy inbuf into outbuf with strncpy2(), then sanitise
 * the copy in place with conv_unreadable_latin(). */
744 void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
746 strncpy2(outbuf, inbuf, outlen);
747 conv_unreadable_latin(outbuf);
/* conv_localetodisp: copy inbuf into outbuf with strncpy2(), then sanitise
 * the copy according to the current locale via conv_unreadable_locale(). */
751 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
753 strncpy2(outbuf, inbuf, outlen);
754 conv_unreadable_locale(outbuf);
/* conv_noconv: identity converter; copies inbuf into outbuf with
 * strncpy2().  Other functions compare a converter pointer against this
 * function to detect "no built-in conversion available". */
757 void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
759 strncpy2(outbuf, inbuf, outlen);
/*
 * conv_code_converter_new: allocate a zero-initialised CodeConverter for
 * text in the named charset.  It stores the conversion function to UTF-8,
 * a private copy of the charset name (g_strdup) and the matching CharSet
 * value.
 */
762 CodeConverter *conv_code_converter_new(const gchar *charset)
766 conv = g_new0(CodeConverter, 1);
767 conv->code_conv_func = conv_get_code_conv_func(charset, CS_UTF_8);
768 conv->charset_str = g_strdup(charset);
769 conv->charset = conv_get_charset_from_str(charset);
/* conv_code_converter_destroy: release the charset name owned by conv.
 * NOTE(review): presumably conv itself is freed as well -- confirm. */
774 void conv_code_converter_destroy(CodeConverter *conv)
776 g_free(conv->charset_str);
/*
 * conv_convert: convert inbuf into outbuf using the given converter.
 *
 * When conv has a built-in conversion function (anything other than
 * conv_noconv) that function is called directly.  Otherwise the text is
 * converted to UTF-8 with conv_iconv_strdup() and the result copied into
 * outbuf with strncpy2().
 *
 * NOTE(review): the meaning of the gint result and the handling of a NULL
 * conversion result are not evident from these statements -- confirm.
 */
780 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
783 if (conv->code_conv_func != conv_noconv)
784 conv->code_conv_func(outbuf, outlen, inbuf);
788 str = conv_iconv_strdup(inbuf, conv->charset_str, CS_UTF_8);
792 strncpy2(outbuf, str, outlen);
/*
 * conv_codeset_strdup: return a newly allocated copy of inbuf converted
 * from src_code to dest_code, or NULL if the work buffer cannot be
 * allocated.
 *
 * If a built-in converter exists for the pair, it runs into a scratch
 * buffer of (strlen(inbuf) + 1) * 3 bytes, which is then shrunk to fit with
 * g_realloc().  Otherwise the call is forwarded to conv_iconv_strdup().
 */
800 gchar *conv_codeset_strdup(const gchar *inbuf,
801 const gchar *src_code, const gchar *dest_code)
805 CodeConvFunc conv_func;
807 conv_func = conv_get_code_conv_func(src_code, dest_code);
808 if (conv_func != conv_noconv) {
809 len = (strlen(inbuf) + 1) * 3;
811 if (!buf) return NULL;
813 conv_func(buf, len, inbuf);
814 return g_realloc(buf, strlen(buf) + 1);
817 return conv_iconv_strdup(inbuf, src_code, dest_code);
/*
 * conv_get_code_conv_func: choose the built-in conversion routine for a
 * source/destination charset pair; the default is conv_noconv.
 *
 * A NULL src_charset_str means "use the current locale charset".  When both
 * names are NULL (auto detection) and the locale charset is EUC-JP or
 * Shift_JIS, conv_anytodisp is returned.  A destination of US-ASCII always
 * yields conv_ustodisp.  UTF-8 destinations (explicit, or C_AUTO under a
 * UTF-8 locale) are special-cased before the per-source switch.  The switch
 * then picks a *todisp routine when the destination is C_AUTO under a
 * matching locale, or a direct converter (conv_jistoeuc, conv_sjistoeuc,
 * conv_euctojis) for explicit destinations.
 */
820 CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
821 const gchar *dest_charset_str)
823 CodeConvFunc code_conv = conv_noconv;
825 CharSet dest_charset;
827 if (!src_charset_str)
828 src_charset = conv_get_current_charset();
830 src_charset = conv_get_charset_from_str(src_charset_str);
832 /* auto detection mode */
833 if (!src_charset_str && !dest_charset_str) {
834 if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
835 return conv_anytodisp;
840 dest_charset = conv_get_charset_from_str(dest_charset_str);
842 if (dest_charset == C_US_ASCII)
843 return conv_ustodisp;
844 else if (dest_charset == C_UTF_8 ||
845 (dest_charset == C_AUTO &&
846 conv_get_current_charset() == C_UTF_8))
849 switch (src_charset) {
851 case C_ISO_2022_JP_2:
852 case C_ISO_2022_JP_3:
853 if (dest_charset == C_AUTO &&
854 conv_get_current_charset() == C_EUC_JP)
855 code_conv = conv_jistodisp;
856 else if (dest_charset == C_EUC_JP)
857 code_conv = conv_jistoeuc;
860 if (dest_charset == C_AUTO)
861 code_conv = conv_ustodisp;
877 if (dest_charset == C_AUTO &&
878 (conv_get_current_charset() == src_charset ||
880 code_conv = conv_latintodisp;
883 if (dest_charset == C_AUTO &&
884 conv_get_current_charset() == C_EUC_JP)
885 code_conv = conv_sjistodisp;
886 else if (dest_charset == C_EUC_JP)
887 code_conv = conv_sjistoeuc;
890 if (dest_charset == C_AUTO &&
891 conv_get_current_charset() == C_EUC_JP)
892 code_conv = conv_euctodisp;
893 else if (dest_charset == C_ISO_2022_JP ||
894 dest_charset == C_ISO_2022_JP_2 ||
895 dest_charset == C_ISO_2022_JP_3)
896 code_conv = conv_euctojis;
/*
 * conv_iconv_strdup: return a newly allocated copy of inbuf converted with
 * GLib's g_convert().
 *
 * The source charset defaults to the outgoing charset and the destination
 * to the current locale charset; isrc_code and idest_code override those
 * defaults.  No conversion is attempted (a plain g_strdup copy is returned)
 * when the destination is US-ASCII or when source and destination names are
 * equal ignoring case.  If g_convert() fails and the source was not already
 * ISO-8859-15, the conversion is retried once with ISO-8859-15 as the
 * source.  A warning naming both charsets is logged on failure.
 *
 * NOTE(review): the conditions guarding the two override assignments and
 * the warning are not evident from these statements -- confirm.
 */
905 gchar *conv_iconv_strdup(const gchar *inbuf,
906 const gchar *isrc_code, const gchar *idest_code)
908 /* presumably GLib 2's function handles the conversion details,
909 * whether iconv is sitting below, or something else */
911 gsize read_len, written_len;
912 gchar *src_code = conv_get_outgoing_charset_str();
913 gchar *dest_code = conv_get_current_charset_str();
916 src_code = isrc_code;
918 dest_code = idest_code;
920 /* don't convert if current codeset is US-ASCII */
921 if (!strcasecmp(dest_code, CS_US_ASCII))
922 return g_strdup(inbuf);
924 /* don't convert if src and dest codeset are identical */
925 if (!strcasecmp(src_code, dest_code))
926 return g_strdup(inbuf);
928 /* FIXME: unchecked inbuf? Can't see at this level. */
929 outbuf = g_convert(inbuf, strlen(inbuf), dest_code, src_code,
930 &read_len, &written_len, NULL);
932 if (outbuf == NULL && strcasecmp(src_code, CS_ISO_8859_15))
933 /* also try iso-8859-15 */
934 outbuf = conv_iconv_strdup(inbuf, CS_ISO_8859_15, dest_code);
936 g_warning(_("Valid locale type set? (Currently: %s to %s)\n"),
937 src_code, dest_code);
/*
 * Table pairing each CharSet enumerator with a charset name string.  The
 * lookup-table builders below read the entries through the .charset and
 * .name fields of charsets[].  Some CharSet values appear more than once
 * (C_US_ASCII, C_EUC_JP, C_SHIFT_JIS), each time with a different name
 * constant.
 */
942 static const struct {
946 {C_US_ASCII, CS_US_ASCII},
947 {C_US_ASCII, CS_ANSI_X3_4_1968},
950 {C_ISO_8859_1, CS_ISO_8859_1},
951 {C_ISO_8859_2, CS_ISO_8859_2},
952 {C_ISO_8859_3, CS_ISO_8859_3},
953 {C_ISO_8859_4, CS_ISO_8859_4},
954 {C_ISO_8859_5, CS_ISO_8859_5},
955 {C_ISO_8859_6, CS_ISO_8859_6},
956 {C_ISO_8859_7, CS_ISO_8859_7},
957 {C_ISO_8859_8, CS_ISO_8859_8},
958 {C_ISO_8859_9, CS_ISO_8859_9},
959 {C_ISO_8859_10, CS_ISO_8859_10},
960 {C_ISO_8859_11, CS_ISO_8859_11},
961 {C_ISO_8859_13, CS_ISO_8859_13},
962 {C_ISO_8859_14, CS_ISO_8859_14},
963 {C_ISO_8859_15, CS_ISO_8859_15},
964 {C_BALTIC, CS_BALTIC},
965 {C_CP1250, CS_CP1250},
966 {C_CP1251, CS_CP1251},
967 {C_CP1252, CS_CP1252},
968 {C_CP1253, CS_CP1253},
969 {C_CP1254, CS_CP1254},
970 {C_CP1255, CS_CP1255},
971 {C_CP1256, CS_CP1256},
972 {C_CP1257, CS_CP1257},
973 {C_CP1258, CS_CP1258},
974 {C_WINDOWS_1250, CS_WINDOWS_1250},
975 {C_WINDOWS_1251, CS_WINDOWS_1251},
976 {C_WINDOWS_1252, CS_WINDOWS_1252},
977 {C_WINDOWS_1253, CS_WINDOWS_1253},
978 {C_WINDOWS_1254, CS_WINDOWS_1254},
979 {C_WINDOWS_1255, CS_WINDOWS_1255},
980 {C_WINDOWS_1256, CS_WINDOWS_1256},
981 {C_WINDOWS_1257, CS_WINDOWS_1257},
982 {C_WINDOWS_1258, CS_WINDOWS_1258},
983 {C_KOI8_R, CS_KOI8_R},
984 {C_KOI8_T, CS_KOI8_T},
985 {C_KOI8_U, CS_KOI8_U},
986 {C_ISO_2022_JP, CS_ISO_2022_JP},
987 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
988 {C_ISO_2022_JP_3, CS_ISO_2022_JP_3},
989 {C_EUC_JP, CS_EUC_JP},
990 {C_EUC_JP, CS_EUCJP},
991 {C_SHIFT_JIS, CS_SHIFT_JIS},
992 {C_SHIFT_JIS, CS_SHIFT__JIS},
993 {C_SHIFT_JIS, CS_SJIS},
994 {C_ISO_2022_KR, CS_ISO_2022_KR},
995 {C_EUC_KR, CS_EUC_KR},
996 {C_ISO_2022_CN, CS_ISO_2022_CN},
997 {C_EUC_CN, CS_EUC_CN},
998 {C_GB2312, CS_GB2312},
1000 {C_EUC_TW, CS_EUC_TW},
1002 {C_BIG5_HKSCS, CS_BIG5_HKSCS},
1003 {C_TIS_620, CS_TIS_620},
1004 {C_WINDOWS_874, CS_WINDOWS_874},
1005 {C_GEORGIAN_PS, CS_GEORGIAN_PS},
1006 {C_TCVN5712_1, CS_TCVN5712_1},
/*
 * Locale-name table.  Each row gives a locale name prefix, the charset
 * used for that locale (read as .charset by conv_get_current_charset())
 * and the default charset for outgoing text (read as .out_charset by
 * conv_get_outgoing_charset()).
 *
 * Lookups scan the rows in order and match case-insensitively on the row's
 * full locale string as a prefix of the current locale, so a more specific
 * row (e.g. "ja_JP.SJIS") must come before its generic fallback ("ja_JP").
 */
1009 static const struct {
1010 gchar *const locale;
1012 CharSet out_charset;
1013 } locale_table[] = {
1014 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
1015 {"ja_JP.EUC-JP" , C_EUC_JP , C_ISO_2022_JP},
1016 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
1017 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
1018 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
1019 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
1020 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
1021 {"ko_KR.EUC-KR" , C_EUC_KR , C_EUC_KR},
1022 {"ko_KR" , C_EUC_KR , C_EUC_KR},
1023 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
1024 {"zh_CN.GBK" , C_GBK , C_GB2312},
1025 {"zh_CN" , C_GB2312 , C_GB2312},
1026 {"zh_HK" , C_BIG5_HKSCS , C_BIG5_HKSCS},
1027 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
1028 {"zh_TW.EUC-TW" , C_EUC_TW , C_BIG5},
1029 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
1030 {"zh_TW" , C_BIG5 , C_BIG5},
1032 {"ru_RU.KOI8-R" , C_KOI8_R , C_KOI8_R},
1033 {"ru_RU.KOI8R" , C_KOI8_R , C_KOI8_R},
1034 {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
1035 {"ru_RU" , C_ISO_8859_5 , C_KOI8_R},
1036 {"tg_TJ" , C_KOI8_T , C_KOI8_T},
1037 {"ru_UA" , C_KOI8_U , C_KOI8_U},
1038 {"uk_UA.CP1251" , C_WINDOWS_1251, C_KOI8_U},
1039 {"uk_UA" , C_KOI8_U , C_KOI8_U},
1041 {"be_BY" , C_WINDOWS_1251, C_WINDOWS_1251},
1042 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
1044 {"yi_US" , C_WINDOWS_1255, C_WINDOWS_1255},
1046 {"af_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1047 {"br_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1048 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1049 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1050 {"de_AT" , C_ISO_8859_1 , C_ISO_8859_1},
1051 {"de_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1052 {"de_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1053 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
1054 {"de_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1055 {"en_AU" , C_ISO_8859_1 , C_ISO_8859_1},
1056 {"en_BW" , C_ISO_8859_1 , C_ISO_8859_1},
1057 {"en_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1058 {"en_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1059 {"en_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1060 {"en_HK" , C_ISO_8859_1 , C_ISO_8859_1},
1061 {"en_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1062 {"en_NZ" , C_ISO_8859_1 , C_ISO_8859_1},
1063 {"en_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1064 {"en_SG" , C_ISO_8859_1 , C_ISO_8859_1},
1065 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
1066 {"en_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1067 {"en_ZW" , C_ISO_8859_1 , C_ISO_8859_1},
1068 {"es_AR" , C_ISO_8859_1 , C_ISO_8859_1},
1069 {"es_BO" , C_ISO_8859_1 , C_ISO_8859_1},
1070 {"es_CL" , C_ISO_8859_1 , C_ISO_8859_1},
1071 {"es_CO" , C_ISO_8859_1 , C_ISO_8859_1},
1072 {"es_CR" , C_ISO_8859_1 , C_ISO_8859_1},
1073 {"es_DO" , C_ISO_8859_1 , C_ISO_8859_1},
1074 {"es_EC" , C_ISO_8859_1 , C_ISO_8859_1},
1075 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1076 {"es_GT" , C_ISO_8859_1 , C_ISO_8859_1},
1077 {"es_HN" , C_ISO_8859_1 , C_ISO_8859_1},
1078 {"es_MX" , C_ISO_8859_1 , C_ISO_8859_1},
1079 {"es_NI" , C_ISO_8859_1 , C_ISO_8859_1},
1080 {"es_PA" , C_ISO_8859_1 , C_ISO_8859_1},
1081 {"es_PE" , C_ISO_8859_1 , C_ISO_8859_1},
1082 {"es_PR" , C_ISO_8859_1 , C_ISO_8859_1},
1083 {"es_PY" , C_ISO_8859_1 , C_ISO_8859_1},
1084 {"es_SV" , C_ISO_8859_1 , C_ISO_8859_1},
1085 {"es_US" , C_ISO_8859_1 , C_ISO_8859_1},
1086 {"es_UY" , C_ISO_8859_1 , C_ISO_8859_1},
1087 {"es_VE" , C_ISO_8859_1 , C_ISO_8859_1},
1088 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
1089 {"eu_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1090 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1091 {"fo_FO" , C_ISO_8859_1 , C_ISO_8859_1},
1092 {"fr_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1093 {"fr_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1094 {"fr_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1095 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1096 {"fr_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1097 {"ga_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1098 {"gl_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1099 {"gv_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1100 {"id_ID" , C_ISO_8859_1 , C_ISO_8859_1},
1101 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
1102 {"it_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1103 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
1104 {"kl_GL" , C_ISO_8859_1 , C_ISO_8859_1},
1105 {"kw_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1106 {"ms_MY" , C_ISO_8859_1 , C_ISO_8859_1},
1107 {"nl_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1108 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
1109 {"nn_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1110 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1111 {"oc_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1112 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
1113 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
1114 {"sq_AL" , C_ISO_8859_1 , C_ISO_8859_1},
1115 {"sv_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1116 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
1117 {"tl_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1118 {"uz_UZ" , C_ISO_8859_1 , C_ISO_8859_1},
1119 {"wa_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1121 {"bs_BA" , C_ISO_8859_2 , C_ISO_8859_2},
1122 {"cs_CZ" , C_ISO_8859_2 , C_ISO_8859_2},
1123 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
1124 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
1125 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
1126 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
1127 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
1128 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
1130 {"sr_YU@cyrillic" , C_ISO_8859_5 , C_ISO_8859_5},
1131 {"sr_YU" , C_ISO_8859_2 , C_ISO_8859_2},
1133 {"mt_MT" , C_ISO_8859_3 , C_ISO_8859_3},
1135 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
1136 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1137 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1138 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
1140 {"mk_MK" , C_ISO_8859_5 , C_ISO_8859_5},
1142 {"ar_AE" , C_ISO_8859_6 , C_ISO_8859_6},
1143 {"ar_BH" , C_ISO_8859_6 , C_ISO_8859_6},
1144 {"ar_DZ" , C_ISO_8859_6 , C_ISO_8859_6},
1145 {"ar_EG" , C_ISO_8859_6 , C_ISO_8859_6},
1146 {"ar_IQ" , C_ISO_8859_6 , C_ISO_8859_6},
1147 {"ar_JO" , C_ISO_8859_6 , C_ISO_8859_6},
1148 {"ar_KW" , C_ISO_8859_6 , C_ISO_8859_6},
1149 {"ar_LB" , C_ISO_8859_6 , C_ISO_8859_6},
1150 {"ar_LY" , C_ISO_8859_6 , C_ISO_8859_6},
1151 {"ar_MA" , C_ISO_8859_6 , C_ISO_8859_6},
1152 {"ar_OM" , C_ISO_8859_6 , C_ISO_8859_6},
1153 {"ar_QA" , C_ISO_8859_6 , C_ISO_8859_6},
1154 {"ar_SA" , C_ISO_8859_6 , C_ISO_8859_6},
1155 {"ar_SD" , C_ISO_8859_6 , C_ISO_8859_6},
1156 {"ar_SY" , C_ISO_8859_6 , C_ISO_8859_6},
1157 {"ar_TN" , C_ISO_8859_6 , C_ISO_8859_6},
1158 {"ar_YE" , C_ISO_8859_6 , C_ISO_8859_6},
1160 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
1161 {"he_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1162 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1163 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
1165 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
1166 {"mi_NZ" , C_ISO_8859_13 , C_ISO_8859_13},
1168 {"cy_GB" , C_ISO_8859_14 , C_ISO_8859_14},
1170 {"ar_IN" , C_UTF_8 , C_UTF_8},
1171 {"en_IN" , C_UTF_8 , C_UTF_8},
1172 {"se_NO" , C_UTF_8 , C_UTF_8},
1173 {"ta_IN" , C_UTF_8 , C_UTF_8},
1174 {"te_IN" , C_UTF_8 , C_UTF_8},
1175 {"ur_PK" , C_UTF_8 , C_UTF_8},
1177 {"th_TH" , C_TIS_620 , C_TIS_620},
1178 /* {"th_TH" , C_WINDOWS_874}, */
1179 /* {"th_TH" , C_ISO_8859_11}, */
1181 {"ka_GE" , C_GEORGIAN_PS , C_GEORGIAN_PS},
1182 {"vi_VN.TCVN" , C_TCVN5712_1 , C_TCVN5712_1},
1184 {"C" , C_US_ASCII , C_US_ASCII},
1185 {"POSIX" , C_US_ASCII , C_US_ASCII},
1186 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
/*
 * conv_get_charset_to_str_table: return the hash table mapping a CharSet
 * value (stored as a pointer-sized key, compared with g_direct_equal) to
 * its name.  The table is kept in a function-local static and populated
 * from charsets[].
 *
 * NOTE(review): each charset is looked up before insertion, so presumably
 * only the first name listed for a CharSet is kept -- confirm.
 */
1189 static GHashTable *conv_get_charset_to_str_table(void)
1191 static GHashTable *table;
1197 table = g_hash_table_new(NULL, g_direct_equal);
1199 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1200 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1203 (table, GUINT_TO_POINTER(charsets[i].charset),
/*
 * conv_get_charset_from_str_table: return the hash table mapping a charset
 * name to its CharSet value.  Keys are hashed and compared with
 * str_case_hash / str_case_equal, and every charsets[] row is inserted, so
 * all aliases resolve.  The table is kept in a function-local static.
 */
1211 static GHashTable *conv_get_charset_from_str_table(void)
1213 static GHashTable *table;
1219 table = g_hash_table_new(str_case_hash, str_case_equal);
1221 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1222 g_hash_table_insert(table, charsets[i].name,
1223 GUINT_TO_POINTER(charsets[i].charset));
/* conv_get_charset_str: return the name registered for charset, or NULL
 * when the lookup table has no entry for it. */
1229 const gchar *conv_get_charset_str(CharSet charset)
1233 table = conv_get_charset_to_str_table();
1234 return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
/*
 * conv_get_charset_from_str: map a charset name to its CharSet value.
 * A NULL name yields C_AUTO.  An unknown name yields the CharSet whose
 * numeric value is 0, because a failed hash lookup returns NULL.
 */
1237 CharSet conv_get_charset_from_str(const gchar *charset)
1241 if (!charset) return C_AUTO;
1243 table = conv_get_charset_from_str_table();
1244 return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
/*
 * conv_get_current_charset: determine the charset implied by the current
 * locale and cache it in a function-local static (-1 means "not yet
 * computed").
 *
 * Resolution order: a locale containing "UTF-8" gives C_UTF_8; a locale
 * ending in "@euro" gives C_ISO_8859_15; otherwise locale_table is scanned
 * for a case-insensitive prefix match.  C_US_ASCII and C_AUTO are assigned
 * on the remaining paths.
 *
 * NOTE(review): presumably C_US_ASCII covers a missing locale and C_AUTO an
 * unmatched one -- confirm.
 */
1247 CharSet conv_get_current_charset(void)
1249 static CharSet cur_charset = -1;
1250 const gchar *cur_locale;
1254 if (cur_charset != -1)
1257 cur_locale = conv_get_current_locale();
1259 cur_charset = C_US_ASCII;
1263 if (strcasestr(cur_locale, "UTF-8")) {
1264 cur_charset = C_UTF_8;
1268 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1269 cur_charset = C_ISO_8859_15;
1273 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1276 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1277 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1278 if (!strncasecmp(cur_locale, locale_table[i].locale,
1279 strlen(locale_table[i].locale))) {
1280 cur_charset = locale_table[i].charset;
1282 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1283 !strchr(p + 1, '.')) {
1284 if (strlen(cur_locale) == 2 &&
1285 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1286 cur_charset = locale_table[i].charset;
1292 cur_charset = C_AUTO;
/* conv_get_current_charset_str: return the name of the current locale
 * charset, cached in a function-local static; falls back to CS_US_ASCII
 * when no name is registered for it. */
1296 const gchar *conv_get_current_charset_str(void)
1298 static const gchar *codeset = NULL;
1301 codeset = conv_get_charset_str(conv_get_current_charset());
1303 return codeset ? codeset : CS_US_ASCII;
/*
 * conv_get_outgoing_charset: determine the default charset for outgoing
 * text from the current locale and cache it in a function-local static
 * (-1 means "not yet computed").
 *
 * A locale ending in "@euro" gives C_ISO_8859_15; otherwise locale_table is
 * scanned with the same matching rules as conv_get_current_charset() and
 * the row's out_charset is used.  C_AUTO is assigned on another path.
 *
 * NOTE(review): presumably C_AUTO is the value used when no locale is
 * available -- confirm.
 */
1306 CharSet conv_get_outgoing_charset(void)
1308 static CharSet out_charset = -1;
1309 const gchar *cur_locale;
1313 if (out_charset != -1)
1316 cur_locale = conv_get_current_locale();
1318 out_charset = C_AUTO;
1322 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1323 out_charset = C_ISO_8859_15;
1327 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1330 if (!strncasecmp(cur_locale, locale_table[i].locale,
1331 strlen(locale_table[i].locale))) {
1332 out_charset = locale_table[i].out_charset;
1334 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1335 !strchr(p + 1, '.')) {
1336 if (strlen(cur_locale) == 2 &&
1337 !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1338 out_charset = locale_table[i].out_charset;
/*
 * conv_get_outgoing_charset_str: return the charset name to use for
 * outgoing text.
 *
 * A user preference (prefs_common.outgoing_charset) wins when it is set to
 * something other than CS_AUTO.  A preference that does not start with a
 * letter is treated as corrupt and reset to CS_AUTO.  Otherwise the
 * locale-derived outgoing charset is used, falling back to CS_US_ASCII when
 * it has no registered name.
 */
1347 const gchar *conv_get_outgoing_charset_str(void)
1349 CharSet out_charset;
1352 if (prefs_common.outgoing_charset) {
1353 if (!isalpha((guchar)prefs_common.outgoing_charset[0])) {
1354 g_free(prefs_common.outgoing_charset);
1355 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1356 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1357 return prefs_common.outgoing_charset;
1360 out_charset = conv_get_outgoing_charset();
1361 str = conv_get_charset_str(out_charset);
1363 return str ? str : CS_US_ASCII;
/* conv_is_multibyte_encoding: report whether encoding is a multi-byte
 * charset.  The case list includes C_ISO_2022_JP_2 and C_ISO_2022_JP_3.
 * NOTE(review): the full case list and the returned values are not evident
 * from these statements -- confirm. */
1366 gboolean conv_is_multibyte_encoding(CharSet encoding)
1374 case C_ISO_2022_JP_2:
1375 case C_ISO_2022_JP_3:
/*
 * conv_get_current_locale: determine the current locale name.  The
 * environment variables LC_ALL, LC_CTYPE and LANG are tried in that order,
 * skipping unset or empty values, and setlocale(LC_CTYPE, NULL) is the
 * final fallback.
 */
1389 const gchar *conv_get_current_locale(void)
1391 const gchar *cur_locale;
1393 cur_locale = g_getenv("LC_ALL");
1394 if (!cur_locale || !strlen(cur_locale))
1395 cur_locale = g_getenv("LC_CTYPE");
1396 if (!cur_locale || !strlen(cur_locale))
1397 cur_locale = g_getenv("LANG");
1398 if (!cur_locale || !strlen(cur_locale))
1399 cur_locale = setlocale(LC_CTYPE, NULL);
1401 /* debug_print("current locale: %s\n",
1402 cur_locale ? cur_locale : "(none)"); */
/*
 * conv_unmime_header_overwrite: decode the MIME-encoded header str in
 * place.  str must not be NULL.
 *
 * Under a locale whose name starts with "ja", the text is first run through
 * conv_anytodisp() into a stack buffer of strlen(str) * 2 + 1 bytes and
 * then decoded back into str.  Otherwise it is decoded into a stack buffer
 * of strlen(str) + 1 bytes and copied back with strncpy2().
 *
 * NOTE(review): both paths assume the decoded text fits in the original
 * storage of str -- confirm against unmime_header().
 */
1407 void conv_unmime_header_overwrite(gchar *str)
1411 CharSet cur_charset;
1412 const gchar *locale;
1414 g_return_if_fail(str != NULL);
1416 cur_charset = conv_get_current_charset();
1419 /* Should we always ensure to convert? */
1420 locale = conv_get_current_locale();
1422 if (locale && !strncasecmp(locale, "ja", 2)) {
1423 buflen = strlen(str) * 2 + 1;
1424 Xalloca(buf, buflen, return);
1425 conv_anytodisp(buf, buflen, str);
1426 unmime_header(str, buf);
1428 buflen = strlen(str) + 1;
1429 Xalloca(buf, buflen, return);
1430 unmime_header(buf, str);
1431 strncpy2(str, buf, buflen);
/*
 * conv_unmime_header: decode the MIME-encoded header str into outbuf.
 *
 * Three paths are used.  Under a locale whose name starts with "ja", the
 * text goes through conv_anytodisp() into a stack buffer first.  Otherwise,
 * text that is already valid UTF-8 is decoded directly.  Anything else is
 * converted from the current locale charset to UTF-8 with
 * conv_codeset_strdup() and then decoded.
 *
 * NOTE(review): how outlen and charset are used is not evident from these
 * statements, and conv_codeset_strdup() can return NULL; confirm that buf
 * is checked and freed on the last path.
 */
1435 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1436 const gchar *charset)
1438 CharSet cur_charset;
1439 const gchar *locale;
1441 cur_charset = conv_get_current_charset();
1444 /* Should we always ensure to convert? */
1445 locale = conv_get_current_locale();
1447 if (locale && !strncasecmp(locale, "ja", 2)) {
1451 buflen = strlen(str) * 2 + 1;
1452 Xalloca(buf, buflen, return);
1453 conv_anytodisp(buf, buflen, str);
1454 unmime_header(outbuf, buf);
1455 } else if (g_utf8_validate(str, -1, NULL)) {
1456 unmime_header(outbuf, str);
1460 const gchar *src_codeset, *dest_codeset;
1461 src_codeset = conv_get_current_charset_str();
1462 dest_codeset = CS_UTF_8;
1463 buf = conv_codeset_strdup(str, src_codeset, dest_codeset);
1464 unmime_header(outbuf, buf);
/* Preferred maximum length of an encoded header line. */
1468 #define MAX_LINELEN 76
/* Larger limit; conv_encode_header() uses the difference between the two
 * limits as extra slack before forcing a break inside a plain ASCII word. */
1469 #define MAX_HARD_LINELEN 996
/* Delimiters that open and close a MIME encoded-word. */
1470 #define MIMESEP_BEGIN "=?"
1471 #define MIMESEP_END "?="
/* Length of the base64 encoding of len bytes: 4 output characters for
 * every 3 input bytes, rounded up to a whole group. */
1473 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/* Helper for conv_encode_header(): uses that function's locals (dest,
 * destp, srcp, len, left).  It checks the remaining room in dest and, when
 * cond holds and input remains, starts a continuation line and resets left
 * to MAX_LINELEN - 1. */
1475 #define LBREAK_IF_REQUIRED(cond, is_plain_text) \
1477 if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) { \
1482 if ((cond) && *srcp) { \
1483 if (destp > (guchar *)dest && left < MAX_LINELEN - 1) { \
1484 if (isspace(*(destp - 1))) \
1486 else if (is_plain_text && isspace(*srcp)) \
1491 left = MAX_LINELEN - 1; \
/*
 * conv_encode_header: write a MIME-encoded form of the header value src
 * into dest (capacity len bytes), folding lines as it goes.
 *
 * header_len is subtracted from MAX_LINELEN to get the room left on the
 * first line.  When addr_field is true, '(' and ')' are kept outside the
 * encoded words.
 *
 * The "?B?" separator is chosen when MB_CUR_MAX > 1 and "?Q?" otherwise.
 * Text is treated as UTF-8 and converted to the outgoing charset, with
 * US-ASCII promoted to ISO-8859-1.  Words that are plain ASCII are copied
 * through unchanged.  Other runs are grown one character at a time,
 * converted with conv_codeset_strdup() and measured, so that each encoded
 * word fits in the space left on the line.  If conversion fails, a warning
 * is logged and the run is emitted after conv_unreadable_8bit() has been
 * applied to it.
 */
1497 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1498 gint header_len, gboolean addr_field)
1500 const gchar *cur_encoding;
1501 const gchar *out_encoding;
1505 const guchar *srcp = src;
1506 guchar *destp = dest;
1507 gboolean use_base64;
1509 if (MB_CUR_MAX > 1) {
1511 mimesep_enc = "?B?";
1514 mimesep_enc = "?Q?";
1517 cur_encoding = CS_UTF_8; /* gtk2 */
1519 out_encoding = conv_get_outgoing_charset_str();
1520 if (!strcmp(out_encoding, CS_US_ASCII))
1521 out_encoding = CS_ISO_8859_1;
1523 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1524 strlen(mimesep_enc) + strlen(MIMESEP_END);
1526 left = MAX_LINELEN - header_len;
1529 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1531 while (isspace(*srcp)) {
1534 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1537 /* output as it is if the next word is ASCII string */
1538 if (!is_next_nonascii(srcp)) {
1541 word_len = get_next_word_len(srcp);
1542 LBREAK_IF_REQUIRED(left < word_len, TRUE);
1543 while (word_len > 0) {
1544 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1553 /* don't include parentheses in encoded strings */
1554 if (addr_field && (*srcp == '(' || *srcp == ')')) {
1555 LBREAK_IF_REQUIRED(left < 2, FALSE);
1566 const guchar *p = srcp;
1568 gint out_enc_str_len;
1569 gint mime_block_len;
1570 gboolean cont = FALSE;
1572 while (*p != '\0') {
1573 if (isspace(*p) && !is_next_nonascii(p + 1))
1575 /* don't include parentheses in encoded
1577 if (addr_field && (*p == '(' || *p == ')'))
1580 if (MB_CUR_MAX > 1) {
1581 mb_len = mblen(p, MB_CUR_MAX);
1583 g_warning("conv_encode_header(): invalid multibyte character encountered\n");
1589 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1590 out_str = conv_codeset_strdup
1591 (part_str, cur_encoding, out_encoding);
1593 g_warning("conv_encode_header(): code conversion failed\n");
1594 conv_unreadable_8bit(part_str);
1595 out_str = g_strdup(part_str);
1597 out_str_len = strlen(out_str);
1600 out_enc_str_len = B64LEN(out_str_len);
1603 qp_get_q_encoding_len(out_str);
1607 if (mimestr_len + out_enc_str_len <= left) {
1610 } else if (cur_len == 0) {
1611 LBREAK_IF_REQUIRED(1, FALSE);
1620 Xstrndup_a(part_str, srcp, cur_len, );
1621 out_str = conv_codeset_strdup
1622 (part_str, cur_encoding, out_encoding);
1624 g_warning("conv_encode_header(): code conversion failed\n");
1625 conv_unreadable_8bit(part_str);
1626 out_str = g_strdup(part_str);
1628 out_str_len = strlen(out_str);
1631 out_enc_str_len = B64LEN(out_str_len);
1634 qp_get_q_encoding_len(out_str);
1636 Xalloca(enc_str, out_enc_str_len + 1, );
1638 base64_encode(enc_str, out_str, out_str_len);
1640 qp_q_encode(enc_str, out_str);
1644 /* output MIME-encoded string block */
1645 mime_block_len = mimestr_len + strlen(enc_str);
1646 g_snprintf(destp, mime_block_len + 1,
1647 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1648 out_encoding, mimesep_enc, enc_str);
1649 destp += mime_block_len;
1652 left -= mime_block_len;
1655 LBREAK_IF_REQUIRED(cont, FALSE);
1665 #undef LBREAK_IF_REQUIRED