2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2005 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
40 #include "quoted-printable.h"
42 #include "prefs_common.h"
/*
 * Byte written in place of input that cannot be converted or displayed
 * (0x5f is '_' in ASCII).  Defined without a trailing semicolon so that it
 * expands to a plain expression and can be used inside larger expressions;
 * every use site supplies its own statement terminator.
 */
#define SUBST_CHAR 0x5f
/*
 * Nonzero when the low 8 bits of c lie in 0xa1..0xfe, the byte range this
 * file treats as an EUC-JP kanji byte.  The argument is evaluated twice.
 */
#define iseuckanji(c) \
	(!(((c) & 0xff) < 0xa1 || ((c) & 0xff) > 0xfe))
/*
 * Nonzero when the low 8 bits of c equal 0x8e, the byte this file tests for
 * before an EUC-JP half-width kana byte (see iseuchwkana2).
 */
#define iseuchwkana1(c) \
	(0x8e == ((c) & 0xff))
/*
 * Nonzero when the low 8 bits of c lie in 0xa1..0xdf, the range accepted for
 * the byte that follows the 0x8e lead byte.  The argument is evaluated twice.
 */
#define iseuchwkana2(c) \
	(!(((c) & 0xff) < 0xa1 || ((c) & 0xff) > 0xdf))
62 (((c) & 0xff) == 0x8f)
/*
 * Nonzero when the low 8 bits of c can be the first byte of a two-byte
 * Shift_JIS character: 0x81..0x9f or 0xe0..0xfc.  Expressed here as
 * "inside 0x81..0xfc but outside 0xa0..0xdf", which is the same set.
 * The argument is evaluated several times.
 */
#define issjiskanji1(c) \
	(((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0xfc && \
	 !(((c) & 0xff) >= 0xa0 && ((c) & 0xff) <= 0xdf))
/*
 * Nonzero when the low 8 bits of c can be the second byte of a two-byte
 * Shift_JIS character: 0x40..0x7e or 0x80..0xfc, i.e. anything in
 * 0x40..0xfc except 0x7f.  The argument is evaluated several times.
 */
#define issjiskanji2(c) \
	(((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0xfc && \
	 ((c) & 0xff) != 0x7f)
/*
 * Nonzero when the low 8 bits of c lie in 0xa1..0xdf, the single-byte range
 * this file treats as Shift_JIS half-width kana.  The argument is evaluated
 * twice.
 */
#define issjishwkana(c) \
	(!(((c) & 0xff) < 0xa1 || ((c) & 0xff) > 0xdf))
73 if (state != JIS_KANJI) { \
81 if (state != JIS_ASCII) { \
89 if (state != JIS_HWKANA) { \
97 if (state != JIS_AUXKANJI) { \
102 state = JIS_AUXKANJI; \
/*
 * Forward declarations of the file-local converters.  All conversion
 * routines share one signature: destination buffer, its size (outlen) and
 * the source string.
 */
/* Between the Japanese encodings. */
105 static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
106 static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf);
107 static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
/* Japanese encodings to UTF-8. */
109 static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
110 static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
111 static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
112 static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
/* UTF-8 to Japanese encodings. */
114 static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
115 static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf);
/* In-place helpers that rewrite the string they are given. */
117 static void conv_unreadable_eucjp(gchar *str);
118 static void conv_unreadable_8bit(gchar *str);
119 static void conv_unreadable_latin(gchar *str);
/* "To display" wrappers and the pass-through converter. */
121 static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
122 static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
123 static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
125 static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
126 static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
127 static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf);
/*
 * Convert the NUL-terminated JIS (ISO-2022-JP style) string inbuf to EUC-JP
 * in outbuf.
 *
 * The input is scanned byte by byte while a JISState is tracked, starting
 * in JIS_ASCII.  Escape sequences are recognised by the bytes following the
 * escape: '$' then '@' or 'B'; '$' then '(' (leading to JIS_AUXKANJI);
 * '(' then 'B', 'J' or 'I'.  The bytes 0x0e and 0x0f are also special-cased.
 * Payload bytes of non-ASCII states are copied with the high bit set.
 *
 * NOTE(review): no use of outlen is visible in the lines shown here -
 * confirm that writes to outbuf are bounded.
 */
129 static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
131 const guchar *in = inbuf;
132 guchar *out = outbuf;
133 JISState state = JIS_ASCII;
135 while (*in != '\0') {
139 if (*(in + 1) == '@' || *(in + 1) == 'B') {
142 } else if (*(in + 1) == '(' &&
144 state = JIS_AUXKANJI;
147 /* unknown escape sequence */
150 } else if (*in == '(') {
151 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
154 } else if (*(in + 1) == 'I') {
158 /* unknown escape sequence */
162 /* unknown escape sequence */
165 } else if (*in == 0x0e) {
168 } else if (*in == 0x0f) {
177 *out++ = *in++ | 0x80;
/* stop if the input ends between the two bytes of a character */
178 if (*in == '\0') break;
179 *out++ = *in++ | 0x80;
183 *out++ = *in++ | 0x80;
187 *out++ = *in++ | 0x80;
/* same guard for the second two-byte case */
188 if (*in == '\0') break;
189 *out++ = *in++ | 0x80;
/*
 * Sound-mark codes that conv_jis_hantozen() compares against its sound_sym
 * argument to decide which lookup table applies.
 */
#define JIS_HWDAKUTEN		(0x5e)	/* selects dakuten_tbl */
#define JIS_HWHANDAKUTEN	(0x5f)	/* selects handakuten_tbl */
/*
 * Translate one code (jis_code, optionally combined with the sound mark
 * sound_sym) into a two-byte code stored big-endian in outbuf[0] and
 * outbuf[1].  Presumably half-width to full-width katakana, going by the
 * h2z / dakuten / handakuten table names - confirm.
 *
 * Table selection, as visible below:
 *   - sound_sym == JIS_HWDAKUTEN and jis_code in 0x36..0x4e: dakuten_tbl
 *   - sound_sym == JIS_HWHANDAKUTEN and jis_code in 0x4a..0x4e:
 *     handakuten_tbl
 *   - otherwise: h2z_tbl
 * Codes outside 0x21..0x5f are tested for first; what is done for them, and
 * the gint values returned, are not visible in the lines shown (callers
 * store the result in a variable named len).
 */
201 static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
/* indexed by jis_code - 0x20 */
203 static guint16 h2z_tbl[] = {
205 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
206 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
208 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
209 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
211 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
212 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
214 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
215 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
/* indexed by jis_code - 0x30; 0x0000 entries have no voiced form listed */
218 static guint16 dakuten_tbl[] = {
220 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
221 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
223 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
224 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
/* indexed by jis_code - 0x4a */
227 static guint16 handakuten_tbl[] = {
229 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
237 if (jis_code < 0x21 || jis_code > 0x5f)
240 if (sound_sym == JIS_HWDAKUTEN &&
241 jis_code >= 0x36 && jis_code <= 0x4e) {
242 out_code = dakuten_tbl[jis_code - 0x30];
/* high byte first, then low byte */
244 *outbuf = out_code >> 8;
245 *(outbuf + 1) = out_code & 0xff;
250 if (sound_sym == JIS_HWHANDAKUTEN &&
251 jis_code >= 0x4a && jis_code <= 0x4e) {
252 out_code = handakuten_tbl[jis_code - 0x4a];
253 *outbuf = out_code >> 8;
254 *(outbuf + 1) = out_code & 0xff;
/* plain lookup when no sound mark applies */
258 out_code = h2z_tbl[jis_code - 0x20];
259 *outbuf = out_code >> 8;
260 *(outbuf + 1) = out_code & 0xff;
/*
 * Convert the NUL-terminated EUC-JP string inbuf to JIS in outbuf, tracking
 * a JISState that starts in JIS_ASCII.
 *
 * Visible cases:
 *   - a pair of iseuckanji() bytes is emitted with the high bit cleared;
 *   - a 0x8e-prefixed half-width kana is either emitted with the high bit
 *     cleared (when prefs_common.allow_jisx0201_kana is set) or passed
 *     through conv_jis_hantozen(), which is also given the following kana
 *     as a sound mark when another 0x8e-prefixed kana comes next;
 *   - after an iseucaux() byte, a pair of iseuckanji() bytes is emitted
 *     with the high bit cleared.
 * After malformed input the code looks at whether the next byte is a
 * non-NUL, non-ASCII byte; the action taken is not visible here.
 *
 * NOTE(review): no use of outlen is visible in the lines shown - confirm
 * that writes to outbuf are bounded.
 */
264 static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
266 const guchar *in = inbuf;
267 guchar *out = outbuf;
268 JISState state = JIS_ASCII;
270 while (*in != '\0') {
274 } else if (iseuckanji(*in)) {
275 if (iseuckanji(*(in + 1))) {
277 *out++ = *in++ & 0x7f;
278 *out++ = *in++ & 0x7f;
283 if (*in != '\0' && !IS_ASCII(*in)) {
288 } else if (iseuchwkana1(*in)) {
289 if (iseuchwkana2(*(in + 1))) {
290 if (prefs_common.allow_jisx0201_kana) {
293 *out++ = *in++ & 0x7f;
/* look ahead: is the next character another 0x8e-prefixed kana? */
298 if (iseuchwkana1(*(in + 2)) &&
299 iseuchwkana2(*(in + 3)))
300 len = conv_jis_hantozen
302 *(in + 1), *(in + 3));
304 len = conv_jis_hantozen
319 if (*in != '\0' && !IS_ASCII(*in)) {
324 } else if (iseucaux(*in)) {
326 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
328 *out++ = *in++ & 0x7f;
329 *out++ = *in++ & 0x7f;
332 if (*in != '\0' && !IS_ASCII(*in)) {
335 if (*in != '\0' && !IS_ASCII(*in)) {
/*
 * Convert the NUL-terminated Shift_JIS string inbuf to EUC-JP in outbuf.
 *
 * For a lead byte accepted by issjiskanji1() followed by a byte accepted by
 * issjiskanji2(), the pair is remapped arithmetically: the lead byte is
 * rebased by 0x70 (lead < 0xa0) or 0xb0 and doubled, the trail byte is
 * adjusted, and both result bytes are written with the high bit set.
 * A separate branch handles issjishwkana() bytes; its body is not visible.
 *
 * NOTE(review): no use of outlen is visible in the lines shown - confirm
 * that writes to outbuf are bounded.
 */
352 static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
354 const guchar *in = inbuf;
355 guchar *out = outbuf;
357 while (*in != '\0') {
360 } else if (issjiskanji1(*in)) {
361 if (issjiskanji2(*(in + 1))) {
363 guchar out2 = *(in + 1);
/* two lead-byte ranges use different base offsets */
366 row = out1 < 0xa0 ? 0x70 : 0xb0;
368 out1 = (out1 - row) * 2 - 1;
369 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
371 out1 = (out1 - row) * 2;
375 *out++ = out1 | 0x80;
376 *out++ = out2 | 0x80;
381 if (*in != '\0' && !IS_ASCII(*in)) {
386 } else if (issjishwkana(*in)) {
/*
 * JIS -> UTF-8 in two hops: conv_jistoeuc() fills a scratch buffer of
 * outlen bytes, then conv_euctoutf8() converts that buffer into outbuf.
 * NOTE(review): the scratch buffer comes from the project's Xalloca macro;
 * its third argument (`return`) is presumably the action taken when the
 * allocation fails - confirm.
 */
398 static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
402 Xalloca(eucstr, outlen, return);
404 conv_jistoeuc(eucstr, outlen, inbuf);
405 conv_euctoutf8(outbuf, outlen, eucstr);
/*
 * Shift_JIS -> UTF-8 through conv_iconv_strdup(); the converted string is
 * copied into outbuf with strncpy2().  The second strncpy2() call copies
 * inbuf unchanged - presumably the fallback when conversion yields nothing
 * (the condition is not visible here).
 */
408 static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
412 tmpstr = conv_iconv_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);
414 strncpy2(outbuf, tmpstr, outlen);
417 strncpy2(outbuf, inbuf, outlen);
/*
 * EUC-JP -> UTF-8 using an iconv descriptor kept in a function-local static
 * so that it is opened once and reused by later calls.
 *
 * While the descriptor is still (iconv_t)-1 the function tries
 * CS_EUC_JP_MS first and falls back to CS_EUC_JP; if both fail it logs a
 * warning and copies inbuf to outbuf unchanged.  The static iconv_ok flag
 * starts TRUE; how it is updated and tested is not visible here
 * (presumably it suppresses repeated open attempts - confirm).
 * On the normal path the text is converted with
 * conv_iconv_strdup_with_cd() and copied into outbuf with strncpy2().
 */
420 static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
422 static iconv_t cd = (iconv_t)-1;
423 static gboolean iconv_ok = TRUE;
426 if (cd == (iconv_t)-1) {
428 strncpy2(outbuf, inbuf, outlen);
/* prefer the Microsoft variant of EUC-JP, then plain EUC-JP */
431 cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
432 if (cd == (iconv_t)-1) {
433 cd = iconv_open(CS_UTF_8, CS_EUC_JP);
434 if (cd == (iconv_t)-1) {
435 g_warning("conv_euctoutf8(): %s\n",
438 strncpy2(outbuf, inbuf, outlen);
444 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
446 strncpy2(outbuf, tmpstr, outlen);
449 strncpy2(outbuf, inbuf, outlen);
/*
 * Convert Japanese text of unknown encoding to UTF-8: the encoding is
 * guessed with conv_guess_ja_encoding() and the matching converter
 * (conv_jistoutf8, conv_sjistoutf8 or conv_euctoutf8) is called; the
 * remaining branch copies inbuf to outbuf unchanged.  The case labels are
 * not visible in the lines shown.
 */
452 static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
454 switch (conv_guess_ja_encoding(inbuf)) {
456 conv_jistoutf8(outbuf, outlen, inbuf);
459 conv_sjistoutf8(outbuf, outlen, inbuf);
462 conv_euctoutf8(outbuf, outlen, inbuf);
465 strncpy2(outbuf, inbuf, outlen);
/*
 * UTF-8 -> EUC-JP using an iconv descriptor kept in a function-local static
 * so that it is opened once and reused by later calls.
 *
 * While the descriptor is still (iconv_t)-1 the function tries
 * CS_EUC_JP_MS as the target first and falls back to CS_EUC_JP; if both
 * fail it logs a warning and copies inbuf to outbuf unchanged.  The static
 * iconv_ok flag starts TRUE; how it is updated and tested is not visible
 * here.  On the normal path the text is converted with
 * conv_iconv_strdup_with_cd() and copied into outbuf with strncpy2().
 */
470 static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
472 static iconv_t cd = (iconv_t)-1;
473 static gboolean iconv_ok = TRUE;
476 if (cd == (iconv_t)-1) {
478 strncpy2(outbuf, inbuf, outlen);
/* prefer the Microsoft variant of EUC-JP, then plain EUC-JP */
481 cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
482 if (cd == (iconv_t)-1) {
483 cd = iconv_open(CS_EUC_JP, CS_UTF_8);
484 if (cd == (iconv_t)-1) {
485 g_warning("conv_utf8toeuc(): %s\n",
488 strncpy2(outbuf, inbuf, outlen);
494 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
496 strncpy2(outbuf, tmpstr, outlen);
499 strncpy2(outbuf, inbuf, outlen);
/*
 * UTF-8 -> JIS in two hops: conv_utf8toeuc() fills a scratch buffer of
 * outlen bytes, then conv_euctojis() converts that buffer into outbuf.
 * NOTE(review): the scratch buffer comes from the project's Xalloca macro;
 * its third argument (`return`) is presumably the action taken when the
 * allocation fails - confirm.
 */
502 static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf)
506 Xalloca(eucstr, outlen, return);
508 conv_utf8toeuc(eucstr, outlen, inbuf);
509 conv_euctojis(outbuf, outlen, eucstr);
/*
 * Lookup table consulted by isprintableeuckanji() for EUC-JP lead bytes
 * 0xa2..0xa8.  It is indexed as [c1 - 0xa2][c2 - 0xa0]: one row per lead
 * byte, 96 columns for trail bytes 0xa0..0xff.  The entry is returned
 * directly as the printable/unprintable answer (1 or 0).
 */
512 static gchar valid_eucjp_tbl[][96] = {
513 /* 0xa2a0 - 0xa2ff */
514 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
516 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
517 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
518 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
519 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0 },
521 /* 0xa3a0 - 0xa3ff */
522 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
523 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
524 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
525 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
526 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
527 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 },
529 /* 0xa4a0 - 0xa4ff */
530 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
531 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
532 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
533 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
534 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
535 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
537 /* 0xa5a0 - 0xa5ff */
538 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
539 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
540 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
541 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
542 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
543 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
545 /* 0xa6a0 - 0xa6ff */
546 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
547 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
548 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
549 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
550 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
551 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
553 /* 0xa7a0 - 0xa7ff */
554 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
555 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
556 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
557 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
558 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
559 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
561 /* 0xa8a0 - 0xa8ff */
562 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
563 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
564 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
/*
 * Decide whether the two-byte EUC-JP code (c1, c2) is printable.
 *
 * Lead bytes 0xa2..0xa8 are answered from valid_eucjp_tbl.  The other
 * range tests below (c1 <= 0xa0 or >= 0xf5; c2 <= 0xa0 or == 0xff;
 * c1 in 0xa9..0xaf; specific c2 ranges, including one for c1 == 0xf4)
 * select results that are not visible in the lines shown - presumably
 * FALSE for these out-of-range / unassigned areas; confirm.
 */
570 static gboolean isprintableeuckanji(guchar c1, guchar c2)
572 if (c1 <= 0xa0 || c1 >= 0xf5)
574 if (c2 <= 0xa0 || c2 == 0xff)
577 if (c1 >= 0xa9 && c1 <= 0xaf)
/* rows covered by the lookup table */
580 if (c1 >= 0xa2 && c1 <= 0xa8)
581 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
584 if (c2 >= 0xd4 && c2 <= 0xfe)
586 } else if (c1 == 0xf4) {
587 if (c2 >= 0xa7 && c2 <= 0xfe)
/*
 * Sanitise an EUC-JP string in place.  CR+LF pairs are collapsed to LF;
 * bytes are then classified as 7-bit, two-byte kanji (checked with
 * isprintableeuckanji()), 0x8e-prefixed half-width kana, or three-byte
 * auxiliary kanji, and anything else is substituted.  The substitution
 * statements themselves are not visible in the lines shown.
 */
594 static void conv_unreadable_eucjp(gchar *str)
596 register guchar *p = str;
600 /* convert CR+LF -> LF */
/* strlen(p) bytes from p + 1 = rest of the string plus its NUL */
601 if (*p == '\r' && *(p + 1) == '\n')
602 memmove(p, p + 1, strlen(p));
603 /* printable 7 bit code */
605 } else if (iseuckanji(*p)) {
606 if (isprintableeuckanji(*p, *(p + 1))) {
607 /* printable euc-jp code */
610 /* substitute unprintable code */
619 } else if (iseuchwkana1(*p)) {
620 if (iseuchwkana2(*(p + 1)))
621 /* euc-jp hankaku kana */
625 } else if (iseucaux(*p)) {
626 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
627 /* auxiliary kanji */
632 /* substitute unprintable 1 byte code */
/*
 * Sanitise a string in place for 7-bit display: CR+LF pairs are collapsed
 * to LF, and every byte for which IS_ASCII() is false is overwritten with
 * SUBST_CHAR.
 */
637 static void conv_unreadable_8bit(gchar *str)
639 register guchar *p = str;
642 /* convert CR+LF -> LF */
/* strlen(p) bytes from p + 1 = rest of the string plus its NUL */
643 if (*p == '\r' && *(p + 1) == '\n')
644 memmove(p, p + 1, strlen(p));
645 else if (!IS_ASCII(*p)) *p = SUBST_CHAR;
/*
 * Sanitise a string in place: CR+LF pairs are collapsed to LF, and bytes
 * whose value is 0x7f or above get special handling.  That handling is not
 * visible in the lines shown (presumably substitution of part of that
 * range - confirm).
 */
650 static void conv_unreadable_latin(gchar *str)
652 register guchar *p = str;
655 /* convert CR+LF -> LF */
/* strlen(p) bytes from p + 1 = rest of the string plus its NUL */
656 if (*p == '\r' && *(p + 1) == '\n')
657 memmove(p, p + 1, strlen(p));
658 else if ((*p & 0xff) >= 0x7f)
/*
 * Rewrite str in place, collapsing certain two-byte characters into a
 * single byte.
 *
 * For lead byte 0xa1, a second byte in 0xa0..0xef is looked up in char_tbl
 * (index: second byte - 0xa0); unless the entry is NCV the lead byte is
 * replaced by the table's ASCII character and the rest of the string is
 * shifted left by one byte with memmove().  Another branch handles second
 * bytes in 0xb0..0xfa in the same shrinking fashion; its lead-byte test and
 * the replacement value are not visible in the lines shown.  Other bytes
 * accepted by iseuckanji() take a separate branch.
 */
666 void conv_mb_alnum(gchar *str)
/* NCV marks positions with no single-byte replacement */
668 static guchar char_tbl[] = {
670 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
671 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
673 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
674 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
676 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
677 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
679 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
680 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
682 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
683 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
686 register guchar *p = str;
693 register guchar ch = *(p + 1);
695 if (ch >= 0xb0 && ch <= 0xfa) {
700 memmove(p, p + 1, len);
706 } else if (*p == 0xa1) {
707 register guchar ch = *(p + 1);
709 if (ch >= 0xa0 && ch <= 0xef &&
710 NCV != char_tbl[ch - 0xa0]) {
711 *p = char_tbl[ch - 0xa0];
714 memmove(p, p + 1, len);
720 } else if (iseuckanji(*p)) {
/*
 * Guess which Japanese encoding str is written in.
 *
 * The guess starts as C_US_ASCII.  An ESC byte followed by '$' or '(' while
 * the guess is still C_US_ASCII returns C_ISO_2022_JP immediately.  Byte
 * pairs that fit the EUC-JP kanji pattern and pairs or single bytes that
 * fit the Shift_JIS patterns steer the guess; several branches below set it
 * to C_SHIFT_JIS.  The remaining assignments and the final return are not
 * visible in the lines shown.
 */
730 CharSet conv_guess_ja_encoding(const gchar *str)
732 const guchar *p = str;
733 CharSet guessed = C_US_ASCII;
736 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
737 if (guessed == C_US_ASCII)
738 return C_ISO_2022_JP;
740 } else if (IS_ASCII(*p)) {
742 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
743 if (*p >= 0xfd && *p <= 0xfe)
745 else if (guessed == C_SHIFT_JIS) {
746 if ((issjiskanji1(*p) &&
747 issjiskanji2(*(p + 1))) ||
749 guessed = C_SHIFT_JIS;
755 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
756 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
757 guessed = C_SHIFT_JIS;
761 } else if (issjishwkana(*p)) {
762 guessed = C_SHIFT_JIS;
/* JIS -> display encoding; simply forwards to conv_jistoutf8(). */
772 static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
774 conv_jistoutf8(outbuf, outlen, inbuf);
/* Shift_JIS -> display encoding; simply forwards to conv_sjistoutf8(). */
777 static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
779 conv_sjistoutf8(outbuf, outlen, inbuf);
/* EUC-JP -> display encoding; simply forwards to conv_euctoutf8(). */
782 static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
784 conv_euctoutf8(outbuf, outlen, inbuf);
/*
 * UTF-8 -> display encoding.  No conversion is performed: inbuf is copied
 * to outbuf with strncpy2(), limited by outlen.
 */
787 void conv_utf8todisp(gchar *outbuf, gint outlen, const gchar *inbuf)
789 strncpy2(outbuf, inbuf, outlen);
/*
 * Japanese text of unknown encoding -> display encoding; forwards to
 * conv_anytoutf8(), which guesses the source encoding.
 */
792 static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
794 conv_anytoutf8(outbuf, outlen, inbuf);
/*
 * US-ASCII -> display encoding: copy inbuf to outbuf, then replace every
 * non-ASCII byte in the copy via conv_unreadable_8bit().
 */
797 static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
799 strncpy2(outbuf, inbuf, outlen);
800 conv_unreadable_8bit(outbuf);
/*
 * Locale encoding -> display encoding.  The text is converted with
 * conv_iconv_strdup() using conv_get_locale_charset_str() as the source
 * charset (the destination argument is not visible here) and copied into
 * outbuf.  The second strncpy2() copies inbuf unchanged - presumably the
 * fallback when conversion yields nothing; the condition is not visible.
 */
803 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
807 tmpstr = conv_iconv_strdup(inbuf, conv_get_locale_charset_str(),
810 strncpy2(outbuf, tmpstr, outlen);
813 strncpy2(outbuf, inbuf, outlen);
/*
 * Pass-through converter: copies inbuf to outbuf with strncpy2().  Its
 * address also serves as a sentinel - callers in this file compare a
 * CodeConvFunc against conv_noconv to detect "no dedicated converter".
 */
816 static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
818 strncpy2(outbuf, inbuf, outlen);
/*
 * Create a CodeConverter for text in src_charset.  The structure is
 * allocated zero-filled with g_new0() and filled with: the converter chosen
 * by conv_get_code_conv_func(src_charset, NULL), a private copy of the
 * charset name, and the parsed CharSet value.  The copy of the name is
 * released by conv_code_converter_destroy().
 */
821 CodeConverter *conv_code_converter_new(const gchar *src_charset)
825 conv = g_new0(CodeConverter, 1);
826 conv->code_conv_func = conv_get_code_conv_func(src_charset, NULL);
827 conv->charset_str = g_strdup(src_charset);
828 conv->charset = conv_get_charset_from_str(src_charset);
/*
 * Release a CodeConverter.  The charset name duplicated at creation time is
 * freed here; release of the structure itself is not visible in the lines
 * shown.
 */
833 void conv_code_converter_destroy(CodeConverter *conv)
835 g_free(conv->charset_str);
/*
 * Convert inbuf into outbuf (at most outlen bytes) using conv.
 *
 * When the converter holds a dedicated function (anything other than
 * conv_noconv) that function is called directly.  Otherwise the text goes
 * through conv_iconv_strdup() from conv->charset_str to the default
 * destination (NULL) and the result is copied into outbuf with strncpy2().
 * The gint return values are not visible in the lines shown.
 */
839 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
842 if (conv->code_conv_func != conv_noconv)
843 conv->code_conv_func(outbuf, outlen, inbuf);
847 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
851 strncpy2(outbuf, str, outlen);
/*
 * Return a newly allocated copy of inbuf converted from src_code to
 * dest_code.
 *
 * If conv_get_code_conv_func() supplies a dedicated converter, a work
 * buffer of (strlen(inbuf) + 1) * 3 bytes is used, NULL is returned when
 * that buffer could not be obtained, and the result is shrunk to its actual
 * length with g_realloc().  Otherwise the work is delegated to
 * conv_iconv_strdup().
 */
859 gchar *conv_codeset_strdup(const gchar *inbuf,
860 const gchar *src_code, const gchar *dest_code)
864 CodeConvFunc conv_func;
866 conv_func = conv_get_code_conv_func(src_code, dest_code);
867 if (conv_func != conv_noconv) {
/* worst-case growth allowance: three output bytes per input byte */
868 len = (strlen(inbuf) + 1) * 3;
870 if (!buf) return NULL;
872 conv_func(buf, len, inbuf);
/* trim the buffer to the converted string plus its NUL */
873 return g_realloc(buf, strlen(buf) + 1);
876 return conv_iconv_strdup(inbuf, src_code, dest_code);
/*
 * Pick the built-in converter for src_charset_str -> dest_charset_str.
 *
 * - A NULL source means "use the locale charset".
 * - Auto-detection mode (both arguments NULL): when the locale charset is
 *   EUC-JP or Shift_JIS, conv_anytodisp is returned.
 * - A US-ASCII destination always yields conv_ustodisp.
 * - Otherwise the source charset selects a family (jis*, sjis*, euc*,
 *   utf8*) and the destination selects the member; a destination of C_AUTO
 *   (which is what a NULL name parses to) selects the *todisp variant.
 *
 * The default result is conv_noconv, which callers in this file treat as
 * "no dedicated converter" and handle through iconv instead.  Some case
 * labels of the switch are not visible in the lines shown.
 */
879 CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
880 const gchar *dest_charset_str)
882 CodeConvFunc code_conv = conv_noconv;
884 CharSet dest_charset;
886 if (!src_charset_str)
887 src_charset = conv_get_locale_charset();
889 src_charset = conv_get_charset_from_str(src_charset_str);
891 /* auto detection mode */
892 if (!src_charset_str && !dest_charset_str) {
893 if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
894 return conv_anytodisp;
899 dest_charset = conv_get_charset_from_str(dest_charset_str);
901 if (dest_charset == C_US_ASCII)
902 return conv_ustodisp;
904 switch (src_charset) {
922 case C_ISO_2022_JP_2:
923 case C_ISO_2022_JP_3:
924 if (dest_charset == C_AUTO)
925 code_conv = conv_jistodisp;
926 else if (dest_charset == C_EUC_JP)
927 code_conv = conv_jistoeuc;
928 else if (dest_charset == C_UTF_8)
929 code_conv = conv_jistoutf8;
932 if (dest_charset == C_AUTO)
933 code_conv = conv_sjistodisp;
934 else if (dest_charset == C_EUC_JP)
935 code_conv = conv_sjistoeuc;
936 else if (dest_charset == C_UTF_8)
937 code_conv = conv_sjistoutf8;
940 if (dest_charset == C_AUTO)
941 code_conv = conv_euctodisp;
942 else if (dest_charset == C_ISO_2022_JP ||
943 dest_charset == C_ISO_2022_JP_2 ||
944 dest_charset == C_ISO_2022_JP_3)
945 code_conv = conv_euctojis;
946 else if (dest_charset == C_UTF_8)
947 code_conv = conv_euctoutf8;
950 if (dest_charset == C_EUC_JP)
951 code_conv = conv_utf8toeuc;
952 else if (dest_charset == C_ISO_2022_JP ||
953 dest_charset == C_ISO_2022_JP_2 ||
954 dest_charset == C_ISO_2022_JP_3)
955 code_conv = conv_utf8tojis;
/*
 * Return a newly allocated copy of inbuf converted from src_code to
 * dest_code with iconv.
 *
 * Defaults visible below: the source falls back to
 * conv_get_outgoing_charset_str() and the destination to CS_INTERNAL
 * (presumably when the respective argument is NULL - the tests are not
 * visible here).  No conversion is attempted, and a plain g_strdup() copy
 * is returned, when both names are equal ignoring case or when the
 * destination is US-ASCII.  Otherwise a descriptor is opened and the work is
 * done by conv_iconv_strdup_with_cd(); the result when iconv_open() fails
 * is not visible in the lines shown.
 */
964 gchar *conv_iconv_strdup(const gchar *inbuf,
965 const gchar *src_code, const gchar *dest_code)
971 src_code = conv_get_outgoing_charset_str();
973 dest_code = CS_INTERNAL;
975 /* don't convert if src and dest codeset are identical */
976 if (!strcasecmp(src_code, dest_code))
977 return g_strdup(inbuf);
979 /* don't convert if current codeset is US-ASCII */
980 if (!strcasecmp(dest_code, CS_US_ASCII))
981 return g_strdup(inbuf);
983 cd = iconv_open(dest_code, src_code);
984 if (cd == (iconv_t)-1)
987 outbuf = conv_iconv_strdup_with_cd(inbuf, cd);
/*
 * Convert inbuf with the already opened iconv descriptor cd and return the
 * result in a newly allocated buffer.
 *
 * The output buffer starts at (strlen(inbuf) + 1) * 2 bytes and is regrown
 * through the local EXPAND_BUF() macro, which preserves the current write
 * offset across g_realloc().  The main loop repeats while iconv() reports
 * an error:
 *   - EILSEQ: a SUBST_CHAR is written to the output, growing the buffer
 *     first when no room is left (how the input is advanced is not visible
 *     here);
 *   - EINVAL and E2BIG have their own branches; E2BIG is the "output buffer
 *     full" condition;
 *   - anything else is logged with g_warning().
 * A second loop calls iconv() with a NULL input, which per the iconv
 * interface flushes any pending shift sequence into the output.  Finally
 * the buffer is shrunk to the produced length plus one byte.
 */
994 gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd)
996 const gchar *inbuf_p;
1007 in_size = strlen(inbuf);
1009 out_size = (in_size + 1) * 2;
1010 outbuf = g_malloc(out_size);
1012 out_left = out_size;
1014 #define EXPAND_BUF() \
1016 len = outbuf_p - outbuf; \
1018 outbuf = g_realloc(outbuf, out_size); \
1019 outbuf_p = outbuf + len; \
1020 out_left = out_size - len; \
1023 while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
1024 &outbuf_p, &out_left)) == (size_t)-1) {
/* invalid input sequence: emit a substitute character */
1025 if (EILSEQ == errno) {
1026 //g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
1029 if (out_left == 0) {
1032 *outbuf_p++ = SUBST_CHAR;
1034 } else if (EINVAL == errno) {
1036 } else if (E2BIG == errno) {
1039 g_warning("conv_iconv_strdup(): %s\n",
1045 while ((n_conv = iconv(cd, NULL, NULL, &outbuf_p, &out_left)) ==
1047 if (E2BIG == errno) {
1050 g_warning("conv_iconv_strdup(): %s\n",
1058 len = outbuf_p - outbuf;
1059 outbuf = g_realloc(outbuf, len + 1);
1065 static const struct {
1069 {C_US_ASCII, CS_US_ASCII},
1070 {C_US_ASCII, CS_ANSI_X3_4_1968},
1071 {C_UTF_8, CS_UTF_8},
1072 {C_UTF_7, CS_UTF_7},
1073 {C_ISO_8859_1, CS_ISO_8859_1},
1074 {C_ISO_8859_2, CS_ISO_8859_2},
1075 {C_ISO_8859_3, CS_ISO_8859_3},
1076 {C_ISO_8859_4, CS_ISO_8859_4},
1077 {C_ISO_8859_5, CS_ISO_8859_5},
1078 {C_ISO_8859_6, CS_ISO_8859_6},
1079 {C_ISO_8859_7, CS_ISO_8859_7},
1080 {C_ISO_8859_8, CS_ISO_8859_8},
1081 {C_ISO_8859_9, CS_ISO_8859_9},
1082 {C_ISO_8859_10, CS_ISO_8859_10},
1083 {C_ISO_8859_11, CS_ISO_8859_11},
1084 {C_ISO_8859_13, CS_ISO_8859_13},
1085 {C_ISO_8859_14, CS_ISO_8859_14},
1086 {C_ISO_8859_15, CS_ISO_8859_15},
1087 {C_BALTIC, CS_BALTIC},
1088 {C_CP1250, CS_CP1250},
1089 {C_CP1251, CS_CP1251},
1090 {C_CP1252, CS_CP1252},
1091 {C_CP1253, CS_CP1253},
1092 {C_CP1254, CS_CP1254},
1093 {C_CP1255, CS_CP1255},
1094 {C_CP1256, CS_CP1256},
1095 {C_CP1257, CS_CP1257},
1096 {C_CP1258, CS_CP1258},
1097 {C_WINDOWS_1250, CS_WINDOWS_1250},
1098 {C_WINDOWS_1251, CS_WINDOWS_1251},
1099 {C_WINDOWS_1252, CS_WINDOWS_1252},
1100 {C_WINDOWS_1253, CS_WINDOWS_1253},
1101 {C_WINDOWS_1254, CS_WINDOWS_1254},
1102 {C_WINDOWS_1255, CS_WINDOWS_1255},
1103 {C_WINDOWS_1256, CS_WINDOWS_1256},
1104 {C_WINDOWS_1257, CS_WINDOWS_1257},
1105 {C_WINDOWS_1258, CS_WINDOWS_1258},
1106 {C_KOI8_R, CS_KOI8_R},
1107 {C_KOI8_T, CS_KOI8_T},
1108 {C_KOI8_U, CS_KOI8_U},
1109 {C_ISO_2022_JP, CS_ISO_2022_JP},
1110 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
1111 {C_ISO_2022_JP_3, CS_ISO_2022_JP_3},
1112 {C_EUC_JP, CS_EUC_JP},
1113 {C_EUC_JP, CS_EUCJP},
1114 {C_EUC_JP_MS, CS_EUC_JP_MS},
1115 {C_SHIFT_JIS, CS_SHIFT_JIS},
1116 {C_SHIFT_JIS, CS_SHIFT__JIS},
1117 {C_SHIFT_JIS, CS_SJIS},
1118 {C_ISO_2022_KR, CS_ISO_2022_KR},
1119 {C_EUC_KR, CS_EUC_KR},
1120 {C_ISO_2022_CN, CS_ISO_2022_CN},
1121 {C_EUC_CN, CS_EUC_CN},
1122 {C_GB2312, CS_GB2312},
1124 {C_EUC_TW, CS_EUC_TW},
1126 {C_BIG5_HKSCS, CS_BIG5_HKSCS},
1127 {C_TIS_620, CS_TIS_620},
1128 {C_WINDOWS_874, CS_WINDOWS_874},
1129 {C_GEORGIAN_PS, CS_GEORGIAN_PS},
1130 {C_TCVN5712_1, CS_TCVN5712_1},
1133 static const struct {
1134 gchar *const locale;
1136 CharSet out_charset;
1137 } locale_table[] = {
1138 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
1139 {"ja_JP.EUC-JP" , C_EUC_JP , C_ISO_2022_JP},
1140 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
1141 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
1142 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
1143 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
1144 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
1145 {"ko_KR.EUC-KR" , C_EUC_KR , C_EUC_KR},
1146 {"ko_KR" , C_EUC_KR , C_EUC_KR},
1147 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
1148 {"zh_CN.GBK" , C_GBK , C_GB2312},
1149 {"zh_CN" , C_GB2312 , C_GB2312},
1150 {"zh_HK" , C_BIG5_HKSCS , C_BIG5_HKSCS},
1151 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
1152 {"zh_TW.EUC-TW" , C_EUC_TW , C_BIG5},
1153 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
1154 {"zh_TW" , C_BIG5 , C_BIG5},
1156 {"ru_RU.KOI8-R" , C_KOI8_R , C_KOI8_R},
1157 {"ru_RU.KOI8R" , C_KOI8_R , C_KOI8_R},
1158 {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
1159 {"ru_RU" , C_ISO_8859_5 , C_KOI8_R},
1160 {"tg_TJ" , C_KOI8_T , C_KOI8_T},
1161 {"ru_UA" , C_KOI8_U , C_KOI8_U},
1162 {"uk_UA.CP1251" , C_WINDOWS_1251, C_KOI8_U},
1163 {"uk_UA" , C_KOI8_U , C_KOI8_U},
1165 {"be_BY" , C_WINDOWS_1251, C_WINDOWS_1251},
1166 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
1168 {"yi_US" , C_WINDOWS_1255, C_WINDOWS_1255},
1170 {"af_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1171 {"br_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1172 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1173 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1174 {"de_AT" , C_ISO_8859_1 , C_ISO_8859_1},
1175 {"de_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1176 {"de_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1177 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
1178 {"de_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1179 {"en_AU" , C_ISO_8859_1 , C_ISO_8859_1},
1180 {"en_BW" , C_ISO_8859_1 , C_ISO_8859_1},
1181 {"en_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1182 {"en_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1183 {"en_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1184 {"en_HK" , C_ISO_8859_1 , C_ISO_8859_1},
1185 {"en_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1186 {"en_NZ" , C_ISO_8859_1 , C_ISO_8859_1},
1187 {"en_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1188 {"en_SG" , C_ISO_8859_1 , C_ISO_8859_1},
1189 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
1190 {"en_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1191 {"en_ZW" , C_ISO_8859_1 , C_ISO_8859_1},
1192 {"es_AR" , C_ISO_8859_1 , C_ISO_8859_1},
1193 {"es_BO" , C_ISO_8859_1 , C_ISO_8859_1},
1194 {"es_CL" , C_ISO_8859_1 , C_ISO_8859_1},
1195 {"es_CO" , C_ISO_8859_1 , C_ISO_8859_1},
1196 {"es_CR" , C_ISO_8859_1 , C_ISO_8859_1},
1197 {"es_DO" , C_ISO_8859_1 , C_ISO_8859_1},
1198 {"es_EC" , C_ISO_8859_1 , C_ISO_8859_1},
1199 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1200 {"es_GT" , C_ISO_8859_1 , C_ISO_8859_1},
1201 {"es_HN" , C_ISO_8859_1 , C_ISO_8859_1},
1202 {"es_MX" , C_ISO_8859_1 , C_ISO_8859_1},
1203 {"es_NI" , C_ISO_8859_1 , C_ISO_8859_1},
1204 {"es_PA" , C_ISO_8859_1 , C_ISO_8859_1},
1205 {"es_PE" , C_ISO_8859_1 , C_ISO_8859_1},
1206 {"es_PR" , C_ISO_8859_1 , C_ISO_8859_1},
1207 {"es_PY" , C_ISO_8859_1 , C_ISO_8859_1},
1208 {"es_SV" , C_ISO_8859_1 , C_ISO_8859_1},
1209 {"es_US" , C_ISO_8859_1 , C_ISO_8859_1},
1210 {"es_UY" , C_ISO_8859_1 , C_ISO_8859_1},
1211 {"es_VE" , C_ISO_8859_1 , C_ISO_8859_1},
1212 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
1213 {"eu_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1214 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1215 {"fo_FO" , C_ISO_8859_1 , C_ISO_8859_1},
1216 {"fr_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1217 {"fr_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1218 {"fr_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1219 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1220 {"fr_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1221 {"ga_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1222 {"gl_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1223 {"gv_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1224 {"id_ID" , C_ISO_8859_1 , C_ISO_8859_1},
1225 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
1226 {"it_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1227 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
1228 {"kl_GL" , C_ISO_8859_1 , C_ISO_8859_1},
1229 {"kw_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1230 {"ms_MY" , C_ISO_8859_1 , C_ISO_8859_1},
1231 {"nl_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1232 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
1233 {"nn_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1234 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1235 {"oc_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1236 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
1237 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
1238 {"sq_AL" , C_ISO_8859_1 , C_ISO_8859_1},
1239 {"sv_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1240 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
1241 {"tl_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1242 {"uz_UZ" , C_ISO_8859_1 , C_ISO_8859_1},
1243 {"wa_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1245 {"bs_BA" , C_ISO_8859_2 , C_ISO_8859_2},
1246 {"cs_CZ" , C_ISO_8859_2 , C_ISO_8859_2},
1247 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
1248 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
1249 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
1250 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
1251 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
1252 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
1254 {"sr_YU@cyrillic" , C_ISO_8859_5 , C_ISO_8859_5},
1255 {"sr_YU" , C_ISO_8859_2 , C_ISO_8859_2},
1257 {"mt_MT" , C_ISO_8859_3 , C_ISO_8859_3},
1259 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
1260 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1261 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1262 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
1264 {"mk_MK" , C_ISO_8859_5 , C_ISO_8859_5},
1266 {"ar_AE" , C_ISO_8859_6 , C_ISO_8859_6},
1267 {"ar_BH" , C_ISO_8859_6 , C_ISO_8859_6},
1268 {"ar_DZ" , C_ISO_8859_6 , C_ISO_8859_6},
1269 {"ar_EG" , C_ISO_8859_6 , C_ISO_8859_6},
1270 {"ar_IQ" , C_ISO_8859_6 , C_ISO_8859_6},
1271 {"ar_JO" , C_ISO_8859_6 , C_ISO_8859_6},
1272 {"ar_KW" , C_ISO_8859_6 , C_ISO_8859_6},
1273 {"ar_LB" , C_ISO_8859_6 , C_ISO_8859_6},
1274 {"ar_LY" , C_ISO_8859_6 , C_ISO_8859_6},
1275 {"ar_MA" , C_ISO_8859_6 , C_ISO_8859_6},
1276 {"ar_OM" , C_ISO_8859_6 , C_ISO_8859_6},
1277 {"ar_QA" , C_ISO_8859_6 , C_ISO_8859_6},
1278 {"ar_SA" , C_ISO_8859_6 , C_ISO_8859_6},
1279 {"ar_SD" , C_ISO_8859_6 , C_ISO_8859_6},
1280 {"ar_SY" , C_ISO_8859_6 , C_ISO_8859_6},
1281 {"ar_TN" , C_ISO_8859_6 , C_ISO_8859_6},
1282 {"ar_YE" , C_ISO_8859_6 , C_ISO_8859_6},
1284 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
1285 {"he_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1286 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1287 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
1289 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
1290 {"mi_NZ" , C_ISO_8859_13 , C_ISO_8859_13},
1292 {"cy_GB" , C_ISO_8859_14 , C_ISO_8859_14},
1294 {"ar_IN" , C_UTF_8 , C_UTF_8},
1295 {"en_IN" , C_UTF_8 , C_UTF_8},
1296 {"se_NO" , C_UTF_8 , C_UTF_8},
1297 {"ta_IN" , C_UTF_8 , C_UTF_8},
1298 {"te_IN" , C_UTF_8 , C_UTF_8},
1299 {"ur_PK" , C_UTF_8 , C_UTF_8},
1301 {"th_TH" , C_TIS_620 , C_TIS_620},
1302 /* {"th_TH" , C_WINDOWS_874}, */
1303 /* {"th_TH" , C_ISO_8859_11}, */
1305 {"ka_GE" , C_GEORGIAN_PS , C_GEORGIAN_PS},
1306 {"vi_VN.TCVN" , C_TCVN5712_1 , C_TCVN5712_1},
1308 {"C" , C_US_ASCII , C_US_ASCII},
1309 {"POSIX" , C_US_ASCII , C_US_ASCII},
1310 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
/*
 * Return the hash table mapping a CharSet value to its name, held in a
 * function-local static (presumably built on first use - the guard is not
 * visible here).  Keys are the CharSet values themselves, stored as
 * pointers and compared with g_direct_equal.  Each charsets[] entry is
 * looked up before insertion, which suggests that only the first name
 * listed for a given CharSet is kept - confirm against the missing
 * condition line.
 */
1313 static GHashTable *conv_get_charset_to_str_table(void)
1315 static GHashTable *table;
1321 table = g_hash_table_new(NULL, g_direct_equal);
1323 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1324 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1327 (table, GUINT_TO_POINTER(charsets[i].charset),
/*
 * Return the hash table mapping a charset name to its CharSet value, held
 * in a function-local static (presumably built on first use - the guard is
 * not visible here).  Names are hashed and compared with the project's
 * str_case_hash / str_case_equal helpers (case-insensitive, going by their
 * names), and every charsets[] entry is inserted, so alias names resolve to
 * the same CharSet.
 */
1335 static GHashTable *conv_get_charset_from_str_table(void)
1337 static GHashTable *table;
1343 table = g_hash_table_new(str_case_hash, str_case_equal);
1345 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1346 g_hash_table_insert(table, charsets[i].name,
1347 GUINT_TO_POINTER(charsets[i].charset));
/*
 * Return the name registered for charset, or NULL when the lookup table has
 * no entry for it (g_hash_table_lookup() returns NULL on a miss).
 */
1353 const gchar *conv_get_charset_str(CharSet charset)
1357 table = conv_get_charset_to_str_table();
1358 return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
/*
 * Parse a charset name into a CharSet value.  A NULL name yields C_AUTO.
 * An unknown name yields the CharSet whose numeric value is 0, because a
 * failed hash lookup returns NULL and is converted with GPOINTER_TO_UINT.
 */
1361 CharSet conv_get_charset_from_str(const gchar *charset)
1365 if (!charset) return C_AUTO;
1367 table = conv_get_charset_from_str_table();
1368 return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
/*
 * Determine the charset implied by the current locale.  The result is
 * cached in a function-local static, with -1 meaning "not computed yet".
 *
 * Rules visible below, in order:
 *   - C_US_ASCII is assigned in one early branch (presumably when no locale
 *     name is available - the condition is not visible here);
 *   - a locale name containing "UTF-8" (any case) gives C_UTF_8;
 *   - a locale name ending in "@euro" gives C_ISO_8859_15;
 *   - otherwise locale_table is scanned: an entry matches when its name is
 *     a case-insensitive prefix of the locale, or when the locale is just a
 *     two-letter language code equal to the first two letters of an entry
 *     that has no ".codeset" part;
 *   - C_AUTO is assigned at the end (presumably when nothing matched).
 */
1371 CharSet conv_get_locale_charset(void)
1373 static CharSet cur_charset = -1;
1374 const gchar *cur_locale;
1378 if (cur_charset != -1)
1381 cur_locale = conv_get_current_locale();
1383 cur_charset = C_US_ASCII;
1387 if (strcasestr(cur_locale, "UTF-8")) {
1388 cur_charset = C_UTF_8;
/* "@euro" must be the final component of the locale name */
1392 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1393 cur_charset = C_ISO_8859_15;
1397 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1400 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1401 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1402 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1403 strlen(locale_table[i].locale))) {
1404 cur_charset = locale_table[i].charset;
1406 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1407 !strchr(p + 1, '.')) {
1408 if (strlen(cur_locale) == 2 &&
1409 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1410 cur_charset = locale_table[i].charset;
1416 cur_charset = C_AUTO;
/*
 * Return the name of the locale charset.  The name is kept in a
 * function-local static pointer (presumably filled only once - the guard
 * is not visible here); CS_INTERNAL is returned when no name is known.
 */
1420 const gchar *conv_get_locale_charset_str(void)
1422 static const gchar *codeset = NULL;
1425 codeset = conv_get_charset_str(conv_get_locale_charset());
1427 return codeset ? codeset : CS_INTERNAL;
1430 CharSet conv_get_internal_charset(void)
1435 const gchar *conv_get_internal_charset_str(void)
/*
 * Determine the default charset for outgoing text from the current locale.
 * The result is cached in a function-local static, with -1 meaning "not
 * computed yet".
 *
 * Rules visible below, in order:
 *   - C_AUTO is assigned in one early branch (presumably when no locale
 *     name is available - the condition is not visible here);
 *   - a locale name ending in "@euro" gives C_ISO_8859_15;
 *   - otherwise locale_table is scanned with the same matching rules as
 *     conv_get_locale_charset(), but the entry's out_charset field is used.
 */
1440 CharSet conv_get_outgoing_charset(void)
1442 static CharSet out_charset = -1;
1443 const gchar *cur_locale;
1447 if (out_charset != -1)
1450 cur_locale = conv_get_current_locale();
1452 out_charset = C_AUTO;
/* "@euro" must be the final component of the locale name */
1456 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1457 out_charset = C_ISO_8859_15;
1461 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1464 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1465 strlen(locale_table[i].locale))) {
1466 out_charset = locale_table[i].out_charset;
1468 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1469 !strchr(p + 1, '.')) {
1470 if (strlen(cur_locale) == 2 &&
1471 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1472 out_charset = locale_table[i].out_charset;
/*
 * Return the charset name to use for outgoing text.
 *
 * A user preference (prefs_common.outgoing_charset) wins when it is set to
 * something other than CS_AUTO.  A preference whose first character is not
 * alphabetic is treated as invalid: it is freed and reset to CS_AUTO.
 * With no usable preference the name is derived from
 * conv_get_outgoing_charset(), falling back to CS_UTF_8 when that charset
 * has no registered name.
 */
1481 const gchar *conv_get_outgoing_charset_str(void)
1483 CharSet out_charset;
1486 if (prefs_common.outgoing_charset) {
/* cast to guchar keeps isalpha() defined for bytes >= 0x80 */
1487 if (!isalpha((guchar)prefs_common.outgoing_charset[0])) {
1488 g_free(prefs_common.outgoing_charset);
1489 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1490 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1491 return prefs_common.outgoing_charset;
1494 out_charset = conv_get_outgoing_charset();
1495 str = conv_get_charset_str(out_charset);
1497 return str ? str : CS_UTF_8;
/*
 * Classify `encoding` and return a gboolean.
 *
 * C_ISO_2022_JP_2 and C_ISO_2022_JP_3 are among the case labels that
 * are handled.
 * NOTE(review): going by the function name these are presumably
 * reported as multibyte (TRUE); the remaining labels and the return
 * statements are not part of the visible lines -- confirm.
 */
1500 gboolean conv_is_multibyte_encoding(CharSet encoding)
1509 case C_ISO_2022_JP_2:
1510 case C_ISO_2022_JP_3:
/*
 * Look up the name of the current locale.
 *
 * Sources are tried in this order, stopping at the first one that is
 * not NULL: the environment variables LC_ALL, LC_CTYPE and LANG, then
 * the LC_CTYPE locale reported by setlocale().  The outcome is written
 * to the debug log.
 *
 * NOTE(review): only NULL is treated as "unset" by the visible tests,
 * so an environment variable set to the empty string would be accepted
 * as the locale name -- confirm whether that is intended.
 */
1524 const gchar *conv_get_current_locale(void)
1526 const gchar *cur_locale;
1528 cur_locale = g_getenv("LC_ALL");
1529 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1530 if (!cur_locale) cur_locale = g_getenv("LANG");
/* a NULL second argument makes setlocale() query without changing anything */
1531 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
/* print "(none)" rather than passing a NULL pointer to %s */
1533 debug_print("current locale: %s\n",
1534 cur_locale ? cur_locale : "(none)");
/*
 * Decode a MIME-encoded header in place: the result of unmime_header()
 * ends up in `str` itself, so the caller's buffer is overwritten.
 *
 * Two code paths are visible.  In the one guarded by the locale charset
 * being C_EUC_JP, `str` is first passed through conv_anytodisp() into a
 * temporary buffer of strlen(str) * 2 + 1 bytes and then decoded back
 * into `str`.  In the other, `str` is decoded into a temporary buffer of
 * strlen(str) + 1 bytes and copied back with strncpy2().
 *
 * NOTE(review): Xalloca is a project macro; its third argument
 * (`return`) is presumably the statement executed when the allocation
 * fails -- confirm against its definition.
 */
1539 void conv_unmime_header_overwrite(gchar *str)
1543 CharSet cur_charset;
1545 cur_charset = conv_get_locale_charset();
1547 if (cur_charset == C_EUC_JP) {
/* temporary buffer is twice the input length plus the terminator */
1548 buflen = strlen(str) * 2 + 1;
1549 Xalloca(buf, buflen, return);
1550 conv_anytodisp(buf, buflen, str);
/* decode from the converted copy straight into the caller's buffer */
1551 unmime_header(str, buf);
/* second path: temporary buffer of the same size as the input */
1553 buflen = strlen(str) + 1;
1554 Xalloca(buf, buflen, return);
1555 unmime_header(buf, str);
/* copy the decoded text back over the input, bounded by buflen */
1556 strncpy2(str, buf, buflen);
/*
 * Decode the MIME-encoded header `str` into `outbuf`.
 *
 * outbuf   destination buffer
 * outlen   size of outbuf; in the visible lines it only bounds the
 *          strncpy() of the re-converted text
 * str      header text to decode
 * charset  not referenced by any visible line
 *
 * When the locale charset is C_EUC_JP, `str` is passed through
 * conv_anytodisp() into a temporary buffer before decoding.  On the
 * other path, `str` is decoded directly; if the result is not valid
 * UTF-8 it is converted from the locale charset with
 * conv_codeset_strdup() (only when the locale charset differs from
 * C_INTERNAL) and copied back, and conv_unreadable_8bit() is applied to
 * outbuf.
 *
 * NOTE(review): the conditions selecting between the strncpy() and the
 * conv_unreadable_8bit() call are not part of the visible lines.
 */
1560 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1561 const gchar *charset)
1563 CharSet cur_charset;
1565 cur_charset = conv_get_locale_charset();
1567 if (cur_charset == C_EUC_JP) {
/* temporary buffer is twice the input length plus the terminator */
1571 buflen = strlen(str) * 2 + 1;
1572 Xalloca(buf, buflen, return);
1573 conv_anytodisp(buf, buflen, str);
1574 unmime_header(outbuf, buf);
/* other path: decode the input as it is */
1577 unmime_header(outbuf, str);
/* the decoded text is expected to be UTF-8; repair it if it is not */
1579 if (outbuf && !g_utf8_validate(outbuf, -1, NULL)) {
/* conversion from the locale charset is attempted only when it is not
   already the internal charset */
1580 if (conv_get_locale_charset() != C_INTERNAL)
1581 tmp = conv_codeset_strdup(outbuf,
1582 conv_get_locale_charset_str(),
/* NOTE(review): strncpy() writes no terminator when tmp holds
   outlen-1 or more bytes, and no explicit termination of outbuf is
   visible here -- confirm that outbuf[outlen-1] is guaranteed to be
   '\0' by the caller or by a neighbouring statement */
1586 strncpy(outbuf, tmp, outlen-1);
1589 conv_unreadable_8bit(outbuf);
/* Constants used by conv_encode_header() and LBREAK_IF_REQUIRED. */
/* column budget of one output line; `left` is initialised and reset from it */
1596 #define MAX_LINELEN 76
/* larger limit; the difference to MAX_LINELEN is the extra allowance
   given to an ASCII word before a break is forced inside it */
1597 #define MAX_HARD_LINELEN 996
/* opening and closing delimiters of an encoded block, as used in the
   "=?charset?X?data?=" format string of conv_encode_header() */
1598 #define MIMESEP_BEGIN "=?"
1599 #define MIMESEP_END "?="
/*
 * LBREAK_IF_REQUIRED(cond, is_plain_text)
 *
 * Helper for conv_encode_header().  It is not self-contained: the
 * expansion reads and writes the caller's `dest`, `len`, `destp`,
 * `srcp` and `left`.
 *
 * Visible tests, in order:
 *   - whether fewer than MAX_LINELEN + 2 bytes remain in `dest` after
 *     the current write position;
 *   - whether `cond` holds and the source is not exhausted (*srcp);
 *   - whether something has already been written and the current line
 *     has been used (left < MAX_LINELEN - 1);
 *   - whether the last written character is whitespace, or, for plain
 *     text, whether the next source character is whitespace.
 * `left` is reset to MAX_LINELEN - 1 as part of the expansion.
 *
 * NOTE(review): the actions attached to these tests are not part of the
 * visible lines; presumably they abort when the buffer is nearly full
 * and otherwise emit a folded line break -- confirm.
 */
1601 #define LBREAK_IF_REQUIRED(cond, is_plain_text) \
1603 if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) { \
1608 if ((cond) && *srcp) { \
1609 if (destp > (guchar *)dest && left < MAX_LINELEN - 1) { \
1610 if (isspace(*(destp - 1))) \
1612 else if (is_plain_text && isspace(*srcp)) \
1617 left = MAX_LINELEN - 1; \
/*
 * Encode the UTF-8 header text `src` into `dest`, writing runs that
 * contain non-ASCII characters as MIME encoded blocks of the form
 * "=?charset?B?data?=" or "=?charset?Q?data?=" and copying ASCII words
 * through unchanged.
 *
 * dest        output buffer
 * len         size of dest; LBREAK_IF_REQUIRED compares it with the
 *             current write position
 * src         input text; checked with g_utf8_validate() through
 *             g_return_if_fail() before anything else is done
 * header_len  subtracted from MAX_LINELEN to obtain the initial column
 *             budget `left`
 * addr_field  when TRUE, '(' and ')' are kept out of encoded blocks
 *
 * The target charset is conv_get_outgoing_charset_str(), with
 * CS_US_ASCII replaced by CS_ISO_8859_1.  For a non-ASCII run the
 * function grows a candidate chunk one UTF-8 character at a time
 * (g_utf8_skip): each candidate srcp[0 .. cur_len + mb_len) is converted
 * to the target charset, its encoded length is computed with B64LEN()
 * or qp_get_q_encoding_len(), and mimestr_len plus that length is
 * compared with `left`.  The accepted chunk srcp[0 .. cur_len) is then
 * converted again, encoded with base64_encode() or qp_q_encode(), and
 * written out.  When the charset conversion fails, a warning is logged
 * and the chunk is passed through conv_unreadable_8bit() and used as it
 * is.
 *
 * NOTE(review): the loop headers, the pointer/counter updates and the
 * tests on `use_base64` are not part of the visible lines.
 */
1623 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1624 gint header_len, gboolean addr_field)
1626 const gchar *cur_encoding;
1627 const gchar *out_encoding;
/* read and write cursors, as unsigned bytes */
1631 const guchar *srcp = src;
1632 guchar *destp = dest;
1633 gboolean use_base64;
1635 g_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
/* "?B?" is selected under a multibyte locale; "?Q?" is the other
   visible choice (presumably the else branch -- confirm) */
1637 if (MB_CUR_MAX > 1) {
1639 mimesep_enc = "?B?";
1642 mimesep_enc = "?Q?";
1645 cur_encoding = CS_INTERNAL;
1646 out_encoding = conv_get_outgoing_charset_str();
/* never label an encoded block as US-ASCII; use ISO-8859-1 instead */
1647 if (!strcmp(out_encoding, CS_US_ASCII))
1648 out_encoding = CS_ISO_8859_1;
/* fixed overhead of one encoded block: "=?" + charset + "?X?" + "?=" */
1650 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1651 strlen(mimesep_enc) + strlen(MIMESEP_END);
/* columns still free on the first line, after the header name */
1653 left = MAX_LINELEN - header_len;
1656 LBREAK_IF_REQUIRED(left <= 0, TRUE);
/* whitespace handling */
1658 while (isspace(*srcp)) {
1661 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1664 /* output as it is if the next word is ASCII string */
1665 if (!is_next_nonascii(srcp)) {
1668 word_len = get_next_word_len(srcp);
/* break first if the whole word does not fit on the current line */
1669 LBREAK_IF_REQUIRED(left < word_len, TRUE);
1670 while (word_len > 0) {
/* inside a word a break is requested only once the budget has been
   overdrawn by MAX_HARD_LINELEN - MAX_LINELEN columns */
1671 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1680 /* don't include parentheses in encoded strings */
1681 if (addr_field && (*srcp == '(' || *srcp == ')')) {
1682 LBREAK_IF_REQUIRED(left < 2, FALSE);
/* non-ASCII run: `p` scans ahead from srcp to size the next chunk */
1693 const guchar *p = srcp;
1695 gint out_enc_str_len;
1696 gint mime_block_len;
1697 gboolean cont = FALSE;
1699 while (*p != '\0') {
/* whitespace that is not followed by more non-ASCII text is tested for here */
1700 if (isspace(*p) && !is_next_nonascii(p + 1))
1702 /* don't include parentheses in encoded
1704 if (addr_field && (*p == '(' || *p == ')'))
1707 mb_len = g_utf8_skip[*p];
1709 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1710 out_str = conv_codeset_strdup
1711 (part_str, cur_encoding, out_encoding);
1713 g_warning("conv_encode_header(): code conversion failed\n");
1714 conv_unreadable_8bit(part_str);
1715 out_str = g_strdup(part_str);
1717 out_str_len = strlen(out_str);
1720 out_enc_str_len = B64LEN(out_str_len);
1723 qp_get_q_encoding_len(out_str);
1727 if (mimestr_len + out_enc_str_len <= left) {
1730 } else if (cur_len == 0) {
1731 LBREAK_IF_REQUIRED(1, FALSE);
1740 Xstrndup_a(part_str, srcp, cur_len, );
1741 out_str = conv_codeset_strdup
1742 (part_str, cur_encoding, out_encoding);
1744 g_warning("conv_encode_header(): code conversion failed\n");
1745 conv_unreadable_8bit(part_str);
1746 out_str = g_strdup(part_str);
1748 out_str_len = strlen(out_str);
1751 out_enc_str_len = B64LEN(out_str_len);
1754 qp_get_q_encoding_len(out_str);
1756 Xalloca(enc_str, out_enc_str_len + 1, );
1758 base64_encode(enc_str, out_str, out_str_len);
1760 qp_q_encode(enc_str, out_str);
1764 /* output MIME-encoded string block */
1765 mime_block_len = mimestr_len + strlen(enc_str);
/* the size passed to g_snprintf() is the block length plus the
   terminator, not the space remaining in dest */
1766 g_snprintf(destp, mime_block_len + 1,
1767 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1768 out_encoding, mimesep_enc, enc_str);
/* advance the write cursor and charge the block to the current line */
1769 destp += mime_block_len;
1772 left -= mime_block_len;
/* `cont` is the break condition after the block has been written */
1775 LBREAK_IF_REQUIRED(cont, FALSE);
/* the macro is local to conv_encode_header() */
1785 #undef LBREAK_IF_REQUIRED