2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2005 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
27 #include <glib/gi18n.h>
42 #include "quoted-printable.h"
44 #include "prefs_common.h"
/* Replacement byte stored in place of input that cannot be displayed or
   converted (0x5f is ASCII '_').  The definition deliberately carries no
   trailing semicolon, so the macro expands to a plain expression and every
   use site supplies its own statement terminator. */
#define SUBST_CHAR 0x5f
/* Nonzero when the low 8 bits of c fall in 0xa1..0xfe.  The converters in
   this file apply it to both bytes of a two-byte EUC-JP character.
   The argument is expanded twice, so pass an expression without side
   effects. */
57 #define iseuckanji(c) \
58 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* Nonzero when the low 8 bits of c equal 0x8e.  Callers in this file test
   it on the first byte and then apply iseuchwkana2() to the byte that
   follows. */
59 #define iseuchwkana1(c) \
60 (((c) & 0xff) == 0x8e)
/* Nonzero when the low 8 bits of c fall in 0xa1..0xdf; checked on the byte
   after one accepted by iseuchwkana1().  The argument is expanded twice. */
61 #define iseuchwkana2(c) \
62 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
64 (((c) & 0xff) == 0x8f)
/* Nonzero when the low 8 bits of c fall in 0x81..0x9f or 0xe0..0xfc.
   conv_sjistoeuc() and conv_guess_ja_encoding() use it on the first byte
   of a candidate two-byte Shift_JIS character.  The argument may be
   expanded up to four times. */
65 #define issjiskanji1(c) \
66 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
67 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* Nonzero when the low 8 bits of c fall in 0x40..0x7e or 0x80..0xfc;
   checked on the byte following one accepted by issjiskanji1().  The
   argument may be expanded up to four times. */
68 #define issjiskanji2(c) \
69 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
70 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* Nonzero when the low 8 bits of c fall in 0xa1..0xdf.  A byte matching
   this makes conv_guess_ja_encoding() settle on C_SHIFT_JIS once the
   earlier two-byte tests have not matched.  The argument is expanded
   twice. */
71 #define issjishwkana(c) \
72 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
75 if (state != JIS_KANJI) { \
83 if (state != JIS_ASCII) { \
91 if (state != JIS_HWKANA) { \
99 if (state != JIS_AUXKANJI) { \
104 state = JIS_AUXKANJI; \
107 static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
108 static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf);
109 static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
111 static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
112 static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
113 static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
114 static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
116 static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
117 static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf);
119 static void conv_unreadable_eucjp(gchar *str);
120 static void conv_unreadable_8bit(gchar *str);
121 static void conv_unreadable_latin(gchar *str);
123 static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
124 static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
125 static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
127 static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
128 static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
129 static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf);
131 static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
133 const guchar *in = inbuf;
134 guchar *out = outbuf;
135 JISState state = JIS_ASCII;
137 while (*in != '\0') {
141 if (*(in + 1) == '@' || *(in + 1) == 'B') {
144 } else if (*(in + 1) == '(' &&
146 state = JIS_AUXKANJI;
149 /* unknown escape sequence */
152 } else if (*in == '(') {
153 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
156 } else if (*(in + 1) == 'I') {
160 /* unknown escape sequence */
164 /* unknown escape sequence */
167 } else if (*in == 0x0e) {
170 } else if (*in == 0x0f) {
179 *out++ = *in++ | 0x80;
180 if (*in == '\0') break;
181 *out++ = *in++ | 0x80;
185 *out++ = *in++ | 0x80;
189 *out++ = *in++ | 0x80;
190 if (*in == '\0') break;
191 *out++ = *in++ | 0x80;
/* Values of the sound_sym argument recognised by conv_jis_hantozen():
   JIS_HWDAKUTEN selects the dakuten_tbl lookup and JIS_HWHANDAKUTEN the
   handakuten_tbl lookup (each only for the jis_code range checked there). */
200 #define JIS_HWDAKUTEN 0x5e
201 #define JIS_HWHANDAKUTEN 0x5f
203 static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
205 static guint16 h2z_tbl[] = {
207 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
208 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
210 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
211 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
213 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
214 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
216 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
217 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
220 static guint16 dakuten_tbl[] = {
222 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
223 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
225 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
226 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
229 static guint16 handakuten_tbl[] = {
231 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
239 if (jis_code < 0x21 || jis_code > 0x5f)
242 if (sound_sym == JIS_HWDAKUTEN &&
243 jis_code >= 0x36 && jis_code <= 0x4e) {
244 out_code = dakuten_tbl[jis_code - 0x30];
246 *outbuf = out_code >> 8;
247 *(outbuf + 1) = out_code & 0xff;
252 if (sound_sym == JIS_HWHANDAKUTEN &&
253 jis_code >= 0x4a && jis_code <= 0x4e) {
254 out_code = handakuten_tbl[jis_code - 0x4a];
255 *outbuf = out_code >> 8;
256 *(outbuf + 1) = out_code & 0xff;
260 out_code = h2z_tbl[jis_code - 0x20];
261 *outbuf = out_code >> 8;
262 *(outbuf + 1) = out_code & 0xff;
266 static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
268 const guchar *in = inbuf;
269 guchar *out = outbuf;
270 JISState state = JIS_ASCII;
272 while (*in != '\0') {
276 } else if (iseuckanji(*in)) {
277 if (iseuckanji(*(in + 1))) {
279 *out++ = *in++ & 0x7f;
280 *out++ = *in++ & 0x7f;
285 if (*in != '\0' && !IS_ASCII(*in)) {
290 } else if (iseuchwkana1(*in)) {
291 if (iseuchwkana2(*(in + 1))) {
292 if (prefs_common.allow_jisx0201_kana) {
295 *out++ = *in++ & 0x7f;
300 if (iseuchwkana1(*(in + 2)) &&
301 iseuchwkana2(*(in + 3)))
302 len = conv_jis_hantozen
304 *(in + 1), *(in + 3));
306 len = conv_jis_hantozen
321 if (*in != '\0' && !IS_ASCII(*in)) {
326 } else if (iseucaux(*in)) {
328 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
330 *out++ = *in++ & 0x7f;
331 *out++ = *in++ & 0x7f;
334 if (*in != '\0' && !IS_ASCII(*in)) {
337 if (*in != '\0' && !IS_ASCII(*in)) {
354 static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
356 const guchar *in = inbuf;
357 guchar *out = outbuf;
359 while (*in != '\0') {
362 } else if (issjiskanji1(*in)) {
363 if (issjiskanji2(*(in + 1))) {
365 guchar out2 = *(in + 1);
368 row = out1 < 0xa0 ? 0x70 : 0xb0;
370 out1 = (out1 - row) * 2 - 1;
371 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
373 out1 = (out1 - row) * 2;
377 *out++ = out1 | 0x80;
378 *out++ = out2 | 0x80;
383 if (*in != '\0' && !IS_ASCII(*in)) {
388 } else if (issjishwkana(*in)) {
/* Two-stage conversion through an intermediate buffer: conv_jistoeuc()
   writes its result into eucstr, then conv_euctoutf8() converts eucstr
   into outbuf.  Both stages are given outlen as the size limit.
   eucstr is obtained with Xalloca(eucstr, outlen, return).
   NOTE(review): presumably Xalloca allocates outlen bytes on the stack and
   executes `return` on failure -- confirm against its definition. */
400 static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
404 Xalloca(eucstr, outlen, return);
406 conv_jistoeuc(eucstr, outlen, inbuf);
407 conv_euctoutf8(outbuf, outlen, eucstr);
410 static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
414 tmpstr = conv_iconv_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);
416 strncpy2(outbuf, tmpstr, outlen);
419 strncpy2(outbuf, inbuf, outlen);
422 static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
424 static iconv_t cd = (iconv_t)-1;
425 static gboolean iconv_ok = TRUE;
428 if (cd == (iconv_t)-1) {
430 strncpy2(outbuf, inbuf, outlen);
433 cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
434 if (cd == (iconv_t)-1) {
435 cd = iconv_open(CS_UTF_8, CS_EUC_JP);
436 if (cd == (iconv_t)-1) {
437 g_warning("conv_euctoutf8(): %s\n",
440 strncpy2(outbuf, inbuf, outlen);
446 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
448 strncpy2(outbuf, tmpstr, outlen);
451 strncpy2(outbuf, inbuf, outlen);
454 static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
456 switch (conv_guess_ja_encoding(inbuf)) {
458 conv_jistoutf8(outbuf, outlen, inbuf);
461 conv_sjistoutf8(outbuf, outlen, inbuf);
464 conv_euctoutf8(outbuf, outlen, inbuf);
467 strncpy2(outbuf, inbuf, outlen);
472 static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
474 static iconv_t cd = (iconv_t)-1;
475 static gboolean iconv_ok = TRUE;
478 if (cd == (iconv_t)-1) {
480 strncpy2(outbuf, inbuf, outlen);
483 cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
484 if (cd == (iconv_t)-1) {
485 cd = iconv_open(CS_EUC_JP, CS_UTF_8);
486 if (cd == (iconv_t)-1) {
487 g_warning("conv_utf8toeuc(): %s\n",
490 strncpy2(outbuf, inbuf, outlen);
496 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
498 strncpy2(outbuf, tmpstr, outlen);
501 strncpy2(outbuf, inbuf, outlen);
/* Two-stage conversion through an intermediate buffer: conv_utf8toeuc()
   writes its result into eucstr, then conv_euctojis() converts eucstr
   into outbuf.  Both stages are given outlen as the size limit.
   eucstr is obtained with Xalloca(eucstr, outlen, return).
   NOTE(review): presumably Xalloca allocates outlen bytes on the stack and
   executes `return` on failure -- confirm against its definition. */
504 static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf)
508 Xalloca(eucstr, outlen, return);
510 conv_utf8toeuc(eucstr, outlen, inbuf);
511 conv_euctojis(outbuf, outlen, eucstr);
514 static gchar valid_eucjp_tbl[][96] = {
515 /* 0xa2a0 - 0xa2ff */
516 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
518 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
519 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
520 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
521 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0 },
523 /* 0xa3a0 - 0xa3ff */
524 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
526 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
527 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
528 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
529 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 },
531 /* 0xa4a0 - 0xa4ff */
532 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
533 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
534 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
535 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
536 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
537 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
539 /* 0xa5a0 - 0xa5ff */
540 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
541 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
542 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
543 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
544 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
545 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
547 /* 0xa6a0 - 0xa6ff */
548 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
549 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
550 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
551 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
552 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
553 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
555 /* 0xa7a0 - 0xa7ff */
556 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
557 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
558 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
560 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
561 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
563 /* 0xa8a0 - 0xa8ff */
564 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
565 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
566 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
572 static gboolean isprintableeuckanji(guchar c1, guchar c2)
574 if (c1 <= 0xa0 || c1 >= 0xf5)
576 if (c2 <= 0xa0 || c2 == 0xff)
579 if (c1 >= 0xa9 && c1 <= 0xaf)
582 if (c1 >= 0xa2 && c1 <= 0xa8)
583 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
586 if (c2 >= 0xd4 && c2 <= 0xfe)
588 } else if (c1 == 0xf4) {
589 if (c2 >= 0xa7 && c2 <= 0xfe)
596 static void conv_unreadable_eucjp(gchar *str)
598 register guchar *p = str;
602 /* convert CR+LF -> LF */
603 if (*p == '\r' && *(p + 1) == '\n')
604 memmove(p, p + 1, strlen(p));
605 /* printable 7 bit code */
607 } else if (iseuckanji(*p)) {
608 if (isprintableeuckanji(*p, *(p + 1))) {
609 /* printable euc-jp code */
612 /* substitute unprintable code */
621 } else if (iseuchwkana1(*p)) {
622 if (iseuchwkana2(*(p + 1)))
623 /* euc-jp hankaku kana */
627 } else if (iseucaux(*p)) {
628 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
629 /* auxiliary kanji */
634 /* substitute unprintable 1 byte code */
639 static void conv_unreadable_8bit(gchar *str)
641 register guchar *p = str;
644 /* convert CR+LF -> LF */
645 if (*p == '\r' && *(p + 1) == '\n')
646 memmove(p, p + 1, strlen(p));
647 else if (!IS_ASCII(*p)) *p = SUBST_CHAR;
652 static void conv_unreadable_latin(gchar *str)
654 register guchar *p = str;
657 /* convert CR+LF -> LF */
658 if (*p == '\r' && *(p + 1) == '\n')
659 memmove(p, p + 1, strlen(p));
660 else if ((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
668 void conv_mb_alnum(gchar *str)
670 static guchar char_tbl[] = {
672 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
673 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
675 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
676 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
678 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
679 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
681 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
682 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
684 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
685 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
688 register guchar *p = str;
695 register guchar ch = *(p + 1);
697 if (ch >= 0xb0 && ch <= 0xfa) {
702 memmove(p, p + 1, len);
708 } else if (*p == 0xa1) {
709 register guchar ch = *(p + 1);
711 if (ch >= 0xa0 && ch <= 0xef &&
712 NCV != char_tbl[ch - 0xa0]) {
713 *p = char_tbl[ch - 0xa0];
716 memmove(p, p + 1, len);
722 } else if (iseuckanji(*p)) {
732 CharSet conv_guess_ja_encoding(const gchar *str)
734 const guchar *p = str;
735 CharSet guessed = C_US_ASCII;
738 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
739 if (guessed == C_US_ASCII)
740 return C_ISO_2022_JP;
742 } else if (IS_ASCII(*p)) {
744 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
745 if (*p >= 0xfd && *p <= 0xfe)
747 else if (guessed == C_SHIFT_JIS) {
748 if ((issjiskanji1(*p) &&
749 issjiskanji2(*(p + 1))) ||
751 guessed = C_SHIFT_JIS;
757 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
758 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
759 guessed = C_SHIFT_JIS;
763 } else if (issjishwkana(*p)) {
764 guessed = C_SHIFT_JIS;
/* Display-side converter for the ISO-2022-JP family: conv_get_code_conv_func()
   selects it when the destination charset is C_AUTO.  It forwards all
   three arguments unchanged to conv_jistoutf8(). */
774 static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
776 conv_jistoutf8(outbuf, outlen, inbuf);
/* Display-side converter that forwards all three arguments unchanged to
   conv_sjistoutf8().  conv_get_code_conv_func() selects it when the
   destination charset is C_AUTO. */
779 static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
781 conv_sjistoutf8(outbuf, outlen, inbuf);
/* Display-side converter that forwards all three arguments unchanged to
   conv_euctoutf8().  conv_get_code_conv_func() selects it when the
   destination charset is C_AUTO. */
784 static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
786 conv_euctoutf8(outbuf, outlen, inbuf);
/* Prepares a string that is expected to be UTF-8 for display.
   When g_utf8_validate() accepts the whole of inbuf (length -1, i.e. up
   to the terminating NUL), inbuf is copied into outbuf with strncpy2()
   using outlen as the limit.
   NOTE(review): conv_ustodisp() appears to be the fallback for input that
   fails validation -- confirm the branch structure. */
789 void conv_utf8todisp(gchar *outbuf, gint outlen, const gchar *inbuf)
791 if (g_utf8_validate(inbuf, -1, NULL) == TRUE)
792 strncpy2(outbuf, inbuf, outlen);
794 conv_ustodisp(outbuf, outlen, inbuf);
/* Converts input of unknown Japanese encoding for display: runs
   conv_anytoutf8() and then checks the result with g_utf8_validate().
   If outbuf is still not valid UTF-8, conv_unreadable_8bit() is applied
   to outbuf in place. */
797 static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
799 conv_anytoutf8(outbuf, outlen, inbuf);
800 if (g_utf8_validate(outbuf, -1, NULL) != TRUE)
801 conv_unreadable_8bit(outbuf);
/* Copies inbuf into outbuf with strncpy2() (outlen as the limit) and then
   passes outbuf to conv_unreadable_8bit() for in-place clean-up.
   conv_get_code_conv_func() returns this converter when the destination
   charset is C_US_ASCII. */
804 static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
806 strncpy2(outbuf, inbuf, outlen);
807 conv_unreadable_8bit(outbuf);
810 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
814 tmpstr = conv_iconv_strdup(inbuf, conv_get_locale_charset_str(),
817 strncpy2(outbuf, tmpstr, outlen);
820 conv_utf8todisp(outbuf, outlen, inbuf);
/* Pass-through converter: copies inbuf into outbuf with strncpy2() using
   outlen as the limit.  Its address also acts as a marker:
   conv_get_code_conv_func() starts from it as the default, and
   conv_convert() and conv_codeset_strdup() compare against it to decide
   whether to call the converter or take the conv_iconv_strdup() path. */
823 static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
825 strncpy2(outbuf, inbuf, outlen);
828 CodeConverter *conv_code_converter_new(const gchar *src_charset)
832 conv = g_new0(CodeConverter, 1);
833 conv->code_conv_func = conv_get_code_conv_func(src_charset, NULL);
834 conv->charset_str = g_strdup(src_charset);
835 conv->charset = conv_get_charset_from_str(src_charset);
840 void conv_code_converter_destroy(CodeConverter *conv)
842 g_free(conv->charset_str);
846 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
849 if (conv->code_conv_func != conv_noconv)
850 conv->code_conv_func(outbuf, outlen, inbuf);
854 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
858 strncpy2(outbuf, str, outlen);
866 gchar *conv_codeset_strdup(const gchar *inbuf,
867 const gchar *src_code, const gchar *dest_code)
871 CodeConvFunc conv_func;
873 conv_func = conv_get_code_conv_func(src_code, dest_code);
874 if (conv_func != conv_noconv) {
875 len = (strlen(inbuf) + 1) * 3;
877 if (!buf) return NULL;
879 conv_func(buf, len, inbuf);
880 return g_realloc(buf, strlen(buf) + 1);
883 return conv_iconv_strdup(inbuf, src_code, dest_code);
886 CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
887 const gchar *dest_charset_str)
889 CodeConvFunc code_conv = conv_noconv;
891 CharSet dest_charset;
893 if (!src_charset_str)
894 src_charset = conv_get_locale_charset();
896 src_charset = conv_get_charset_from_str(src_charset_str);
898 /* auto detection mode */
899 if (!src_charset_str && !dest_charset_str) {
900 if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
901 return conv_anytodisp;
906 dest_charset = conv_get_charset_from_str(dest_charset_str);
908 if (dest_charset == C_US_ASCII)
909 return conv_ustodisp;
911 switch (src_charset) {
929 case C_ISO_2022_JP_2:
930 case C_ISO_2022_JP_3:
931 if (dest_charset == C_AUTO)
932 code_conv = conv_jistodisp;
933 else if (dest_charset == C_EUC_JP)
934 code_conv = conv_jistoeuc;
935 else if (dest_charset == C_UTF_8)
936 code_conv = conv_jistoutf8;
939 if (dest_charset == C_AUTO)
940 code_conv = conv_sjistodisp;
941 else if (dest_charset == C_EUC_JP)
942 code_conv = conv_sjistoeuc;
943 else if (dest_charset == C_UTF_8)
944 code_conv = conv_sjistoutf8;
947 if (dest_charset == C_AUTO)
948 code_conv = conv_euctodisp;
949 else if (dest_charset == C_ISO_2022_JP ||
950 dest_charset == C_ISO_2022_JP_2 ||
951 dest_charset == C_ISO_2022_JP_3)
952 code_conv = conv_euctojis;
953 else if (dest_charset == C_UTF_8)
954 code_conv = conv_euctoutf8;
957 if (dest_charset == C_EUC_JP)
958 code_conv = conv_utf8toeuc;
959 else if (dest_charset == C_ISO_2022_JP ||
960 dest_charset == C_ISO_2022_JP_2 ||
961 dest_charset == C_ISO_2022_JP_3)
962 code_conv = conv_utf8tojis;
971 gchar *conv_iconv_strdup(const gchar *inbuf,
972 const gchar *src_code, const gchar *dest_code)
978 src_code = conv_get_outgoing_charset_str();
980 dest_code = CS_INTERNAL;
982 /* don't convert if src and dest codeset are identical */
983 if (!strcasecmp(src_code, dest_code))
984 return g_strdup(inbuf);
986 /* don't convert if current codeset is US-ASCII */
987 if (!strcasecmp(dest_code, CS_US_ASCII))
988 return g_strdup(inbuf);
990 cd = iconv_open(dest_code, src_code);
991 if (cd == (iconv_t)-1)
994 outbuf = conv_iconv_strdup_with_cd(inbuf, cd);
1001 gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd)
1003 const gchar *inbuf_p;
1014 in_size = strlen(inbuf);
1016 out_size = (in_size + 1) * 2;
1017 outbuf = g_malloc(out_size);
1019 out_left = out_size;
1021 #define EXPAND_BUF() \
1023 len = outbuf_p - outbuf; \
1025 outbuf = g_realloc(outbuf, out_size); \
1026 outbuf_p = outbuf + len; \
1027 out_left = out_size - len; \
1030 while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
1031 &outbuf_p, &out_left)) == (size_t)-1) {
1032 if (EILSEQ == errno) {
1033 //g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
1036 if (out_left == 0) {
1039 *outbuf_p++ = SUBST_CHAR;
1041 } else if (EINVAL == errno) {
1043 } else if (E2BIG == errno) {
1046 g_warning("conv_iconv_strdup(): %s\n",
1052 while ((n_conv = iconv(cd, NULL, NULL, &outbuf_p, &out_left)) ==
1054 if (E2BIG == errno) {
1057 g_warning("conv_iconv_strdup(): %s\n",
1065 len = outbuf_p - outbuf;
1066 outbuf = g_realloc(outbuf, len + 1);
1072 static const struct {
1076 {C_US_ASCII, CS_US_ASCII},
1077 {C_US_ASCII, CS_ANSI_X3_4_1968},
1078 {C_UTF_8, CS_UTF_8},
1079 {C_UTF_7, CS_UTF_7},
1080 {C_ISO_8859_1, CS_ISO_8859_1},
1081 {C_ISO_8859_2, CS_ISO_8859_2},
1082 {C_ISO_8859_3, CS_ISO_8859_3},
1083 {C_ISO_8859_4, CS_ISO_8859_4},
1084 {C_ISO_8859_5, CS_ISO_8859_5},
1085 {C_ISO_8859_6, CS_ISO_8859_6},
1086 {C_ISO_8859_7, CS_ISO_8859_7},
1087 {C_ISO_8859_8, CS_ISO_8859_8},
1088 {C_ISO_8859_9, CS_ISO_8859_9},
1089 {C_ISO_8859_10, CS_ISO_8859_10},
1090 {C_ISO_8859_11, CS_ISO_8859_11},
1091 {C_ISO_8859_13, CS_ISO_8859_13},
1092 {C_ISO_8859_14, CS_ISO_8859_14},
1093 {C_ISO_8859_15, CS_ISO_8859_15},
1094 {C_BALTIC, CS_BALTIC},
1095 {C_CP1250, CS_CP1250},
1096 {C_CP1251, CS_CP1251},
1097 {C_CP1252, CS_CP1252},
1098 {C_CP1253, CS_CP1253},
1099 {C_CP1254, CS_CP1254},
1100 {C_CP1255, CS_CP1255},
1101 {C_CP1256, CS_CP1256},
1102 {C_CP1257, CS_CP1257},
1103 {C_CP1258, CS_CP1258},
1104 {C_WINDOWS_1250, CS_WINDOWS_1250},
1105 {C_WINDOWS_1251, CS_WINDOWS_1251},
1106 {C_WINDOWS_1252, CS_WINDOWS_1252},
1107 {C_WINDOWS_1253, CS_WINDOWS_1253},
1108 {C_WINDOWS_1254, CS_WINDOWS_1254},
1109 {C_WINDOWS_1255, CS_WINDOWS_1255},
1110 {C_WINDOWS_1256, CS_WINDOWS_1256},
1111 {C_WINDOWS_1257, CS_WINDOWS_1257},
1112 {C_WINDOWS_1258, CS_WINDOWS_1258},
1113 {C_KOI8_R, CS_KOI8_R},
1114 {C_KOI8_T, CS_KOI8_T},
1115 {C_KOI8_U, CS_KOI8_U},
1116 {C_ISO_2022_JP, CS_ISO_2022_JP},
1117 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
1118 {C_ISO_2022_JP_3, CS_ISO_2022_JP_3},
1119 {C_EUC_JP, CS_EUC_JP},
1120 {C_EUC_JP, CS_EUCJP},
1121 {C_EUC_JP_MS, CS_EUC_JP_MS},
1122 {C_SHIFT_JIS, CS_SHIFT_JIS},
1123 {C_SHIFT_JIS, CS_SHIFT__JIS},
1124 {C_SHIFT_JIS, CS_SJIS},
1125 {C_ISO_2022_KR, CS_ISO_2022_KR},
1126 {C_EUC_KR, CS_EUC_KR},
1127 {C_ISO_2022_CN, CS_ISO_2022_CN},
1128 {C_EUC_CN, CS_EUC_CN},
1129 {C_GB2312, CS_GB2312},
1131 {C_EUC_TW, CS_EUC_TW},
1133 {C_BIG5_HKSCS, CS_BIG5_HKSCS},
1134 {C_TIS_620, CS_TIS_620},
1135 {C_WINDOWS_874, CS_WINDOWS_874},
1136 {C_GEORGIAN_PS, CS_GEORGIAN_PS},
1137 {C_TCVN5712_1, CS_TCVN5712_1},
1140 static const struct {
1141 gchar *const locale;
1143 CharSet out_charset;
1144 } locale_table[] = {
1145 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
1146 {"ja_JP.EUC-JP" , C_EUC_JP , C_ISO_2022_JP},
1147 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
1148 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
1149 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
1150 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
1151 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
1152 {"ko_KR.EUC-KR" , C_EUC_KR , C_EUC_KR},
1153 {"ko_KR" , C_EUC_KR , C_EUC_KR},
1154 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
1155 {"zh_CN.GBK" , C_GBK , C_GB2312},
1156 {"zh_CN" , C_GB2312 , C_GB2312},
1157 {"zh_HK" , C_BIG5_HKSCS , C_BIG5_HKSCS},
1158 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
1159 {"zh_TW.EUC-TW" , C_EUC_TW , C_BIG5},
1160 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
1161 {"zh_TW" , C_BIG5 , C_BIG5},
1163 {"ru_RU.KOI8-R" , C_KOI8_R , C_KOI8_R},
1164 {"ru_RU.KOI8R" , C_KOI8_R , C_KOI8_R},
1165 {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
1166 {"ru_RU" , C_ISO_8859_5 , C_KOI8_R},
1167 {"tg_TJ" , C_KOI8_T , C_KOI8_T},
1168 {"ru_UA" , C_KOI8_U , C_KOI8_U},
1169 {"uk_UA.CP1251" , C_WINDOWS_1251, C_KOI8_U},
1170 {"uk_UA" , C_KOI8_U , C_KOI8_U},
1172 {"be_BY" , C_WINDOWS_1251, C_WINDOWS_1251},
1173 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
1175 {"yi_US" , C_WINDOWS_1255, C_WINDOWS_1255},
1177 {"af_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1178 {"br_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1179 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1180 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1181 {"de_AT" , C_ISO_8859_1 , C_ISO_8859_1},
1182 {"de_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1183 {"de_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1184 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
1185 {"de_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1186 {"en_AU" , C_ISO_8859_1 , C_ISO_8859_1},
1187 {"en_BW" , C_ISO_8859_1 , C_ISO_8859_1},
1188 {"en_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1189 {"en_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1190 {"en_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1191 {"en_HK" , C_ISO_8859_1 , C_ISO_8859_1},
1192 {"en_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1193 {"en_NZ" , C_ISO_8859_1 , C_ISO_8859_1},
1194 {"en_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1195 {"en_SG" , C_ISO_8859_1 , C_ISO_8859_1},
1196 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
1197 {"en_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1198 {"en_ZW" , C_ISO_8859_1 , C_ISO_8859_1},
1199 {"es_AR" , C_ISO_8859_1 , C_ISO_8859_1},
1200 {"es_BO" , C_ISO_8859_1 , C_ISO_8859_1},
1201 {"es_CL" , C_ISO_8859_1 , C_ISO_8859_1},
1202 {"es_CO" , C_ISO_8859_1 , C_ISO_8859_1},
1203 {"es_CR" , C_ISO_8859_1 , C_ISO_8859_1},
1204 {"es_DO" , C_ISO_8859_1 , C_ISO_8859_1},
1205 {"es_EC" , C_ISO_8859_1 , C_ISO_8859_1},
1206 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1207 {"es_GT" , C_ISO_8859_1 , C_ISO_8859_1},
1208 {"es_HN" , C_ISO_8859_1 , C_ISO_8859_1},
1209 {"es_MX" , C_ISO_8859_1 , C_ISO_8859_1},
1210 {"es_NI" , C_ISO_8859_1 , C_ISO_8859_1},
1211 {"es_PA" , C_ISO_8859_1 , C_ISO_8859_1},
1212 {"es_PE" , C_ISO_8859_1 , C_ISO_8859_1},
1213 {"es_PR" , C_ISO_8859_1 , C_ISO_8859_1},
1214 {"es_PY" , C_ISO_8859_1 , C_ISO_8859_1},
1215 {"es_SV" , C_ISO_8859_1 , C_ISO_8859_1},
1216 {"es_US" , C_ISO_8859_1 , C_ISO_8859_1},
1217 {"es_UY" , C_ISO_8859_1 , C_ISO_8859_1},
1218 {"es_VE" , C_ISO_8859_1 , C_ISO_8859_1},
1219 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
1220 {"eu_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1221 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1222 {"fo_FO" , C_ISO_8859_1 , C_ISO_8859_1},
1223 {"fr_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1224 {"fr_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1225 {"fr_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1226 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1227 {"fr_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1228 {"ga_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1229 {"gl_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1230 {"gv_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1231 {"id_ID" , C_ISO_8859_1 , C_ISO_8859_1},
1232 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
1233 {"it_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1234 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
1235 {"kl_GL" , C_ISO_8859_1 , C_ISO_8859_1},
1236 {"kw_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1237 {"ms_MY" , C_ISO_8859_1 , C_ISO_8859_1},
1238 {"nl_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1239 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
1240 {"nn_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1241 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1242 {"oc_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1243 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
1244 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
1245 {"sq_AL" , C_ISO_8859_1 , C_ISO_8859_1},
1246 {"sv_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1247 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
1248 {"tl_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1249 {"uz_UZ" , C_ISO_8859_1 , C_ISO_8859_1},
1250 {"wa_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1252 {"bs_BA" , C_ISO_8859_2 , C_ISO_8859_2},
1253 {"cs_CZ" , C_ISO_8859_2 , C_ISO_8859_2},
1254 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
1255 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
1256 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
1257 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
1258 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
1259 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
1261 {"sr_YU@cyrillic" , C_ISO_8859_5 , C_ISO_8859_5},
1262 {"sr_YU" , C_ISO_8859_2 , C_ISO_8859_2},
1264 {"mt_MT" , C_ISO_8859_3 , C_ISO_8859_3},
1266 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
1267 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1268 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1269 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
1271 {"mk_MK" , C_ISO_8859_5 , C_ISO_8859_5},
1273 {"ar_AE" , C_ISO_8859_6 , C_ISO_8859_6},
1274 {"ar_BH" , C_ISO_8859_6 , C_ISO_8859_6},
1275 {"ar_DZ" , C_ISO_8859_6 , C_ISO_8859_6},
1276 {"ar_EG" , C_ISO_8859_6 , C_ISO_8859_6},
1277 {"ar_IQ" , C_ISO_8859_6 , C_ISO_8859_6},
1278 {"ar_JO" , C_ISO_8859_6 , C_ISO_8859_6},
1279 {"ar_KW" , C_ISO_8859_6 , C_ISO_8859_6},
1280 {"ar_LB" , C_ISO_8859_6 , C_ISO_8859_6},
1281 {"ar_LY" , C_ISO_8859_6 , C_ISO_8859_6},
1282 {"ar_MA" , C_ISO_8859_6 , C_ISO_8859_6},
1283 {"ar_OM" , C_ISO_8859_6 , C_ISO_8859_6},
1284 {"ar_QA" , C_ISO_8859_6 , C_ISO_8859_6},
1285 {"ar_SA" , C_ISO_8859_6 , C_ISO_8859_6},
1286 {"ar_SD" , C_ISO_8859_6 , C_ISO_8859_6},
1287 {"ar_SY" , C_ISO_8859_6 , C_ISO_8859_6},
1288 {"ar_TN" , C_ISO_8859_6 , C_ISO_8859_6},
1289 {"ar_YE" , C_ISO_8859_6 , C_ISO_8859_6},
1291 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
1292 {"he_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1293 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1294 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
1296 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
1297 {"mi_NZ" , C_ISO_8859_13 , C_ISO_8859_13},
1299 {"cy_GB" , C_ISO_8859_14 , C_ISO_8859_14},
1301 {"ar_IN" , C_UTF_8 , C_UTF_8},
1302 {"en_IN" , C_UTF_8 , C_UTF_8},
1303 {"se_NO" , C_UTF_8 , C_UTF_8},
1304 {"ta_IN" , C_UTF_8 , C_UTF_8},
1305 {"te_IN" , C_UTF_8 , C_UTF_8},
1306 {"ur_PK" , C_UTF_8 , C_UTF_8},
1308 {"th_TH" , C_TIS_620 , C_TIS_620},
1309 /* {"th_TH" , C_WINDOWS_874}, */
1310 /* {"th_TH" , C_ISO_8859_11}, */
1312 {"ka_GE" , C_GEORGIAN_PS , C_GEORGIAN_PS},
1313 {"vi_VN.TCVN" , C_TCVN5712_1 , C_TCVN5712_1},
1315 {"C" , C_US_ASCII , C_US_ASCII},
1316 {"POSIX" , C_US_ASCII , C_US_ASCII},
1317 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
1320 static GHashTable *conv_get_charset_to_str_table(void)
1322 static GHashTable *table;
1328 table = g_hash_table_new(NULL, g_direct_equal);
1330 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1331 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1334 (table, GUINT_TO_POINTER(charsets[i].charset),
1342 static GHashTable *conv_get_charset_from_str_table(void)
1344 static GHashTable *table;
1350 table = g_hash_table_new(str_case_hash, str_case_equal);
1352 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1353 g_hash_table_insert(table, charsets[i].name,
1354 GUINT_TO_POINTER(charsets[i].charset));
1360 const gchar *conv_get_charset_str(CharSet charset)
1364 table = conv_get_charset_to_str_table();
1365 return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
1368 CharSet conv_get_charset_from_str(const gchar *charset)
1372 if (!charset) return C_AUTO;
1374 table = conv_get_charset_from_str_table();
1375 return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
1378 CharSet conv_get_locale_charset(void)
1380 static CharSet cur_charset = -1;
1381 const gchar *cur_locale;
1385 if (cur_charset != -1)
1388 cur_locale = conv_get_current_locale();
1390 cur_charset = C_US_ASCII;
1394 if (strcasestr(cur_locale, "UTF-8")) {
1395 cur_charset = C_UTF_8;
1399 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1400 cur_charset = C_ISO_8859_15;
1404 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1407 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1408 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1409 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1410 strlen(locale_table[i].locale))) {
1411 cur_charset = locale_table[i].charset;
1413 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1414 !strchr(p + 1, '.')) {
1415 if (strlen(cur_locale) == 2 &&
1416 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1417 cur_charset = locale_table[i].charset;
1423 cur_charset = C_AUTO;
1427 const gchar *conv_get_locale_charset_str(void)
1429 static const gchar *codeset = NULL;
1432 codeset = conv_get_charset_str(conv_get_locale_charset());
1434 return codeset ? codeset : CS_INTERNAL;
1437 CharSet conv_get_internal_charset(void)
1442 const gchar *conv_get_internal_charset_str(void)
/*
 * Work out which CharSet should be used for outgoing text, based on the
 * current locale.
 *
 * The answer is kept in the function-local static out_charset, which starts
 * at -1 ("not determined yet") and is tested before any other work is done.
 *
 * Checks visible in this function, in order:
 *   - the locale name is obtained from conv_get_current_locale();
 *   - a name ending in "@euro" selects C_ISO_8859_15;
 *   - otherwise locale_table is scanned with the same two matching rules
 *     that conv_get_locale_charset() applies, but the entry's .out_charset
 *     field is used instead of .charset.
 *
 * NOTE(review): declarations, return statements, closing braces and some
 * conditions of this function are not visible in this extract; confirm the
 * exact control flow against the complete file.
 */
1447 CharSet conv_get_outgoing_charset(void)
1449 static CharSet out_charset = -1;
1450 const gchar *cur_locale;
/* -1 is the "not cached yet" sentinel set by the static initializer */
1454 if (out_charset != -1)
1457 cur_locale = conv_get_current_locale();
/* NOTE(review): the condition guarding this assignment is not visible here;
   presumably it is the "no locale name available" fallback -- confirm */
1459 out_charset = C_AUTO;
/* "@euro" is five characters long, so p[5] == '\0' means the modifier is
   the very end of the locale name */
1463 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1464 out_charset = C_ISO_8859_15;
1468 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
/* first test: cur_locale begins with the table entry (case-insensitive) */
1471 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1472 strlen(locale_table[i].locale))) {
1473 out_charset = locale_table[i].out_charset;
1475 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1476 !strchr(p + 1, '.')) {
/* second test: the table entry has a '_' but no '.' after it, and
   cur_locale is exactly two characters equal to the entry's first two */
1477 if (strlen(cur_locale) == 2 &&
1478 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1479 out_charset = locale_table[i].out_charset;
/*
 * Return the name of the character set to use for outgoing text.
 *
 * A user preference takes priority: when prefs_common.outgoing_charset is
 * set, starts with an alphabetic character and is not CS_AUTO, that very
 * pointer is returned (not a copy). A preference whose first character is
 * not alphabetic is freed and replaced by a copy of CS_AUTO.
 *
 * In the remaining cases the name is derived from
 * conv_get_outgoing_charset() through conv_get_charset_str(), and CS_UTF_8
 * is returned when that lookup yields NULL.
 */
1488 const gchar *conv_get_outgoing_charset_str(void)
1490 CharSet out_charset;
1493 if (prefs_common.outgoing_charset) {
/* the cast to guchar keeps isalpha() away from negative char values */
1494 if (!isalpha((guchar)prefs_common.outgoing_charset[0])) {
/* unusable preference: reset the stored value to CS_AUTO */
1495 g_free(prefs_common.outgoing_charset);
1496 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1497 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1498 return prefs_common.outgoing_charset;
/* no explicit preference applies: use the locale-derived charset */
1501 out_charset = conv_get_outgoing_charset();
1502 str = conv_get_charset_str(out_charset);
/* never hand NULL back to the caller */
1504 return str ? str : CS_UTF_8;
/*
 * Classify a CharSet value and return a gboolean.
 *
 * NOTE(review): only two case labels (C_ISO_2022_JP_2, C_ISO_2022_JP_3) of
 * what appears to be a switch on `encoding` are visible in this extract;
 * the other labels and the values returned are not. Going by the function
 * name, the listed encodings are presumably the ones reported as multibyte
 * -- confirm against the complete file.
 */
1507 gboolean conv_is_multibyte_encoding(CharSet encoding)
1516 case C_ISO_2022_JP_2:
1517 case C_ISO_2022_JP_3:
/*
 * Find the name of the current locale.
 *
 * Sources are tried in this order, stopping at the first one that is not
 * NULL: the environment variables LC_ALL, LC_CTYPE and LANG (read with
 * g_getenv()), then setlocale(LC_CTYPE, NULL). The result is logged with
 * debug_print(), printing "(none)" when nothing was found.
 *
 * NOTE(review): the return statement is not visible in this extract;
 * callers in this file use the result as the locale name, so presumably
 * cur_locale is returned -- confirm.
 *
 * NOTE(review): only NULL is tested, so a variable that is set to the empty
 * string is accepted as the locale name and stops the search. POSIX treats
 * an empty locale variable like an unset one; consider also checking for
 * an empty string.
 */
1531 const gchar *conv_get_current_locale(void)
1533 const gchar *cur_locale;
1535 cur_locale = g_getenv("LC_ALL");
1536 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1537 if (!cur_locale) cur_locale = g_getenv("LANG");
/* last resort: ask the C library which LC_CTYPE locale is in effect */
1538 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1540 debug_print("current locale: %s\n",
1541 cur_locale ? cur_locale : "(none)");
/*
 * Decode a header string with unmime_header(), first converting any raw
 * non-ASCII bytes it contains.
 *
 * str:              header text to decode.
 * default_encoding: encoding to assume for str when it is not pure ASCII;
 *                   may be NULL.
 *
 * Paths visible in this function:
 *   - str is pure ASCII (is_ascii_str()): unmime_header(str) is returned
 *     directly;
 *   - default_encoding is given: str is converted from default_encoding to
 *     CS_INTERNAL with conv_codeset_strdup() and the converted string is
 *     passed to unmime_header();
 *   - otherwise str is converted into the local buffer buf (BUFFSIZE bytes)
 *     and unmime_header(buf) is returned.
 *
 * NOTE(review): the lines that check the conversion result, release
 * utf8_buf and return decoded_str are not visible in this extract; confirm
 * them against the complete file.
 */
1546 gchar *conv_unmime_header(const gchar *str, const gchar *default_encoding)
1548 gchar buf[BUFFSIZE];
/* nothing to convert when every byte is ASCII */
1550 if (is_ascii_str(str))
1551 return unmime_header(str);
1553 if (default_encoding) {
1556 utf8_buf = conv_codeset_strdup
1557 (str, default_encoding, CS_INTERNAL);
1561 decoded_str = unmime_header(utf8_buf);
/* NOTE(review): the two conversions below are presumably the two arms of an
   if/else on the locale charset; the `else` line is not visible here */
1567 if (conv_get_locale_charset() == C_EUC_JP)
1568 conv_anytodisp(buf, sizeof(buf), str);
1570 conv_localetodisp(buf, sizeof(buf), str);
1572 return unmime_header(buf);
/*
 * Constants and a helper macro private to conv_encode_header(); the macro
 * is removed again with #undef right after the function.
 *
 * MAX_LINELEN       line-length budget used when folding the output
 * MAX_HARD_LINELEN  larger limit applied while copying a single plain word
 * MIMESEP_BEGIN     opening delimiter of an encoded block
 * MIMESEP_END       closing delimiter of an encoded block
 */
1575 #define MAX_LINELEN 76
1576 #define MAX_HARD_LINELEN 996
1577 #define MIMESEP_BEGIN "=?"
1578 #define MIMESEP_END "?="
/*
 * LBREAK_IF_REQUIRED(cond, is_plain_text)
 *
 * Works directly on the locals len, dest, destp, srcp and left of the
 * enclosing function.
 *
 * It first tests whether fewer than MAX_LINELEN + 2 bytes remain in dest.
 * Then, if cond holds and the source is not exhausted, and something has
 * already been written while left is below MAX_LINELEN - 1, it looks for
 * whitespace at the last written byte and (for plain text) at the next
 * source byte, and resets left to MAX_LINELEN - 1.
 *
 * NOTE(review): the statements executed inside these conditions (what is
 * done when dest is nearly full, and how the line break itself is written)
 * are not visible in this extract -- confirm against the complete file.
 */
1580 #define LBREAK_IF_REQUIRED(cond, is_plain_text) \
1582 if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) { \
1587 if ((cond) && *srcp) { \
1588 if (destp > (guchar *)dest && left < MAX_LINELEN - 1) { \
1589 if (isspace(*(destp - 1))) \
1591 else if (is_plain_text && isspace(*srcp)) \
1596 left = MAX_LINELEN - 1; \
1602 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1603 gint header_len, gboolean addr_field)
/*
 * Encode the header text src into dest, emitting non-ASCII parts as
 * MIME-encoded blocks and folding lines with LBREAK_IF_REQUIRED.
 *
 * dest:       output buffer.
 * len:        size of dest in bytes.
 * src:        input text; it must be valid UTF-8, otherwise the function
 *             returns at once (g_return_if_fail below).
 * header_len: subtracted from MAX_LINELEN to obtain the space left on the
 *             first line.
 * addr_field: when TRUE, '(' and ')' are kept out of the encoded blocks.
 *
 * Each encoded block has the form
 *   MIMESEP_BEGIN <out_encoding> <mimesep_enc> <encoded text> MIMESEP_END
 * where out_encoding comes from conv_get_outgoing_charset_str() and
 * mimesep_enc is "?B?" or "?Q?". Words for which is_next_nonascii() is
 * false are copied as they are. For the other parts, the piece to encode
 * is grown one UTF-8 character at a time (g_utf8_skip) for as long as the
 * encoded form plus the block overhead (mimestr_len) still fits in `left`.
 *
 * When conv_codeset_strdup() is reported as failed, a warning is logged,
 * conv_unreadable_8bit() is applied to the piece and an unconverted copy
 * is encoded instead.
 *
 * NOTE(review): a number of lines of this function (declarations, braces,
 * else branches, the copy loops) are not visible in this extract.
 */
1605 const gchar *cur_encoding;
1606 const gchar *out_encoding;
1610 const guchar *srcp = src;
1611 guchar *destp = dest;
1612 gboolean use_base64;
1614 g_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
/* NOTE(review): "?B?" is chosen when MB_CUR_MAX > 1; "?Q?" is presumably
   the else arm, and use_base64 is presumably set alongside -- those lines
   are not visible here */
1616 if (MB_CUR_MAX > 1) {
1618 mimesep_enc = "?B?";
1621 mimesep_enc = "?Q?";
1624 cur_encoding = CS_INTERNAL;
1625 out_encoding = conv_get_outgoing_charset_str();
/* an outgoing charset of CS_US_ASCII is replaced by CS_ISO_8859_1 */
1626 if (!strcmp(out_encoding, CS_US_ASCII))
1627 out_encoding = CS_ISO_8859_1;
/* fixed overhead of one encoded block, i.e. everything but the payload */
1629 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1630 strlen(mimesep_enc) + strlen(MIMESEP_END);
/* space remaining on the current output line */
1632 left = MAX_LINELEN - header_len;
1635 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1637 while (isspace(*srcp)) {
1640 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1643 /* output as it is if the next word is ASCII string */
1644 if (!is_next_nonascii(srcp)) {
1647 word_len = get_next_word_len(srcp);
1648 LBREAK_IF_REQUIRED(left < word_len, TRUE);
1649 while (word_len > 0) {
/* inside a word a break is requested only once left has dropped to
   -(MAX_HARD_LINELEN - MAX_LINELEN) or below */
1650 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1659 /* don't include parentheses in encoded strings */
1660 if (addr_field && (*srcp == '(' || *srcp == ')')) {
1661 LBREAK_IF_REQUIRED(left < 2, FALSE);
1672 const guchar *p = srcp;
1674 gint out_enc_str_len;
1675 gint mime_block_len;
1676 gboolean cont = FALSE;
1678 while (*p != '\0') {
1679 if (isspace(*p) && !is_next_nonascii(p + 1))
1681 /* don't include parentheses in encoded
1683 if (addr_field && (*p == '(' || *p == ')'))
1686 mb_len = g_utf8_skip[*p];
1688 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1689 out_str = conv_codeset_strdup
1690 (part_str, cur_encoding, out_encoding);
1692 g_warning("conv_encode_header(): code conversion failed\n");
1693 conv_unreadable_8bit(part_str);
1694 out_str = g_strdup(part_str);
1696 out_str_len = strlen(out_str);
1699 out_enc_str_len = B64LEN(out_str_len);
1702 qp_get_q_encoding_len(out_str);
1706 if (mimestr_len + out_enc_str_len <= left) {
1709 } else if (cur_len == 0) {
1710 LBREAK_IF_REQUIRED(1, FALSE);
1719 Xstrndup_a(part_str, srcp, cur_len, );
1720 out_str = conv_codeset_strdup
1721 (part_str, cur_encoding, out_encoding);
1723 g_warning("conv_encode_header(): code conversion failed\n");
1724 conv_unreadable_8bit(part_str);
1725 out_str = g_strdup(part_str);
1727 out_str_len = strlen(out_str);
1730 out_enc_str_len = B64LEN(out_str_len);
1733 qp_get_q_encoding_len(out_str);
1735 Xalloca(enc_str, out_enc_str_len + 1, );
1737 base64_encode(enc_str, out_str, out_str_len);
1739 qp_q_encode(enc_str, out_str);
1743 /* output MIME-encoded string block */
1744 mime_block_len = mimestr_len + strlen(enc_str);
1745 g_snprintf(destp, mime_block_len + 1,
1746 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1747 out_encoding, mimesep_enc, enc_str);
1748 destp += mime_block_len;
1751 left -= mime_block_len;
1754 LBREAK_IF_REQUIRED(cont, FALSE);
1764 #undef LBREAK_IF_REQUIRED
/*
 * Convert a UTF-8 file name to the encoding used for file names on disk.
 *
 * utf8_file: file name in UTF-8.
 *
 * The conversion is done with g_filename_from_utf8(). When it reports an
 * error, a warning is logged and the GError is freed. A plain copy of
 * utf8_file (g_strdup) is also assigned to fs_file as a fallback.
 *
 * NOTE(review): the declaration of fs_file, the conditions guarding the
 * warning and the fallback, and the return statement are not visible in
 * this extract. Both visible assignments to fs_file come from allocating
 * GLib calls, so the caller presumably owns the result and must g_free()
 * it -- confirm.
 */
1765 gchar *conv_filename_from_utf8(const gchar *utf8_file)
1768 GError *error = NULL;
/* -1: utf8_file is NUL-terminated; the byte counts are not requested */
1770 fs_file = g_filename_from_utf8(utf8_file, -1, NULL, NULL, &error);
1772 g_warning("failed to convert encoding of file name: %s\n",
1774 g_error_free(error);
/* fallback: use the name unchanged */
1777 fs_file = g_strdup(utf8_file);
1782 gchar *conv_filename_to_utf8(const gchar *fs_file)
1784 gchar *utf8_file = NULL;
1785 GError *error = NULL;
1787 utf8_file = g_filename_to_utf8(fs_file, -1, NULL, NULL, &error);
1789 g_warning("failed to convert encoding of file name: %s\n",
1791 g_error_free(error);
1794 if (!utf8_file || !g_utf8_validate(utf8_file, -1, NULL)) {
1796 utf8_file = g_strdup(fs_file);
1797 conv_unreadable_8bit(utf8_file);