2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2005 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
27 #include <glib/gi18n.h>
42 #include "quoted-printable.h"
44 #include "prefs_common.h"
/*
 * Replacement byte (0x5f, ASCII '_') stored in place of bytes that cannot
 * be displayed or converted.  The definition deliberately has no trailing
 * semicolon so the macro can be used inside expressions and initializers
 * without injecting an empty statement.
 */
#define SUBST_CHAR 0x5f
/*
 * EUC-JP byte classifiers.  Every macro masks its argument to the low
 * 8 bits before comparing, so signed and unsigned chars give the same
 * answer.  The argument may be evaluated more than once.
 */

/* byte in the range 0xa1..0xfe */
#define iseuckanji(c) \
	(0xa1 <= ((c) & 0xff) && ((c) & 0xff) <= 0xfe)

/* byte 0x8e, tested as the first byte of a half-width kana pair */
#define iseuchwkana1(c) \
	(0x8e == ((c) & 0xff))

/* byte in the range 0xa1..0xdf, tested as the second byte of that pair */
#define iseuchwkana2(c) \
	(0xa1 <= ((c) & 0xff) && ((c) & 0xff) <= 0xdf)
64 (((c) & 0xff) == 0x8f)
/*
 * Shift_JIS byte classifiers.  As with the EUC-JP macros, the argument is
 * reduced to its low 8 bits first and may be evaluated more than once.
 */

/* first byte of a two-byte character: 0x81..0x9f or 0xe0..0xfc */
#define issjiskanji1(c) \
	((0x81 <= ((c) & 0xff) && ((c) & 0xff) <= 0x9f) || \
	 (0xe0 <= ((c) & 0xff) && ((c) & 0xff) <= 0xfc))

/* second byte of a two-byte character: 0x40..0x7e or 0x80..0xfc */
#define issjiskanji2(c) \
	((0x40 <= ((c) & 0xff) && ((c) & 0xff) <= 0x7e) || \
	 (0x80 <= ((c) & 0xff) && ((c) & 0xff) <= 0xfc))

/* single-byte half-width kana: 0xa1..0xdf */
#define issjishwkana(c) \
	(0xa1 <= ((c) & 0xff) && ((c) & 0xff) <= 0xdf)
75 if (state != JIS_KANJI) { \
83 if (state != JIS_ASCII) { \
91 if (state != JIS_HWKANA) { \
99 if (state != JIS_AUXKANJI) { \
104 state = JIS_AUXKANJI; \
107 static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
108 static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf);
109 static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
111 static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
112 static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
113 static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
114 static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
116 static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
117 static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf);
119 static void conv_unreadable_eucjp(gchar *str);
120 static void conv_unreadable_8bit(gchar *str);
121 static void conv_unreadable_latin(gchar *str);
123 static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
124 static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
125 static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
127 static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
128 static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
129 static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf);
/*
 * Converts the NUL-terminated string inbuf, writing the result through
 * outbuf (ISO-2022-JP to EUC-JP, judging by the function name).
 *
 * The loop walks the input until the terminating NUL while tracking a
 * JISState that starts as JIS_ASCII.  It inspects the byte following '$'
 * or '(' to recognise escape sequences, and also reacts to the 0x0e and
 * 0x0f control bytes.  Payload bytes are copied with the high bit set
 * (| 0x80); for two-byte characters the loop stops early if the input
 * ends after the first byte.
 *
 * NOTE(review): several lines of this function are not visible here, so
 * how outlen bounds the output could not be confirmed.
 */
131 static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
133 const guchar *in = inbuf;
134 guchar *out = outbuf;
135 JISState state = JIS_ASCII;
137 while (*in != '\0') {
141 if (*(in + 1) == '@' || *(in + 1) == 'B') {
144 } else if (*(in + 1) == '(' &&
146 state = JIS_AUXKANJI;
149 /* unknown escape sequence */
152 } else if (*in == '(') {
153 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
156 } else if (*(in + 1) == 'I') {
160 /* unknown escape sequence */
164 /* unknown escape sequence */
167 } else if (*in == 0x0e) {
170 } else if (*in == 0x0f) {
179 *out++ = *in++ | 0x80;
180 if (*in == '\0') break;
181 *out++ = *in++ | 0x80;
185 *out++ = *in++ | 0x80;
189 *out++ = *in++ | 0x80;
190 if (*in == '\0') break;
191 *out++ = *in++ | 0x80;
/*
 * Sound-mark codes that conv_jis_hantozen() compares against its
 * sound_sym argument to choose between the dakuten and handakuten tables.
 */
200 #define JIS_HWDAKUTEN 0x5e
201 #define JIS_HWHANDAKUTEN 0x5f
/*
 * Maps one 7-bit half-width kana code (jis_code), optionally combined with
 * a following sound mark (sound_sym), to a two-byte code that is stored
 * big-endian in outbuf[0] and outbuf[1].
 *
 * Lookup order as visible here:
 *   - jis_code outside 0x21..0x5f is rejected first;
 *   - sound_sym == JIS_HWDAKUTEN and jis_code in 0x36..0x4e:
 *     dakuten_tbl[jis_code - 0x30];
 *   - sound_sym == JIS_HWHANDAKUTEN and jis_code in 0x4a..0x4e:
 *     handakuten_tbl[jis_code - 0x4a];
 *   - otherwise h2z_tbl[jis_code - 0x20].
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers store the result in a variable named len, so the return value
 * is presumably a byte count — confirm.
 */
203 static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
205 static guint16 h2z_tbl[] = {
207 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
208 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
210 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
211 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
213 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
214 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
216 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
217 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
220 static guint16 dakuten_tbl[] = {
222 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
223 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
225 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
226 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
229 static guint16 handakuten_tbl[] = {
231 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
239 if (jis_code < 0x21 || jis_code > 0x5f)
242 if (sound_sym == JIS_HWDAKUTEN &&
243 jis_code >= 0x36 && jis_code <= 0x4e) {
244 out_code = dakuten_tbl[jis_code - 0x30];
246 *outbuf = out_code >> 8;
247 *(outbuf + 1) = out_code & 0xff;
252 if (sound_sym == JIS_HWHANDAKUTEN &&
253 jis_code >= 0x4a && jis_code <= 0x4e) {
254 out_code = handakuten_tbl[jis_code - 0x4a];
255 *outbuf = out_code >> 8;
256 *(outbuf + 1) = out_code & 0xff;
260 out_code = h2z_tbl[jis_code - 0x20];
261 *outbuf = out_code >> 8;
262 *(outbuf + 1) = out_code & 0xff;
/*
 * Converts the NUL-terminated string inbuf, writing the result through
 * outbuf (EUC-JP to ISO-2022-JP, judging by the function name).  A
 * JISState starting at JIS_ASCII is tracked across the loop.
 *
 * Visible handling per input byte:
 *   - a pair of iseuckanji() bytes is emitted with the high bit cleared
 *     (& 0x7f);
 *   - an iseuchwkana1() byte followed by an iseuchwkana2() byte is either
 *     kept as half-width kana when prefs_common.allow_jisx0201_kana is
 *     set, or passed to conv_jis_hantozen(), together with the next kana
 *     byte as a sound mark when another kana pair follows;
 *   - an iseucaux() byte introduces a further pair of iseuckanji() bytes,
 *     also emitted with the high bit cleared;
 *   - the `*in != '\0' && !IS_ASCII(*in)` checks guard the handling of
 *     malformed sequences (their bodies are not visible here).
 *
 * NOTE(review): how outlen bounds the output is not visible in this
 * excerpt.
 */
266 static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
268 const guchar *in = inbuf;
269 guchar *out = outbuf;
270 JISState state = JIS_ASCII;
272 while (*in != '\0') {
276 } else if (iseuckanji(*in)) {
277 if (iseuckanji(*(in + 1))) {
279 *out++ = *in++ & 0x7f;
280 *out++ = *in++ & 0x7f;
285 if (*in != '\0' && !IS_ASCII(*in)) {
290 } else if (iseuchwkana1(*in)) {
291 if (iseuchwkana2(*(in + 1))) {
292 if (prefs_common.allow_jisx0201_kana) {
295 *out++ = *in++ & 0x7f;
300 if (iseuchwkana1(*(in + 2)) &&
301 iseuchwkana2(*(in + 3)))
302 len = conv_jis_hantozen
304 *(in + 1), *(in + 3));
306 len = conv_jis_hantozen
321 if (*in != '\0' && !IS_ASCII(*in)) {
326 } else if (iseucaux(*in)) {
328 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
330 *out++ = *in++ & 0x7f;
331 *out++ = *in++ & 0x7f;
334 if (*in != '\0' && !IS_ASCII(*in)) {
337 if (*in != '\0' && !IS_ASCII(*in)) {
/*
 * Converts the NUL-terminated string inbuf, writing the result through
 * outbuf (Shift_JIS to EUC-JP, judging by the function name).
 *
 * For an issjiskanji1() byte followed by an issjiskanji2() byte, the pair
 * is remapped arithmetically: a row offset of 0x70 or 0xb0 is chosen from
 * the first byte, both bytes are adjusted, and the results are emitted
 * with the high bit set.  A separate branch handles issjishwkana() bytes;
 * its body is not visible here.
 *
 * NOTE(review): how outlen bounds the output is not visible in this
 * excerpt.
 */
354 static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
356 const guchar *in = inbuf;
357 guchar *out = outbuf;
359 while (*in != '\0') {
362 } else if (issjiskanji1(*in)) {
363 if (issjiskanji2(*(in + 1))) {
365 guchar out2 = *(in + 1);
368 row = out1 < 0xa0 ? 0x70 : 0xb0;
370 out1 = (out1 - row) * 2 - 1;
371 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
373 out1 = (out1 - row) * 2;
377 *out++ = out1 | 0x80;
378 *out++ = out2 | 0x80;
383 if (*in != '\0' && !IS_ASCII(*in)) {
388 } else if (issjishwkana(*in)) {
/*
 * Two-step conversion: inbuf is first run through conv_jistoeuc() into a
 * temporary buffer of outlen bytes obtained with Xalloca(), and that
 * intermediate string is then passed to conv_euctoutf8() to fill outbuf.
 * The function returns without converting if Xalloca() takes its
 * failure action (`return`).
 */
400 static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
404 Xalloca(eucstr, outlen, return);
406 conv_jistoeuc(eucstr, outlen, inbuf);
407 conv_euctoutf8(outbuf, outlen, eucstr);
/*
 * Converts inbuf from CS_SHIFT_JIS to CS_UTF_8 with conv_iconv_strdup()
 * and copies the result into outbuf with strncpy2(), limited by outlen.
 * The second strncpy2() copies inbuf unchanged; presumably it is the
 * fallback taken when the conversion yields no string (the condition is
 * on a line not shown here).
 */
410 static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
414 tmpstr = conv_iconv_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);
416 strncpy2(outbuf, tmpstr, outlen);
419 strncpy2(outbuf, inbuf, outlen);
/*
 * Converts inbuf to UTF-8 through a conversion descriptor kept in a
 * function-level static, so it is opened once and reused across calls.
 *
 * While the descriptor is still (iconv_t)-1, the function tries
 * iconv_open(CS_UTF_8, CS_EUC_JP_MS) and falls back to
 * iconv_open(CS_UTF_8, CS_EUC_JP).  If both fail it logs a warning and
 * copies inbuf to outbuf unchanged.  The static iconv_ok flag appears to
 * record that failure so later calls copy straight through (the lines
 * that test and set it are not visible here).
 *
 * With a usable descriptor the text is converted by
 * conv_iconv_strdup_with_cd() and copied into outbuf with strncpy2(),
 * limited by outlen; the final strncpy2(outbuf, inbuf, ...) is the
 * unconverted fallback.
 */
422 static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
424 static iconv_t cd = (iconv_t)-1;
425 static gboolean iconv_ok = TRUE;
428 if (cd == (iconv_t)-1) {
430 strncpy2(outbuf, inbuf, outlen);
433 cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
434 if (cd == (iconv_t)-1) {
435 cd = iconv_open(CS_UTF_8, CS_EUC_JP);
436 if (cd == (iconv_t)-1) {
437 g_warning("conv_euctoutf8(): %s\n",
440 strncpy2(outbuf, inbuf, outlen);
446 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
448 strncpy2(outbuf, tmpstr, outlen);
451 strncpy2(outbuf, inbuf, outlen);
/*
 * Converts Japanese text of unknown encoding to UTF-8.  The encoding is
 * guessed with conv_guess_ja_encoding() and the matching converter
 * (conv_jistoutf8, conv_sjistoutf8 or conv_euctoutf8) is called; the
 * remaining branch copies inbuf to outbuf unchanged with strncpy2().
 * The case labels selecting each branch are on lines not shown here.
 */
454 static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
456 switch (conv_guess_ja_encoding(inbuf)) {
458 conv_jistoutf8(outbuf, outlen, inbuf);
461 conv_sjistoutf8(outbuf, outlen, inbuf);
464 conv_euctoutf8(outbuf, outlen, inbuf);
467 strncpy2(outbuf, inbuf, outlen);
/*
 * Counterpart of conv_euctoutf8(): converts UTF-8 inbuf through a
 * conversion descriptor kept in a function-level static.
 *
 * While the descriptor is still (iconv_t)-1, the function tries
 * iconv_open(CS_EUC_JP_MS, CS_UTF_8) and falls back to
 * iconv_open(CS_EUC_JP, CS_UTF_8).  If both fail it logs a warning and
 * copies inbuf to outbuf unchanged.  The static iconv_ok flag appears to
 * remember that failure (the lines using it are not visible here).
 *
 * With a usable descriptor the text is converted by
 * conv_iconv_strdup_with_cd() and copied into outbuf with strncpy2(),
 * limited by outlen; the final strncpy2(outbuf, inbuf, ...) is the
 * unconverted fallback.
 */
472 static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
474 static iconv_t cd = (iconv_t)-1;
475 static gboolean iconv_ok = TRUE;
478 if (cd == (iconv_t)-1) {
480 strncpy2(outbuf, inbuf, outlen);
483 cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
484 if (cd == (iconv_t)-1) {
485 cd = iconv_open(CS_EUC_JP, CS_UTF_8);
486 if (cd == (iconv_t)-1) {
487 g_warning("conv_utf8toeuc(): %s\n",
490 strncpy2(outbuf, inbuf, outlen);
496 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
498 strncpy2(outbuf, tmpstr, outlen);
501 strncpy2(outbuf, inbuf, outlen);
/*
 * Two-step conversion: inbuf is first run through conv_utf8toeuc() into a
 * temporary buffer of outlen bytes obtained with Xalloca(), and that
 * intermediate string is then passed to conv_euctojis() to fill outbuf.
 * The function returns without converting if Xalloca() takes its
 * failure action (`return`).
 */
504 static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf)
508 Xalloca(eucstr, outlen, return);
510 conv_utf8toeuc(eucstr, outlen, inbuf);
511 conv_euctojis(outbuf, outlen, eucstr);
/*
 * Per-code-point flags for two-byte EUC-JP characters whose first byte is
 * in 0xa2..0xa8.  isprintableeuckanji() indexes the table as
 * [c1 - 0xa2][c2 - 0xa0] and returns the entry as a boolean, so a 1
 * marks a code point it reports as printable.  Each row covers second
 * bytes 0xa0..0xff (96 entries).
 */
514 static gchar valid_eucjp_tbl[][96] = {
515 /* 0xa2a0 - 0xa2ff */
516 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
518 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
519 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
520 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
521 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0 },
523 /* 0xa3a0 - 0xa3ff */
524 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
526 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
527 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
528 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
529 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 },
531 /* 0xa4a0 - 0xa4ff */
532 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
533 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
534 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
535 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
536 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
537 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
539 /* 0xa5a0 - 0xa5ff */
540 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
541 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
542 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
543 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
544 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
545 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
547 /* 0xa6a0 - 0xa6ff */
548 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
549 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
550 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
551 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
552 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
553 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
555 /* 0xa7a0 - 0xa7ff */
556 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
557 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
558 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
560 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
561 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
563 /* 0xa8a0 - 0xa8ff */
564 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
565 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
566 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
/*
 * Classifies the two-byte EUC-JP sequence (c1, c2) as printable or not.
 *
 * Checks in the order they appear:
 *   - c1 <= 0xa0 or c1 >= 0xf5;
 *   - c2 <= 0xa0 or c2 == 0xff;
 *   - c1 in 0xa9..0xaf;
 *   - c1 in 0xa2..0xa8: the answer comes from
 *     valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
 *   - two further rows get a c2 range test (0xd4..0xfe, and 0xa7..0xfe
 *     for c1 == 0xf4).
 *
 * NOTE(review): the return statement attached to each range check is on a
 * line not shown here; by the function's name the out-of-range cases
 * presumably return FALSE — confirm.
 */
572 static gboolean isprintableeuckanji(guchar c1, guchar c2)
574 if (c1 <= 0xa0 || c1 >= 0xf5)
576 if (c2 <= 0xa0 || c2 == 0xff)
579 if (c1 >= 0xa9 && c1 <= 0xaf)
582 if (c1 >= 0xa2 && c1 <= 0xa8)
583 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
586 if (c2 >= 0xd4 && c2 <= 0xfe)
588 } else if (c1 == 0xf4) {
589 if (c2 >= 0xa7 && c2 <= 0xfe)
/*
 * Sanitises the EUC-JP string str in place.
 *
 * Visible handling:
 *   - a CR immediately followed by LF is collapsed by shifting the rest
 *     of the string (including its NUL) one byte to the left;
 *   - an iseuckanji() lead byte is checked, with its successor, by
 *     isprintableeuckanji(); unprintable codes are substituted;
 *   - an iseuchwkana1() byte followed by an iseuchwkana2() byte is
 *     accepted as half-width kana;
 *   - an iseucaux() byte followed by two iseuckanji() bytes is accepted
 *     as auxiliary kanji;
 *   - other unprintable single bytes are substituted.
 *
 * The substitution statements themselves are on lines not shown here.
 */
596 static void conv_unreadable_eucjp(gchar *str)
598 register guchar *p = str;
602 /* convert CR+LF -> LF */
603 if (*p == '\r' && *(p + 1) == '\n')
604 memmove(p, p + 1, strlen(p));
605 /* printable 7 bit code */
607 } else if (iseuckanji(*p)) {
608 if (isprintableeuckanji(*p, *(p + 1))) {
609 /* printable euc-jp code */
612 /* substitute unprintable code */
621 } else if (iseuchwkana1(*p)) {
622 if (iseuchwkana2(*(p + 1)))
623 /* euc-jp hankaku kana */
627 } else if (iseucaux(*p)) {
628 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
629 /* auxiliary kanji */
634 /* substitute unprintable 1 byte code */
/*
 * Sanitises str in place: a CR immediately followed by LF is collapsed by
 * shifting the rest of the string (including its NUL) one byte to the
 * left, and every byte for which IS_ASCII() is false is overwritten with
 * SUBST_CHAR.  The loop header is on a line not shown here.
 */
639 static void conv_unreadable_8bit(gchar *str)
641 register guchar *p = str;
644 /* convert CR+LF -> LF */
645 if (*p == '\r' && *(p + 1) == '\n')
646 memmove(p, p + 1, strlen(p));
647 else if (!IS_ASCII(*p)) *p = SUBST_CHAR;
/*
 * Sanitises str in place: a CR immediately followed by LF is collapsed by
 * shifting the rest of the string (including its NUL) one byte to the
 * left, and bytes in the range 0x7f..0x9f get special treatment.
 * NOTE(review): the statement applied to that range is on a line not
 * shown here; presumably it substitutes the byte, as in
 * conv_unreadable_8bit() — confirm.
 */
652 static void conv_unreadable_latin(gchar *str)
654 register guchar *p = str;
657 /* convert CR+LF -> LF */
658 if (*p == '\r' && *(p + 1) == '\n')
659 memmove(p, p + 1, strlen(p));
660 else if ((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
/*
 * Rewrites certain two-byte characters in str, in place, as single bytes.
 *
 * Visible handling:
 *   - in the first branch (its lead-byte test is not shown) a second byte
 *     in 0xb0..0xfa is converted and the tail of the string is shifted
 *     left by one byte with memmove();
 *   - for lead byte 0xa1, a second byte in 0xa0..0xef is looked up in
 *     char_tbl[ch - 0xa0]; unless the entry is NCV ("no conversion") the
 *     entry replaces the character and the tail is shifted left;
 *   - other iseuckanji() lead bytes take a separate branch whose body is
 *     not shown here.
 *
 * char_tbl holds the ASCII punctuation equivalents for the 0xa1 row.
 */
668 void conv_mb_alnum(gchar *str)
670 static guchar char_tbl[] = {
672 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
673 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
675 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
676 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
678 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
679 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
681 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
682 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
684 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
685 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
688 register guchar *p = str;
695 register guchar ch = *(p + 1);
697 if (ch >= 0xb0 && ch <= 0xfa) {
702 memmove(p, p + 1, len);
708 } else if (*p == 0xa1) {
709 register guchar ch = *(p + 1);
711 if (ch >= 0xa0 && ch <= 0xef &&
712 NCV != char_tbl[ch - 0xa0]) {
713 *p = char_tbl[ch - 0xa0];
716 memmove(p, p + 1, len);
722 } else if (iseuckanji(*p)) {
/*
 * Heuristically guesses the Japanese encoding of str, starting from
 * C_US_ASCII.
 *
 * Visible rules:
 *   - ESC followed by '$' or '(' returns C_ISO_2022_JP immediately,
 *     provided nothing else has been guessed yet;
 *   - IS_ASCII() bytes are handled by their own branch (body not shown);
 *   - a pair of iseuckanji() bytes is examined further; when the guess is
 *     already C_SHIFT_JIS and the pair is also a valid Shift_JIS sequence
 *     the guess stays C_SHIFT_JIS;
 *   - a Shift_JIS kanji pair sets C_SHIFT_JIS when it also satisfies the
 *     EUC half-width kana tests;
 *   - a lone issjishwkana() byte sets C_SHIFT_JIS.
 *
 * NOTE(review): the remaining assignments and the final return are on
 * lines not shown here.
 */
732 CharSet conv_guess_ja_encoding(const gchar *str)
734 const guchar *p = str;
735 CharSet guessed = C_US_ASCII;
738 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
739 if (guessed == C_US_ASCII)
740 return C_ISO_2022_JP;
742 } else if (IS_ASCII(*p)) {
744 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
745 if (*p >= 0xfd && *p <= 0xfe)
747 else if (guessed == C_SHIFT_JIS) {
748 if ((issjiskanji1(*p) &&
749 issjiskanji2(*(p + 1))) ||
751 guessed = C_SHIFT_JIS;
757 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
758 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
759 guessed = C_SHIFT_JIS;
763 } else if (issjishwkana(*p)) {
764 guessed = C_SHIFT_JIS;
/* Display conversion for this source encoding: delegates to conv_jistoutf8(). */
774 static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
776 conv_jistoutf8(outbuf, outlen, inbuf);
/* Display conversion for this source encoding: delegates to conv_sjistoutf8(). */
779 static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
781 conv_sjistoutf8(outbuf, outlen, inbuf);
/* Display conversion for this source encoding: delegates to conv_euctoutf8(). */
784 static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
786 conv_euctoutf8(outbuf, outlen, inbuf);
/*
 * Prepares text that should already be UTF-8 for display.  When
 * g_utf8_validate() accepts inbuf it is copied to outbuf with strncpy2(),
 * limited by outlen.  conv_ustodisp() is the other path; the `else`
 * joining the two is presumably on a line not shown here.
 */
789 void conv_utf8todisp(gchar *outbuf, gint outlen, const gchar *inbuf)
791 if (g_utf8_validate(inbuf, -1, NULL) == TRUE)
792 strncpy2(outbuf, inbuf, outlen);
794 conv_ustodisp(outbuf, outlen, inbuf);
/*
 * Converts text of unknown Japanese encoding for display: runs
 * conv_anytoutf8(), then, if the result is still not valid UTF-8, passes
 * outbuf through conv_unreadable_8bit() to replace non-ASCII bytes.
 */
797 static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
799 conv_anytoutf8(outbuf, outlen, inbuf);
800 if (g_utf8_validate(outbuf, -1, NULL) != TRUE)
801 conv_unreadable_8bit(outbuf);
/*
 * Copies inbuf to outbuf with strncpy2(), limited by outlen, then runs
 * conv_unreadable_8bit() on the copy to replace non-ASCII bytes.
 */
804 static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
806 strncpy2(outbuf, inbuf, outlen);
807 conv_unreadable_8bit(outbuf);
/*
 * Converts inbuf from the locale charset (conv_get_locale_charset_str())
 * with conv_iconv_strdup() and copies the result into outbuf with
 * strncpy2(), limited by outlen.  The conv_utf8todisp() call is
 * presumably the fallback when that conversion yields no string; the
 * condition and the destination charset argument are on lines not shown
 * here.
 */
810 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
814 tmpstr = conv_iconv_strdup(inbuf, conv_get_locale_charset_str(),
817 strncpy2(outbuf, tmpstr, outlen);
820 conv_utf8todisp(outbuf, outlen, inbuf);
/*
 * Identity converter: copies inbuf to outbuf with strncpy2(), limited by
 * outlen.  Its address is also compared against by conv_convert() and
 * conv_codeset_strdup() to detect that no dedicated converter was
 * selected.
 */
823 static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
825 strncpy2(outbuf, inbuf, outlen);
/*
 * Inspects encoding names that start with "X-" or "x-" (private
 * encodings).  Such a name is compared case-insensitively with CS_X_GBK.
 * NOTE(review): the return statements and the return type are on lines
 * not shown here.  Callers assign the result back to their charset-name
 * variable, so the function presumably returns a replacement name for
 * known private encodings and the input otherwise — confirm.
 */
829 conv_get_fallback_for_private_encoding(const gchar *encoding)
831 if (encoding && (encoding[0] == 'X' || encoding[0] == 'x') &&
832 encoding[1] == '-') {
833 if (!g_ascii_strcasecmp(encoding, CS_X_GBK))
/*
 * Allocates a zero-initialised CodeConverter for src_charset.
 *
 * src_charset is first passed through
 * conv_get_fallback_for_private_encoding().  The converter then stores
 * the conversion function chosen by conv_get_code_conv_func(src_charset,
 * NULL), a g_strdup() copy of the charset name, and the CharSet value
 * from conv_get_charset_from_str().  The copied name is released by
 * conv_code_converter_destroy().
 */
840 CodeConverter *conv_code_converter_new(const gchar *src_charset)
844 src_charset = conv_get_fallback_for_private_encoding(src_charset);
846 conv = g_new0(CodeConverter, 1);
847 conv->code_conv_func = conv_get_code_conv_func(src_charset, NULL);
848 conv->charset_str = g_strdup(src_charset);
849 conv->charset = conv_get_charset_from_str(src_charset);
/*
 * Releases the charset name owned by conv.
 * NOTE(review): conv is dereferenced without a NULL check, and the line
 * that would free conv itself is not shown here — confirm.
 */
854 void conv_code_converter_destroy(CodeConverter *conv)
856 g_free(conv->charset_str);
/*
 * Converts inbuf into outbuf (at most outlen bytes) using conv.
 *
 * When conv holds a dedicated conversion function (anything other than
 * conv_noconv) that function is called directly.  The other path
 * converts with conv_iconv_strdup() from conv->charset_str to the
 * default destination (NULL) and copies the result with strncpy2().
 *
 * NOTE(review): the rest of the parameter list, the failure handling and
 * the return values are on lines not shown here.
 */
860 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
863 if (conv->code_conv_func != conv_noconv)
864 conv->code_conv_func(outbuf, outlen, inbuf);
868 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
872 strncpy2(outbuf, str, outlen);
/*
 * Returns a newly allocated copy of inbuf converted from src_code to
 * dest_code.
 *
 * src_code is first passed through
 * conv_get_fallback_for_private_encoding().  If
 * conv_get_code_conv_func() selects a dedicated converter, the text is
 * converted into a scratch buffer of (strlen(inbuf) + 1) * 3 bytes,
 * which is then shrunk with g_realloc() to the exact string length.
 * Otherwise the work is delegated to conv_iconv_strdup().
 *
 * Returns NULL when the scratch buffer could not be obtained.
 */
880 gchar *conv_codeset_strdup(const gchar *inbuf,
881 const gchar *src_code, const gchar *dest_code)
885 CodeConvFunc conv_func;
887 src_code = conv_get_fallback_for_private_encoding(src_code);
888 conv_func = conv_get_code_conv_func(src_code, dest_code);
889 if (conv_func != conv_noconv) {
890 len = (strlen(inbuf) + 1) * 3;
892 if (!buf) return NULL;
894 conv_func(buf, len, inbuf);
895 return g_realloc(buf, strlen(buf) + 1);
898 return conv_iconv_strdup(inbuf, src_code, dest_code);
/*
 * Selects the built-in conversion routine for a source/destination
 * charset pair.  The default result is conv_noconv.
 *
 * - A NULL src_charset_str means "use the locale charset"
 *   (conv_get_locale_charset()).
 * - When both names are NULL (auto-detection mode) and the locale charset
 *   is C_EUC_JP or C_SHIFT_JIS, conv_anytodisp is returned.
 * - A destination of C_US_ASCII always yields conv_ustodisp.
 * - Otherwise the switch on the source charset picks a Japanese converter
 *   according to the destination: C_AUTO selects the *todisp variant,
 *   C_EUC_JP / C_UTF_8 / the ISO-2022-JP family select the matching
 *   direct converter.
 *
 * Several case labels of the switch are on lines not shown here.
 */
901 CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
902 const gchar *dest_charset_str)
904 CodeConvFunc code_conv = conv_noconv;
906 CharSet dest_charset;
908 if (!src_charset_str)
909 src_charset = conv_get_locale_charset();
911 src_charset = conv_get_charset_from_str(src_charset_str);
913 /* auto detection mode */
914 if (!src_charset_str && !dest_charset_str) {
915 if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
916 return conv_anytodisp;
921 dest_charset = conv_get_charset_from_str(dest_charset_str);
923 if (dest_charset == C_US_ASCII)
924 return conv_ustodisp;
926 switch (src_charset) {
944 case C_ISO_2022_JP_2:
945 case C_ISO_2022_JP_3:
946 if (dest_charset == C_AUTO)
947 code_conv = conv_jistodisp;
948 else if (dest_charset == C_EUC_JP)
949 code_conv = conv_jistoeuc;
950 else if (dest_charset == C_UTF_8)
951 code_conv = conv_jistoutf8;
954 if (dest_charset == C_AUTO)
955 code_conv = conv_sjistodisp;
956 else if (dest_charset == C_EUC_JP)
957 code_conv = conv_sjistoeuc;
958 else if (dest_charset == C_UTF_8)
959 code_conv = conv_sjistoutf8;
962 if (dest_charset == C_AUTO)
963 code_conv = conv_euctodisp;
964 else if (dest_charset == C_ISO_2022_JP ||
965 dest_charset == C_ISO_2022_JP_2 ||
966 dest_charset == C_ISO_2022_JP_3)
967 code_conv = conv_euctojis;
968 else if (dest_charset == C_UTF_8)
969 code_conv = conv_euctoutf8;
972 if (dest_charset == C_EUC_JP)
973 code_conv = conv_utf8toeuc;
974 else if (dest_charset == C_ISO_2022_JP ||
975 dest_charset == C_ISO_2022_JP_2 ||
976 dest_charset == C_ISO_2022_JP_3)
977 code_conv = conv_utf8tojis;
/*
 * Returns a newly allocated copy of inbuf converted from src_code to
 * dest_code with iconv.
 *
 * Defaults visible here: src_code can be replaced by
 * conv_get_outgoing_charset_str() and dest_code by CS_INTERNAL
 * (presumably when the respective argument is NULL; the conditions are
 * on lines not shown).  No conversion is attempted, and a plain
 * g_strdup() copy is returned, when the two names compare equal
 * ignoring case or when the destination is CS_US_ASCII.  Otherwise a
 * descriptor is opened with iconv_open() and the work is done by
 * conv_iconv_strdup_with_cd().
 *
 * NOTE(review): the names are compared with strcasecmp(), which is
 * locale-dependent, whereas other lookups in this file use
 * g_ascii_strcasecmp().  The iconv_open() failure path and the
 * descriptor clean-up are on lines not shown here.
 */
986 gchar *conv_iconv_strdup(const gchar *inbuf,
987 const gchar *src_code, const gchar *dest_code)
993 src_code = conv_get_outgoing_charset_str();
995 dest_code = CS_INTERNAL;
997 /* don't convert if src and dest codeset are identical */
998 if (!strcasecmp(src_code, dest_code))
999 return g_strdup(inbuf);
1001 /* don't convert if current codeset is US-ASCII */
1002 if (!strcasecmp(dest_code, CS_US_ASCII))
1003 return g_strdup(inbuf);
1005 cd = iconv_open(dest_code, src_code);
1006 if (cd == (iconv_t)-1)
1009 outbuf = conv_iconv_strdup_with_cd(inbuf, cd);
/*
 * Converts the NUL-terminated string inbuf with the already opened iconv
 * descriptor cd and returns the result in a newly allocated buffer.
 *
 * The output buffer starts at (strlen(inbuf) + 1) * 2 bytes.  The local
 * EXPAND_BUF() macro reallocates it and recomputes the write pointer and
 * the remaining space from the saved offset, since g_realloc() may move
 * the buffer.
 *
 * Error handling in the main iconv() loop:
 *   - EILSEQ: SUBST_CHAR is written to the output in place of the
 *     offending input, after checking whether output space is left;
 *   - EINVAL and E2BIG each have their own branch (bodies not shown
 *     here);
 *   - anything else is reported with g_warning().
 *
 * A second loop calls iconv() with a NULL input to flush any pending
 * shift state, again handling E2BIG.  Finally the buffer is shrunk to
 * the converted length plus one byte.
 */
1016 gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd)
1018 const gchar *inbuf_p;
1029 in_size = strlen(inbuf);
1031 out_size = (in_size + 1) * 2;
1032 outbuf = g_malloc(out_size);
1034 out_left = out_size;
1036 #define EXPAND_BUF() \
1038 len = outbuf_p - outbuf; \
1040 outbuf = g_realloc(outbuf, out_size); \
1041 outbuf_p = outbuf + len; \
1042 out_left = out_size - len; \
1045 while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
1046 &outbuf_p, &out_left)) == (size_t)-1) {
1047 if (EILSEQ == errno) {
1048 //g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
1051 if (out_left == 0) {
1054 *outbuf_p++ = SUBST_CHAR;
1056 } else if (EINVAL == errno) {
1058 } else if (E2BIG == errno) {
1061 g_warning("conv_iconv_strdup(): %s\n",
1067 while ((n_conv = iconv(cd, NULL, NULL, &outbuf_p, &out_left)) ==
1069 if (E2BIG == errno) {
1072 g_warning("conv_iconv_strdup(): %s\n",
1080 len = outbuf_p - outbuf;
1081 outbuf = g_realloc(outbuf, len + 1);
1087 static const struct {
1091 {C_US_ASCII, CS_US_ASCII},
1092 {C_US_ASCII, CS_ANSI_X3_4_1968},
1093 {C_UTF_8, CS_UTF_8},
1094 {C_UTF_7, CS_UTF_7},
1095 {C_ISO_8859_1, CS_ISO_8859_1},
1096 {C_ISO_8859_2, CS_ISO_8859_2},
1097 {C_ISO_8859_3, CS_ISO_8859_3},
1098 {C_ISO_8859_4, CS_ISO_8859_4},
1099 {C_ISO_8859_5, CS_ISO_8859_5},
1100 {C_ISO_8859_6, CS_ISO_8859_6},
1101 {C_ISO_8859_7, CS_ISO_8859_7},
1102 {C_ISO_8859_8, CS_ISO_8859_8},
1103 {C_ISO_8859_9, CS_ISO_8859_9},
1104 {C_ISO_8859_10, CS_ISO_8859_10},
1105 {C_ISO_8859_11, CS_ISO_8859_11},
1106 {C_ISO_8859_13, CS_ISO_8859_13},
1107 {C_ISO_8859_14, CS_ISO_8859_14},
1108 {C_ISO_8859_15, CS_ISO_8859_15},
1109 {C_BALTIC, CS_BALTIC},
1110 {C_CP1250, CS_CP1250},
1111 {C_CP1251, CS_CP1251},
1112 {C_CP1252, CS_CP1252},
1113 {C_CP1253, CS_CP1253},
1114 {C_CP1254, CS_CP1254},
1115 {C_CP1255, CS_CP1255},
1116 {C_CP1256, CS_CP1256},
1117 {C_CP1257, CS_CP1257},
1118 {C_CP1258, CS_CP1258},
1119 {C_WINDOWS_1250, CS_WINDOWS_1250},
1120 {C_WINDOWS_1251, CS_WINDOWS_1251},
1121 {C_WINDOWS_1252, CS_WINDOWS_1252},
1122 {C_WINDOWS_1253, CS_WINDOWS_1253},
1123 {C_WINDOWS_1254, CS_WINDOWS_1254},
1124 {C_WINDOWS_1255, CS_WINDOWS_1255},
1125 {C_WINDOWS_1256, CS_WINDOWS_1256},
1126 {C_WINDOWS_1257, CS_WINDOWS_1257},
1127 {C_WINDOWS_1258, CS_WINDOWS_1258},
1128 {C_KOI8_R, CS_KOI8_R},
1129 {C_KOI8_T, CS_KOI8_T},
1130 {C_KOI8_U, CS_KOI8_U},
1131 {C_ISO_2022_JP, CS_ISO_2022_JP},
1132 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
1133 {C_ISO_2022_JP_3, CS_ISO_2022_JP_3},
1134 {C_EUC_JP, CS_EUC_JP},
1135 {C_EUC_JP, CS_EUCJP},
1136 {C_EUC_JP_MS, CS_EUC_JP_MS},
1137 {C_SHIFT_JIS, CS_SHIFT_JIS},
1138 {C_SHIFT_JIS, CS_SHIFT__JIS},
1139 {C_SHIFT_JIS, CS_SJIS},
1140 {C_ISO_2022_KR, CS_ISO_2022_KR},
1141 {C_EUC_KR, CS_EUC_KR},
1142 {C_ISO_2022_CN, CS_ISO_2022_CN},
1143 {C_EUC_CN, CS_EUC_CN},
1144 {C_GB2312, CS_GB2312},
1146 {C_EUC_TW, CS_EUC_TW},
1148 {C_BIG5_HKSCS, CS_BIG5_HKSCS},
1149 {C_TIS_620, CS_TIS_620},
1150 {C_WINDOWS_874, CS_WINDOWS_874},
1151 {C_GEORGIAN_PS, CS_GEORGIAN_PS},
1152 {C_TCVN5712_1, CS_TCVN5712_1},
1155 static const struct {
1156 gchar *const locale;
1158 CharSet out_charset;
1159 } locale_table[] = {
1160 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
1161 {"ja_JP.EUC-JP" , C_EUC_JP , C_ISO_2022_JP},
1162 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
1163 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
1164 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
1165 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
1166 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
1167 {"ko_KR.EUC-KR" , C_EUC_KR , C_EUC_KR},
1168 {"ko_KR" , C_EUC_KR , C_EUC_KR},
1169 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
1170 {"zh_CN.GBK" , C_GBK , C_GBK},
1171 {"zh_CN" , C_GB2312 , C_GB2312},
1172 {"zh_HK" , C_BIG5_HKSCS , C_BIG5_HKSCS},
1173 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
1174 {"zh_TW.EUC-TW" , C_EUC_TW , C_BIG5},
1175 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
1176 {"zh_TW" , C_BIG5 , C_BIG5},
1178 {"ru_RU.KOI8-R" , C_KOI8_R , C_KOI8_R},
1179 {"ru_RU.KOI8R" , C_KOI8_R , C_KOI8_R},
1180 {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
1181 {"ru_RU" , C_ISO_8859_5 , C_KOI8_R},
1182 {"tg_TJ" , C_KOI8_T , C_KOI8_T},
1183 {"ru_UA" , C_KOI8_U , C_KOI8_U},
1184 {"uk_UA.CP1251" , C_WINDOWS_1251, C_KOI8_U},
1185 {"uk_UA" , C_KOI8_U , C_KOI8_U},
1187 {"be_BY" , C_WINDOWS_1251, C_WINDOWS_1251},
1188 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
1190 {"yi_US" , C_WINDOWS_1255, C_WINDOWS_1255},
1192 {"af_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1193 {"br_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1194 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1195 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1196 {"de_AT" , C_ISO_8859_1 , C_ISO_8859_1},
1197 {"de_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1198 {"de_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1199 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
1200 {"de_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1201 {"en_AU" , C_ISO_8859_1 , C_ISO_8859_1},
1202 {"en_BW" , C_ISO_8859_1 , C_ISO_8859_1},
1203 {"en_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1204 {"en_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1205 {"en_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1206 {"en_HK" , C_ISO_8859_1 , C_ISO_8859_1},
1207 {"en_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1208 {"en_NZ" , C_ISO_8859_1 , C_ISO_8859_1},
1209 {"en_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1210 {"en_SG" , C_ISO_8859_1 , C_ISO_8859_1},
1211 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
1212 {"en_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1213 {"en_ZW" , C_ISO_8859_1 , C_ISO_8859_1},
1214 {"es_AR" , C_ISO_8859_1 , C_ISO_8859_1},
1215 {"es_BO" , C_ISO_8859_1 , C_ISO_8859_1},
1216 {"es_CL" , C_ISO_8859_1 , C_ISO_8859_1},
1217 {"es_CO" , C_ISO_8859_1 , C_ISO_8859_1},
1218 {"es_CR" , C_ISO_8859_1 , C_ISO_8859_1},
1219 {"es_DO" , C_ISO_8859_1 , C_ISO_8859_1},
1220 {"es_EC" , C_ISO_8859_1 , C_ISO_8859_1},
1221 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1222 {"es_GT" , C_ISO_8859_1 , C_ISO_8859_1},
1223 {"es_HN" , C_ISO_8859_1 , C_ISO_8859_1},
1224 {"es_MX" , C_ISO_8859_1 , C_ISO_8859_1},
1225 {"es_NI" , C_ISO_8859_1 , C_ISO_8859_1},
1226 {"es_PA" , C_ISO_8859_1 , C_ISO_8859_1},
1227 {"es_PE" , C_ISO_8859_1 , C_ISO_8859_1},
1228 {"es_PR" , C_ISO_8859_1 , C_ISO_8859_1},
1229 {"es_PY" , C_ISO_8859_1 , C_ISO_8859_1},
1230 {"es_SV" , C_ISO_8859_1 , C_ISO_8859_1},
1231 {"es_US" , C_ISO_8859_1 , C_ISO_8859_1},
1232 {"es_UY" , C_ISO_8859_1 , C_ISO_8859_1},
1233 {"es_VE" , C_ISO_8859_1 , C_ISO_8859_1},
1234 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
1235 {"eu_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1236 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1237 {"fo_FO" , C_ISO_8859_1 , C_ISO_8859_1},
1238 {"fr_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1239 {"fr_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1240 {"fr_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1241 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1242 {"fr_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1243 {"ga_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1244 {"gl_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1245 {"gv_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1246 {"id_ID" , C_ISO_8859_1 , C_ISO_8859_1},
1247 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
1248 {"it_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1249 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
1250 {"kl_GL" , C_ISO_8859_1 , C_ISO_8859_1},
1251 {"kw_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1252 {"ms_MY" , C_ISO_8859_1 , C_ISO_8859_1},
1253 {"nl_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1254 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
1255 {"nn_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1256 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1257 {"oc_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1258 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
1259 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
1260 {"sq_AL" , C_ISO_8859_1 , C_ISO_8859_1},
1261 {"sv_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1262 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
1263 {"tl_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1264 {"uz_UZ" , C_ISO_8859_1 , C_ISO_8859_1},
1265 {"wa_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1267 {"bs_BA" , C_ISO_8859_2 , C_ISO_8859_2},
1268 {"cs_CZ" , C_ISO_8859_2 , C_ISO_8859_2},
1269 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
1270 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
1271 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
1272 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
1273 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
1274 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
1276 {"sr_YU@cyrillic" , C_ISO_8859_5 , C_ISO_8859_5},
1277 {"sr_YU" , C_ISO_8859_2 , C_ISO_8859_2},
1279 {"mt_MT" , C_ISO_8859_3 , C_ISO_8859_3},
1281 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
1282 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1283 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1284 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
1286 {"mk_MK" , C_ISO_8859_5 , C_ISO_8859_5},
1288 {"ar_AE" , C_ISO_8859_6 , C_ISO_8859_6},
1289 {"ar_BH" , C_ISO_8859_6 , C_ISO_8859_6},
1290 {"ar_DZ" , C_ISO_8859_6 , C_ISO_8859_6},
1291 {"ar_EG" , C_ISO_8859_6 , C_ISO_8859_6},
1292 {"ar_IQ" , C_ISO_8859_6 , C_ISO_8859_6},
1293 {"ar_JO" , C_ISO_8859_6 , C_ISO_8859_6},
1294 {"ar_KW" , C_ISO_8859_6 , C_ISO_8859_6},
1295 {"ar_LB" , C_ISO_8859_6 , C_ISO_8859_6},
1296 {"ar_LY" , C_ISO_8859_6 , C_ISO_8859_6},
1297 {"ar_MA" , C_ISO_8859_6 , C_ISO_8859_6},
1298 {"ar_OM" , C_ISO_8859_6 , C_ISO_8859_6},
1299 {"ar_QA" , C_ISO_8859_6 , C_ISO_8859_6},
1300 {"ar_SA" , C_ISO_8859_6 , C_ISO_8859_6},
1301 {"ar_SD" , C_ISO_8859_6 , C_ISO_8859_6},
1302 {"ar_SY" , C_ISO_8859_6 , C_ISO_8859_6},
1303 {"ar_TN" , C_ISO_8859_6 , C_ISO_8859_6},
1304 {"ar_YE" , C_ISO_8859_6 , C_ISO_8859_6},
1306 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
1307 {"he_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1308 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1309 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
1311 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
1312 {"mi_NZ" , C_ISO_8859_13 , C_ISO_8859_13},
1314 {"cy_GB" , C_ISO_8859_14 , C_ISO_8859_14},
1316 {"ar_IN" , C_UTF_8 , C_UTF_8},
1317 {"en_IN" , C_UTF_8 , C_UTF_8},
1318 {"se_NO" , C_UTF_8 , C_UTF_8},
1319 {"ta_IN" , C_UTF_8 , C_UTF_8},
1320 {"te_IN" , C_UTF_8 , C_UTF_8},
1321 {"ur_PK" , C_UTF_8 , C_UTF_8},
1323 {"th_TH" , C_TIS_620 , C_TIS_620},
1324 /* {"th_TH" , C_WINDOWS_874}, */
1325 /* {"th_TH" , C_ISO_8859_11}, */
1327 {"ka_GE" , C_GEORGIAN_PS , C_GEORGIAN_PS},
1328 {"vi_VN.TCVN" , C_TCVN5712_1 , C_TCVN5712_1},
1330 {"C" , C_US_ASCII , C_US_ASCII},
1331 {"POSIX" , C_US_ASCII , C_US_ASCII},
1332 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
/*
 * Returns the hash table mapping CharSet values to charset names.  The
 * table is kept in a function-level static and filled from the charsets
 * array, keyed by the CharSet value stored as a pointer
 * (GUINT_TO_POINTER) and compared with g_direct_equal.
 *
 * Each key is looked up before inserting; presumably this keeps the
 * first name listed for a CharSet that appears more than once in
 * charsets (the comparison on the lookup result is on a line not shown
 * here).
 */
1335 static GHashTable *conv_get_charset_to_str_table(void)
1337 static GHashTable *table;
1343 table = g_hash_table_new(NULL, g_direct_equal);
1345 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1346 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1349 (table, GUINT_TO_POINTER(charsets[i].charset),
/*
 * Returns the hash table mapping charset names to CharSet values.  The
 * table is kept in a function-level static, uses str_case_hash /
 * str_case_equal for its keys, and receives one entry per element of the
 * charsets array, with the CharSet stored as a pointer-sized integer.
 */
1357 static GHashTable *conv_get_charset_from_str_table(void)
1359 static GHashTable *table;
1365 table = g_hash_table_new(str_case_hash, str_case_equal);
1367 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1368 g_hash_table_insert(table, charsets[i].name,
1369 GUINT_TO_POINTER(charsets[i].charset));
/*
 * Returns the name registered for charset in the table from
 * conv_get_charset_to_str_table(), or NULL when the lookup finds nothing.
 */
1375 const gchar *conv_get_charset_str(CharSet charset)
1379 table = conv_get_charset_to_str_table();
1380 return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
/*
 * Maps a charset name to its CharSet value using the table from
 * conv_get_charset_from_str_table().  A NULL name yields C_AUTO.  A name
 * that is not in the table yields the CharSet whose numeric value is 0,
 * because the failed lookup returns NULL.
 */
1383 CharSet conv_get_charset_from_str(const gchar *charset)
1387 if (!charset) return C_AUTO;
1389 table = conv_get_charset_from_str_table();
1390 return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
/*
 * Determines the CharSet implied by the current locale.  The answer is
 * cached in a function-level static, with -1 meaning "not computed yet".
 *
 * Resolution order as visible here:
 *   - C_US_ASCII is one early result (presumably when no locale name is
 *     available; the condition is on a line not shown);
 *   - a locale name containing "UTF-8" (case-insensitive) gives C_UTF_8;
 *   - a locale name ending in "@euro" gives C_ISO_8859_15;
 *   - otherwise locale_table is scanned: an entry matches when it is a
 *     case-insensitive prefix of the locale name, or when the locale
 *     name is only two characters long and equals the first two
 *     characters of an entry that has no ".codeset" suffix;
 *   - C_AUTO is the remaining fallback.
 */
1393 CharSet conv_get_locale_charset(void)
1395 static CharSet cur_charset = -1;
1396 const gchar *cur_locale;
1400 if (cur_charset != -1)
1403 cur_locale = conv_get_current_locale();
1405 cur_charset = C_US_ASCII;
1409 if (strcasestr(cur_locale, "UTF-8")) {
1410 cur_charset = C_UTF_8;
1414 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1415 cur_charset = C_ISO_8859_15;
1419 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1422 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1423 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1424 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1425 strlen(locale_table[i].locale))) {
1426 cur_charset = locale_table[i].charset;
1428 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1429 !strchr(p + 1, '.')) {
1430 if (strlen(cur_locale) == 2 &&
1431 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1432 cur_charset = locale_table[i].charset;
1438 cur_charset = C_AUTO;
/*
 * Returns the name of the locale charset, computed with
 * conv_get_charset_str(conv_get_locale_charset()) and kept in a
 * function-level static.  Falls back to CS_INTERNAL when no name is
 * registered for that charset.
 */
1442 const gchar *conv_get_locale_charset_str(void)
1444 static const gchar *codeset = NULL;
1447 codeset = conv_get_charset_str(conv_get_locale_charset());
1449 return codeset ? codeset : CS_INTERNAL;
1452 CharSet conv_get_internal_charset(void)
1457 const gchar *conv_get_internal_charset_str(void)
/*
 * Determines the default CharSet for outgoing text from the current
 * locale.  The answer is cached in a function-level static, with -1
 * meaning "not computed yet".
 *
 * Resolution order as visible here:
 *   - C_AUTO is one early result (presumably when no locale name is
 *     available; the condition is on a line not shown);
 *   - a locale name ending in "@euro" gives C_ISO_8859_15;
 *   - otherwise locale_table is scanned with the same matching rules as
 *     conv_get_locale_charset(), but the entry's out_charset field is
 *     used instead of charset.
 */
1462 CharSet conv_get_outgoing_charset(void)
1464 static CharSet out_charset = -1;
1465 const gchar *cur_locale;
1469 if (out_charset != -1)
1472 cur_locale = conv_get_current_locale();
1474 out_charset = C_AUTO;
1478 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1479 out_charset = C_ISO_8859_15;
1483 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1486 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1487 strlen(locale_table[i].locale))) {
1488 out_charset = locale_table[i].out_charset;
1490 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1491 !strchr(p + 1, '.')) {
1492 if (strlen(cur_locale) == 2 &&
1493 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1494 out_charset = locale_table[i].out_charset;
1503 const gchar *conv_get_outgoing_charset_str(void)
1505 CharSet out_charset;
1508 if (prefs_common.outgoing_charset) {
1509 if (!isalpha((guchar)prefs_common.outgoing_charset[0])) {
1510 g_free(prefs_common.outgoing_charset);
1511 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1512 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1513 return prefs_common.outgoing_charset;
1516 out_charset = conv_get_outgoing_charset();
1517 str = conv_get_charset_str(out_charset);
1519 return str ? str : CS_UTF_8;
/*
 * Report whether the given CharSet is a multibyte encoding.
 *
 * NOTE(review): only two case labels of the dispatch are visible in this
 * excerpt (C_ISO_2022_JP_2 and C_ISO_2022_JP_3); the remaining labels and
 * the values returned for each group are not shown.  Presumably the listed
 * charsets yield TRUE and everything else FALSE -- confirm against the
 * full file.
 */
1522 gboolean conv_is_multibyte_encoding(CharSet encoding)
1531 case C_ISO_2022_JP_2:
1532 case C_ISO_2022_JP_3:
1547 const gchar *conv_get_current_locale(void)
1549 const gchar *cur_locale;
1551 cur_locale = g_getenv("LC_ALL");
1552 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1553 if (!cur_locale) cur_locale = g_getenv("LANG");
1554 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1556 debug_print("current locale: %s\n",
1557 cur_locale ? cur_locale : "(none)");
/*
 * Decode a MIME-encoded header string.
 *
 * str               raw header text
 * default_encoding  charset to assume for str when it is not pure ASCII;
 *                   may be NULL (the branch below is skipped in that case)
 *
 * Three paths are visible:
 *   1. str is pure ASCII (is_ascii_str()): it is handed directly to
 *      unmime_header().
 *   2. default_encoding is given: str is converted from default_encoding
 *      to CS_INTERNAL with conv_codeset_strdup() and the converted text is
 *      passed to unmime_header().
 *   3. otherwise: str is converted into a BUFFSIZE stack buffer, with
 *      conv_anytodisp() when the locale charset is C_EUC_JP and with
 *      conv_localetodisp() in the other case, and that buffer is decoded.
 *
 * Returns the result of unmime_header().
 * NOTE(review): presumably a newly allocated string owned by the caller --
 * confirm against unmime_header().
 *
 * NOTE(review): this excerpt omits several short lines (braces, local
 * declarations, and whatever follows the decode in path 2), so the handling
 * of a failed conversion in path 2 is not visible here.
 */
1562 gchar *conv_unmime_header(const gchar *str, const gchar *default_encoding)
1564 gchar buf[BUFFSIZE];
/* nothing to convert for plain ASCII input */
1566 if (is_ascii_str(str))
1567 return unmime_header(str);
1569 if (default_encoding) {
1572 utf8_buf = conv_codeset_strdup
1573 (str, default_encoding, CS_INTERNAL);
1577 decoded_str = unmime_header(utf8_buf);
/* no usable default encoding: go through the display conversion instead */
1583 if (conv_get_locale_charset() == C_EUC_JP)
1584 conv_anytodisp(buf, sizeof(buf), str);
1586 conv_localetodisp(buf, sizeof(buf), str);
1588 return unmime_header(buf);
/*
 * Constants for conv_encode_header().
 *
 * MAX_LINELEN       width budget for one folded header line; the encoder
 *                   starts from MAX_LINELEN - header_len and resets to
 *                   MAX_LINELEN - 1 after a line break.
 * MAX_HARD_LINELEN  absolute ceiling; the encoder lets an unencoded word
 *                   overrun MAX_LINELEN by up to
 *                   (MAX_HARD_LINELEN - MAX_LINELEN) before forcing a break.
 *                   NOTE(review): presumably chosen to stay below the
 *                   998-character line limit for mail messages -- confirm.
 * MIMESEP_BEGIN /
 * MIMESEP_END       opening and closing delimiters of an encoded word.
 */
1591 #define MAX_LINELEN 76
1592 #define MAX_HARD_LINELEN 996
1593 #define MIMESEP_BEGIN "=?"
1594 #define MIMESEP_END "?="
/*
 * LBREAK_IF_REQUIRED(cond, is_plain_text)
 *
 * Line-folding helper for conv_encode_header().  It is not self-contained:
 * it reads and writes that function's locals (dest, len, destp, srcp, left)
 * by name.
 *
 * Visible steps:
 *   - the space remaining in dest, len - (destp - dest), is compared
 *     against MAX_LINELEN + 2;
 *   - when cond holds and the source is not exhausted (*srcp != 0), and
 *     something has already been written on a partially used line
 *     (destp > dest && left < MAX_LINELEN - 1), the character before destp
 *     and -- for is_plain_text -- the character at srcp are tested for
 *     whitespace;
 *   - left is reset to MAX_LINELEN - 1.
 *
 * NOTE(review): several continuation lines of the macro are missing from
 * this excerpt, so the actions taken in each branch (what is written to
 * destp, and what happens when the buffer is nearly full) are not visible
 * here -- confirm against the full file.
 */
1596 #define LBREAK_IF_REQUIRED(cond, is_plain_text) \
1598 if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) { \
1603 if ((cond) && *srcp) { \
1604 if (destp > (guchar *)dest && left < MAX_LINELEN - 1) { \
1605 if (isspace(*(destp - 1))) \
1607 else if (is_plain_text && isspace(*srcp)) \
1612 left = MAX_LINELEN - 1; \
/*
 * Write a MIME-encoded, folded version of the header value src into dest.
 *
 * dest, len    output buffer and its size in bytes
 * src          header text; must pass g_utf8_validate(), otherwise
 *              g_return_if_fail() leaves the function immediately
 * header_len   subtracted from MAX_LINELEN to obtain the room left on the
 *              first line (presumably the length of the header name already
 *              emitted by the caller -- confirm)
 * addr_field   when TRUE, '(' and ')' are kept outside encoded words
 *
 * Text is converted from CS_INTERNAL to the outgoing charset and emitted
 * as "=?charset?B?...?=" or "=?charset?Q?...?=" blocks.  Stretches for
 * which is_next_nonascii() is false are handled by the "output as it is"
 * branch instead.  Line breaks are produced through LBREAK_IF_REQUIRED,
 * with `left` tracking the room remaining on the current line.
 *
 * NOTE(review): this excerpt omits many short lines (braces, local
 * declarations, simple assignments and copies), so the nesting shown below
 * is partial -- consult the full file before changing the logic.
 */
1618 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1619 gint header_len, gboolean addr_field)
1621 const gchar *cur_encoding;
1622 const gchar *out_encoding;
1626 const guchar *srcp = src;
1627 guchar *destp = dest;
1628 gboolean use_base64;
1630 g_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
/* "?B?" is selected when MB_CUR_MAX > 1; the "?Q?" assignment that follows
   presumably belongs to the else branch */
1632 if (MB_CUR_MAX > 1) {
1634 mimesep_enc = "?B?";
1637 mimesep_enc = "?Q?";
1640 cur_encoding = CS_INTERNAL;
1641 out_encoding = conv_get_outgoing_charset_str();
/* US-ASCII is never advertised for an encoded word; ISO-8859-1 is used in
   its place */
1642 if (!strcmp(out_encoding, CS_US_ASCII))
1643 out_encoding = CS_ISO_8859_1;
/* fixed overhead of one encoded word: "=?" + charset + "?B?"/"?Q?" + "?=" */
1645 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1646 strlen(mimesep_enc) + strlen(MIMESEP_END);
1648 left = MAX_LINELEN - header_len;
1651 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1653 while (isspace(*srcp)) {
1656 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1659 /* output as it is if the next word is ASCII string */
1660 if (!is_next_nonascii(srcp)) {
1663 word_len = get_next_word_len(srcp);
1664 LBREAK_IF_REQUIRED(left < word_len, TRUE);
1665 while (word_len > 0) {
/* an unencoded word may overrun MAX_LINELEN, but only up to MAX_HARD_LINELEN */
1666 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1675 /* don't include parentheses in encoded strings */
1676 if (addr_field && (*srcp == '(' || *srcp == ')')) {
1677 LBREAK_IF_REQUIRED(left < 2, FALSE);
/* Encoded-word path.  p scans forward from srcp one UTF-8 character at a
   time (g_utf8_skip[*p] is that character's byte length).  The scan stops
   at whitespace that is not followed by more non-ASCII text and, for
   address fields, at a parenthesis.  For each step the candidate prefix
   (cur_len + mb_len bytes) is trial-converted to the outgoing charset and
   its encoded size is computed with B64LEN() or qp_get_q_encoding_len();
   the prefix is acceptable while mimestr_len + out_enc_str_len <= left.
   The accepted prefix (cur_len bytes) is then converted again and encoded
   with base64_encode() or qp_q_encode().  When conv_codeset_strdup()
   does not deliver a string, a warning is logged and the source bytes,
   after conv_unreadable_8bit(), are used instead (presumably -- the guard
   itself is not visible here). */
1688 const guchar *p = srcp;
1690 gint out_enc_str_len;
1691 gint mime_block_len;
1692 gboolean cont = FALSE;
1694 while (*p != '\0') {
1695 if (isspace(*p) && !is_next_nonascii(p + 1))
1697 /* don't include parentheses in encoded
1699 if (addr_field && (*p == '(' || *p == ')'))
1702 mb_len = g_utf8_skip[*p];
1704 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1705 out_str = conv_codeset_strdup
1706 (part_str, cur_encoding, out_encoding);
1708 g_warning("conv_encode_header(): code conversion failed\n");
1709 conv_unreadable_8bit(part_str);
1710 out_str = g_strdup(part_str);
1712 out_str_len = strlen(out_str);
1715 out_enc_str_len = B64LEN(out_str_len);
1718 qp_get_q_encoding_len(out_str);
1722 if (mimestr_len + out_enc_str_len <= left) {
1725 } else if (cur_len == 0) {
1726 LBREAK_IF_REQUIRED(1, FALSE);
1735 Xstrndup_a(part_str, srcp, cur_len, );
1736 out_str = conv_codeset_strdup
1737 (part_str, cur_encoding, out_encoding);
1739 g_warning("conv_encode_header(): code conversion failed\n");
1740 conv_unreadable_8bit(part_str);
1741 out_str = g_strdup(part_str);
1743 out_str_len = strlen(out_str);
1746 out_enc_str_len = B64LEN(out_str_len);
1749 qp_get_q_encoding_len(out_str);
1751 Xalloca(enc_str, out_enc_str_len + 1, );
1753 base64_encode(enc_str, out_str, out_str_len);
1755 qp_q_encode(enc_str, out_str);
1759 /* output MIME-encoded string block */
1760 mime_block_len = mimestr_len + strlen(enc_str);
/* the size passed to g_snprintf() counts the terminating NUL, hence + 1 */
1761 g_snprintf(destp, mime_block_len + 1,
1762 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1763 out_encoding, mimesep_enc, enc_str);
1764 destp += mime_block_len;
1767 left -= mime_block_len;
/* cont is the break condition here; presumably set when text to encode remains */
1770 LBREAK_IF_REQUIRED(cont, FALSE);
/* the folding helper is private to conv_encode_header() */
1780 #undef LBREAK_IF_REQUIRED
/*
 * Convert a UTF-8 file name to the encoding used for file names on disk,
 * using g_filename_from_utf8().
 *
 * A conversion problem is reported with g_warning() and the GError is
 * released with g_error_free().  g_strdup(utf8_file) provides a copy of
 * the unconverted name as a substitute result.
 *
 * NOTE(review): the conditions guarding the warning and the substitute
 * copy, and the return statement, are not visible in this excerpt.
 * Presumably the function returns a newly allocated string that the caller
 * must g_free() -- confirm against the full file.
 */
1781 gchar *conv_filename_from_utf8(const gchar *utf8_file)
1784 GError *error = NULL;
1786 fs_file = g_filename_from_utf8(utf8_file, -1, NULL, NULL, &error);
1788 g_warning("failed to convert encoding of file name: %s\n",
1790 g_error_free(error);
1793 fs_file = g_strdup(utf8_file);
1798 gchar *conv_filename_to_utf8(const gchar *fs_file)
1800 gchar *utf8_file = NULL;
1801 GError *error = NULL;
1803 utf8_file = g_filename_to_utf8(fs_file, -1, NULL, NULL, &error);
1805 g_warning("failed to convert encoding of file name: %s\n",
1807 g_error_free(error);
1810 if (!utf8_file || !g_utf8_validate(utf8_file, -1, NULL)) {
1812 utf8_file = g_strdup(fs_file);
1813 conv_unreadable_8bit(utf8_file);