2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2005 Hiroyuki Yamamoto
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
27 #include <glib/gi18n.h>
42 #include "quoted-printable.h"
44 #include "prefs_common.h"
/* Substitute byte (0x5f, ASCII '_') stored in place of a byte or character
 * that cannot be displayed or converted.  Defined without a trailing
 * semicolon so that the macro is a plain constant expression: it still works
 * in `x = SUBST_CHAR;` and can now also be used inside larger expressions
 * such as comparisons or function arguments. */
#define SUBST_CHAR 0x5f
/* Byte-class predicates for Japanese encodings.  Every macro masks its
 * argument with 0xff before comparing, so a signed char argument gives the
 * same answer as an unsigned one. */

/* EUC-JP: byte in 0xa1..0xfe (tested on both bytes of a two-byte character). */
57 #define iseuckanji(c) \
58 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* EUC-JP: the byte 0x8e, checked in front of an iseuchwkana2() byte. */
59 #define iseuchwkana1(c) \
60 (((c) & 0xff) == 0x8e)
/* EUC-JP: byte in 0xa1..0xdf, the byte that follows an iseuchwkana1() byte. */
61 #define iseuchwkana2(c) \
62 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* NOTE(review): the #define line for the next expression is not visible in
 * this excerpt; presumably it is iseucaux(c), which is used further down.
 * The expression matches the single byte 0x8f. */
64 (((c) & 0xff) == 0x8f)
/* Shift_JIS: first byte of a two-byte character, 0x81..0x9f or 0xe0..0xfc. */
65 #define issjiskanji1(c) \
66 ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
67 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* Shift_JIS: second byte of a two-byte character, 0x40..0x7e or 0x80..0xfc. */
68 #define issjiskanji2(c) \
69 ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
70 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* Shift_JIS: single-byte half-width kana, 0xa1..0xdf. */
71 #define issjishwkana(c) \
72 (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* NOTE(review): only fragments of the shift-state helper macros are visible
 * here; their #define lines and most of their bodies are missing from this
 * excerpt.  Each visible fragment compares `state` with one JISState value
 * (JIS_KANJI, JIS_ASCII, JIS_HWKANA, JIS_AUXKANJI) before doing its work, and
 * the last visible line stores JIS_AUXKANJI as the new state. */
75 if (state != JIS_KANJI) { \
83 if (state != JIS_ASCII) { \
91 if (state != JIS_HWKANA) { \
99 if (state != JIS_AUXKANJI) { \
104 state = JIS_AUXKANJI; \
/* Forward declarations for the file-local converters defined below.
 * The buffer converters all take (outbuf, outlen, inbuf); the
 * conv_unreadable_*() helpers take a single string that they modify in
 * place. */
107 static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
108 static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf);
109 static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
/* converters producing UTF-8 */
111 static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
112 static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
113 static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
114 static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
/* converters consuming UTF-8 */
116 static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
117 static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf);
/* in-place replacement of undisplayable bytes */
119 static void conv_unreadable_eucjp(gchar *str);
120 static void conv_unreadable_8bit(gchar *str);
121 static void conv_unreadable_latin(gchar *str);
/* converters to the display encoding */
123 static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
124 static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
125 static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
127 static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
128 static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
129 static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf);
/*
 * Convert the NUL-terminated ISO-2022-JP (JIS) string `inbuf` to EUC-JP,
 * writing the result to `outbuf`.
 *
 * The loop tracks the current shift state (starting from JIS_ASCII) while it
 * examines escape-sequence bytes and the 0x0e / 0x0f control bytes; the
 * visible copy statements emit input bytes with the high bit set.
 *
 * NOTE(review): no visible line reads `outlen`, so it looks as if the caller
 * must supply a large enough buffer -- confirm against the full source.
 */
131 static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
133 const guchar *in = inbuf;
134 guchar *out = outbuf;
135 JISState state = JIS_ASCII;
137 while (*in != '\0') {
/* the enclosing tests are not visible; presumably this follows ESC '$' */
141 if (*(in + 1) == '@' || *(in + 1) == 'B') {
144 } else if (*(in + 1) == '(' &&
146 state = JIS_AUXKANJI;
149 /* unknown escape sequence */
/* '(' followed by 'B' or 'J', or by 'I' */
152 } else if (*in == '(') {
153 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
156 } else if (*(in + 1) == 'I') {
160 /* unknown escape sequence */
164 /* unknown escape sequence */
/* 0x0e is the ASCII SO (shift out) control byte */
167 } else if (*in == 0x0e) {
/* 0x0f is the ASCII SI (shift in) control byte */
170 } else if (*in == 0x0f) {
/* copy a byte pair with bit 7 set; stop if the input ends after one byte */
179 *out++ = *in++ | 0x80;
180 if (*in == '\0') break;
181 *out++ = *in++ | 0x80;
/* single byte copied with bit 7 set */
185 *out++ = *in++ | 0x80;
/* another byte pair, same handling as above */
189 *out++ = *in++ | 0x80;
190 if (*in == '\0') break;
191 *out++ = *in++ | 0x80;
/* Codes that conv_jis_hantozen() compares its `sound_sym` argument against. */
200 #define JIS_HWDAKUTEN 0x5e
201 #define JIS_HWHANDAKUTEN 0x5f
/*
 * Translate one half-width kana code into a two-byte code stored in
 * outbuf[0] (high byte) and outbuf[1] (low byte).
 *
 * outbuf:    receives the two output bytes.
 * jis_code:  input code; values outside 0x21..0x5f are rejected by the range
 *            check below (the statement taken on rejection is not visible).
 * sound_sym: code of the following character.  When it equals JIS_HWDAKUTEN
 *            or JIS_HWHANDAKUTEN and jis_code lies in the matching range, the
 *            combined form is taken from dakuten_tbl / handakuten_tbl instead
 *            of h2z_tbl.
 *
 * NOTE(review): the return statements are not visible in this excerpt; the
 * caller stores the result in a variable named `len`.
 */
203 static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
/* plain mapping, indexed by jis_code - 0x20 */
205 static guint16 h2z_tbl[] = {
207 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
208 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
210 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
211 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
213 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
214 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
216 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
217 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
/* mapping used with JIS_HWDAKUTEN, indexed by jis_code - 0x30 */
220 static guint16 dakuten_tbl[] = {
222 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
223 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
225 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
226 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
/* mapping used with JIS_HWHANDAKUTEN, indexed by jis_code - 0x4a */
229 static guint16 handakuten_tbl[] = {
231 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
/* reject codes outside the range covered by h2z_tbl */
239 if (jis_code < 0x21 || jis_code > 0x5f)
242 if (sound_sym == JIS_HWDAKUTEN &&
243 jis_code >= 0x36 && jis_code <= 0x4e) {
244 out_code = dakuten_tbl[jis_code - 0x30];
246 *outbuf = out_code >> 8;
247 *(outbuf + 1) = out_code & 0xff;
252 if (sound_sym == JIS_HWHANDAKUTEN &&
253 jis_code >= 0x4a && jis_code <= 0x4e) {
254 out_code = handakuten_tbl[jis_code - 0x4a];
255 *outbuf = out_code >> 8;
256 *(outbuf + 1) = out_code & 0xff;
/* no combined form applied: plain table lookup */
260 out_code = h2z_tbl[jis_code - 0x20];
261 *outbuf = out_code >> 8;
262 *(outbuf + 1) = out_code & 0xff;
/*
 * Convert the NUL-terminated EUC-JP string `inbuf` to ISO-2022-JP (JIS),
 * writing the result to `outbuf`.
 *
 * Two-byte characters are emitted with the high bit of each byte cleared.
 * Half-width kana (0x8e prefix) are emitted directly when
 * prefs_common.allow_jisx0201_kana is set; the other visible path passes them
 * through conv_jis_hantozen(), looking ahead at a following half-width kana
 * so it can be supplied as the sound mark.
 *
 * NOTE(review): no visible line reads `outlen`; the escape-sequence output
 * and the shift-state updates are in lines missing from this excerpt.
 */
266 static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
268 const guchar *in = inbuf;
269 guchar *out = outbuf;
270 JISState state = JIS_ASCII;
272 while (*in != '\0') {
276 } else if (iseuckanji(*in)) {
277 if (iseuckanji(*(in + 1))) {
/* valid two-byte character: clear bit 7 of both bytes */
279 *out++ = *in++ & 0x7f;
280 *out++ = *in++ & 0x7f;
/* second byte was not valid; look at the byte that follows */
285 if (*in != '\0' && !IS_ASCII(*in)) {
290 } else if (iseuchwkana1(*in)) {
291 if (iseuchwkana2(*(in + 1))) {
292 if (prefs_common.allow_jisx0201_kana) {
295 *out++ = *in++ & 0x7f;
/* is the next character also a half-width kana (possible sound mark)? */
300 if (iseuchwkana1(*(in + 2)) &&
301 iseuchwkana2(*(in + 3)))
302 len = conv_jis_hantozen
304 *(in + 1), *(in + 3));
306 len = conv_jis_hantozen
321 if (*in != '\0' && !IS_ASCII(*in)) {
326 } else if (iseucaux(*in)) {
/* two bytes expected after the iseucaux() prefix byte */
328 if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
330 *out++ = *in++ & 0x7f;
331 *out++ = *in++ & 0x7f;
334 if (*in != '\0' && !IS_ASCII(*in)) {
337 if (*in != '\0' && !IS_ASCII(*in)) {
/*
 * Convert the NUL-terminated Shift_JIS string `inbuf` to EUC-JP, writing the
 * result to `outbuf`.
 *
 * For a two-byte character (issjiskanji1 first byte, issjiskanji2 second
 * byte) both bytes are remapped arithmetically and stored with the high bit
 * set.  A separate branch handles single-byte half-width kana; its body is
 * not visible in this excerpt.
 *
 * NOTE(review): no visible line reads `outlen`.
 */
354 static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
356 const guchar *in = inbuf;
357 guchar *out = outbuf;
359 while (*in != '\0') {
362 } else if (issjiskanji1(*in)) {
363 if (issjiskanji2(*(in + 1))) {
365 guchar out2 = *(in + 1);
/* base offset depends on which first-byte range out1 falls in */
368 row = out1 < 0xa0 ? 0x70 : 0xb0;
/* two alternative mappings; the condition choosing between them is not visible */
370 out1 = (out1 - row) * 2 - 1;
371 out2 -= out2 > 0x7f ? 0x20 : 0x1f;
373 out1 = (out1 - row) * 2;
377 *out++ = out1 | 0x80;
378 *out++ = out2 | 0x80;
/* second byte was not valid; look at the byte that follows */
383 if (*in != '\0' && !IS_ASCII(*in)) {
388 } else if (issjishwkana(*in)) {
/*
 * Convert JIS text to UTF-8 in two steps: conv_jistoeuc() into a temporary
 * buffer of `outlen` bytes, then conv_euctoutf8() from that buffer into
 * `outbuf`.
 *
 * NOTE(review): Xalloca is a project macro not shown here; it is passed
 * `return` as its third argument, presumably the statement to run if the
 * allocation fails.
 */
400 static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
404 Xalloca(eucstr, outlen, return);
406 conv_jistoeuc(eucstr, outlen, inbuf);
407 conv_euctoutf8(outbuf, outlen, eucstr);
/*
 * Convert Shift_JIS text to UTF-8 with conv_iconv_strdup() and copy the
 * result into `outbuf` (at most `outlen` bytes, via strncpy2).
 * The second strncpy2 call copies `inbuf` through unchanged; presumably that
 * is the fallback when the conversion yields no string (the condition is not
 * visible in this excerpt).
 */
410 static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
414 tmpstr = conv_iconv_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);
416 strncpy2(outbuf, tmpstr, outlen);
419 strncpy2(outbuf, inbuf, outlen);
/*
 * Convert EUC-JP text to UTF-8 using an iconv descriptor that is kept in a
 * function-local static between calls.
 *
 * The descriptor is opened for CS_EUC_JP_MS first and, if that fails, for
 * CS_EUC_JP; if both fail a warning is logged and `inbuf` is copied to
 * `outbuf` unchanged.  Otherwise the text is converted with
 * conv_iconv_strdup_with_cd() and copied into `outbuf` with strncpy2.
 *
 * NOTE(review): the lines that read or update `iconv_ok` are not visible.
 */
422 static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
424 static iconv_t cd = (iconv_t)-1;
425 static gboolean iconv_ok = TRUE;
/* descriptor not opened yet */
428 if (cd == (iconv_t)-1) {
430 strncpy2(outbuf, inbuf, outlen);
433 cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
434 if (cd == (iconv_t)-1) {
/* second attempt with the plain EUC-JP name */
435 cd = iconv_open(CS_UTF_8, CS_EUC_JP);
436 if (cd == (iconv_t)-1) {
437 g_warning("conv_euctoutf8(): %s\n",
440 strncpy2(outbuf, inbuf, outlen);
446 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
448 strncpy2(outbuf, tmpstr, outlen);
451 strncpy2(outbuf, inbuf, outlen);
/*
 * Convert text of unknown Japanese encoding to UTF-8: the encoding is
 * guessed with conv_guess_ja_encoding() and the matching converter
 * (JIS, Shift_JIS or EUC-JP) is called; the remaining visible branch copies
 * the input unchanged.  The case labels are not visible in this excerpt.
 */
454 static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
456 switch (conv_guess_ja_encoding(inbuf)) {
458 conv_jistoutf8(outbuf, outlen, inbuf);
461 conv_sjistoutf8(outbuf, outlen, inbuf);
464 conv_euctoutf8(outbuf, outlen, inbuf);
467 strncpy2(outbuf, inbuf, outlen);
/*
 * Convert UTF-8 text to EUC-JP using an iconv descriptor that is kept in a
 * function-local static between calls.
 *
 * The descriptor is opened with CS_EUC_JP_MS as target first and, if that
 * fails, with CS_EUC_JP; if both fail a warning is logged and `inbuf` is
 * copied to `outbuf` unchanged.  Otherwise the text is converted with
 * conv_iconv_strdup_with_cd() and copied into `outbuf` with strncpy2.
 *
 * NOTE(review): the lines that read or update `iconv_ok` are not visible.
 */
472 static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
474 static iconv_t cd = (iconv_t)-1;
475 static gboolean iconv_ok = TRUE;
/* descriptor not opened yet */
478 if (cd == (iconv_t)-1) {
480 strncpy2(outbuf, inbuf, outlen);
483 cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
484 if (cd == (iconv_t)-1) {
/* second attempt with the plain EUC-JP name */
485 cd = iconv_open(CS_EUC_JP, CS_UTF_8);
486 if (cd == (iconv_t)-1) {
487 g_warning("conv_utf8toeuc(): %s\n",
490 strncpy2(outbuf, inbuf, outlen);
496 tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
498 strncpy2(outbuf, tmpstr, outlen);
501 strncpy2(outbuf, inbuf, outlen);
/*
 * Convert UTF-8 text to JIS in two steps: conv_utf8toeuc() into a temporary
 * buffer of `outlen` bytes, then conv_euctojis() from that buffer into
 * `outbuf`.
 *
 * NOTE(review): Xalloca is a project macro not shown here; it is passed
 * `return` as its third argument, presumably the statement to run if the
 * allocation fails.
 */
504 static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf)
508 Xalloca(eucstr, outlen, return);
510 conv_utf8toeuc(eucstr, outlen, inbuf);
511 conv_euctojis(outbuf, outlen, eucstr);
/* Lookup table consulted by isprintableeuckanji() for first bytes
 * 0xa2..0xa8: the row index is (first byte - 0xa2), the column index is
 * (second byte - 0xa0), and the stored value is returned as the gboolean
 * result. */
514 static gchar valid_eucjp_tbl[][96] = {
515 /* 0xa2a0 - 0xa2ff */
516 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
518 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
519 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
520 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
521 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0 },
523 /* 0xa3a0 - 0xa3ff */
524 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
525 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
526 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
527 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
528 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
529 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 },
531 /* 0xa4a0 - 0xa4ff */
532 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
533 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
534 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
535 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
536 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
537 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
539 /* 0xa5a0 - 0xa5ff */
540 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
541 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
542 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
543 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
544 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
545 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
547 /* 0xa6a0 - 0xa6ff */
548 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
549 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
550 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
551 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
552 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
553 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
555 /* 0xa7a0 - 0xa7ff */
556 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
557 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
558 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
560 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
561 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
563 /* 0xa8a0 - 0xa8ff */
564 { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
565 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
566 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
/*
 * Decide whether the EUC-JP byte pair (c1, c2) is a printable character.
 *
 * The checks run in order: c1 outside 0xa1..0xf4, c2 outside 0xa1..0xfe,
 * c1 in 0xa9..0xaf, a table lookup in valid_eucjp_tbl for c1 in 0xa2..0xa8,
 * and finally range checks on c2 for specific c1 values (0xf4 is visible).
 *
 * NOTE(review): the return statements of every branch except the table
 * lookup are not visible in this excerpt.
 */
572 static gboolean isprintableeuckanji(guchar c1, guchar c2)
574 if (c1 <= 0xa0 || c1 >= 0xf5)
576 if (c2 <= 0xa0 || c2 == 0xff)
579 if (c1 >= 0xa9 && c1 <= 0xaf)
/* rows 0xa2..0xa8 are decided by the table */
582 if (c1 >= 0xa2 && c1 <= 0xa8)
583 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
586 if (c2 >= 0xd4 && c2 <= 0xfe)
588 } else if (c1 == 0xf4) {
589 if (c2 >= 0xa7 && c2 <= 0xfe)
/*
 * Scan the EUC-JP string `str` in place, classifying each position as 7-bit
 * text, a two-byte character, half-width kana or an auxiliary character.
 * A CR that is followed by LF is removed by shifting the rest of the string
 * (terminator included) down by one byte.  According to the original
 * comments, unprintable codes are substituted; the substitution statements
 * themselves are not visible in this excerpt.
 */
596 static void conv_unreadable_eucjp(gchar *str)
598 register guchar *p = str;
602 /* convert CR+LF -> LF */
603 if (*p == '\r' && *(p + 1) == '\n')
604 memmove(p, p + 1, strlen(p));
605 /* printable 7 bit code */
607 } else if (iseuckanji(*p)) {
608 if (isprintableeuckanji(*p, *(p + 1))) {
609 /* printable euc-jp code */
612 /* substitute unprintable code */
621 } else if (iseuchwkana1(*p)) {
622 if (iseuchwkana2(*(p + 1)))
623 /* euc-jp hankaku kana */
627 } else if (iseucaux(*p)) {
628 if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
629 /* auxiliary kanji */
634 /* substitute unprintable 1 byte code */
/*
 * Rewrite `str` in place: a CR that is followed by LF is removed, and any
 * byte for which IS_ASCII() is false is overwritten with SUBST_CHAR.
 * The loop header is not visible in this excerpt.
 */
639 static void conv_unreadable_8bit(gchar *str)
641 register guchar *p = str;
644 /* convert CR+LF -> LF */
645 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes starting at p + 1 include the terminating NUL */
646 memmove(p, p + 1, strlen(p));
647 else if (!IS_ASCII(*p)) *p = SUBST_CHAR;
/*
 * Rewrite `str` in place: a CR that is followed by LF is removed, and bytes
 * in the range 0x7f..0x9f are singled out by the second test.
 * NOTE(review): the statement executed for those bytes and the loop header
 * are not visible in this excerpt.
 */
652 static void conv_unreadable_latin(gchar *str)
654 register guchar *p = str;
657 /* convert CR+LF -> LF */
658 if (*p == '\r' && *(p + 1) == '\n')
/* strlen(p) bytes starting at p + 1 include the terminating NUL */
659 memmove(p, p + 1, strlen(p));
660 else if ((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
/*
 * Shorten selected two-byte characters in `str` to one byte, in place.
 *
 * For a character whose first byte is 0xa1, the second byte (0xa0..0xef)
 * indexes char_tbl; unless the entry is NCV, the first byte is replaced by
 * the table entry and the remainder of the string is moved down one byte.
 * An earlier branch handles second bytes 0xb0..0xfa in a similar way.
 *
 * NOTE(review): the first-byte test of that earlier branch, the definition
 * of NCV and the computation of `len` are not visible in this excerpt.
 */
668 void conv_mb_alnum(gchar *str)
/* replacement byte for each second byte 0xa0..0xef after a 0xa1 first byte */
670 static guchar char_tbl[] = {
672 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
673 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
675 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
676 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
678 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
679 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
681 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
682 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
684 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
685 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
688 register guchar *p = str;
695 register guchar ch = *(p + 1);
697 if (ch >= 0xb0 && ch <= 0xfa) {
/* drop one byte by shifting the tail of the string down */
702 memmove(p, p + 1, len);
708 } else if (*p == 0xa1) {
709 register guchar ch = *(p + 1);
/* the range check keeps the index inside char_tbl */
711 if (ch >= 0xa0 && ch <= 0xef &&
712 NCV != char_tbl[ch - 0xa0]) {
713 *p = char_tbl[ch - 0xa0];
716 memmove(p, p + 1, len);
722 } else if (iseuckanji(*p)) {
/*
 * Guess the Japanese encoding of `str` by scanning its bytes.
 *
 * The guess starts as C_US_ASCII.  An ESC followed by '$' or '(' returns
 * C_ISO_2022_JP immediately if nothing else has been guessed yet.  Byte
 * pairs that fit the EUC-JP and/or Shift_JIS patterns and single-byte
 * half-width kana update the running guess.
 *
 * NOTE(review): several assignments to `guessed`, the pointer advances and
 * the final return are not visible in this excerpt.
 */
732 CharSet conv_guess_ja_encoding(const gchar *str)
734 const guchar *p = str;
735 CharSet guessed = C_US_ASCII;
738 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
739 if (guessed == C_US_ASCII)
740 return C_ISO_2022_JP;
742 } else if (IS_ASCII(*p)) {
/* both bytes fit the EUC-JP two-byte pattern */
744 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
745 if (*p >= 0xfd && *p <= 0xfe)
747 else if (guessed == C_SHIFT_JIS) {
748 if ((issjiskanji1(*p) &&
749 issjiskanji2(*(p + 1))) ||
751 guessed = C_SHIFT_JIS;
/* not an EUC-JP pair, but fits the Shift_JIS two-byte pattern */
757 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
758 if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
759 guessed = C_SHIFT_JIS;
763 } else if (issjishwkana(*p)) {
764 guessed = C_SHIFT_JIS;
/* Convert JIS text for display; forwards to conv_jistoutf8(). */
774 static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
776 conv_jistoutf8(outbuf, outlen, inbuf);
/* Convert Shift_JIS text for display; forwards to conv_sjistoutf8(). */
779 static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
781 conv_sjistoutf8(outbuf, outlen, inbuf);
/* Convert EUC-JP text for display; forwards to conv_euctoutf8(). */
784 static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
786 conv_euctoutf8(outbuf, outlen, inbuf);
/*
 * Copy `inbuf` to `outbuf` (at most `outlen` bytes, via strncpy2) when it is
 * valid UTF-8 according to g_utf8_validate(); the other visible statement
 * hands the input to conv_ustodisp() instead (the `else` joining the two is
 * not visible in this excerpt).
 */
789 void conv_utf8todisp(gchar *outbuf, gint outlen, const gchar *inbuf)
791 if (g_utf8_validate(inbuf, -1, NULL) == TRUE)
792 strncpy2(outbuf, inbuf, outlen);
794 conv_ustodisp(outbuf, outlen, inbuf);
/*
 * Convert text of guessed Japanese encoding for display: run
 * conv_anytoutf8(), then, if the result is not valid UTF-8, clean the output
 * buffer with conv_unreadable_8bit().
 */
797 static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
799 conv_anytoutf8(outbuf, outlen, inbuf);
800 if (g_utf8_validate(outbuf, -1, NULL) != TRUE)
801 conv_unreadable_8bit(outbuf);
/*
 * Copy `inbuf` to `outbuf` (at most `outlen` bytes, via strncpy2) and then
 * clean the copy with conv_unreadable_8bit().
 */
804 static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
806 strncpy2(outbuf, inbuf, outlen);
807 conv_unreadable_8bit(outbuf);
/*
 * Convert text from the locale charset (conv_get_locale_charset_str()) for
 * display using conv_iconv_strdup(); the converted string is copied into
 * `outbuf` with strncpy2.  The other visible path passes `inbuf` to
 * conv_utf8todisp(); presumably that is the fallback when the conversion
 * yields no string (the condition is not visible in this excerpt).
 */
810 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
814 tmpstr = conv_iconv_strdup(inbuf, conv_get_locale_charset_str(),
817 strncpy2(outbuf, tmpstr, outlen);
820 conv_utf8todisp(outbuf, outlen, inbuf);
/*
 * Pass-through converter: copies `inbuf` to `outbuf` with strncpy2.
 * Its address is also compared against by callers (conv_convert,
 * conv_codeset_strdup) to detect that no dedicated converter was selected.
 */
823 static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
825 strncpy2(outbuf, inbuf, outlen);
/*
 * Allocate a zero-filled CodeConverter (g_new0) for `src_charset` and fill
 * in its conversion function (destination NULL), a private copy of the
 * charset name and the corresponding CharSet value.
 * The return statement is not visible in this excerpt.
 */
828 CodeConverter *conv_code_converter_new(const gchar *src_charset)
832 conv = g_new0(CodeConverter, 1);
833 conv->code_conv_func = conv_get_code_conv_func(src_charset, NULL);
/* duplicated here, released in conv_code_converter_destroy() */
834 conv->charset_str = g_strdup(src_charset);
835 conv->charset = conv_get_charset_from_str(src_charset);
/*
 * Release a CodeConverter: frees the charset name it owns.
 * NOTE(review): the line that frees `conv` itself is not visible in this
 * excerpt.
 */
840 void conv_code_converter_destroy(CodeConverter *conv)
842 g_free(conv->charset_str);
/*
 * Convert `inbuf` into `outbuf` using the converter `conv`.
 *
 * If a dedicated conversion function was selected (anything other than
 * conv_noconv) it is called directly.  The other visible path converts with
 * conv_iconv_strdup() from conv->charset_str (destination NULL) and copies
 * the result into `outbuf` with strncpy2.
 *
 * NOTE(review): the remaining parameter(s), the error handling and the
 * returned gint values are not visible in this excerpt.
 */
846 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
849 if (conv->code_conv_func != conv_noconv)
850 conv->code_conv_func(outbuf, outlen, inbuf);
854 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
858 strncpy2(outbuf, str, outlen);
/*
 * Return a converted copy of `inbuf` (from `src_code` to `dest_code`).
 *
 * When conv_get_code_conv_func() selects a dedicated converter, the text is
 * converted into a work buffer of (strlen(inbuf) + 1) * 3 bytes, which is
 * then shrunk with g_realloc() to the exact string length and returned.
 * Otherwise the result of conv_iconv_strdup() is returned.
 * Returns NULL if the work buffer is NULL.
 *
 * NOTE(review): the allocation of `buf` is not visible in this excerpt.
 */
866 gchar *conv_codeset_strdup(const gchar *inbuf,
867 const gchar *src_code, const gchar *dest_code)
871 CodeConvFunc conv_func;
873 conv_func = conv_get_code_conv_func(src_code, dest_code);
874 if (conv_func != conv_noconv) {
/* work buffer is three times the input size (including the terminator) */
875 len = (strlen(inbuf) + 1) * 3;
877 if (!buf) return NULL;
879 conv_func(buf, len, inbuf);
/* shrink to fit */
880 return g_realloc(buf, strlen(buf) + 1);
883 return conv_iconv_strdup(inbuf, src_code, dest_code);
/*
 * Select the conversion function for a source/destination charset pair.
 *
 * src_charset_str:  source charset name; NULL means the locale charset.
 * dest_charset_str: destination charset name; may be NULL.
 *
 * With both names NULL ("auto detection mode") and an EUC-JP or Shift_JIS
 * locale, conv_anytodisp is returned.  A US-ASCII destination returns
 * conv_ustodisp.  Otherwise the function is chosen by a switch on the source
 * charset and a test of the destination; the starting value is conv_noconv.
 *
 * NOTE(review): several case labels, the `break`s and the final return are
 * not visible in this excerpt.
 */
886 CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
887 const gchar *dest_charset_str)
889 CodeConvFunc code_conv = conv_noconv;
891 CharSet dest_charset;
893 if (!src_charset_str)
894 src_charset = conv_get_locale_charset();
896 src_charset = conv_get_charset_from_str(src_charset_str);
898 /* auto detection mode */
899 if (!src_charset_str && !dest_charset_str) {
900 if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
901 return conv_anytodisp;
906 dest_charset = conv_get_charset_from_str(dest_charset_str);
908 if (dest_charset == C_US_ASCII)
909 return conv_ustodisp;
911 switch (src_charset) {
/* JIS family sources */
929 case C_ISO_2022_JP_2:
930 case C_ISO_2022_JP_3:
931 if (dest_charset == C_AUTO)
932 code_conv = conv_jistodisp;
933 else if (dest_charset == C_EUC_JP)
934 code_conv = conv_jistoeuc;
935 else if (dest_charset == C_UTF_8)
936 code_conv = conv_jistoutf8;
/* presumably the Shift_JIS source case (label not visible) */
939 if (dest_charset == C_AUTO)
940 code_conv = conv_sjistodisp;
941 else if (dest_charset == C_EUC_JP)
942 code_conv = conv_sjistoeuc;
943 else if (dest_charset == C_UTF_8)
944 code_conv = conv_sjistoutf8;
/* presumably the EUC-JP source case (label not visible) */
947 if (dest_charset == C_AUTO)
948 code_conv = conv_euctodisp;
949 else if (dest_charset == C_ISO_2022_JP ||
950 dest_charset == C_ISO_2022_JP_2 ||
951 dest_charset == C_ISO_2022_JP_3)
952 code_conv = conv_euctojis;
953 else if (dest_charset == C_UTF_8)
954 code_conv = conv_euctoutf8;
/* presumably the UTF-8 source case (label not visible) */
957 if (dest_charset == C_EUC_JP)
958 code_conv = conv_utf8toeuc;
959 else if (dest_charset == C_ISO_2022_JP ||
960 dest_charset == C_ISO_2022_JP_2 ||
961 dest_charset == C_ISO_2022_JP_3)
962 code_conv = conv_utf8tojis;
/*
 * Return a newly allocated copy of `inbuf` converted from `src_code` to
 * `dest_code` with iconv.
 *
 * The visible assignments substitute conv_get_outgoing_charset_str() for
 * `src_code` and CS_INTERNAL for `dest_code`; presumably they apply when the
 * respective argument is NULL (the guarding tests are not visible).  When
 * the two names are equal (case-insensitive) or the destination is US-ASCII,
 * a plain g_strdup() copy is returned without conversion.
 *
 * NOTE(review): the handling of an iconv_open() failure, the closing of the
 * descriptor and the final return are not visible in this excerpt.
 */
971 gchar *conv_iconv_strdup(const gchar *inbuf,
972 const gchar *src_code, const gchar *dest_code)
978 src_code = conv_get_outgoing_charset_str();
980 dest_code = CS_INTERNAL;
982 /* don't convert if src and dest codeset are identical */
983 if (!strcasecmp(src_code, dest_code))
984 return g_strdup(inbuf);
986 /* don't convert if current codeset is US-ASCII */
987 if (!strcasecmp(dest_code, CS_US_ASCII))
988 return g_strdup(inbuf);
990 cd = iconv_open(dest_code, src_code);
991 if (cd == (iconv_t)-1)
994 outbuf = conv_iconv_strdup_with_cd(inbuf, cd);
/*
 * Convert the NUL-terminated string `inbuf` with the already opened iconv
 * descriptor `cd` and return the result in a g_malloc'ed buffer.
 *
 * The output buffer starts at (strlen(inbuf) + 1) * 2 bytes and is grown
 * through the EXPAND_BUF() helper macro.  The main loop repeats iconv()
 * while it reports an error and dispatches on errno (EILSEQ, EINVAL, E2BIG);
 * in the EILSEQ branch a SUBST_CHAR byte is written to the output.  A second
 * loop calls iconv() with a NULL input to flush the descriptor, and the
 * buffer is finally resized to the converted length plus one byte.
 *
 * NOTE(review): the input-skipping logic, the NUL termination and the return
 * statement are not visible in this excerpt.
 */
1001 gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd)
1003 const gchar *inbuf_p;
1014 in_size = strlen(inbuf);
/* initial guess: twice the input size */
1016 out_size = (in_size + 1) * 2;
1017 outbuf = g_malloc(out_size);
1019 out_left = out_size;
/* EXPAND_BUF: reallocate outbuf and re-derive outbuf_p / out_left from the
 * number of bytes already written (the new out_size is set in a line that
 * is not visible) */
1021 #define EXPAND_BUF() \
1023 len = outbuf_p - outbuf; \
1025 outbuf = g_realloc(outbuf, out_size); \
1026 outbuf_p = outbuf + len; \
1027 out_left = out_size - len; \
1030 while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
1031 &outbuf_p, &out_left)) == (size_t)-1) {
/* EILSEQ: invalid input sequence */
1032 if (EILSEQ == errno) {
1033 //g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
1036 if (out_left == 0) {
1039 *outbuf_p++ = SUBST_CHAR;
/* EINVAL: incomplete sequence at the end of the input */
1041 } else if (EINVAL == errno) {
/* E2BIG: output buffer full */
1043 } else if (E2BIG == errno) {
1046 g_warning("conv_iconv_strdup(): %s\n",
/* flush: a NULL input asks iconv to emit any pending output */
1052 while ((n_conv = iconv(cd, NULL, NULL, &outbuf_p, &out_left)) ==
1054 if (E2BIG == errno) {
1057 g_warning("conv_iconv_strdup(): %s\n",
/* shrink to the converted length plus one byte */
1065 len = outbuf_p - outbuf;
1066 outbuf = g_realloc(outbuf, len + 1);
/* Pairs of (CharSet value, charset name) used to build the lookup tables in
 * conv_get_charset_to_str_table() and conv_get_charset_from_str_table().
 * A CharSet value may be listed with several names (for example C_US_ASCII,
 * C_EUC_JP and C_SHIFT_JIS).
 * NOTE(review): the member declarations and the array name are in lines not
 * visible here; the array is referenced as `charsets` with members `charset`
 * and `name` further down. */
1072 static const struct {
1076 {C_US_ASCII, CS_US_ASCII},
1077 {C_US_ASCII, CS_ANSI_X3_4_1968},
1078 {C_UTF_8, CS_UTF_8},
1079 {C_UTF_7, CS_UTF_7},
1080 {C_ISO_8859_1, CS_ISO_8859_1},
1081 {C_ISO_8859_2, CS_ISO_8859_2},
1082 {C_ISO_8859_3, CS_ISO_8859_3},
1083 {C_ISO_8859_4, CS_ISO_8859_4},
1084 {C_ISO_8859_5, CS_ISO_8859_5},
1085 {C_ISO_8859_6, CS_ISO_8859_6},
1086 {C_ISO_8859_7, CS_ISO_8859_7},
1087 {C_ISO_8859_8, CS_ISO_8859_8},
1088 {C_ISO_8859_9, CS_ISO_8859_9},
1089 {C_ISO_8859_10, CS_ISO_8859_10},
1090 {C_ISO_8859_11, CS_ISO_8859_11},
1091 {C_ISO_8859_13, CS_ISO_8859_13},
1092 {C_ISO_8859_14, CS_ISO_8859_14},
1093 {C_ISO_8859_15, CS_ISO_8859_15},
1094 {C_BALTIC, CS_BALTIC},
1095 {C_CP1250, CS_CP1250},
1096 {C_CP1251, CS_CP1251},
1097 {C_CP1252, CS_CP1252},
1098 {C_CP1253, CS_CP1253},
1099 {C_CP1254, CS_CP1254},
1100 {C_CP1255, CS_CP1255},
1101 {C_CP1256, CS_CP1256},
1102 {C_CP1257, CS_CP1257},
1103 {C_CP1258, CS_CP1258},
1104 {C_WINDOWS_1250, CS_WINDOWS_1250},
1105 {C_WINDOWS_1251, CS_WINDOWS_1251},
1106 {C_WINDOWS_1252, CS_WINDOWS_1252},
1107 {C_WINDOWS_1253, CS_WINDOWS_1253},
1108 {C_WINDOWS_1254, CS_WINDOWS_1254},
1109 {C_WINDOWS_1255, CS_WINDOWS_1255},
1110 {C_WINDOWS_1256, CS_WINDOWS_1256},
1111 {C_WINDOWS_1257, CS_WINDOWS_1257},
1112 {C_WINDOWS_1258, CS_WINDOWS_1258},
1113 {C_KOI8_R, CS_KOI8_R},
1114 {C_KOI8_T, CS_KOI8_T},
1115 {C_KOI8_U, CS_KOI8_U},
1116 {C_ISO_2022_JP, CS_ISO_2022_JP},
1117 {C_ISO_2022_JP_2, CS_ISO_2022_JP_2},
1118 {C_ISO_2022_JP_3, CS_ISO_2022_JP_3},
1119 {C_EUC_JP, CS_EUC_JP},
1120 {C_EUC_JP, CS_EUCJP},
1121 {C_EUC_JP_MS, CS_EUC_JP_MS},
1122 {C_SHIFT_JIS, CS_SHIFT_JIS},
1123 {C_SHIFT_JIS, CS_SHIFT__JIS},
1124 {C_SHIFT_JIS, CS_SJIS},
1125 {C_ISO_2022_KR, CS_ISO_2022_KR},
1126 {C_EUC_KR, CS_EUC_KR},
1127 {C_ISO_2022_CN, CS_ISO_2022_CN},
1128 {C_EUC_CN, CS_EUC_CN},
1129 {C_GB2312, CS_GB2312},
1131 {C_EUC_TW, CS_EUC_TW},
1133 {C_BIG5_HKSCS, CS_BIG5_HKSCS},
1134 {C_TIS_620, CS_TIS_620},
1135 {C_WINDOWS_874, CS_WINDOWS_874},
1136 {C_GEORGIAN_PS, CS_GEORGIAN_PS},
1137 {C_TCVN5712_1, CS_TCVN5712_1},
/* Locale lookup table: each entry gives a locale name, the charset assumed
 * for that locale (member `charset`, read by conv_get_locale_charset()) and
 * the charset used for outgoing text (member `out_charset`, read by
 * conv_get_outgoing_charset()).
 * Both readers compare the current locale against the entry name over the
 * length of the entry name, so entries act as prefixes; more specific names
 * (e.g. "ja_JP.eucJP") are listed before their bare language_COUNTRY form
 * (e.g. "ja_JP").
 * NOTE(review): the declaration of the `charset` member is in a line not
 * visible in this excerpt. */
1140 static const struct {
1141 gchar *const locale;
1143 CharSet out_charset;
1144 } locale_table[] = {
1145 {"ja_JP.eucJP" , C_EUC_JP , C_ISO_2022_JP},
1146 {"ja_JP.EUC-JP" , C_EUC_JP , C_ISO_2022_JP},
1147 {"ja_JP.EUC" , C_EUC_JP , C_ISO_2022_JP},
1148 {"ja_JP.ujis" , C_EUC_JP , C_ISO_2022_JP},
1149 {"ja_JP.SJIS" , C_SHIFT_JIS , C_ISO_2022_JP},
1150 {"ja_JP.JIS" , C_ISO_2022_JP , C_ISO_2022_JP},
1151 {"ja_JP" , C_EUC_JP , C_ISO_2022_JP},
1152 {"ko_KR.EUC-KR" , C_EUC_KR , C_EUC_KR},
1153 {"ko_KR" , C_EUC_KR , C_EUC_KR},
1154 {"zh_CN.GB2312" , C_GB2312 , C_GB2312},
1155 {"zh_CN.GBK" , C_GBK , C_GBK},
1156 {"zh_CN" , C_GB2312 , C_GB2312},
1157 {"zh_HK" , C_BIG5_HKSCS , C_BIG5_HKSCS},
1158 {"zh_TW.eucTW" , C_EUC_TW , C_BIG5},
1159 {"zh_TW.EUC-TW" , C_EUC_TW , C_BIG5},
1160 {"zh_TW.Big5" , C_BIG5 , C_BIG5},
1161 {"zh_TW" , C_BIG5 , C_BIG5},
1163 {"ru_RU.KOI8-R" , C_KOI8_R , C_KOI8_R},
1164 {"ru_RU.KOI8R" , C_KOI8_R , C_KOI8_R},
1165 {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
1166 {"ru_RU" , C_ISO_8859_5 , C_KOI8_R},
1167 {"tg_TJ" , C_KOI8_T , C_KOI8_T},
1168 {"ru_UA" , C_KOI8_U , C_KOI8_U},
1169 {"uk_UA.CP1251" , C_WINDOWS_1251, C_KOI8_U},
1170 {"uk_UA" , C_KOI8_U , C_KOI8_U},
1172 {"be_BY" , C_WINDOWS_1251, C_WINDOWS_1251},
1173 {"bg_BG" , C_WINDOWS_1251, C_WINDOWS_1251},
1175 {"yi_US" , C_WINDOWS_1255, C_WINDOWS_1255},
1177 {"af_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1178 {"br_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1179 {"ca_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1180 {"da_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1181 {"de_AT" , C_ISO_8859_1 , C_ISO_8859_1},
1182 {"de_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1183 {"de_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1184 {"de_DE" , C_ISO_8859_1 , C_ISO_8859_1},
1185 {"de_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1186 {"en_AU" , C_ISO_8859_1 , C_ISO_8859_1},
1187 {"en_BW" , C_ISO_8859_1 , C_ISO_8859_1},
1188 {"en_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1189 {"en_DK" , C_ISO_8859_1 , C_ISO_8859_1},
1190 {"en_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1191 {"en_HK" , C_ISO_8859_1 , C_ISO_8859_1},
1192 {"en_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1193 {"en_NZ" , C_ISO_8859_1 , C_ISO_8859_1},
1194 {"en_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1195 {"en_SG" , C_ISO_8859_1 , C_ISO_8859_1},
1196 {"en_US" , C_ISO_8859_1 , C_ISO_8859_1},
1197 {"en_ZA" , C_ISO_8859_1 , C_ISO_8859_1},
1198 {"en_ZW" , C_ISO_8859_1 , C_ISO_8859_1},
1199 {"es_AR" , C_ISO_8859_1 , C_ISO_8859_1},
1200 {"es_BO" , C_ISO_8859_1 , C_ISO_8859_1},
1201 {"es_CL" , C_ISO_8859_1 , C_ISO_8859_1},
1202 {"es_CO" , C_ISO_8859_1 , C_ISO_8859_1},
1203 {"es_CR" , C_ISO_8859_1 , C_ISO_8859_1},
1204 {"es_DO" , C_ISO_8859_1 , C_ISO_8859_1},
1205 {"es_EC" , C_ISO_8859_1 , C_ISO_8859_1},
1206 {"es_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1207 {"es_GT" , C_ISO_8859_1 , C_ISO_8859_1},
1208 {"es_HN" , C_ISO_8859_1 , C_ISO_8859_1},
1209 {"es_MX" , C_ISO_8859_1 , C_ISO_8859_1},
1210 {"es_NI" , C_ISO_8859_1 , C_ISO_8859_1},
1211 {"es_PA" , C_ISO_8859_1 , C_ISO_8859_1},
1212 {"es_PE" , C_ISO_8859_1 , C_ISO_8859_1},
1213 {"es_PR" , C_ISO_8859_1 , C_ISO_8859_1},
1214 {"es_PY" , C_ISO_8859_1 , C_ISO_8859_1},
1215 {"es_SV" , C_ISO_8859_1 , C_ISO_8859_1},
1216 {"es_US" , C_ISO_8859_1 , C_ISO_8859_1},
1217 {"es_UY" , C_ISO_8859_1 , C_ISO_8859_1},
1218 {"es_VE" , C_ISO_8859_1 , C_ISO_8859_1},
1219 {"et_EE" , C_ISO_8859_1 , C_ISO_8859_1},
1220 {"eu_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1221 {"fi_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1222 {"fo_FO" , C_ISO_8859_1 , C_ISO_8859_1},
1223 {"fr_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1224 {"fr_CA" , C_ISO_8859_1 , C_ISO_8859_1},
1225 {"fr_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1226 {"fr_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1227 {"fr_LU" , C_ISO_8859_1 , C_ISO_8859_1},
1228 {"ga_IE" , C_ISO_8859_1 , C_ISO_8859_1},
1229 {"gl_ES" , C_ISO_8859_1 , C_ISO_8859_1},
1230 {"gv_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1231 {"id_ID" , C_ISO_8859_1 , C_ISO_8859_1},
1232 {"is_IS" , C_ISO_8859_1 , C_ISO_8859_1},
1233 {"it_CH" , C_ISO_8859_1 , C_ISO_8859_1},
1234 {"it_IT" , C_ISO_8859_1 , C_ISO_8859_1},
1235 {"kl_GL" , C_ISO_8859_1 , C_ISO_8859_1},
1236 {"kw_GB" , C_ISO_8859_1 , C_ISO_8859_1},
1237 {"ms_MY" , C_ISO_8859_1 , C_ISO_8859_1},
1238 {"nl_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1239 {"nl_NL" , C_ISO_8859_1 , C_ISO_8859_1},
1240 {"nn_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1241 {"no_NO" , C_ISO_8859_1 , C_ISO_8859_1},
1242 {"oc_FR" , C_ISO_8859_1 , C_ISO_8859_1},
1243 {"pt_BR" , C_ISO_8859_1 , C_ISO_8859_1},
1244 {"pt_PT" , C_ISO_8859_1 , C_ISO_8859_1},
1245 {"sq_AL" , C_ISO_8859_1 , C_ISO_8859_1},
1246 {"sv_FI" , C_ISO_8859_1 , C_ISO_8859_1},
1247 {"sv_SE" , C_ISO_8859_1 , C_ISO_8859_1},
1248 {"tl_PH" , C_ISO_8859_1 , C_ISO_8859_1},
1249 {"uz_UZ" , C_ISO_8859_1 , C_ISO_8859_1},
1250 {"wa_BE" , C_ISO_8859_1 , C_ISO_8859_1},
1252 {"bs_BA" , C_ISO_8859_2 , C_ISO_8859_2},
1253 {"cs_CZ" , C_ISO_8859_2 , C_ISO_8859_2},
1254 {"hr_HR" , C_ISO_8859_2 , C_ISO_8859_2},
1255 {"hu_HU" , C_ISO_8859_2 , C_ISO_8859_2},
1256 {"pl_PL" , C_ISO_8859_2 , C_ISO_8859_2},
1257 {"ro_RO" , C_ISO_8859_2 , C_ISO_8859_2},
1258 {"sk_SK" , C_ISO_8859_2 , C_ISO_8859_2},
1259 {"sl_SI" , C_ISO_8859_2 , C_ISO_8859_2},
1261 {"sr_YU@cyrillic" , C_ISO_8859_5 , C_ISO_8859_5},
1262 {"sr_YU" , C_ISO_8859_2 , C_ISO_8859_2},
1264 {"mt_MT" , C_ISO_8859_3 , C_ISO_8859_3},
1266 {"lt_LT.iso88594" , C_ISO_8859_4 , C_ISO_8859_4},
1267 {"lt_LT.ISO8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1268 {"lt_LT.ISO_8859-4" , C_ISO_8859_4 , C_ISO_8859_4},
1269 {"lt_LT" , C_ISO_8859_13 , C_ISO_8859_13},
1271 {"mk_MK" , C_ISO_8859_5 , C_ISO_8859_5},
1273 {"ar_AE" , C_ISO_8859_6 , C_ISO_8859_6},
1274 {"ar_BH" , C_ISO_8859_6 , C_ISO_8859_6},
1275 {"ar_DZ" , C_ISO_8859_6 , C_ISO_8859_6},
1276 {"ar_EG" , C_ISO_8859_6 , C_ISO_8859_6},
1277 {"ar_IQ" , C_ISO_8859_6 , C_ISO_8859_6},
1278 {"ar_JO" , C_ISO_8859_6 , C_ISO_8859_6},
1279 {"ar_KW" , C_ISO_8859_6 , C_ISO_8859_6},
1280 {"ar_LB" , C_ISO_8859_6 , C_ISO_8859_6},
1281 {"ar_LY" , C_ISO_8859_6 , C_ISO_8859_6},
1282 {"ar_MA" , C_ISO_8859_6 , C_ISO_8859_6},
1283 {"ar_OM" , C_ISO_8859_6 , C_ISO_8859_6},
1284 {"ar_QA" , C_ISO_8859_6 , C_ISO_8859_6},
1285 {"ar_SA" , C_ISO_8859_6 , C_ISO_8859_6},
1286 {"ar_SD" , C_ISO_8859_6 , C_ISO_8859_6},
1287 {"ar_SY" , C_ISO_8859_6 , C_ISO_8859_6},
1288 {"ar_TN" , C_ISO_8859_6 , C_ISO_8859_6},
1289 {"ar_YE" , C_ISO_8859_6 , C_ISO_8859_6},
1291 {"el_GR" , C_ISO_8859_7 , C_ISO_8859_7},
1292 {"he_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1293 {"iw_IL" , C_ISO_8859_8 , C_ISO_8859_8},
1294 {"tr_TR" , C_ISO_8859_9 , C_ISO_8859_9},
1296 {"lv_LV" , C_ISO_8859_13 , C_ISO_8859_13},
1297 {"mi_NZ" , C_ISO_8859_13 , C_ISO_8859_13},
1299 {"cy_GB" , C_ISO_8859_14 , C_ISO_8859_14},
1301 {"ar_IN" , C_UTF_8 , C_UTF_8},
1302 {"en_IN" , C_UTF_8 , C_UTF_8},
1303 {"se_NO" , C_UTF_8 , C_UTF_8},
1304 {"ta_IN" , C_UTF_8 , C_UTF_8},
1305 {"te_IN" , C_UTF_8 , C_UTF_8},
1306 {"ur_PK" , C_UTF_8 , C_UTF_8},
1308 {"th_TH" , C_TIS_620 , C_TIS_620},
1309 /* {"th_TH" , C_WINDOWS_874}, */
1310 /* {"th_TH" , C_ISO_8859_11}, */
1312 {"ka_GE" , C_GEORGIAN_PS , C_GEORGIAN_PS},
1313 {"vi_VN.TCVN" , C_TCVN5712_1 , C_TCVN5712_1},
1315 {"C" , C_US_ASCII , C_US_ASCII},
1316 {"POSIX" , C_US_ASCII , C_US_ASCII},
1317 {"ANSI_X3.4-1968" , C_US_ASCII , C_US_ASCII},
/*
 * Return the hash table that maps a CharSet value (stored as a pointer-sized
 * key via GUINT_TO_POINTER) to a name, building it from `charsets` and
 * keeping it in a function-local static.
 * Each entry is looked up before the insert call; presumably only the first
 * name listed for a CharSet is kept (the comparison is not visible in this
 * excerpt).
 */
1320 static GHashTable *conv_get_charset_to_str_table(void)
1322 static GHashTable *table;
/* NULL hash function: GLib falls back to hashing the key pointer directly */
1328 table = g_hash_table_new(NULL, g_direct_equal);
1330 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1331 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1334 (table, GUINT_TO_POINTER(charsets[i].charset),
/*
 * Return the hash table that maps a charset name to its CharSet value
 * (stored via GUINT_TO_POINTER), building it from `charsets` and keeping it
 * in a function-local static.  Keys are hashed and compared with
 * str_case_hash / str_case_equal (project helpers; judging by their names,
 * case-insensitive).
 */
1342 static GHashTable *conv_get_charset_from_str_table(void)
1344 static GHashTable *table;
1350 table = g_hash_table_new(str_case_hash, str_case_equal);
1352 for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1353 g_hash_table_insert(table, charsets[i].name,
1354 GUINT_TO_POINTER(charsets[i].charset));
/*
 * Return the name registered for `charset`, or NULL when the lookup table
 * has no entry for it (g_hash_table_lookup returns NULL for a missing key).
 */
1360 const gchar *conv_get_charset_str(CharSet charset)
1364 table = conv_get_charset_to_str_table();
1365 return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
/*
 * Return the CharSet registered for the name `charset`.
 * A NULL name yields C_AUTO.  A name that is not in the table yields the
 * CharSet whose numeric value is 0, because the NULL lookup result is
 * converted with GPOINTER_TO_UINT.
 */
1368 CharSet conv_get_charset_from_str(const gchar *charset)
1372 if (!charset) return C_AUTO;
1374 table = conv_get_charset_from_str_table();
1375 return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
/*
 * Determine the charset of the current locale; the result is cached in a
 * function-local static (-1 means "not computed yet").
 *
 * Visible decision steps, in order:
 *   - a locale name containing "UTF-8" (case-insensitive) gives C_UTF_8;
 *   - a name ending in "@euro" gives C_ISO_8859_15;
 *   - otherwise locale_table is searched: an entry matches when the locale
 *     name starts with the entry name, or when the locale name is exactly
 *     two characters long and equals the first two characters of an entry
 *     whose name has a '_' and no '.' after it;
 *   - C_US_ASCII and C_AUTO are assigned on paths whose conditions are not
 *     visible in this excerpt.
 */
1378 CharSet conv_get_locale_charset(void)
1380 static CharSet cur_charset = -1;
1381 const gchar *cur_locale;
/* already computed on an earlier call */
1385 if (cur_charset != -1)
1388 cur_locale = conv_get_current_locale();
1390 cur_charset = C_US_ASCII;
1394 if (strcasestr(cur_locale, "UTF-8")) {
1395 cur_charset = C_UTF_8;
/* "@euro" must be the very end of the locale name */
1399 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1400 cur_charset = C_ISO_8859_15;
1404 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1407 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1408 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1409 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1410 strlen(locale_table[i].locale))) {
1411 cur_charset = locale_table[i].charset;
1413 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1414 !strchr(p + 1, '.')) {
/* bare two-letter language code such as "ja" */
1415 if (strlen(cur_locale) == 2 &&
1416 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1417 cur_charset = locale_table[i].charset;
1423 cur_charset = C_AUTO;
/*
 * Return the name of the locale charset, cached in a function-local static.
 * Falls back to CS_INTERNAL when no name is available for the locale
 * charset.  The test guarding the lookup is not visible in this excerpt.
 */
1427 const gchar *conv_get_locale_charset_str(void)
1429 static const gchar *codeset = NULL;
1432 codeset = conv_get_charset_str(conv_get_locale_charset());
1434 return codeset ? codeset : CS_INTERNAL;
/* Return the CharSet used internally.
 * NOTE(review): the body is not visible in this excerpt. */
1437 CharSet conv_get_internal_charset(void)
/* Return the name of the charset used internally.
 * NOTE(review): the body is not visible in this excerpt. */
1442 const gchar *conv_get_internal_charset_str(void)
/*
 * Determine the default charset for outgoing text from the current locale;
 * the result is cached in a function-local static (-1 means "not computed
 * yet").
 *
 * A locale name ending in "@euro" gives C_ISO_8859_15.  Otherwise
 * locale_table is searched with the same matching rules as in
 * conv_get_locale_charset() and the entry's `out_charset` is used.
 * C_AUTO is assigned on a path whose condition is not visible in this
 * excerpt.
 */
1447 CharSet conv_get_outgoing_charset(void)
1449 static CharSet out_charset = -1;
1450 const gchar *cur_locale;
/* already computed on an earlier call */
1454 if (out_charset != -1)
1457 cur_locale = conv_get_current_locale();
1459 out_charset = C_AUTO;
/* "@euro" must be the very end of the locale name */
1463 if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1464 out_charset = C_ISO_8859_15;
1468 for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
/* locale name starts with the entry name */
1471 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1472 strlen(locale_table[i].locale))) {
1473 out_charset = locale_table[i].out_charset;
1475 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1476 !strchr(p + 1, '.')) {
/* bare two-letter language code */
1477 if (strlen(cur_locale) == 2 &&
1478 !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1479 out_charset = locale_table[i].out_charset;
1488 const gchar *conv_get_outgoing_charset_str(void)
1490 CharSet out_charset;
1493 if (prefs_common.outgoing_charset) {
1494 if (!isalpha((guchar)prefs_common.outgoing_charset[0])) {
1495 g_free(prefs_common.outgoing_charset);
1496 prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1497 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1498 return prefs_common.outgoing_charset;
1501 out_charset = conv_get_outgoing_charset();
1502 str = conv_get_charset_str(out_charset);
1504 return str ? str : CS_UTF_8;
/*
 * Reports whether `encoding' is a multibyte encoding (going by the
 * function's name and its gboolean return type).
 * NOTE(review): only two labels of the case list are visible in this
 * excerpt (C_ISO_2022_JP_2 and C_ISO_2022_JP_3); the remaining labels and
 * the values returned are not shown -- confirm against the full source.
 */
1507 gboolean conv_is_multibyte_encoding(CharSet encoding)
1516 case C_ISO_2022_JP_2:
1517 case C_ISO_2022_JP_3:
1532 const gchar *conv_get_current_locale(void)
1534 const gchar *cur_locale;
1536 cur_locale = g_getenv("LC_ALL");
1537 if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1538 if (!cur_locale) cur_locale = g_getenv("LANG");
1539 if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1541 debug_print("current locale: %s\n",
1542 cur_locale ? cur_locale : "(none)");
1547 gchar *conv_unmime_header(const gchar *str, const gchar *default_encoding)
1549 gchar buf[BUFFSIZE];
1551 if (is_ascii_str(str))
1552 return unmime_header(str);
1554 if (default_encoding) {
1557 utf8_buf = conv_codeset_strdup
1558 (str, default_encoding, CS_INTERNAL);
1562 decoded_str = unmime_header(utf8_buf);
1568 if (conv_get_locale_charset() == C_EUC_JP)
1569 conv_anytodisp(buf, sizeof(buf), str);
1571 conv_localetodisp(buf, sizeof(buf), str);
1573 return unmime_header(buf);
/* Soft line-length limit for encoded headers: conv_encode_header() starts
   its per-line column budget from this value. */
1576 #define MAX_LINELEN 76
/* Hard limit: inside an unbroken ASCII word a line break is requested only
   once the line has grown (MAX_HARD_LINELEN - MAX_LINELEN) columns past the
   soft limit. */
1577 #define MAX_HARD_LINELEN 996
/* Opening and closing delimiters placed around the
   charset / encoding-marker / encoded-text triple of an encoded block. */
1578 #define MIMESEP_BEGIN "=?"
1579 #define MIMESEP_END "?="
/*
 * LBREAK_IF_REQUIRED(cond, is_plain_text):
 * Folding helper for conv_encode_header().  It is not self-contained: it
 * reads and updates the caller's locals dest, len, destp, srcp and left.
 *
 * Visible behaviour:
 *  - it first tests whether fewer than MAX_LINELEN + 2 bytes remain in the
 *    output buffer (the action taken is not visible in this excerpt;
 *    NOTE(review): presumably it terminates the output and returns);
 *  - then, if cond holds and input remains, and output has already been
 *    written on a partly used line (left < MAX_LINELEN - 1), it inspects
 *    the whitespace on either side of the break point and resets left to
 *    MAX_LINELEN - 1 for the new line.
 * is_plain_text enables the check for whitespace at *srcp.
 */
1581 #define LBREAK_IF_REQUIRED(cond, is_plain_text) \
1583 if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) { \
1588 if ((cond) && *srcp) { \
1589 if (destp > (guchar *)dest && left < MAX_LINELEN - 1) { \
1590 if (isspace(*(destp - 1))) \
1592 else if (is_plain_text && isspace(*srcp)) \
1597 left = MAX_LINELEN - 1; \
/*
 * conv_encode_header:
 * @dest:       output buffer receiving the encoded header text
 * @len:        capacity of @dest in bytes
 * @src:        header text; must be valid UTF-8, otherwise the
 *              g_return_if_fail() guard rejects the call
 * @header_len: deducted from MAX_LINELEN to obtain the column budget of
 *              the first output line
 * @addr_field: when TRUE, '(' and ')' are kept outside encoded blocks
 *
 * Writes @src to @dest in a form suitable for a mail header.  Words for
 * which is_next_nonascii() is false are copied through unchanged; other
 * runs are converted from CS_INTERNAL to the outgoing charset and emitted
 * as "=?charset?B?...?=" or "=?charset?Q?...?=" blocks.  `left' tracks the
 * columns still free on the current line, and LBREAK_IF_REQUIRED() is
 * invoked wherever a fold may be needed.
 *
 * NOTE(review): several statements of this function are not visible in
 * this excerpt, so the comments below describe only what the visible
 * lines establish.
 */
1603 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1604 gint header_len, gboolean addr_field)
1606 const gchar *cur_encoding;
1607 const gchar *out_encoding;
/* Cursors over input and output; LBREAK_IF_REQUIRED() reads and updates
   them by name. */
1611 const guchar *srcp = src;
1612 guchar *destp = dest;
1613 gboolean use_base64;
1615 g_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
/* Pick the encoding marker: "?B?" when the locale's maximum multibyte
   character length exceeds one byte, "?Q?" otherwise.
   NOTE(review): use_base64 is presumably set in the same branches --
   confirm. */
1617 if (MB_CUR_MAX > 1) {
1619 mimesep_enc = "?B?";
1622 mimesep_enc = "?Q?";
/* Source text is in the internal charset; the target is the configured
   outgoing one.  An outgoing charset of US-ASCII is relabelled as
   ISO-8859-1. */
1625 cur_encoding = CS_INTERNAL;
1626 out_encoding = conv_get_outgoing_charset_str();
1627 if (!strcmp(out_encoding, CS_US_ASCII))
1628 out_encoding = CS_ISO_8859_1;
/* Fixed overhead of one encoded block: both delimiters, the charset name
   and the encoding marker. */
1630 mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1631 strlen(mimesep_enc) + strlen(MIMESEP_END);
/* Columns still available on the first line. */
1633 left = MAX_LINELEN - header_len;
1636 LBREAK_IF_REQUIRED(left <= 0, TRUE);
/* Whitespace run; the loop body is not visible in this excerpt.
   NOTE(review): presumably the whitespace is copied through -- confirm. */
1638 while (isspace(*srcp)) {
1641 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1644 /* output as it is if the next word is ASCII string */
1645 if (!is_next_nonascii(srcp)) {
1648 word_len = get_next_word_len(srcp);
/* Fold first if the whole ASCII word does not fit in the remaining
   columns. */
1649 LBREAK_IF_REQUIRED(left < word_len, TRUE);
1650 while (word_len > 0) {
/* Inside a word a break is requested only once the line has run
   MAX_HARD_LINELEN - MAX_LINELEN columns past the soft limit. */
1651 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1660 /* don't include parentheses in encoded strings */
1661 if (addr_field && (*srcp == '(' || *srcp == ')')) {
1662 LBREAK_IF_REQUIRED(left < 2, FALSE);
/* p scans ahead from srcp to find how much text goes into the next
   encoded block. */
1673 const guchar *p = srcp;
1675 gint out_enc_str_len;
1676 gint mime_block_len;
1677 gboolean cont = FALSE;
1679 while (*p != '\0') {
/* Whitespace that is not followed by further non-ASCII text is tested for
   here (the action line is not visible; NOTE(review): presumably it ends
   the scan -- confirm). */
1680 if (isspace(*p) && !is_next_nonascii(p + 1))
1682 /* don't include parentheses in encoded
1684 if (addr_field && (*p == '(' || *p == ')'))
1687 mb_len = g_utf8_skip[*p];
1689 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1690 out_str = conv_codeset_strdup
1691 (part_str, cur_encoding, out_encoding);
1693 g_warning("conv_encode_header(): code conversion failed\n");
1694 conv_unreadable_8bit(part_str);
1695 out_str = g_strdup(part_str);
1697 out_str_len = strlen(out_str);
1700 out_enc_str_len = B64LEN(out_str_len);
1703 qp_get_q_encoding_len(out_str);
1707 if (mimestr_len + out_enc_str_len <= left) {
1710 } else if (cur_len == 0) {
1711 LBREAK_IF_REQUIRED(1, FALSE);
1720 Xstrndup_a(part_str, srcp, cur_len, );
1721 out_str = conv_codeset_strdup
1722 (part_str, cur_encoding, out_encoding);
1724 g_warning("conv_encode_header(): code conversion failed\n");
1725 conv_unreadable_8bit(part_str);
1726 out_str = g_strdup(part_str);
1728 out_str_len = strlen(out_str);
1731 out_enc_str_len = B64LEN(out_str_len);
1734 qp_get_q_encoding_len(out_str);
1736 Xalloca(enc_str, out_enc_str_len + 1, );
1738 base64_encode(enc_str, out_str, out_str_len);
1740 qp_q_encode(enc_str, out_str);
1744 /* output MIME-encoded string block */
1745 mime_block_len = mimestr_len + strlen(enc_str);
1746 g_snprintf(destp, mime_block_len + 1,
1747 MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1748 out_encoding, mimesep_enc, enc_str);
/* g_snprintf() was given mime_block_len + 1 bytes, so exactly
   mime_block_len characters precede the terminator; step over them. */
1749 destp += mime_block_len;
/* Charge the emitted block against the current line. */
1752 left -= mime_block_len;
/* cont is initialised to FALSE above.  NOTE(review): presumably it is set
   when the run had to be cut because the line was full, which forces a
   fold before the next block -- confirm. */
1755 LBREAK_IF_REQUIRED(cont, FALSE);
/* The folding helper is private to conv_encode_header(). */
1765 #undef LBREAK_IF_REQUIRED
1766 gchar *conv_filename_from_utf8(const gchar *utf8_file)
1769 GError *error = NULL;
1771 fs_file = g_filename_from_utf8(utf8_file, -1, NULL, NULL, &error);
1773 g_warning("failed to convert encoding of file name: %s\n",
1775 g_error_free(error);
1778 fs_file = g_strdup(utf8_file);
1783 gchar *conv_filename_to_utf8(const gchar *fs_file)
1785 gchar *utf8_file = NULL;
1786 GError *error = NULL;
1788 utf8_file = g_filename_to_utf8(fs_file, -1, NULL, NULL, &error);
1790 g_warning("failed to convert encoding of file name: %s\n",
1792 g_error_free(error);
1795 if (!utf8_file || !g_utf8_validate(utf8_file, -1, NULL)) {
1797 utf8_file = g_strdup(fs_file);
1798 conv_unreadable_8bit(utf8_file);