src/codeconv.c

   1 /*
   2  * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
   3  * Copyright (C) 1999-2005 Hiroyuki Yamamoto
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  18  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 #  include "config.h"
  22 #endif
  23
  24 #include "defs.h"
  25
  26 #include <glib.h>
  27 #include <glib/gi18n.h>
  28 #include <string.h>
  29 #include <ctype.h>
  30 #include <stdlib.h>
  31 #include <errno.h>
  32
  33 #if HAVE_LOCALE_H
  34 #  include <locale.h>
  35 #endif
  36
  37 #include <iconv.h>
  38
  39 #include "codeconv.h"
  40 #include "unmime.h"
  41 #include "base64.h"
  42 #include "quoted-printable.h"
  43 #include "utils.h"
  44 #include "prefs_common.h"
  45
  46 typedef enum
  47 {
  48         JIS_ASCII,
  49         JIS_KANJI,
  50         JIS_HWKANA,
  51         JIS_AUXKANJI
  52 } JISState;
  53
  54 #define SUBST_CHAR      0x5f;
  55 #define ESC             '\033'
  56
  57 #define iseuckanji(c) \
  58         (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
  59 #define iseuchwkana1(c) \
  60         (((c) & 0xff) == 0x8e)
  61 #define iseuchwkana2(c) \
  62         (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
  63 #define iseucaux(c) \
  64         (((c) & 0xff) == 0x8f)
  65 #define issjiskanji1(c) \
  66         ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
  67          (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
  68 #define issjiskanji2(c) \
  69         ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
  70          (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
  71 #define issjishwkana(c) \
  72         (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
  73
  74 #define K_IN()                          \
  75         if (state != JIS_KANJI) {       \
  76                 *out++ = ESC;           \
  77                 *out++ = '$';           \
  78                 *out++ = 'B';           \
  79                 state = JIS_KANJI;      \
  80         }
  81
  82 #define K_OUT()                         \
  83         if (state != JIS_ASCII) {       \
  84                 *out++ = ESC;           \
  85                 *out++ = '(';           \
  86                 *out++ = 'B';           \
  87                 state = JIS_ASCII;      \
  88         }
  89
  90 #define HW_IN()                         \
  91         if (state != JIS_HWKANA) {      \
  92                 *out++ = ESC;           \
  93                 *out++ = '(';           \
  94                 *out++ = 'I';           \
  95                 state = JIS_HWKANA;     \
  96         }
  97
  98 #define AUX_IN()                        \
  99         if (state != JIS_AUXKANJI) {    \
 100                 *out++ = ESC;           \
 101                 *out++ = '$';           \
 102                 *out++ = '(';           \
 103                 *out++ = 'D';           \
 104                 state = JIS_AUXKANJI;   \
 105         }
 106
/*
 * Forward declarations of the file-local converters.  All of them share
 * the same shape: read the NUL-terminated string inbuf and write the
 * result into outbuf, whose size in bytes is outlen.
 */

/* conversions between the legacy Japanese encodings */
static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf);
static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);

/* legacy Japanese encodings -> UTF-8 */
static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);

/* UTF-8 -> legacy Japanese encodings */
static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf);

/* in-place replacement of bytes that cannot be displayed */
static void conv_unreadable_eucjp(gchar *str);
static void conv_unreadable_8bit(gchar *str);
static void conv_unreadable_latin(gchar *str);

/* conversions to the display encoding */
static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf);

static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf);
 130
 131 static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 132 {
 133         const guchar *in = inbuf;
 134         guchar *out = outbuf;
 135         JISState state = JIS_ASCII;
 136
 137         while (*in != '\0') {
 138                 if (*in == ESC) {
 139                         in++;
 140                         if (*in == '$') {
 141                                 if (*(in + 1) == '@' || *(in + 1) == 'B') {
 142                                         state = JIS_KANJI;
 143                                         in += 2;
 144                                 } else if (*(in + 1) == '(' &&
 145                                            *(in + 2) == 'D') {
 146                                         state = JIS_AUXKANJI;
 147                                         in += 3;
 148                                 } else {
 149                                         /* unknown escape sequence */
 150                                         state = JIS_ASCII;
 151                                 }
 152                         } else if (*in == '(') {
 153                                 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
 154                                         state = JIS_ASCII;
 155                                         in += 2;
 156                                 } else if (*(in + 1) == 'I') {
 157                                         state = JIS_HWKANA;
 158                                         in += 2;
 159                                 } else {
 160                                         /* unknown escape sequence */
 161                                         state = JIS_ASCII;
 162                                 }
 163                         } else {
 164                                 /* unknown escape sequence */
 165                                 state = JIS_ASCII;
 166                         }
 167                 } else if (*in == 0x0e) {
 168                         state = JIS_HWKANA;
 169                         in++;
 170                 } else if (*in == 0x0f) {
 171                         state = JIS_ASCII;
 172                         in++;
 173                 } else {
 174                         switch (state) {
 175                         case JIS_ASCII:
 176                                 *out++ = *in++;
 177                                 break;
 178                         case JIS_KANJI:
 179                                 *out++ = *in++ | 0x80;
 180                                 if (*in == '\0') break;
 181                                 *out++ = *in++ | 0x80;
 182                                 break;
 183                         case JIS_HWKANA:
 184                                 *out++ = 0x8e;
 185                                 *out++ = *in++ | 0x80;
 186                                 break;
 187                         case JIS_AUXKANJI:
 188                                 *out++ = 0x8f;
 189                                 *out++ = *in++ | 0x80;
 190                                 if (*in == '\0') break;
 191                                 *out++ = *in++ | 0x80;
 192                                 break;
 193                         }
 194                 }
 195         }
 196
 197         *out = '\0';
 198 }
 199
 200 #define JIS_HWDAKUTEN           0x5e
 201 #define JIS_HWHANDAKUTEN        0x5f
 202
 203 static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
 204 {
 205         static guint16 h2z_tbl[] = {
 206                 /* 0x20 - 0x2f */
 207                 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
 208                 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
 209                 /* 0x30 - 0x3f */
 210                 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
 211                 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
 212                 /* 0x40 - 0x4f */
 213                 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
 214                 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
 215                 /* 0x50 - 0x5f */
 216                 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
 217                 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
 218         };
 219
 220         static guint16 dakuten_tbl[] = {
 221                 /* 0x30 - 0x3f */
 222                 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
 223                 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
 224                 /* 0x40 - 0x4f */
 225                 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
 226                 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
 227         };
 228
 229         static guint16 handakuten_tbl[] = {
 230                 /* 0x4a - 0x4e */
 231                 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
 232         };
 233
 234         guint16 out_code;
 235
 236         jis_code &= 0x7f;
 237         sound_sym &= 0x7f;
 238
 239         if (jis_code < 0x21 || jis_code > 0x5f)
 240                 return 0;
 241
 242         if (sound_sym == JIS_HWDAKUTEN &&
 243             jis_code >= 0x36 && jis_code <= 0x4e) {
 244                 out_code = dakuten_tbl[jis_code - 0x30];
 245                 if (out_code != 0) {
 246                         *outbuf = out_code >> 8;
 247                         *(outbuf + 1) = out_code & 0xff;
 248                         return 2;
 249                 }
 250         }
 251
 252         if (sound_sym == JIS_HWHANDAKUTEN &&
 253             jis_code >= 0x4a && jis_code <= 0x4e) {
 254                 out_code = handakuten_tbl[jis_code - 0x4a];
 255                 *outbuf = out_code >> 8;
 256                 *(outbuf + 1) = out_code & 0xff;
 257                 return 2;
 258         }
 259
 260         out_code = h2z_tbl[jis_code - 0x20];
 261         *outbuf = out_code >> 8;
 262         *(outbuf + 1) = out_code & 0xff;
 263         return 1;
 264 }
 265
 266 static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
 267 {
 268         const guchar *in = inbuf;
 269         guchar *out = outbuf;
 270         JISState state = JIS_ASCII;
 271
 272         while (*in != '\0') {
 273                 if (IS_ASCII(*in)) {
 274                         K_OUT();
 275                         *out++ = *in++;
 276                 } else if (iseuckanji(*in)) {
 277                         if (iseuckanji(*(in + 1))) {
 278                                 K_IN();
 279                                 *out++ = *in++ & 0x7f;
 280                                 *out++ = *in++ & 0x7f;
 281                         } else {
 282                                 K_OUT();
 283                                 *out++ = SUBST_CHAR;
 284                                 in++;
 285                                 if (*in != '\0' && !IS_ASCII(*in)) {
 286                                         *out++ = SUBST_CHAR;
 287                                         in++;
 288                                 }
 289                         }
 290                 } else if (iseuchwkana1(*in)) {
 291                         if (iseuchwkana2(*(in + 1))) {
 292                                 if (prefs_common.allow_jisx0201_kana) {
 293                                         HW_IN();
 294                                         in++;
 295                                         *out++ = *in++ & 0x7f;
 296                                 } else {
 297                                         guchar jis_ch[2];
 298                                         gint len;
 299
 300                                         if (iseuchwkana1(*(in + 2)) &&
 301                                             iseuchwkana2(*(in + 3)))
 302                                                 len = conv_jis_hantozen
 303                                                         (jis_ch,
 304                                                          *(in + 1), *(in + 3));
 305                                         else
 306                                                 len = conv_jis_hantozen
 307                                                         (jis_ch,
 308                                                          *(in + 1), '\0');
 309                                         if (len == 0)
 310                                                 in += 2;
 311                                         else {
 312                                                 K_IN();
 313                                                 in += len * 2;
 314                                                 *out++ = jis_ch[0];
 315                                                 *out++ = jis_ch[1];
 316                                         }
 317                                 }
 318                         } else {
 319                                 K_OUT();
 320                                 in++;
 321                                 if (*in != '\0' && !IS_ASCII(*in)) {
 322                                         *out++ = SUBST_CHAR;
 323                                         in++;
 324                                 }
 325                         }
 326                 } else if (iseucaux(*in)) {
 327                         in++;
 328                         if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
 329                                 AUX_IN();
 330                                 *out++ = *in++ & 0x7f;
 331                                 *out++ = *in++ & 0x7f;
 332                         } else {
 333                                 K_OUT();
 334                                 if (*in != '\0' && !IS_ASCII(*in)) {
 335                                         *out++ = SUBST_CHAR;
 336                                         in++;
 337                                         if (*in != '\0' && !IS_ASCII(*in)) {
 338                                                 *out++ = SUBST_CHAR;
 339                                                 in++;
 340                                         }
 341                                 }
 342                         }
 343                 } else {
 344                         K_OUT();
 345                         *out++ = SUBST_CHAR;
 346                         in++;
 347                 }
 348         }
 349
 350         K_OUT();
 351         *out = '\0';
 352 }
 353
 354 static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 355 {
 356         const guchar *in = inbuf;
 357         guchar *out = outbuf;
 358
 359         while (*in != '\0') {
 360                 if (IS_ASCII(*in)) {
 361                         *out++ = *in++;
 362                 } else if (issjiskanji1(*in)) {
 363                         if (issjiskanji2(*(in + 1))) {
 364                                 guchar out1 = *in;
 365                                 guchar out2 = *(in + 1);
 366                                 guchar row;
 367
 368                                 row = out1 < 0xa0 ? 0x70 : 0xb0;
 369                                 if (out2 < 0x9f) {
 370                                         out1 = (out1 - row) * 2 - 1;
 371                                         out2 -= out2 > 0x7f ? 0x20 : 0x1f;
 372                                 } else {
 373                                         out1 = (out1 - row) * 2;
 374                                         out2 -= 0x7e;
 375                                 }
 376
 377                                 *out++ = out1 | 0x80;
 378                                 *out++ = out2 | 0x80;
 379                                 in += 2;
 380                         } else {
 381                                 *out++ = SUBST_CHAR;
 382                                 in++;
 383                                 if (*in != '\0' && !IS_ASCII(*in)) {
 384                                         *out++ = SUBST_CHAR;
 385                                         in++;
 386                                 }
 387                         }
 388                 } else if (issjishwkana(*in)) {
 389                         *out++ = 0x8e;
 390                         *out++ = *in++;
 391                 } else {
 392                         *out++ = SUBST_CHAR;
 393                         in++;
 394                 }
 395         }
 396
 397         *out = '\0';
 398 }
 399
 400 static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
 401 {
 402         gchar *eucstr;
 403
 404         Xalloca(eucstr, outlen, return);
 405
 406         conv_jistoeuc(eucstr, outlen, inbuf);
 407         conv_euctoutf8(outbuf, outlen, eucstr);
 408 }
 409
 410 static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
 411 {
 412         gchar *tmpstr;
 413
 414         tmpstr = conv_iconv_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);
 415         if (tmpstr) {
 416                 strncpy2(outbuf, tmpstr, outlen);
 417                 g_free(tmpstr);
 418         } else
 419                 strncpy2(outbuf, inbuf, outlen);
 420 }
 421
 422 static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
 423 {
 424         static iconv_t cd = (iconv_t)-1;
 425         static gboolean iconv_ok = TRUE;
 426         gchar *tmpstr;
 427
 428         if (cd == (iconv_t)-1) {
 429                 if (!iconv_ok) {
 430                         strncpy2(outbuf, inbuf, outlen);
 431                         return;
 432                 }
 433                 cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
 434                 if (cd == (iconv_t)-1) {
 435                         cd = iconv_open(CS_UTF_8, CS_EUC_JP);
 436                         if (cd == (iconv_t)-1) {
 437                                 g_warning("conv_euctoutf8(): %s\n",
 438                                           g_strerror(errno));
 439                                 iconv_ok = FALSE;
 440                                 strncpy2(outbuf, inbuf, outlen);
 441                                 return;
 442                         }
 443                 }
 444         }
 445
 446         tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
 447         if (tmpstr) {
 448                 strncpy2(outbuf, tmpstr, outlen);
 449                 g_free(tmpstr);
 450         } else
 451                 strncpy2(outbuf, inbuf, outlen);
 452 }
 453
 454 static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
 455 {
 456         switch (conv_guess_ja_encoding(inbuf)) {
 457         case C_ISO_2022_JP:
 458                 conv_jistoutf8(outbuf, outlen, inbuf);
 459                 break;
 460         case C_SHIFT_JIS:
 461                 conv_sjistoutf8(outbuf, outlen, inbuf);
 462                 break;
 463         case C_EUC_JP:
 464                 conv_euctoutf8(outbuf, outlen, inbuf);
 465                 break;
 466         default:
 467                 strncpy2(outbuf, inbuf, outlen);
 468                 break;
 469         }
 470 }
 471
 472 static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 473 {
 474         static iconv_t cd = (iconv_t)-1;
 475         static gboolean iconv_ok = TRUE;
 476         gchar *tmpstr;
 477
 478         if (cd == (iconv_t)-1) {
 479                 if (!iconv_ok) {
 480                         strncpy2(outbuf, inbuf, outlen);
 481                         return;
 482                 }
 483                 cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
 484                 if (cd == (iconv_t)-1) {
 485                         cd = iconv_open(CS_EUC_JP, CS_UTF_8);
 486                         if (cd == (iconv_t)-1) {
 487                                 g_warning("conv_utf8toeuc(): %s\n",
 488                                           g_strerror(errno));
 489                                 iconv_ok = FALSE;
 490                                 strncpy2(outbuf, inbuf, outlen);
 491                                 return;
 492                         }
 493                 }
 494         }
 495
 496         tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
 497         if (tmpstr) {
 498                 strncpy2(outbuf, tmpstr, outlen);
 499                 g_free(tmpstr);
 500         } else
 501                 strncpy2(outbuf, inbuf, outlen);
 502 }
 503
 504 static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf)
 505 {
 506         gchar *eucstr;
 507
 508         Xalloca(eucstr, outlen, return);
 509
 510         conv_utf8toeuc(eucstr, outlen, inbuf);
 511         conv_euctojis(outbuf, outlen, eucstr);
 512 }
 513
/*
 * Printability map for the EUC-JP rows whose lead byte is 0xa2..0xa8.
 * Looked up by isprintableeuckanji() as
 *     valid_eucjp_tbl[lead - 0xa2][trail - 0xa0]
 * where a non-zero entry means the two-byte code is treated as
 * printable.  Each row covers trail bytes 0xa0..0xff (96 entries).
 */
static gchar valid_eucjp_tbl[][96] = {
        /* 0xa2a0 - 0xa2ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
          1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
          0, 0, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 1, 0 },

        /* 0xa3a0 - 0xa3ff */
        { 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0 },

        /* 0xa4a0 - 0xa4ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa5a0 - 0xa5ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa6a0 - 0xa6ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa7a0 - 0xa7ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa8a0 - 0xa8ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 }
};
 571
 572 static gboolean isprintableeuckanji(guchar c1, guchar c2)
 573 {
 574         if (c1 <= 0xa0 || c1 >= 0xf5)
 575                 return FALSE;
 576         if (c2 <= 0xa0 || c2 == 0xff)
 577                 return FALSE;
 578
 579         if (c1 >= 0xa9 && c1 <= 0xaf)
 580                 return FALSE;
 581
 582         if (c1 >= 0xa2 && c1 <= 0xa8)
 583                 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
 584
 585         if (c1 == 0xcf) {
 586                 if (c2 >= 0xd4 && c2 <= 0xfe)
 587                         return FALSE;
 588         } else if (c1 == 0xf4) {
 589                 if (c2 >= 0xa7 && c2 <= 0xfe)
 590                         return FALSE;
 591         }
 592
 593         return TRUE;
 594 }
 595
 596 static void conv_unreadable_eucjp(gchar *str)
 597 {
 598         register guchar *p = str;
 599
 600         while (*p != '\0') {
 601                 if (IS_ASCII(*p)) {
 602                         /* convert CR+LF -> LF */
 603                         if (*p == '\r' && *(p + 1) == '\n')
 604                                 memmove(p, p + 1, strlen(p));
 605                         /* printable 7 bit code */
 606                         p++;
 607                 } else if (iseuckanji(*p)) {
 608                         if (isprintableeuckanji(*p, *(p + 1))) {
 609                                 /* printable euc-jp code */
 610                                 p += 2;
 611                         } else {
 612                                 /* substitute unprintable code */
 613                                 *p++ = SUBST_CHAR;
 614                                 if (*p != '\0') {
 615                                         if (IS_ASCII(*p))
 616                                                 p++;
 617                                         else
 618                                                 *p++ = SUBST_CHAR;
 619                                 }
 620                         }
 621                 } else if (iseuchwkana1(*p)) {
 622                         if (iseuchwkana2(*(p + 1)))
 623                                 /* euc-jp hankaku kana */
 624                                 p += 2;
 625                         else
 626                                 *p++ = SUBST_CHAR;
 627                 } else if (iseucaux(*p)) {
 628                         if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
 629                                 /* auxiliary kanji */
 630                                 p += 3;
 631                         } else
 632                                 *p++ = SUBST_CHAR;
 633                 } else
 634                         /* substitute unprintable 1 byte code */
 635                         *p++ = SUBST_CHAR;
 636         }
 637 }
 638
 639 static void conv_unreadable_8bit(gchar *str)
 640 {
 641         register guchar *p = str;
 642
 643         while (*p != '\0') {
 644                 /* convert CR+LF -> LF */
 645                 if (*p == '\r' && *(p + 1) == '\n')
 646                         memmove(p, p + 1, strlen(p));
 647                 else if (!IS_ASCII(*p)) *p = SUBST_CHAR;
 648                 p++;
 649         }
 650 }
 651
 652 static void conv_unreadable_latin(gchar *str)
 653 {
 654         register guchar *p = str;
 655
 656         while (*p != '\0') {
 657                 /* convert CR+LF -> LF */
 658                 if (*p == '\r' && *(p + 1) == '\n')
 659                         memmove(p, p + 1, strlen(p));
 660                 else if ((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
 661                         *p = SUBST_CHAR;
 662                 p++;
 663         }
 664 }
 665
 666 #define NCV     '\0'
 667
 668 void conv_mb_alnum(gchar *str)
 669 {
 670         static guchar char_tbl[] = {
 671                 /* 0xa0 - 0xaf */
 672                 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
 673                 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
 674                 /* 0xb0 - 0xbf */
 675                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 676                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 677                 /* 0xc0 - 0xcf */
 678                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 679                 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
 680                 /* 0xd0 - 0xdf */
 681                 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
 682                 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
 683                 /* 0xe0 - 0xef */
 684                 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
 685                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
 686         };
 687
 688         register guchar *p = str;
 689         register gint len;
 690
 691         len = strlen(str);
 692
 693         while (len > 1) {
 694                 if (*p == 0xa3) {
 695                         register guchar ch = *(p + 1);
 696
 697                         if (ch >= 0xb0 && ch <= 0xfa) {
 698                                 /* [a-zA-Z] */
 699                                 *p = ch & 0x7f;
 700                                 p++;
 701                                 len--;
 702                                 memmove(p, p + 1, len);
 703                                 len--;
 704                         } else  {
 705                                 p += 2;
 706                                 len -= 2;
 707                         }
 708                 } else if (*p == 0xa1) {
 709                         register guchar ch = *(p + 1);
 710
 711                         if (ch >= 0xa0 && ch <= 0xef &&
 712                             NCV != char_tbl[ch - 0xa0]) {
 713                                 *p = char_tbl[ch - 0xa0];
 714                                 p++;
 715                                 len--;
 716                                 memmove(p, p + 1, len);
 717                                 len--;
 718                         } else {
 719                                 p += 2;
 720                                 len -= 2;
 721                         }
 722                 } else if (iseuckanji(*p)) {
 723                         p += 2;
 724                         len -= 2;
 725                 } else {
 726                         p++;
 727                         len--;
 728                 }
 729         }
 730 }
 731
 732 CharSet conv_guess_ja_encoding(const gchar *str)
 733 {
 734         const guchar *p = str;
 735         CharSet guessed = C_US_ASCII;
 736
 737         while (*p != '\0') {
 738                 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
 739                         if (guessed == C_US_ASCII)
 740                                 return C_ISO_2022_JP;
 741                         p += 2;
 742                 } else if (IS_ASCII(*p)) {
 743                         p++;
 744                 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
 745                         if (*p >= 0xfd && *p <= 0xfe)
 746                                 return C_EUC_JP;
 747                         else if (guessed == C_SHIFT_JIS) {
 748                                 if ((issjiskanji1(*p) &&
 749                                      issjiskanji2(*(p + 1))) ||
 750                                     issjishwkana(*p))
 751                                         guessed = C_SHIFT_JIS;
 752                                 else
 753                                         guessed = C_EUC_JP;
 754                         } else
 755                                 guessed = C_EUC_JP;
 756                         p += 2;
 757                 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
 758                         if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
 759                                 guessed = C_SHIFT_JIS;
 760                         else
 761                                 return C_SHIFT_JIS;
 762                         p += 2;
 763                 } else if (issjishwkana(*p)) {
 764                         guessed = C_SHIFT_JIS;
 765                         p++;
 766                 } else {
 767                         p++;
 768                 }
 769         }
 770
 771         return guessed;
 772 }
 773
/*
 * Display converter selected by conv_get_code_conv_func() for
 * ISO-2022-JP sources when no destination charset is given.
 * Forwards its arguments unchanged to conv_jistoutf8().
 */
static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_jistoutf8(outbuf, outlen, inbuf);
}
 778
/*
 * Display converter selected by conv_get_code_conv_func() for
 * Shift_JIS sources when no destination charset is given.
 * Forwards its arguments unchanged to conv_sjistoutf8().
 */
static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_sjistoutf8(outbuf, outlen, inbuf);
}
 783
/*
 * Display converter selected by conv_get_code_conv_func() for
 * EUC-JP sources when no destination charset is given.
 * Forwards its arguments unchanged to conv_euctoutf8().
 */
static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_euctoutf8(outbuf, outlen, inbuf);
}
 788
 789 void conv_utf8todisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 790 {
 791         if (g_utf8_validate(inbuf, -1, NULL) == TRUE)
 792                 strncpy2(outbuf, inbuf, outlen);
 793         else
 794                 conv_ustodisp(outbuf, outlen, inbuf);
 795 }
 796
 797 static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 798 {
 799         conv_anytoutf8(outbuf, outlen, inbuf);
 800         if (g_utf8_validate(outbuf, -1, NULL) != TRUE)
 801                 conv_unreadable_8bit(outbuf);
 802 }
 803
/*
 * Copy `inbuf' into `outbuf' (bounded by `outlen' through strncpy2())
 * and then run conv_unreadable_8bit() on the copy.
 * NOTE(review): conv_unreadable_8bit() presumably masks bytes that are
 * not plain ASCII -- confirm against its definition.
 */
static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        strncpy2(outbuf, inbuf, outlen);
        conv_unreadable_8bit(outbuf);
}
 809
 810 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 811 {
 812         gchar *tmpstr;
 813
 814         tmpstr = conv_iconv_strdup(inbuf, conv_get_locale_charset_str(),
 815                                    CS_INTERNAL);
 816         if (tmpstr) {
 817                 strncpy2(outbuf, tmpstr, outlen);
 818                 g_free(tmpstr);
 819         } else
 820                 conv_utf8todisp(outbuf, outlen, inbuf);
 821 }
 822
/*
 * Identity "converter": copies `inbuf' to `outbuf' with strncpy2().
 * Its address also serves as a marker: conv_convert() and
 * conv_codeset_strdup() compare the selected converter against
 * conv_noconv and go through iconv when it is returned.
 */
static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        strncpy2(outbuf, inbuf, outlen);
}
 827
 828 CodeConverter *conv_code_converter_new(const gchar *src_charset)
 829 {
 830         CodeConverter *conv;
 831
 832         conv = g_new0(CodeConverter, 1);
 833         conv->code_conv_func = conv_get_code_conv_func(src_charset, NULL);
 834         conv->charset_str = g_strdup(src_charset);
 835         conv->charset = conv_get_charset_from_str(src_charset);
 836
 837         return conv;
 838 }
 839
 840 void conv_code_converter_destroy(CodeConverter *conv)
 841 {
 842         g_free(conv->charset_str);
 843         g_free(conv);
 844 }
 845
 846 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
 847                   const gchar *inbuf)
 848 {
 849         if (conv->code_conv_func != conv_noconv)
 850                 conv->code_conv_func(outbuf, outlen, inbuf);
 851         else {
 852                 gchar *str;
 853
 854                 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
 855                 if (!str)
 856                         return -1;
 857                 else {
 858                         strncpy2(outbuf, str, outlen);
 859                         g_free(str);
 860                 }
 861         }
 862
 863         return 0;
 864 }
 865
 866 gchar *conv_codeset_strdup(const gchar *inbuf,
 867                            const gchar *src_code, const gchar *dest_code)
 868 {
 869         gchar *buf;
 870         size_t len;
 871         CodeConvFunc conv_func;
 872
 873         conv_func = conv_get_code_conv_func(src_code, dest_code);
 874         if (conv_func != conv_noconv) {
 875                 len = (strlen(inbuf) + 1) * 3;
 876                 buf = g_malloc(len);
 877                 if (!buf) return NULL;
 878
 879                 conv_func(buf, len, inbuf);
 880                 return g_realloc(buf, strlen(buf) + 1);
 881         }
 882
 883         return conv_iconv_strdup(inbuf, src_code, dest_code);
 884 }
 885
 886 CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
 887                                      const gchar *dest_charset_str)
 888 {
 889         CodeConvFunc code_conv = conv_noconv;
 890         CharSet src_charset;
 891         CharSet dest_charset;
 892
 893         if (!src_charset_str)
 894                 src_charset = conv_get_locale_charset();
 895         else
 896                 src_charset = conv_get_charset_from_str(src_charset_str);
 897
 898         /* auto detection mode */
 899         if (!src_charset_str && !dest_charset_str) {
 900                 if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
 901                         return conv_anytodisp;
 902                 else
 903                         return conv_noconv;
 904         }
 905
 906         dest_charset = conv_get_charset_from_str(dest_charset_str);
 907
 908         if (dest_charset == C_US_ASCII)
 909                 return conv_ustodisp;
 910
 911         switch (src_charset) {
 912         case C_US_ASCII:
 913         case C_ISO_8859_1:
 914         case C_ISO_8859_2:
 915         case C_ISO_8859_3:
 916         case C_ISO_8859_4:
 917         case C_ISO_8859_5:
 918         case C_ISO_8859_6:
 919         case C_ISO_8859_7:
 920         case C_ISO_8859_8:
 921         case C_ISO_8859_9:
 922         case C_ISO_8859_10:
 923         case C_ISO_8859_11:
 924         case C_ISO_8859_13:
 925         case C_ISO_8859_14:
 926         case C_ISO_8859_15:
 927                 break;
 928         case C_ISO_2022_JP:
 929         case C_ISO_2022_JP_2:
 930         case C_ISO_2022_JP_3:
 931                 if (dest_charset == C_AUTO)
 932                         code_conv = conv_jistodisp;
 933                 else if (dest_charset == C_EUC_JP)
 934                         code_conv = conv_jistoeuc;
 935                 else if (dest_charset == C_UTF_8)
 936                         code_conv = conv_jistoutf8;
 937                 break;
 938         case C_SHIFT_JIS:
 939                 if (dest_charset == C_AUTO)
 940                         code_conv = conv_sjistodisp;
 941                 else if (dest_charset == C_EUC_JP)
 942                         code_conv = conv_sjistoeuc;
 943                 else if (dest_charset == C_UTF_8)
 944                         code_conv = conv_sjistoutf8;
 945                 break;
 946         case C_EUC_JP:
 947                 if (dest_charset == C_AUTO)
 948                         code_conv = conv_euctodisp;
 949                 else if (dest_charset == C_ISO_2022_JP   ||
 950                          dest_charset == C_ISO_2022_JP_2 ||
 951                          dest_charset == C_ISO_2022_JP_3)
 952                         code_conv = conv_euctojis;
 953                 else if (dest_charset == C_UTF_8)
 954                         code_conv = conv_euctoutf8;
 955                 break;
 956         case C_UTF_8:
 957                 if (dest_charset == C_EUC_JP)
 958                         code_conv = conv_utf8toeuc;
 959                 else if (dest_charset == C_ISO_2022_JP   ||
 960                          dest_charset == C_ISO_2022_JP_2 ||
 961                          dest_charset == C_ISO_2022_JP_3)
 962                         code_conv = conv_utf8tojis;
 963                 break;
 964         default:
 965                 break;
 966         }
 967
 968         return code_conv;
 969 }
 970
 971 gchar *conv_iconv_strdup(const gchar *inbuf,
 972                          const gchar *src_code, const gchar *dest_code)
 973 {
 974         iconv_t cd;
 975         gchar *outbuf;
 976
 977         if (!src_code)
 978                 src_code = conv_get_outgoing_charset_str();
 979         if (!dest_code)
 980                 dest_code = CS_INTERNAL;
 981
 982         /* don't convert if src and dest codeset are identical */
 983         if (!strcasecmp(src_code, dest_code))
 984                 return g_strdup(inbuf);
 985
 986         /* don't convert if current codeset is US-ASCII */
 987         if (!strcasecmp(dest_code, CS_US_ASCII))
 988                 return g_strdup(inbuf);
 989
 990         cd = iconv_open(dest_code, src_code);
 991         if (cd == (iconv_t)-1)
 992                 return NULL;
 993
 994         outbuf = conv_iconv_strdup_with_cd(inbuf, cd);
 995
 996         iconv_close(cd);
 997
 998         return outbuf;
 999 }
1000
1001 gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd)
1002 {
1003         const gchar *inbuf_p;
1004         gchar *outbuf;
1005         gchar *outbuf_p;
1006         size_t in_size;
1007         size_t in_left;
1008         size_t out_size;
1009         size_t out_left;
1010         size_t n_conv;
1011         size_t len;
1012
1013         inbuf_p = inbuf;
1014         in_size = strlen(inbuf);
1015         in_left = in_size;
1016         out_size = (in_size + 1) * 2;
1017         outbuf = g_malloc(out_size);
1018         outbuf_p = outbuf;
1019         out_left = out_size;
1020
1021 #define EXPAND_BUF()                            \
1022 {                                               \
1023         len = outbuf_p - outbuf;                \
1024         out_size *= 2;                          \
1025         outbuf = g_realloc(outbuf, out_size);   \
1026         outbuf_p = outbuf + len;                \
1027         out_left = out_size - len;              \
1028 }
1029
1030         while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
1031                                &outbuf_p, &out_left)) == (size_t)-1) {
1032                 if (EILSEQ == errno) {
1033                         //g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
1034                         inbuf_p++;
1035                         in_left--;
1036                         if (out_left == 0) {
1037                                 EXPAND_BUF();
1038                         }
1039                         *outbuf_p++ = SUBST_CHAR;
1040                         out_left--;
1041                 } else if (EINVAL == errno) {
1042                         break;
1043                 } else if (E2BIG == errno) {
1044                         EXPAND_BUF();
1045                 } else {
1046                         g_warning("conv_iconv_strdup(): %s\n",
1047                                   g_strerror(errno));
1048                         break;
1049                 }
1050         }
1051
1052         while ((n_conv = iconv(cd, NULL, NULL, &outbuf_p, &out_left)) ==
1053                (size_t)-1) {
1054                 if (E2BIG == errno) {
1055                         EXPAND_BUF();
1056                 } else {
1057                         g_warning("conv_iconv_strdup(): %s\n",
1058                                   g_strerror(errno));
1059                         break;
1060                 }
1061         }
1062
1063 #undef EXPAND_BUF
1064
1065         len = outbuf_p - outbuf;
1066         outbuf = g_realloc(outbuf, len + 1);
1067         outbuf[len] = '\0';
1068
1069         return outbuf;
1070 }
1071
/*
 * Charset id <-> charset name pairs.
 *
 * conv_get_charset_from_str_table() registers every name listed here.
 * conv_get_charset_to_str_table() keeps only the FIRST name found for
 * each id, so when an id appears more than once the earliest entry is
 * the name reported by conv_get_charset_str().
 */
static const struct {
        CharSet charset;
        gchar *const name;
} charsets[] = {
        {C_US_ASCII,            CS_US_ASCII},
        {C_US_ASCII,            CS_ANSI_X3_4_1968},
        {C_UTF_8,               CS_UTF_8},
        {C_UTF_7,               CS_UTF_7},
        {C_ISO_8859_1,          CS_ISO_8859_1},
        {C_ISO_8859_2,          CS_ISO_8859_2},
        {C_ISO_8859_3,          CS_ISO_8859_3},
        {C_ISO_8859_4,          CS_ISO_8859_4},
        {C_ISO_8859_5,          CS_ISO_8859_5},
        {C_ISO_8859_6,          CS_ISO_8859_6},
        {C_ISO_8859_7,          CS_ISO_8859_7},
        {C_ISO_8859_8,          CS_ISO_8859_8},
        {C_ISO_8859_9,          CS_ISO_8859_9},
        {C_ISO_8859_10,         CS_ISO_8859_10},
        {C_ISO_8859_11,         CS_ISO_8859_11},
        {C_ISO_8859_13,         CS_ISO_8859_13},
        {C_ISO_8859_14,         CS_ISO_8859_14},
        {C_ISO_8859_15,         CS_ISO_8859_15},
        {C_BALTIC,              CS_BALTIC},
        {C_CP1250,              CS_CP1250},
        {C_CP1251,              CS_CP1251},
        {C_CP1252,              CS_CP1252},
        {C_CP1253,              CS_CP1253},
        {C_CP1254,              CS_CP1254},
        {C_CP1255,              CS_CP1255},
        {C_CP1256,              CS_CP1256},
        {C_CP1257,              CS_CP1257},
        {C_CP1258,              CS_CP1258},
        {C_WINDOWS_1250,        CS_WINDOWS_1250},
        {C_WINDOWS_1251,        CS_WINDOWS_1251},
        {C_WINDOWS_1252,        CS_WINDOWS_1252},
        {C_WINDOWS_1253,        CS_WINDOWS_1253},
        {C_WINDOWS_1254,        CS_WINDOWS_1254},
        {C_WINDOWS_1255,        CS_WINDOWS_1255},
        {C_WINDOWS_1256,        CS_WINDOWS_1256},
        {C_WINDOWS_1257,        CS_WINDOWS_1257},
        {C_WINDOWS_1258,        CS_WINDOWS_1258},
        {C_KOI8_R,              CS_KOI8_R},
        {C_KOI8_T,              CS_KOI8_T},
        {C_KOI8_U,              CS_KOI8_U},
        {C_ISO_2022_JP,         CS_ISO_2022_JP},
        {C_ISO_2022_JP_2,       CS_ISO_2022_JP_2},
        {C_ISO_2022_JP_3,       CS_ISO_2022_JP_3},
        {C_EUC_JP,              CS_EUC_JP},
        {C_EUC_JP,              CS_EUCJP},
        {C_EUC_JP_MS,           CS_EUC_JP_MS},
        {C_SHIFT_JIS,           CS_SHIFT_JIS},
        {C_SHIFT_JIS,           CS_SHIFT__JIS},
        {C_SHIFT_JIS,           CS_SJIS},
        {C_ISO_2022_KR,         CS_ISO_2022_KR},
        {C_EUC_KR,              CS_EUC_KR},
        {C_ISO_2022_CN,         CS_ISO_2022_CN},
        {C_EUC_CN,              CS_EUC_CN},
        {C_GB2312,              CS_GB2312},
        {C_GBK,                 CS_GBK},
        {C_EUC_TW,              CS_EUC_TW},
        {C_BIG5,                CS_BIG5},
        {C_BIG5_HKSCS,          CS_BIG5_HKSCS},
        {C_TIS_620,             CS_TIS_620},
        {C_WINDOWS_874,         CS_WINDOWS_874},
        {C_GEORGIAN_PS,         CS_GEORGIAN_PS},
        {C_TCVN5712_1,          CS_TCVN5712_1},
};
1139
/*
 * Locale name -> charsets.
 *
 *   locale      : locale name (prefix) to match
 *   charset     : value returned by conv_get_locale_charset()
 *   out_charset : value returned by conv_get_outgoing_charset()
 *
 * Both lookup functions walk this table from the top and stop at the
 * first entry whose `locale' is a case-insensitive prefix of the
 * current locale.  ORDER THEREFORE MATTERS: a more specific name
 * (e.g. "ja_JP.SJIS", "sr_YU@cyrillic") has to come before the shorter
 * name it starts with (e.g. "ja_JP", "sr_YU").
 */
static const struct {
        gchar *const locale;
        CharSet charset;
        CharSet out_charset;
} locale_table[] = {
        {"ja_JP.eucJP"  , C_EUC_JP      , C_ISO_2022_JP},
        {"ja_JP.EUC-JP" , C_EUC_JP      , C_ISO_2022_JP},
        {"ja_JP.EUC"    , C_EUC_JP      , C_ISO_2022_JP},
        {"ja_JP.ujis"   , C_EUC_JP      , C_ISO_2022_JP},
        {"ja_JP.SJIS"   , C_SHIFT_JIS   , C_ISO_2022_JP},
        {"ja_JP.JIS"    , C_ISO_2022_JP , C_ISO_2022_JP},
        {"ja_JP"        , C_EUC_JP      , C_ISO_2022_JP},
        {"ko_KR.EUC-KR" , C_EUC_KR      , C_EUC_KR},
        {"ko_KR"        , C_EUC_KR      , C_EUC_KR},
        {"zh_CN.GB2312" , C_GB2312      , C_GB2312},
        {"zh_CN.GBK"    , C_GBK         , C_GB2312},
        {"zh_CN"        , C_GB2312      , C_GB2312},
        {"zh_HK"        , C_BIG5_HKSCS  , C_BIG5_HKSCS},
        {"zh_TW.eucTW"  , C_EUC_TW      , C_BIG5},
        {"zh_TW.EUC-TW" , C_EUC_TW      , C_BIG5},
        {"zh_TW.Big5"   , C_BIG5        , C_BIG5},
        {"zh_TW"        , C_BIG5        , C_BIG5},

        {"ru_RU.KOI8-R" , C_KOI8_R      , C_KOI8_R},
        {"ru_RU.KOI8R"  , C_KOI8_R      , C_KOI8_R},
        {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
        {"ru_RU"        , C_ISO_8859_5  , C_KOI8_R},
        {"tg_TJ"        , C_KOI8_T      , C_KOI8_T},
        {"ru_UA"        , C_KOI8_U      , C_KOI8_U},
        {"uk_UA.CP1251" , C_WINDOWS_1251, C_KOI8_U},
        {"uk_UA"        , C_KOI8_U      , C_KOI8_U},

        {"be_BY"        , C_WINDOWS_1251, C_WINDOWS_1251},
        {"bg_BG"        , C_WINDOWS_1251, C_WINDOWS_1251},

        {"yi_US"        , C_WINDOWS_1255, C_WINDOWS_1255},

        {"af_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"br_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"ca_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"da_DK"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"de_AT"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"de_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"de_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"de_DE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"de_LU"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_AU"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_BW"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_CA"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_DK"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_HK"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_IE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_NZ"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_PH"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_SG"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_US"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_ZW"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_AR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_BO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_CL"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_CO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_CR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_DO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_EC"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_GT"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_HN"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_MX"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_NI"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_PA"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_PE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_PR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_PY"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_SV"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_US"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_UY"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_VE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"et_EE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"eu_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fi_FI"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fo_FO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fr_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fr_CA"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fr_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fr_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fr_LU"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"ga_IE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"gl_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"gv_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"id_ID"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"is_IS"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"it_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"it_IT"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"kl_GL"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"kw_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"ms_MY"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"nl_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"nl_NL"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"nn_NO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"no_NO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"oc_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"pt_BR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"pt_PT"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"sq_AL"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"sv_FI"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"sv_SE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"tl_PH"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"uz_UZ"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"wa_BE"        , C_ISO_8859_1  , C_ISO_8859_1},

        {"bs_BA"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"cs_CZ"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"hr_HR"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"hu_HU"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"pl_PL"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"ro_RO"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"sk_SK"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"sl_SI"        , C_ISO_8859_2  , C_ISO_8859_2},

        {"sr_YU@cyrillic"       , C_ISO_8859_5  , C_ISO_8859_5},
        {"sr_YU"                , C_ISO_8859_2  , C_ISO_8859_2},

        {"mt_MT"                , C_ISO_8859_3  , C_ISO_8859_3},

        {"lt_LT.iso88594"       , C_ISO_8859_4  , C_ISO_8859_4},
        {"lt_LT.ISO8859-4"      , C_ISO_8859_4  , C_ISO_8859_4},
        {"lt_LT.ISO_8859-4"     , C_ISO_8859_4  , C_ISO_8859_4},
        {"lt_LT"                , C_ISO_8859_13 , C_ISO_8859_13},

        {"mk_MK"        , C_ISO_8859_5  , C_ISO_8859_5},

        {"ar_AE"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_BH"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_DZ"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_EG"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_IQ"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_JO"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_KW"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_LB"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_LY"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_MA"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_OM"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_QA"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_SA"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_SD"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_SY"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_TN"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_YE"        , C_ISO_8859_6  , C_ISO_8859_6},

        {"el_GR"        , C_ISO_8859_7  , C_ISO_8859_7},
        {"he_IL"        , C_ISO_8859_8  , C_ISO_8859_8},
        {"iw_IL"        , C_ISO_8859_8  , C_ISO_8859_8},
        {"tr_TR"        , C_ISO_8859_9  , C_ISO_8859_9},

        {"lv_LV"        , C_ISO_8859_13 , C_ISO_8859_13},
        {"mi_NZ"        , C_ISO_8859_13 , C_ISO_8859_13},

        {"cy_GB"        , C_ISO_8859_14 , C_ISO_8859_14},

        {"ar_IN"        , C_UTF_8       , C_UTF_8},
        {"en_IN"        , C_UTF_8       , C_UTF_8},
        {"se_NO"        , C_UTF_8       , C_UTF_8},
        {"ta_IN"        , C_UTF_8       , C_UTF_8},
        {"te_IN"        , C_UTF_8       , C_UTF_8},
        {"ur_PK"        , C_UTF_8       , C_UTF_8},

        {"th_TH"        , C_TIS_620     , C_TIS_620},
        /* {"th_TH"     , C_WINDOWS_874}, */
        /* {"th_TH"     , C_ISO_8859_11}, */

        {"ka_GE"        , C_GEORGIAN_PS , C_GEORGIAN_PS},
        {"vi_VN.TCVN"   , C_TCVN5712_1  , C_TCVN5712_1},

        {"C"                    , C_US_ASCII    , C_US_ASCII},
        {"POSIX"                , C_US_ASCII    , C_US_ASCII},
        {"ANSI_X3.4-1968"       , C_US_ASCII    , C_US_ASCII},
};
1319
1320 static GHashTable *conv_get_charset_to_str_table(void)
1321 {
1322         static GHashTable *table;
1323         gint i;
1324
1325         if (table)
1326                 return table;
1327
1328         table = g_hash_table_new(NULL, g_direct_equal);
1329
1330         for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1331                 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1332                     == NULL) {
1333                         g_hash_table_insert
1334                                 (table, GUINT_TO_POINTER(charsets[i].charset),
1335                                  charsets[i].name);
1336                 }
1337         }
1338
1339         return table;
1340 }
1341
1342 static GHashTable *conv_get_charset_from_str_table(void)
1343 {
1344         static GHashTable *table;
1345         gint i;
1346
1347         if (table)
1348                 return table;
1349
1350         table = g_hash_table_new(str_case_hash, str_case_equal);
1351
1352         for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1353                 g_hash_table_insert(table, charsets[i].name,
1354                                     GUINT_TO_POINTER(charsets[i].charset));
1355         }
1356
1357         return table;
1358 }
1359
1360 const gchar *conv_get_charset_str(CharSet charset)
1361 {
1362         GHashTable *table;
1363
1364         table = conv_get_charset_to_str_table();
1365         return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
1366 }
1367
1368 CharSet conv_get_charset_from_str(const gchar *charset)
1369 {
1370         GHashTable *table;
1371
1372         if (!charset) return C_AUTO;
1373
1374         table = conv_get_charset_from_str_table();
1375         return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
1376 }
1377
1378 CharSet conv_get_locale_charset(void)
1379 {
1380         static CharSet cur_charset = -1;
1381         const gchar *cur_locale;
1382         const gchar *p;
1383         gint i;
1384
1385         if (cur_charset != -1)
1386                 return cur_charset;
1387
1388         cur_locale = conv_get_current_locale();
1389         if (!cur_locale) {
1390                 cur_charset = C_US_ASCII;
1391                 return cur_charset;
1392         }
1393
1394         if (strcasestr(cur_locale, "UTF-8")) {
1395                 cur_charset = C_UTF_8;
1396                 return cur_charset;
1397         }
1398
1399         if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1400                 cur_charset = C_ISO_8859_15;
1401                 return cur_charset;
1402         }
1403
1404         for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1405                 const gchar *p;
1406
1407                 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1408                    "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1409                 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1410                                  strlen(locale_table[i].locale))) {
1411                         cur_charset = locale_table[i].charset;
1412                         return cur_charset;
1413                 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1414                          !strchr(p + 1, '.')) {
1415                         if (strlen(cur_locale) == 2 &&
1416                             !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1417                                 cur_charset = locale_table[i].charset;
1418                                 return cur_charset;
1419                         }
1420                 }
1421         }
1422
1423         cur_charset = C_AUTO;
1424         return cur_charset;
1425 }
1426
1427 const gchar *conv_get_locale_charset_str(void)
1428 {
1429         static const gchar *codeset = NULL;
1430
1431         if (!codeset)
1432                 codeset = conv_get_charset_str(conv_get_locale_charset());
1433
1434         return codeset ? codeset : CS_INTERNAL;
1435 }
1436
/* Return the id of the internal charset (the C_INTERNAL constant). */
CharSet conv_get_internal_charset(void)
{
        return C_INTERNAL;
}
1441
/* Return the name of the internal charset (the CS_INTERNAL constant). */
const gchar *conv_get_internal_charset_str(void)
{
        return CS_INTERNAL;
}
1446
1447 CharSet conv_get_outgoing_charset(void)
1448 {
1449         static CharSet out_charset = -1;
1450         const gchar *cur_locale;
1451         const gchar *p;
1452         gint i;
1453
1454         if (out_charset != -1)
1455                 return out_charset;
1456
1457         cur_locale = conv_get_current_locale();
1458         if (!cur_locale) {
1459                 out_charset = C_AUTO;
1460                 return out_charset;
1461         }
1462
1463         if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1464                 out_charset = C_ISO_8859_15;
1465                 return out_charset;
1466         }
1467
1468         for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1469                 const gchar *p;
1470
1471                 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1472                                  strlen(locale_table[i].locale))) {
1473                         out_charset = locale_table[i].out_charset;
1474                         break;
1475                 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1476                          !strchr(p + 1, '.')) {
1477                         if (strlen(cur_locale) == 2 &&
1478                             !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1479                                 out_charset = locale_table[i].out_charset;
1480                                 break;
1481                         }
1482                 }
1483         }
1484
1485         return out_charset;
1486 }
1487
1488 const gchar *conv_get_outgoing_charset_str(void)
1489 {
1490         CharSet out_charset;
1491         const gchar *str;
1492
1493         if (prefs_common.outgoing_charset) {
1494                 if (!isalpha((guchar)prefs_common.outgoing_charset[0])) {
1495                         g_free(prefs_common.outgoing_charset);
1496                         prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1497                 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1498                         return prefs_common.outgoing_charset;
1499         }
1500
1501         out_charset = conv_get_outgoing_charset();
1502         str = conv_get_charset_str(out_charset);
1503
1504         return str ? str : CS_UTF_8;
1505 }
1506
1507 gboolean conv_is_multibyte_encoding(CharSet encoding)
1508 {
1509         switch (encoding) {
1510         case C_EUC_JP:
1511         case C_EUC_JP_MS:
1512         case C_EUC_KR:
1513         case C_EUC_TW:
1514         case C_EUC_CN:
1515         case C_ISO_2022_JP:
1516         case C_ISO_2022_JP_2:
1517         case C_ISO_2022_JP_3:
1518         case C_ISO_2022_KR:
1519         case C_ISO_2022_CN:
1520         case C_SHIFT_JIS:
1521         case C_GB2312:
1522         case C_BIG5:
1523         case C_UTF_8:
1524         case C_UTF_7:
1525                 return TRUE;
1526         default:
1527                 return FALSE;
1528         }
1529 }
1530
1531 const gchar *conv_get_current_locale(void)
1532 {
1533         const gchar *cur_locale;
1534
1535         cur_locale = g_getenv("LC_ALL");
1536         if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1537         if (!cur_locale) cur_locale = g_getenv("LANG");
1538         if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1539
1540         debug_print("current locale: %s\n",
1541                     cur_locale ? cur_locale : "(none)");
1542
1543         return cur_locale;
1544 }
1545
1546 gchar *conv_unmime_header(const gchar *str, const gchar *default_encoding)
1547 {
1548         gchar buf[BUFFSIZE];
1549
1550         if (is_ascii_str(str))
1551                 return unmime_header(str);
1552
1553         if (default_encoding) {
1554                 gchar *utf8_buf;
1555
1556                 utf8_buf = conv_codeset_strdup
1557                         (str, default_encoding, CS_INTERNAL);
1558                 if (utf8_buf) {
1559                         gchar *decoded_str;
1560
1561                         decoded_str = unmime_header(utf8_buf);
1562                         g_free(utf8_buf);
1563                         return decoded_str;
1564                 }
1565         }
1566
1567         if (conv_get_locale_charset() == C_EUC_JP)
1568                 conv_anytodisp(buf, sizeof(buf), str);
1569         else
1570                 conv_localetodisp(buf, sizeof(buf), str);
1571
1572         return unmime_header(buf);
1573 }
1574
/* Column budget for one line of a folded header. */
#define MAX_LINELEN             76
/* Upper bound used before an unbreakable ASCII word is split. */
#define MAX_HARD_LINELEN        996
/* Delimiters that open and close a MIME encoded-word. */
#define MIMESEP_BEGIN           "=?"
#define MIMESEP_END             "?="

/*
 * Line-folding helper for conv_encode_header().  It relies on these
 * variables of the expanding function: dest, len, destp, srcp and left.
 *
 * 1. If fewer than MAX_LINELEN + 2 bytes remain in dest, the output is
 *    NUL-terminated and the *calling function returns*.
 * 2. Otherwise, if `cond` holds, input remains, something has already
 *    been written and the current line is not fresh, a fold ("\n ") is
 *    emitted and `left` is reset to MAX_LINELEN - 1.  A trailing blank
 *    already written is overwritten by the fold; when `is_plain_text` is
 *    true a leading blank in the input is skipped instead.
 *
 * Whitespace is tested with g_ascii_isspace() so the result does not
 * depend on the process locale.
 *
 * The expansion is a bare compound statement (not do/while(0)), so it is
 * also valid at a call site written without a trailing semicolon.
 */
#define LBREAK_IF_REQUIRED(cond, is_plain_text)                         \
{                                                                       \
        if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) {         \
                *destp = '\0';                                          \
                return;                                                 \
        }                                                               \
                                                                        \
        if ((cond) && *srcp) {                                          \
                if (destp > (guchar *)dest && left < MAX_LINELEN - 1) { \
                        if (g_ascii_isspace(*(destp - 1)))              \
                                destp--;                                \
                        else if ((is_plain_text) &&                     \
                                 g_ascii_isspace(*srcp))                \
                                srcp++;                                 \
                        if (*srcp) {                                    \
                                *destp++ = '\n';                        \
                                *destp++ = ' ';                         \
                                left = MAX_LINELEN - 1;                 \
                        }                                               \
                }                                                       \
        }                                                               \
}
1601
1602 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1603                         gint header_len, gboolean addr_field)
1604 {
1605         const gchar *cur_encoding;
1606         const gchar *out_encoding;
1607         gint mimestr_len;
1608         gchar *mimesep_enc;
1609         gint left;
1610         const guchar *srcp = src;
1611         guchar *destp = dest;
1612         gboolean use_base64;
1613
1614         g_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
1615
1616         if (MB_CUR_MAX > 1) {
1617                 use_base64 = TRUE;
1618                 mimesep_enc = "?B?";
1619         } else {
1620                 use_base64 = FALSE;
1621                 mimesep_enc = "?Q?";
1622         }
1623
1624         cur_encoding = CS_INTERNAL;
1625         out_encoding = conv_get_outgoing_charset_str();
1626         if (!strcmp(out_encoding, CS_US_ASCII))
1627                 out_encoding = CS_ISO_8859_1;
1628
1629         mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1630                 strlen(mimesep_enc) + strlen(MIMESEP_END);
1631
1632         left = MAX_LINELEN - header_len;
1633
1634         while (*srcp) {
1635                 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1636
1637                 while (isspace(*srcp)) {
1638                         *destp++ = *srcp++;
1639                         left--;
1640                         LBREAK_IF_REQUIRED(left <= 0, TRUE);
1641                 }
1642
1643                 /* output as it is if the next word is ASCII string */
1644                 if (!is_next_nonascii(srcp)) {
1645                         gint word_len;
1646
1647                         word_len = get_next_word_len(srcp);
1648                         LBREAK_IF_REQUIRED(left < word_len, TRUE);
1649                         while (word_len > 0) {
1650                                 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1651                                 *destp++ = *srcp++;
1652                                 left--;
1653                                 word_len--;
1654                         }
1655
1656                         continue;
1657                 }
1658
1659                 /* don't include parentheses in encoded strings */
1660                 if (addr_field && (*srcp == '(' || *srcp == ')')) {
1661                         LBREAK_IF_REQUIRED(left < 2, FALSE);
1662                         *destp++ = *srcp++;
1663                         left--;
1664                 }
1665
1666                 while (1) {
1667                         gint mb_len = 0;
1668                         gint cur_len = 0;
1669                         gchar *part_str;
1670                         gchar *out_str;
1671                         gchar *enc_str;
1672                         const guchar *p = srcp;
1673                         gint out_str_len;
1674                         gint out_enc_str_len;
1675                         gint mime_block_len;
1676                         gboolean cont = FALSE;
1677
1678                         while (*p != '\0') {
1679                                 if (isspace(*p) && !is_next_nonascii(p + 1))
1680                                         break;
1681                                 /* don't include parentheses in encoded
1682                                    strings */
1683                                 if (addr_field && (*p == '(' || *p == ')'))
1684                                         break;
1685
1686                                 mb_len = g_utf8_skip[*p];
1687
1688                                 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1689                                 out_str = conv_codeset_strdup
1690                                         (part_str, cur_encoding, out_encoding);
1691                                 if (!out_str) {
1692                                         g_warning("conv_encode_header(): code conversion failed\n");
1693                                         conv_unreadable_8bit(part_str);
1694                                         out_str = g_strdup(part_str);
1695                                 }
1696                                 out_str_len = strlen(out_str);
1697
1698                                 if (use_base64)
1699                                         out_enc_str_len = B64LEN(out_str_len);
1700                                 else
1701                                         out_enc_str_len =
1702                                                 qp_get_q_encoding_len(out_str);
1703
1704                                 g_free(out_str);
1705
1706                                 if (mimestr_len + out_enc_str_len <= left) {
1707                                         cur_len += mb_len;
1708                                         p += mb_len;
1709                                 } else if (cur_len == 0) {
1710                                         LBREAK_IF_REQUIRED(1, FALSE);
1711                                         continue;
1712                                 } else {
1713                                         cont = TRUE;
1714                                         break;
1715                                 }
1716                         }
1717
1718                         if (cur_len > 0) {
1719                                 Xstrndup_a(part_str, srcp, cur_len, );
1720                                 out_str = conv_codeset_strdup
1721                                         (part_str, cur_encoding, out_encoding);
1722                                 if (!out_str) {
1723                                         g_warning("conv_encode_header(): code conversion failed\n");
1724                                         conv_unreadable_8bit(part_str);
1725                                         out_str = g_strdup(part_str);
1726                                 }
1727                                 out_str_len = strlen(out_str);
1728
1729                                 if (use_base64)
1730                                         out_enc_str_len = B64LEN(out_str_len);
1731                                 else
1732                                         out_enc_str_len =
1733                                                 qp_get_q_encoding_len(out_str);
1734
1735                                 Xalloca(enc_str, out_enc_str_len + 1, );
1736                                 if (use_base64)
1737                                         base64_encode(enc_str, out_str, out_str_len);
1738                                 else
1739                                         qp_q_encode(enc_str, out_str);
1740
1741                                 g_free(out_str);
1742
1743                                 /* output MIME-encoded string block */
1744                                 mime_block_len = mimestr_len + strlen(enc_str);
1745                                 g_snprintf(destp, mime_block_len + 1,
1746                                            MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1747                                            out_encoding, mimesep_enc, enc_str);
1748                                 destp += mime_block_len;
1749                                 srcp += cur_len;
1750
1751                                 left -= mime_block_len;
1752                         }
1753
1754                         LBREAK_IF_REQUIRED(cont, FALSE);
1755
1756                         if (cur_len == 0)
1757                                 break;
1758                 }
1759         }
1760
1761         *destp = '\0';
1762 }
1763
1764 #undef LBREAK_IF_REQUIRED
1765 gchar *conv_filename_from_utf8(const gchar *utf8_file)
1766 {
1767         gchar *fs_file;
1768         GError *error = NULL;
1769
1770         fs_file = g_filename_from_utf8(utf8_file, -1, NULL, NULL, &error);
1771         if (error) {
1772                 g_warning("failed to convert encoding of file name: %s\n",
1773                           error->message);
1774                 g_error_free(error);
1775         }
1776         if (!fs_file)
1777                 fs_file = g_strdup(utf8_file);
1778
1779         return fs_file;
1780 }
1781
1782 gchar *conv_filename_to_utf8(const gchar *fs_file)
1783 {
1784         gchar *utf8_file = NULL;
1785         GError *error = NULL;
1786
1787         utf8_file = g_filename_to_utf8(fs_file, -1, NULL, NULL, &error);
1788         if (error) {
1789                 g_warning("failed to convert encoding of file name: %s\n",
1790                           error->message);
1791                 g_error_free(error);
1792         }
1793
1794         if (!utf8_file || !g_utf8_validate(utf8_file, -1, NULL)) {
1795                 g_free(utf8_file);
1796                 utf8_file = g_strdup(fs_file);
1797                 conv_unreadable_8bit(utf8_file);
1798         }
1799
1800         return utf8_file;
1801 }