src/codeconv.c

   1 /*
   2  * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
   3  * Copyright (C) 1999-2005 Hiroyuki Yamamoto
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  18  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 #  include "config.h"
  22 #endif
  23
  24 #include "defs.h"
  25
  26 #include <glib.h>
  27 #include <glib/gi18n.h>
  28 #include <string.h>
  29 #include <ctype.h>
  30 #include <stdlib.h>
  31 #include <errno.h>
  32
  33 #if HAVE_LOCALE_H
  34 #  include <locale.h>
  35 #endif
  36
  37 #include <iconv.h>
  38
  39 #include "codeconv.h"
  40 #include "unmime.h"
  41 #include "base64.h"
  42 #include "quoted-printable.h"
  43 #include "utils.h"
  44 #include "prefs_common.h"
  45
  46 typedef enum
  47 {
  48         JIS_ASCII,
  49         JIS_KANJI,
  50         JIS_HWKANA,
  51         JIS_AUXKANJI
  52 } JISState;
  53
  54 #define SUBST_CHAR      0x5f;
  55 #define ESC             '\033'
  56
  57 #define iseuckanji(c) \
  58         (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
  59 #define iseuchwkana1(c) \
  60         (((c) & 0xff) == 0x8e)
  61 #define iseuchwkana2(c) \
  62         (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
  63 #define iseucaux(c) \
  64         (((c) & 0xff) == 0x8f)
  65 #define issjiskanji1(c) \
  66         ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
  67          (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
  68 #define issjiskanji2(c) \
  69         ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
  70          (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
  71 #define issjishwkana(c) \
  72         (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
  73
  74 #define K_IN()                          \
  75         if (state != JIS_KANJI) {       \
  76                 *out++ = ESC;           \
  77                 *out++ = '$';           \
  78                 *out++ = 'B';           \
  79                 state = JIS_KANJI;      \
  80         }
  81
  82 #define K_OUT()                         \
  83         if (state != JIS_ASCII) {       \
  84                 *out++ = ESC;           \
  85                 *out++ = '(';           \
  86                 *out++ = 'B';           \
  87                 state = JIS_ASCII;      \
  88         }
  89
  90 #define HW_IN()                         \
  91         if (state != JIS_HWKANA) {      \
  92                 *out++ = ESC;           \
  93                 *out++ = '(';           \
  94                 *out++ = 'I';           \
  95                 state = JIS_HWKANA;     \
  96         }
  97
  98 #define AUX_IN()                        \
  99         if (state != JIS_AUXKANJI) {    \
 100                 *out++ = ESC;           \
 101                 *out++ = '$';           \
 102                 *out++ = '(';           \
 103                 *out++ = 'D';           \
 104                 state = JIS_AUXKANJI;   \
 105         }
 106
 107 static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
 108 static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf);
 109 static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
 110
 111 static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
 112 static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
 113 static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
 114 static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
 115
 116 static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
 117 static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf);
 118
 119 static void conv_unreadable_eucjp(gchar *str);
 120 static void conv_unreadable_8bit(gchar *str);
 121 static void conv_unreadable_latin(gchar *str);
 122
 123 static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
 124 static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
 125 static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
 126
 127 static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
 128 static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
 129 static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf);
 130
 131 static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 132 {
 133         const guchar *in = inbuf;
 134         guchar *out = outbuf;
 135         JISState state = JIS_ASCII;
 136
 137         while (*in != '\0') {
 138                 if (*in == ESC) {
 139                         in++;
 140                         if (*in == '$') {
 141                                 if (*(in + 1) == '@' || *(in + 1) == 'B') {
 142                                         state = JIS_KANJI;
 143                                         in += 2;
 144                                 } else if (*(in + 1) == '(' &&
 145                                            *(in + 2) == 'D') {
 146                                         state = JIS_AUXKANJI;
 147                                         in += 3;
 148                                 } else {
 149                                         /* unknown escape sequence */
 150                                         state = JIS_ASCII;
 151                                 }
 152                         } else if (*in == '(') {
 153                                 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
 154                                         state = JIS_ASCII;
 155                                         in += 2;
 156                                 } else if (*(in + 1) == 'I') {
 157                                         state = JIS_HWKANA;
 158                                         in += 2;
 159                                 } else {
 160                                         /* unknown escape sequence */
 161                                         state = JIS_ASCII;
 162                                 }
 163                         } else {
 164                                 /* unknown escape sequence */
 165                                 state = JIS_ASCII;
 166                         }
 167                 } else if (*in == 0x0e) {
 168                         state = JIS_HWKANA;
 169                         in++;
 170                 } else if (*in == 0x0f) {
 171                         state = JIS_ASCII;
 172                         in++;
 173                 } else {
 174                         switch (state) {
 175                         case JIS_ASCII:
 176                                 *out++ = *in++;
 177                                 break;
 178                         case JIS_KANJI:
 179                                 *out++ = *in++ | 0x80;
 180                                 if (*in == '\0') break;
 181                                 *out++ = *in++ | 0x80;
 182                                 break;
 183                         case JIS_HWKANA:
 184                                 *out++ = 0x8e;
 185                                 *out++ = *in++ | 0x80;
 186                                 break;
 187                         case JIS_AUXKANJI:
 188                                 *out++ = 0x8f;
 189                                 *out++ = *in++ | 0x80;
 190                                 if (*in == '\0') break;
 191                                 *out++ = *in++ | 0x80;
 192                                 break;
 193                         }
 194                 }
 195         }
 196
 197         *out = '\0';
 198 }
 199
 200 #define JIS_HWDAKUTEN           0x5e
 201 #define JIS_HWHANDAKUTEN        0x5f
 202
 203 static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
 204 {
 205         static guint16 h2z_tbl[] = {
 206                 /* 0x20 - 0x2f */
 207                 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
 208                 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
 209                 /* 0x30 - 0x3f */
 210                 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
 211                 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
 212                 /* 0x40 - 0x4f */
 213                 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
 214                 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
 215                 /* 0x50 - 0x5f */
 216                 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
 217                 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
 218         };
 219
 220         static guint16 dakuten_tbl[] = {
 221                 /* 0x30 - 0x3f */
 222                 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
 223                 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
 224                 /* 0x40 - 0x4f */
 225                 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
 226                 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
 227         };
 228
 229         static guint16 handakuten_tbl[] = {
 230                 /* 0x4a - 0x4e */
 231                 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
 232         };
 233
 234         guint16 out_code;
 235
 236         jis_code &= 0x7f;
 237         sound_sym &= 0x7f;
 238
 239         if (jis_code < 0x21 || jis_code > 0x5f)
 240                 return 0;
 241
 242         if (sound_sym == JIS_HWDAKUTEN &&
 243             jis_code >= 0x36 && jis_code <= 0x4e) {
 244                 out_code = dakuten_tbl[jis_code - 0x30];
 245                 if (out_code != 0) {
 246                         *outbuf = out_code >> 8;
 247                         *(outbuf + 1) = out_code & 0xff;
 248                         return 2;
 249                 }
 250         }
 251
 252         if (sound_sym == JIS_HWHANDAKUTEN &&
 253             jis_code >= 0x4a && jis_code <= 0x4e) {
 254                 out_code = handakuten_tbl[jis_code - 0x4a];
 255                 *outbuf = out_code >> 8;
 256                 *(outbuf + 1) = out_code & 0xff;
 257                 return 2;
 258         }
 259
 260         out_code = h2z_tbl[jis_code - 0x20];
 261         *outbuf = out_code >> 8;
 262         *(outbuf + 1) = out_code & 0xff;
 263         return 1;
 264 }
 265
 266 static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
 267 {
 268         const guchar *in = inbuf;
 269         guchar *out = outbuf;
 270         JISState state = JIS_ASCII;
 271
 272         while (*in != '\0') {
 273                 if (IS_ASCII(*in)) {
 274                         K_OUT();
 275                         *out++ = *in++;
 276                 } else if (iseuckanji(*in)) {
 277                         if (iseuckanji(*(in + 1))) {
 278                                 K_IN();
 279                                 *out++ = *in++ & 0x7f;
 280                                 *out++ = *in++ & 0x7f;
 281                         } else {
 282                                 K_OUT();
 283                                 *out++ = SUBST_CHAR;
 284                                 in++;
 285                                 if (*in != '\0' && !IS_ASCII(*in)) {
 286                                         *out++ = SUBST_CHAR;
 287                                         in++;
 288                                 }
 289                         }
 290                 } else if (iseuchwkana1(*in)) {
 291                         if (iseuchwkana2(*(in + 1))) {
 292                                 if (prefs_common.allow_jisx0201_kana) {
 293                                         HW_IN();
 294                                         in++;
 295                                         *out++ = *in++ & 0x7f;
 296                                 } else {
 297                                         guchar jis_ch[2];
 298                                         gint len;
 299
 300                                         if (iseuchwkana1(*(in + 2)) &&
 301                                             iseuchwkana2(*(in + 3)))
 302                                                 len = conv_jis_hantozen
 303                                                         (jis_ch,
 304                                                          *(in + 1), *(in + 3));
 305                                         else
 306                                                 len = conv_jis_hantozen
 307                                                         (jis_ch,
 308                                                          *(in + 1), '\0');
 309                                         if (len == 0)
 310                                                 in += 2;
 311                                         else {
 312                                                 K_IN();
 313                                                 in += len * 2;
 314                                                 *out++ = jis_ch[0];
 315                                                 *out++ = jis_ch[1];
 316                                         }
 317                                 }
 318                         } else {
 319                                 K_OUT();
 320                                 in++;
 321                                 if (*in != '\0' && !IS_ASCII(*in)) {
 322                                         *out++ = SUBST_CHAR;
 323                                         in++;
 324                                 }
 325                         }
 326                 } else if (iseucaux(*in)) {
 327                         in++;
 328                         if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
 329                                 AUX_IN();
 330                                 *out++ = *in++ & 0x7f;
 331                                 *out++ = *in++ & 0x7f;
 332                         } else {
 333                                 K_OUT();
 334                                 if (*in != '\0' && !IS_ASCII(*in)) {
 335                                         *out++ = SUBST_CHAR;
 336                                         in++;
 337                                         if (*in != '\0' && !IS_ASCII(*in)) {
 338                                                 *out++ = SUBST_CHAR;
 339                                                 in++;
 340                                         }
 341                                 }
 342                         }
 343                 } else {
 344                         K_OUT();
 345                         *out++ = SUBST_CHAR;
 346                         in++;
 347                 }
 348         }
 349
 350         K_OUT();
 351         *out = '\0';
 352 }
 353
 354 static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 355 {
 356         const guchar *in = inbuf;
 357         guchar *out = outbuf;
 358
 359         while (*in != '\0') {
 360                 if (IS_ASCII(*in)) {
 361                         *out++ = *in++;
 362                 } else if (issjiskanji1(*in)) {
 363                         if (issjiskanji2(*(in + 1))) {
 364                                 guchar out1 = *in;
 365                                 guchar out2 = *(in + 1);
 366                                 guchar row;
 367
 368                                 row = out1 < 0xa0 ? 0x70 : 0xb0;
 369                                 if (out2 < 0x9f) {
 370                                         out1 = (out1 - row) * 2 - 1;
 371                                         out2 -= out2 > 0x7f ? 0x20 : 0x1f;
 372                                 } else {
 373                                         out1 = (out1 - row) * 2;
 374                                         out2 -= 0x7e;
 375                                 }
 376
 377                                 *out++ = out1 | 0x80;
 378                                 *out++ = out2 | 0x80;
 379                                 in += 2;
 380                         } else {
 381                                 *out++ = SUBST_CHAR;
 382                                 in++;
 383                                 if (*in != '\0' && !IS_ASCII(*in)) {
 384                                         *out++ = SUBST_CHAR;
 385                                         in++;
 386                                 }
 387                         }
 388                 } else if (issjishwkana(*in)) {
 389                         *out++ = 0x8e;
 390                         *out++ = *in++;
 391                 } else {
 392                         *out++ = SUBST_CHAR;
 393                         in++;
 394                 }
 395         }
 396
 397         *out = '\0';
 398 }
 399
 400 static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
 401 {
 402         gchar *eucstr;
 403
 404         Xalloca(eucstr, outlen, return);
 405
 406         conv_jistoeuc(eucstr, outlen, inbuf);
 407         conv_euctoutf8(outbuf, outlen, eucstr);
 408 }
 409
 410 static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
 411 {
 412         gchar *tmpstr;
 413
 414         tmpstr = conv_iconv_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);
 415         if (tmpstr) {
 416                 strncpy2(outbuf, tmpstr, outlen);
 417                 g_free(tmpstr);
 418         } else
 419                 strncpy2(outbuf, inbuf, outlen);
 420 }
 421
 422 static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
 423 {
 424         static iconv_t cd = (iconv_t)-1;
 425         static gboolean iconv_ok = TRUE;
 426         gchar *tmpstr;
 427
 428         if (cd == (iconv_t)-1) {
 429                 if (!iconv_ok) {
 430                         strncpy2(outbuf, inbuf, outlen);
 431                         return;
 432                 }
 433                 cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
 434                 if (cd == (iconv_t)-1) {
 435                         cd = iconv_open(CS_UTF_8, CS_EUC_JP);
 436                         if (cd == (iconv_t)-1) {
 437                                 g_warning("conv_euctoutf8(): %s\n",
 438                                           g_strerror(errno));
 439                                 iconv_ok = FALSE;
 440                                 strncpy2(outbuf, inbuf, outlen);
 441                                 return;
 442                         }
 443                 }
 444         }
 445
 446         tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
 447         if (tmpstr) {
 448                 strncpy2(outbuf, tmpstr, outlen);
 449                 g_free(tmpstr);
 450         } else
 451                 strncpy2(outbuf, inbuf, outlen);
 452 }
 453
 454 static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
 455 {
 456         switch (conv_guess_ja_encoding(inbuf)) {
 457         case C_ISO_2022_JP:
 458                 conv_jistoutf8(outbuf, outlen, inbuf);
 459                 break;
 460         case C_SHIFT_JIS:
 461                 conv_sjistoutf8(outbuf, outlen, inbuf);
 462                 break;
 463         case C_EUC_JP:
 464                 conv_euctoutf8(outbuf, outlen, inbuf);
 465                 break;
 466         default:
 467                 strncpy2(outbuf, inbuf, outlen);
 468                 break;
 469         }
 470 }
 471
 472 static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 473 {
 474         static iconv_t cd = (iconv_t)-1;
 475         static gboolean iconv_ok = TRUE;
 476         gchar *tmpstr;
 477
 478         if (cd == (iconv_t)-1) {
 479                 if (!iconv_ok) {
 480                         strncpy2(outbuf, inbuf, outlen);
 481                         return;
 482                 }
 483                 cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
 484                 if (cd == (iconv_t)-1) {
 485                         cd = iconv_open(CS_EUC_JP, CS_UTF_8);
 486                         if (cd == (iconv_t)-1) {
 487                                 g_warning("conv_utf8toeuc(): %s\n",
 488                                           g_strerror(errno));
 489                                 iconv_ok = FALSE;
 490                                 strncpy2(outbuf, inbuf, outlen);
 491                                 return;
 492                         }
 493                 }
 494         }
 495
 496         tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
 497         if (tmpstr) {
 498                 strncpy2(outbuf, tmpstr, outlen);
 499                 g_free(tmpstr);
 500         } else
 501                 strncpy2(outbuf, inbuf, outlen);
 502 }
 503
 504 static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf)
 505 {
 506         gchar *eucstr;
 507
 508         Xalloca(eucstr, outlen, return);
 509
 510         conv_utf8toeuc(eucstr, outlen, inbuf);
 511         conv_euctojis(outbuf, outlen, eucstr);
 512 }
 513
 514 static gchar valid_eucjp_tbl[][96] = {
 515         /* 0xa2a0 - 0xa2ff */
 516         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0,
 517           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
 518           1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
 519           1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 1, 1, 1, 1,
 520           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
 521           0, 0, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 1, 0 },
 522
 523         /* 0xa3a0 - 0xa3ff */
 524         { 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 525           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 0, 0,
 526           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 527           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
 528           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 529           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0 },
 530
 531         /* 0xa4a0 - 0xa4ff */
 532         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 533           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 534           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 535           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 536           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 537           1, 1, 1, 1, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
 538
 539         /* 0xa5a0 - 0xa5ff */
 540         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 541           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 542           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 543           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 544           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 545           1, 1, 1, 1, 1, 1, 1, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
 546
 547         /* 0xa6a0 - 0xa6ff */
 548         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 549           1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
 550           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 551           1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
 552           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 553           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
 554
 555         /* 0xa7a0 - 0xa7ff */
 556         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 557           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 558           1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 559           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 560           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 561           1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
 562
 563         /* 0xa8a0 - 0xa8ff */
 564         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 565           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 566           1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 567           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 568           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 569           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 }
 570 };
 571
 572 static gboolean isprintableeuckanji(guchar c1, guchar c2)
 573 {
 574         if (c1 <= 0xa0 || c1 >= 0xf5)
 575                 return FALSE;
 576         if (c2 <= 0xa0 || c2 == 0xff)
 577                 return FALSE;
 578
 579         if (c1 >= 0xa9 && c1 <= 0xaf)
 580                 return FALSE;
 581
 582         if (c1 >= 0xa2 && c1 <= 0xa8)
 583                 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
 584
 585         if (c1 == 0xcf) {
 586                 if (c2 >= 0xd4 && c2 <= 0xfe)
 587                         return FALSE;
 588         } else if (c1 == 0xf4) {
 589                 if (c2 >= 0xa7 && c2 <= 0xfe)
 590                         return FALSE;
 591         }
 592
 593         return TRUE;
 594 }
 595
 596 static void conv_unreadable_eucjp(gchar *str)
 597 {
 598         register guchar *p = str;
 599
 600         while (*p != '\0') {
 601                 if (IS_ASCII(*p)) {
 602                         /* convert CR+LF -> LF */
 603                         if (*p == '\r' && *(p + 1) == '\n')
 604                                 memmove(p, p + 1, strlen(p));
 605                         /* printable 7 bit code */
 606                         p++;
 607                 } else if (iseuckanji(*p)) {
 608                         if (isprintableeuckanji(*p, *(p + 1))) {
 609                                 /* printable euc-jp code */
 610                                 p += 2;
 611                         } else {
 612                                 /* substitute unprintable code */
 613                                 *p++ = SUBST_CHAR;
 614                                 if (*p != '\0') {
 615                                         if (IS_ASCII(*p))
 616                                                 p++;
 617                                         else
 618                                                 *p++ = SUBST_CHAR;
 619                                 }
 620                         }
 621                 } else if (iseuchwkana1(*p)) {
 622                         if (iseuchwkana2(*(p + 1)))
 623                                 /* euc-jp hankaku kana */
 624                                 p += 2;
 625                         else
 626                                 *p++ = SUBST_CHAR;
 627                 } else if (iseucaux(*p)) {
 628                         if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
 629                                 /* auxiliary kanji */
 630                                 p += 3;
 631                         } else
 632                                 *p++ = SUBST_CHAR;
 633                 } else
 634                         /* substitute unprintable 1 byte code */
 635                         *p++ = SUBST_CHAR;
 636         }
 637 }
 638
 639 static void conv_unreadable_8bit(gchar *str)
 640 {
 641         register guchar *p = str;
 642
 643         while (*p != '\0') {
 644                 /* convert CR+LF -> LF */
 645                 if (*p == '\r' && *(p + 1) == '\n')
 646                         memmove(p, p + 1, strlen(p));
 647                 else if (!IS_ASCII(*p)) *p = SUBST_CHAR;
 648                 p++;
 649         }
 650 }
 651
 652 static void conv_unreadable_latin(gchar *str)
 653 {
 654         register guchar *p = str;
 655
 656         while (*p != '\0') {
 657                 /* convert CR+LF -> LF */
 658                 if (*p == '\r' && *(p + 1) == '\n')
 659                         memmove(p, p + 1, strlen(p));
 660                 else if ((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
 661                         *p = SUBST_CHAR;
 662                 p++;
 663         }
 664 }
 665
 666 #define NCV     '\0'
 667
 668 void conv_mb_alnum(gchar *str)
 669 {
 670         static guchar char_tbl[] = {
 671                 /* 0xa0 - 0xaf */
 672                 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
 673                 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
 674                 /* 0xb0 - 0xbf */
 675                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 676                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 677                 /* 0xc0 - 0xcf */
 678                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 679                 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
 680                 /* 0xd0 - 0xdf */
 681                 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
 682                 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
 683                 /* 0xe0 - 0xef */
 684                 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
 685                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
 686         };
 687
 688         register guchar *p = str;
 689         register gint len;
 690
 691         len = strlen(str);
 692
 693         while (len > 1) {
 694                 if (*p == 0xa3) {
 695                         register guchar ch = *(p + 1);
 696
 697                         if (ch >= 0xb0 && ch <= 0xfa) {
 698                                 /* [a-zA-Z] */
 699                                 *p = ch & 0x7f;
 700                                 p++;
 701                                 len--;
 702                                 memmove(p, p + 1, len);
 703                                 len--;
 704                         } else  {
 705                                 p += 2;
 706                                 len -= 2;
 707                         }
 708                 } else if (*p == 0xa1) {
 709                         register guchar ch = *(p + 1);
 710
 711                         if (ch >= 0xa0 && ch <= 0xef &&
 712                             NCV != char_tbl[ch - 0xa0]) {
 713                                 *p = char_tbl[ch - 0xa0];
 714                                 p++;
 715                                 len--;
 716                                 memmove(p, p + 1, len);
 717                                 len--;
 718                         } else {
 719                                 p += 2;
 720                                 len -= 2;
 721                         }
 722                 } else if (iseuckanji(*p)) {
 723                         p += 2;
 724                         len -= 2;
 725                 } else {
 726                         p++;
 727                         len--;
 728                 }
 729         }
 730 }
 731
 732 CharSet conv_guess_ja_encoding(const gchar *str)
 733 {
 734         const guchar *p = str;
 735         CharSet guessed = C_US_ASCII;
 736
 737         while (*p != '\0') {
 738                 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
 739                         if (guessed == C_US_ASCII)
 740                                 return C_ISO_2022_JP;
 741                         p += 2;
 742                 } else if (IS_ASCII(*p)) {
 743                         p++;
 744                 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
 745                         if (*p >= 0xfd && *p <= 0xfe)
 746                                 return C_EUC_JP;
 747                         else if (guessed == C_SHIFT_JIS) {
 748                                 if ((issjiskanji1(*p) &&
 749                                      issjiskanji2(*(p + 1))) ||
 750                                     issjishwkana(*p))
 751                                         guessed = C_SHIFT_JIS;
 752                                 else
 753                                         guessed = C_EUC_JP;
 754                         } else
 755                                 guessed = C_EUC_JP;
 756                         p += 2;
 757                 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
 758                         if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
 759                                 guessed = C_SHIFT_JIS;
 760                         else
 761                                 return C_SHIFT_JIS;
 762                         p += 2;
 763                 } else if (issjishwkana(*p)) {
 764                         guessed = C_SHIFT_JIS;
 765                         p++;
 766                 } else {
 767                         p++;
 768                 }
 769         }
 770
 771         return guessed;
 772 }
 773
 774 static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 775 {
 776         conv_jistoutf8(outbuf, outlen, inbuf);
 777 }
 778
 779 static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 780 {
 781         conv_sjistoutf8(outbuf, outlen, inbuf);
 782 }
 783
 784 static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 785 {
 786         conv_euctoutf8(outbuf, outlen, inbuf);
 787 }
 788
 789 void conv_utf8todisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 790 {
 791         if (g_utf8_validate(inbuf, -1, NULL) == TRUE)
 792                 strncpy2(outbuf, inbuf, outlen);
 793         else
 794                 conv_ustodisp(outbuf, outlen, inbuf);
 795 }
 796
 797 static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 798 {
 799         conv_anytoutf8(outbuf, outlen, inbuf);
 800         if (g_utf8_validate(outbuf, -1, NULL) != TRUE)
 801                 conv_unreadable_8bit(outbuf);
 802 }
 803
 804 static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 805 {
 806         strncpy2(outbuf, inbuf, outlen);
 807         conv_unreadable_8bit(outbuf);
 808 }
 809
 810 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 811 {
 812         gchar *tmpstr;
 813
 814         tmpstr = conv_iconv_strdup(inbuf, conv_get_locale_charset_str(),
 815                                    CS_INTERNAL);
 816         if (tmpstr) {
 817                 strncpy2(outbuf, tmpstr, outlen);
 818                 g_free(tmpstr);
 819         } else
 820                 conv_utf8todisp(outbuf, outlen, inbuf);
 821 }
 822
 823 static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
 824 {
 825         strncpy2(outbuf, inbuf, outlen);
 826 }
 827
 828 CodeConverter *conv_code_converter_new(const gchar *src_charset)
 829 {
 830         CodeConverter *conv;
 831
 832         conv = g_new0(CodeConverter, 1);
 833         conv->code_conv_func = conv_get_code_conv_func(src_charset, NULL);
 834         conv->charset_str = g_strdup(src_charset);
 835         conv->charset = conv_get_charset_from_str(src_charset);
 836
 837         return conv;
 838 }
 839
 840 void conv_code_converter_destroy(CodeConverter *conv)
 841 {
 842         g_free(conv->charset_str);
 843         g_free(conv);
 844 }
 845
 846 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
 847                   const gchar *inbuf)
 848 {
 849         if (conv->code_conv_func != conv_noconv)
 850                 conv->code_conv_func(outbuf, outlen, inbuf);
 851         else {
 852                 gchar *str;
 853
 854                 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
 855                 if (!str)
 856                         return -1;
 857                 else {
 858                         strncpy2(outbuf, str, outlen);
 859                         g_free(str);
 860                 }
 861         }
 862
 863         return 0;
 864 }
 865
 866 gchar *conv_codeset_strdup(const gchar *inbuf,
 867                            const gchar *src_code, const gchar *dest_code)
 868 {
 869         gchar *buf;
 870         size_t len;
 871         CodeConvFunc conv_func;
 872
 873         conv_func = conv_get_code_conv_func(src_code, dest_code);
 874         if (conv_func != conv_noconv) {
 875                 len = (strlen(inbuf) + 1) * 3;
 876                 buf = g_malloc(len);
 877                 if (!buf) return NULL;
 878
 879                 conv_func(buf, len, inbuf);
 880                 return g_realloc(buf, strlen(buf) + 1);
 881         }
 882
 883         return conv_iconv_strdup(inbuf, src_code, dest_code);
 884 }
 885
 886 CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
 887                                      const gchar *dest_charset_str)
 888 {
 889         CodeConvFunc code_conv = conv_noconv;
 890         CharSet src_charset;
 891         CharSet dest_charset;
 892
 893         if (!src_charset_str)
 894                 src_charset = conv_get_locale_charset();
 895         else
 896                 src_charset = conv_get_charset_from_str(src_charset_str);
 897
 898         /* auto detection mode */
 899         if (!src_charset_str && !dest_charset_str) {
 900                 if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
 901                         return conv_anytodisp;
 902                 else
 903                         return conv_noconv;
 904         }
 905
 906         dest_charset = conv_get_charset_from_str(dest_charset_str);
 907
 908         if (dest_charset == C_US_ASCII)
 909                 return conv_ustodisp;
 910
 911         switch (src_charset) {
 912         case C_US_ASCII:
 913         case C_ISO_8859_1:
 914         case C_ISO_8859_2:
 915         case C_ISO_8859_3:
 916         case C_ISO_8859_4:
 917         case C_ISO_8859_5:
 918         case C_ISO_8859_6:
 919         case C_ISO_8859_7:
 920         case C_ISO_8859_8:
 921         case C_ISO_8859_9:
 922         case C_ISO_8859_10:
 923         case C_ISO_8859_11:
 924         case C_ISO_8859_13:
 925         case C_ISO_8859_14:
 926         case C_ISO_8859_15:
 927                 break;
 928         case C_ISO_2022_JP:
 929         case C_ISO_2022_JP_2:
 930         case C_ISO_2022_JP_3:
 931                 if (dest_charset == C_AUTO)
 932                         code_conv = conv_jistodisp;
 933                 else if (dest_charset == C_EUC_JP)
 934                         code_conv = conv_jistoeuc;
 935                 else if (dest_charset == C_UTF_8)
 936                         code_conv = conv_jistoutf8;
 937                 break;
 938         case C_SHIFT_JIS:
 939                 if (dest_charset == C_AUTO)
 940                         code_conv = conv_sjistodisp;
 941                 else if (dest_charset == C_EUC_JP)
 942                         code_conv = conv_sjistoeuc;
 943                 else if (dest_charset == C_UTF_8)
 944                         code_conv = conv_sjistoutf8;
 945                 break;
 946         case C_EUC_JP:
 947                 if (dest_charset == C_AUTO)
 948                         code_conv = conv_euctodisp;
 949                 else if (dest_charset == C_ISO_2022_JP   ||
 950                          dest_charset == C_ISO_2022_JP_2 ||
 951                          dest_charset == C_ISO_2022_JP_3)
 952                         code_conv = conv_euctojis;
 953                 else if (dest_charset == C_UTF_8)
 954                         code_conv = conv_euctoutf8;
 955                 break;
 956         case C_UTF_8:
 957                 if (dest_charset == C_EUC_JP)
 958                         code_conv = conv_utf8toeuc;
 959                 else if (dest_charset == C_ISO_2022_JP   ||
 960                          dest_charset == C_ISO_2022_JP_2 ||
 961                          dest_charset == C_ISO_2022_JP_3)
 962                         code_conv = conv_utf8tojis;
 963                 break;
 964         default:
 965                 break;
 966         }
 967
 968         return code_conv;
 969 }
 970
 971 gchar *conv_iconv_strdup(const gchar *inbuf,
 972                          const gchar *src_code, const gchar *dest_code)
 973 {
 974         iconv_t cd;
 975         gchar *outbuf;
 976
 977         if (!src_code)
 978                 src_code = conv_get_outgoing_charset_str();
 979         if (!dest_code)
 980                 dest_code = CS_INTERNAL;
 981
 982         /* don't convert if src and dest codeset are identical */
 983         if (!strcasecmp(src_code, dest_code))
 984                 return g_strdup(inbuf);
 985
 986         /* don't convert if current codeset is US-ASCII */
 987         if (!strcasecmp(dest_code, CS_US_ASCII))
 988                 return g_strdup(inbuf);
 989
 990         cd = iconv_open(dest_code, src_code);
 991         if (cd == (iconv_t)-1)
 992                 return NULL;
 993
 994         outbuf = conv_iconv_strdup_with_cd(inbuf, cd);
 995
 996         iconv_close(cd);
 997
 998         return outbuf;
 999 }
1000
1001 gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd)
1002 {
1003         const gchar *inbuf_p;
1004         gchar *outbuf;
1005         gchar *outbuf_p;
1006         size_t in_size;
1007         size_t in_left;
1008         size_t out_size;
1009         size_t out_left;
1010         size_t n_conv;
1011         size_t len;
1012
1013         inbuf_p = inbuf;
1014         in_size = strlen(inbuf);
1015         in_left = in_size;
1016         out_size = (in_size + 1) * 2;
1017         outbuf = g_malloc(out_size);
1018         outbuf_p = outbuf;
1019         out_left = out_size;
1020
1021 #define EXPAND_BUF()                            \
1022 {                                               \
1023         len = outbuf_p - outbuf;                \
1024         out_size *= 2;                          \
1025         outbuf = g_realloc(outbuf, out_size);   \
1026         outbuf_p = outbuf + len;                \
1027         out_left = out_size - len;              \
1028 }
1029
1030         while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
1031                                &outbuf_p, &out_left)) == (size_t)-1) {
1032                 if (EILSEQ == errno) {
1033                         //g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
1034                         inbuf_p++;
1035                         in_left--;
1036                         if (out_left == 0) {
1037                                 EXPAND_BUF();
1038                         }
1039                         *outbuf_p++ = SUBST_CHAR;
1040                         out_left--;
1041                 } else if (EINVAL == errno) {
1042                         break;
1043                 } else if (E2BIG == errno) {
1044                         EXPAND_BUF();
1045                 } else {
1046                         g_warning("conv_iconv_strdup(): %s\n",
1047                                   g_strerror(errno));
1048                         break;
1049                 }
1050         }
1051
1052         while ((n_conv = iconv(cd, NULL, NULL, &outbuf_p, &out_left)) ==
1053                (size_t)-1) {
1054                 if (E2BIG == errno) {
1055                         EXPAND_BUF();
1056                 } else {
1057                         g_warning("conv_iconv_strdup(): %s\n",
1058                                   g_strerror(errno));
1059                         break;
1060                 }
1061         }
1062
1063 #undef EXPAND_BUF
1064
1065         len = outbuf_p - outbuf;
1066         outbuf = g_realloc(outbuf, len + 1);
1067         outbuf[len] = '\0';
1068
1069         return outbuf;
1070 }
1071
1072 static const struct {
1073         CharSet charset;
1074         gchar *const name;
1075 } charsets[] = {
1076         {C_US_ASCII,            CS_US_ASCII},
1077         {C_US_ASCII,            CS_ANSI_X3_4_1968},
1078         {C_UTF_8,               CS_UTF_8},
1079         {C_UTF_7,               CS_UTF_7},
1080         {C_ISO_8859_1,          CS_ISO_8859_1},
1081         {C_ISO_8859_2,          CS_ISO_8859_2},
1082         {C_ISO_8859_3,          CS_ISO_8859_3},
1083         {C_ISO_8859_4,          CS_ISO_8859_4},
1084         {C_ISO_8859_5,          CS_ISO_8859_5},
1085         {C_ISO_8859_6,          CS_ISO_8859_6},
1086         {C_ISO_8859_7,          CS_ISO_8859_7},
1087         {C_ISO_8859_8,          CS_ISO_8859_8},
1088         {C_ISO_8859_9,          CS_ISO_8859_9},
1089         {C_ISO_8859_10,         CS_ISO_8859_10},
1090         {C_ISO_8859_11,         CS_ISO_8859_11},
1091         {C_ISO_8859_13,         CS_ISO_8859_13},
1092         {C_ISO_8859_14,         CS_ISO_8859_14},
1093         {C_ISO_8859_15,         CS_ISO_8859_15},
1094         {C_BALTIC,              CS_BALTIC},
1095         {C_CP1250,              CS_CP1250},
1096         {C_CP1251,              CS_CP1251},
1097         {C_CP1252,              CS_CP1252},
1098         {C_CP1253,              CS_CP1253},
1099         {C_CP1254,              CS_CP1254},
1100         {C_CP1255,              CS_CP1255},
1101         {C_CP1256,              CS_CP1256},
1102         {C_CP1257,              CS_CP1257},
1103         {C_CP1258,              CS_CP1258},
1104         {C_WINDOWS_1250,        CS_WINDOWS_1250},
1105         {C_WINDOWS_1251,        CS_WINDOWS_1251},
1106         {C_WINDOWS_1252,        CS_WINDOWS_1252},
1107         {C_WINDOWS_1253,        CS_WINDOWS_1253},
1108         {C_WINDOWS_1254,        CS_WINDOWS_1254},
1109         {C_WINDOWS_1255,        CS_WINDOWS_1255},
1110         {C_WINDOWS_1256,        CS_WINDOWS_1256},
1111         {C_WINDOWS_1257,        CS_WINDOWS_1257},
1112         {C_WINDOWS_1258,        CS_WINDOWS_1258},
1113         {C_KOI8_R,              CS_KOI8_R},
1114         {C_KOI8_T,              CS_KOI8_T},
1115         {C_KOI8_U,              CS_KOI8_U},
1116         {C_ISO_2022_JP,         CS_ISO_2022_JP},
1117         {C_ISO_2022_JP_2,       CS_ISO_2022_JP_2},
1118         {C_ISO_2022_JP_3,       CS_ISO_2022_JP_3},
1119         {C_EUC_JP,              CS_EUC_JP},
1120         {C_EUC_JP,              CS_EUCJP},
1121         {C_EUC_JP_MS,           CS_EUC_JP_MS},
1122         {C_SHIFT_JIS,           CS_SHIFT_JIS},
1123         {C_SHIFT_JIS,           CS_SHIFT__JIS},
1124         {C_SHIFT_JIS,           CS_SJIS},
1125         {C_ISO_2022_KR,         CS_ISO_2022_KR},
1126         {C_EUC_KR,              CS_EUC_KR},
1127         {C_ISO_2022_CN,         CS_ISO_2022_CN},
1128         {C_EUC_CN,              CS_EUC_CN},
1129         {C_GB2312,              CS_GB2312},
1130         {C_GBK,                 CS_GBK},
1131         {C_EUC_TW,              CS_EUC_TW},
1132         {C_BIG5,                CS_BIG5},
1133         {C_BIG5_HKSCS,          CS_BIG5_HKSCS},
1134         {C_TIS_620,             CS_TIS_620},
1135         {C_WINDOWS_874,         CS_WINDOWS_874},
1136         {C_GEORGIAN_PS,         CS_GEORGIAN_PS},
1137         {C_TCVN5712_1,          CS_TCVN5712_1},
1138 };
1139
1140 static const struct {
1141         gchar *const locale;
1142         CharSet charset;
1143         CharSet out_charset;
1144 } locale_table[] = {
1145         {"ja_JP.eucJP"  , C_EUC_JP      , C_ISO_2022_JP},
1146         {"ja_JP.EUC-JP" , C_EUC_JP      , C_ISO_2022_JP},
1147         {"ja_JP.EUC"    , C_EUC_JP      , C_ISO_2022_JP},
1148         {"ja_JP.ujis"   , C_EUC_JP      , C_ISO_2022_JP},
1149         {"ja_JP.SJIS"   , C_SHIFT_JIS   , C_ISO_2022_JP},
1150         {"ja_JP.JIS"    , C_ISO_2022_JP , C_ISO_2022_JP},
1151         {"ja_JP"        , C_EUC_JP      , C_ISO_2022_JP},
1152         {"ko_KR.EUC-KR" , C_EUC_KR      , C_EUC_KR},
1153         {"ko_KR"        , C_EUC_KR      , C_EUC_KR},
1154         {"zh_CN.GB2312" , C_GB2312      , C_GB2312},
1155         {"zh_CN.GBK"    , C_GBK         , C_GBK},
1156         {"zh_CN"        , C_GB2312      , C_GB2312},
1157         {"zh_HK"        , C_BIG5_HKSCS  , C_BIG5_HKSCS},
1158         {"zh_TW.eucTW"  , C_EUC_TW      , C_BIG5},
1159         {"zh_TW.EUC-TW" , C_EUC_TW      , C_BIG5},
1160         {"zh_TW.Big5"   , C_BIG5        , C_BIG5},
1161         {"zh_TW"        , C_BIG5        , C_BIG5},
1162
1163         {"ru_RU.KOI8-R" , C_KOI8_R      , C_KOI8_R},
1164         {"ru_RU.KOI8R"  , C_KOI8_R      , C_KOI8_R},
1165         {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
1166         {"ru_RU"        , C_ISO_8859_5  , C_KOI8_R},
1167         {"tg_TJ"        , C_KOI8_T      , C_KOI8_T},
1168         {"ru_UA"        , C_KOI8_U      , C_KOI8_U},
1169         {"uk_UA.CP1251" , C_WINDOWS_1251, C_KOI8_U},
1170         {"uk_UA"        , C_KOI8_U      , C_KOI8_U},
1171
1172         {"be_BY"        , C_WINDOWS_1251, C_WINDOWS_1251},
1173         {"bg_BG"        , C_WINDOWS_1251, C_WINDOWS_1251},
1174
1175         {"yi_US"        , C_WINDOWS_1255, C_WINDOWS_1255},
1176
1177         {"af_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
1178         {"br_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
1179         {"ca_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
1180         {"da_DK"        , C_ISO_8859_1  , C_ISO_8859_1},
1181         {"de_AT"        , C_ISO_8859_1  , C_ISO_8859_1},
1182         {"de_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
1183         {"de_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
1184         {"de_DE"        , C_ISO_8859_1  , C_ISO_8859_1},
1185         {"de_LU"        , C_ISO_8859_1  , C_ISO_8859_1},
1186         {"en_AU"        , C_ISO_8859_1  , C_ISO_8859_1},
1187         {"en_BW"        , C_ISO_8859_1  , C_ISO_8859_1},
1188         {"en_CA"        , C_ISO_8859_1  , C_ISO_8859_1},
1189         {"en_DK"        , C_ISO_8859_1  , C_ISO_8859_1},
1190         {"en_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
1191         {"en_HK"        , C_ISO_8859_1  , C_ISO_8859_1},
1192         {"en_IE"        , C_ISO_8859_1  , C_ISO_8859_1},
1193         {"en_NZ"        , C_ISO_8859_1  , C_ISO_8859_1},
1194         {"en_PH"        , C_ISO_8859_1  , C_ISO_8859_1},
1195         {"en_SG"        , C_ISO_8859_1  , C_ISO_8859_1},
1196         {"en_US"        , C_ISO_8859_1  , C_ISO_8859_1},
1197         {"en_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
1198         {"en_ZW"        , C_ISO_8859_1  , C_ISO_8859_1},
1199         {"es_AR"        , C_ISO_8859_1  , C_ISO_8859_1},
1200         {"es_BO"        , C_ISO_8859_1  , C_ISO_8859_1},
1201         {"es_CL"        , C_ISO_8859_1  , C_ISO_8859_1},
1202         {"es_CO"        , C_ISO_8859_1  , C_ISO_8859_1},
1203         {"es_CR"        , C_ISO_8859_1  , C_ISO_8859_1},
1204         {"es_DO"        , C_ISO_8859_1  , C_ISO_8859_1},
1205         {"es_EC"        , C_ISO_8859_1  , C_ISO_8859_1},
1206         {"es_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
1207         {"es_GT"        , C_ISO_8859_1  , C_ISO_8859_1},
1208         {"es_HN"        , C_ISO_8859_1  , C_ISO_8859_1},
1209         {"es_MX"        , C_ISO_8859_1  , C_ISO_8859_1},
1210         {"es_NI"        , C_ISO_8859_1  , C_ISO_8859_1},
1211         {"es_PA"        , C_ISO_8859_1  , C_ISO_8859_1},
1212         {"es_PE"        , C_ISO_8859_1  , C_ISO_8859_1},
1213         {"es_PR"        , C_ISO_8859_1  , C_ISO_8859_1},
1214         {"es_PY"        , C_ISO_8859_1  , C_ISO_8859_1},
1215         {"es_SV"        , C_ISO_8859_1  , C_ISO_8859_1},
1216         {"es_US"        , C_ISO_8859_1  , C_ISO_8859_1},
1217         {"es_UY"        , C_ISO_8859_1  , C_ISO_8859_1},
1218         {"es_VE"        , C_ISO_8859_1  , C_ISO_8859_1},
1219         {"et_EE"        , C_ISO_8859_1  , C_ISO_8859_1},
1220         {"eu_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
1221         {"fi_FI"        , C_ISO_8859_1  , C_ISO_8859_1},
1222         {"fo_FO"        , C_ISO_8859_1  , C_ISO_8859_1},
1223         {"fr_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
1224         {"fr_CA"        , C_ISO_8859_1  , C_ISO_8859_1},
1225         {"fr_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
1226         {"fr_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
1227         {"fr_LU"        , C_ISO_8859_1  , C_ISO_8859_1},
1228         {"ga_IE"        , C_ISO_8859_1  , C_ISO_8859_1},
1229         {"gl_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
1230         {"gv_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
1231         {"id_ID"        , C_ISO_8859_1  , C_ISO_8859_1},
1232         {"is_IS"        , C_ISO_8859_1  , C_ISO_8859_1},
1233         {"it_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
1234         {"it_IT"        , C_ISO_8859_1  , C_ISO_8859_1},
1235         {"kl_GL"        , C_ISO_8859_1  , C_ISO_8859_1},
1236         {"kw_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
1237         {"ms_MY"        , C_ISO_8859_1  , C_ISO_8859_1},
1238         {"nl_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
1239         {"nl_NL"        , C_ISO_8859_1  , C_ISO_8859_1},
1240         {"nn_NO"        , C_ISO_8859_1  , C_ISO_8859_1},
1241         {"no_NO"        , C_ISO_8859_1  , C_ISO_8859_1},
1242         {"oc_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
1243         {"pt_BR"        , C_ISO_8859_1  , C_ISO_8859_1},
1244         {"pt_PT"        , C_ISO_8859_1  , C_ISO_8859_1},
1245         {"sq_AL"        , C_ISO_8859_1  , C_ISO_8859_1},
1246         {"sv_FI"        , C_ISO_8859_1  , C_ISO_8859_1},
1247         {"sv_SE"        , C_ISO_8859_1  , C_ISO_8859_1},
1248         {"tl_PH"        , C_ISO_8859_1  , C_ISO_8859_1},
1249         {"uz_UZ"        , C_ISO_8859_1  , C_ISO_8859_1},
1250         {"wa_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
1251
1252         {"bs_BA"        , C_ISO_8859_2  , C_ISO_8859_2},
1253         {"cs_CZ"        , C_ISO_8859_2  , C_ISO_8859_2},
1254         {"hr_HR"        , C_ISO_8859_2  , C_ISO_8859_2},
1255         {"hu_HU"        , C_ISO_8859_2  , C_ISO_8859_2},
1256         {"pl_PL"        , C_ISO_8859_2  , C_ISO_8859_2},
1257         {"ro_RO"        , C_ISO_8859_2  , C_ISO_8859_2},
1258         {"sk_SK"        , C_ISO_8859_2  , C_ISO_8859_2},
1259         {"sl_SI"        , C_ISO_8859_2  , C_ISO_8859_2},
1260
1261         {"sr_YU@cyrillic"       , C_ISO_8859_5  , C_ISO_8859_5},
1262         {"sr_YU"                , C_ISO_8859_2  , C_ISO_8859_2},
1263
1264         {"mt_MT"                , C_ISO_8859_3  , C_ISO_8859_3},
1265
1266         {"lt_LT.iso88594"       , C_ISO_8859_4  , C_ISO_8859_4},
1267         {"lt_LT.ISO8859-4"      , C_ISO_8859_4  , C_ISO_8859_4},
1268         {"lt_LT.ISO_8859-4"     , C_ISO_8859_4  , C_ISO_8859_4},
1269         {"lt_LT"                , C_ISO_8859_13 , C_ISO_8859_13},
1270
1271         {"mk_MK"        , C_ISO_8859_5  , C_ISO_8859_5},
1272
1273         {"ar_AE"        , C_ISO_8859_6  , C_ISO_8859_6},
1274         {"ar_BH"        , C_ISO_8859_6  , C_ISO_8859_6},
1275         {"ar_DZ"        , C_ISO_8859_6  , C_ISO_8859_6},
1276         {"ar_EG"        , C_ISO_8859_6  , C_ISO_8859_6},
1277         {"ar_IQ"        , C_ISO_8859_6  , C_ISO_8859_6},
1278         {"ar_JO"        , C_ISO_8859_6  , C_ISO_8859_6},
1279         {"ar_KW"        , C_ISO_8859_6  , C_ISO_8859_6},
1280         {"ar_LB"        , C_ISO_8859_6  , C_ISO_8859_6},
1281         {"ar_LY"        , C_ISO_8859_6  , C_ISO_8859_6},
1282         {"ar_MA"        , C_ISO_8859_6  , C_ISO_8859_6},
1283         {"ar_OM"        , C_ISO_8859_6  , C_ISO_8859_6},
1284         {"ar_QA"        , C_ISO_8859_6  , C_ISO_8859_6},
1285         {"ar_SA"        , C_ISO_8859_6  , C_ISO_8859_6},
1286         {"ar_SD"        , C_ISO_8859_6  , C_ISO_8859_6},
1287         {"ar_SY"        , C_ISO_8859_6  , C_ISO_8859_6},
1288         {"ar_TN"        , C_ISO_8859_6  , C_ISO_8859_6},
1289         {"ar_YE"        , C_ISO_8859_6  , C_ISO_8859_6},
1290
1291         {"el_GR"        , C_ISO_8859_7  , C_ISO_8859_7},
1292         {"he_IL"        , C_ISO_8859_8  , C_ISO_8859_8},
1293         {"iw_IL"        , C_ISO_8859_8  , C_ISO_8859_8},
1294         {"tr_TR"        , C_ISO_8859_9  , C_ISO_8859_9},
1295
1296         {"lv_LV"        , C_ISO_8859_13 , C_ISO_8859_13},
1297         {"mi_NZ"        , C_ISO_8859_13 , C_ISO_8859_13},
1298
1299         {"cy_GB"        , C_ISO_8859_14 , C_ISO_8859_14},
1300
1301         {"ar_IN"        , C_UTF_8       , C_UTF_8},
1302         {"en_IN"        , C_UTF_8       , C_UTF_8},
1303         {"se_NO"        , C_UTF_8       , C_UTF_8},
1304         {"ta_IN"        , C_UTF_8       , C_UTF_8},
1305         {"te_IN"        , C_UTF_8       , C_UTF_8},
1306         {"ur_PK"        , C_UTF_8       , C_UTF_8},
1307
1308         {"th_TH"        , C_TIS_620     , C_TIS_620},
1309         /* {"th_TH"     , C_WINDOWS_874}, */
1310         /* {"th_TH"     , C_ISO_8859_11}, */
1311
1312         {"ka_GE"        , C_GEORGIAN_PS , C_GEORGIAN_PS},
1313         {"vi_VN.TCVN"   , C_TCVN5712_1  , C_TCVN5712_1},
1314
1315         {"C"                    , C_US_ASCII    , C_US_ASCII},
1316         {"POSIX"                , C_US_ASCII    , C_US_ASCII},
1317         {"ANSI_X3.4-1968"       , C_US_ASCII    , C_US_ASCII},
1318 };
1319
1320 static GHashTable *conv_get_charset_to_str_table(void)
1321 {
1322         static GHashTable *table;
1323         gint i;
1324
1325         if (table)
1326                 return table;
1327
1328         table = g_hash_table_new(NULL, g_direct_equal);
1329
1330         for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1331                 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1332                     == NULL) {
1333                         g_hash_table_insert
1334                                 (table, GUINT_TO_POINTER(charsets[i].charset),
1335                                  charsets[i].name);
1336                 }
1337         }
1338
1339         return table;
1340 }
1341
1342 static GHashTable *conv_get_charset_from_str_table(void)
1343 {
1344         static GHashTable *table;
1345         gint i;
1346
1347         if (table)
1348                 return table;
1349
1350         table = g_hash_table_new(str_case_hash, str_case_equal);
1351
1352         for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1353                 g_hash_table_insert(table, charsets[i].name,
1354                                     GUINT_TO_POINTER(charsets[i].charset));
1355         }
1356
1357         return table;
1358 }
1359
1360 const gchar *conv_get_charset_str(CharSet charset)
1361 {
1362         GHashTable *table;
1363
1364         table = conv_get_charset_to_str_table();
1365         return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
1366 }
1367
1368 CharSet conv_get_charset_from_str(const gchar *charset)
1369 {
1370         GHashTable *table;
1371
1372         if (!charset) return C_AUTO;
1373
1374         table = conv_get_charset_from_str_table();
1375         return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
1376 }
1377
1378 CharSet conv_get_locale_charset(void)
1379 {
1380         static CharSet cur_charset = -1;
1381         const gchar *cur_locale;
1382         const gchar *p;
1383         gint i;
1384
1385         if (cur_charset != -1)
1386                 return cur_charset;
1387
1388         cur_locale = conv_get_current_locale();
1389         if (!cur_locale) {
1390                 cur_charset = C_US_ASCII;
1391                 return cur_charset;
1392         }
1393
1394         if (strcasestr(cur_locale, "UTF-8")) {
1395                 cur_charset = C_UTF_8;
1396                 return cur_charset;
1397         }
1398
1399         if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1400                 cur_charset = C_ISO_8859_15;
1401                 return cur_charset;
1402         }
1403
1404         for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1405                 const gchar *p;
1406
1407                 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1408                    "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1409                 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1410                                  strlen(locale_table[i].locale))) {
1411                         cur_charset = locale_table[i].charset;
1412                         return cur_charset;
1413                 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1414                          !strchr(p + 1, '.')) {
1415                         if (strlen(cur_locale) == 2 &&
1416                             !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1417                                 cur_charset = locale_table[i].charset;
1418                                 return cur_charset;
1419                         }
1420                 }
1421         }
1422
1423         cur_charset = C_AUTO;
1424         return cur_charset;
1425 }
1426
1427 const gchar *conv_get_locale_charset_str(void)
1428 {
1429         static const gchar *codeset = NULL;
1430
1431         if (!codeset)
1432                 codeset = conv_get_charset_str(conv_get_locale_charset());
1433
1434         return codeset ? codeset : CS_INTERNAL;
1435 }
1436
1437 CharSet conv_get_internal_charset(void)
1438 {
1439         return C_INTERNAL;
1440 }
1441
1442 const gchar *conv_get_internal_charset_str(void)
1443 {
1444         return CS_INTERNAL;
1445 }
1446
1447 CharSet conv_get_outgoing_charset(void)
1448 {
1449         static CharSet out_charset = -1;
1450         const gchar *cur_locale;
1451         const gchar *p;
1452         gint i;
1453
1454         if (out_charset != -1)
1455                 return out_charset;
1456
1457         cur_locale = conv_get_current_locale();
1458         if (!cur_locale) {
1459                 out_charset = C_AUTO;
1460                 return out_charset;
1461         }
1462
1463         if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1464                 out_charset = C_ISO_8859_15;
1465                 return out_charset;
1466         }
1467
1468         for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1469                 const gchar *p;
1470
1471                 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1472                                  strlen(locale_table[i].locale))) {
1473                         out_charset = locale_table[i].out_charset;
1474                         break;
1475                 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1476                          !strchr(p + 1, '.')) {
1477                         if (strlen(cur_locale) == 2 &&
1478                             !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1479                                 out_charset = locale_table[i].out_charset;
1480                                 break;
1481                         }
1482                 }
1483         }
1484
1485         return out_charset;
1486 }
1487
1488 const gchar *conv_get_outgoing_charset_str(void)
1489 {
1490         CharSet out_charset;
1491         const gchar *str;
1492
1493         if (prefs_common.outgoing_charset) {
1494                 if (!isalpha((guchar)prefs_common.outgoing_charset[0])) {
1495                         g_free(prefs_common.outgoing_charset);
1496                         prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1497                 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1498                         return prefs_common.outgoing_charset;
1499         }
1500
1501         out_charset = conv_get_outgoing_charset();
1502         str = conv_get_charset_str(out_charset);
1503
1504         return str ? str : CS_UTF_8;
1505 }
1506
1507 gboolean conv_is_multibyte_encoding(CharSet encoding)
1508 {
1509         switch (encoding) {
1510         case C_EUC_JP:
1511         case C_EUC_JP_MS:
1512         case C_EUC_KR:
1513         case C_EUC_TW:
1514         case C_EUC_CN:
1515         case C_ISO_2022_JP:
1516         case C_ISO_2022_JP_2:
1517         case C_ISO_2022_JP_3:
1518         case C_ISO_2022_KR:
1519         case C_ISO_2022_CN:
1520         case C_SHIFT_JIS:
1521         case C_GB2312:
1522         case C_GBK:
1523         case C_BIG5:
1524         case C_UTF_8:
1525         case C_UTF_7:
1526                 return TRUE;
1527         default:
1528                 return FALSE;
1529         }
1530 }
1531
1532 const gchar *conv_get_current_locale(void)
1533 {
1534         const gchar *cur_locale;
1535
1536         cur_locale = g_getenv("LC_ALL");
1537         if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1538         if (!cur_locale) cur_locale = g_getenv("LANG");
1539         if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1540
1541         debug_print("current locale: %s\n",
1542                     cur_locale ? cur_locale : "(none)");
1543
1544         return cur_locale;
1545 }
1546
1547 gchar *conv_unmime_header(const gchar *str, const gchar *default_encoding)
1548 {
1549         gchar buf[BUFFSIZE];
1550
1551         if (is_ascii_str(str))
1552                 return unmime_header(str);
1553
1554         if (default_encoding) {
1555                 gchar *utf8_buf;
1556
1557                 utf8_buf = conv_codeset_strdup
1558                         (str, default_encoding, CS_INTERNAL);
1559                 if (utf8_buf) {
1560                         gchar *decoded_str;
1561
1562                         decoded_str = unmime_header(utf8_buf);
1563                         g_free(utf8_buf);
1564                         return decoded_str;
1565                 }
1566         }
1567
1568         if (conv_get_locale_charset() == C_EUC_JP)
1569                 conv_anytodisp(buf, sizeof(buf), str);
1570         else
1571                 conv_localetodisp(buf, sizeof(buf), str);
1572
1573         return unmime_header(buf);
1574 }
1575
1576 #define MAX_LINELEN             76
1577 #define MAX_HARD_LINELEN        996
1578 #define MIMESEP_BEGIN           "=?"
1579 #define MIMESEP_END             "?="
1580
1581 #define LBREAK_IF_REQUIRED(cond, is_plain_text)                         \
1582 {                                                                       \
1583         if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) {         \
1584                 *destp = '\0';                                          \
1585                 return;                                                 \
1586         }                                                               \
1587                                                                         \
1588         if ((cond) && *srcp) {                                          \
1589                 if (destp > (guchar *)dest && left < MAX_LINELEN - 1) { \
1590                         if (isspace(*(destp - 1)))                      \
1591                                 destp--;                                \
1592                         else if (is_plain_text && isspace(*srcp))       \
1593                                 srcp++;                                 \
1594                         if (*srcp) {                                    \
1595                                 *destp++ = '\n';                        \
1596                                 *destp++ = ' ';                         \
1597                                 left = MAX_LINELEN - 1;                 \
1598                         }                                               \
1599                 }                                                       \
1600         }                                                               \
1601 }
1602
1603 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1604                         gint header_len, gboolean addr_field)
1605 {
1606         const gchar *cur_encoding;
1607         const gchar *out_encoding;
1608         gint mimestr_len;
1609         gchar *mimesep_enc;
1610         gint left;
1611         const guchar *srcp = src;
1612         guchar *destp = dest;
1613         gboolean use_base64;
1614
1615         g_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
1616
1617         if (MB_CUR_MAX > 1) {
1618                 use_base64 = TRUE;
1619                 mimesep_enc = "?B?";
1620         } else {
1621                 use_base64 = FALSE;
1622                 mimesep_enc = "?Q?";
1623         }
1624
1625         cur_encoding = CS_INTERNAL;
1626         out_encoding = conv_get_outgoing_charset_str();
1627         if (!strcmp(out_encoding, CS_US_ASCII))
1628                 out_encoding = CS_ISO_8859_1;
1629
1630         mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1631                 strlen(mimesep_enc) + strlen(MIMESEP_END);
1632
1633         left = MAX_LINELEN - header_len;
1634
1635         while (*srcp) {
1636                 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1637
1638                 while (isspace(*srcp)) {
1639                         *destp++ = *srcp++;
1640                         left--;
1641                         LBREAK_IF_REQUIRED(left <= 0, TRUE);
1642                 }
1643
1644                 /* output as it is if the next word is ASCII string */
1645                 if (!is_next_nonascii(srcp)) {
1646                         gint word_len;
1647
1648                         word_len = get_next_word_len(srcp);
1649                         LBREAK_IF_REQUIRED(left < word_len, TRUE);
1650                         while (word_len > 0) {
1651                                 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1652                                 *destp++ = *srcp++;
1653                                 left--;
1654                                 word_len--;
1655                         }
1656
1657                         continue;
1658                 }
1659
1660                 /* don't include parentheses in encoded strings */
1661                 if (addr_field && (*srcp == '(' || *srcp == ')')) {
1662                         LBREAK_IF_REQUIRED(left < 2, FALSE);
1663                         *destp++ = *srcp++;
1664                         left--;
1665                 }
1666
1667                 while (1) {
1668                         gint mb_len = 0;
1669                         gint cur_len = 0;
1670                         gchar *part_str;
1671                         gchar *out_str;
1672                         gchar *enc_str;
1673                         const guchar *p = srcp;
1674                         gint out_str_len;
1675                         gint out_enc_str_len;
1676                         gint mime_block_len;
1677                         gboolean cont = FALSE;
1678
1679                         while (*p != '\0') {
1680                                 if (isspace(*p) && !is_next_nonascii(p + 1))
1681                                         break;
1682                                 /* don't include parentheses in encoded
1683                                    strings */
1684                                 if (addr_field && (*p == '(' || *p == ')'))
1685                                         break;
1686
1687                                 mb_len = g_utf8_skip[*p];
1688
1689                                 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1690                                 out_str = conv_codeset_strdup
1691                                         (part_str, cur_encoding, out_encoding);
1692                                 if (!out_str) {
1693                                         g_warning("conv_encode_header(): code conversion failed\n");
1694                                         conv_unreadable_8bit(part_str);
1695                                         out_str = g_strdup(part_str);
1696                                 }
1697                                 out_str_len = strlen(out_str);
1698
1699                                 if (use_base64)
1700                                         out_enc_str_len = B64LEN(out_str_len);
1701                                 else
1702                                         out_enc_str_len =
1703                                                 qp_get_q_encoding_len(out_str);
1704
1705                                 g_free(out_str);
1706
1707                                 if (mimestr_len + out_enc_str_len <= left) {
1708                                         cur_len += mb_len;
1709                                         p += mb_len;
1710                                 } else if (cur_len == 0) {
1711                                         LBREAK_IF_REQUIRED(1, FALSE);
1712                                         continue;
1713                                 } else {
1714                                         cont = TRUE;
1715                                         break;
1716                                 }
1717                         }
1718
1719                         if (cur_len > 0) {
1720                                 Xstrndup_a(part_str, srcp, cur_len, );
1721                                 out_str = conv_codeset_strdup
1722                                         (part_str, cur_encoding, out_encoding);
1723                                 if (!out_str) {
1724                                         g_warning("conv_encode_header(): code conversion failed\n");
1725                                         conv_unreadable_8bit(part_str);
1726                                         out_str = g_strdup(part_str);
1727                                 }
1728                                 out_str_len = strlen(out_str);
1729
1730                                 if (use_base64)
1731                                         out_enc_str_len = B64LEN(out_str_len);
1732                                 else
1733                                         out_enc_str_len =
1734                                                 qp_get_q_encoding_len(out_str);
1735
1736                                 Xalloca(enc_str, out_enc_str_len + 1, );
1737                                 if (use_base64)
1738                                         base64_encode(enc_str, out_str, out_str_len);
1739                                 else
1740                                         qp_q_encode(enc_str, out_str);
1741
1742                                 g_free(out_str);
1743
1744                                 /* output MIME-encoded string block */
1745                                 mime_block_len = mimestr_len + strlen(enc_str);
1746                                 g_snprintf(destp, mime_block_len + 1,
1747                                            MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1748                                            out_encoding, mimesep_enc, enc_str);
1749                                 destp += mime_block_len;
1750                                 srcp += cur_len;
1751
1752                                 left -= mime_block_len;
1753                         }
1754
1755                         LBREAK_IF_REQUIRED(cont, FALSE);
1756
1757                         if (cur_len == 0)
1758                                 break;
1759                 }
1760         }
1761
1762         *destp = '\0';
1763 }
1764
1765 #undef LBREAK_IF_REQUIRED
1766 gchar *conv_filename_from_utf8(const gchar *utf8_file)
1767 {
1768         gchar *fs_file;
1769         GError *error = NULL;
1770
1771         fs_file = g_filename_from_utf8(utf8_file, -1, NULL, NULL, &error);
1772         if (error) {
1773                 g_warning("failed to convert encoding of file name: %s\n",
1774                           error->message);
1775                 g_error_free(error);
1776         }
1777         if (!fs_file)
1778                 fs_file = g_strdup(utf8_file);
1779
1780         return fs_file;
1781 }
1782
1783 gchar *conv_filename_to_utf8(const gchar *fs_file)
1784 {
1785         gchar *utf8_file = NULL;
1786         GError *error = NULL;
1787
1788         utf8_file = g_filename_to_utf8(fs_file, -1, NULL, NULL, &error);
1789         if (error) {
1790                 g_warning("failed to convert encoding of file name: %s\n",
1791                           error->message);
1792                 g_error_free(error);
1793         }
1794
1795         if (!utf8_file || !g_utf8_validate(utf8_file, -1, NULL)) {
1796                 g_free(utf8_file);
1797                 utf8_file = g_strdup(fs_file);
1798                 conv_unreadable_8bit(utf8_file);
1799         }
1800
1801         return utf8_file;
1802 }