src/codeconv.c

   1 /*
   2  * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
   3  * Copyright (C) 1999-2005 Hiroyuki Yamamoto
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  18  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 #  include "config.h"
  22 #endif
  23
  24 #include <glib.h>
  25 #include <string.h>
  26 #include <ctype.h>
  27 #include <stdlib.h>
  28 #include <errno.h>
  29
  30 #if HAVE_LOCALE_H
  31 #  include <locale.h>
  32 #endif
  33
  34 #include <iconv.h>
  35
  36 #include "intl.h"
  37 #include "codeconv.h"
  38 #include "unmime.h"
  39 #include "base64.h"
  40 #include "quoted-printable.h"
  41 #include "utils.h"
  42 #include "prefs_common.h"
  43
  /*
   * Character set currently selected while reading or writing an
   * ISO-2022-JP (JIS) byte stream.  The converters in this file switch
   * between these states on the escape sequences they recognise/emit.
   */
  44 typedef enum
  45 {
  46         JIS_ASCII,      /* selected by ESC ( B (also ESC ( J on input) */
  47         JIS_KANJI,      /* selected by ESC $ B (also ESC $ @ on input) */
  48         JIS_HWKANA,     /* selected by ESC ( I (also 0x0e on input) */
  49         JIS_AUXKANJI    /* selected by ESC $ ( D */
  50 } JISState;
  51
  /*
   * SUBST_CHAR: byte ('_') written in place of input that cannot be
   * converted or displayed.  The expansion deliberately carries no
   * trailing semicolon so the macro can be used inside expressions.
   * ESC: the ASCII escape character that introduces JIS shift sequences.
   */
#define SUBST_CHAR      0x5f
#define ESC             '\033'
  54
  /*
   * Byte classification predicates for EUC-JP and Shift_JIS.
   * Every macro masks its argument to 8 bits first, so it gives the same
   * answer for signed and unsigned char values.  The argument is
   * evaluated more than once: do not pass expressions with side effects.
   */
#define CONV_BYTE_IN_RANGE(c, lo, hi) \
        (((c) & 0xff) >= (lo) && ((c) & 0xff) <= (hi))

/* EUC-JP: lead or trail byte of a two-byte kanji (0xa1 - 0xfe) */
#define iseuckanji(c)   CONV_BYTE_IN_RANGE(c, 0xa1, 0xfe)
/* EUC-JP: SS2 (0x8e), prefix of a half-width kana */
#define iseuchwkana1(c) (((c) & 0xff) == 0x8e)
/* EUC-JP: byte following SS2 (0xa1 - 0xdf) */
#define iseuchwkana2(c) CONV_BYTE_IN_RANGE(c, 0xa1, 0xdf)
/* EUC-JP: SS3 (0x8f), prefix of an auxiliary kanji */
#define iseucaux(c)     (((c) & 0xff) == 0x8f)
/* Shift_JIS: lead byte of a two-byte character (0x81 - 0x9f, 0xe0 - 0xfc) */
#define issjiskanji1(c) \
        (CONV_BYTE_IN_RANGE(c, 0x81, 0x9f) || CONV_BYTE_IN_RANGE(c, 0xe0, 0xfc))
/* Shift_JIS: trail byte of a two-byte character (0x40 - 0x7e, 0x80 - 0xfc) */
#define issjiskanji2(c) \
        (CONV_BYTE_IN_RANGE(c, 0x40, 0x7e) || CONV_BYTE_IN_RANGE(c, 0x80, 0xfc))
/* Shift_JIS: single-byte half-width kana (0xa1 - 0xdf) */
#define issjishwkana(c) CONV_BYTE_IN_RANGE(c, 0xa1, 0xdf)
  71
  /*
   * Shift helpers for writing ISO-2022-JP.  Each one expects two locals
   * in the calling scope: `out` (write cursor) and `state` (JISState).
   * If the requested character set is not already selected, the escape
   * sequence is appended at `out` and `state` is updated.
   *
   * The bodies are wrapped in do { } while (0) so that an invocation is
   * a single statement and cannot capture a following `else`.
   * The caller is responsible for making sure `out` has room
   * (3 bytes for K_IN/K_OUT/HW_IN, 4 bytes for AUX_IN).
   */

/* select JIS X 0208 kanji: ESC $ B */
#define K_IN()                                  \
        do {                                    \
                if (state != JIS_KANJI) {       \
                        *out++ = ESC;           \
                        *out++ = '$';           \
                        *out++ = 'B';           \
                        state = JIS_KANJI;      \
                }                               \
        } while (0)

/* return to ASCII: ESC ( B */
#define K_OUT()                                 \
        do {                                    \
                if (state != JIS_ASCII) {       \
                        *out++ = ESC;           \
                        *out++ = '(';           \
                        *out++ = 'B';           \
                        state = JIS_ASCII;      \
                }                               \
        } while (0)

/* select JIS X 0201 half-width kana: ESC ( I */
#define HW_IN()                                 \
        do {                                    \
                if (state != JIS_HWKANA) {      \
                        *out++ = ESC;           \
                        *out++ = '(';           \
                        *out++ = 'I';           \
                        state = JIS_HWKANA;     \
                }                               \
        } while (0)

/* select JIS X 0212 auxiliary kanji: ESC $ ( D */
#define AUX_IN()                                \
        do {                                    \
                if (state != JIS_AUXKANJI) {    \
                        *out++ = ESC;           \
                        *out++ = '$';           \
                        *out++ = '(';           \
                        *out++ = 'D';           \
                        state = JIS_AUXKANJI;   \
                }                               \
        } while (0)
 104
 /*
  * Forward declarations of the file-local converters.  They all share
  * one signature: (outbuf, outlen, inbuf), writing the converted form of
  * the NUL-terminated inbuf into outbuf.
  */

 /* conversions between the Japanese legacy encodings */
 105 static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
 106 static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf);
 107 static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
 108
 /* Japanese encodings -> UTF-8 */
 109 static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
 110 static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
 111 static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
 112 static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf);
 113
 /* UTF-8 -> Japanese encodings */
 114 static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf);
 115 static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf);
 116
 /* in-place replacement of bytes that cannot be displayed */
 117 static void conv_unreadable_eucjp(gchar *str);
 118 static void conv_unreadable_8bit(gchar *str);
 119 static void conv_unreadable_latin(gchar *str);
 120
 /* conversions to the display encoding */
 121 static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
 122 static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
 123 static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
 124
 125 static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
 126 static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf);
 127 static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf);
 128
 129 static void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 130 {
 131         const guchar *in = inbuf;
 132         guchar *out = outbuf;
 133         JISState state = JIS_ASCII;
 134
 135         while (*in != '\0') {
 136                 if (*in == ESC) {
 137                         in++;
 138                         if (*in == '$') {
 139                                 if (*(in + 1) == '@' || *(in + 1) == 'B') {
 140                                         state = JIS_KANJI;
 141                                         in += 2;
 142                                 } else if (*(in + 1) == '(' &&
 143                                            *(in + 2) == 'D') {
 144                                         state = JIS_AUXKANJI;
 145                                         in += 3;
 146                                 } else {
 147                                         /* unknown escape sequence */
 148                                         state = JIS_ASCII;
 149                                 }
 150                         } else if (*in == '(') {
 151                                 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
 152                                         state = JIS_ASCII;
 153                                         in += 2;
 154                                 } else if (*(in + 1) == 'I') {
 155                                         state = JIS_HWKANA;
 156                                         in += 2;
 157                                 } else {
 158                                         /* unknown escape sequence */
 159                                         state = JIS_ASCII;
 160                                 }
 161                         } else {
 162                                 /* unknown escape sequence */
 163                                 state = JIS_ASCII;
 164                         }
 165                 } else if (*in == 0x0e) {
 166                         state = JIS_HWKANA;
 167                         in++;
 168                 } else if (*in == 0x0f) {
 169                         state = JIS_ASCII;
 170                         in++;
 171                 } else {
 172                         switch (state) {
 173                         case JIS_ASCII:
 174                                 *out++ = *in++;
 175                                 break;
 176                         case JIS_KANJI:
 177                                 *out++ = *in++ | 0x80;
 178                                 if (*in == '\0') break;
 179                                 *out++ = *in++ | 0x80;
 180                                 break;
 181                         case JIS_HWKANA:
 182                                 *out++ = 0x8e;
 183                                 *out++ = *in++ | 0x80;
 184                                 break;
 185                         case JIS_AUXKANJI:
 186                                 *out++ = 0x8f;
 187                                 *out++ = *in++ | 0x80;
 188                                 if (*in == '\0') break;
 189                                 *out++ = *in++ | 0x80;
 190                                 break;
 191                         }
 192                 }
 193         }
 194
 195         *out = '\0';
 196 }
 197
 198 #define JIS_HWDAKUTEN           0x5e
 199 #define JIS_HWHANDAKUTEN        0x5f
 200
 201 static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
 202 {
 203         static guint16 h2z_tbl[] = {
 204                 /* 0x20 - 0x2f */
 205                 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
 206                 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
 207                 /* 0x30 - 0x3f */
 208                 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
 209                 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
 210                 /* 0x40 - 0x4f */
 211                 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
 212                 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
 213                 /* 0x50 - 0x5f */
 214                 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
 215                 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
 216         };
 217
 218         static guint16 dakuten_tbl[] = {
 219                 /* 0x30 - 0x3f */
 220                 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
 221                 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
 222                 /* 0x40 - 0x4f */
 223                 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
 224                 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
 225         };
 226
 227         static guint16 handakuten_tbl[] = {
 228                 /* 0x4a - 0x4e */
 229                 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
 230         };
 231
 232         guint16 out_code;
 233
 234         jis_code &= 0x7f;
 235         sound_sym &= 0x7f;
 236
 237         if (jis_code < 0x21 || jis_code > 0x5f)
 238                 return 0;
 239
 240         if (sound_sym == JIS_HWDAKUTEN &&
 241             jis_code >= 0x36 && jis_code <= 0x4e) {
 242                 out_code = dakuten_tbl[jis_code - 0x30];
 243                 if (out_code != 0) {
 244                         *outbuf = out_code >> 8;
 245                         *(outbuf + 1) = out_code & 0xff;
 246                         return 2;
 247                 }
 248         }
 249
 250         if (sound_sym == JIS_HWHANDAKUTEN &&
 251             jis_code >= 0x4a && jis_code <= 0x4e) {
 252                 out_code = handakuten_tbl[jis_code - 0x4a];
 253                 *outbuf = out_code >> 8;
 254                 *(outbuf + 1) = out_code & 0xff;
 255                 return 2;
 256         }
 257
 258         out_code = h2z_tbl[jis_code - 0x20];
 259         *outbuf = out_code >> 8;
 260         *(outbuf + 1) = out_code & 0xff;
 261         return 1;
 262 }
 263
 264 static void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
 265 {
 266         const guchar *in = inbuf;
 267         guchar *out = outbuf;
 268         JISState state = JIS_ASCII;
 269
 270         while (*in != '\0') {
 271                 if (IS_ASCII(*in)) {
 272                         K_OUT();
 273                         *out++ = *in++;
 274                 } else if (iseuckanji(*in)) {
 275                         if (iseuckanji(*(in + 1))) {
 276                                 K_IN();
 277                                 *out++ = *in++ & 0x7f;
 278                                 *out++ = *in++ & 0x7f;
 279                         } else {
 280                                 K_OUT();
 281                                 *out++ = SUBST_CHAR;
 282                                 in++;
 283                                 if (*in != '\0' && !IS_ASCII(*in)) {
 284                                         *out++ = SUBST_CHAR;
 285                                         in++;
 286                                 }
 287                         }
 288                 } else if (iseuchwkana1(*in)) {
 289                         if (iseuchwkana2(*(in + 1))) {
 290                                 if (prefs_common.allow_jisx0201_kana) {
 291                                         HW_IN();
 292                                         in++;
 293                                         *out++ = *in++ & 0x7f;
 294                                 } else {
 295                                         guchar jis_ch[2];
 296                                         gint len;
 297
 298                                         if (iseuchwkana1(*(in + 2)) &&
 299                                             iseuchwkana2(*(in + 3)))
 300                                                 len = conv_jis_hantozen
 301                                                         (jis_ch,
 302                                                          *(in + 1), *(in + 3));
 303                                         else
 304                                                 len = conv_jis_hantozen
 305                                                         (jis_ch,
 306                                                          *(in + 1), '\0');
 307                                         if (len == 0)
 308                                                 in += 2;
 309                                         else {
 310                                                 K_IN();
 311                                                 in += len * 2;
 312                                                 *out++ = jis_ch[0];
 313                                                 *out++ = jis_ch[1];
 314                                         }
 315                                 }
 316                         } else {
 317                                 K_OUT();
 318                                 in++;
 319                                 if (*in != '\0' && !IS_ASCII(*in)) {
 320                                         *out++ = SUBST_CHAR;
 321                                         in++;
 322                                 }
 323                         }
 324                 } else if (iseucaux(*in)) {
 325                         in++;
 326                         if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
 327                                 AUX_IN();
 328                                 *out++ = *in++ & 0x7f;
 329                                 *out++ = *in++ & 0x7f;
 330                         } else {
 331                                 K_OUT();
 332                                 if (*in != '\0' && !IS_ASCII(*in)) {
 333                                         *out++ = SUBST_CHAR;
 334                                         in++;
 335                                         if (*in != '\0' && !IS_ASCII(*in)) {
 336                                                 *out++ = SUBST_CHAR;
 337                                                 in++;
 338                                         }
 339                                 }
 340                         }
 341                 } else {
 342                         K_OUT();
 343                         *out++ = SUBST_CHAR;
 344                         in++;
 345                 }
 346         }
 347
 348         K_OUT();
 349         *out = '\0';
 350 }
 351
 352 static void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 353 {
 354         const guchar *in = inbuf;
 355         guchar *out = outbuf;
 356
 357         while (*in != '\0') {
 358                 if (IS_ASCII(*in)) {
 359                         *out++ = *in++;
 360                 } else if (issjiskanji1(*in)) {
 361                         if (issjiskanji2(*(in + 1))) {
 362                                 guchar out1 = *in;
 363                                 guchar out2 = *(in + 1);
 364                                 guchar row;
 365
 366                                 row = out1 < 0xa0 ? 0x70 : 0xb0;
 367                                 if (out2 < 0x9f) {
 368                                         out1 = (out1 - row) * 2 - 1;
 369                                         out2 -= out2 > 0x7f ? 0x20 : 0x1f;
 370                                 } else {
 371                                         out1 = (out1 - row) * 2;
 372                                         out2 -= 0x7e;
 373                                 }
 374
 375                                 *out++ = out1 | 0x80;
 376                                 *out++ = out2 | 0x80;
 377                                 in += 2;
 378                         } else {
 379                                 *out++ = SUBST_CHAR;
 380                                 in++;
 381                                 if (*in != '\0' && !IS_ASCII(*in)) {
 382                                         *out++ = SUBST_CHAR;
 383                                         in++;
 384                                 }
 385                         }
 386                 } else if (issjishwkana(*in)) {
 387                         *out++ = 0x8e;
 388                         *out++ = *in++;
 389                 } else {
 390                         *out++ = SUBST_CHAR;
 391                         in++;
 392                 }
 393         }
 394
 395         *out = '\0';
 396 }
 397
 398 static void conv_jistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
 399 {
 400         gchar *eucstr;
 401
 402         Xalloca(eucstr, outlen, return);
 403
 404         conv_jistoeuc(eucstr, outlen, inbuf);
 405         conv_euctoutf8(outbuf, outlen, eucstr);
 406 }
 407
 408 static void conv_sjistoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
 409 {
 410         gchar *tmpstr;
 411
 412         tmpstr = conv_iconv_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);
 413         if (tmpstr) {
 414                 strncpy2(outbuf, tmpstr, outlen);
 415                 g_free(tmpstr);
 416         } else
 417                 strncpy2(outbuf, inbuf, outlen);
 418 }
 419
 420 static void conv_euctoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
 421 {
 422         static iconv_t cd = (iconv_t)-1;
 423         static gboolean iconv_ok = TRUE;
 424         gchar *tmpstr;
 425
 426         if (cd == (iconv_t)-1) {
 427                 if (!iconv_ok) {
 428                         strncpy2(outbuf, inbuf, outlen);
 429                         return;
 430                 }
 431                 cd = iconv_open(CS_UTF_8, CS_EUC_JP_MS);
 432                 if (cd == (iconv_t)-1) {
 433                         cd = iconv_open(CS_UTF_8, CS_EUC_JP);
 434                         if (cd == (iconv_t)-1) {
 435                                 g_warning("conv_euctoutf8(): %s\n",
 436                                           g_strerror(errno));
 437                                 iconv_ok = FALSE;
 438                                 strncpy2(outbuf, inbuf, outlen);
 439                                 return;
 440                         }
 441                 }
 442         }
 443
 444         tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
 445         if (tmpstr) {
 446                 strncpy2(outbuf, tmpstr, outlen);
 447                 g_free(tmpstr);
 448         } else
 449                 strncpy2(outbuf, inbuf, outlen);
 450 }
 451
 452 static void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
 453 {
 454         switch (conv_guess_ja_encoding(inbuf)) {
 455         case C_ISO_2022_JP:
 456                 conv_jistoutf8(outbuf, outlen, inbuf);
 457                 break;
 458         case C_SHIFT_JIS:
 459                 conv_sjistoutf8(outbuf, outlen, inbuf);
 460                 break;
 461         case C_EUC_JP:
 462                 conv_euctoutf8(outbuf, outlen, inbuf);
 463                 break;
 464         default:
 465                 strncpy2(outbuf, inbuf, outlen);
 466                 break;
 467         }
 468 }
 469
 470 static void conv_utf8toeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 471 {
 472         static iconv_t cd = (iconv_t)-1;
 473         static gboolean iconv_ok = TRUE;
 474         gchar *tmpstr;
 475
 476         if (cd == (iconv_t)-1) {
 477                 if (!iconv_ok) {
 478                         strncpy2(outbuf, inbuf, outlen);
 479                         return;
 480                 }
 481                 cd = iconv_open(CS_EUC_JP_MS, CS_UTF_8);
 482                 if (cd == (iconv_t)-1) {
 483                         cd = iconv_open(CS_EUC_JP, CS_UTF_8);
 484                         if (cd == (iconv_t)-1) {
 485                                 g_warning("conv_utf8toeuc(): %s\n",
 486                                           g_strerror(errno));
 487                                 iconv_ok = FALSE;
 488                                 strncpy2(outbuf, inbuf, outlen);
 489                                 return;
 490                         }
 491                 }
 492         }
 493
 494         tmpstr = conv_iconv_strdup_with_cd(inbuf, cd);
 495         if (tmpstr) {
 496                 strncpy2(outbuf, tmpstr, outlen);
 497                 g_free(tmpstr);
 498         } else
 499                 strncpy2(outbuf, inbuf, outlen);
 500 }
 501
 502 static void conv_utf8tojis(gchar *outbuf, gint outlen, const gchar *inbuf)
 503 {
 504         gchar *eucstr;
 505
 506         Xalloca(eucstr, outlen, return);
 507
 508         conv_utf8toeuc(eucstr, outlen, inbuf);
 509         conv_euctojis(outbuf, outlen, eucstr);
 510 }
 511
 /*
  * Printability map for EUC-JP lead bytes 0xa2 - 0xa8, consulted by
  * isprintableeuckanji().  Row index is (lead byte - 0xa2), column
  * index is (trail byte - 0xa0); a non-zero entry marks the two-byte
  * code as printable.
  */
 512 static gchar valid_eucjp_tbl[][96] = {
 513         /* 0xa2a0 - 0xa2ff */
 514         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0,
 515           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
 516           1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
 517           1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 1, 1, 1, 1,
 518           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
 519           0, 0, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 1, 0 },
 520
 521         /* 0xa3a0 - 0xa3ff */
 522         { 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 523           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 0, 0,
 524           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 525           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
 526           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 527           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0 },
 528
 529         /* 0xa4a0 - 0xa4ff */
 530         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 531           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 532           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 533           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 534           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 535           1, 1, 1, 1, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
 536
 537         /* 0xa5a0 - 0xa5ff */
 538         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 539           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 540           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 541           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 542           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 543           1, 1, 1, 1, 1, 1, 1, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
 544
 545         /* 0xa6a0 - 0xa6ff */
 546         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 547           1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
 548           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 549           1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
 550           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 551           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
 552
 553         /* 0xa7a0 - 0xa7ff */
 554         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 555           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 556           1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 557           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 558           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 559           1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
 560
 561         /* 0xa8a0 - 0xa8ff */
 562         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 563           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 564           1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 565           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 566           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 567           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 }
 568 };
 569
 570 static gboolean isprintableeuckanji(guchar c1, guchar c2)
 571 {
 572         if (c1 <= 0xa0 || c1 >= 0xf5)
 573                 return FALSE;
 574         if (c2 <= 0xa0 || c2 == 0xff)
 575                 return FALSE;
 576
 577         if (c1 >= 0xa9 && c1 <= 0xaf)
 578                 return FALSE;
 579
 580         if (c1 >= 0xa2 && c1 <= 0xa8)
 581                 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
 582
 583         if (c1 == 0xcf) {
 584                 if (c2 >= 0xd4 && c2 <= 0xfe)
 585                         return FALSE;
 586         } else if (c1 == 0xf4) {
 587                 if (c2 >= 0xa7 && c2 <= 0xfe)
 588                         return FALSE;
 589         }
 590
 591         return TRUE;
 592 }
 593
 594 static void conv_unreadable_eucjp(gchar *str)
 595 {
 596         register guchar *p = str;
 597
 598         while (*p != '\0') {
 599                 if (IS_ASCII(*p)) {
 600                         /* convert CR+LF -> LF */
 601                         if (*p == '\r' && *(p + 1) == '\n')
 602                                 memmove(p, p + 1, strlen(p));
 603                         /* printable 7 bit code */
 604                         p++;
 605                 } else if (iseuckanji(*p)) {
 606                         if (isprintableeuckanji(*p, *(p + 1))) {
 607                                 /* printable euc-jp code */
 608                                 p += 2;
 609                         } else {
 610                                 /* substitute unprintable code */
 611                                 *p++ = SUBST_CHAR;
 612                                 if (*p != '\0') {
 613                                         if (IS_ASCII(*p))
 614                                                 p++;
 615                                         else
 616                                                 *p++ = SUBST_CHAR;
 617                                 }
 618                         }
 619                 } else if (iseuchwkana1(*p)) {
 620                         if (iseuchwkana2(*(p + 1)))
 621                                 /* euc-jp hankaku kana */
 622                                 p += 2;
 623                         else
 624                                 *p++ = SUBST_CHAR;
 625                 } else if (iseucaux(*p)) {
 626                         if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
 627                                 /* auxiliary kanji */
 628                                 p += 3;
 629                         } else
 630                                 *p++ = SUBST_CHAR;
 631                 } else
 632                         /* substitute unprintable 1 byte code */
 633                         *p++ = SUBST_CHAR;
 634         }
 635 }
 636
 637 static void conv_unreadable_8bit(gchar *str)
 638 {
 639         register guchar *p = str;
 640
 641         while (*p != '\0') {
 642                 /* convert CR+LF -> LF */
 643                 if (*p == '\r' && *(p + 1) == '\n')
 644                         memmove(p, p + 1, strlen(p));
 645                 else if (!IS_ASCII(*p)) *p = SUBST_CHAR;
 646                 p++;
 647         }
 648 }
 649
 650 static void conv_unreadable_latin(gchar *str)
 651 {
 652         register guchar *p = str;
 653
 654         while (*p != '\0') {
 655                 /* convert CR+LF -> LF */
 656                 if (*p == '\r' && *(p + 1) == '\n')
 657                         memmove(p, p + 1, strlen(p));
 658                 else if ((*p & 0xff) >= 0x7f)
 659                         *p = SUBST_CHAR;
 660                 p++;
 661         }
 662 }
 663
 664 #define NCV     '\0'
 665
 666 void conv_mb_alnum(gchar *str)
 667 {
 668         static guchar char_tbl[] = {
 669                 /* 0xa0 - 0xaf */
 670                 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
 671                 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
 672                 /* 0xb0 - 0xbf */
 673                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 674                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 675                 /* 0xc0 - 0xcf */
 676                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 677                 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
 678                 /* 0xd0 - 0xdf */
 679                 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
 680                 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
 681                 /* 0xe0 - 0xef */
 682                 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
 683                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
 684         };
 685
 686         register guchar *p = str;
 687         register gint len;
 688
 689         len = strlen(str);
 690
 691         while (len > 1) {
 692                 if (*p == 0xa3) {
 693                         register guchar ch = *(p + 1);
 694
 695                         if (ch >= 0xb0 && ch <= 0xfa) {
 696                                 /* [a-zA-Z] */
 697                                 *p = ch & 0x7f;
 698                                 p++;
 699                                 len--;
 700                                 memmove(p, p + 1, len);
 701                                 len--;
 702                         } else  {
 703                                 p += 2;
 704                                 len -= 2;
 705                         }
 706                 } else if (*p == 0xa1) {
 707                         register guchar ch = *(p + 1);
 708
 709                         if (ch >= 0xa0 && ch <= 0xef &&
 710                             NCV != char_tbl[ch - 0xa0]) {
 711                                 *p = char_tbl[ch - 0xa0];
 712                                 p++;
 713                                 len--;
 714                                 memmove(p, p + 1, len);
 715                                 len--;
 716                         } else {
 717                                 p += 2;
 718                                 len -= 2;
 719                         }
 720                 } else if (iseuckanji(*p)) {
 721                         p += 2;
 722                         len -= 2;
 723                 } else {
 724                         p++;
 725                         len--;
 726                 }
 727         }
 728 }
 729
 730 CharSet conv_guess_ja_encoding(const gchar *str)
 731 {
 732         const guchar *p = str;
 733         CharSet guessed = C_US_ASCII;
 734
 735         while (*p != '\0') {
 736                 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
 737                         if (guessed == C_US_ASCII)
 738                                 return C_ISO_2022_JP;
 739                         p += 2;
 740                 } else if (IS_ASCII(*p)) {
 741                         p++;
 742                 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
 743                         if (*p >= 0xfd && *p <= 0xfe)
 744                                 return C_EUC_JP;
 745                         else if (guessed == C_SHIFT_JIS) {
 746                                 if ((issjiskanji1(*p) &&
 747                                      issjiskanji2(*(p + 1))) ||
 748                                     issjishwkana(*p))
 749                                         guessed = C_SHIFT_JIS;
 750                                 else
 751                                         guessed = C_EUC_JP;
 752                         } else
 753                                 guessed = C_EUC_JP;
 754                         p += 2;
 755                 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
 756                         if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
 757                                 guessed = C_SHIFT_JIS;
 758                         else
 759                                 return C_SHIFT_JIS;
 760                         p += 2;
 761                 } else if (issjishwkana(*p)) {
 762                         guessed = C_SHIFT_JIS;
 763                         p++;
 764                 } else {
 765                         p++;
 766                 }
 767         }
 768
 769         return guessed;
 770 }
 771
 772 static void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 773 {
 774         conv_jistoutf8(outbuf, outlen, inbuf);
 775 }
 776
 777 static void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 778 {
 779         conv_sjistoutf8(outbuf, outlen, inbuf);
 780 }
 781
 782 static void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 783 {
 784         conv_euctoutf8(outbuf, outlen, inbuf);
 785 }
 786
 787 void conv_utf8todisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 788 {
 789         strncpy2(outbuf, inbuf, outlen);
 790 }
 791
 792 static void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 793 {
 794         conv_anytoutf8(outbuf, outlen, inbuf);
 795 }
 796
 797 static void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 798 {
 799         strncpy2(outbuf, inbuf, outlen);
 800         conv_unreadable_8bit(outbuf);
 801 }
 802
 803 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 804 {
 805         gchar *tmpstr;
 806
 807         tmpstr = conv_iconv_strdup(inbuf, conv_get_locale_charset_str(),
 808                                    CS_INTERNAL);
 809         if (tmpstr) {
 810                 strncpy2(outbuf, tmpstr, outlen);
 811                 g_free(tmpstr);
 812         } else
 813                 strncpy2(outbuf, inbuf, outlen);
 814 }
 815
 816 static void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
 817 {
 818         strncpy2(outbuf, inbuf, outlen);
 819 }
 820
 821 CodeConverter *conv_code_converter_new(const gchar *src_charset)
 822 {
 823         CodeConverter *conv;
 824
 825         conv = g_new0(CodeConverter, 1);
 826         conv->code_conv_func = conv_get_code_conv_func(src_charset, NULL);
 827         conv->charset_str = g_strdup(src_charset);
 828         conv->charset = conv_get_charset_from_str(src_charset);
 829
 830         return conv;
 831 }
 832
 833 void conv_code_converter_destroy(CodeConverter *conv)
 834 {
 835         g_free(conv->charset_str);
 836         g_free(conv);
 837 }
 838
 839 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
 840                   const gchar *inbuf)
 841 {
 842         if (conv->code_conv_func != conv_noconv)
 843                 conv->code_conv_func(outbuf, outlen, inbuf);
 844         else {
 845                 gchar *str;
 846
 847                 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
 848                 if (!str)
 849                         return -1;
 850                 else {
 851                         strncpy2(outbuf, str, outlen);
 852                         g_free(str);
 853                 }
 854         }
 855
 856         return 0;
 857 }
 858
 859 gchar *conv_codeset_strdup(const gchar *inbuf,
 860                            const gchar *src_code, const gchar *dest_code)
 861 {
 862         gchar *buf;
 863         size_t len;
 864         CodeConvFunc conv_func;
 865
 866         conv_func = conv_get_code_conv_func(src_code, dest_code);
 867         if (conv_func != conv_noconv) {
 868                 len = (strlen(inbuf) + 1) * 3;
 869                 buf = g_malloc(len);
 870                 if (!buf) return NULL;
 871
 872                 conv_func(buf, len, inbuf);
 873                 return g_realloc(buf, strlen(buf) + 1);
 874         }
 875
 876         return conv_iconv_strdup(inbuf, src_code, dest_code);
 877 }
 878
 879 CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
 880                                      const gchar *dest_charset_str)
 881 {
 882         CodeConvFunc code_conv = conv_noconv;
 883         CharSet src_charset;
 884         CharSet dest_charset;
 885
 886         if (!src_charset_str)
 887                 src_charset = conv_get_locale_charset();
 888         else
 889                 src_charset = conv_get_charset_from_str(src_charset_str);
 890
 891         /* auto detection mode */
 892         if (!src_charset_str && !dest_charset_str) {
 893                 if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
 894                         return conv_anytodisp;
 895                 else
 896                         return conv_noconv;
 897         }
 898
 899         dest_charset = conv_get_charset_from_str(dest_charset_str);
 900
 901         if (dest_charset == C_US_ASCII)
 902                 return conv_ustodisp;
 903
 904         switch (src_charset) {
 905         case C_US_ASCII:
 906         case C_ISO_8859_1:
 907         case C_ISO_8859_2:
 908         case C_ISO_8859_3:
 909         case C_ISO_8859_4:
 910         case C_ISO_8859_5:
 911         case C_ISO_8859_6:
 912         case C_ISO_8859_7:
 913         case C_ISO_8859_8:
 914         case C_ISO_8859_9:
 915         case C_ISO_8859_10:
 916         case C_ISO_8859_11:
 917         case C_ISO_8859_13:
 918         case C_ISO_8859_14:
 919         case C_ISO_8859_15:
 920                 break;
 921         case C_ISO_2022_JP:
 922         case C_ISO_2022_JP_2:
 923         case C_ISO_2022_JP_3:
 924                 if (dest_charset == C_AUTO)
 925                         code_conv = conv_jistodisp;
 926                 else if (dest_charset == C_EUC_JP)
 927                         code_conv = conv_jistoeuc;
 928                 else if (dest_charset == C_UTF_8)
 929                         code_conv = conv_jistoutf8;
 930                 break;
 931         case C_SHIFT_JIS:
 932                 if (dest_charset == C_AUTO)
 933                         code_conv = conv_sjistodisp;
 934                 else if (dest_charset == C_EUC_JP)
 935                         code_conv = conv_sjistoeuc;
 936                 else if (dest_charset == C_UTF_8)
 937                         code_conv = conv_sjistoutf8;
 938                 break;
 939         case C_EUC_JP:
 940                 if (dest_charset == C_AUTO)
 941                         code_conv = conv_euctodisp;
 942                 else if (dest_charset == C_ISO_2022_JP   ||
 943                          dest_charset == C_ISO_2022_JP_2 ||
 944                          dest_charset == C_ISO_2022_JP_3)
 945                         code_conv = conv_euctojis;
 946                 else if (dest_charset == C_UTF_8)
 947                         code_conv = conv_euctoutf8;
 948                 break;
 949         case C_UTF_8:
 950                 if (dest_charset == C_EUC_JP)
 951                         code_conv = conv_utf8toeuc;
 952                 else if (dest_charset == C_ISO_2022_JP   ||
 953                          dest_charset == C_ISO_2022_JP_2 ||
 954                          dest_charset == C_ISO_2022_JP_3)
 955                         code_conv = conv_utf8tojis;
 956                 break;
 957         default:
 958                 break;
 959         }
 960
 961         return code_conv;
 962 }
 963
 964 gchar *conv_iconv_strdup(const gchar *inbuf,
 965                          const gchar *src_code, const gchar *dest_code)
 966 {
 967         iconv_t cd;
 968         gchar *outbuf;
 969
 970         if (!src_code)
 971                 src_code = conv_get_outgoing_charset_str();
 972         if (!dest_code)
 973                 dest_code = CS_INTERNAL;
 974
 975         /* don't convert if src and dest codeset are identical */
 976         if (!strcasecmp(src_code, dest_code))
 977                 return g_strdup(inbuf);
 978
 979         /* don't convert if current codeset is US-ASCII */
 980         if (!strcasecmp(dest_code, CS_US_ASCII))
 981                 return g_strdup(inbuf);
 982
 983         cd = iconv_open(dest_code, src_code);
 984         if (cd == (iconv_t)-1)
 985                 return NULL;
 986
 987         outbuf = conv_iconv_strdup_with_cd(inbuf, cd);
 988
 989         iconv_close(cd);
 990
 991         return outbuf;
 992 }
 993
 994 gchar *conv_iconv_strdup_with_cd(const gchar *inbuf, iconv_t cd)
 995 {
 996         const gchar *inbuf_p;
 997         gchar *outbuf;
 998         gchar *outbuf_p;
 999         size_t in_size;
1000         size_t in_left;
1001         size_t out_size;
1002         size_t out_left;
1003         size_t n_conv;
1004         size_t len;
1005
1006         inbuf_p = inbuf;
1007         in_size = strlen(inbuf);
1008         in_left = in_size;
1009         out_size = (in_size + 1) * 2;
1010         outbuf = g_malloc(out_size);
1011         outbuf_p = outbuf;
1012         out_left = out_size;
1013
1014 #define EXPAND_BUF()                            \
1015 {                                               \
1016         len = outbuf_p - outbuf;                \
1017         out_size *= 2;                          \
1018         outbuf = g_realloc(outbuf, out_size);   \
1019         outbuf_p = outbuf + len;                \
1020         out_left = out_size - len;              \
1021 }
1022
1023         while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
1024                                &outbuf_p, &out_left)) == (size_t)-1) {
1025                 if (EILSEQ == errno) {
1026                         //g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
1027                         inbuf_p++;
1028                         in_left--;
1029                         if (out_left == 0) {
1030                                 EXPAND_BUF();
1031                         }
1032                         *outbuf_p++ = SUBST_CHAR;
1033                         out_left--;
1034                 } else if (EINVAL == errno) {
1035                         break;
1036                 } else if (E2BIG == errno) {
1037                         EXPAND_BUF();
1038                 } else {
1039                         g_warning("conv_iconv_strdup(): %s\n",
1040                                   g_strerror(errno));
1041                         break;
1042                 }
1043         }
1044
1045         while ((n_conv = iconv(cd, NULL, NULL, &outbuf_p, &out_left)) ==
1046                (size_t)-1) {
1047                 if (E2BIG == errno) {
1048                         EXPAND_BUF();
1049                 } else {
1050                         g_warning("conv_iconv_strdup(): %s\n",
1051                                   g_strerror(errno));
1052                         break;
1053                 }
1054         }
1055
1056 #undef EXPAND_BUF
1057
1058         len = outbuf_p - outbuf;
1059         outbuf = g_realloc(outbuf, len + 1);
1060         outbuf[len] = '\0';
1061
1062         return outbuf;
1063 }
1064
1065 static const struct {
1066         CharSet charset;
1067         gchar *const name;
1068 } charsets[] = {
1069         {C_US_ASCII,            CS_US_ASCII},
1070         {C_US_ASCII,            CS_ANSI_X3_4_1968},
1071         {C_UTF_8,               CS_UTF_8},
1072         {C_UTF_7,               CS_UTF_7},
1073         {C_ISO_8859_1,          CS_ISO_8859_1},
1074         {C_ISO_8859_2,          CS_ISO_8859_2},
1075         {C_ISO_8859_3,          CS_ISO_8859_3},
1076         {C_ISO_8859_4,          CS_ISO_8859_4},
1077         {C_ISO_8859_5,          CS_ISO_8859_5},
1078         {C_ISO_8859_6,          CS_ISO_8859_6},
1079         {C_ISO_8859_7,          CS_ISO_8859_7},
1080         {C_ISO_8859_8,          CS_ISO_8859_8},
1081         {C_ISO_8859_9,          CS_ISO_8859_9},
1082         {C_ISO_8859_10,         CS_ISO_8859_10},
1083         {C_ISO_8859_11,         CS_ISO_8859_11},
1084         {C_ISO_8859_13,         CS_ISO_8859_13},
1085         {C_ISO_8859_14,         CS_ISO_8859_14},
1086         {C_ISO_8859_15,         CS_ISO_8859_15},
1087         {C_BALTIC,              CS_BALTIC},
1088         {C_CP1250,              CS_CP1250},
1089         {C_CP1251,              CS_CP1251},
1090         {C_CP1252,              CS_CP1252},
1091         {C_CP1253,              CS_CP1253},
1092         {C_CP1254,              CS_CP1254},
1093         {C_CP1255,              CS_CP1255},
1094         {C_CP1256,              CS_CP1256},
1095         {C_CP1257,              CS_CP1257},
1096         {C_CP1258,              CS_CP1258},
1097         {C_WINDOWS_1250,        CS_WINDOWS_1250},
1098         {C_WINDOWS_1251,        CS_WINDOWS_1251},
1099         {C_WINDOWS_1252,        CS_WINDOWS_1252},
1100         {C_WINDOWS_1253,        CS_WINDOWS_1253},
1101         {C_WINDOWS_1254,        CS_WINDOWS_1254},
1102         {C_WINDOWS_1255,        CS_WINDOWS_1255},
1103         {C_WINDOWS_1256,        CS_WINDOWS_1256},
1104         {C_WINDOWS_1257,        CS_WINDOWS_1257},
1105         {C_WINDOWS_1258,        CS_WINDOWS_1258},
1106         {C_KOI8_R,              CS_KOI8_R},
1107         {C_KOI8_T,              CS_KOI8_T},
1108         {C_KOI8_U,              CS_KOI8_U},
1109         {C_ISO_2022_JP,         CS_ISO_2022_JP},
1110         {C_ISO_2022_JP_2,       CS_ISO_2022_JP_2},
1111         {C_ISO_2022_JP_3,       CS_ISO_2022_JP_3},
1112         {C_EUC_JP,              CS_EUC_JP},
1113         {C_EUC_JP,              CS_EUCJP},
1114         {C_EUC_JP_MS,           CS_EUC_JP_MS},
1115         {C_SHIFT_JIS,           CS_SHIFT_JIS},
1116         {C_SHIFT_JIS,           CS_SHIFT__JIS},
1117         {C_SHIFT_JIS,           CS_SJIS},
1118         {C_ISO_2022_KR,         CS_ISO_2022_KR},
1119         {C_EUC_KR,              CS_EUC_KR},
1120         {C_ISO_2022_CN,         CS_ISO_2022_CN},
1121         {C_EUC_CN,              CS_EUC_CN},
1122         {C_GB2312,              CS_GB2312},
1123         {C_GBK,                 CS_GBK},
1124         {C_EUC_TW,              CS_EUC_TW},
1125         {C_BIG5,                CS_BIG5},
1126         {C_BIG5_HKSCS,          CS_BIG5_HKSCS},
1127         {C_TIS_620,             CS_TIS_620},
1128         {C_WINDOWS_874,         CS_WINDOWS_874},
1129         {C_GEORGIAN_PS,         CS_GEORGIAN_PS},
1130         {C_TCVN5712_1,          CS_TCVN5712_1},
1131 };
1132
1133 static const struct {
1134         gchar *const locale;
1135         CharSet charset;
1136         CharSet out_charset;
1137 } locale_table[] = {
1138         {"ja_JP.eucJP"  , C_EUC_JP      , C_ISO_2022_JP},
1139         {"ja_JP.EUC-JP" , C_EUC_JP      , C_ISO_2022_JP},
1140         {"ja_JP.EUC"    , C_EUC_JP      , C_ISO_2022_JP},
1141         {"ja_JP.ujis"   , C_EUC_JP      , C_ISO_2022_JP},
1142         {"ja_JP.SJIS"   , C_SHIFT_JIS   , C_ISO_2022_JP},
1143         {"ja_JP.JIS"    , C_ISO_2022_JP , C_ISO_2022_JP},
1144         {"ja_JP"        , C_EUC_JP      , C_ISO_2022_JP},
1145         {"ko_KR.EUC-KR" , C_EUC_KR      , C_EUC_KR},
1146         {"ko_KR"        , C_EUC_KR      , C_EUC_KR},
1147         {"zh_CN.GB2312" , C_GB2312      , C_GB2312},
1148         {"zh_CN.GBK"    , C_GBK         , C_GB2312},
1149         {"zh_CN"        , C_GB2312      , C_GB2312},
1150         {"zh_HK"        , C_BIG5_HKSCS  , C_BIG5_HKSCS},
1151         {"zh_TW.eucTW"  , C_EUC_TW      , C_BIG5},
1152         {"zh_TW.EUC-TW" , C_EUC_TW      , C_BIG5},
1153         {"zh_TW.Big5"   , C_BIG5        , C_BIG5},
1154         {"zh_TW"        , C_BIG5        , C_BIG5},
1155
1156         {"ru_RU.KOI8-R" , C_KOI8_R      , C_KOI8_R},
1157         {"ru_RU.KOI8R"  , C_KOI8_R      , C_KOI8_R},
1158         {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
1159         {"ru_RU"        , C_ISO_8859_5  , C_KOI8_R},
1160         {"tg_TJ"        , C_KOI8_T      , C_KOI8_T},
1161         {"ru_UA"        , C_KOI8_U      , C_KOI8_U},
1162         {"uk_UA.CP1251" , C_WINDOWS_1251, C_KOI8_U},
1163         {"uk_UA"        , C_KOI8_U      , C_KOI8_U},
1164
1165         {"be_BY"        , C_WINDOWS_1251, C_WINDOWS_1251},
1166         {"bg_BG"        , C_WINDOWS_1251, C_WINDOWS_1251},
1167
1168         {"yi_US"        , C_WINDOWS_1255, C_WINDOWS_1255},
1169
1170         {"af_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
1171         {"br_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
1172         {"ca_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
1173         {"da_DK"        , C_ISO_8859_1  , C_ISO_8859_1},
1174         {"de_AT"        , C_ISO_8859_1  , C_ISO_8859_1},
1175         {"de_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
1176         {"de_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
1177         {"de_DE"        , C_ISO_8859_1  , C_ISO_8859_1},
1178         {"de_LU"        , C_ISO_8859_1  , C_ISO_8859_1},
1179         {"en_AU"        , C_ISO_8859_1  , C_ISO_8859_1},
1180         {"en_BW"        , C_ISO_8859_1  , C_ISO_8859_1},
1181         {"en_CA"        , C_ISO_8859_1  , C_ISO_8859_1},
1182         {"en_DK"        , C_ISO_8859_1  , C_ISO_8859_1},
1183         {"en_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
1184         {"en_HK"        , C_ISO_8859_1  , C_ISO_8859_1},
1185         {"en_IE"        , C_ISO_8859_1  , C_ISO_8859_1},
1186         {"en_NZ"        , C_ISO_8859_1  , C_ISO_8859_1},
1187         {"en_PH"        , C_ISO_8859_1  , C_ISO_8859_1},
1188         {"en_SG"        , C_ISO_8859_1  , C_ISO_8859_1},
1189         {"en_US"        , C_ISO_8859_1  , C_ISO_8859_1},
1190         {"en_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
1191         {"en_ZW"        , C_ISO_8859_1  , C_ISO_8859_1},
1192         {"es_AR"        , C_ISO_8859_1  , C_ISO_8859_1},
1193         {"es_BO"        , C_ISO_8859_1  , C_ISO_8859_1},
1194         {"es_CL"        , C_ISO_8859_1  , C_ISO_8859_1},
1195         {"es_CO"        , C_ISO_8859_1  , C_ISO_8859_1},
1196         {"es_CR"        , C_ISO_8859_1  , C_ISO_8859_1},
1197         {"es_DO"        , C_ISO_8859_1  , C_ISO_8859_1},
1198         {"es_EC"        , C_ISO_8859_1  , C_ISO_8859_1},
1199         {"es_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
1200         {"es_GT"        , C_ISO_8859_1  , C_ISO_8859_1},
1201         {"es_HN"        , C_ISO_8859_1  , C_ISO_8859_1},
1202         {"es_MX"        , C_ISO_8859_1  , C_ISO_8859_1},
1203         {"es_NI"        , C_ISO_8859_1  , C_ISO_8859_1},
1204         {"es_PA"        , C_ISO_8859_1  , C_ISO_8859_1},
1205         {"es_PE"        , C_ISO_8859_1  , C_ISO_8859_1},
1206         {"es_PR"        , C_ISO_8859_1  , C_ISO_8859_1},
1207         {"es_PY"        , C_ISO_8859_1  , C_ISO_8859_1},
1208         {"es_SV"        , C_ISO_8859_1  , C_ISO_8859_1},
1209         {"es_US"        , C_ISO_8859_1  , C_ISO_8859_1},
1210         {"es_UY"        , C_ISO_8859_1  , C_ISO_8859_1},
1211         {"es_VE"        , C_ISO_8859_1  , C_ISO_8859_1},
1212         {"et_EE"        , C_ISO_8859_1  , C_ISO_8859_1},
1213         {"eu_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
1214         {"fi_FI"        , C_ISO_8859_1  , C_ISO_8859_1},
1215         {"fo_FO"        , C_ISO_8859_1  , C_ISO_8859_1},
1216         {"fr_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
1217         {"fr_CA"        , C_ISO_8859_1  , C_ISO_8859_1},
1218         {"fr_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
1219         {"fr_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
1220         {"fr_LU"        , C_ISO_8859_1  , C_ISO_8859_1},
1221         {"ga_IE"        , C_ISO_8859_1  , C_ISO_8859_1},
1222         {"gl_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
1223         {"gv_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
1224         {"id_ID"        , C_ISO_8859_1  , C_ISO_8859_1},
1225         {"is_IS"        , C_ISO_8859_1  , C_ISO_8859_1},
1226         {"it_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
1227         {"it_IT"        , C_ISO_8859_1  , C_ISO_8859_1},
1228         {"kl_GL"        , C_ISO_8859_1  , C_ISO_8859_1},
1229         {"kw_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
1230         {"ms_MY"        , C_ISO_8859_1  , C_ISO_8859_1},
1231         {"nl_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
1232         {"nl_NL"        , C_ISO_8859_1  , C_ISO_8859_1},
1233         {"nn_NO"        , C_ISO_8859_1  , C_ISO_8859_1},
1234         {"no_NO"        , C_ISO_8859_1  , C_ISO_8859_1},
1235         {"oc_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
1236         {"pt_BR"        , C_ISO_8859_1  , C_ISO_8859_1},
1237         {"pt_PT"        , C_ISO_8859_1  , C_ISO_8859_1},
1238         {"sq_AL"        , C_ISO_8859_1  , C_ISO_8859_1},
1239         {"sv_FI"        , C_ISO_8859_1  , C_ISO_8859_1},
1240         {"sv_SE"        , C_ISO_8859_1  , C_ISO_8859_1},
1241         {"tl_PH"        , C_ISO_8859_1  , C_ISO_8859_1},
1242         {"uz_UZ"        , C_ISO_8859_1  , C_ISO_8859_1},
1243         {"wa_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
1244
1245         {"bs_BA"        , C_ISO_8859_2  , C_ISO_8859_2},
1246         {"cs_CZ"        , C_ISO_8859_2  , C_ISO_8859_2},
1247         {"hr_HR"        , C_ISO_8859_2  , C_ISO_8859_2},
1248         {"hu_HU"        , C_ISO_8859_2  , C_ISO_8859_2},
1249         {"pl_PL"        , C_ISO_8859_2  , C_ISO_8859_2},
1250         {"ro_RO"        , C_ISO_8859_2  , C_ISO_8859_2},
1251         {"sk_SK"        , C_ISO_8859_2  , C_ISO_8859_2},
1252         {"sl_SI"        , C_ISO_8859_2  , C_ISO_8859_2},
1253
1254         {"sr_YU@cyrillic"       , C_ISO_8859_5  , C_ISO_8859_5},
1255         {"sr_YU"                , C_ISO_8859_2  , C_ISO_8859_2},
1256
1257         {"mt_MT"                , C_ISO_8859_3  , C_ISO_8859_3},
1258
1259         {"lt_LT.iso88594"       , C_ISO_8859_4  , C_ISO_8859_4},
1260         {"lt_LT.ISO8859-4"      , C_ISO_8859_4  , C_ISO_8859_4},
1261         {"lt_LT.ISO_8859-4"     , C_ISO_8859_4  , C_ISO_8859_4},
1262         {"lt_LT"                , C_ISO_8859_13 , C_ISO_8859_13},
1263
1264         {"mk_MK"        , C_ISO_8859_5  , C_ISO_8859_5},
1265
1266         {"ar_AE"        , C_ISO_8859_6  , C_ISO_8859_6},
1267         {"ar_BH"        , C_ISO_8859_6  , C_ISO_8859_6},
1268         {"ar_DZ"        , C_ISO_8859_6  , C_ISO_8859_6},
1269         {"ar_EG"        , C_ISO_8859_6  , C_ISO_8859_6},
1270         {"ar_IQ"        , C_ISO_8859_6  , C_ISO_8859_6},
1271         {"ar_JO"        , C_ISO_8859_6  , C_ISO_8859_6},
1272         {"ar_KW"        , C_ISO_8859_6  , C_ISO_8859_6},
1273         {"ar_LB"        , C_ISO_8859_6  , C_ISO_8859_6},
1274         {"ar_LY"        , C_ISO_8859_6  , C_ISO_8859_6},
1275         {"ar_MA"        , C_ISO_8859_6  , C_ISO_8859_6},
1276         {"ar_OM"        , C_ISO_8859_6  , C_ISO_8859_6},
1277         {"ar_QA"        , C_ISO_8859_6  , C_ISO_8859_6},
1278         {"ar_SA"        , C_ISO_8859_6  , C_ISO_8859_6},
1279         {"ar_SD"        , C_ISO_8859_6  , C_ISO_8859_6},
1280         {"ar_SY"        , C_ISO_8859_6  , C_ISO_8859_6},
1281         {"ar_TN"        , C_ISO_8859_6  , C_ISO_8859_6},
1282         {"ar_YE"        , C_ISO_8859_6  , C_ISO_8859_6},
1283
1284         {"el_GR"        , C_ISO_8859_7  , C_ISO_8859_7},
1285         {"he_IL"        , C_ISO_8859_8  , C_ISO_8859_8},
1286         {"iw_IL"        , C_ISO_8859_8  , C_ISO_8859_8},
1287         {"tr_TR"        , C_ISO_8859_9  , C_ISO_8859_9},
1288
1289         {"lv_LV"        , C_ISO_8859_13 , C_ISO_8859_13},
1290         {"mi_NZ"        , C_ISO_8859_13 , C_ISO_8859_13},
1291
1292         {"cy_GB"        , C_ISO_8859_14 , C_ISO_8859_14},
1293
1294         {"ar_IN"        , C_UTF_8       , C_UTF_8},
1295         {"en_IN"        , C_UTF_8       , C_UTF_8},
1296         {"se_NO"        , C_UTF_8       , C_UTF_8},
1297         {"ta_IN"        , C_UTF_8       , C_UTF_8},
1298         {"te_IN"        , C_UTF_8       , C_UTF_8},
1299         {"ur_PK"        , C_UTF_8       , C_UTF_8},
1300
1301         {"th_TH"        , C_TIS_620     , C_TIS_620},
1302         /* {"th_TH"     , C_WINDOWS_874}, */
1303         /* {"th_TH"     , C_ISO_8859_11}, */
1304
1305         {"ka_GE"        , C_GEORGIAN_PS , C_GEORGIAN_PS},
1306         {"vi_VN.TCVN"   , C_TCVN5712_1  , C_TCVN5712_1},
1307
1308         {"C"                    , C_US_ASCII    , C_US_ASCII},
1309         {"POSIX"                , C_US_ASCII    , C_US_ASCII},
1310         {"ANSI_X3.4-1968"       , C_US_ASCII    , C_US_ASCII},
1311 };
1312
1313 static GHashTable *conv_get_charset_to_str_table(void)
1314 {
1315         static GHashTable *table;
1316         gint i;
1317
1318         if (table)
1319                 return table;
1320
1321         table = g_hash_table_new(NULL, g_direct_equal);
1322
1323         for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1324                 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1325                     == NULL) {
1326                         g_hash_table_insert
1327                                 (table, GUINT_TO_POINTER(charsets[i].charset),
1328                                  charsets[i].name);
1329                 }
1330         }
1331
1332         return table;
1333 }
1334
1335 static GHashTable *conv_get_charset_from_str_table(void)
1336 {
1337         static GHashTable *table;
1338         gint i;
1339
1340         if (table)
1341                 return table;
1342
1343         table = g_hash_table_new(str_case_hash, str_case_equal);
1344
1345         for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1346                 g_hash_table_insert(table, charsets[i].name,
1347                                     GUINT_TO_POINTER(charsets[i].charset));
1348         }
1349
1350         return table;
1351 }
1352
1353 const gchar *conv_get_charset_str(CharSet charset)
1354 {
1355         GHashTable *table;
1356
1357         table = conv_get_charset_to_str_table();
1358         return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
1359 }
1360
1361 CharSet conv_get_charset_from_str(const gchar *charset)
1362 {
1363         GHashTable *table;
1364
1365         if (!charset) return C_AUTO;
1366
1367         table = conv_get_charset_from_str_table();
1368         return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
1369 }
1370
1371 CharSet conv_get_locale_charset(void)
1372 {
1373         static CharSet cur_charset = -1;
1374         const gchar *cur_locale;
1375         const gchar *p;
1376         gint i;
1377
1378         if (cur_charset != -1)
1379                 return cur_charset;
1380
1381         cur_locale = conv_get_current_locale();
1382         if (!cur_locale) {
1383                 cur_charset = C_US_ASCII;
1384                 return cur_charset;
1385         }
1386
1387         if (strcasestr(cur_locale, "UTF-8")) {
1388                 cur_charset = C_UTF_8;
1389                 return cur_charset;
1390         }
1391
1392         if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1393                 cur_charset = C_ISO_8859_15;
1394                 return cur_charset;
1395         }
1396
1397         for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1398                 const gchar *p;
1399
1400                 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1401                    "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1402                 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1403                                  strlen(locale_table[i].locale))) {
1404                         cur_charset = locale_table[i].charset;
1405                         return cur_charset;
1406                 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1407                          !strchr(p + 1, '.')) {
1408                         if (strlen(cur_locale) == 2 &&
1409                             !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1410                                 cur_charset = locale_table[i].charset;
1411                                 return cur_charset;
1412                         }
1413                 }
1414         }
1415
1416         cur_charset = C_AUTO;
1417         return cur_charset;
1418 }
1419
1420 const gchar *conv_get_locale_charset_str(void)
1421 {
1422         static const gchar *codeset = NULL;
1423
1424         if (!codeset)
1425                 codeset = conv_get_charset_str(conv_get_locale_charset());
1426
1427         return codeset ? codeset : CS_INTERNAL;
1428 }
1429
1430 CharSet conv_get_internal_charset(void)
1431 {
1432         return C_INTERNAL;
1433 }
1434
1435 const gchar *conv_get_internal_charset_str(void)
1436 {
1437         return CS_INTERNAL;
1438 }
1439
1440 CharSet conv_get_outgoing_charset(void)
1441 {
1442         static CharSet out_charset = -1;
1443         const gchar *cur_locale;
1444         const gchar *p;
1445         gint i;
1446
1447         if (out_charset != -1)
1448                 return out_charset;
1449
1450         cur_locale = conv_get_current_locale();
1451         if (!cur_locale) {
1452                 out_charset = C_AUTO;
1453                 return out_charset;
1454         }
1455
1456         if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1457                 out_charset = C_ISO_8859_15;
1458                 return out_charset;
1459         }
1460
1461         for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1462                 const gchar *p;
1463
1464                 if (!g_ascii_strncasecmp(cur_locale, locale_table[i].locale,
1465                                  strlen(locale_table[i].locale))) {
1466                         out_charset = locale_table[i].out_charset;
1467                         break;
1468                 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1469                          !strchr(p + 1, '.')) {
1470                         if (strlen(cur_locale) == 2 &&
1471                             !g_ascii_strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1472                                 out_charset = locale_table[i].out_charset;
1473                                 break;
1474                         }
1475                 }
1476         }
1477
1478         return out_charset;
1479 }
1480
1481 const gchar *conv_get_outgoing_charset_str(void)
1482 {
1483         CharSet out_charset;
1484         const gchar *str;
1485
1486         if (prefs_common.outgoing_charset) {
1487                 if (!isalpha((guchar)prefs_common.outgoing_charset[0])) {
1488                         g_free(prefs_common.outgoing_charset);
1489                         prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1490                 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1491                         return prefs_common.outgoing_charset;
1492         }
1493
1494         out_charset = conv_get_outgoing_charset();
1495         str = conv_get_charset_str(out_charset);
1496
1497         return str ? str : CS_UTF_8;
1498 }
1499
1500 gboolean conv_is_multibyte_encoding(CharSet encoding)
1501 {
1502         switch (encoding) {
1503         case C_EUC_JP:
1504         case C_EUC_JP_MS:
1505         case C_EUC_KR:
1506         case C_EUC_TW:
1507         case C_EUC_CN:
1508         case C_ISO_2022_JP:
1509         case C_ISO_2022_JP_2:
1510         case C_ISO_2022_JP_3:
1511         case C_ISO_2022_KR:
1512         case C_ISO_2022_CN:
1513         case C_SHIFT_JIS:
1514         case C_GB2312:
1515         case C_BIG5:
1516         case C_UTF_8:
1517         case C_UTF_7:
1518                 return TRUE;
1519         default:
1520                 return FALSE;
1521         }
1522 }
1523
1524 const gchar *conv_get_current_locale(void)
1525 {
1526         const gchar *cur_locale;
1527
1528         cur_locale = g_getenv("LC_ALL");
1529         if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1530         if (!cur_locale) cur_locale = g_getenv("LANG");
1531         if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1532
1533         debug_print("current locale: %s\n",
1534                     cur_locale ? cur_locale : "(none)");
1535
1536         return cur_locale;
1537 }
1538
1539 void conv_unmime_header_overwrite(gchar *str)
1540 {
1541         gchar *buf;
1542         gint buflen;
1543         CharSet cur_charset;
1544
1545         cur_charset = conv_get_locale_charset();
1546
1547         if (cur_charset == C_EUC_JP) {
1548                 buflen = strlen(str) * 2 + 1;
1549                 Xalloca(buf, buflen, return);
1550                 conv_anytodisp(buf, buflen, str);
1551                 unmime_header(str, buf);
1552         } else {
1553                 buflen = strlen(str) + 1;
1554                 Xalloca(buf, buflen, return);
1555                 unmime_header(buf, str);
1556                 strncpy2(str, buf, buflen);
1557         }
1558 }
1559
1560 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1561                         const gchar *charset)
1562 {
1563         CharSet cur_charset;
1564
1565         cur_charset = conv_get_locale_charset();
1566
1567         if (cur_charset == C_EUC_JP) {
1568                 gchar *buf;
1569                 gint buflen;
1570
1571                 buflen = strlen(str) * 2 + 1;
1572                 Xalloca(buf, buflen, return);
1573                 conv_anytodisp(buf, buflen, str);
1574                 unmime_header(outbuf, buf);
1575         } else {
1576                 gchar *tmp = NULL;
1577                 unmime_header(outbuf, str);
1578
1579                 if (outbuf && !g_utf8_validate(outbuf, -1, NULL)) {
1580                         if (conv_get_locale_charset() != C_INTERNAL)
1581                                 tmp = conv_codeset_strdup(outbuf,
1582                                         conv_get_locale_charset_str(),
1583                                         CS_INTERNAL);
1584
1585                         if (tmp) {
1586                                 strncpy(outbuf, tmp, outlen-1);
1587                                 g_free(tmp);
1588                         } else {
1589                                 conv_unreadable_8bit(outbuf);
1590                         }
1591                 }
1592         }
1593
1594 }
1595
1596 #define MAX_LINELEN             76
1597 #define MAX_HARD_LINELEN        996
1598 #define MIMESEP_BEGIN           "=?"
1599 #define MIMESEP_END             "?="
1600
1601 #define LBREAK_IF_REQUIRED(cond, is_plain_text)                         \
1602 {                                                                       \
1603         if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) {         \
1604                 *destp = '\0';                                          \
1605                 return;                                                 \
1606         }                                                               \
1607                                                                         \
1608         if ((cond) && *srcp) {                                          \
1609                 if (destp > (guchar *)dest && left < MAX_LINELEN - 1) { \
1610                         if (isspace(*(destp - 1)))                      \
1611                                 destp--;                                \
1612                         else if (is_plain_text && isspace(*srcp))       \
1613                                 srcp++;                                 \
1614                         if (*srcp) {                                    \
1615                                 *destp++ = '\n';                        \
1616                                 *destp++ = ' ';                         \
1617                                 left = MAX_LINELEN - 1;                 \
1618                         }                                               \
1619                 }                                                       \
1620         }                                                               \
1621 }
1622
1623 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1624                         gint header_len, gboolean addr_field)
1625 {
1626         const gchar *cur_encoding;
1627         const gchar *out_encoding;
1628         gint mimestr_len;
1629         gchar *mimesep_enc;
1630         gint left;
1631         const guchar *srcp = src;
1632         guchar *destp = dest;
1633         gboolean use_base64;
1634
1635         g_return_if_fail(g_utf8_validate(src, -1, NULL) == TRUE);
1636
1637         if (MB_CUR_MAX > 1) {
1638                 use_base64 = TRUE;
1639                 mimesep_enc = "?B?";
1640         } else {
1641                 use_base64 = FALSE;
1642                 mimesep_enc = "?Q?";
1643         }
1644
1645         cur_encoding = CS_INTERNAL;
1646         out_encoding = conv_get_outgoing_charset_str();
1647         if (!strcmp(out_encoding, CS_US_ASCII))
1648                 out_encoding = CS_ISO_8859_1;
1649
1650         mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1651                 strlen(mimesep_enc) + strlen(MIMESEP_END);
1652
1653         left = MAX_LINELEN - header_len;
1654
1655         while (*srcp) {
1656                 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1657
1658                 while (isspace(*srcp)) {
1659                         *destp++ = *srcp++;
1660                         left--;
1661                         LBREAK_IF_REQUIRED(left <= 0, TRUE);
1662                 }
1663
1664                 /* output as it is if the next word is ASCII string */
1665                 if (!is_next_nonascii(srcp)) {
1666                         gint word_len;
1667
1668                         word_len = get_next_word_len(srcp);
1669                         LBREAK_IF_REQUIRED(left < word_len, TRUE);
1670                         while (word_len > 0) {
1671                                 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1672                                 *destp++ = *srcp++;
1673                                 left--;
1674                                 word_len--;
1675                         }
1676
1677                         continue;
1678                 }
1679
1680                 /* don't include parentheses in encoded strings */
1681                 if (addr_field && (*srcp == '(' || *srcp == ')')) {
1682                         LBREAK_IF_REQUIRED(left < 2, FALSE);
1683                         *destp++ = *srcp++;
1684                         left--;
1685                 }
1686
1687                 while (1) {
1688                         gint mb_len = 0;
1689                         gint cur_len = 0;
1690                         gchar *part_str;
1691                         gchar *out_str;
1692                         gchar *enc_str;
1693                         const guchar *p = srcp;
1694                         gint out_str_len;
1695                         gint out_enc_str_len;
1696                         gint mime_block_len;
1697                         gboolean cont = FALSE;
1698
1699                         while (*p != '\0') {
1700                                 if (isspace(*p) && !is_next_nonascii(p + 1))
1701                                         break;
1702                                 /* don't include parentheses in encoded
1703                                    strings */
1704                                 if (addr_field && (*p == '(' || *p == ')'))
1705                                         break;
1706
1707                                 mb_len = g_utf8_skip[*p];
1708
1709                                 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1710                                 out_str = conv_codeset_strdup
1711                                         (part_str, cur_encoding, out_encoding);
1712                                 if (!out_str) {
1713                                         g_warning("conv_encode_header(): code conversion failed\n");
1714                                         conv_unreadable_8bit(part_str);
1715                                         out_str = g_strdup(part_str);
1716                                 }
1717                                 out_str_len = strlen(out_str);
1718
1719                                 if (use_base64)
1720                                         out_enc_str_len = B64LEN(out_str_len);
1721                                 else
1722                                         out_enc_str_len =
1723                                                 qp_get_q_encoding_len(out_str);
1724
1725                                 g_free(out_str);
1726
1727                                 if (mimestr_len + out_enc_str_len <= left) {
1728                                         cur_len += mb_len;
1729                                         p += mb_len;
1730                                 } else if (cur_len == 0) {
1731                                         LBREAK_IF_REQUIRED(1, FALSE);
1732                                         continue;
1733                                 } else {
1734                                         cont = TRUE;
1735                                         break;
1736                                 }
1737                         }
1738
1739                         if (cur_len > 0) {
1740                                 Xstrndup_a(part_str, srcp, cur_len, );
1741                                 out_str = conv_codeset_strdup
1742                                         (part_str, cur_encoding, out_encoding);
1743                                 if (!out_str) {
1744                                         g_warning("conv_encode_header(): code conversion failed\n");
1745                                         conv_unreadable_8bit(part_str);
1746                                         out_str = g_strdup(part_str);
1747                                 }
1748                                 out_str_len = strlen(out_str);
1749
1750                                 if (use_base64)
1751                                         out_enc_str_len = B64LEN(out_str_len);
1752                                 else
1753                                         out_enc_str_len =
1754                                                 qp_get_q_encoding_len(out_str);
1755
1756                                 Xalloca(enc_str, out_enc_str_len + 1, );
1757                                 if (use_base64)
1758                                         base64_encode(enc_str, out_str, out_str_len);
1759                                 else
1760                                         qp_q_encode(enc_str, out_str);
1761
1762                                 g_free(out_str);
1763
1764                                 /* output MIME-encoded string block */
1765                                 mime_block_len = mimestr_len + strlen(enc_str);
1766                                 g_snprintf(destp, mime_block_len + 1,
1767                                            MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1768                                            out_encoding, mimesep_enc, enc_str);
1769                                 destp += mime_block_len;
1770                                 srcp += cur_len;
1771
1772                                 left -= mime_block_len;
1773                         }
1774
1775                         LBREAK_IF_REQUIRED(cont, FALSE);
1776
1777                         if (cur_len == 0)
1778                                 break;
1779                 }
1780         }
1781
1782         *destp = '\0';
1783 }
1784
1785 #undef LBREAK_IF_REQUIRED